Example 06: Extracting a hidden API

Setup

Code
library(here)      # manage file paths
here() starts at /Users/kjhealy/Documents/courses/socdata.co
Code
library(socviz)    # data and some useful functions
library(tidyverse) # your friend and mine
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.4.4     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
Code
# httr2 supplies request(), the req_*() builders and resp_body_json() used below.
library(httr2)
# Turn off dplyr's informational message about summarise() grouping.
options(dplyr.summarise.inform = FALSE)
# conflicted must be attached before conflicts_prefer() can be called.
library(conflicted)

# Declare which package wins for the two names the tidyverse startup message
# reports as masked (dplyr::filter / dplyr::lag vs. the stats versions).
conflicts_prefer(
  dplyr::filter,
  dplyr::lag
)
[conflicted] Will prefer dplyr::filter over any other package.
[conflicted] Will prefer dplyr::lag over any other package.

Hidden API

As we showed in class, one trick is to open the browser's developer tools, filter the Network tab by Fetch/XHR, and sort by size to find the JSON endpoint. Then extract the call with Copy as cURL, and convert it to valid R directly from the clipboard with httr2::curl_translate(). If you get errors, see whether you can fix them by editing the cookie slightly, removing the offending fields (they are delimited with a ;). In this case I got:

Error: unexpected numeric constant in:
"GWUw3ZkJmVjdMNmgxSnZjUmt5aSUyQnVXa0s0T0dzOFk1eXkyNGwwUSUzRCUzRA; cto_bidid=vG-xRF8xNEhRZEhiT0V4UzBwNmklMkZtRXJGTm0zR0lETzFCWU9TZk43TkdJTk9sM25RaiUyRmhscEgyZFdLcFdmVVElMkJYUCUyQk9TeDNSWFZTTEgya
    dnt = "1"

So I cut out the cto_bidid=... component of the cookie, and it worked.

Code
# Call the JSON endpoint behind the Forbes top-colleges page, replaying the
# URL, query parameters and headers of the request copied from the browser.
#
# NOTE: the pipeline ends in req_perform(), so despite its name `req` holds
# the *response*, not the request. The name is kept because the code further
# down refers to `req`.
#
# NOTE(review): the cookie is a snapshot of one browser session; presumably it
# will stop being accepted at some point and need to be re-copied -- confirm.
req <- request("https://www.forbes.com/forbesapi/org/top-colleges/2023/position/true.json") |>
  req_url_query(
    limit = "1000",
    fields = "organizationName,academics,state,financialAid,rank,medianBaseSalary,campusSetting,studentPopulation,squareImage,uri,description,grade"
  ) |>
  req_headers(
    authority = "www.forbes.com",
    accept = "*/*",
    `accept-language` = "en-US,en;q=0.9",
    # The cookie is glued together with paste0() so the very long value can be
    # wrapped in the source WITHOUT putting line breaks into the header itself:
    # a string literal that spans several lines contains "\n", and HTTP header
    # values must not contain CR/LF. Each piece ends in "; " so the joined
    # value is a normal "name=value; name=value" cookie string.
    cookie = paste0(
      "VWO=79.600; client_id=6c054d90ce75cbaa5e03a1e918ddb782ee0; blaize_session=6af976bd-758b-4b9b-9c79-b40d9032b012; blaize_tracking_id=0afddb44-9f7e-4f71-b3ff-37a09761581f; _gcl_au=1.1.1646889643.1709753491; _lc2_fpi=1f8b889072fc--01hrakzr0n7jtr3jasdcwmbf6v; _lc2_fpi_meta={%22w%22:1709753491477}; _fbp=fb.1.1709753491525.277895163; __qca=P0-2050963919-1709753491424; usprivacy=1---; us_privacy=1---; _swb=90a388df-7630-472a-afa2-5985886ad544; _ketch_consent_v1_=eyJiZWhhdmlvcmFsX2FkdmVydGlzaW5nIjp7InN0YXR1cyI6ImdyYW50ZWQiLCJjYW5vbmljYWxQdXJwb3NlcyI6WyJiZWhhdmlvcmFsX2FkdmVydGlzaW5nIl19LCJhbmFseXRpY3MiOnsic3RhdHVzIjoiZ3JhbnRlZCIsImNhbm9uaWNhbFB1cnBvc2VzIjpbImFuYWx5dGljcyJdfSwiZnVuY3Rpb25hbCI6eyJzdGF0dXMiOiJncmFudGVkIiwiY2Fub25pY2FsUHVycG9zZXMiOlsicHJvZF9lbmhhbmNlbWVudCIsInBlcnNvbmFsaXphdGlvbiJdfSwicmVxdWlyZWQiOnsic3RhdHVzIjoiZ3JhbnRlZCIsImNhbm9uaWNhbFB1cnBvc2VzIjpbImVzc2VudGlhbF9zZXJ2aWNlcyJdfX0%3D; _cb=u8YuwittgF9Fnc6; AWSALB=Sw6oWLeI4yi62cG8rtiRRqA6dC0itJbybPFNSpg07bHk4DaXVcVz3PgA2wS2ajm9CMC5LOONAEMSxS/76u9SUJ1EKhk030enJCApczujT5sx/ID8F8lwOBTr4zKH; AWSALBCORS=Sw6oWLeI4yi62cG8rtiRRqA6dC0itJbybPFNSpg07bHk4DaXVcVz3PgA2wS2ajm9CMC5LOONAEMSxS/76u9SUJ1EKhk030enJCApczujT5sx/ID8F8lwOBTr4zKH; _hp2_id.657665248=%7B%22userId%22%3A%221117571212713455%22%2C%22pageviewId%22%3A%2289819520474943%22%2C%22sessionId%22%3A%222432193757053261%22%2C%22identity%22%3Anull%2C%22trackerVersion%22%3A%224.0%22%7D; _li_dcdm_c=.forbes.com; AMP_TOKEN=%24NOT_FOUND; _ga=GA1.2.1579195198.1709753491; _gid=GA1.2.2045193768.1710854361; _dc_gtm_UA-5883199-3=1; _gat_UA-5883199-3=1; BCSessionID=58243ab4-b378-4a63-ae39-14de20c3692e; rbzid=sHg5+m2mA3mFHNRQbee+ZgiR6LGO2wCeINEpnL7OZQSM4O259+ZPxFRdjnDWn970W2WYuPSNUG8b26ajlQDIirX5tB5s//CicZR8wnmlNEKeVNrW2A9ygTIj49dD0mUqFD/z7mSYAcdYRhU0prOS5T9V2riH0xl1bJOpA1ssspbbE82Izd2hrGecC19J2Qy2CcN0s4lv38eSNRFToun1bwwGQPWTT7GEbx3x2NMOk0s=; rbzsessionid=0f61f793ef1117bf7987e6ea591f6b6e; ",
      "_hp2_ses_props.657665248=%7B%22ts%22%3A1710854361161%2C%22d%22%3A%22www.forbes.com%22%2C%22h%22%3A%22%2Ftop-colleges%2F%22%7D; _swb_consent_=eyJlbnZpcm9ubWVudENvZGUiOiJwcm9kdWN0aW9uIiwiaWRlbnRpdGllcyI6eyJfZ29vZ2xlQW5hbHl0aWNzQ2xpZW50SUQiOiJHQTEuMi4xNTc5MTk1MTk4LjE3MDk3NTM0OTEiLCJzd2Jfd2Vic2l0ZV9zbWFydF90YWciOiI5MGEzODhkZi03NjMwLTQ3MmEtYWZhMi01OTg1ODg2YWQ1NDQifSwianVyaXNkaWN0aW9uQ29kZSI6InVzX2dlbmVyYWwiLCJwcm9wZXJ0eUNvZGUiOiJ3ZWJzaXRlX3NtYXJ0X3RhZyIsInB1cnBvc2VzIjp7ImFuYWx5dGljcyI6eyJhbGxvd2VkIjoidHJ1ZSIsImxlZ2FsQmFzaXNDb2RlIjoiZGlzY2xvc3VyZSJ9LCJiZWhhdmlvcmFsX2FkdmVydGlzaW5nIjp7ImFsbG93ZWQiOiJ0cnVlIiwibGVnYWxCYXNpc0NvZGUiOiJkaXNjbG9zdXJlIn0sImZ1bmN0aW9uYWwiOnsiYWxsb3dlZCI6InRydWUiLCJsZWdhbEJhc2lzQ29kZSI6ImRpc2Nsb3N1cmUifSwicmVxdWlyZWQiOnsiYWxsb3dlZCI6InRydWUiLCJsZWdhbEJhc2lzQ29kZSI6ImRpc2Nsb3N1cmUifX0sImNvbGxlY3RlZEF0IjoxNzEwODU0MzYxfQ%3D%3D; _li_ss=CiEKBQgGEL0XCgUIDBC6FwoGCKIBEL0XCgkI_____wcQuhcSHw3e-4qqEhgKBgiUARC7FwoGCJMBELsXCgYIyQEQvRcSFw3Jv7ToEhAKBgiTARC7FwoGCMkBEL0X; _li_ss_meta={%22w%22:1710854361715%2C%22e%22:1713446361715}; __gads=ID=f61f6a0dff46e0a7:T=1709753492:RT=1710854361:S=ALNI_MbXIwWsL73m0OPojKpMgyldxhRJSQ; __gpi=UID=00000dcfd19a1b09:T=1709753492:RT=1710854361:S=ALNI_MYJqyt3jR_3opCN-HAjr61dfYPa9A; __eoi=ID=0f1ea2bfce4cc1d9:T=1709753492:RT=1710854361:S=AA-AfjZKWnnvNyEdJgxLbYqmXKAW; FCNEC=%5B%5B%22AKsRol8W6qsGPWSU3vRi8RFqAM_5i9Cb27RfkXDBy_uu8_KnG2d5dTq5lr5W3psBFyKYQiJycJbxDEKz_7A0ISBasESitraooPIzE9Q6HLZfQmJprC8GlcgQUj8jyng3DAFzF6MCniXygV2L22IaYtN05Hd-wKzSog%3D%3D%22%5D%5D; ki_t=1709753495187%3B1710854362824%3B1710854362824%3B2%3B4; _chartbeat2=.1709753495366.1710854362950.10000000000001.BjQqKgCE_wS-BsnVV_DFQQxuGRWer.1; _cb_svref=external; QSI_HistorySession=https%3A%2F%2Fwww.forbes.com%2Ftop-colleges%2F~1710854363133; ",
      "cto_bundle=u40uBl81YnYxZXlhZUxxNnhab1J2WXdjaVU4N1VaaWNwNDJTZCUyRjhLcjVLT0x0SjFXbTE5NEpONjRUSWE5TGZsZ2IzdWlYdmVSVG4lMkJiNDhUT3FBZ3FPc3hwdVhpbiUyQjRubmRLNW96dnFDajNncG9iZ3dGNnFGWUw3ZkJmVjdMNmgxSnZjUmt5aSUyQnVXa0s0T0dzOFk1eXkyNGwwUSUzRCUzRA; _chartbeat4=t=Bmm0V4CKeu9dBYQ-hhDemjcsCMm0H7&E=5&x=3265&c=0.67&y=24046&w=992"
    ),
    dnt = "1",
    `user-agent` = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
  ) |>
  req_perform()
Code
# Print a summary of the object returned by req_perform(): despite its name,
# `req` is an httr2 response (method, URL, status, content type, body size).
req
<httr2_response>
GET
https://www.forbes.com/forbesapi/org/top-colleges/2023/position/true.json?limit=1000&fields=organizationName%2Cacademics%2Cstate%2CfinancialAid%2Crank%2CmedianBaseSalary%2CcampusSetting%2CstudentPopulation%2CsquareImage%2Curi%2Cdescription%2Cgrade
Status: 200 OK
Content-Type: application/json
Body: In memory (1790187 bytes)
Code
# Rectangle the parsed JSON body in two stages.

# Stage 1: wrap the parsed body in a one-column tibble, spread its components
# into columns, and keep only the `organizationsLists` column.
df <- tibble(json = resp_body_json(req)) |>
  unnest_wider(col = json) |>
  select(organizationsLists)

# Stage 2: one row per element of `organizationsLists`, then one column per
# field of each element.
df <- df |>
  unnest_longer(col = organizationsLists) |>
  unnest_wider(col = organizationsLists)

# Show the resulting tibble.
df
# A tibble: 500 × 12
   uri    rank description organizationName state studentPopulation academics   
   <chr> <int> <chr>       <chr>            <chr>             <int> <list>      
 1 prin…     1 "Princeton… Princeton Unive… NJ                 8478 <named list>
 2 yale…     2 "Yale Univ… Yale University  CT                14567 <named list>
 3 stan…     3 "The San F… Stanford Univer… CA                17680 <named list>
 4 mass…     4 "The Massa… Massachusetts I… MA                11934 <named list>
 5 univ…     5 "UC Berkel… University of C… CA                45036 <named list>
 6 colu…     6 "Columbia … Columbia Univer… NY                33776 <named list>
 7 univ…     7 "The Unive… University of C… CA                46116 <named list>
 8 univ…     8 "The Unive… University of P… PA                28038 <named list>
 9 harv…     9 "Harvard U… Harvard Univers… MA                31345 <named list>
10 will…    10 "Williams … Williams College MA                 2224 <named list>
# ℹ 490 more rows
# ℹ 5 more variables: financialAid <list>, campusSetting <chr>, grade <chr>,
#   medianBaseSalary <dbl>, squareImage <chr>

And we’re ready to go.