── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
[conflicted] Will prefer dplyr::filter over any other package.
[conflicted] Will prefer dplyr::lag over any other package.
Hidden API
As we showed in class, one trick is to filter the browser's network requests by Fetch/XHR and sort by size to find the JSON endpoint. Then extract the call with Copy as cURL, and convert it to valid R directly from the clipboard with httr2::curl_translate(). If you get errors, see whether you can edit the cookie slightly by removing the offending fields (they are delimited with a ;). In this case I got an "unexpected numeric constant" error pointing at the cto_bidid field of the cookie; after cutting out that component, the request worked:
GET
https://www.forbes.com/forbesapi/org/top-colleges/2023/position/true.json?limit=1000&fields=organizationName%2Cacademics%2Cstate%2CfinancialAid%2Crank%2CmedianBaseSalary%2CcampusSetting%2CstudentPopulation%2CsquareImage%2Curi%2Cdescription%2Cgrade
# A tibble: 500 × 12
uri rank description organizationName state studentPopulation academics
<chr> <int> <chr> <chr> <chr> <int> <list>
1 prin… 1 "Princeton… Princeton Unive… NJ 8478 <named list>
2 yale… 2 "Yale Univ… Yale University CT 14567 <named list>
3 stan… 3 "The San F… Stanford Univer… CA 17680 <named list>
4 mass… 4 "The Massa… Massachusetts I… MA 11934 <named list>
5 univ… 5 "UC Berkel… University of C… CA 45036 <named list>
6 colu… 6 "Columbia … Columbia Univer… NY 33776 <named list>
7 univ… 7 "The Unive… University of C… CA 46116 <named list>
8 univ… 8 "The Unive… University of P… PA 28038 <named list>
9 harv… 9 "Harvard U… Harvard Univers… MA 31345 <named list>
10 will… 10 "Williams … Williams College MA 2224 <named list>
# ℹ 490 more rows
# ℹ 5 more variables: financialAid <list>, campusSetting <chr>, grade <chr>,
# medianBaseSalary <dbl>, squareImage <chr>
And we’re ready to go.
Source Code
---
title: "Example 06: Extracting a hidden API"
---

## Setup

```{r}
#| label: "hidden-apis"
library(here)      # manage file paths
library(socviz)    # data and some useful functions
library(tidyverse) # your friend and mine
library(httr2)

options(dplyr.summarise.inform = FALSE)

# Resolve the dplyr/stats name clashes explicitly in favor of dplyr.
library(conflicted)
conflicts_prefer(
  dplyr::filter,
  dplyr::lag
)
```

## Hidden API

As we showed in class, one trick is to filter by Fetch/XHR and sort by size to find the json endpoint. Then extract the call with Copy as cURL, and convert it to valid R directly from the clipboard with `httr2::curl_translate()`. If you get errors, look to see if you can edit the cookie slightly by removing some of the offending fields (they are delimited with a `;`). In this case I got

```{.r}
Error: unexpected numeric constant in:
"GWUw3ZkJmVjdMNmgxSnZjUmt5aSUyQnVXa0s0T0dzOFk1eXkyNGwwUSUzRCUzRA; cto_bidid=vG-xRF8xNEhRZEhiT0V4UzBwNmklMkZtRXJGTm0zR0lETzFCWU9TZk43TkdJTk9sM25RaiUyRmhscEgyZFdLcFdmVVElMkJYUCUyQk9TeDNSWFZTTEgya
 dnt = "1"
```

And cut out the `cto_bidid ...` component, and it worked.

```{r}
# Build the request produced by curl_translate() and send it.
# The pipeline ends in req_perform(), so `req` holds the *response*
# (it is what resp_body_json() is called on below), despite its name.
#
# The header values must each stay on a single line: a line break inside
# the cookie or user-agent string would become part of the header value.
#
# NOTE(review): the cookie is a browser session cookie pasted verbatim from
# "Copy as cURL". It will presumably expire, and it is arguably not something
# to publish. TODO confirm which fields (if any) the endpoint actually needs.
req <- request("https://www.forbes.com/forbesapi/org/top-colleges/2023/position/true.json") |>
  req_url_query(
    limit = "1000",
    fields = "organizationName,academics,state,financialAid,rank,medianBaseSalary,campusSetting,studentPopulation,squareImage,uri,description,grade"
  ) |>
  req_headers(
    authority = "www.forbes.com",
    accept = "*/*",
    `accept-language` = "en-US,en;q=0.9",
    cookie = "VWO=79.600; client_id=6c054d90ce75cbaa5e03a1e918ddb782ee0; blaize_session=6af976bd-758b-4b9b-9c79-b40d9032b012; blaize_tracking_id=0afddb44-9f7e-4f71-b3ff-37a09761581f; _gcl_au=1.1.1646889643.1709753491; _lc2_fpi=1f8b889072fc--01hrakzr0n7jtr3jasdcwmbf6v; _lc2_fpi_meta={%22w%22:1709753491477}; _fbp=fb.1.1709753491525.277895163; __qca=P0-2050963919-1709753491424; usprivacy=1---; us_privacy=1---; _swb=90a388df-7630-472a-afa2-5985886ad544; _ketch_consent_v1_=eyJiZWhhdmlvcmFsX2FkdmVydGlzaW5nIjp7InN0YXR1cyI6ImdyYW50ZWQiLCJjYW5vbmljYWxQdXJwb3NlcyI6WyJiZWhhdmlvcmFsX2FkdmVydGlzaW5nIl19LCJhbmFseXRpY3MiOnsic3RhdHVzIjoiZ3JhbnRlZCIsImNhbm9uaWNhbFB1cnBvc2VzIjpbImFuYWx5dGljcyJdfSwiZnVuY3Rpb25hbCI6eyJzdGF0dXMiOiJncmFudGVkIiwiY2Fub25pY2FsUHVycG9zZXMiOlsicHJvZF9lbmhhbmNlbWVudCIsInBlcnNvbmFsaXphdGlvbiJdfSwicmVxdWlyZWQiOnsic3RhdHVzIjoiZ3JhbnRlZCIsImNhbm9uaWNhbFB1cnBvc2VzIjpbImVzc2VudGlhbF9zZXJ2aWNlcyJdfX0%3D; _cb=u8YuwittgF9Fnc6; AWSALB=Sw6oWLeI4yi62cG8rtiRRqA6dC0itJbybPFNSpg07bHk4DaXVcVz3PgA2wS2ajm9CMC5LOONAEMSxS/76u9SUJ1EKhk030enJCApczujT5sx/ID8F8lwOBTr4zKH; AWSALBCORS=Sw6oWLeI4yi62cG8rtiRRqA6dC0itJbybPFNSpg07bHk4DaXVcVz3PgA2wS2ajm9CMC5LOONAEMSxS/76u9SUJ1EKhk030enJCApczujT5sx/ID8F8lwOBTr4zKH; _hp2_id.657665248=%7B%22userId%22%3A%221117571212713455%22%2C%22pageviewId%22%3A%2289819520474943%22%2C%22sessionId%22%3A%222432193757053261%22%2C%22identity%22%3Anull%2C%22trackerVersion%22%3A%224.0%22%7D; _li_dcdm_c=.forbes.com; AMP_TOKEN=%24NOT_FOUND; _ga=GA1.2.1579195198.1709753491; _gid=GA1.2.2045193768.1710854361; _dc_gtm_UA-5883199-3=1; _gat_UA-5883199-3=1; BCSessionID=58243ab4-b378-4a63-ae39-14de20c3692e; rbzid=sHg5+m2mA3mFHNRQbee+ZgiR6LGO2wCeINEpnL7OZQSM4O259+ZPxFRdjnDWn970W2WYuPSNUG8b26ajlQDIirX5tB5s//CicZR8wnmlNEKeVNrW2A9ygTIj49dD0mUqFD/z7mSYAcdYRhU0prOS5T9V2riH0xl1bJOpA1ssspbbE82Izd2hrGecC19J2Qy2CcN0s4lv38eSNRFToun1bwwGQPWTT7GEbx3x2NMOk0s=; rbzsessionid=0f61f793ef1117bf7987e6ea591f6b6e; _hp2_ses_props.657665248=%7B%22ts%22%3A1710854361161%2C%22d%22%3A%22www.forbes.com%22%2C%22h%22%3A%22%2Ftop-colleges%2F%22%7D; _swb_consent_=eyJlbnZpcm9ubWVudENvZGUiOiJwcm9kdWN0aW9uIiwiaWRlbnRpdGllcyI6eyJfZ29vZ2xlQW5hbHl0aWNzQ2xpZW50SUQiOiJHQTEuMi4xNTc5MTk1MTk4LjE3MDk3NTM0OTEiLCJzd2Jfd2Vic2l0ZV9zbWFydF90YWciOiI5MGEzODhkZi03NjMwLTQ3MmEtYWZhMi01OTg1ODg2YWQ1NDQifSwianVyaXNkaWN0aW9uQ29kZSI6InVzX2dlbmVyYWwiLCJwcm9wZXJ0eUNvZGUiOiJ3ZWJzaXRlX3NtYXJ0X3RhZyIsInB1cnBvc2VzIjp7ImFuYWx5dGljcyI6eyJhbGxvd2VkIjoidHJ1ZSIsImxlZ2FsQmFzaXNDb2RlIjoiZGlzY2xvc3VyZSJ9LCJiZWhhdmlvcmFsX2FkdmVydGlzaW5nIjp7ImFsbG93ZWQiOiJ0cnVlIiwibGVnYWxCYXNpc0NvZGUiOiJkaXNjbG9zdXJlIn0sImZ1bmN0aW9uYWwiOnsiYWxsb3dlZCI6InRydWUiLCJsZWdhbEJhc2lzQ29kZSI6ImRpc2Nsb3N1cmUifSwicmVxdWlyZWQiOnsiYWxsb3dlZCI6InRydWUiLCJsZWdhbEJhc2lzQ29kZSI6ImRpc2Nsb3N1cmUifX0sImNvbGxlY3RlZEF0IjoxNzEwODU0MzYxfQ%3D%3D; _li_ss=CiEKBQgGEL0XCgUIDBC6FwoGCKIBEL0XCgkI_____wcQuhcSHw3e-4qqEhgKBgiUARC7FwoGCJMBELsXCgYIyQEQvRcSFw3Jv7ToEhAKBgiTARC7FwoGCMkBEL0X; _li_ss_meta={%22w%22:1710854361715%2C%22e%22:1713446361715}; __gads=ID=f61f6a0dff46e0a7:T=1709753492:RT=1710854361:S=ALNI_MbXIwWsL73m0OPojKpMgyldxhRJSQ; __gpi=UID=00000dcfd19a1b09:T=1709753492:RT=1710854361:S=ALNI_MYJqyt3jR_3opCN-HAjr61dfYPa9A; __eoi=ID=0f1ea2bfce4cc1d9:T=1709753492:RT=1710854361:S=AA-AfjZKWnnvNyEdJgxLbYqmXKAW; FCNEC=%5B%5B%22AKsRol8W6qsGPWSU3vRi8RFqAM_5i9Cb27RfkXDBy_uu8_KnG2d5dTq5lr5W3psBFyKYQiJycJbxDEKz_7A0ISBasESitraooPIzE9Q6HLZfQmJprC8GlcgQUj8jyng3DAFzF6MCniXygV2L22IaYtN05Hd-wKzSog%3D%3D%22%5D%5D; ki_t=1709753495187%3B1710854362824%3B1710854362824%3B2%3B4; _chartbeat2=.1709753495366.1710854362950.10000000000001.BjQqKgCE_wS-BsnVV_DFQQxuGRWer.1; _cb_svref=external; QSI_HistorySession=https%3A%2F%2Fwww.forbes.com%2Ftop-colleges%2F~1710854363133; cto_bundle=u40uBl81YnYxZXlhZUxxNnhab1J2WXdjaVU4N1VaaWNwNDJTZCUyRjhLcjVLT0x0SjFXbTE5NEpONjRUSWE5TGZsZ2IzdWlYdmVSVG4lMkJiNDhUT3FBZ3FPc3hwdVhpbiUyQjRubmRLNW96dnFDajNncG9iZ3dGNnFGWUw3ZkJmVjdMNmgxSnZjUmt5aSUyQnVXa0s0T0dzOFk1eXkyNGwwUSUzRCUzRA; _chartbeat4=t=Bmm0V4CKeu9dBYQ-hhDemjcsCMm0H7&E=5&x=3265&c=0.67&y=24046&w=992",
    dnt = "1",
    `user-agent` = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
  ) |>
  req_perform()
```

```{r}
req
```

```{r}
# Wrap the parsed JSON body in a tibble, spread its named fields into
# columns, keep only `organizationsLists`, then unnest it to one row per
# list element and one column per field of each element.
df <- tibble(json = resp_body_json(req)) |>
  unnest_wider(json) |>
  select(organizationsLists) |>
  unnest_longer(organizationsLists) |>
  unnest_wider(organizationsLists)

df
```

And we're ready to go.