From 517952f977c697c2a78443d50785c5c8d4d96e11 Mon Sep 17 00:00:00 2001 From: powe97 <116031952+powe97@users.noreply.github.com> Date: Wed, 13 Mar 2024 23:49:57 -0400 Subject: [PATCH] Re-add catalog name scraping --- transfer_scraper/main.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/transfer_scraper/main.py b/transfer_scraper/main.py index a43363b..3c1795c 100644 --- a/transfer_scraper/main.py +++ b/transfer_scraper/main.py @@ -94,7 +94,10 @@ def scrape_page(page_num): try: driver = webdriver.Firefox(options=options) driver.get("https://ipinfo.io/ip") - print(f"Trying with IP {driver.page_source}", file=sys.stderr) + print( + f'Trying with IP {driver.find_element(By.TAG_NAME, "body").text}', + file=sys.stderr, + ) driver.get( "https://tes.collegesource.com/publicview/TES_publicview01.aspx?rid=f080a477-bff8-46df-a5b2-25e9affdd4ed&aid=27b576bb-cd07-4e57-84d0-37475fde70ce" ) @@ -184,7 +187,7 @@ def scrape_institution(index, page_num): "institution": inst_name, "city": inst_city, "state": inst_state, - "courses": [], + "transfers": [], } # Open list @@ -215,7 +218,7 @@ def scrape_institution(index, page_num): "institution": inst_name, "city": inst_city, "state": inst_state, - "courses": transfer_courses, + "transfers": transfer_courses, } @@ -234,7 +237,10 @@ def parse_course_td(td, include_credits): for x in td_text[: len(td_text) - 3] ] - return [parse_one_course(x, include_credits) for x in courses_info] + return { + "catalog": td.find_element(By.TAG_NAME, "span").text.strip(), + "courses": [parse_one_course(x, include_credits) for x in courses_info], + } def parse_one_course(course_info, include_credits):