Compare commits

..

No commits in common. "517952f977c697c2a78443d50785c5c8d4d96e11" and "b017436be97c7155134e95d89e1b7003746833fc" have entirely different histories.

View file

@ -93,11 +93,6 @@ def scrape_page(page_num):
for i in range(1, 4): for i in range(1, 4):
try: try:
driver = webdriver.Firefox(options=options) driver = webdriver.Firefox(options=options)
driver.get("https://ipinfo.io/ip")
print(
f'Trying with IP {driver.find_element(By.TAG_NAME, "body").text}',
file=sys.stderr,
)
driver.get( driver.get(
"https://tes.collegesource.com/publicview/TES_publicview01.aspx?rid=f080a477-bff8-46df-a5b2-25e9affdd4ed&aid=27b576bb-cd07-4e57-84d0-37475fde70ce" "https://tes.collegesource.com/publicview/TES_publicview01.aspx?rid=f080a477-bff8-46df-a5b2-25e9affdd4ed&aid=27b576bb-cd07-4e57-84d0-37475fde70ce"
) )
@ -107,14 +102,13 @@ def scrape_page(page_num):
break break
except Exception as e: except Exception as e:
driver.quit() driver.quit()
print( print(
f"Attempt {i} failed due to {type(e).__name__}, retrying in 25 seconds...", f"Attempt {i} failed due to {type(e).__name__}, retrying in 25 seconds...",
file=sys.stderr, file=sys.stderr,
) )
sleep(25) sleep(25)
else: else:
raise Exception(f"Failed to load the main page after 4 attempts, aborting.") raise Exception(f"Failed to load the main page after 15 attempts, aborting.")
num_institutions = len( num_institutions = len(
driver.find_elements( driver.find_elements(
@ -139,7 +133,7 @@ def scrape_institution_safe(index, page_num):
) )
sleep(25) sleep(25)
else: else:
raise Exception(f"Failed to scrape {index} after 4 attempts, aborting.") raise Exception(f"Failed to scrape {index} after 15 attempts, aborting.")
# scrape_institution: Scrapes an institution by index. # scrape_institution: Scrapes an institution by index.
@ -187,7 +181,7 @@ def scrape_institution(index, page_num):
"institution": inst_name, "institution": inst_name,
"city": inst_city, "city": inst_city,
"state": inst_state, "state": inst_state,
"transfers": [], "courses": [],
} }
# Open list # Open list
@ -218,7 +212,7 @@ def scrape_institution(index, page_num):
"institution": inst_name, "institution": inst_name,
"city": inst_city, "city": inst_city,
"state": inst_state, "state": inst_state,
"transfers": transfer_courses, "courses": transfer_courses,
} }
@ -237,10 +231,7 @@ def parse_course_td(td, include_credits):
for x in td_text[: len(td_text) - 3] for x in td_text[: len(td_text) - 3]
] ]
return { return [parse_one_course(x, include_credits) for x in courses_info]
"catalog": td.find_element(By.TAG_NAME, "span").text.strip(),
"courses": [parse_one_course(x, include_credits) for x in courses_info],
}
def parse_one_course(course_info, include_credits): def parse_one_course(course_info, include_credits):