Add `if __name__ == "__main__"` guard and fix workflow

This commit is contained in:
powe97 2024-02-29 20:49:45 -05:00
parent 15b09123ee
commit f216c45748
No known key found for this signature in database
GPG key ID: 7D1663B10978D1BA
2 changed files with 221 additions and 205 deletions

View file

@ -29,14 +29,17 @@ jobs:
- name: Install dependencies - name: Install dependencies
working-directory: quatalog-scraping/transfer_scraper working-directory: quatalog-scraping/transfer_scraper
run: pip install -r 'requirements.txt' run: |
python -m pip install --upgrade pip
pip install -r 'requirements.txt'
- name: Log IP - name: Log IP
run: | run: |
echo "Public IP: $(curl -s 'https://ipinfo.io/ip')" echo "Public IP: $(curl -s 'https://ipinfo.io/ip')"
- name: Scrape transfer guide - name: Scrape transfer guide
run: python3 quatalog-scraping/transfer_scraper data/transfer.json data/transfer_state.json run: |
python3 quatalog-scraping/transfer_scraper/main.py data/transfer.json data/transfer_state.json
- name: Upload data to artifact - name: Upload data to artifact
uses: actions/upload-artifact@v4 uses: actions/upload-artifact@v4

View file

@ -100,6 +100,7 @@ def scrape_course_card(html_id, i, note):
} }
def main():
if len(sys.argv) != 3: if len(sys.argv) != 3:
print(f"USAGE: python {sys.argv[0]} <transfer file> <state file>") print(f"USAGE: python {sys.argv[0]} <transfer file> <state file>")
exit(1) exit(1)
@ -136,12 +137,10 @@ print("Loaded state: ", end="", file=sys.stderr)
json.dump(state, sys.stderr, indent=4) json.dump(state, sys.stderr, indent=4)
print("", file=sys.stderr) print("", file=sys.stderr)
# Set up 2hr timeout so that the GH action does not run forever, pretend it's ^C # Set up 2hr timeout so that the GH action does not run forever, pretend it's ^C
signal(SIGALRM, lambda a, b: raise_(KeyboardInterrupt)) signal(SIGALRM, lambda a, b: raise_(KeyboardInterrupt))
alarm(60 * 60 * 2) alarm(60 * 60 * 2)
try: try:
curr_page = 1 curr_page = 1
while state["inst_pg"] <= num_pages: while state["inst_pg"] <= num_pages:
@ -182,9 +181,13 @@ try:
) )
) )
while state["inst_idx"] < inst_list_len: while state["inst_idx"] < inst_list_len:
institution_link = driver.find_element("id", "gdvInstWithEQ").find_elements( institution_link = driver.find_element(
"id", "gdvInstWithEQ"
).find_elements(
By.CSS_SELECTOR, "a[id^=gdvInstWithEQ_btnCreditFromInstName_]" By.CSS_SELECTOR, "a[id^=gdvInstWithEQ_btnCreditFromInstName_]"
)[state["inst_idx"]] )[
state["inst_idx"]
]
fields = institution_link.find_element(By.XPATH, "../..").find_elements( fields = institution_link.find_element(By.XPATH, "../..").find_elements(
By.CSS_SELECTOR, ".gdv_boundfield_uppercase" By.CSS_SELECTOR, ".gdv_boundfield_uppercase"
) )
@ -239,7 +242,9 @@ try:
len( len(
driver.find_element( driver.find_element(
"id", "lblSendCourseEQDetail" "id", "lblSendCourseEQDetail"
).find_elements(By.CSS_SELECTOR, ".course-detail") ).find_elements(
By.CSS_SELECTOR, ".course-detail"
)
), ),
) )
] ]
@ -251,7 +256,9 @@ try:
len( len(
driver.find_element( driver.find_element(
"id", "lblReceiveCourseEQDetail" "id", "lblReceiveCourseEQDetail"
).find_elements(By.CSS_SELECTOR, ".course-detail") ).find_elements(
By.CSS_SELECTOR, ".course-detail"
)
), ),
) )
] ]
@ -264,7 +271,9 @@ try:
begin_date = driver.find_element( begin_date = driver.find_element(
"id", "lblBeginEffectiveDate" "id", "lblBeginEffectiveDate"
).text ).text
end_date = driver.find_element("id", "lblEndEffectiveDate").text end_date = driver.find_element(
"id", "lblEndEffectiveDate"
).text
driver.find_element( driver.find_element(
By.CSS_SELECTOR, ".modal-header button" By.CSS_SELECTOR, ".modal-header button"
@ -319,3 +328,7 @@ with open(transfer_json_path, "w") as transferjson:
with open(state_json_path, "w") as statejson: with open(state_json_path, "w") as statejson:
json.dump(state, statejson, indent=4) json.dump(state, statejson, indent=4)
driver.quit() driver.quit()
if __name__ == "__main__":
main()