mirror of
https://github.com/quatalog/quatalog.git
synced 2024-11-08 08:04:24 +00:00
Add if __name__ == "__main__" and fix workflow
This commit is contained in:
parent
15b09123ee
commit
f216c45748
7
.github/workflows/transfer.yml
vendored
7
.github/workflows/transfer.yml
vendored
|
@@ -29,14 +29,17 @@ jobs:
|
||||||
|
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
working-directory: quatalog-scraping/transfer_scraper
|
working-directory: quatalog-scraping/transfer_scraper
|
||||||
run: pip install -r 'requirements.txt'
|
run: |
|
||||||
|
python -m pip install --upgrade pip
|
||||||
|
pip install -r 'requirements.txt'
|
||||||
|
|
||||||
- name: Log IP
|
- name: Log IP
|
||||||
run: |
|
run: |
|
||||||
echo "Public IP: $(curl -s 'https://ipinfo.io/ip')"
|
echo "Public IP: $(curl -s 'https://ipinfo.io/ip')"
|
||||||
|
|
||||||
- name: Scrape transfer guide
|
- name: Scrape transfer guide
|
||||||
run: python3 quatalog-scraping/transfer_scraper data/transfer.json data/transfer_state.json
|
run: |
|
||||||
|
python3 quatalog-scraping/transfer_scraper/main.py data/transfer.json data/transfer_state.json
|
||||||
|
|
||||||
- name: Upload data to artifact
|
- name: Upload data to artifact
|
||||||
uses: actions/upload-artifact@v4
|
uses: actions/upload-artifact@v4
|
||||||
|
|
|
@@ -100,6 +100,7 @@ def scrape_course_card(html_id, i, note):
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
if len(sys.argv) != 3:
|
if len(sys.argv) != 3:
|
||||||
print(f"USAGE: python {sys.argv[0]} <transfer file> <state file>")
|
print(f"USAGE: python {sys.argv[0]} <transfer file> <state file>")
|
||||||
exit(1)
|
exit(1)
|
||||||
|
@@ -136,12 +137,10 @@ print("Loaded state: ", end="", file=sys.stderr)
|
||||||
json.dump(state, sys.stderr, indent=4)
|
json.dump(state, sys.stderr, indent=4)
|
||||||
print("", file=sys.stderr)
|
print("", file=sys.stderr)
|
||||||
|
|
||||||
|
|
||||||
# Set up 2hr timeout so that the GH action does not run forever, pretend it's ^C
|
# Set up 2hr timeout so that the GH action does not run forever, pretend it's ^C
|
||||||
signal(SIGALRM, lambda a, b: raise_(KeyboardInterrupt))
|
signal(SIGALRM, lambda a, b: raise_(KeyboardInterrupt))
|
||||||
alarm(60 * 60 * 2)
|
alarm(60 * 60 * 2)
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
curr_page = 1
|
curr_page = 1
|
||||||
while state["inst_pg"] <= num_pages:
|
while state["inst_pg"] <= num_pages:
|
||||||
|
@@ -182,9 +181,13 @@ try:
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
while state["inst_idx"] < inst_list_len:
|
while state["inst_idx"] < inst_list_len:
|
||||||
institution_link = driver.find_element("id", "gdvInstWithEQ").find_elements(
|
institution_link = driver.find_element(
|
||||||
|
"id", "gdvInstWithEQ"
|
||||||
|
).find_elements(
|
||||||
By.CSS_SELECTOR, "a[id^=gdvInstWithEQ_btnCreditFromInstName_]"
|
By.CSS_SELECTOR, "a[id^=gdvInstWithEQ_btnCreditFromInstName_]"
|
||||||
)[state["inst_idx"]]
|
)[
|
||||||
|
state["inst_idx"]
|
||||||
|
]
|
||||||
fields = institution_link.find_element(By.XPATH, "../..").find_elements(
|
fields = institution_link.find_element(By.XPATH, "../..").find_elements(
|
||||||
By.CSS_SELECTOR, ".gdv_boundfield_uppercase"
|
By.CSS_SELECTOR, ".gdv_boundfield_uppercase"
|
||||||
)
|
)
|
||||||
|
@@ -239,7 +242,9 @@ try:
|
||||||
len(
|
len(
|
||||||
driver.find_element(
|
driver.find_element(
|
||||||
"id", "lblSendCourseEQDetail"
|
"id", "lblSendCourseEQDetail"
|
||||||
).find_elements(By.CSS_SELECTOR, ".course-detail")
|
).find_elements(
|
||||||
|
By.CSS_SELECTOR, ".course-detail"
|
||||||
|
)
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
|
@@ -251,7 +256,9 @@ try:
|
||||||
len(
|
len(
|
||||||
driver.find_element(
|
driver.find_element(
|
||||||
"id", "lblReceiveCourseEQDetail"
|
"id", "lblReceiveCourseEQDetail"
|
||||||
).find_elements(By.CSS_SELECTOR, ".course-detail")
|
).find_elements(
|
||||||
|
By.CSS_SELECTOR, ".course-detail"
|
||||||
|
)
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
|
@@ -264,7 +271,9 @@ try:
|
||||||
begin_date = driver.find_element(
|
begin_date = driver.find_element(
|
||||||
"id", "lblBeginEffectiveDate"
|
"id", "lblBeginEffectiveDate"
|
||||||
).text
|
).text
|
||||||
end_date = driver.find_element("id", "lblEndEffectiveDate").text
|
end_date = driver.find_element(
|
||||||
|
"id", "lblEndEffectiveDate"
|
||||||
|
).text
|
||||||
|
|
||||||
driver.find_element(
|
driver.find_element(
|
||||||
By.CSS_SELECTOR, ".modal-header button"
|
By.CSS_SELECTOR, ".modal-header button"
|
||||||
|
@@ -319,3 +328,7 @@ with open(transfer_json_path, "w") as transferjson:
|
||||||
with open(state_json_path, "w") as statejson:
|
with open(state_json_path, "w") as statejson:
|
||||||
json.dump(state, statejson, indent=4)
|
json.dump(state, statejson, indent=4)
|
||||||
driver.quit()
|
driver.quit()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
|
|
Loading…
Reference in a new issue