Compare commits

...

5 Commits

Author SHA1 Message Date
powe97 9c374bf130
Fix timeout issue (again) 2024-03-06 13:20:09 -06:00
powe97 10360ff57c
Wait for table 2024-03-06 12:48:48 -06:00
powe97 3b239f9cef
Merge branch 'main' of https://github.com/quatalog/quatalog 2024-03-06 12:41:13 -06:00
powe97 de89a56808
Add more debug printing 2024-03-06 12:41:08 -06:00
powe97 45cfba68ac
Sort output JSON 2024-03-06 12:10:23 -06:00
2 changed files with 5 additions and 4 deletions

View File

@@ -75,7 +75,7 @@ jobs:
- name: Combine JSONs
run: |
- cat new-data/* | jq -s 'add' > data/transfer.json
+ cat new-data/* | jq -s 'add | sort_by(.institution)' > data/transfer.json
- name: Commit data
working-directory: data

View File

@@ -29,7 +29,7 @@ def wait(ec):
global driver
WebDriverWait(
- driver, 40, ignored_exceptions=[StaleElementReferenceException]
+ driver, 35, ignored_exceptions=[StaleElementReferenceException]
).until(ec)
sleep(random.uniform(400, 1900) / 1000)
@@ -44,6 +44,7 @@ def wait(ec):
def jump_to_page(curr_page, to_page, postback_type, pagination_type):
global driver
+ wait(EC.visibility_of_element_located((By.ID, postback_type)))
page = driver.find_element(By.ID, postback_type)
try:
num_pages = int(driver.find_element(By.ID, pagination_type).text.split()[-1])
@@ -100,7 +101,7 @@ def scrape_page(page_num):
except Exception as e:
driver.quit()
print(
- f"Attempt {i} failed to load page, retrying in 25 seconds...",
+ f"Attempt {i} failed due to {type(e).__name__}, retrying in 25 seconds...",
file=sys.stderr,
)
sleep(25)
@@ -125,7 +126,7 @@ def scrape_institution_safe(index, page_num):
except Exception as e:
driver.quit()
print(
- f"\tAttempt {i} failed due to {type(e).__name__}: {e}, retrying in 25 seconds...",
+ f"\tAttempt {i} failed due to {type(e).__name__}, retrying in 25 seconds...",
file=sys.stderr,
)
sleep(25)