Compare commits
5 Commits
f7c5c2461a
...
99ba8873bd
Author | SHA1 | Date |
---|---|---|
powe97 | 99ba8873bd | |
powe97 | eb77e84535 | |
powe97 | bdc6b2bcbc | |
powe97 | ecfb176c46 | |
powe97 | ea09d33ac2 |
|
@ -0,0 +1,38 @@
|
|||
name: Scrape single page of transfer guide
|
||||
run-name: Scrape single page of transfer guide
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
page:
|
||||
required: true
|
||||
type: number
|
||||
|
||||
jobs:
|
||||
scrape-page:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout scraping repo
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.11'
|
||||
cache: 'pip'
|
||||
|
||||
- name: Install dependencies
|
||||
working-directory: transfer_scraper
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install -r 'requirements.txt'
|
||||
|
||||
- name: Run scraper
|
||||
working-directory: transfer_scraper
|
||||
run: |
|
||||
python3 scrape_page.py ${{ inputs.page }} transfer_${{ inputs.page }}.json
|
||||
|
||||
- name: Upload data to artifact
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: transfer-page-${{ inputs.page }}
|
||||
path: transfer_scraper/transfer_${{ inputs.page }}.json
|
|
@ -3,7 +3,7 @@ run-name: Scrape transfer and update file
|
|||
on:
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
- cron: '0 10 * * *'
|
||||
- cron: '0 10,22 * * *'
|
||||
|
||||
jobs:
|
||||
setup:
|
||||
|
@ -22,37 +22,13 @@ jobs:
|
|||
|
||||
scrape-page:
|
||||
name: Scrape page
|
||||
runs-on: ubuntu-latest
|
||||
needs: setup
|
||||
uses: ./.github/workflows/scrape_page.yml
|
||||
strategy:
|
||||
matrix: ${{ fromJson(needs.setup.outputs.matrix-params) }}
|
||||
fail-fast: true
|
||||
steps:
|
||||
- name: Checkout scraping repo
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.11'
|
||||
cache: 'pip'
|
||||
|
||||
- name: Install dependencies
|
||||
working-directory: transfer_scraper
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install -r 'requirements.txt'
|
||||
|
||||
- name: Run scraper
|
||||
working-directory: transfer_scraper
|
||||
run: |
|
||||
python3 scrape_page.py ${{ matrix.page }} transfer_${{ matrix.page }}.json
|
||||
|
||||
- name: Upload data to artifact
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: transfer-page-${{ matrix.page }}
|
||||
path: transfer_scraper/transfer_${{ matrix.page }}.json
|
||||
with:
|
||||
page: ${{ matrix.page }}
|
||||
|
||||
commit-data:
|
||||
name: Combine/convert and commit data
|
||||
|
|
|
@ -16,6 +16,10 @@ from selenium.common.exceptions import (
|
|||
)
|
||||
|
||||
|
||||
class IPBanException(Exception):
|
||||
pass
|
||||
|
||||
|
||||
# Fix course titles accounting for Roman numerals up to X
|
||||
def normalize_title(input):
|
||||
s = " ".join(input.split())
|
||||
|
@ -103,8 +107,14 @@ def scrape_page(page_num):
|
|||
)
|
||||
wait(EC.visibility_of_element_located((By.TAG_NAME, "body")))
|
||||
print(f'Title: "{driver.title}"', file=sys.stderr)
|
||||
if driver.title == "403 Forbidden":
|
||||
raise IPBanException
|
||||
jump_to_page(1, page_num, "gdvInstWithEQ", "lblInstWithEQPaginationInfo")
|
||||
break
|
||||
except IPBanException as e:
|
||||
driver.quit()
|
||||
print(f"We are IP-banned, exiting now", file=sys.stderr)
|
||||
raise e
|
||||
except Exception as e:
|
||||
driver.quit()
|
||||
|
||||
|
|
Loading…
Reference in New Issue