Compare commits
5 Commits
f7c5c2461a
...
99ba8873bd
Author | SHA1 | Date |
---|---|---|
|
99ba8873bd | |
|
eb77e84535 | |
|
bdc6b2bcbc | |
|
ecfb176c46 | |
|
ea09d33ac2 |
|
@ -0,0 +1,38 @@
|
||||||
|
name: Scrape single page of transfer guide
|
||||||
|
run-name: Scrape single page of transfer guide
|
||||||
|
on:
|
||||||
|
workflow_call:
|
||||||
|
inputs:
|
||||||
|
page:
|
||||||
|
required: true
|
||||||
|
type: number
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
scrape-page:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- name: Checkout scraping repo
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Set up python
|
||||||
|
uses: actions/setup-python@v5
|
||||||
|
with:
|
||||||
|
python-version: '3.11'
|
||||||
|
cache: 'pip'
|
||||||
|
|
||||||
|
- name: Install dependencies
|
||||||
|
working-directory: transfer_scraper
|
||||||
|
run: |
|
||||||
|
python -m pip install --upgrade pip
|
||||||
|
pip install -r 'requirements.txt'
|
||||||
|
|
||||||
|
- name: Run scraper
|
||||||
|
working-directory: transfer_scraper
|
||||||
|
run: |
|
||||||
|
python3 scrape_page.py ${{ inputs.page }} transfer_${{ inputs.page }}.json
|
||||||
|
|
||||||
|
- name: Upload data to artifact
|
||||||
|
uses: actions/upload-artifact@v4
|
||||||
|
with:
|
||||||
|
name: transfer-page-${{ inputs.page }}
|
||||||
|
path: transfer_scraper/transfer_${{ inputs.page }}.json
|
|
@ -3,7 +3,7 @@ run-name: Scrape transfer and update file
|
||||||
on:
|
on:
|
||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
schedule:
|
schedule:
|
||||||
- cron: '0 10 * * *'
|
- cron: '0 10,22 * * *'
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
setup:
|
setup:
|
||||||
|
@ -22,37 +22,13 @@ jobs:
|
||||||
|
|
||||||
scrape-page:
|
scrape-page:
|
||||||
name: Scrape page
|
name: Scrape page
|
||||||
runs-on: ubuntu-latest
|
|
||||||
needs: setup
|
needs: setup
|
||||||
|
uses: ./.github/workflows/scrape_page.yml
|
||||||
strategy:
|
strategy:
|
||||||
matrix: ${{ fromJson(needs.setup.outputs.matrix-params) }}
|
matrix: ${{ fromJson(needs.setup.outputs.matrix-params) }}
|
||||||
fail-fast: true
|
fail-fast: true
|
||||||
steps:
|
|
||||||
- name: Checkout scraping repo
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
|
|
||||||
- name: Set up python
|
|
||||||
uses: actions/setup-python@v5
|
|
||||||
with:
|
with:
|
||||||
python-version: '3.11'
|
page: ${{ matrix.page }}
|
||||||
cache: 'pip'
|
|
||||||
|
|
||||||
- name: Install dependencies
|
|
||||||
working-directory: transfer_scraper
|
|
||||||
run: |
|
|
||||||
python -m pip install --upgrade pip
|
|
||||||
pip install -r 'requirements.txt'
|
|
||||||
|
|
||||||
- name: Run scraper
|
|
||||||
working-directory: transfer_scraper
|
|
||||||
run: |
|
|
||||||
python3 scrape_page.py ${{ matrix.page }} transfer_${{ matrix.page }}.json
|
|
||||||
|
|
||||||
- name: Upload data to artifact
|
|
||||||
uses: actions/upload-artifact@v4
|
|
||||||
with:
|
|
||||||
name: transfer-page-${{ matrix.page }}
|
|
||||||
path: transfer_scraper/transfer_${{ matrix.page }}.json
|
|
||||||
|
|
||||||
commit-data:
|
commit-data:
|
||||||
name: Combine/convert and commit data
|
name: Combine/convert and commit data
|
||||||
|
|
|
@ -16,6 +16,10 @@ from selenium.common.exceptions import (
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class IPBanException(Exception):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
# Fix course titles accounting for Roman numerals up to X
|
# Fix course titles accounting for Roman numerals up to X
|
||||||
def normalize_title(input):
|
def normalize_title(input):
|
||||||
s = " ".join(input.split())
|
s = " ".join(input.split())
|
||||||
|
@ -103,8 +107,14 @@ def scrape_page(page_num):
|
||||||
)
|
)
|
||||||
wait(EC.visibility_of_element_located((By.TAG_NAME, "body")))
|
wait(EC.visibility_of_element_located((By.TAG_NAME, "body")))
|
||||||
print(f'Title: "{driver.title}"', file=sys.stderr)
|
print(f'Title: "{driver.title}"', file=sys.stderr)
|
||||||
|
if driver.title == "403 Forbidden":
|
||||||
|
raise IPBanException
|
||||||
jump_to_page(1, page_num, "gdvInstWithEQ", "lblInstWithEQPaginationInfo")
|
jump_to_page(1, page_num, "gdvInstWithEQ", "lblInstWithEQPaginationInfo")
|
||||||
break
|
break
|
||||||
|
except IPBanException as e:
|
||||||
|
driver.quit()
|
||||||
|
print(f"We are IP-banned, exiting now", file=sys.stderr)
|
||||||
|
raise e
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
driver.quit()
|
driver.quit()
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue