quatalog-scraper/.github/workflows/transfer.yml

53 lines
1.8 KiB
YAML
Raw Normal View History

2024-02-29 22:16:39 +00:00
name: Scrape transfer and update file
run-name: Scrape transfer and update file

# Manual trigger only: the workflow runs when started from the Actions UI/API.
on:
  workflow_dispatch:

jobs:
  # Job 1: fetch the first page, read the total page count, and publish a
  # matrix definition ({"include": [{"page": 1}, ...]}) for the scrape job.
  setup:
    name: Get number of pages and set up scrape page jobs
    runs-on: ubuntu-latest
    outputs:
      matrix-params: ${{ steps.matrix-params.outputs.matrix-params }}
    steps:
      - name: Create matrix parameters
        id: matrix-params
        env:
          TES_URL: 'https://tes.collegesource.com/publicview/TES_publicview01.aspx?rid=f080a477-bff8-46df-a5b2-25e9affdd4ed&aid=27b576bb-cd07-4e57-84d0-37475fde70ce'
          USER_AGENT: 'Mozilla/5.0 (X11; Linux x86_64; rv:122.0) Gecko/20100101 Firefox/122.0'
        run: |
          set -euo pipefail

          # Extract the number that follows "of " on the line containing the
          # pagination label. `|| true` defers failure handling to the
          # validation below so the error message is explicit.
          NUM_PAGES="$(
            curl -fsS -H "User-Agent: ${USER_AGENT}" "${TES_URL}" \
              | grep -e 'lblInstWithEQPaginationInfo' \
              | grep -Poie '(?<=of )[0-9]+' \
              || true
          )"

          # Reject empty, multi-line, or non-numeric results before they reach
          # `seq` and produce a malformed matrix.
          if ! [[ "${NUM_PAGES}" =~ ^[1-9][0-9]*$ ]]; then
            echo "::error::Could not determine the number of pages (got: '${NUM_PAGES}')" >&2
            exit 1
          fi

          # NOTE(review): GitHub limits a matrix to 256 jobs per run; confirm
          # the page count stays below that.
          # Builds: {"page": 1}, {"page": 2}, ..., {"page": N}
          MATRIX_ENTRIES="$(seq -s ', ' 1 "${NUM_PAGES}" | sed -E 's/[0-9]+/{"page": &}/g')"

          # Append (never truncate) the step output; tee also echoes it to the log.
          echo "matrix-params={\"include\": [${MATRIX_ENTRIES}]}" | tee -a "${GITHUB_OUTPUT}"

  # Job 2: one matrix job per page; each scrapes its page and uploads the
  # resulting JSON file as an artifact named transfer-page-<page>.
  scrape-page:
    name: Scrape page
    runs-on: ubuntu-latest
    needs: setup
    strategy:
      matrix: ${{ fromJson(needs.setup.outputs.matrix-params) }}
    steps:
      - name: Checkout scraping repo
        uses: actions/checkout@v4
      - name: Set up python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'
          cache: 'pip'
      - name: Install dependencies
        working-directory: transfer_scraper
        run: |
          python -m pip install --upgrade pip
          pip install -r 'requirements.txt'
      - name: Run scraper
        working-directory: transfer_scraper
        run: |
          python3 main.py ${{ matrix.page }} "transfer_${{ matrix.page }}.json"
      - name: Upload data to artifact
        uses: actions/upload-artifact@v4
        with:
          name: transfer-page-${{ matrix.page }}
          # Must match the output file written by the "Run scraper" step.
          path: transfer_scraper/transfer_${{ matrix.page }}.json
          # Fail the job instead of silently uploading nothing.
          if-no-files-found: error