Run CSV generator
This commit is contained in:
parent
5e9e464ad0
commit
d0f2f969d2
|
@ -59,6 +59,11 @@ jobs:
|
|||
runs-on: ubuntu-latest
|
||||
needs: scrape-page
|
||||
steps:
|
||||
- name: Set up python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.11'
|
||||
|
||||
- name: Checkout data repo
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
|
@ -66,11 +71,6 @@ jobs:
|
|||
path: data
|
||||
token: ${{ secrets.PUSH_TOKEN }}
|
||||
|
||||
- name: Set up python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.11'
|
||||
|
||||
- name: Checkout scraping repo
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
|
@ -87,15 +87,117 @@ jobs:
|
|||
run: |
|
||||
cat new-data/* | jq -s 'add | sort_by(.institution)' > data/transfer.json
|
||||
|
||||
- name: Convert to by-courses format
|
||||
run: |
|
||||
python scrapers/transfer_scraper/convert_json.py data/transfer.json data/transfer_by_course.json
|
||||
|
||||
- name: Commit data
|
||||
working-directory: data
|
||||
run: |
|
||||
git config user.name "Quatalog Updater"
|
||||
git config user.email "github_actions@quatalog.com"
|
||||
git add transfer*.json
|
||||
git add transfer.json
|
||||
git commit -m "$(date)" || exit 0
|
||||
git push
|
||||
|
||||
- name:
|
||||
name: Scrape transfer and update file
|
||||
run-name: Scrape transfer and update file
|
||||
on:
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
- cron: '0 10 * * *'
|
||||
|
||||
jobs:
|
||||
setup:
|
||||
name: Get number of pages and set up scrape page jobs
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Create matrix parameters
|
||||
id: matrix-params
|
||||
run: |
|
||||
NUM_PAGES="$(curl -H 'User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:122.0) Gecko/20100101 Firefox/122.0' 'https://tes.collegesource.com/publicview/TES_publicview01.aspx?rid=f080a477-bff8-46df-a5b2-25e9affdd4ed&aid=27b576bb-cd07-4e57-84d0-37475fde70ce' | grep -e 'lblInstWithEQPaginationInfo' | grep -Poie '(?<=of )[0-9]*')"
|
||||
MATRIX_PARAMS="$(seq -s "," 1 "$NUM_PAGES")"
|
||||
MATRIX_PARAMS="\"page\": $(sed -e 's/,/}, {"page": /g' <<< "$MATRIX_PARAMS")"
|
||||
echo "matrix-params={\"include\": [{"$MATRIX_PARAMS"}]}" | tee $GITHUB_OUTPUT
|
||||
outputs:
|
||||
matrix-params: ${{ steps.matrix-params.outputs.matrix-params }}
|
||||
|
||||
scrape-page:
|
||||
name: Scrape page
|
||||
runs-on: ubuntu-latest
|
||||
needs: setup
|
||||
strategy:
|
||||
matrix: ${{ fromJson(needs.setup.outputs.matrix-params) }}
|
||||
fail-fast: true
|
||||
steps:
|
||||
- name: Checkout scraping repo
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.11'
|
||||
cache: 'pip'
|
||||
|
||||
- name: Install dependencies
|
||||
working-directory: transfer_scraper
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install -r 'requirements.txt'
|
||||
|
||||
- name: Run scraper
|
||||
working-directory: transfer_scraper
|
||||
run: |
|
||||
python3 scrape_page.py ${{ matrix.page }} transfer_${{ matrix.page }}.json
|
||||
|
||||
- name: Upload data to artifact
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: transfer-page-${{ matrix.page }}
|
||||
path: transfer_scraper/transfer_${{ matrix.page }}.json
|
||||
|
||||
commit-data:
|
||||
name: Combine/convert and commit data
|
||||
runs-on: ubuntu-latest
|
||||
needs: scrape-page
|
||||
steps:
|
||||
- name: Set up python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.11'
|
||||
|
||||
- name: Checkout data repo
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
repository: quatalog/data
|
||||
path: data
|
||||
token: ${{ secrets.PUSH_TOKEN }}
|
||||
|
||||
- name: Checkout scraping repo
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
path: scrapers
|
||||
|
||||
- name: Download partial JSONs
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
pattern: transfer-page-*
|
||||
merge-multiple: true
|
||||
path: new-data
|
||||
|
||||
- name: Combine JSONs
|
||||
run: |
|
||||
cat new-data/* | jq -s 'add | sort_by(.institution)' > data/transfer.json
|
||||
|
||||
- name: Commit data
|
||||
working-directory: data
|
||||
run: |
|
||||
git config user.name "Quatalog Updater"
|
||||
git config user.email "github_actions@quatalog.com"
|
||||
git add transfer.json
|
||||
git commit -m "$(date)" || exit 0
|
||||
git push
|
||||
|
||||
- name: Run CSV generator
|
||||
run: |
|
||||
curl -H "Accept: application/vnd.github.everest-preview+json" \
|
||||
-H "Authorization: token ${{ secrets.PUSH_TOKEN }}" \
|
||||
--request POST --data '{"event_type": "generate-csv"}' \
|
||||
"https://api.github.com/repos/quatalog/quatalog/dispatches"
|
||||
|
|
Loading…
Reference in New Issue