# Source: quatalog-scraper/.github/workflows/scraper.yml
# (file-viewer metadata captured with the paste: 83 lines, 2.3 KiB, YAML)

# Hourly (and manually triggerable) workflow that runs the course-offerings
# scraper against the QuACS semester data and commits the regenerated JSON
# files to the quatalog/data repository.
#
# Data flow:
#   scrape-data   -> produces the JSON files and uploads them as an artifact
#   push-new-data -> checks out quatalog/data, overlays the artifact, pushes
name: Update data

on:
  workflow_dispatch:
  schedule:
    # Minute 0 of every hour (GitHub evaluates cron schedules in UTC).
    - cron: '0 * * * *'

jobs:
  scrape-data:
    name: Scrape data
    runs-on: ubuntu-latest
    steps:
      - name: Checkout scraping repo
        uses: actions/checkout@v4
        with:
          path: quatalog-scraping

      - name: Clone QuACS data
        uses: actions/checkout@v4
        with:
          repository: quacs/quacs-data
          path: quacs-data

      - name: Clone Quatalog data
        uses: actions/checkout@v4
        with:
          repository: quatalog/data
          path: quatalog-data
          token: ${{ secrets.PUSH_TOKEN }}
          # This job only reads the data repo. Do not leave the token in
          # .git/config, so it can never end up inside the uploaded artifact.
          persist-credentials: false

      - name: Run scraper
        run: |
          # Usage: CourseOfferingsScraper <data_directory> <terms_offered_file> <prerequisites_file> <list_of_terms_file>
          quatalog-scraping/bin/CourseOfferingsScraper \
            quacs-data/semester_data \
            quatalog-data/terms_offered.json \
            quatalog-data/prerequisites.json \
            quatalog-data/terms_list.json

      - name: Copy catalog.json from QuACS data
        run: |
          # Takes the 4th double-quote-delimited field of the line containing
          # 'current_term' (assumes a line shaped like `"current_term": "<term>"`).
          CURRENT_TERM="$(grep -e 'current_term' quatalog-data/terms_list.json | cut -f4 -d '"')"
          # Fail with a clear message instead of an obscure rsync path error.
          if [ -z "$CURRENT_TERM" ]; then
            echo "::error::Could not determine current_term from quatalog-data/terms_list.json"
            exit 1
          fi
          rsync -avz "quacs-data/semester_data/$CURRENT_TERM/catalog.json" quatalog-data/catalog.json

      - name: List files
        run: |
          ls -lsaR .

      - name: Upload data to artifact
        # v3 of the artifact actions is deprecated and no longer runs on
        # github.com. v4 skips hidden files by default, so the checkout's
        # .git directory is deliberately NOT part of the artifact.
        uses: actions/upload-artifact@v4
        with:
          name: quatalog-data
          path: quatalog-data/
          if-no-files-found: error

  # Disabled placeholder for a future site-generation job.
  # generate-site:
  #   runs-on: ubuntu-latest
  #   needs: [scrape-data]
  #   steps:
  #     - name: Checkout scraping repo
  #       uses: actions/checkout@v4
  #       with:
  #         path: quatalog-scraping

  push-new-data:
    name: Push new data to data repo
    runs-on: ubuntu-latest
    needs: [scrape-data]
    steps:
      - name: Checkout Quatalog data
        # A fresh, authenticated clone is needed here: the artifact carries
        # only data files, not the git metadata or credentials for pushing.
        uses: actions/checkout@v4
        with:
          repository: quatalog/data
          path: quatalog-data
          token: ${{ secrets.PUSH_TOKEN }}

      - name: Download data from artifact
        # Extracts on top of the checkout, replacing the tracked JSON files
        # with the freshly scraped versions.
        uses: actions/download-artifact@v4
        with:
          name: quatalog-data
          path: quatalog-data

      - name: Push new data
        working-directory: ./quatalog-data
        run: |
          git config --global user.name "Quatalog Updater"
          git config --global user.email "github_actions@quatalog.com"
          git add terms_offered.json prerequisites.json terms_list.json catalog.json
          # Exit successfully only when nothing is staged; real commit or
          # push failures must fail the job rather than be swallowed.
          if git diff --cached --quiet; then
            echo "No data changes to commit"
            exit 0
          fi
          git commit -m "$(date)"
          git push