# quatalog-scraper/.github/workflows/scraper.yml
# (file-viewer metadata: 121 lines, 3.6 KiB, YAML)

name: Update data and generate static site

# Triggers: manual dispatch, plus a schedule that fires at minute 0 of every hour.
on:
  workflow_dispatch:
  schedule:
    - cron: '0 * * * *'
# Pipeline overview:
#   scrape-data    builds the JSON data files and publishes them as the
#                  `new-data` artifact.
#   push-new-data  commits that artifact's files to the quatalog/data repo.
#   generate-site  renders HTML from the same artifact and commits it to the
#                  `static-generated` ref of quatalog/site.
# The two downstream jobs both depend only on scrape-data.
jobs:
  scrape-data:
    name: Scrape data
    runs-on: ubuntu-latest
    steps:
      - name: Checkout scraping repo
        uses: actions/checkout@v4
        with:
          path: quatalog-scraping

      - name: Clone QuACS data
        uses: actions/checkout@v4
        with:
          repository: quacs/quacs-data
          path: quacs-data

      - name: Run scraper
        run: |
          # Usage: CourseOfferingsScraper <data_directory> <terms_offered_file> <prerequisites_file> <list_of_terms_file>
          mkdir new-data
          quatalog-scraping/bin/CourseOfferingsScraper \
            quacs-data/semester_data \
            new-data/terms_offered.json \
            new-data/prerequisites.json \
            new-data/terms_list.json

      - name: Copy catalog.json from QuACS data
        run: |
          # Take the 4th double-quote-delimited field of the line mentioning
          # current_term (presumably `"current_term": "<term>"` - confirm
          # against the scraper's output format).
          CURRENT_TERM="$(grep -e 'current_term' new-data/terms_list.json | cut -f4 -d '"')"
          # Fail with a clear message instead of an obscure rsync path error.
          if [ -z "$CURRENT_TERM" ]; then
            echo "Could not determine current_term from new-data/terms_list.json" >&2
            exit 1
          fi
          rsync -avz "quacs-data/semester_data/$CURRENT_TERM/catalog.json" new-data/catalog.json

      # The artifact actions must use the same major version on both the
      # upload and download side; v3 has been retired by GitHub.
      - name: Upload data to artifact
        uses: actions/upload-artifact@v4
        with:
          name: new-data
          path: new-data/
          # Downstream jobs cannot work without the data, so fail here.
          if-no-files-found: error

  push-new-data:
    name: Push new data to data repo
    runs-on: ubuntu-latest
    needs: [scrape-data]
    steps:
      - name: Clone Quatalog data
        uses: actions/checkout@v4
        with:
          repository: quatalog/data
          path: quatalog-data
          # Token used for the checkout and, via persisted credentials,
          # for the later `git push`.
          token: ${{ secrets.PUSH_TOKEN }}

      - name: Download data from artifact
        uses: actions/download-artifact@v4
        with:
          name: new-data
          path: new-data

      - name: Copy data to repo directory
        run: |
          rsync -avz new-data/ quatalog-data/

      - name: Push new data
        working-directory: quatalog-data
        run: |
          git config --global user.name "Quatalog Updater"
          git config --global user.email "github_actions@quatalog.com"
          git add terms_offered.json prerequisites.json terms_list.json catalog.json
          # Nothing staged means the data is unchanged: finish successfully.
          # Any other commit or push failure now fails the job instead of
          # being swallowed.
          if git diff --cached --quiet; then
            echo "No data changes to commit."
            exit 0
          fi
          git commit -m "$(date)"
          git push

  generate-site:
    name: Generate the static site
    runs-on: ubuntu-latest
    needs: [scrape-data]
    steps:
      - name: Checkout scraping repo
        uses: actions/checkout@v4
        with:
          path: quatalog-scraping

      - name: Clone Quatalog static site
        uses: actions/checkout@v4
        with:
          repository: quatalog/site
          ref: static-generated
          path: quatalog-site
          # Token used for the checkout and, via persisted credentials,
          # for the later `git push`.
          token: ${{ secrets.PUSH_TOKEN }}

      - name: Download data from artifact
        uses: actions/download-artifact@v4
        with:
          name: new-data
          path: new-data

      - name: Run the HTML generator
        run: |
          # Usage: GenerateHtml <terms_offered_file> <prerequisites_file> <list_of_terms_file> <catalog_file> <out_directory>
          quatalog-scraping/bin/GenerateHtml \
            new-data/terms_offered.json \
            new-data/prerequisites.json \
            new-data/terms_list.json \
            new-data/catalog.json \
            courses/

      # rsync runs without --delete, so files already present in
      # quatalog-site/courses/ that were not regenerated are left in place.
      - name: Merge data
        run: |
          rsync -avz courses/ quatalog-site/courses/

      - name: Push generated HTML
        working-directory: quatalog-site
        run: |
          git config --global user.name "Quatalog Updater"
          git config --global user.email "github_actions@quatalog.com"
          git add courses
          # Nothing staged means the generated site is unchanged: finish
          # successfully. Any other commit or push failure now fails the job
          # instead of being swallowed.
          if git diff --cached --quiet; then
            echo "No site changes to commit."
            exit 0
          fi
          git commit -m "$(date)"
          git push