quatalog-scraper/.github/workflows/scraper.yml

56 lines
1.7 KiB
YAML
Raw Normal View History

2023-02-07 15:17:57 +00:00
# Regenerates the Quatalog data files from the QuACS data and pushes the
# result to the quatalog/quatalog-data repository. Runs at minute 0 of every
# hour and can also be started manually.
name: Update data

on:
  workflow_dispatch:
  schedule:
    - cron: '0 * * * *'

# Serialize runs: a manual dispatch overlapping the hourly schedule would
# otherwise have two jobs pushing to quatalog-data at the same time. Queue the
# newer run instead of cancelling the one that is already in progress.
concurrency:
  group: update-data
  cancel-in-progress: false

jobs:
  scrape-data:
    name: Scrape data
    runs-on: ubuntu-latest
    permissions:
      # No step writes to this repository with GITHUB_TOKEN; the only push
      # targets quatalog/quatalog-data and uses secrets.PUSH_TOKEN instead.
      contents: read
    steps:
      - name: Checkout scraping repo
        uses: actions/checkout@v4
        with:
          path: quatalog-scraping

      - name: Clone QuACS data
        uses: actions/checkout@v4
        with:
          repository: quacs/quacs-data
          path: quacs-data

      - name: Clone Quatalog data
        uses: actions/checkout@v4
        with:
          repository: quatalog/quatalog-data
          path: quatalog-data
          # This token is what the later `git push` in quatalog-data
          # authenticates with.
          token: ${{ secrets.PUSH_TOKEN }}

      - name: Run scraper
        run: |
          # Usage: CourseOfferingsScraper <data_directory> <terms_offered_file> <prerequisites_file> <list_of_terms_file>
          quatalog-scraping/bin/CourseOfferingsScraper \
            quacs-data/semester_data \
            quatalog-data/terms_offered.json \
            quatalog-data/prerequisites.json \
            quatalog-data/terms_list.json

      - name: Copy catalog.json from QuACS data
        run: |
          # Take the 4th double-quote-delimited field of the line mentioning
          # current_term in terms_list.json (written by the previous step).
          CURRENT_TERM="$(grep -e 'current_term' quatalog-data/terms_list.json | cut -f4 -d '"')"
          # Fail with a clear message instead of letting rsync trip over a
          # malformed "semester_data//catalog.json" path.
          if [ -z "$CURRENT_TERM" ]; then
            echo "::error::Could not determine current_term from quatalog-data/terms_list.json" >&2
            exit 1
          fi
          rsync -avz "quacs-data/semester_data/$CURRENT_TERM/catalog.json" quatalog-data/catalog.json

      - name: Push new data
        working-directory: ./quatalog-data
        run: |
          git config --global user.name "Quatalog Updater"
          git config --global user.email "github_actions@quatalog.com"
          git add terms_offered.json prerequisites.json terms_list.json catalog.json
          # `git diff --cached --quiet` exits 0 when nothing is staged. Stop
          # successfully in that case only, so that a genuine commit failure
          # is reported as a failed run rather than being swallowed.
          if git diff --cached --quiet; then
            echo "No data changes; nothing to push."
            exit 0
          fi
          git commit -m "$(date)"
          git push