# quatalog-scraper/.github/workflows/scraper.yml
# Workflow overview
#   1. scrape-data        - runs CourseOfferingsScraper over the QuACS data and
#                           uploads the resulting JSON files as the `new-data`
#                           artifact.
#   2. push-new-data      - commits the JSON files to the quatalog/data repo.
#   3. generate-site      - runs GenerateHtml on the JSON files and commits the
#                           output to the `static-generated` branch of
#                           quatalog/site.
#   4. deploy-static-site - publishes the `static-generated` branch to GitHub
#                           Pages.
# Jobs 2 and 3 both depend only on job 1; job 4 waits for job 3.
name: Update data and generate static site

on:
  workflow_dispatch:
  schedule:
    # Top of every hour.
    - cron: '0 * * * *'

jobs:
  scrape-data:
    name: Scrape data
    runs-on: ubuntu-latest
    steps:
      - name: Checkout scraping repo
        uses: actions/checkout@v4
        with:
          path: quatalog-scraping

      - name: Clone QuACS data
        uses: actions/checkout@v4
        with:
          repository: quacs/quacs-data
          path: quacs-data

      - name: Run scraper
        run: |
          # Usage: CourseOfferingsScraper <data_directory> <terms_offered_file> <prerequisites_file> <list_of_terms_file>
          mkdir -p new-data
          quatalog-scraping/bin/CourseOfferingsScraper \
            quacs-data/semester_data \
            new-data/terms_offered.json \
            new-data/prerequisites.json \
            new-data/terms_list.json

      - name: Copy catalog.json from QuACS data
        run: |
          # Take the 4th double-quote-delimited field of the line mentioning
          # current_term (presumably `"current_term": "<term>"` - confirm
          # against the scraper's output format).
          CURRENT_TERM="$(grep -e 'current_term' new-data/terms_list.json | cut -f4 -d '"')"
          # The pipeline's status is cut's, so a missing match would otherwise
          # go unnoticed and yield a malformed source path below.
          if [ -z "$CURRENT_TERM" ]; then
            echo "::error::Could not determine current_term from new-data/terms_list.json"
            exit 1
          fi
          rsync -avz "quacs-data/semester_data/$CURRENT_TERM/catalog.json" new-data/catalog.json

      - name: Upload data to artifact
        # v3 of the artifact actions has been retired on github.com.
        uses: actions/upload-artifact@v4
        with:
          name: new-data
          path: new-data/
          # Downstream jobs cannot do anything useful with an empty artifact.
          if-no-files-found: error

  push-new-data:
    name: Push new data to data repo
    runs-on: ubuntu-latest
    needs: [scrape-data]
    steps:
      - name: Clone Quatalog data
        uses: actions/checkout@v4
        with:
          repository: quatalog/data
          path: quatalog-data
          # Token used for the checkout and therefore for the later `git push`.
          token: ${{ secrets.PUSH_TOKEN }}

      - name: Download data from artifact
        uses: actions/download-artifact@v4
        with:
          name: new-data
          path: new-data

      - name: Copy data to repo directory
        run: |
          rsync -avz new-data/ quatalog-data/

      - name: Push new data
        working-directory: quatalog-data
        run: |
          git config --global user.name "Quatalog Updater"
          git config --global user.email "github_actions@quatalog.com"
          git add terms_offered.json prerequisites.json terms_list.json catalog.json
          # Nothing staged means the data is unchanged: finish successfully
          # without committing. Any real commit/push failure still fails the job.
          if git diff --cached --quiet; then
            echo "No data changes to push."
            exit 0
          fi
          git commit -m "$(date)"
          git push

  generate-site:
    name: Generate the static site
    runs-on: ubuntu-latest
    needs: [scrape-data]
    steps:
      - name: Checkout scraping repo
        uses: actions/checkout@v4
        with:
          path: quatalog-scraping

      - name: Clone Quatalog static site
        uses: actions/checkout@v4
        with:
          repository: quatalog/site
          ref: static-generated
          path: quatalog-site
          # Token used for the checkout and therefore for the later `git push`.
          token: ${{ secrets.PUSH_TOKEN }}

      - name: Download data from artifact
        uses: actions/download-artifact@v4
        with:
          name: new-data
          path: new-data

      - name: Run the HTML generator
        run: |
          # Usage: GenerateHtml <terms_offered_file> <prerequisites_file> <list_of_terms_file> <catalog_file> <out_directory>
          quatalog-scraping/bin/GenerateHtml \
            new-data/terms_offered.json \
            new-data/prerequisites.json \
            new-data/terms_list.json \
            new-data/catalog.json \
            courses/

      - name: Merge data
        run: |
          rsync -avz courses/ quatalog-site/courses/

      - name: Push generated HTML
        working-directory: quatalog-site
        run: |
          git config --global user.name "Quatalog Updater"
          git config --global user.email "github_actions@quatalog.com"
          git add courses
          # Nothing staged means the generated pages are unchanged: finish
          # successfully without committing. Real failures still fail the job.
          if git diff --cached --quiet; then
            echo "No site changes to push."
            exit 0
          fi
          git commit -m "$(date)"
          git push

  deploy-static-site:
    name: Deploy static site to Github Pages
    runs-on: ubuntu-latest
    needs: [generate-site]
    permissions:
      pages: write
      id-token: write
    # A newer deployment cancels one that is still in progress.
    concurrency:
      group: "pages"
      cancel-in-progress: true
    environment:
      name: github-pages
      url: ${{ steps.deployment.outputs.page_url }}
    steps:
      - name: Checkout static-generated branch
        uses: actions/checkout@v4
        with:
          repository: quatalog/site
          ref: static-generated

      - name: Setup Pages
        uses: actions/configure-pages@v5

      - name: Upload artifact
        # upload-pages-artifact v3 and deploy-pages v4 are a matched pair built
        # on the current artifact backend; upgrade them together.
        uses: actions/upload-pages-artifact@v3
        with:
          path: '.'

      - name: Deploy to GitHub Pages
        id: deployment
        uses: actions/deploy-pages@v4