diff --git a/.github/workflows/scraper.yml b/.github/workflows/scraper.yml
new file mode 100644
index 0000000..5e1160d
--- /dev/null
+++ b/.github/workflows/scraper.yml
@@ -0,0 +1,80 @@
+# Scrapes course-offering data and publishes the results to quatalog/quatalog-data.
+# Runs hourly, and can also be started manually from the Actions tab.
+name: Update data
+on:
+  workflow_dispatch:
+  schedule:
+    - cron: '0 * * * *'
+
+# A manual run can overlap a scheduled one; queue runs so that two jobs never
+# push to quatalog-data at the same time.
+concurrency:
+  group: update-data
+  cancel-in-progress: false
+
+jobs:
+  scrape-data:
+    name: Scrape data
+    runs-on: ubuntu-latest
+    # The push below authenticates with PUSH_TOKEN, so GITHUB_TOKEN only needs
+    # read access to check out this repository.
+    permissions:
+      contents: read
+    steps:
+      - name: Checkout scraping repo
+        uses: actions/checkout@v4
+        with:
+          path: quatalog-scraping
+
+      - name: Clone QuACS data
+        uses: actions/checkout@v4
+        with:
+          repository: quacs/quacs-data
+          path: quacs-data
+
+      # The token is stored in this clone's git config by actions/checkout and
+      # is what authorizes the `git push` in the final step.
+      - name: Clone Quatalog data
+        uses: actions/checkout@v4
+        with:
+          repository: quatalog/quatalog-data
+          path: quatalog-data
+          token: ${{ secrets.PUSH_TOKEN }}
+
+      - name: Run scraper
+        run: |
+          # Arguments, in order: the QuACS semester data directory, then the
+          # terms_offered, prerequisites and terms_list JSON files (presumably
+          # written by the scraper; they are the files committed below).
+          quatalog-scraping/bin/CourseOfferingsScraper \
+            quacs-data/semester_data \
+            quatalog-data/terms_offered.json \
+            quatalog-data/prerequisites.json \
+            quatalog-data/terms_list.json
+
+      - name: Copy catalog.json from QuACS data
+        run: |
+          # Take the 4th double-quote-delimited field of the line mentioning
+          # current_term, i.e. the value in `"current_term": "<term>"`.
+          CURRENT_TERM="$(grep quatalog-data/terms_list.json -e 'current_term' | cut -f4 -d '"')"
+          # Without pipefail a failed grep goes unnoticed, so check explicitly.
+          if [ -z "$CURRENT_TERM" ]; then
+            echo "::error::current_term not found in quatalog-data/terms_list.json"
+            exit 1
+          fi
+          rsync -avz "quacs-data/semester_data/$CURRENT_TERM/catalog.json" quatalog-data/catalog.json
+
+      - name: Push new data
+        working-directory: ./quatalog-data
+        run: |
+          git config --global user.name "Quatalog Updater"
+          git config --global user.email "github_actions@quatalog.com"
+          git add terms_offered.json prerequisites.json terms_list.json catalog.json
+          # Nothing staged means the data is unchanged; exit early so that a
+          # genuine `git commit` failure below still fails the job.
+          if git diff --cached --quiet; then
+            echo "No data changes to publish."
+            exit 0
+          fi
+          git commit -m "$(date)"
+          git push
diff --git a/bin/CourseOfferingsScraper b/bin/CourseOfferingsScraper
index a9fdab9..16d0f6e 100755
Binary files a/bin/CourseOfferingsScraper and b/bin/CourseOfferingsScraper differ