From 1403b42a5cae3760eb078f57a035f565041103c3 Mon Sep 17 00:00:00 2001 From: powe97 <116031952+powe97@users.noreply.github.com> Date: Sat, 16 Mar 2024 12:43:11 -0400 Subject: [PATCH] Oops --- .github/workflows/transfer.yml | 99 ---------------------------------- 1 file changed, 99 deletions(-) diff --git a/.github/workflows/transfer.yml b/.github/workflows/transfer.yml index 41716fa..d15a39b 100644 --- a/.github/workflows/transfer.yml +++ b/.github/workflows/transfer.yml @@ -5,105 +5,6 @@ on: schedule: - cron: '0 10 * * *' -jobs: - setup: - name: Get number of pages and set up scrape page jobs - runs-on: ubuntu-latest - steps: - - name: Create matrix parameters - id: matrix-params - run: | - NUM_PAGES="$(curl -H 'User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:122.0) Gecko/20100101 Firefox/122.0' 'https://tes.collegesource.com/publicview/TES_publicview01.aspx?rid=f080a477-bff8-46df-a5b2-25e9affdd4ed&aid=27b576bb-cd07-4e57-84d0-37475fde70ce' | grep -e 'lblInstWithEQPaginationInfo' | grep -Poie '(?<=of )[0-9]*')" - MATRIX_PARAMS="$(seq -s "," 1 "$NUM_PAGES")" - MATRIX_PARAMS="\"page\": $(sed -e 's/,/}, {"page": /g' <<< "$MATRIX_PARAMS")" - echo "matrix-params={\"include\": [{"$MATRIX_PARAMS"}]}" | tee $GITHUB_OUTPUT - outputs: - matrix-params: ${{ steps.matrix-params.outputs.matrix-params }} - - scrape-page: - name: Scrape page - runs-on: ubuntu-latest - needs: setup - strategy: - matrix: ${{ fromJson(needs.setup.outputs.matrix-params) }} - fail-fast: true - steps: - - name: Checkout scraping repo - uses: actions/checkout@v4 - - - name: Set up python - uses: actions/setup-python@v5 - with: - python-version: '3.11' - cache: 'pip' - - - name: Install dependencies - working-directory: transfer_scraper - run: | - python -m pip install --upgrade pip - pip install -r 'requirements.txt' - - - name: Run scraper - working-directory: transfer_scraper - run: | - python3 scrape_page.py ${{ matrix.page }} transfer_${{ matrix.page }}.json - - - name: Upload data to artifact - uses: actions/upload-artifact@v4 - with: - name: transfer-page-${{ matrix.page }} - path: transfer_scraper/transfer_${{ matrix.page }}.json - - commit-data: - name: Combine/convert and commit data - runs-on: ubuntu-latest - needs: scrape-page - steps: - - name: Set up python - uses: actions/setup-python@v5 - with: - python-version: '3.11' - - - name: Checkout data repo - uses: actions/checkout@v4 - with: - repository: quatalog/data - path: data - token: ${{ secrets.PUSH_TOKEN }} - - - name: Checkout scraping repo - uses: actions/checkout@v4 - with: - path: scrapers - - - name: Download partial JSONs - uses: actions/download-artifact@v4 - with: - pattern: transfer-page-* - merge-multiple: true - path: new-data - - - name: Combine JSONs - run: | - cat new-data/* | jq -s 'add | sort_by(.institution)' > data/transfer.json - - - name: Commit data - working-directory: data - run: | - git config user.name "Quatalog Updater" - git config user.email "github_actions@quatalog.com" - git add transfer.json - git commit -m "$(date)" || exit 0 - git push - - - name: - name: Scrape transfer and update file -run-name: Scrape transfer and update file -on: - workflow_dispatch: - schedule: - - cron: '0 10 * * *' - jobs: setup: name: Get number of pages and set up scrape page jobs