# Manually triggered workflow that scrapes the TES public view one page per
# matrix job:
#   1. `setup` asks the site how many result pages exist and publishes a
#      matrix definition ({"include": [{"page": 1}, {"page": 2}, ...]}).
#   2. `matrix-job` runs once per page, invoking transfer_scraper/main.py and
#      uploading the resulting transfer.json as a per-page artifact.
name: Scrape transfer and update file
run-name: Scrape transfer and update file

on:
  workflow_dispatch:

jobs:
  setup:
    name: Get number of pages and set up scrape page jobs
    runs-on: ubuntu-latest
    steps:
      - name: Create matrix parameters
        id: matrix-params
        # The script relies on bash features ([[ =~ ]]), so pin the shell.
        shell: bash
        env:
          # The request is sent with a browser-like User-Agent header.
          USER_AGENT: 'Mozilla/5.0 (X11; Linux x86_64; rv:122.0) Gecko/20100101 Firefox/122.0'
          TES_URL: 'https://tes.collegesource.com/publicview/TES_publicview01.aspx?rid=f080a477-bff8-46df-a5b2-25e9affdd4ed&aid=27b576bb-cd07-4e57-84d0-37475fde70ce'
        run: |
          # Pull the total page count out of the pagination label: keep the
          # line mentioning the label id, then the digits following "of ".
          # `|| true` defers failure handling to the explicit check below so
          # that a readable error is reported instead of a bare non-zero exit.
          NUM_PAGES="$(
            curl --fail --silent --show-error -H "User-Agent: ${USER_AGENT}" "${TES_URL}" \
              | grep -e 'lblInstWithEQPaginationInfo' \
              | grep -Poie '(?<=of )[0-9]*' \
              | head -n 1
          )" || true

          # Refuse to continue with an empty / non-numeric / zero page count;
          # otherwise the matrix JSON below would be malformed or empty.
          if ! [[ "${NUM_PAGES}" =~ ^[1-9][0-9]*$ ]]; then
            echo "::error::Could not determine the number of pages (got '${NUM_PAGES}')" >&2
            exit 1
          fi

          # "1,2,3" -> '1}, {"page": 2}, {"page": 3'; the printf below adds
          # the opening '{"page": ' and the closing '}'.
          # NOTE: GitHub caps a matrix at 256 jobs per workflow run, so a page
          # count above that will be rejected when the matrix is expanded.
          MATRIX_PARAMS="$(seq -s ',' 1 "${NUM_PAGES}" | sed -e 's/,/}, {"page": /g')"

          # Append (never overwrite) to the step-output file; tee also echoes
          # the value into the job log for debugging.
          printf 'matrix-params={"include": [{"page": %s}]}\n' "${MATRIX_PARAMS}" \
            | tee -a "${GITHUB_OUTPUT}"
    outputs:
      # JSON string consumed by `fromJson` in the matrix job below.
      matrix-params: ${{ steps.matrix-params.outputs.matrix-params }}

  matrix-job:
    name: Scrape page
    runs-on: ubuntu-latest
    needs: setup
    strategy:
      # Must reference the `setup` job declared in `needs` above.
      matrix: ${{ fromJson(needs.setup.outputs.matrix-params) }}
    steps:
      - name: Checkout scraping repo
        uses: actions/checkout@v4

      - name: Set up python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'
          cache: 'pip'

      - name: Install dependencies
        working-directory: transfer_scraper
        run: |
          python -m pip install --upgrade pip
          pip install -r 'requirements.txt'

      - name: Run scraper
        working-directory: transfer_scraper
        env:
          # Pass the matrix value through the environment rather than
          # splicing the expression into the script text.
          PAGE: ${{ matrix.page }}
        run: |
          python3 main.py "${PAGE}" transfer.json

      - name: Upload data to artifact
        uses: actions/upload-artifact@v4
        with:
          name: transfer-page-${{ matrix.page }}
          path: transfer_scraper/transfer.json