Make the transfer-scraper timeout configurable.

The workflow_dispatch trigger gains a required numeric `timeout` input
(minutes, default 120) that is forwarded to transfer_scraper/main.py as a
third positional argument. main.py converts it to seconds and arms SIGALRM
before the browser options are created instead of just before the scrape
loop; the alarm handler raises KeyboardInterrupt. The checkout and
download-artifact actions in the second job are bumped from v3 to v4.

NOTE(review): leading whitespace on context and removed lines was
reconstructed from a whitespace-collapsed copy of this patch -- confirm it
against the target files before applying.

diff --git a/.github/workflows/transfer.yml b/.github/workflows/transfer.yml
index 609450b..557363f 100644
--- a/.github/workflows/transfer.yml
+++ b/.github/workflows/transfer.yml
@@ -1,8 +1,14 @@
 name: Scrape transfer and update file
 on:
+#  schedule:
+#    - cron: '15 * * * *'
   workflow_dispatch:
-#  schedule:
-#    - cron: '15 * * * *'
+    inputs:
+      timeout:
+        description: "Timeout in minutes before the scraper stops itself"
+        required: true
+        type: number
+        default: 120

 concurrency:
   group: transfer-scraper
@@ -39,7 +45,7 @@ jobs:

       - name: Scrape transfer guide
         run: |
-          python3 quatalog-scraping/transfer_scraper/main.py data/transfer.json data/transfer_state.json
+          python3 quatalog-scraping/transfer_scraper/main.py data/transfer.json data/transfer_state.json ${{ github.event.inputs.timeout }}

       - name: Upload data to artifact
         uses: actions/upload-artifact@v4
@@ -53,14 +59,14 @@ jobs:
     needs: [scrape-data]
     steps:
       - name: Clone Quatalog data
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           repository: quatalog/data
           path: quatalog-data
           token: ${{ secrets.PUSH_TOKEN }}

       - name: Download data from artifact
-        uses: actions/download-artifact@v3
+        uses: actions/download-artifact@v4
         with:
           name: transfer-data
           path: data/
diff --git a/transfer_scraper/main.py b/transfer_scraper/main.py
index 28c20ae..7ea7cfc 100644
--- a/transfer_scraper/main.py
+++ b/transfer_scraper/main.py
@@ -101,12 +101,17 @@ def scrape_course_card(html_id, i, note):


 def main():
-    if len(sys.argv) != 3:
-        print(f"USAGE: python {sys.argv[0]} ")
+    if len(sys.argv) != 4:
+        print(f"USAGE: python {sys.argv[0]} <transfer json path> <state json path> <timeout minutes>")
         exit(1)

     transfer_json_path = sys.argv[1]
     state_json_path = sys.argv[2]
+    timeout_seconds = int(sys.argv[3]) * 60
+
+    # Set up timeout so that the GH action does not run forever, pretend it's ^C
+    signal(SIGALRM, lambda a, b: raise_(KeyboardInterrupt))
+    alarm(timeout_seconds)

     options = webdriver.FirefoxOptions()
     user_agent = UserAgent().random
@@ -137,10 +142,6 @@ def main():
     json.dump(state, sys.stderr, indent=4)
     print("", file=sys.stderr)

-    # Set up 2hr timeout so that the GH action does not run forever, pretend it's ^C
-    signal(SIGALRM, lambda a, b: raise_(KeyboardInterrupt))
-    alarm(60 * 60 * 2)
-
     try:
         curr_page = 1
         while state["inst_pg"] <= num_pages: