Add debug timeout to workflow

This commit is contained in:
powe97 2024-02-29 21:10:19 -05:00
parent b304e9f8d2
commit 4916feeb19
No known key found for this signature in database
GPG key ID: 7D1663B10978D1BA
2 changed files with 18 additions and 11 deletions

View file

@@ -1,8 +1,14 @@
name: Scrape transfer and update file name: Scrape transfer and update file
on: on:
# schedule:
# - cron: '15 * * * *'
workflow_dispatch: workflow_dispatch:
# schedule: inputs:
# - cron: '15 * * * *' timeout:
description: "Timeout time"
required: true
type: number
default: 120
concurrency: concurrency:
group: transfer-scraper group: transfer-scraper
@@ -39,7 +45,7 @@ jobs:
- name: Scrape transfer guide - name: Scrape transfer guide
run: | run: |
python3 quatalog-scraping/transfer_scraper/main.py data/transfer.json data/transfer_state.json python3 quatalog-scraping/transfer_scraper/main.py data/transfer.json data/transfer_state.json ${{ github.event.inputs.timeout }}
- name: Upload data to artifact - name: Upload data to artifact
uses: actions/upload-artifact@v4 uses: actions/upload-artifact@v4
@@ -53,14 +59,14 @@ jobs:
needs: [scrape-data] needs: [scrape-data]
steps: steps:
- name: Clone Quatalog data - name: Clone Quatalog data
uses: actions/checkout@v3 uses: actions/checkout@v4
with: with:
repository: quatalog/data repository: quatalog/data
path: quatalog-data path: quatalog-data
token: ${{ secrets.PUSH_TOKEN }} token: ${{ secrets.PUSH_TOKEN }}
- name: Download data from artifact - name: Download data from artifact
uses: actions/download-artifact@v3 uses: actions/download-artifact@v4
with: with:
name: transfer-data name: transfer-data
path: data/ path: data/

View file

@@ -101,12 +101,17 @@ def scrape_course_card(html_id, i, note):
def main(): def main():
if len(sys.argv) != 3: if len(sys.argv) != 4:
print(f"USAGE: python {sys.argv[0]} <transfer file> <state file>") print(f"USAGE: python {sys.argv[0]} <transfer file> <state file> <timeout minutes>")
exit(1) exit(1)
transfer_json_path = sys.argv[1] transfer_json_path = sys.argv[1]
state_json_path = sys.argv[2] state_json_path = sys.argv[2]
timeout_seconds = int(sys.argv[3]) * 60
# Set up timeout so that the GH action does not run forever, pretend it's ^C
signal(SIGALRM, lambda a, b: raise_(KeyboardInterrupt))
alarm(timeout_seconds)
options = webdriver.FirefoxOptions() options = webdriver.FirefoxOptions()
user_agent = UserAgent().random user_agent = UserAgent().random
@@ -137,10 +142,6 @@ def main():
json.dump(state, sys.stderr, indent=4) json.dump(state, sys.stderr, indent=4)
print("", file=sys.stderr) print("", file=sys.stderr)
# Set up 2hr timeout so that the GH action does not run forever, pretend it's ^C
signal(SIGALRM, lambda a, b: raise_(KeyboardInterrupt))
alarm(60 * 60 * 2)
try: try:
curr_page = 1 curr_page = 1
while state["inst_pg"] <= num_pages: while state["inst_pg"] <= num_pages: