Compare commits

...

21 Commits

Author  SHA1        Message  Date
powe97  d03be03aeb  Move debug print to be more accurate  2024-03-01 01:50:01 -05:00
powe97  019b777228  Make transfer scraper run continuously (at least as much as Github allows)  2024-03-01 01:45:06 -05:00
powe97  1a4542e20e  Fix crashing without timeout arg and re-add --headless  2024-03-01 00:29:34 -05:00
powe97  b0acd0e745  Dammit python  2024-02-29 22:31:09 -05:00
powe97  53891400ea  Every 15 minutes  2024-02-29 22:29:51 -05:00
powe97  c6e28d399a  Make timeout field have default value  2024-02-29 22:28:00 -05:00
powe97  682b1679b4  Run every 15 mins  2024-02-29 22:25:22 -05:00
powe97  aa4af079f8  Merge branch 'main' of https://github.com/quatalog/quatalog  2024-02-29 22:13:54 -05:00
powe97  cf2abf7193  Fix partial updates when KeyboardInterrupt happens mid-institution  2024-02-29 22:13:44 -05:00
powe97  55e34c9dd4  Bump versions of actions  2024-02-29 22:06:23 -05:00
powe97  efad1e9103  Bump versions for actions  2024-02-29 22:01:40 -05:00
powe97  cf953b2f02  Merge branch 'main' of https://github.com/quatalog/quatalog  2024-02-29 21:45:29 -05:00
powe97  44067261c3  Don't put whole repo in artifact  2024-02-29 21:45:22 -05:00
powe97  d268233d8b  Update transfer.yml  2024-02-29 21:38:17 -05:00
powe97  8a3e8a84d8  See previous commit  2024-02-29 21:25:53 -05:00
powe97  fd2da56aee  Make checkout data repo actually check the data repo out  2024-02-29 21:23:29 -05:00
powe97  12d844ca28  Fix global var fuckery  2024-02-29 21:21:39 -05:00
powe97  4916feeb19  Add debug timeout to workflow  2024-02-29 21:16:07 -05:00
powe97  b304e9f8d2  Fix scraper  2024-02-29 21:02:38 -05:00
powe97  f216c45748  Add if __name__ == "__main__" and fix workflow  2024-02-29 20:49:45 -05:00
powe97  15b09123ee  Set up workflow for transfer scraper  2024-02-29 20:40:15 -05:00
3 changed files with 290 additions and 237 deletions

View File

@@ -10,12 +10,12 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout scraping repo
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           path: quatalog-scraping
       - name: Clone QuACS data
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           repository: quacs/quacs-data
           path: quacs-data
@@ -36,7 +36,7 @@ jobs:
           rsync -avz "quacs-data/semester_data/$CURRENT_TERM/catalog.json" new-data/catalog.json
       - name: Upload data to artifact
-        uses: actions/upload-artifact@v3
+        uses: actions/upload-artifact@v4
         with:
           name: new-data
           path: new-data/
@@ -47,14 +47,14 @@ jobs:
     needs: [scrape-data]
     steps:
       - name: Clone Quatalog data
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           repository: quatalog/data
           path: quatalog-data
           token: ${{ secrets.PUSH_TOKEN }}
       - name: Download data from artifact
-        uses: actions/download-artifact@v3
+        uses: actions/download-artifact@v4
         with:
           name: new-data
           path: new-data
@@ -78,12 +78,12 @@ jobs:
     needs: [scrape-data]
     steps:
       - name: Checkout scraping repo
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           path: quatalog-scraping
       - name: Clone Quatalog static site
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           repository: quatalog/site
           ref: static-generated
@@ -91,7 +91,7 @@ jobs:
           token: ${{ secrets.PUSH_TOKEN }}
       - name: Download data from artifact
-        uses: actions/download-artifact@v3
+        uses: actions/download-artifact@v4
         with:
           name: new-data
           path: new-data
@@ -129,14 +129,14 @@ jobs:
     needs: [generate-site]
     steps:
       - name: Checkout site repo/static-generated branch
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           repository: quatalog/site
           ref: static-generated
           path: static-generated
       - name: Checkout data repo
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           repository: quatalog/data
           path: quatalog-data
@@ -167,13 +167,13 @@ jobs:
     needs: [generate-site]
     steps:
       - name: Checkout static-generated branch
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           repository: quatalog/site
           ref: static-generated
       - name: Setup Pages
-        uses: actions/configure-pages@v3
+        uses: actions/configure-pages@v4
       - name: Archive github-pages artifact
         run: |
@@ -186,7 +186,7 @@ jobs:
             -cf "$RUNNER_TEMP/artifact.tar" .
       - name: Upload github-pages artifact
-        uses: actions/upload-artifact@v3
+        uses: actions/upload-artifact@v4
         with:
           name: github-pages
           path: ${{ runner.temp }}/artifact.tar
@@ -207,4 +207,4 @@ jobs:
     steps:
       - name: Deploy to GitHub Pages
         id: deployment
-        uses: actions/deploy-pages@v1
+        uses: actions/deploy-pages@v4

View File

@@ -1,13 +1,22 @@
 name: Scrape transfer and update file
 run-name: Scrape transfer and update file
 on:
-  # schedule:
-  #   - cron: '*/15 * * * *'
+  repository_dispatch:
+    types: transfer-scraper
   workflow_dispatch:
+    # schedule:
+    #   - cron: '15 * * * *'
+    inputs:
+      timeout:
+        description: "Timeout time"
+        required: true
+        type: number
+        default: 120
+concurrency:
+  group: transfer-scraper
 jobs:
-  scrape-transfer:
+  scrape-data:
     name: Scrape transfer guide
     runs-on: ubuntu-latest
     steps:
@@ -16,34 +25,40 @@ jobs:
         with:
           path: quatalog-scraping
+      - name: Checkout data repo
+        uses: actions/checkout@v4
+        with:
+          repository: quatalog/data
+          path: data
       - name: Set up python
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
           python-version: '3.11'
           cache: 'pip'
       - name: Install dependencies
         working-directory: quatalog-scraping/transfer_scraper
-        run: pip install -r 'requirements.txt'
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r 'requirements.txt'
       - name: Log IP
-        run: echo "Public IP: $(curl -s 'https://ipinfo.io/ip')"
-      - name: Retrieve existing data
-        run:
-          mkdir data
-          cd data
-          wget 'https://raw.githubusercontent.com/powe97/rpi-transfer-scraper/main/transfer.json'
-          wget 'https://raw.githubusercontent.com/powe97/rpi-transfer-scraper/main/transfer_state.json'
+        run: |
+          echo "Public IP: $(curl -s 'https://ipinfo.io/ip')"
       - name: Scrape transfer guide
-        run: python3 quatalog-scraping/transfer_scraper data/transfer.json data/transfer_state.json
+        run: |
+          mkdir new-data
+          rsync -avzh data/transfer.json new-data
+          rsync -avzh data/transfer_state.json new-data
+          python3 quatalog-scraping/transfer_scraper/main.py new-data/transfer.json new-data/transfer_state.json ${{ github.event.inputs.timeout }}
       - name: Upload data to artifact
         uses: actions/upload-artifact@v4
         with:
           name: transfer-data
-          path: data/
+          path: new-data/

   push-new-data:
     name: Push new data to data repo
@@ -51,21 +66,21 @@ jobs:
     needs: [scrape-data]
     steps:
       - name: Clone Quatalog data
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           repository: quatalog/data
           path: quatalog-data
           token: ${{ secrets.PUSH_TOKEN }}
       - name: Download data from artifact
-        uses: actions/download-artifact@v3
+        uses: actions/download-artifact@v4
         with:
           name: transfer-data
-          path: data/
+          path: data
       - name: Copy data to repo directory
         run: |
-          rsync -avz data/ quatalog-data/
+          rsync -avzh data/ quatalog-data/
       - name: Push new data
         working-directory: quatalog-data
@@ -75,3 +90,17 @@ jobs:
         git add transfer.json transfer_state.json
         git commit -m "$(date)" || exit 0
         git push
+
+  re-run-scraper:
+    name: Tell Github to run this workflow again
+    runs-on: ubuntu-latest
+    needs: [push-new-data]
+    steps:
+      - name: Tell Github to run this workflow again
+        run: |
+          curl -L \
+            -H "Accept: application/vnd.github+json" \
+            -H "Authorization: token ${{ secrets.PUSH_TOKEN }}" \
+            --request POST \
+            --data '{"event_type": "transfer-scraper"}' \
+            "https://api.github.com/repos/quatalog/quatalog/dispatches"

View File

@@ -31,6 +31,8 @@ def normalize_class_name(input):
 def wait(ec):
+    global driver
+
     WebDriverWait(
         driver, 20, ignored_exceptions=[StaleElementReferenceException]
     ).until(ec)
@@ -38,6 +40,8 @@ def wait(ec):
 def scrape_course_card(html_id, i, note):
+    global driver
+
     trs = (
         driver.find_element("id", html_id)
         .find_elements(By.CSS_SELECTOR, ".course-detail")[i]
@ -100,222 +104,242 @@ def scrape_course_card(html_id, i, note):
}
if len(sys.argv) != 3:
print(f"USAGE: python {sys.argv[0]} <transfer file> <state file>")
exit(1)
def main():
global driver
transfer_json_path = sys.argv[1]
state_json_path = sys.argv[2]
options = webdriver.FirefoxOptions()
user_agent = UserAgent().random
print(f"Using randomized user agent {user_agent}", file=sys.stderr)
if sys.argv[-1] != "gui":
options.add_argument("--headless")
options.set_preference("general.useragent.override", user_agent)
driver = webdriver.Firefox(options=options)
driver.get(
"https://tes.collegesource.com/publicview/TES_publicview01.aspx?rid=f080a477-bff8-46df-a5b2-25e9affdd4ed&aid=27b576bb-cd07-4e57-84d0-37475fde70ce"
)
num_pages = int(
driver.find_element("id", "lblInstWithEQPaginationInfo").text.split()[-1]
)
print(f"{num_pages} pages detected", file=sys.stderr)
state = {"inst_pg": 1, "inst_idx": 0, "course_pg": 1, "course_idx": 0}
institutions = {}
if os.path.isfile(state_json_path):
with open(state_json_path, "r") as statejson:
state = json.load(statejson)
if os.path.isfile(transfer_json_path):
with open(transfer_json_path, "r") as transferjson:
institutions = json.load(transferjson)
print("Loaded state: ", end="", file=sys.stderr)
json.dump(state, sys.stderr, indent=4)
print("", file=sys.stderr)
# Set up 2hr timeout so that the GH action does not run forever, pretend it's ^C
signal(SIGALRM, lambda a, b: raise_(KeyboardInterrupt))
alarm(60 * 60 * 2)
try:
curr_page = 1
while state["inst_pg"] <= num_pages:
page = driver.find_element("id", f"gdvInstWithEQ")
if state["inst_pg"] != 1:
while curr_page != state["inst_pg"]:
print(f"Jumping to institution page {curr_page}", file=sys.stderr)
jumpable_pages = {
int(x.get_attribute("href").split("'")[3][5:]): x
for x in driver.find_elements(
By.CSS_SELECTOR,
"""a[href^="javascript:__doPostBack('gdvInstWithEQ','Page$"]""",
)
}
curr_page = int(
driver.find_element(
"id", "lblInstWithEQPaginationInfo"
).text.split()[-3]
)
if state["inst_pg"] in jumpable_pages:
jumpable_pages[state["inst_pg"]].click()
curr_page = state["inst_pg"]
elif state["inst_pg"] < min(jumpable_pages):
jumpable_pages[min(jumpable_pages)].click()
curr_page = min(jumpable_pages)
else:
jumpable_pages[max(jumpable_pages)].click()
curr_page = max(jumpable_pages)
wait(EC.staleness_of(page))
sleep(random.uniform(3, 6))
page = driver.find_element("id", f"gdvInstWithEQ")
inst_list_len = len(
page.find_elements(
By.CSS_SELECTOR, "a[id^=gdvInstWithEQ_btnCreditFromInstName_]"
)
if len(sys.argv) != 3 and len(sys.argv) != 4:
print(
f"USAGE: python {sys.argv[0]} <transfer file> <state file> [timeout minutes]"
)
while state["inst_idx"] < inst_list_len:
institution_link = driver.find_element("id", "gdvInstWithEQ").find_elements(
By.CSS_SELECTOR, "a[id^=gdvInstWithEQ_btnCreditFromInstName_]"
)[state["inst_idx"]]
fields = institution_link.find_element(By.XPATH, "../..").find_elements(
By.CSS_SELECTOR, ".gdv_boundfield_uppercase"
)
inst_name = institution_link.text.title().strip()
city = fields[0].text.title().strip()
us_state = fields[1].text.strip()
exit(1)
institution_link.click()
wait(EC.staleness_of(institution_link))
transfer_json_path = sys.argv[1]
state_json_path = sys.argv[2]
timeout_seconds = int(sys.argv[3] if len(sys.argv) == 4 else 120) * 60
try:
course_pages_len = int(
driver.find_element(
"id", "lblInstWithEQPaginationInfo"
).text.split()[-1]
)
except NoSuchElementException:
course_pages_len = 1
# Set up timeout so that the GH action does not run forever, pretend it's ^C
print(f"Setting timeout to {timeout_seconds} seconds", file=sys.stderr)
signal(SIGALRM, lambda a, b: raise_(KeyboardInterrupt))
alarm(timeout_seconds)
try:
courses = institutions[inst_name]["courses"]
except:
courses = []
options = webdriver.FirefoxOptions()
options.add_argument("--headless")
while state["course_pg"] <= course_pages_len:
course_links_len = len(
driver.find_element("id", "gdvCourseEQ").find_elements(
By.CSS_SELECTOR, "a[id^=gdvCourseEQ_btnViewCourseEQDetail_]"
)
)
while state["course_idx"] < course_links_len:
course_link = driver.find_element(
"id", "gdvCourseEQ"
).find_elements(
By.CSS_SELECTOR, "a[id^=gdvCourseEQ_btnViewCourseEQDetail_]"
)[
state["course_idx"]
]
course_link.click()
user_agent = UserAgent().random
options.set_preference("general.useragent.override", user_agent)
print(f"Using randomized user agent {user_agent}", file=sys.stderr)
try:
wait(
EC.element_to_be_clickable(
(By.CSS_SELECTOR, ".modal-header button")
)
driver = webdriver.Firefox(options=options)
driver.get(
"https://tes.collegesource.com/publicview/TES_publicview01.aspx?rid=f080a477-bff8-46df-a5b2-25e9affdd4ed&aid=27b576bb-cd07-4e57-84d0-37475fde70ce"
)
num_pages = int(
driver.find_element("id", "lblInstWithEQPaginationInfo").text.split()[-1]
)
print(f"{num_pages} pages detected", file=sys.stderr)
state = {"inst_pg": 1, "inst_idx": 0, "course_pg": 1, "course_idx": 0}
institutions = {}
if os.path.isfile(state_json_path):
with open(state_json_path, "r") as statejson:
state = json.load(statejson)
if os.path.isfile(transfer_json_path):
with open(transfer_json_path, "r") as transferjson:
institutions = json.load(transferjson)
print("Loaded state: ", end="", file=sys.stderr)
json.dump(state, sys.stderr, indent=4)
print("", file=sys.stderr)
try:
curr_page = 1
while state["inst_pg"] <= num_pages:
page = driver.find_element("id", f"gdvInstWithEQ")
if state["inst_pg"] != 1:
while curr_page != state["inst_pg"]:
jumpable_pages = {
int(x.get_attribute("href").split("'")[3][5:]): x
for x in driver.find_elements(
By.CSS_SELECTOR,
"""a[href^="javascript:__doPostBack('gdvInstWithEQ','Page$"]""",
)
transfer = [
scrape_course_card("lblSendCourseEQDetail", i, False)
for i in range(
0,
len(
driver.find_element(
"id", "lblSendCourseEQDetail"
).find_elements(By.CSS_SELECTOR, ".course-detail")
),
)
]
rpi = [
scrape_course_card("lblReceiveCourseEQDetail", i, True)
for i in range(
0,
len(
driver.find_element(
"id", "lblReceiveCourseEQDetail"
).find_elements(By.CSS_SELECTOR, ".course-detail")
),
)
]
print(
f"{inst_name} ({state['inst_idx']}:{state['inst_pg']}/{num_pages}): {transfer[0]['id']} {transfer[0]['name']} -> {rpi[0]['id']} {rpi[0]['name']} ({state['course_idx']}:{state['course_pg']}/{course_pages_len})",
file=sys.stderr,
)
begin_date = driver.find_element(
"id", "lblBeginEffectiveDate"
).text
end_date = driver.find_element("id", "lblEndEffectiveDate").text
}
curr_page = int(
driver.find_element(
By.CSS_SELECTOR, ".modal-header button"
).click()
"id", "lblInstWithEQPaginationInfo"
).text.split()[-3]
)
if state["inst_pg"] in jumpable_pages:
jumpable_pages[state["inst_pg"]].click()
curr_page = state["inst_pg"]
elif state["inst_pg"] < min(jumpable_pages):
jumpable_pages[min(jumpable_pages)].click()
curr_page = min(jumpable_pages)
else:
jumpable_pages[max(jumpable_pages)].click()
curr_page = max(jumpable_pages)
print(f"Jumping to institution page {curr_page}", file=sys.stderr)
courses += [
{
"transfer": transfer,
"rpi": rpi,
"begin": begin_date,
"end": end_date,
}
]
state["course_idx"] += 1
except Exception as e:
institutions.update(
{
inst_name: {
"city": city,
"state": us_state,
"courses": courses,
}
}
)
raise e
state["course_idx"] = 0
state["course_pg"] += 1
institutions.update(
{inst_name: {"city": city, "state": us_state, "courses": courses}}
)
state["course_pg"] = 1
state["inst_idx"] += 1
wait(EC.staleness_of(page))
sleep(random.uniform(3, 6))
page = driver.find_element("id", f"gdvInstWithEQ")
driver.find_element("id", "btnSwitchView").click()
wait(
EC.text_to_be_present_in_element(
("id", "lblInstWithEQPaginationInfo"), str(state["inst_pg"])
inst_list_len = len(
page.find_elements(
By.CSS_SELECTOR, "a[id^=gdvInstWithEQ_btnCreditFromInstName_]"
)
)
state["inst_idx"] = 0
state["inst_pg"] = (state["inst_pg"] % num_pages) + 1
while state["inst_idx"] < inst_list_len:
institution_link = driver.find_element(
"id", "gdvInstWithEQ"
).find_elements(
By.CSS_SELECTOR, "a[id^=gdvInstWithEQ_btnCreditFromInstName_]"
)[
state["inst_idx"]
]
fields = institution_link.find_element(By.XPATH, "../..").find_elements(
By.CSS_SELECTOR, ".gdv_boundfield_uppercase"
)
inst_name = institution_link.text.title().strip()
city = fields[0].text.title().strip()
us_state = fields[1].text.strip()
except (Exception, KeyboardInterrupt) as e:
print("Program hits exception and will save and terminate", file=sys.stderr)
print(traceback.format_exc(), file=sys.stderr)
institution_link.click()
wait(EC.staleness_of(institution_link))
print("Program will terminate with state: ", end="", file=sys.stderr)
json.dump(state, sys.stderr, indent=4)
print("", file=sys.stderr)
with open(transfer_json_path, "w") as transferjson:
json.dump(institutions, transferjson, indent=4)
with open(state_json_path, "w") as statejson:
json.dump(state, statejson, indent=4)
driver.quit()
try:
course_pages_len = int(
driver.find_element(
"id", "lblInstWithEQPaginationInfo"
).text.split()[-1]
)
except NoSuchElementException:
course_pages_len = 1
try:
courses = institutions[inst_name]["courses"]
except:
courses = []
while state["course_pg"] <= course_pages_len:
course_links_len = len(
driver.find_element("id", "gdvCourseEQ").find_elements(
By.CSS_SELECTOR, "a[id^=gdvCourseEQ_btnViewCourseEQDetail_]"
)
)
while state["course_idx"] < course_links_len:
course_link = driver.find_element(
"id", "gdvCourseEQ"
).find_elements(
By.CSS_SELECTOR, "a[id^=gdvCourseEQ_btnViewCourseEQDetail_]"
)[
state["course_idx"]
]
course_link.click()
try:
wait(
EC.element_to_be_clickable(
(By.CSS_SELECTOR, ".modal-header button")
),
)
transfer = [
scrape_course_card("lblSendCourseEQDetail", i, False)
for i in range(
0,
len(
driver.find_element(
"id", "lblSendCourseEQDetail"
).find_elements(
By.CSS_SELECTOR, ".course-detail"
)
),
)
]
rpi = [
scrape_course_card("lblReceiveCourseEQDetail", i, True)
for i in range(
0,
len(
driver.find_element(
"id", "lblReceiveCourseEQDetail"
).find_elements(
By.CSS_SELECTOR, ".course-detail"
)
),
)
]
print(
f"{inst_name} ({state['inst_idx']}:{state['inst_pg']}/{num_pages}): {transfer[0]['id']} {transfer[0]['name']} -> {rpi[0]['id']} {rpi[0]['name']} ({state['course_idx']}:{state['course_pg']}/{course_pages_len})",
file=sys.stderr,
)
begin_date = driver.find_element(
"id", "lblBeginEffectiveDate"
).text
end_date = driver.find_element(
"id", "lblEndEffectiveDate"
).text
driver.find_element(
By.CSS_SELECTOR, ".modal-header button"
).click()
courses += [
{
"transfer": transfer,
"rpi": rpi,
"begin": begin_date,
"end": end_date,
}
]
state["course_idx"] += 1
except (Exception, KeyboardInterrupt) as e:
institutions.update(
{
inst_name: {
"city": city,
"state": us_state,
"courses": courses,
}
}
)
raise e
state["course_idx"] = 0
state["course_pg"] += 1
institutions.update(
{inst_name: {"city": city, "state": us_state, "courses": courses}}
)
state["course_pg"] = 1
state["inst_idx"] += 1
driver.find_element("id", "btnSwitchView").click()
wait(
EC.text_to_be_present_in_element(
("id", "lblInstWithEQPaginationInfo"), str(state["inst_pg"])
),
)
state["inst_idx"] = 0
state["inst_pg"] = (state["inst_pg"] % num_pages) + 1
except (Exception, KeyboardInterrupt) as e:
print("Program hits exception and will save and terminate", file=sys.stderr)
print(traceback.format_exc(), file=sys.stderr)
print("Program will terminate with state: ", end="", file=sys.stderr)
json.dump(state, sys.stderr, indent=4)
print("", file=sys.stderr)
with open(transfer_json_path, "w") as transferjson:
json.dump(institutions, transferjson, indent=4)
with open(state_json_path, "w") as statejson:
json.dump(state, statejson, indent=4)
driver.quit()
if __name__ == "__main__":
main()