--headless

Re-get the page to try circumvent timeout
Disable fail-fast
2024-11-15 19:42:44 +00:00 · 2024-03-05 21:14:32 -05:00 · 2024-03-05 21:14:00 -05:00 · 2024-03-05 20:49:08 -05:00 · 2024-03-05 20:47:41 -05:00 · 2024-03-05 19:10:16 -05:00
2 changed files with 186 additions and 367 deletions
--- a/.github/workflows/transfer.yml
+++ b/.github/workflows/transfer.yml
@@ -1,37 +1,32 @@
 name: Scrape transfer and update file
 run-name: Scrape transfer and update file
-env:
-  DEFAULT_TIMEOUT: 45
 on:
-  # schedule:
-  # - cron: '*/15 * * * *'
-  repository_dispatch:
-    types: transfer-scraper
  workflow_dispatch:
-    inputs:
-      timeout:
-        description: "Timeout time"
-        required: true
-        type: number
-        default: 2
-concurrency:
-  group: transfer-scraper

 jobs:
-  scrape-data:
-    name: Scrape transfer guide
+  setup:
+    name: Get number of pages and set up scrape page jobs
    runs-on: ubuntu-latest
+    steps:
+    - name: Create matrix parameters
+      id: matrix-params
+      run: |
+        NUM_PAGES="$(curl -H 'User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:122.0) Gecko/20100101 Firefox/122.0' 'https://tes.collegesource.com/publicview/TES_publicview01.aspx?rid=f080a477-bff8-46df-a5b2-25e9affdd4ed&aid=27b576bb-cd07-4e57-84d0-37475fde70ce' | grep -e 'lblInstWithEQPaginationInfo' | grep -Poie '(?<=of )[0-9]*')"
+        MATRIX_PARAMS="$(seq -s "," 1 "$NUM_PAGES")"
+        MATRIX_PARAMS="\"page\": $(sed -e 's/,/}, {"page": /g' <<< "$MATRIX_PARAMS")"
+        echo "matrix-params={\"include\": [{"$MATRIX_PARAMS"}]}" | tee $GITHUB_OUTPUT
+    outputs:
+      matrix-params: ${{ steps.matrix-params.outputs.matrix-params }}
+  scrape-page:
+    name: Scrape page
+    runs-on: ubuntu-latest
+    needs: setup
+    strategy:
+      matrix: ${{ fromJson(needs.setup.outputs.matrix-params) }}
+      fail-fast: false
    steps:
      - name: Checkout scraping repo
        uses: actions/checkout@v4
-        with:
-          path: quatalog-scraping
-
-      - name: Checkout data repo
-        uses: actions/checkout@v4
-        with:
-          repository: quatalog/data
-          path: data

      - name: Set up python
        uses: actions/setup-python@v5
@@ -40,74 +35,19 @@ jobs:
          cache: 'pip'

      - name: Install dependencies
-        working-directory: quatalog-scraping/transfer_scraper
+        working-directory: transfer_scraper
        run: |
          python -m pip install --upgrade pip
          pip install -r 'requirements.txt'

-      - name: Log IP
+      - name: Run scraper
+        working-directory: transfer_scraper
        run: |
-          echo "Public IP: $(curl -s 'https://ipinfo.io/ip')"
-
-      - name: Copy data to temp dir
-        run: |
-          mkdir new-data
-          dd status=progress if='data/transfer.json' of='new-data/transfer.json'
-          dd status=progress if='data/transfer_state.json' of='new-data/transfer_state.json'
-
-      - name: Scrape transfer guide
-        run: |
-          python3 quatalog-scraping/transfer_scraper/main.py new-data/transfer.json new-data/transfer_state.json ${{ github.event.inputs.timeout || env.DEFAULT_TIMEOUT }}
+          python3 main.py ${{ matrix.page }} transfer_${{ matrix.page }}.json

      - name: Upload data to artifact
        uses: actions/upload-artifact@v4
        with:
-          name: transfer-data
-          path: new-data/
+          name: transfer-page-${{ matrix.page }}
+          path: transfer_scraper/transfer_${{ matrix.page }}.json

-  push-new-data:
-    name: Push new data to data repo
-    runs-on: ubuntu-latest
-    needs: [scrape-data]
-    steps:
-      - name: Clone Quatalog data
-        uses: actions/checkout@v4
-        with:
-          repository: quatalog/data
-          path: quatalog-data
-          token: ${{ secrets.PUSH_TOKEN }}
-
-      - name: Download data from artifact
-        uses: actions/download-artifact@v4
-        with:
-          name: transfer-data
-          path: data
-
-      - name: Copy data to repo directory
-        run: |
-          ls -lsa data
-          dd status=progress if='data/transfer.json' of='quatalog-data/transfer.json'
-          dd status=progress if='data/transfer_state.json' of='quatalog-data/transfer_state.json'
-
-      - name: Push new data
-        working-directory: quatalog-data
-        run: |
-          git config user.name "Quatalog Updater"
-          git config user.email "github_actions@quatalog.com"
-          git add transfer.json transfer_state.json
-          git commit -m "$(date)" || exit 0
-          git push
-
-  re-run-scraper:
-    name: Tell Github to run this workflow again
-    runs-on: ubuntu-latest
-    needs: [push-new-data]
-    steps:
-      - name: Tell Github to run this workflow again
-        run: |
-          curl -L \
-            -H "Accept: application/vnd.github+json" \
-            -H "Authorization: token ${{ secrets.PUSH_TOKEN }}" \
-            --request POST \
-            --data '{"event_type": "transfer-scraper"}' \
-            "https://api.github.com/repos/quatalog/quatalog/dispatches"
--- a/transfer_scraper/main.py
+++ b/transfer_scraper/main.py
@@ -3,22 +3,17 @@ import html
 import sys
 import re
 import os.path
-import traceback
 from time import sleep
 import random
-from signal import alarm, SIGALRM, signal
 from fake_useragent import UserAgent
 from selenium import webdriver
 from selenium.webdriver.common.by import By
 from selenium.webdriver.support.ui import WebDriverWait
 from selenium.webdriver.support import expected_conditions as EC
-from selenium.common.exceptions import StaleElementReferenceException
-from selenium.common.exceptions import TimeoutException
-from selenium.common.exceptions import NoSuchElementException
-
-
-def raise_(ex):
-    raise ex
+from selenium.common.exceptions import (
+    StaleElementReferenceException,
+    NoSuchElementException,
+)


 # Fix course titles accounting for Roman numerals up to X
@@ -29,84 +24,36 @@ def normalize_title(input):
    return s.strip()


+# Waits until EC plus some random wait time
 def wait(ec):
    global driver

    WebDriverWait(
-        driver, 20, ignored_exceptions=[StaleElementReferenceException]
+        driver, 40, ignored_exceptions=[StaleElementReferenceException]
    ).until(ec)
    sleep(random.uniform(400, 1900) / 1000)


-def scrape_course_card(html_id, i, note):
+# jump_to_page: navigates to a paginated page on this insufferable website
+#
+# curr_page: the current page number
+# to_page: the page number to jump to
+# (the total number of pages is read from the pagination_type element)
+# postback_type: javascript:__doPostBack('<this field>','Page$3')
+# pagination_type: <span id="<this field>">PAGE 1 OF 27<br></span>
+def jump_to_page(curr_page, to_page, postback_type, pagination_type):
    global driver

-    trs = (
-        driver.find_element("id", html_id)
-        .find_elements(By.CSS_SELECTOR, ".course-detail")[i]
-        .find_elements(By.TAG_NAME, "tr")
-    )
-    course_name_and_id = trs[0].text.split()
-
-    course_desc = ""
-    if trs[1].find_element(By.TAG_NAME, "td").get_attribute("colspan") == "2":
-        course_desc = trs[1].text
-
-    course_department = normalize_title(
-        next((x for x in trs if x.text.strip().startswith("Department:")))
-        .find_elements(By.TAG_NAME, "td")[1]
-        .text
-    )
-    course_catalog = normalize_title(
-        next((x for x in trs if x.text.strip().startswith("Source catalog:")))
-        .find_elements(By.TAG_NAME, "td")[1]
-        .text
-    )
-
+    page = driver.find_element(By.ID, postback_type)
    try:
-        k = 1 + next(
-            i for i, v in enumerate(course_name_and_id) if bool(re.search(r"\d", v))
-        )
-        course_id = " ".join(course_name_and_id[0:k])
-        course_name = normalize_title(" ".join(course_name_and_id[k:]))
-    except StopIteration:  # Handling for Not Transferrable
-        course_id = course_name_and_id[0]
-        course_name = normalize_title(" ".join(course_name_and_id[1:]))
-
-    if not note:
-        try:
-            course_credits = (
-                next((x for x in trs if x.text.strip().startswith("Units:")))
-                .find_elements(By.TAG_NAME, "td")[1]
-                .text.strip()
-            )
-        except:
-            course_credits = ""
-
-        return {
-            "id": course_id,
-            "name": course_name,
-            "credits": course_credits,
-            "desc": course_desc,
-            "department": course_department,
-            "catalog": course_catalog,
-        }
-    else:
-        course_note = driver.find_element("id", "lblCommentsPublic").text.strip()
-        return {
-            "id": course_id,
-            "name": course_name,
-            "note": course_note,
-            "desc": course_desc,
-            "department": course_department,
-            "catalog": course_catalog,
-        }
-
-
-def jump_to_page(curr_page, to_page, num_pages, postback_type, pagination_type):
-    page = driver.find_element("id", postback_type)
-    if num_pages == 1:
+        num_pages = int(driver.find_element(By.ID, pagination_type).text.split()[-1])
+    except NoSuchElementException:
        return 1, page
+
+    if to_page > num_pages or to_page < 1:
+        raise ValueError(
+            f"to_page was out of range ({to_page} not in [1, {num_pages}])"
+        )
    while curr_page != to_page:
        jumpable_pages = {
            int(x.get_attribute("href").split("'")[3][5:]): x
@@ -117,7 +64,7 @@ def jump_to_page(curr_page, to_page, num_pages, postback_type, pagination_type):
                + """','Page$"]""",
            )
        }
-        curr_page = int(driver.find_element("id", pagination_type).text.split()[-3])
+        curr_page = int(driver.find_element(By.ID, pagination_type).text.split()[-3])
        if to_page in jumpable_pages:
            jumpable_pages[to_page].click()
            curr_page = to_page
@@ -127,251 +74,183 @@ def jump_to_page(curr_page, to_page, num_pages, postback_type, pagination_type):
        else:
            jumpable_pages[max(jumpable_pages)].click()
            curr_page = max(jumpable_pages)
-        print(f"Jumping to {postback_type} page {curr_page}", file=sys.stderr)

        wait(EC.staleness_of(page))
        sleep(random.uniform(400, 1900) / 1000)
-        page = driver.find_element("id", postback_type)
+        page = driver.find_element(By.ID, postback_type)
    return curr_page, page


-def main():
+# scrape_page: Scrapes a page of institutions
+#
+# page_num: The page to scrape.
+# Note that the current page before running this function must be 1.
+def scrape_page(page_num):
    global driver
-
-    if len(sys.argv) != 3 and len(sys.argv) != 4:
-        print(
-            f"USAGE: python {sys.argv[0]} <transfer file> <state file> [timeout minutes]"
-        )
-        exit(1)
-
-    transfer_json_path = sys.argv[1]
-    state_json_path = sys.argv[2]
-    timeout_seconds = int(sys.argv[3] if len(sys.argv) == 4 else 120) * 60
-
-    # Set up timeout so that the GH action does not run forever, pretend it's ^C
-    print(f"Setting timeout to {timeout_seconds} seconds", file=sys.stderr)
-    signal(SIGALRM, lambda a, b: raise_(KeyboardInterrupt))
-    alarm(timeout_seconds)
-
-    options = webdriver.FirefoxOptions()
-    options.add_argument("--headless")
-
-    user_agent = UserAgent().random
-    options.set_preference("general.useragent.override", user_agent)
-    # options.set_preference("network.proxy.socks", "")
-    # options.set_preference("network.proxy.socks_port", )
-    # options.set_preference("network.proxy.socks_remote_dns", True)
-    # options.set_preference("network.proxy.type", 1)
-    print(f"Using randomized user agent {user_agent}", file=sys.stderr)
+    global options

    driver = webdriver.Firefox(options=options)
    driver.get(
        "https://tes.collegesource.com/publicview/TES_publicview01.aspx?rid=f080a477-bff8-46df-a5b2-25e9affdd4ed&aid=27b576bb-cd07-4e57-84d0-37475fde70ce"
    )
+    jump_to_page(1, page_num, "gdvInstWithEQ", "lblInstWithEQPaginationInfo")

-    print(
-        f'Title is {driver.find_element(By.TAG_NAME, "title").get_attribute("innerText").strip()}',
-        file=sys.stderr,
-    )
-
-    num_pages = int(
-        driver.find_element("id", "lblInstWithEQPaginationInfo").text.split()[-1]
-    )
-    print(f"{num_pages} pages detected", file=sys.stderr)
-
-    state = {"inst_pg": 1, "inst_idx": 0, "course_pg": 1, "course_idx": 0}
-    institutions = {}
-    if os.path.isfile(state_json_path):
-        with open(state_json_path, "r") as statejson:
-            state = json.load(statejson)
-    if os.path.isfile(transfer_json_path):
-        with open(transfer_json_path, "r") as transferjson:
-            institutions = json.load(transferjson)
-
-    print("Loaded state: ", end="", file=sys.stderr)
-    json.dump(state, sys.stderr, indent=4)
-    print("", file=sys.stderr)
-
-    if state["inst_pg"] > num_pages:
-        raise Exception
-
-    try:
-        curr_inst_page = 1
-        while state["inst_pg"] <= num_pages:
-            curr_inst_page, page = jump_to_page(
-                curr_inst_page,
-                state["inst_pg"],
-                num_pages,
-                "gdvInstWithEQ",
-                "lblInstWithEQPaginationInfo",
-            )
-
-            inst_list_len = len(
-                page.find_elements(
+    num_institutions = len(
+        driver.find_elements(
            By.CSS_SELECTOR, "a[id^=gdvInstWithEQ_btnCreditFromInstName_]"
        )
    )
+    driver.quit()

-            while state["inst_idx"] < inst_list_len:
-                institution_link = driver.find_element(
-                    "id", "gdvInstWithEQ"
-                ).find_elements(
-                    By.CSS_SELECTOR, "a[id^=gdvInstWithEQ_btnCreditFromInstName_]"
-                )[
-                    state["inst_idx"]
+    print(f"Scraping page {page_num}, found {num_institutions} links", file=sys.stderr)
+    return [scrape_institution(i, page_num) for i in range(0, num_institutions)]
+
+
+# scrape_institution: Scrapes an institution by index.
+#
+# index: the 0-indexed index of the institution to scrape on the page we are on.
+def scrape_institution(index, page_num):
+    global driver
+    global options
+
+    driver = webdriver.Firefox(options=options)
+    driver.get(
+        "https://tes.collegesource.com/publicview/TES_publicview01.aspx?rid=f080a477-bff8-46df-a5b2-25e9affdd4ed&aid=27b576bb-cd07-4e57-84d0-37475fde70ce"
+    )
+    jump_to_page(1, page_num, "gdvInstWithEQ", "lblInstWithEQPaginationInfo")
+
+    inst_link = driver.find_element(
+        By.ID, f"gdvInstWithEQ_btnCreditFromInstName_{index}"
+    )
+    [inst_name, inst_city, inst_state, _] = [
+        e.text
+        for e in inst_link.find_element(By.XPATH, "../..").find_elements(
+            By.TAG_NAME, "td"
+        )
    ]
-                fields = institution_link.find_element(By.XPATH, "../..").find_elements(
-                    By.CSS_SELECTOR, ".gdv_boundfield_uppercase"
-                )
-                inst_name = normalize_title(institution_link.text)
-                city = normalize_title(fields[0].text)
-                us_state = fields[1].text.strip()
-
-                institution_link.click()
-                wait(EC.staleness_of(institution_link))
+    inst_name, inst_city = normalize_title(inst_name), normalize_title(inst_city)
+    inst_link.click()
+    wait(EC.staleness_of(inst_link))
+    print(f"Scraping {inst_name} ({inst_city}, {inst_state})", file=sys.stderr)

+    # Add all courses
    try:
-                    course_pages_len = int(
-                        driver.find_element(
-                            "id", "lblCourseEQPaginationInfo"
-                        ).text.split()[-1]
+        num_pages = int(
+            driver.find_element(By.ID, "lblCourseEQPaginationInfo").text.split()[-1]
        )
    except NoSuchElementException:
-                    course_pages_len = 1
+        num_pages = 1

    try:
-                    courses = institutions[inst_name]["courses"]
-                except Exception:
-                    courses = []
+        for i in range(1, num_pages + 1):
+            jump_to_page(max(1, i - 1), i, "gdvCourseEQ", "lblCourseEQPaginationInfo")
+            driver.find_element(By.ID, "gdvCourseEQ_cbxHeaderCheckAll").click()
+    except NoSuchElementException:
+        # Institution has no data
+        return {
+            "institution": inst_name,
+            "city": inst_city,
+            "state": inst_state,
+            "courses": [],
+        }

-                curr_course_page = 1
-                while state["course_pg"] <= course_pages_len:
-                    curr_course_page, page = jump_to_page(
-                        curr_course_page,
-                        state["course_pg"],
-                        course_pages_len,
-                        "gdvCourseEQ",
-                        "lblCourseEQPaginationInfo",
+    # Open list
+    driver.find_element(By.ID, "btnAddToMyEQList").click()
+    wait(EC.visibility_of_element_located((By.ID, "gdvMyCourseEQList")))
+
+    # Scrape list
+    tds = driver.find_element(By.ID, "gdvMyCourseEQList").find_elements(
+        By.TAG_NAME, "td"
    )

-                    course_links_len = len(
-                        page.find_elements(
-                            By.CSS_SELECTOR, "a[id^=gdvCourseEQ_btnViewCourseEQDetail_]"
+    transfer_courses = [
+        {
+            "transfer": parse_course_td(transfer_course),
+            "rpi": parse_course_td(rpi_course, note.text.strip()),
+            "begin": begin.text.strip(),
+            "end": end.text.strip(),
+        }
+        for transfer_course, rpi_course, note, begin, end, _ in zip(
+            *[iter(x for x in tds)] * 6
        )
-                    )
-
-                    while state["course_idx"] < course_links_len:
-                        course_link = driver.find_element(
-                            "id", "gdvCourseEQ"
-                        ).find_elements(
-                            By.CSS_SELECTOR, "a[id^=gdvCourseEQ_btnViewCourseEQDetail_]"
-                        )[
-                            state["course_idx"]
    ]
-                        course_link.click()

+    driver.quit()
+
+    return {
+        "institution": inst_name,
+        "city": inst_city,
+        "state": inst_state,
+        "courses": transfer_courses,
+    }
+
+
+def parse_course_td(td, note=None):
+    course_info = (
+        html.unescape(td.get_attribute("innerHTML")).strip().split("<br>")[0].split()
+    )
+
+    # Not all schools use the same course code format, so this figures out how long
+    # the code is, if one exists; there is none for Not Transferrable courses.
    try:
-                            wait(
-                                EC.element_to_be_clickable(
-                                    (By.CSS_SELECTOR, ".modal-header button")
-                                ),
-                            )
+        course_id_delim = 1 + list(
+            bool(re.search(r"\d", s)) for s in course_info
+        ).index(True)
+    except ValueError:
+        course_id_delim = 1

-                            transfer = [
-                                scrape_course_card("lblSendCourseEQDetail", i, False)
-                                for i in range(
-                                    0,
-                                    len(
-                                        driver.find_element(
-                                            "id", "lblSendCourseEQDetail"
-                                        ).find_elements(
-                                            By.CSS_SELECTOR, ".course-detail"
+    # Same deal with credit counts.
+    try:
+        cr_delim = (
+            len(course_info)
+            - 1
+            - list(bool(re.search(r"\(", s)) for s in course_info[::-1]).index(True)
        )
-                                    ),
-                                )
-                            ]
+    except ValueError:
+        cr_delim = len(course_info)

-                            rpi = [
-                                scrape_course_card("lblReceiveCourseEQDetail", i, True)
-                                for i in range(
-                                    0,
-                                    len(
-                                        driver.find_element(
-                                            "id", "lblReceiveCourseEQDetail"
-                                        ).find_elements(
-                                            By.CSS_SELECTOR, ".course-detail"
-                                        )
-                                    ),
-                                )
-                            ]
+    # note serves as a credit count override, since the RPI-side credit counts
+    # are inaccurate
+    out = {
+        "id": " ".join(course_info[:course_id_delim]),
+        "name": normalize_title(" ".join(course_info[course_id_delim:cr_delim])),
+        "catalog": td.find_element(By.TAG_NAME, "span").text,
+    }
+    if note is None:
+        out.update({"credits": str(" ".join(course_info[cr_delim:])[1:-1])}),
+        return out
+    else:
+        out.update({"note": note})
+        return out

+
+def main():
+    global driver
+    global options
+
+    if len(sys.argv) != 3:
        print(
-                                f"{inst_name} ({state['inst_idx']}:{state['inst_pg']}/{num_pages}): {transfer[0]['id']} {transfer[0]['name']} -> {rpi[0]['id']} {rpi[0]['name']} ({state['course_idx']}:{state['course_pg']}/{course_pages_len})",
+            f"USAGE: python {sys.argv[0]} <page number to scrape> <output file>",
            file=sys.stderr,
        )
+        return 1

-                            begin_date = driver.find_element(
-                                "id", "lblBeginEffectiveDate"
-                            ).text
-                            end_date = driver.find_element(
-                                "id", "lblEndEffectiveDate"
-                            ).text
+    PAGE_NUM_TO_SCRAPE = int(sys.argv[1])
+    OUT_FILENAME = sys.argv[2]

-                            driver.find_element(
-                                By.CSS_SELECTOR, ".modal-header button"
-                            ).click()
+    print(f"Setting up selenium Firefox emulator", file=sys.stderr)
+    options = webdriver.FirefoxOptions()
+    options.add_argument("--headless")

-                            courses += [
-                                {
-                                    "transfer": transfer,
-                                    "rpi": rpi,
-                                    "begin": begin_date,
-                                    "end": end_date,
-                                }
-                            ]
-                            state["course_idx"] += 1
-                        except (Exception, KeyboardInterrupt) as e:
-                            institutions.update(
-                                {
-                                    inst_name: {
-                                        "city": city,
-                                        "state": us_state,
-                                        "courses": courses,
-                                    }
-                                }
-                            )
-                            raise e
-                    state["course_idx"] = 0
-                    state["course_pg"] += 1
+    user_agent = UserAgent().random
+    options.set_preference("general.useragent.override", user_agent)
+    print(f"Using randomized user agent {user_agent}", file=sys.stderr)

-                institutions.update(
-                    {inst_name: {"city": city, "state": us_state, "courses": courses}}
-                )
-                state["course_pg"] = 1
-                state["inst_idx"] += 1
+    with open(OUT_FILENAME, "w") as transferjson:
+        json.dump(scrape_page(PAGE_NUM_TO_SCRAPE), transferjson, indent=4)

-                driver.find_element("id", "btnSwitchView").click()
-                wait(
-                    EC.text_to_be_present_in_element(
-                        ("id", "lblInstWithEQPaginationInfo"), str(state["inst_pg"])
-                    ),
-                )
-            state["inst_idx"] = 0
-            state["inst_pg"] += 1
-
-    except (Exception, KeyboardInterrupt) as e:
-        print("Program hits exception and will save and terminate", file=sys.stderr)
-        print(traceback.format_exc(), file=sys.stderr)
-
-    print("Program will terminate with state: ", end="", file=sys.stderr)
-    json.dump(state, sys.stderr, indent=4)
-    print("", file=sys.stderr)
-    with open(transfer_json_path, "w") as transferjson:
-        json.dump(institutions, transferjson, indent=4)
-    with open(state_json_path, "w") as statejson:
-        json.dump(state, statejson, indent=4)
    driver.quit()


 if __name__ == "__main__":
-    main()
+    exit(main())
Author	SHA1	Message	Date
powe97	a0b9081f8f	--headless	2024-03-05 21:14:32 -05:00
powe97	4f69c1d8a0	Re-get the page to try circumvent timeout	2024-03-05 21:14:00 -05:00
powe97	56c9268398	Disable fail-fast	2024-03-05 20:49:08 -05:00
powe97	02b383b90b	Extend timeout	2024-03-05 20:47:41 -05:00
powe97	95e8238786	Merge branch 'main' of https://github.com/quatalog/quatalog	2024-03-05 19:10:16 -05:00
powe97	fc72fda5de	Remove jump debug print	2024-03-05 19:10:10 -05:00
powe97	e45318404d	Update transfer.yml	2024-03-05 19:06:49 -05:00
powe97	10715c89e3	Update transfer.yml	2024-03-05 19:05:41 -05:00
powe97	52fdab6ce6	Make everything stderr print	2024-03-05 19:03:54 -05:00
powe97	42dbf3c19a	Update transfer.yml	2024-03-05 18:46:02 -05:00
powe97	985f40c4e7	Set up matrix jobs	2024-03-05 18:42:05 -05:00
powe97	cb24d84b46	Merge branch 'main' of https://github.com/quatalog/quatalog	2024-03-05 18:38:17 -05:00
powe97	ce2f22b23b	Merge branch 'main' of https://github.com/quatalog/quatalog	2024-03-05 18:38:12 -05:00
powe97	c8eadc06ee	Merge branch 'main' of https://github.com/quatalog/quatalog	2024-03-05 18:34:02 -05:00
powe97	6ad6f85708	Redesign scraper to not be unbearably slow	2024-03-05 18:33:54 -05:00
powe97	acdd08168f	Update transfer.yml	2024-03-05 18:27:51 -05:00