mirror of
https://github.com/quatalog/quatalog.git
synced 2024-11-16 03:52:55 +00:00
Compare commits
No commits in common. "a7ecbcbb5f2c562cf0b21cf0bd470262589cca5b" and "517952f977c697c2a78443d50785c5c8d4d96e11" have entirely different histories.
a7ecbcbb5f
...
517952f977
68
.github/workflows/transfer.yml
vendored
68
.github/workflows/transfer.yml
vendored
|
@ -46,7 +46,7 @@ jobs:
|
|||
- name: Run scraper
|
||||
working-directory: transfer_scraper
|
||||
run: |
|
||||
python3 scrape_page.py ${{ matrix.page }} transfer_${{ matrix.page }}.json
|
||||
python3 main.py ${{ matrix.page }} transfer_${{ matrix.page }}.json
|
||||
|
||||
- name: Upload data to artifact
|
||||
uses: actions/upload-artifact@v4
|
||||
|
@ -55,15 +55,10 @@ jobs:
|
|||
path: transfer_scraper/transfer_${{ matrix.page }}.json
|
||||
|
||||
commit-data:
|
||||
name: Combine/convert and commit data
|
||||
name: Combine and commit data
|
||||
runs-on: ubuntu-latest
|
||||
needs: scrape-page
|
||||
steps:
|
||||
- name: Set up python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.11'
|
||||
|
||||
- name: Checkout data repo
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
|
@ -71,11 +66,6 @@ jobs:
|
|||
path: data
|
||||
token: ${{ secrets.PUSH_TOKEN }}
|
||||
|
||||
- name: Checkout scraping repo
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
path: scrapers
|
||||
|
||||
- name: Download partial JSONs
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
|
@ -86,64 +76,12 @@ jobs:
|
|||
- name: Combine JSONs
|
||||
run: |
|
||||
cat new-data/* | jq -s 'add | sort_by(.institution)' > data/transfer.json
|
||||
|
||||
- name: Convert to by-courses format
|
||||
run: |
|
||||
python scrapers/transfer_scraper/convert_json.py data/transfer.json data/transfer_by_course.json
|
||||
|
||||
- name: Commit data
|
||||
working-directory: data
|
||||
run: |
|
||||
git config user.name "Quatalog Updater"
|
||||
git config user.email "github_actions@quatalog.com"
|
||||
git add transfer*.json
|
||||
git commit -m "$(date)" || exit 0
|
||||
git push
|
||||
|
||||
generate-csv:
|
||||
name: Generate CSVs
|
||||
runs-on: ubuntu-latest
|
||||
needs: commit-data
|
||||
steps:
|
||||
- name: Set up python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.11'
|
||||
|
||||
- name: Checkout data repo
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
repository: quatalog/data
|
||||
path: data
|
||||
|
||||
- name: Checkout scrapers repo
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
path: scrapers
|
||||
|
||||
- name: Checkout transfer guides repo
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
repository: quatalog/transfer_guides
|
||||
path: transfer_guides
|
||||
token: ${{ secrets.PUSH_TOKEN }}
|
||||
|
||||
- name: Generate CSVs
|
||||
run: |
|
||||
python scrapers/generate_csv.py data/transfer_by_course.json new_transfer_guides
|
||||
|
||||
- name: Copy data to repo and remove stale files
|
||||
run: |
|
||||
cd transfer_guides
|
||||
git rm *
|
||||
cd ..
|
||||
rsync -azvh new_transfer_guides/ transfer_guides
|
||||
|
||||
- name: Commit CSVs
|
||||
working-directory: transfer_guides
|
||||
run: |
|
||||
git config user.name "Quatalog Updater"
|
||||
git config user.email "github_actions@quatalog.com"
|
||||
git add .
|
||||
git add transfer.json
|
||||
git commit -m "$(date)" || exit 0
|
||||
git push
|
||||
|
|
Binary file not shown.
BIN
bin/GenerateHtml
BIN
bin/GenerateHtml
Binary file not shown.
|
@ -239,15 +239,7 @@ void handle_everything(const Json::Value& sections,
|
|||
const Json::Value& term_prereqs) {
|
||||
Json::Value& course_term = course_terms[term];
|
||||
const auto& course_id = course["id"].asString();
|
||||
// course_term["title"] = course["title"];
|
||||
std::set<std::string> titles;
|
||||
for(const auto& sec : course["sections"]) {
|
||||
titles.insert(sec["title"].asString());
|
||||
}
|
||||
course_term["title"] = Json::arrayValue;
|
||||
for(const auto& title : titles) {
|
||||
course_term["title"].append(title);
|
||||
}
|
||||
course_term["title"] = course["title"];
|
||||
handle_sections(sections,course_term);
|
||||
course_terms["latest_term"] = term;
|
||||
handle_attributes(sections[0],course_id,course_term,out_prereqs);
|
||||
|
|
|
@ -417,7 +417,7 @@ void generate_table_cell(const int year,
|
|||
std::ostream& os) {
|
||||
std::string year_term = std::to_string(year) + term_to_number.at(term);
|
||||
const auto& term_offered = terms_offered[year_term];
|
||||
const auto& course_titles = term_offered["title"];
|
||||
const auto& course_title = term_offered["title"].asString();
|
||||
const auto& credit_string = generate_credit_string(term_offered["credits"]);
|
||||
|
||||
tag(os,TAG::COMPLEX_BEGIN) << R"(<td )";
|
||||
|
@ -437,14 +437,11 @@ void generate_table_cell(const int year,
|
|||
<< "&subj_in=" << term_offered["prefix"].asString()
|
||||
<< "&crse_in=" << course_id.substr(5,4)
|
||||
<< "&schd_in="
|
||||
<< R"(">)" << course_titles[0].asString() << " (" << credit_string << "c)";
|
||||
<< R"(">)" << course_title << " (" << credit_string << "c)</a>";
|
||||
for(const auto& attr : term_offered["attributes"]) {
|
||||
os << ' ' << attr.asString();
|
||||
}
|
||||
for(int i = 1; i < course_titles.size(); i++) {
|
||||
os << "<br>" << course_titles[i].asString();
|
||||
}
|
||||
os << "</a>" << '\n';
|
||||
os << '\n';
|
||||
|
||||
tag(os,TAG::END,"span");
|
||||
tag(os,TAG::BEGIN,R"(ul class="prof-list")");
|
||||
|
@ -550,7 +547,7 @@ get_course_title_and_description(const std::string& course_id,
|
|||
if(catalog_entry) {
|
||||
title = catalog_entry["name"].asString();
|
||||
} else {
|
||||
title = terms_offered[latest_term]["title"][0].asString();
|
||||
title = terms_offered[latest_term]["title"].asString();
|
||||
}
|
||||
if(catalog_entry) {
|
||||
description = catalog_entry["description"].asString();
|
||||
|
|
|
@ -1,29 +0,0 @@
|
|||
import json
|
||||
import sys
|
||||
import collections
|
||||
|
||||
|
||||
def main():
    """Re-index the scraper's per-institution JSON by RPI course ID.

    Command line: <json from scraper> <by-course output file>

    The input is read as a list of institution objects, each carrying
    "institution", "city", "state" and a "transfers" list. Every transfer
    lists the RPI courses it maps to under ["rpi"]["courses"], each with an
    "id". The output is a JSON object mapping each RPI course ID to the list
    of transfers that reference it, with the institution fields copied onto
    each transfer.

    Returns:
        1 if the wrong number of arguments was given (usage is printed to
        stderr), 0 on success.
    """
    if len(sys.argv) != 3:
        print(
            f"USAGE: python {sys.argv[0]} <json from scraper> <by-course output file>",
            file=sys.stderr,
        )
        return 1

    with open(sys.argv[1], "r") as scraper_json:
        by_institution = json.load(scraper_json)

    by_rpi_course = collections.defaultdict(list)
    for inst in by_institution:
        for xfer in inst["transfers"]:
            # dict.fromkeys drops repeated IDs while keeping first-seen order,
            # so a transfer that lists the same RPI course twice is filed
            # under that course only once.
            rpi_ids = dict.fromkeys(
                rpi_course["id"] for rpi_course in xfer["rpi"]["courses"]
            )
            if not rpi_ids:
                # Nothing to index; leave the transfer untouched.
                continue

            # Copy the institution fields once per transfer rather than once
            # per RPI course.
            for a in ("institution", "city", "state"):
                xfer[a] = inst[a]

            for rpi_id in rpi_ids:
                by_rpi_course[rpi_id].append(xfer)

    with open(sys.argv[2], "w") as out_json:
        json.dump(by_rpi_course, out_json, sort_keys=True, indent=2)

    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # sys.exit() instead of the bare exit(): the latter is injected by the
    # `site` module for interactive use and is missing under `python -S`.
    sys.exit(main())
|
|
@ -1,61 +0,0 @@
|
|||
import json
|
||||
import sys
|
||||
import collections
|
||||
import csv
|
||||
import os
|
||||
|
||||
|
||||
def main():
    """Write one "<course> Transfer Guide.csv" per RPI course.

    Command line: <by-course json> <output dir>

    The input is read as a JSON object mapping an RPI course ID to a list of
    transfer records. Each record supplies "city", "state", "institution",
    "note", "begin", "end", a "transfer" object ("courses" list and
    "catalog"), and an "rpi" object ("courses" list). Multi-course
    equivalencies are joined with " + " in the ID and name columns.

    Returns:
        1 if the wrong number of arguments was given (usage is printed to
        stderr), 0 on success.
    """
    if len(sys.argv) != 3:
        print(
            f"USAGE: python {sys.argv[0]} <by-course json> <output dir>",
            file=sys.stderr,
        )
        return 1

    with open(sys.argv[1], "r") as transfer_json:
        transfer_data = json.load(transfer_json)

    out_dir = sys.argv[2]
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(out_dir, exist_ok=True)

    # The header row is the same for every guide, so build it once.
    header = (
        "City",
        "State",
        "Institution",
        "Transfer Course ID",
        "Transfer Course Name",
        "RPI Course ID",
        "RPI Course Name",
        "Note",
        "Begin",
        "End",
        "Transfer Catalog",
    )

    for course, data in transfer_data.items():
        print(f"Generating {course} transfer guide...", file=sys.stderr)

        csv_output = [header]
        for xfer in data:
            transfer_courses = xfer["transfer"]["courses"]
            rpi_courses = xfer["rpi"]["courses"]
            csv_output.append(
                (
                    xfer["city"],
                    xfer["state"],
                    xfer["institution"],
                    " + ".join(x["id"] for x in transfer_courses),
                    " + ".join(x["name"] for x in transfer_courses),
                    " + ".join(x["id"] for x in rpi_courses),
                    " + ".join(x["name"] for x in rpi_courses),
                    xfer["note"],
                    xfer["begin"],
                    xfer["end"],
                    xfer["transfer"]["catalog"],
                )
            )

        csv_path = os.path.join(out_dir, f"{course} Transfer Guide.csv")
        # newline="" lets the csv module control line endings itself; without
        # it, platforms that translate "\n" emit "\r\r\n" row terminators.
        # The explicit encoding keeps non-ASCII names from depending on the
        # locale's default encoding.
        with open(csv_path, "w", newline="", encoding="utf-8") as course_csv:
            csv.writer(course_csv).writerows(csv_output)

    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # sys.exit() instead of the bare exit(): the latter is injected by the
    # `site` module for interactive use and is missing under `python -S`.
    sys.exit(main())
|
|
@ -306,9 +306,7 @@ def main():
|
|||
print(f"Using randomized user agent {user_agent}", file=sys.stderr)
|
||||
|
||||
with open(OUT_FILENAME, "w") as transferjson:
|
||||
json.dump(
|
||||
scrape_page(PAGE_NUM_TO_SCRAPE), transferjson, sort_keys=True, indent=2
|
||||
)
|
||||
json.dump(scrape_page(PAGE_NUM_TO_SCRAPE), transferjson, indent=4)
|
||||
|
||||
driver.quit()
|
||||
|
Loading…
Reference in a new issue