Compare commits


9 Commits

Author             SHA1        Message                                              Date
powe97             a7ecbcbb5f  Run CSV generator                                    2024-03-16 02:42:08 -04:00
powe97             5e9e464ad0  Add CSV generator                                    2024-03-16 02:17:31 -04:00
powe97             de7360f1c7  Convert JSON to by-course format                     2024-03-16 01:28:33 -04:00
powe97             ba3e5c77d1  Lint                                                 2024-03-16 01:17:49 -04:00
powe97             c3be28e520  Add JSON converter from by-institution to by-course  2024-03-16 01:14:58 -04:00
powe97             061f9b14e5  Sort keys                                            2024-03-16 01:07:16 -04:00
powe97             1fa7ab61af  Rename transfer scraper                              2024-03-16 00:32:59 -04:00
Quatalog Compiler  bd930f05e9  Recompile scraper : Sat Mar 16 03:47:31 UTC 2024     2024-03-16 03:47:31 +00:00
powe97             77423ec069  Show music lessons properly                          2024-03-15 23:46:28 -04:00
8 changed files with 174 additions and 9 deletions


@@ -46,7 +46,7 @@ jobs:
       - name: Run scraper
         working-directory: transfer_scraper
         run: |
-          python3 main.py ${{ matrix.page }} transfer_${{ matrix.page }}.json
+          python3 scrape_page.py ${{ matrix.page }} transfer_${{ matrix.page }}.json
       - name: Upload data to artifact
         uses: actions/upload-artifact@v4
@@ -55,10 +55,15 @@ jobs:
           path: transfer_scraper/transfer_${{ matrix.page }}.json
   commit-data:
-    name: Combine and commit data
+    name: Combine/convert and commit data
     runs-on: ubuntu-latest
     needs: scrape-page
     steps:
+      - name: Set up python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
       - name: Checkout data repo
         uses: actions/checkout@v4
         with:
@@ -66,6 +71,11 @@ jobs:
           path: data
           token: ${{ secrets.PUSH_TOKEN }}
+      - name: Checkout scraping repo
+        uses: actions/checkout@v4
+        with:
+          path: scrapers
       - name: Download partial JSONs
         uses: actions/download-artifact@v4
         with:
@@ -76,12 +86,64 @@ jobs:
       - name: Combine JSONs
         run: |
           cat new-data/* | jq -s 'add | sort_by(.institution)' > data/transfer.json
+      - name: Convert to by-courses format
+        run: |
+          python scrapers/transfer_scraper/convert_json.py data/transfer.json data/transfer_by_course.json
       - name: Commit data
         working-directory: data
         run: |
           git config user.name "Quatalog Updater"
           git config user.email "github_actions@quatalog.com"
-          git add transfer.json
+          git add transfer*.json
           git commit -m "$(date)" || exit 0
           git push
+  generate-csv:
+    name: Generate CSVs
+    runs-on: ubuntu-latest
+    needs: commit-data
+    steps:
+      - name: Set up python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+      - name: Checkout data repo
+        uses: actions/checkout@v4
+        with:
+          repository: quatalog/data
+          path: data
+      - name: Checkout scrapers repo
+        uses: actions/checkout@v4
+        with:
+          path: scrapers
+      - name: Checkout transfer guides repo
+        uses: actions/checkout@v4
+        with:
+          repository: quatalog/transfer_guides
+          path: transfer_guides
+          token: ${{ secrets.PUSH_TOKEN }}
+      - name: Generate CSVs
+        run: |
+          python scrapers/generate_csv.py data/transfer_by_course.json new_transfer_guides
+      - name: Copy data to repo and remove stale files
+        run: |
+          cd transfer_guides
+          git rm *
+          cd ..
+          rsync -azvh new_transfer_guides/ transfer_guides
+      - name: Commit CSVs
+        working-directory: transfer_guides
+        run: |
+          git config user.name "Quatalog Updater"
+          git config user.email "github_actions@quatalog.com"
+          git add .
+          git commit -m "$(date)" || exit 0
+          git push
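
For context, the "Combine JSONs" step slurps every partial per-page array into one document: jq's -s flag wraps all inputs in an array, add concatenates them, and sort_by(.institution) orders the result. Below is a minimal Python sketch of the equivalent operation; the paths come from the workflow, everything else is illustrative and not part of the repo.

import glob
import json

partials = []
for path in glob.glob("new-data/*"):
    with open(path) as f:
        partials.append(json.load(f))  # each partial file holds a JSON array

# `add` concatenates the slurped arrays; sort_by orders by institution name
combined = sorted(
    (inst for part in partials for inst in part),
    key=lambda inst: inst["institution"],
)

with open("data/transfer.json", "w") as f:
    json.dump(combined, f)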

Binary file not shown.

Binary file not shown.


@@ -239,7 +239,15 @@ void handle_everything(const Json::Value& sections,
                        const Json::Value& term_prereqs) {
     Json::Value& course_term = course_terms[term];
     const auto& course_id = course["id"].asString();
-    course_term["title"] = course["title"];
+    // course_term["title"] = course["title"];
+    std::set<std::string> titles;
+    for(const auto& sec : course["sections"]) {
+        titles.insert(sec["title"].asString());
+    }
+    course_term["title"] = Json::arrayValue;
+    for(const auto& title : titles) {
+        course_term["title"].append(title);
+    }
     handle_sections(sections,course_term);
     course_terms["latest_term"] = term;
     handle_attributes(sections[0],course_id,course_term,out_prereqs);
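
The removed line copied a single course-level title; the new block instead collects the distinct per-section titles (std::set gives deduplication and sorted order) into a JSON array, which is what lets courses like music lessons show each section's own title. A rough Python sketch of the same logic, with the input shape assumed from the diff:

def collect_titles(course):
    # mirrors std::set semantics: deduplicated, sorted
    return sorted({sec["title"] for sec in course["sections"]})

# e.g. a lessons course whose sections carry different titles (values invented):
course = {"sections": [{"title": "Piano"}, {"title": "Voice"}, {"title": "Piano"}]}
assert collect_titles(course) == ["Piano", "Voice"]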


@@ -417,7 +417,7 @@ void generate_table_cell(const int year,
                          std::ostream& os) {
     std::string year_term = std::to_string(year) + term_to_number.at(term);
     const auto& term_offered = terms_offered[year_term];
-    const auto& course_title = term_offered["title"].asString();
+    const auto& course_titles = term_offered["title"];
     const auto& credit_string = generate_credit_string(term_offered["credits"]);
     tag(os,TAG::COMPLEX_BEGIN) << R"(<td )";
@@ -437,11 +437,14 @@ void generate_table_cell(const int year,
        << "&subj_in=" << term_offered["prefix"].asString()
        << "&crse_in=" << course_id.substr(5,4)
        << "&schd_in="
-       << R"(">)" << course_title << " (" << credit_string << "c)</a>";
+       << R"(">)" << course_titles[0].asString() << " (" << credit_string << "c)";
     for(const auto& attr : term_offered["attributes"]) {
         os << ' ' << attr.asString();
     }
-    os << '\n';
+    for(int i = 1; i < course_titles.size(); i++) {
+        os << "<br>" << course_titles[i].asString();
+    }
+    os << "</a>" << '\n';
     tag(os,TAG::END,"span");
     tag(os,TAG::BEGIN,R"(ul class="prof-list")");
@@ -547,7 +550,7 @@ get_course_title_and_description(const std::string& course_id,
     if(catalog_entry) {
         title = catalog_entry["name"].asString();
     } else {
-        title = terms_offered[latest_term]["title"].asString();
+        title = terms_offered[latest_term]["title"][0].asString();
     }
     if(catalog_entry) {
         description = catalog_entry["description"].asString();
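
Net effect on the generated cell markup: the anchor used to close immediately after the first title and credit string, leaving the attribute codes outside the link; now attributes and any additional titles render inside it, separated by <br>. Roughly, as a Python illustration (markup heavily abbreviated, titles and attribute codes invented):

before = '<a href="...">Piano Lessons (1c)</a> MUS'
after = '<a href="...">Piano Lessons (1c) MUS<br>Voice Lessons</a>'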


@@ -0,0 +1,29 @@
+import json
+import sys
+import collections
+
+
+def main():
+    if len(sys.argv) != 3:
+        print(
+            f"USAGE: python {sys.argv[0]} <json from scraper> <by-course output file>",
+            file=sys.stderr,
+        )
+        return 1
+    with open(sys.argv[1], "r") as scraper_json:
+        by_institution = json.load(scraper_json)
+
+    by_rpi_course = collections.defaultdict(list)
+    for inst in by_institution:
+        for xfer in inst["transfers"]:
+            for rpi_course in xfer["rpi"]["courses"]:
+                for a in ["institution", "city", "state"]:
+                    xfer[a] = inst[a]
+                by_rpi_course[rpi_course["id"]].append(xfer)
+
+    with open(sys.argv[2], "w") as out_json:
+        json.dump(by_rpi_course, out_json, sort_keys=True, indent=2)
+
+
+if __name__ == "__main__":
+    exit(main())
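
To make the conversion concrete, here is a hedged sketch of the shapes involved, with key names taken from the code and all values invented: the scraper emits a list of institutions, each holding transfer records, and the converter re-keys those records by RPI course id while copying institution/city/state down into each record.

by_institution = [
    {
        "institution": "Example Community College",
        "city": "Troy",
        "state": "NY",
        "transfers": [
            {
                "rpi": {"courses": [{"id": "CSCI-1100", "name": "Computer Science I"}]},
                "transfer": {"courses": [{"id": "CS-101", "name": "Intro to Programming"}]},
            }
        ],
    }
]

# convert_json.py turns that into:
by_rpi_course = {
    "CSCI-1100": [
        {
            "institution": "Example Community College",
            "city": "Troy",
            "state": "NY",
            "rpi": {"courses": [{"id": "CSCI-1100", "name": "Computer Science I"}]},
            "transfer": {"courses": [{"id": "CS-101", "name": "Intro to Programming"}]},
        }
    ]
}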


@@ -0,0 +1,61 @@
+import json
+import sys
+import collections
+import csv
+import os
+
+
+def main():
+    if len(sys.argv) != 3:
+        print(
+            f"USAGE: python {sys.argv[0]} <by-course json> <output dir>",
+            file=sys.stderr,
+        )
+        return 1
+
+    with open(sys.argv[1], "r") as transfer_json:
+        transfer_data = json.load(transfer_json)
+
+    if not os.path.exists(sys.argv[2]):
+        os.makedirs(sys.argv[2])
+
+    for course, data in transfer_data.items():
+        print(f"Generating {course} transfer guide...", file=sys.stderr)
+        csv_output = [
+            (
+                "City",
+                "State",
+                "Institution",
+                "Transfer Course ID",
+                "Transfer Course Name",
+                "RPI Course ID",
+                "RPI Course Name",
+                "Note",
+                "Begin",
+                "End",
+                "Transfer Catalog",
+            )
+        ]
+        for xfer in data:
+            csv_output.append(
+                (
+                    xfer["city"],
+                    xfer["state"],
+                    xfer["institution"],
+                    " + ".join([x["id"] for x in xfer["transfer"]["courses"]]),
+                    " + ".join([x["name"] for x in xfer["transfer"]["courses"]]),
+                    " + ".join([x["id"] for x in xfer["rpi"]["courses"]]),
+                    " + ".join([x["name"] for x in xfer["rpi"]["courses"]]),
+                    xfer["note"],
+                    xfer["begin"],
+                    xfer["end"],
+                    xfer["transfer"]["catalog"],
+                )
+            )
+
+        with open(f"{sys.argv[2]}/{course} Transfer Guide.csv", "w") as course_csv:
+            csv.writer(course_csv).writerows(csv_output)
+
+
+if __name__ == "__main__":
+    exit(main())
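
One detail worth noting: an equivalency can span multiple courses on either side, and the writer joins those with " + " so each guide row stays a single CSV line. A small illustrative check (values invented):

xfer = {
    "transfer": {
        "courses": [
            {"id": "MATH-151", "name": "Calculus I"},
            {"id": "MATH-152", "name": "Calculus II"},
        ]
    }
}
assert " + ".join(x["id"] for x in xfer["transfer"]["courses"]) == "MATH-151 + MATH-152"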


@@ -306,7 +306,9 @@ def main():
     print(f"Using randomized user agent {user_agent}", file=sys.stderr)
     with open(OUT_FILENAME, "w") as transferjson:
-        json.dump(scrape_page(PAGE_NUM_TO_SCRAPE), transferjson, indent=4)
+        json.dump(
+            scrape_page(PAGE_NUM_TO_SCRAPE), transferjson, sort_keys=True, indent=2
+        )
     driver.quit()