quatalog-scraper/CourseOfferingsScraper.cpp

170 lines
6.2 KiB
C++
Raw Normal View History

2023-02-04 17:38:37 +00:00
#include<algorithm>
#include<cstdlib>
#include<exception>
#include<filesystem>
#include<fstream>
#include<iostream>
#include<regex>
#include<set>
#include<string>

#include<dist/jsoncpp.cpp>
struct course_sections_data_t {
int credMin = Json::Value::maxInt;
int credMax = 0;
int seatsTaken = 0;
int capacity = 0;
int remaining = 0;
};
2023-02-04 17:38:37 +00:00
namespace fs = std::filesystem;
void handle_term(const fs::directory_entry&,Json::Value&);
void handle_course(const Json::Value&,const std::string&,Json::Value&);
void handle_attribute(Json::Value&,const std::string&);
course_sections_data_t handle_sections(const Json::Value&);
2023-02-04 17:38:37 +00:00
int main(const int argc,const char** argv) {
if(argc < 2) {
std::cerr << "Bad number of arguments " << argc << std::endl;
return EXIT_FAILURE;
}
const auto data_dir_path = fs::path(argv[1]);
if(!fs::is_directory(data_dir_path)) {
std::cerr << "Data dir argument " << data_dir_path << " is not a directory" << std::endl;
return EXIT_FAILURE;
}
const auto data_dir = fs::directory_iterator(data_dir_path);
std::set<fs::directory_entry> term_dirs;
for(const fs::directory_entry term : data_dir) {
term_dirs.insert(term);
}
Json::Value terms_offered;
terms_offered["all_terms"] = Json::arrayValue;
for(auto term : term_dirs) {
if(!fs::is_directory(term)) {
continue;
}
handle_term(term,terms_offered);
}
//std::cout << terms_offered["all_terms"] << std::endl;
//std::cout << terms_offered["MATH-4020"] << std::endl;
std::cout << terms_offered << std::endl;
return EXIT_SUCCESS;
}
/// Reads "courses.json" from one term directory and records every course it
/// lists into terms_offered.
///
/// @param term_entry     Directory holding the term's data; its name is used
///                       as the term identifier.
/// @param terms_offered  Output document. The term is appended to
///                       terms_offered["all_terms"] and each course is added
///                       through handle_course().
///
/// A directory whose courses.json cannot be opened is reported on stderr and
/// skipped. Malformed JSON still raises the exception thrown by the jsoncpp
/// stream extraction operator.
void handle_term(const fs::directory_entry& term_entry,
                 Json::Value& terms_offered) {
    const fs::path& dir = term_entry.path();
    // filename() rather than stem(): stem() would cut the directory name at
    // its last '.', so e.g. "202301.bak" would masquerade as term "202301".
    const auto term = dir.filename().string();
    const auto courses_filename = (dir / "courses.json").string();

    std::ifstream courses_file{courses_filename};
    if(!courses_file) {
        std::cerr << "Skipping term " << term << ": cannot open "
                  << courses_filename << std::endl;
        return;
    }

#ifdef DEBUG
    std::cerr << '\t' << courses_filename << std::endl;
    std::cerr << "Processing term " << term << "..." << std::endl;
#endif

    Json::Value courses;
    courses_file >> courses;
    terms_offered["all_terms"].append(term);

    // Iterate by const reference: copying a Json::Value duplicates the whole
    // subtree beneath it.
    for(const auto& prefix : courses) {
#ifdef DEBUG
        std::cerr << "\tProcessing prefix "
                  << prefix["code"]
                  << " - "
                  << prefix["name"]
                  << "..."
                  << std::endl;
#endif
        for(const auto& course : prefix["courses"]) {
            handle_course(course,term,terms_offered);
        }
    }
}
/// Records one course for one term under terms_offered[<course id>][<term>].
///
/// Stores the title, the credit range and seat totals computed by
/// handle_sections(), and the recognised attribute tags of the course.
///
/// @param course         JSON object of the course ("id", "title", "sections").
/// @param term           Term identifier used as the key below the course id.
/// @param terms_offered  Output document that receives the entry.
void handle_course(const Json::Value& course,const std::string& term,Json::Value& terms_offered) {
    // Compiling a std::regex is costly and this function runs once per course
    // per term, so the delimiter pattern is built a single time.
    static const std::regex attribute_delim(" and |, ");

    const auto course_code = course["id"].asString();
    auto& course_term = terms_offered[course_code][term];

    course_term["title"] = course["title"];

    // Bind by reference; copying would duplicate every section.
    const Json::Value& sections = course["sections"];
    const course_sections_data_t totals = handle_sections(sections);
    course_term["credits"]["min"] = totals.credMin;
    course_term["credits"]["max"] = totals.credMax;
    course_term["seats"]["taken"] = totals.seatsTaken;
    course_term["seats"]["capacity"] = totals.capacity;
    course_term["seats"]["remaining"] = totals.remaining;

    // Attributes are stored per section rather than per course, so the
    // first section's attribute string is taken as the course's.
    const auto attributes_str = sections[0]["attribute"].asString();
    const std::sregex_token_iterator end_itr;
    for(std::sregex_token_iterator token(attributes_str.begin(),
                                         attributes_str.end(),
                                         attribute_delim,
                                         -1);
        token != end_itr;
        ++token) {
        const std::string attribute = token->str();
        // An empty token appears for an empty attribute string or next to a
        // leading/doubled delimiter; skip it but keep scanning so that later
        // attributes are not lost.
        if(attribute.empty()) {
            continue;
        }
        handle_attribute(course_term,attribute);
    }
}
/// Aggregates credit and seat figures over all sections of a course.
///
/// @param sections  JSON array of section objects; the fields "credMin",
///                  "credMax", "act", "cap" and "rem" are read from each.
/// @return The minimum "credMin", the maximum "credMax", and the sums of
///         "act", "cap" and "rem". All fields are 0 when there are no
///         sections.
course_sections_data_t handle_sections(const Json::Value& sections) {
    course_sections_data_t csd;
    for(const auto& section : sections) {
        // Get min/max credits *of all sections*
        // (RCOS looking at you)
        csd.credMin = std::min(csd.credMin,section["credMin"].asInt());
        csd.credMax = std::max(csd.credMax,section["credMax"].asInt());

        // Add seating data of all sections together.
        // remaining might get clobbered by some sections
        // having negative seats, but this probably won't
        // be too much of an issue
        csd.seatsTaken += section["act"].asInt();
        csd.capacity += section["cap"].asInt();
        csd.remaining += section["rem"].asInt();
    }

    // With no sections the loop never ran and credMin still holds its
    // maxInt starting value; report 0 instead of leaking that sentinel.
    if(sections.empty()) {
        csd.credMin = 0;
    }
    return csd;
}
2023-02-04 17:38:37 +00:00
/// Translates a full attribute name into its bracketed short tag and appends
/// the tag to course_term["attributes"]. Names that are not in the table are
/// ignored.
///
/// @param course_term  JSON entry of the course for one term.
/// @param attribute    Attribute name exactly as it appears in the data.
void handle_attribute(Json::Value& course_term,const std::string& attribute) {
    struct tag_mapping_t {
        const char* name;
        const char* tag;
    };
    static const tag_mapping_t known_tags[] = {
        {"Communication Intensive", "[CI]"},
        {"Writing Intensive", "[WI]"},
        {"HASS Inquiry", "[HInq]"},
        {"Culminating Exp/Capstone", "[CulmExp]"},
        {"PDII Option for Engr Majors", "[PDII]"},
    };

    for(const auto& mapping : known_tags) {
        if(attribute == mapping.name) {
            course_term["attributes"].append(std::string(mapping.tag));
            return;
        }
    }
}