# Mini-CSV-Parser/csv_parse.py
#
# 39 lines
# 2.0 KiB
# Python

import re
# Small list helpers used by the parser combinators further down.
def cons(h, t):
    """Return a new list with ``h`` placed in front of the items of ``t``.

    ``t`` may be any iterable; it is not modified.
    """
    combined = [h]
    combined.extend(t)
    return combined
def postpend(l, e):
    """Return a new list holding the items of ``l`` followed by ``e``.

    ``l`` may be any iterable; it is not modified.
    """
    extended = list(l)
    extended.append(e)
    return extended
# The two most basic parsers: a regex matcher and an end-of-input check.
def parse_reg(regex, f=lambda m: m):
    """Build a parser that matches ``regex`` at the start of its input.

    The pattern is compiled once, when the parser is built. The returned
    parser takes a string ``s`` and returns ``[(f(match), rest)]`` when the
    pattern matches at position 0, where ``rest`` is the input after the
    match, or ``[]`` when it does not match. By default ``f`` is the identity,
    so the parsed value is the match object itself.
    """
    pattern = re.compile(regex)

    def parser(s):
        found = pattern.match(s)
        if found is None:
            return []
        return [(f(found), s[found.end():])]

    return parser
def parse_eof(s):
    """Succeed only at the end of the input.

    Returns ``[('<EOF>', "")]`` when ``s`` is empty and ``[]`` otherwise.
    """
    if len(s) != 0:
        return []
    return [('<EOF>', "")]
# Parser combinators: functions that build new parsers out of existing ones.
def parse_altl(a, b):
    """Left-biased choice between two parsers.

    The returned parser runs ``a`` on the input and returns its results if
    there are any; only when ``a`` yields nothing is ``b`` run on the same
    input.
    """
    def parser(s):
        preferred = a(s)
        if len(preferred) > 0:
            return preferred
        return b(s)

    return parser
def parse_seq(f):
    """Build a sequencing combinator that merges two results with ``f``.

    ``parse_seq(f)(a, b)`` is a parser that runs ``a``, then runs ``b`` on
    each remainder ``a`` produced, and yields ``(f(v1, v2), rest)`` for every
    combination, where ``v1`` comes from ``a``, ``v2`` from ``b`` and ``rest``
    is what ``b`` left over.
    """
    def combine(a, b):
        def parser(s):
            outcomes = []
            for v1, after_a in a(s):
                for v2, after_b in b(after_a):
                    outcomes.append((f(v1, v2), after_b))
            return outcomes

        return parser

    return combine


def _keep_left(l, r):
    """Return the first of two parsed values, discarding the second."""
    return l


# Sequence two parsers but keep only the value of the first one.
parse_seql = parse_seq(_keep_left)
def parse_pure(v):
    """Build a parser that consumes nothing and always yields ``v``.

    The returned parser maps any input ``s`` to ``[(v, s)]``.
    """
    def parser(s):
        return [(v, s)]

    return parser
def parse_map(f, a):
    """Build a parser that applies ``f`` to every value produced by ``a``.

    Remainders are passed through untouched; if ``a`` yields nothing, so does
    the returned parser.
    """
    def parser(s):
        transformed = []
        for value, rest in a(s):
            transformed.append((f(value), rest))
        return transformed

    return parser
def parse_many(a):
    """Build a parser that applies ``a`` zero or more times, greedily.

    The returned parser takes an input ``s`` and returns a list of
    ``(values, rest)`` pairs, where ``values`` is the list of results of the
    successive applications of ``a`` and ``rest`` is the input left once ``a``
    no longer matches. It always yields at least one pair; when ``a`` does not
    match at all the result is ``[([], s)]``. If ``a`` yields several
    alternatives at some point, every alternative is followed, in order.

    Raises:
        ValueError: if ``a`` succeeds without consuming any input, since
            repeating such a parser would never terminate.
    """
    def parser(s):
        results = []
        # Iterative on purpose: a recursive formulation needs several stack
        # frames per repetition and hits Python's recursion limit on inputs
        # with only a few hundred items.
        #
        # Each pending entry is (chain, remaining input). ``chain`` is a
        # linked list ``(previous_chain, value)`` of the values matched so
        # far (``None`` when empty), so extending it is O(1).
        pending = [(None, s)]
        while pending:
            chain, rest = pending.pop()
            matches = list(a(rest))
            if not matches:
                # ``a`` no longer matches: this path is complete.
                values = []
                while chain is not None:
                    chain, value = chain
                    values.append(value)
                values.reverse()
                results.append((values, rest))
                continue
            # Push in reverse so the first alternative is explored first,
            # which keeps results in depth-first, left-to-right order.
            for value, remaining in reversed(matches):
                if len(remaining) >= len(rest):
                    raise ValueError(
                        "parse_many: element parser matched without "
                        "consuming input"
                    )
                pending.append(((chain, value), remaining))
        return results

    return parser
def parse_many_sep(sep):
    """Build a combinator for lists of items separated by ``sep``.

    ``parse_many_sep(sep)(a)`` parses zero or more "``a`` then ``sep``" pairs
    followed by one final ``a``, and yields the list of all ``a`` values (the
    separator values are dropped). If that fails, it yields an empty list
    without consuming input.
    """
    def separated(a):
        # Every item except the last one is followed by a separator.
        leading_items = parse_many(parse_seql(a, sep))
        # Append the final, separator-less item to the leading ones.
        one_or_more = parse_seq(postpend)(leading_items, a)
        return parse_altl(one_or_more, parse_pure([]))

    return separated
# Parsers for the individual elements of the CSV grammar.
def _field_value(m):
    """Turn a field match into the field's text.

    Group 2 holds an unquoted field and is returned as-is. Otherwise group 1
    holds the inside of a quoted field, in which each doubled quote stands
    for one literal quote character.
    """
    unquoted = m.group(2)
    if unquoted is not None:
        return unquoted
    return m.group(1).replace('""', '"')


# A field is either a double-quoted string (spaces around the quotes are
# skipped) or a possibly empty run of characters other than CR, LF, comma
# and double quote.
parse_field = parse_reg(r'(?: *"((?:[^"]|"")*)" *)|([^\r\n,"]*)', _field_value)
# A line break is CRLF, a lone CR, or a lone LF.
parse_newline = parse_reg(r'\r\n|\r|\n')
_parse_comma = parse_reg(',')
# A line is a comma-separated list of fields.
parse_line = parse_many_sep(_parse_comma)(parse_field)
# The document body is a newline-separated list of lines.
parse_lines = parse_many_sep(parse_newline)(parse_line)
def trim_final_empty_line(lines):
    """Drop the last row when it is empty.

    A row counts as empty when it is ``[""]`` (one empty field) or ``[]``.
    In that case a copy of ``lines`` without its last row is returned;
    otherwise ``lines`` itself is returned unchanged.
    """
    if len(lines) == 0:
        return lines
    last_row = lines[-1]
    if last_row in ([""], []):
        return lines[:-1]
    return lines
# The complete CSV parser: all lines, minus a trailing empty one, followed by
# the end of the input. The parsed value is the list of rows.
_parse_rows = parse_map(trim_final_empty_line, parse_lines)
parse_csv = parse_seql(_parse_rows, parse_eof)
# Sample usage: read a file, run it through the parser above, and pull a
# value out of the resulting table.
def read_csv_file(file_path, encoding=None):
    """Read the file at ``file_path`` and parse its contents as CSV.

    Args:
        file_path: Path of the file to read.
        encoding: Text encoding passed to ``open``. ``None`` (the default)
            keeps ``open``'s own default encoding.

    Returns:
        The parsed table as a list of rows, each row being a list of field
        strings, or ``None`` when the contents do not parse as CSV.

    Errors raised while opening or decoding the file are not caught here.
    """
    # newline='' turns off newline translation, so line breaks embedded in
    # quoted fields reach the parser exactly as they are stored in the file.
    # The parser itself accepts CRLF, CR and LF as row separators.
    with open(file_path, 'r', encoding=encoding, newline='') as file:
        parse_results = parse_csv(file.read())
    # parse_csv yields a list of (table, remainder) pairs; an empty list
    # means the input was rejected.
    return parse_results[0][0] if parse_results else None
# Sample run: load ./my_data.csv and show the cell at row 3, column D.
parsed_table = read_csv_file("./my_data.csv")
if parsed_table is None:
    # read_csv_file returns None when the contents are not valid CSV.
    print('Could not parse ./my_data.csv as CSV')
elif len(parsed_table) > 2 and len(parsed_table[2]) > 3:
    print(f'Row 3, Column D reads: {parsed_table[2][3]}')
else:
    # The table is smaller than 3 rows, or row 3 has fewer than 4 columns.
    print('./my_data.csv has no value at row 3, column D')