39 lines
2.0 KiB
Python
39 lines
2.0 KiB
Python
import re
|
|
|
|
# Lines 1-2: We start with a couple of helper functions, which will come in handy later on.
|
|
# cons(h, t): return a NEW list whose first item is `h`, followed by every
# item of the iterable `t`.  Neither argument is modified.
cons = lambda h, t: [h] + list(t)
|
|
# postpend(l, e): return a NEW list holding every item of the iterable `l`
# followed by `e` as the last item.  Neither argument is modified.
postpend = lambda l, e: list(l) + [e]
|
|
|
|
# Lines 3-4: The two most basic tokens we can parse: A regex and an EOF
|
|
# parse_reg(regex, f): build a parser for a single regular-expression token.
# The pattern is compiled once, at the moment parse_reg is called.  The parser
# it returns takes the input `s` and tries the pattern at the start of `s`:
#   * on a match it returns the one-item list [(f(match), s_after_the_match)];
#   * otherwise it returns the empty list.
# `f` defaults to the identity function, so the parsed value is the match object.
parse_reg = lambda regex, f=(lambda m: m): (lambda match: lambda s: [(f(m), s[m.end():]) for m in [match(s)] if m is not None])(re.compile(regex).match)
|
|
# parse_eof: succeeds only when there is no input left.  On empty input it
# yields the marker value '<EOF>' with an empty remainder; on anything else it
# yields no results at all.
parse_eof = lambda s: [] if len(s) != 0 else [('<EOF>', "")]
|
|
|
|
# Lines 5-11: We define a handful of parser combinators
|
|
# parse_altl(a, b): left-biased choice between two parsers.  `a` is run first
# and, if it produced any results, those are returned as-is and `b` is never
# run.  Only when `a` yields an empty result list is `b` tried on the same
# input.  (Parser results are lists, so "empty" and "falsy" coincide.)
parse_altl = lambda a, b: lambda s: a(s) or b(s)
|
|
# parse_seq(f)(a, b): run parser `a`, then run parser `b` on whatever `a` left
# over, and merge the two parsed values with `f(value_of_a, value_of_b)`.
# Every result of `a` is paired with every result `b` gives on that result's
# remainder; if either parser yields nothing, the sequence yields nothing.
parse_seq = lambda f: lambda a, b: lambda s: [(f(left, right), rest) for left, mid in a(s) for right, rest in b(mid)]
|
|
# parse_seql(a, b): sequence two parsers with parse_seq, keeping only the value
# produced by the left-hand parser `a`; the value of `b` is discarded (though
# `b` must still succeed and its input is still consumed).
parse_seql = parse_seq(lambda kept, _discarded: kept)
|
|
# parse_pure(v): a parser that always succeeds without consuming anything.
# Whatever the input `s` is, the single result is the fixed value `v` paired
# with the untouched input.
parse_pure = lambda v: (lambda s: [(v, s)])
|
|
# parse_map(f, a): run parser `a` and pass each parsed value through `f`,
# leaving the remaining input of every result unchanged.
parse_map = lambda f, a: lambda s: [(f(value), rest) for value, rest in a(s)]
|
|
def parse_many(a):
    """Build a parser that applies ``a`` zero or more times in a row.

    The returned parser keeps applying ``a`` to the remaining input until
    ``a`` yields no result, then succeeds with the list of collected values
    and whatever input is left.  It never fails: when ``a`` does not match at
    all, the single result is ``([], s)``.

    If ``a`` yields several results for some input, every alternative is
    followed, and the results come out in the same order as the recursive
    definition ``a many(a) | pure([])`` would produce them.

    This is written as a loop rather than a recursion on purpose.  The
    recursive form needs several stack frames per repetition, so it raises
    RecursionError after a few hundred items (for example, a CSV file with a
    few hundred rows).  It also handed out one shared empty list from every
    call of the same parser.

    Args:
        a: The parser to repeat, a callable mapping an input to a list of
            ``(value, remaining_input)`` pairs.

    Returns:
        A parser mapping an input ``s`` to a list of
        ``(list_of_values, remaining_input)`` pairs.

    Raises:
        RecursionError: From the returned parser, when ``a`` succeeds without
            consuming any input.  Repeating such a parser never terminates;
            the exception type is the one the recursive form raised.
    """
    def many(s):
        finished = []
        # Depth-first work list of partial parses: (values so far, input left).
        # Each values list is owned by exactly one entry, so it may be extended
        # in place.
        pending = [([], s)]
        while pending:
            values, rest = pending.pop()
            steps = list(a(rest))
            if not steps:
                # `a` no longer matches here: this branch is complete.
                finished.append((values, rest))
                continue
            for _, after in steps:
                if after == rest:
                    raise RecursionError(
                        'parse_many: the repeated parser succeeded without '
                        'consuming input, so repetition would never end'
                    )
            if len(steps) == 1:
                # Common deterministic case: extend in place, no copying.
                value, after = steps[0]
                values.append(value)
                pending.append((values, after))
            else:
                # Branching: each alternative gets its own copy.  Push in
                # reverse so the first alternative is explored first.
                pending.extend(
                    (values + [value], after)
                    for value, after in reversed(steps)
                )
        return finished

    return many
|
|
# parse_many_sep(sep)(a): parse a list of `a` items separated by `sep`.
# The main branch is "zero or more (`a` followed by `sep`) pairs, then one
# final `a`"; the separators' values are dropped and the item values are
# collected, in order, into one list.  If that branch yields nothing, the
# parser falls back to succeeding with an empty list and consuming no input.
parse_many_sep = lambda sep: lambda a: (lambda leading: parse_altl(parse_seq(postpend)(leading, a), parse_pure([])))(parse_many(parse_seql(a, sep)))
|
|
|
|
# Lines 12-16: We can now construct parsers for elements of our grammar
|
|
# parse_field: one CSV field.  The pattern has two alternatives:
#   1. a double-quoted field, optionally padded with spaces on either side,
#      in which "" stands for one literal quote character (capture group 1);
#   2. an unquoted field: any run, possibly empty, of characters other than
#      CR, LF, comma and double quote (capture group 2).
# The parsed value is the field's text: group 2 as-is for an unquoted field,
# or group 1 with every "" collapsed to " for a quoted one.
parse_field = parse_reg(r'(?: *"((?:[^"]|"")*)" *)|([^\r\n,"]*)', lambda m: m.group(1).replace('""', '"') if m.group(2) is None else m.group(2))
|
|
# parse_newline: one line terminator: CRLF, a lone CR, or a lone LF.  CRLF is
# listed first so that it is consumed as a single terminator, not as two.
parse_newline = parse_reg(regex=r'\r\n|\r|\n')
|
|
# parse_line: one CSV record, i.e. fields separated by commas.  The parsed
# value is the list of field values in order.
parse_line = parse_many_sep(
    parse_reg(','),  # the separator between two fields
)(parse_field)
|
|
# parse_lines: a sequence of records separated by line terminators.  The
# parsed value is a list of records, each of them a list of field values.
parse_lines = parse_many_sep(
    parse_newline,  # the separator between two records
)(parse_line)
|
|
# trim_final_empty_line(lines): return `lines` without its last record when
# that record is empty ([""] or []), which is what a trailing line terminator
# at the end of the input produces.  Otherwise `lines` is returned unchanged,
# including when it has no records at all.
trim_final_empty_line = lambda lines: lines if len(lines) == 0 or lines[-1] not in ([""], []) else lines[:-1]
|
|
|
|
# Line 17: Finally, we can define our actual parser
|
|
# parse_csv: the complete parser.  It parses all records, drops the empty
# record caused by a trailing line terminator, and then requires end of input,
# so text with an unparsable tail yields no result.  The parsed value is the
# list of records; the end-of-input marker is discarded.
parse_csv = parse_seql(
    parse_map(trim_final_empty_line, parse_lines),
    parse_eof,
)
|
|
|
|
# Extra lines: Here's just some sample code which reads in a file, parses it with our
|
|
# parser, and pulls some data out.
|
|
def read_csv_file(file_path, encoding=None):
    """Read the file at ``file_path`` and parse its entire content as CSV.

    Args:
        file_path: Path of the file to read.
        encoding: Text encoding handed to ``open``.  The default, ``None``,
            keeps the platform's default encoding.

    Returns:
        The value of the first result produced by ``parse_csv`` for the file
        content, or ``None`` when ``parse_csv`` produces no result.
    """
    # newline='' switches off newline translation, so line breaks inside
    # quoted fields reach the parser exactly as stored in the file.  The
    # grammar itself already accepts \r\n, \r and \n as record separators.
    with open(file_path, 'r', encoding=encoding, newline='') as file:
        parse_results = parse_csv(file.read())
    return parse_results[0][0] if parse_results else None
|
|
|
|
parsed_table = read_csv_file("./my_data.csv")
# read_csv_file returns None when the content could not be parsed, and the
# table may be smaller than the cell asked for; check both before indexing.
if parsed_table is None:
    print('Could not parse ./my_data.csv as CSV')
elif len(parsed_table) < 3 or len(parsed_table[2]) < 4:
    print('./my_data.csv has no Row 3, Column D')
else:
    print(f'Row 3, Column D reads: {parsed_table[2][3]}')
|