Add the regex-free version of the parser
This commit is contained in:
parent
7be3c2311b
commit
151a53691c
|
@ -11,8 +11,10 @@ than employ a multi-line function definition, which looks a little sloppy. Stil
|
|||
pretty happy with the result, especially given that conciseness was an explicit goal.
|
||||
|
||||
If you want to take a look, find the file in [`csv_parse.py`](./csv_parse.py) or try
|
||||
cloning and running it. The sample data in this repository is stolen from [this handy
|
||||
repo of sample CSV files](https://github.com/datablist/sample-csv-files).
|
||||
cloning and running it. I also put together another version which doesn't use python's
|
||||
regex module, which you can find in [`csv_parse_no_regex.py`](./csv_parse_no_regex.py).
|
||||
The sample data in this repository is stolen from [this handy repo of sample CSV
|
||||
files](https://github.com/datablist/sample-csv-files).
|
||||
|
||||
It's worth noting that this was built purely as an exercise. While it is spec compliant
|
||||
and could in principle be used for real work, I would recommend using Python's built-in CSV
|
||||
|
|
39
csv_parse_no_regex.py
Normal file
39
csv_parse_no_regex.py
Normal file
|
@ -0,0 +1,39 @@
|
|||
from functools import reduce
|
||||
|
||||
def cons(h, t):
    """Return a new list with ``h`` placed in front of the items of ``t``."""
    return [h] + list(t)
|
||||
def postpend(l, e):
    """Return a new list holding the items of ``l`` followed by ``e``."""
    extended = list(l)
    extended.append(e)
    return extended
|
||||
|
||||
def parse_tok(t):
    """Build a parser that matches the literal token ``t`` at the start of the input.

    The returned parser yields ``[(t, rest)]`` when the input starts with
    ``t`` and ``[]`` otherwise.
    """
    def parser(s):
        if not s.startswith(t):
            return []
        return [(t, s[len(t):])]
    return parser
|
||||
def parse_except(e):
    """Build a parser that accepts any single character not contained in ``e``.

    The returned parser yields ``[(char, rest)]`` on success and ``[]`` when
    the input is empty or its first character is in ``e``.
    """
    def parser(s):
        if len(s) == 0 or s[0] in e:
            return []
        head, rest = s[0], s[1:]
        return [(head, rest)]
    return parser
|
||||
def parse_eof(s):
    """Succeed only at the end of the input.

    Yields ``[('<EOF>', "")]`` for empty input and ``[]`` otherwise.
    """
    if len(s) != 0:
        return []
    return [('<EOF>', "")]
|
||||
|
||||
def parse_altl(a, b):
    """Build a left-biased choice between parsers ``a`` and ``b``.

    The returned parser yields the results of ``a`` when there are any;
    ``b`` is only run on the same input when ``a`` produced nothing.
    """
    def parser(s):
        first_results = a(s)
        if len(first_results) > 0:
            return first_results
        return b(s)
    return parser
|
||||
def parse_seq(f):
    """Build a sequencing combinator that merges two parsed values with ``f``.

    ``parse_seq(f)(a, b)`` is a parser that runs ``a``, then runs ``b`` on
    what ``a`` left over, and yields ``(f(value_a, value_b), rest)`` for every
    combination of their results.
    """
    def combine(a, b):
        def parser(s):
            combined = []
            for first_value, after_first in a(s):
                for second_value, after_second in b(after_first):
                    combined.append((f(first_value, second_value), after_second))
            return combined
        return parser
    return combine
|
||||
# Sequence two parsers and keep only the value of the left-hand one.
parse_seql = parse_seq(lambda kept, _dropped: kept)
|
||||
# Sequence two parsers and keep only the value of the right-hand one.
parse_seqr = parse_seq(lambda _dropped, kept: kept)
|
||||
def parse_pure(v):
    """Build a parser that always succeeds with ``v`` and consumes no input."""
    def parser(s):
        return [(v, s)]
    return parser
|
||||
def parse_map(f, a):
    """Build a parser that runs ``a`` and applies ``f`` to each parsed value.

    The remaining input of every result is passed through unchanged.
    """
    def parser(s):
        mapped = []
        for value, rest in a(s):
            mapped.append((f(value), rest))
        return mapped
    return parser
|
||||
def parse_many(a):
    """Build a parser that applies ``a`` zero or more times and collects the values.

    The previous definition recursed once per repetition, so an input with
    more than a few hundred repetitions (a long field, or a file with many
    rows) raised ``RecursionError``.  This version walks the input with an
    explicit stack, so it has no depth limit, and it yields the same results
    in the same order.

    Args:
        a: a parser, i.e. a callable that maps an input to a list of
            ``(value, rest)`` pairs (an empty list means failure).

    Returns:
        A parser that never fails.  It yields ``(values, rest)`` pairs where
        ``values`` is the (possibly empty) list of values matched by ``a``
        and ``rest`` is the input left once ``a`` no longer matches.
    """
    def parser(s):
        results = []
        # Each pending entry is (values collected so far, remaining input).
        pending = [([], s)]
        while pending:
            values, rest = pending.pop()
            # A match that consumes nothing would repeat forever; skip it so
            # the loop is guaranteed to terminate.
            matches = [(v, r) for (v, r) in a(rest) if r != rest]
            if not matches:
                # ``a`` can go no further here: this branch is complete.
                results.append((values, rest))
            elif len(matches) == 1:
                # Common deterministic case: this entry is the sole owner of
                # ``values``, so it can be extended in place without copying.
                value, remaining = matches[0]
                values.append(value)
                pending.append((values, remaining))
            else:
                # Push in reverse so the first alternative is explored first,
                # which keeps the result order of the recursive formulation.
                for value, remaining in reversed(matches):
                    pending.append(([*values, value], remaining))
        return results
    return parser
|
||||
def parse_many_sep(sep):
    """Build a combinator for lists of items separated by ``sep``.

    ``parse_many_sep(sep)(a)`` parses any number of "``a`` then ``sep``"
    pairs followed by one final ``a`` and yields the list of ``a`` values.
    When that fails it falls back to an empty list without consuming input.
    """
    def with_item(a):
        leading_items = parse_many(parse_seql(a, sep))
        one_or_more = parse_seq(postpend)(leading_items, a)
        return parse_altl(one_or_more, parse_pure([]))
    return with_item
|
||||
def parse_any_tok(*ts):
    """Build a parser matching the first of the literal tokens ``ts`` that fits.

    Tokens are tried in the order given; with no tokens the parser never
    matches.
    """
    def never(s):
        return []

    combined = never
    for token in ts:
        combined = parse_altl(combined, parse_tok(token))
    return combined
|
||||
|
||||
# Zero or more space characters (only ' '; tabs are not treated as whitespace).
parse_whitespace = parse_many(parse_tok(' '))
|
||||
# A single double-quote character.
parse_dbqt = parse_tok('"')
|
||||
# Any single character other than a double quote.
parse_str_char = parse_except('"')
|
||||
# A line break. '\r\n' is tried first so a CRLF pair is consumed as one newline.
parse_newline = parse_any_tok('\r\n', '\n', '\r')
|
||||
# Two consecutive double quotes, parsed as one literal double-quote character.
parse_escaped_quote = parse_map(lambda _: '"', parse_tok('""'))
|
||||
# A quoted field: optional spaces, an opening quote, the field characters
# (a doubled quote stands for one literal quote), a closing quote and
# optional spaces. The parsed value is the list of field characters only.
_quoted_open = parse_seqr(parse_whitespace, parse_dbqt)
_quoted_body = parse_many(parse_altl(parse_str_char, parse_escaped_quote))
_quoted_close = parse_seql(parse_dbqt, parse_whitespace)
parse_quoted = parse_seql(parse_seqr(_quoted_open, _quoted_body), _quoted_close)
|
||||
# An unquoted field: a possibly empty run of characters that are not a
# double quote, carriage return, line feed or comma.
parse_unquoted = parse_many(parse_except('"\r\n,'))
|
||||
# One field, quoted form tried first; the parsed characters are joined into
# a single string.
parse_field = parse_map(''.join, parse_altl(parse_quoted, parse_unquoted))
|
||||
# One record: fields separated by commas, parsed into a list of strings.
parse_line = parse_many_sep(parse_tok(','))(parse_field)
|
||||
# All records: lines separated by newlines, parsed into a list of rows.
parse_lines = parse_many_sep(parse_newline)(parse_line)
|
||||
def trim_final_empty_line(lines):
    """Drop the last row when it is empty (``[""]`` or ``[]``).

    Any other input, including an empty list, is returned as is.
    """
    if len(lines) == 0:
        return lines
    last_row = lines[-1]
    if last_row == [""] or last_row == []:
        return lines[:-1]
    return lines
|
||||
# A whole document: all rows (minus a trailing empty row), which must be
# followed by the end of the input; the parsed value is the list of rows.
parse_csv = parse_seql(parse_map(trim_final_empty_line, parse_lines), parse_eof)
|
||||
|
||||
def read_csv_file(file_path):
    """Parse the CSV file at ``file_path`` into a list of rows.

    Args:
        file_path: path of the CSV file to read.

    Returns:
        A list of rows, each a list of field strings, or ``None`` when the
        parser produces no result for the file's content.
    """
    # newline='' turns off newline translation so the parser sees the file's
    # real line endings. Without it, '\r\n' and '\r' (including those inside
    # quoted fields) are rewritten to '\n' before parsing.
    with open(file_path, 'r', newline='') as file:
        parse_results = parse_csv(file.read())
    return parse_results[0][0] if parse_results else None
|
||||
|
||||
# Demo: parse the sample file and show one cell (third row, fourth column).
parsed_table = read_csv_file("./my_data.csv")
if parsed_table is None:
    # read_csv_file returns None when parsing fails; indexing None would
    # otherwise stop the script with an unhelpful TypeError.
    print('Could not parse ./my_data.csv as CSV')
else:
    print(f'Row 3, Column D reads: {parsed_table[2][3]}')
|
Loading…
Reference in a new issue