Add the regex-free version of the parser

Emi Simpson 2024-03-17 12:40:49 -04:00
parent 7be3c2311b
commit 151a53691c
Signed by: Emi
GPG Key ID: A12F2C2FFDC3D847
2 changed files with 43 additions and 2 deletions


@@ -11,8 +11,10 @@ than employ a multi-line function definition, which looks a little sloppy. Still
 pretty happy with the result, especially given that conciseness was an explicit goal.
 If you want to take a look, find the file in [`csv_parse.py`](./csv_parse.py) or try
-cloning and running it. The sample data in this repository is stolen from [this handy
-repo of sample CSV files](https://github.com/datablist/sample-csv-files).
+cloning and running it. I also put together another version which doesn't use python's
+regex module, which you can find in [`csv_parse_no_regex.py`](./csv_parse_no_regex.py).
+The sample data in this repository is stolen from [this handy repo of sample CSV
+files](https://github.com/datablist/sample-csv-files).
 
 It's worth noting that this was built purely as an exercise. While it is spec compliant
 and can theoretically actually be used, I would recommend using Python's built-in CSV
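For reference, the built-in module the README recommends handles the same job in a few lines; a minimal sketch (my illustration, not part of the commit), reusing the `./my_data.csv` path from the script below:

# Standard-library equivalent of the hand-rolled parser, as recommended above.
import csv
with open('./my_data.csv', newline='') as f:
    table = list(csv.reader(f))
print(f'Row 3, Column D reads: {table[2][3]}')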

csv_parse_no_regex.py Normal file

@@ -0,0 +1,39 @@
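# A tiny parser-combinator kit: each parser takes a string and returns a list of
# (value, remaining_input) pairs, with the empty list signalling failure.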
from functools import reduce
cons = lambda h, t: [h, *t]
postpend = lambda l, e: [*l, e]
parse_tok = lambda t: lambda s: [(t, s[len(t):])] if s.startswith(t) else []
parse_except = lambda e: lambda s: [(s[0], s[1:])] if len(s) > 0 and s[0] not in e else []
parse_eof = lambda s: [('<EOF>', "")] if len(s) == 0 else []
parse_altl = lambda a, b: lambda s: (lambda ra: ra if len(ra) > 0 else b(s))(a(s))
parse_seq = lambda f: lambda a, b: lambda s: [(f(v1, v2), r2) for (v1, r1) in a(s) for (v2, r2) in b(r1)]
parse_seql = parse_seq(lambda l, r: l)
parse_seqr = parse_seq(lambda l, r: r)
parse_pure = lambda v: lambda s: [(v, s)]
parse_map = lambda f, a: lambda s: [(f(v), r) for (v, r) in a(s)]
parse_many = lambda a: parse_altl(parse_seq(cons)(a, lambda s: parse_many(a)(s)), parse_pure([]))
parse_many_sep = lambda sep: lambda a: parse_altl(parse_seq(postpend)(parse_many(parse_seql(a, sep)), a), parse_pure([]))
parse_any_tok = lambda *ts: reduce(parse_altl, [parse_tok(t) for t in ts], lambda s: [])
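# The CSV grammar itself, assembled from the combinators above.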
parse_whitespace = parse_many(parse_tok(' '))
parse_dbqt = parse_tok('"')
parse_str_char = parse_except('"')
parse_newline = parse_any_tok('\r\n', '\n', '\r')
parse_escaped_quote = parse_map(lambda _: '"', parse_tok('""'))
parse_quoted = parse_seql(parse_seqr(parse_seqr(parse_whitespace, parse_dbqt), parse_many(parse_altl(parse_str_char, parse_escaped_quote))), parse_seql(parse_dbqt, parse_whitespace))
parse_unquoted = parse_many(parse_except('"\r\n,'))
parse_field = parse_map(lambda chars: ''.join(chars), parse_altl(parse_quoted, parse_unquoted))
parse_line = parse_many_sep(parse_tok(','))(parse_field)
parse_lines = parse_many_sep(parse_newline)(parse_line)
trim_final_empty_line = lambda lines: lines[:-1] if len(lines)>0 and lines[-1] in [[""],[]] else lines
parse_csv = parse_seql(parse_map(trim_final_empty_line, parse_lines), parse_eof)
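# Parse a CSV file from disk; returns the table as a list of rows, or None if parsing fails.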
def read_csv_file(file_path):
with open(file_path, 'r') as file:
parse_results = parse_csv(file.read())
return parse_results[0][0] if len(parse_results) else None
parsed_table = read_csv_file("./my_data.csv")
print(f'Row 3, Column D reads: {parsed_table[2][3]}')
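Since every parser above works on a plain string, the pipeline is easy to exercise without a file; a small sanity check (my sketch, assuming the definitions above are in scope), covering quoted fields, an escaped quote, and the trailing newline that trim_final_empty_line strips:

# Not part of the committed file: parse a two-row CSV held in memory.
sample = 'name,comment\r\nEmi,"said ""hi"", then left"\r\n'
results = parse_csv(sample)
# parse_csv returns a list of (value, remaining_input) pairs; on success the
# value is the table as a list of rows and the input is fully consumed.
assert results[0][0] == [['name', 'comment'], ['Emi', 'said "hi", then left']]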