diff options
| author | omagdy <omar.professional8777@gmail.com> | 2025-08-06 01:50:35 +0300 |
|---|---|---|
| committer | omagdy <omar.professional8777@gmail.com> | 2025-08-06 01:50:35 +0300 |
| commit | 0e926ab37973b7cd1c4e24690f4715fbb226f03c (patch) | |
| tree | 617beaf62f1cd64087d2d71436ca4b16203f89b3 /presets/scripts/parse_lexicon.py | |
| parent | 0688270d3a4afbadb6ceebb339dfdc314d1c71ac (diff) | |
| download | gof-rs-0e926ab37973b7cd1c4e24690f4715fbb226f03c.tar.xz gof-rs-0e926ab37973b7cd1c4e24690f4715fbb226f03c.zip | |
Diffstat (limited to 'presets/scripts/parse_lexicon.py')
| -rwxr-xr-x | presets/scripts/parse_lexicon.py | 54 |
1 files changed, 54 insertions, 0 deletions
#!/usr/bin/env python3
"""Convert the stripped lexicon text file into ``patterns.json``.

Source path in the repository: presets/scripts/parse_lexicon.py

The input is split into blank-line-separated sections.  Every section that
starts with a ``:name:`` header becomes one JSON record holding the entry
name, its tab-indented pattern rows and the discoverer named in a
"Found by ... in" phrase (when present).
"""
import re
import json

# Paths are relative to the current working directory, as in the original
# script (it is expected to be run from presets/scripts/).
INPUT_FILE = "../lexicon_stripped.txt"
OUTPUT_FILE = "patterns.json"

# Entry header at the very start of a section, e.g. ":glider:".
# NOTE(review): names containing characters outside this whitelist (such as
# apostrophes or periods) are skipped -- confirm that is intended.
_NAME_RE = re.compile(r":([\w\-\(\),/ ]+):")

# Non-greedy so that only the text up to the FIRST " in" is captured;
# ``\s+`` lets the phrase wrap across lines.
_DISCOVERER_RE = re.compile(r"Found by ([\w\s]+?)\s+in\b", re.IGNORECASE)


def _extract_pattern(section):
    """Return the first run of tab-indented rows in *section*, or ``None``.

    Rows are stripped of surrounding whitespace and joined with ``"\\n"``.
    The run ends at the first line that is not a non-empty tab-indented row.
    """
    rows = []
    for line in section.split("\n"):
        row = line.strip() if line.startswith("\t") else ""
        if row:
            rows.append(row)
        elif rows:
            # The contiguous block of pattern rows has ended.
            break
    return "\n".join(rows) if rows else None


def parse_patterns(file_content):
    """Parse lexicon text into a list of pattern records.

    Args:
        file_content: Full text of the lexicon file.  Entries are separated
            by blank lines and start with a ``:name:`` header.

    Returns:
        A list of dicts with the keys ``"name"``, ``"pattern"`` and
        ``"discoverer"``.  ``"pattern"`` holds all rows of the entry's first
        tab-indented block joined by newlines, or ``None`` if the entry has
        no such block.  ``"discoverer"`` is ``None`` when no
        "Found by ... in" phrase is present.
    """
    patterns = []
    for raw_section in file_content.split("\n\n"):
        # Three or more consecutive newlines leave a leading "\n" on the next
        # section; drop it so the header still matches at position 0.
        section = raw_section.lstrip("\n")

        name_match = _NAME_RE.match(section)
        if not name_match:
            continue

        discoverer_match = _DISCOVERER_RE.search(section)
        discoverer = None
        if discoverer_match:
            # Collapse line wraps and indentation inside the captured name.
            discoverer = " ".join(discoverer_match.group(1).split())

        patterns.append({
            "name": name_match.group(1).strip(),
            "pattern": _extract_pattern(section),
            "discoverer": discoverer,
        })

    return patterns


def main():
    """Read the lexicon, parse it and write the records to ``patterns.json``."""
    with open(INPUT_FILE, "r") as file:
        content = file.read()

    parsed_patterns = parse_patterns(content)

    # Show the first record as a quick sanity check; guard against an empty
    # result so the script does not die with IndexError.
    if parsed_patterns:
        print(parsed_patterns[0])

    with open(OUTPUT_FILE, "w") as json_file:
        json.dump(parsed_patterns, json_file, indent=4)

    print(f"Patterns saved to {OUTPUT_FILE}")


if __name__ == "__main__":
    main()
