forked from CodeCrafter-Guy/PyLex
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
109 lines (85 loc) · 3.38 KB
/
main.py
File metadata and controls
109 lines (85 loc) · 3.38 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
"""
A command-line tool for tokenizing an input file using a specified lexer configuration.

This script reads an input file and a YAML lexer configuration file, tokenizes the input
using the provided lexer, and prints the resulting tokens.

Usage:
    python main.py <input_file> <lexer_config>

Arguments:
    input_file      The path to the input file to be tokenized.
    lexer_config    The path to the YAML lexer configuration file.

Example:
    python main.py source_code.js lexers/javascript.yaml
"""
import argparse
import yaml
import sys
from lexers.lexer import process_tokens, precompile_patterns
from tokenizer.tokenizer import tokenize
def read_file(file_path):
    """
    Read the contents of a UTF-8 text file and return it as a string.

    Failures are reported on stderr and terminate the process instead of
    propagating to the caller.

    Parameters:
        file_path (str): The path to the file to read.

    Returns:
        str: The contents of the file.

    Raises:
        SystemExit: With exit status 1 if the file does not exist, cannot be
            accessed, is not valid UTF-8, or cannot be read for any other
            I/O reason.
    """
    try:
        with open(file_path, encoding="utf-8") as file:
            return file.read()
    except FileNotFoundError:
        print(f"Error: File not found: {file_path}", file=sys.stderr)
        sys.exit(1)
    except PermissionError:
        print(f"Error: Permission denied: {file_path}", file=sys.stderr)
        sys.exit(1)
    except UnicodeDecodeError as e:
        # UnicodeDecodeError derives from ValueError, not OSError, so the
        # generic I/O handler below would let it escape as a traceback.
        print(f"Error: File is not valid UTF-8: {file_path}: {e}", file=sys.stderr)
        sys.exit(1)
    except IOError as e:
        print(f"Error: Cannot read file {file_path}: {e}", file=sys.stderr)
        sys.exit(1)
def read_lexer_config(config_file_name):
    """
    Read a YAML configuration file for the lexer and return the parsed data.

    The file is decoded as UTF-8 regardless of the platform's default
    encoding, so the same config parses identically on every system.
    Failures are reported on stderr and terminate the process instead of
    propagating to the caller.

    Parameters:
        config_file_name (str): The path to the YAML configuration file.

    Returns:
        dict: The lexer configuration data parsed from the YAML file.
            NOTE(review): yaml.safe_load returns None for an empty document
            and may return a non-dict for other top-level YAML values;
            callers are presumably given a mapping - confirm.

    Raises:
        SystemExit: With exit status 1 if the file does not exist, cannot be
            accessed, is not valid UTF-8, contains invalid YAML, or cannot be
            read for any other I/O reason.
    """
    try:
        with open(config_file_name, 'r', encoding="utf-8") as file:
            return yaml.safe_load(file)
    except FileNotFoundError:
        print(f"Error: Lexer config file not found: {config_file_name}", file=sys.stderr)
        sys.exit(1)
    except PermissionError:
        print(f"Error: Permission denied: {config_file_name}", file=sys.stderr)
        sys.exit(1)
    except UnicodeDecodeError as e:
        # Raised while the text stream is decoded; it is a ValueError, so
        # neither the YAML handler nor the I/O handler below would catch it.
        print(f"Error: Lexer config is not valid UTF-8: {config_file_name}: {e}", file=sys.stderr)
        sys.exit(1)
    except yaml.YAMLError as e:
        print(f"Error: Invalid YAML in lexer config: {e}", file=sys.stderr)
        sys.exit(1)
    except IOError as e:
        print(f"Error: Cannot read lexer config {config_file_name}: {e}", file=sys.stderr)
        sys.exit(1)
def main():
    """
    Run the command-line tokenizer.

    Takes two positional command-line arguments (the input file path and the
    YAML lexer configuration path), loads both files, precompiles the
    configuration's patterns, tokenizes the input text, and prints the
    resulting tokens to standard output.
    """
    cli = argparse.ArgumentParser(
        description="Tokenize an input file using a lexer configuration."
    )
    positionals = (
        ('input_file', 'The path to the input file to be tokenized.'),
        ('lexer_config', 'The path to the YAML lexer configuration file.'),
    )
    for arg_name, arg_help in positionals:
        cli.add_argument(arg_name, type=str, help=arg_help)
    options = cli.parse_args()

    # The input file is loaded before the configuration, so a bad input path
    # is reported first.
    source_text = read_file(options.input_file)
    config = read_lexer_config(options.lexer_config)

    # Precompile regex patterns for efficient tokenization
    precompile_patterns(config)

    print(tokenize(source_text, config, process_tokens))
# Run the CLI only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()