-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathutils.py
More file actions
106 lines (84 loc) · 2.63 KB
/
utils.py
File metadata and controls
106 lines (84 loc) · 2.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import os
import json
import re
import jsonlines
def format(samples):
    """Convert (question, answer) pairs into chat-style records.

    Each pair becomes a dict with a single "messages" key holding a user
    turn (the question) followed by an assistant turn (the answer).

    Note: this function's name shadows the built-in ``format`` inside this
    module.

    Args:
        samples: Iterable of two-item (question, answer) pairs.

    Returns:
        A list with one record per input pair, in input order.
    """
    return [
        {
            "messages": [
                {"role": "user", "content": prompt},
                {"role": "assistant", "content": reply},
            ]
        }
        for prompt, reply in samples
    ]
def export(samples: list, filename: str):
    """Write all samples to ``<filename>.jsonl`` through the jsonlines writer.

    Args:
        samples: Records handed to the writer's ``write_all``.
        filename: Output path without extension; ".jsonl" is appended.
            The file is opened in write mode ('w').
    """
    target_path = f'{filename}.jsonl'
    with jsonlines.open(target_path, mode='w') as sink:
        sink.write_all(samples)
def debug(*msgs):
    """Print the given messages framed by a separator banner above and below.

    Args:
        *msgs: Values to print, one per ``print`` call, in the order given.
    """
    # The same ten-character rule is emitted before and after the messages.
    banner = f'\n{"="*10}\n'
    for item in (banner, *msgs, banner):
        print(item)
def format_question(q):
    """Wrap a question as a single-turn chat message list asking for C++ code.

    Every occurrence of "enAlusus" in the question is replaced with "c++",
    then two fixed instructions (code only, C++ only) are appended.

    Args:
        q: The question text.

    Returns:
        A one-element list holding a {"role": "user", "content": ...} dict.
    """
    instructions = (
        "\nPlease return only the code without any explanation and without comments inside the code"
        "\nPlease make sure to use C++ only"
    )
    prompt = q.replace("enAlusus", "c++") + instructions
    return [{"role": "user", "content": prompt}]
def get_cpp_code(client, question, model_tag="gpt-4o-mini"):
    """Ask a chat-completion model for C++ code answering the question.

    The question is first turned into a message list by ``format_question``.

    Args:
        client: Object exposing ``chat.completions.create(model=..., messages=...)``
            (presumably an OpenAI-style client; confirm against the caller).
        question: The raw question text.
        model_tag: Model identifier passed as ``model``.

    Returns:
        The ``message.content`` of the first choice in the response.
    """
    completion = client.chat.completions.create(
        model=model_tag,
        messages=format_question(question),
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
def get_docs_ids(cpp_code, docs_index_path):
    """Find which indexed features appear in the C++ code and collect their doc ids.

    The index file is a JSON list of feature entries. Each entry is read for
    these keys: "regex" (truthy if the patterns are regular expressions,
    otherwise they are plain substrings), "patterns", "ids" and "name".

    Args:
        cpp_code: The C++ source text to scan.
        docs_index_path: Path to the JSON index file.

    Returns:
        A ``(docs, names)`` tuple: ``docs`` is the concatenation of the "ids"
        of every matching feature (in index order, duplicates kept), and
        ``names`` is the list of the matching features' names.
    """
    def exists(feature):
        # A feature matches as soon as any one of its patterns is found.
        if feature["regex"]:
            return any(
                re.search(pattern, cpp_code, re.DOTALL | re.MULTILINE) is not None
                for pattern in feature["patterns"]
            )
        return any(pattern in cpp_code for pattern in feature["patterns"])

    # JSON is UTF-8 by specification; do not rely on the platform's default
    # locale encoding, which would break non-ASCII patterns on some systems.
    with open(docs_index_path, encoding="utf-8") as f:
        c_features_mapper = json.load(f)

    docs = []
    names = []
    for feature in c_features_mapper:
        if exists(feature):
            docs.extend(feature["ids"])
            names.append(feature["name"])
    return docs, names
def get_docs(cpp_code, docs_root_dir: str, docs_index_path='docs2id.json'):
    """Load the text of every document whose feature appears in the C++ code.

    Document ids come from ``get_docs_ids``; each id is read from
    ``<docs_root_dir>/<id>.txt``.

    Args:
        cpp_code: The C++ source text to scan for indexed features.
        docs_root_dir: Directory containing the ``<id>.txt`` document files.
        docs_index_path: Path to the JSON feature index.

    Returns:
        A list of document contents, in the order the ids were returned.

    Raises:
        OSError: If the index or any document file cannot be opened.
    """
    docs_ids, _ = get_docs_ids(cpp_code, docs_index_path)
    docs = []
    for doc_id in docs_ids:  # not named `id`, to avoid shadowing the builtin
        doc_path = os.path.join(docs_root_dir, f"{doc_id}.txt")
        # Read with an explicit encoding so results do not depend on the
        # platform's default locale.
        with open(doc_path, encoding="utf-8") as f:
            docs.append(f.read())
    return docs
def get_rag_question(question, docs):
    """Build the retrieval-augmented prompt for a question.

    The documents are joined with newlines and placed in the "context"
    slot of a fixed prompt template, after the question.

    Args:
        question: The question text.
        docs: Iterable of document strings used as context.

    Returns:
        The prompt string, starting and ending with a newline.
    """
    context = "\n".join(docs)
    return (
        "\n"
        "please answer the following question with the help of the given context about enAlusus programming language.\n"
        f"question: {question},\n"
        f"context: {context},\n"
        "answer:\n"
    )