From 00612cc265b0d42c8671be110a030a9d1d12dd73 Mon Sep 17 00:00:00 2001 From: Pitastic Date: Mon, 31 Mar 2025 22:37:24 +0200 Subject: [PATCH 01/17] Implement AI fake tagging in procedure --- handler/Tags.py | 116 ++++++++++++++++++-------------- tests/helper.py | 14 ++++ tests/test_unit_handler_Tags.py | 15 +++-- 3 files changed, 92 insertions(+), 53 deletions(-) diff --git a/handler/Tags.py b/handler/Tags.py index ee0941c0..37e87503 100644 --- a/handler/Tags.py +++ b/handler/Tags.py @@ -108,8 +108,7 @@ def tag(self, if rule_name == 'ai': # AI only - return self.tag_ai(rules, prio=prio, - prio_set=prio_set, dry_run=dry_run) + return self.tag_ai(rules, dry_run=dry_run) # Benutzer Regeln laden rules = self._load_ruleset(rule_name) @@ -122,8 +121,9 @@ def tag(self, raise ValueError('Es existieren noch keine Regeln für den Benutzer') # Benutzer Regeln anwenden - return self.tag_regex(rules, prio=prio, prio_set=prio_set, dry_run=dry_run) - + result_rx = self.tag_regex(rules, prio=prio, prio_set=prio_set, dry_run=dry_run) + result_ai = self.tag_ai(dry_run=dry_run) + return {**result_rx, **result_ai} def tag_regex(self, ruleset: dict, collection: str=None, prio: int=1, prio_set: int=1, dry_run: bool=False) -> dict: @@ -228,34 +228,27 @@ def tag_regex(self, ruleset: dict, collection: str=None, prio: int=1, # Store Result for this Rule result['tagged'] += rule_result.get('tagged') result[rule_name] = rule_result - return result + return result - def tag_ai(self, collection: str=None, prio: int=1, prio_set: int=None, - dry_run: bool=False) -> dict: + def tag_ai(self, collection: str=None, dry_run: bool=False) -> dict: """ Automatisches Tagging mit AI. Args: collection: Name der Collection, in die Werte eingefügt werden sollen. Default: IBAN aus der Config. 
- prio: Value of priority for this tagging run - in comparison with already tagged transactions (higher = important) - This value will be set as the new priority in DB - prio_set Compare with priority but set this value instead. - Default: prio. dry_run Switch to show, which TX would be updated. Do not update. Returns: dict: - - tagged (int): Summe aller erfolgreichen Taggings (0 bei dry_run) + - guessed (int): Summe aller erfolgreichen Taggings (0 bei dry_run) - ai (dict): - - tagged (int): Anzahl der getaggten Datensätze (0 bei dry_run) - entries (list): UUIDs die selektiert wurden (auch bei dry_run) """ logging.info("Tagging with AI....") # Allgemeine Startfilter für die Condition - query_args = self._form_tag_query(prio, collection=collection) + query_args = self._form_tag_query(collection=collection, ai=True) matched = self.db_handler.select(**query_args) tagged = 0 @@ -266,25 +259,19 @@ def tag_ai(self, collection: str=None, prio: int=1, prio_set: int=None, for row in matched: c, entry = self._ai_tagging(row) count += c - entries += entry.get('uuid') + entries.append(entry.get('uuid')) # Update Request if count and not dry_run: for entry in entries: - if entry.get('primmary') is None: - continue - uuid = entry.get('uuid') query = {'key': 'uuid', 'value': uuid} - new_prio = prio if prio_set is None else prio_set # Updated Category new_category = { - 'prio': new_prio, - 'primary_tag': entry.get('primary'), - 'secondary_tag': entry.get('secondary', 'sonstiges'), + 'guess': entry.get('guess') } updated = self.db_handler.update(data=new_category, condition=query) @@ -297,9 +284,8 @@ def tag_ai(self, collection: str=None, prio: int=1, prio_set: int=None, tagged += updated result = { - 'tagged': tagged, + 'guessed': tagged, 'ai': { - 'tagged': tagged, 'entries': entries } } @@ -307,28 +293,45 @@ def tag_ai(self, collection: str=None, prio: int=1, prio_set: int=None, logging.info("Tagging with AI....DONE") return result - def _form_tag_query(self, prio: int, 
collection: str=None) -> dict: + def _form_tag_query(self, prio: int=1, collection: str=None, ai=False) -> dict: """ Erstellt die Standardabfrage-Filter für den Ausgangsdatensatz eines Taggings. Args: prio, int: Filter more important tags collection, str: Collection to select from + ai, bool: True if AI Tagging Return: dict: Query Dict for db_handler.select() """ - # Allgemeine Startfilter für die Condition - query_args = { - 'condition': [{ - 'key': 'prio', - 'value': prio, - 'compare': '<' - }], - 'multi': 'AND', - 'collection': collection - } - if collection is not None: - query_args['collection'] = collection + if not ai: + # Allgemeine Startfilter für die Condition + query_args = { + 'condition': [{ + 'key': 'prio', + 'value': prio, + 'compare': '<' + }], + 'multi': 'AND', + 'collection': collection + } + else: + # Startfilter für unkategoriesierte Transaktionen + query_args = { + 'condition': [ + { + 'key': 'primary_tag', + 'value': None, + 'compare': '==' + }, { + 'key': 'secondary_tag', + 'value': None, + 'compare': '==' + } + ], + 'multi': 'OR', + 'collection': collection + } return query_args @@ -344,20 +347,35 @@ def _ai_tagging(self, transaction): tuple(int, dict): Trefferanzahl (0|1), Aktualisierte Transaktion """ #TODO: Fake Methode - list_of_categories = [ - 'Vergnügen', 'Versicherung', 'KFZ', 'Kredite', - 'Haushalt und Lebensmittel', 'Anschaffung', + primary_categories = [ + 'AI_Pri_1', 'AI_Pri_2', 'AI_Pri_3', 'AI_Pri_4', + 'AI_Pri_5', 'AI_Pri_6', None, None, None, None, + None, None, None, None, None, None, None + ] + secondary_categories = [ + 'AI_Sec_1', 'AI_Sec_2', 'AI_Sec_3', 'AI_Sec_4', + 'AI_Sec_5', 'AI_Sec_6', None, None, None, None, + None, None, None, None, None, None, None ] - list_of_categories += 20 * None - found_category = random.choice(list_of_categories) c = 0 - - if found_category is not None: - transaction['primary_tag'] = found_category - transaction['secondary_tag'] = None - c = 1 - + guess = {} + if 
transaction.get('primary_tag') is not None: + # Guess Primary Tag + found_category = random.choice(primary_categories) + if found_category is not None: + guess['primary_tag'] = found_category + c += 1 + + if transaction.get('secondary_tag') is not None: + # Guess Secondary Tag + found_category = random.choice(secondary_categories) + if found_category is not None: + guess['secondary_tag'] = found_category + c += 1 + + # Store result and return + transaction['guess'] = guess return c, transaction def _load_ruleset(self, rule_name=None, namespace='both'): diff --git a/tests/helper.py b/tests/helper.py index f8c35098..d12876e0 100644 --- a/tests/helper.py +++ b/tests/helper.py @@ -154,6 +154,17 @@ def __init__(self): 'compare': 'regex' } ] + self.query3 = [ + { + 'key': 'primary_tag', + 'value': None, + 'compare': '==' + }, { + 'key': 'secondary_tag', + 'value': None, + 'compare': '==' + } + ] self.db_all = [ { @@ -220,6 +231,9 @@ def select(self, collection=None, condition=None, multi=None): # pylint: disable if condition == self.query2: return [self.db_all[4]] + if condition == self.query3: + return self.db_all + return [] def update(self, data, collection=None, condition=None, multi=None): # pylint: disable=unused-argument diff --git a/tests/test_unit_handler_Tags.py b/tests/test_unit_handler_Tags.py index b6b50b9e..95c977e3 100644 --- a/tests/test_unit_handler_Tags.py +++ b/tests/test_unit_handler_Tags.py @@ -98,7 +98,14 @@ def test_regex_custom(): die vom Benutzer hinterlegt worden sind""" return -@pytest.mark.skip(reason="Currently not implemented yet") -def test_ai(): - """Testet das Kategorisieren der Datensätze mit Hilfe der KI""" - return + +def test_ai_guess(test_app): + """Prüft zunächst, ob die Methode für das KI Tagging die + richtigen Datensätze selektiert und ein Guess hinterlässt""" + with test_app.app_context(): + tagger = Tagger(MockDatabase()) + tagging_result = tagger.tag_ai(dry_run=True) + assert tagging_result.get('guessed') == 0, \ + "Die Option 
dry_run hat trotzdem Datensätze verändert" + assert len(tagging_result.get('ai').get('entries')) == 5, \ + "Die Methode hat nicht die richtige Anzahl an Einträgen getroffen" From 2f3e714ad82115f00416637b0d1f14b01ca63118 Mon Sep 17 00:00:00 2001 From: Pitastic Date: Tue, 1 Apr 2025 21:22:28 +0200 Subject: [PATCH 02/17] frontend implementation of ai Tagging (fake) --- app/static/js/index.js | 2 +- app/templates/index.html | 2 +- app/ui.py | 2 +- handler/Tags.py | 16 ++++++++++------ tests/test_integ_basics.py | 2 +- 5 files changed, 14 insertions(+), 10 deletions(-) diff --git a/app/static/js/index.js b/app/static/js/index.js index 7f8bd0be..ab3c7911 100644 --- a/app/static/js/index.js +++ b/app/static/js/index.js @@ -147,7 +147,7 @@ function manualTagEntries() { function getInfo(uuid) { const iban = document.getElementById('input_iban').value; - apiGet('getTx/'+iban+'/'+uuid, {}, function (responseText, error) { + apiGet('/'+iban+'/'+uuid, {}, function (responseText, error) { if (error) { printResult('getTx failed: ' + '(' + error + ')' + responseText); diff --git a/app/templates/index.html b/app/templates/index.html index b38f306c..442d04c6 100644 --- a/app/templates/index.html +++ b/app/templates/index.html @@ -19,7 +19,7 @@ {% for key in table_header %} {% if key != 'parsed' %} - {{ entry.get(key) }} + {{ entry.get(key) or '' }} {% endif %} {% endfor %} diff --git a/app/ui.py b/app/ui.py index 62f6d330..e77aa65a 100644 --- a/app/ui.py +++ b/app/ui.py @@ -182,7 +182,7 @@ def upload(): 'inserted': inserted }, return_code - @current_app.route('/api/getTx//', methods=['GET']) + @current_app.route('/api//', methods=['GET']) def getTx(iban, t_id): """ Gibt alle Details zu einer bestimmten Transaktion zurück. 
diff --git a/handler/Tags.py b/handler/Tags.py index 37e87503..7ddbc408 100644 --- a/handler/Tags.py +++ b/handler/Tags.py @@ -108,7 +108,7 @@ def tag(self, if rule_name == 'ai': # AI only - return self.tag_ai(rules, dry_run=dry_run) + return self.tag_ai(dry_run=dry_run) # Benutzer Regeln laden rules = self._load_ruleset(rule_name) @@ -259,7 +259,7 @@ def tag_ai(self, collection: str=None, dry_run: bool=False) -> dict: for row in matched: c, entry = self._ai_tagging(row) count += c - entries.append(entry.get('uuid')) + entries.append(entry) # Update Request if count and not dry_run: @@ -274,6 +274,7 @@ def tag_ai(self, collection: str=None, dry_run: bool=False) -> dict: 'guess': entry.get('guess') } updated = self.db_handler.update(data=new_category, condition=query) + updated = updated.get('updated') # soft Exception Handling if not updated: @@ -286,7 +287,7 @@ def tag_ai(self, collection: str=None, dry_run: bool=False) -> dict: result = { 'guessed': tagged, 'ai': { - 'entries': entries + 'entries': [e.get('uuid') for e in entries], } } @@ -360,14 +361,17 @@ def _ai_tagging(self, transaction): c = 0 guess = {} - if transaction.get('primary_tag') is not None: + primary_tag = transaction.get('primary_tag') + if primary_tag is None: # Guess Primary Tag found_category = random.choice(primary_categories) if found_category is not None: - guess['primary_tag'] = found_category + primary_tag = found_category + guess['primary_tag'] = primary_tag c += 1 - if transaction.get('secondary_tag') is not None: + + if primary_tag is not None and transaction.get('secondary_tag') is None: # Guess Secondary Tag found_category = random.choice(secondary_categories) if found_category is not None: diff --git a/tests/test_integ_basics.py b/tests/test_integ_basics.py index bfb007eb..3926f595 100644 --- a/tests/test_integ_basics.py +++ b/tests/test_integ_basics.py @@ -291,7 +291,7 @@ def test_get_tx(test_app): with test_app.test_client() as client: # Get Transaction result = client.get( - 
f"/api/getTx/{test_app.config['IBAN']}/6884802db5e07ee68a68e2c64f9c0cdd" + f"/api/{test_app.config['IBAN']}/6884802db5e07ee68a68e2c64f9c0cdd" ) assert result.status_code == 200, \ "Der Statuscode der Transaktion war falsch" From dd09b8566f36efdbe8de7ab6f233e47fbf84f0ab Mon Sep 17 00:00:00 2001 From: Pitastic Date: Tue, 1 Apr 2025 22:14:01 +0200 Subject: [PATCH 03/17] metadata speicher vorbereitet --- app/static/js/index.js | 21 +++++++++++++++++++++ app/templates/index.html | 9 +++++++++ app/ui.py | 17 +++++++++++++++++ handler/BaseDb.py | 23 +++++++++++++++++++++++ handler/MongoDb.py | 32 +++++++++++++++++++++++++++++--- handler/Tags.py | 20 +++++++++++++++----- handler/TinyDb.py | 22 ++++++++++++++++++++-- 7 files changed, 134 insertions(+), 10 deletions(-) diff --git a/app/static/js/index.js b/app/static/js/index.js index ab3c7911..436e0960 100644 --- a/app/static/js/index.js +++ b/app/static/js/index.js @@ -144,6 +144,13 @@ function manualTagEntries() { } +/** + * Fetches information based on the provided UUID and IBAN input value. + * + * @param {string} uuid - The unique identifier used to fetch specific information. + * + * This function retrieves the info for a given uuid from the server. + */ function getInfo(uuid) { const iban = document.getElementById('input_iban').value; @@ -157,3 +164,17 @@ function getInfo(uuid) { } }); } + + +function saveRule() { + const rule = document.getElementById('input_rule').value; + apiSubmit('saveRule', {'rule_json': rule}, function (responseText, error) { + if (error) { + printResult('Rule saving failed: ' + '(' + error + ')' + responseText); + + } else { + alert('Rule saved successfully!' + responseText); + + } + }, false); +} diff --git a/app/templates/index.html b/app/templates/index.html index 442d04c6..12db9f2d 100644 --- a/app/templates/index.html +++ b/app/templates/index.html @@ -64,6 +64,13 @@

Switches to Click

truncate Database tag Entries tag Entries (manually) + save Rule + + + +
+ +
@@ -72,6 +79,8 @@

Switches to Click

Message Box

 
+ + {% endblock %} \ No newline at end of file diff --git a/app/ui.py b/app/ui.py index e77aa65a..ffacbcd6 100644 --- a/app/ui.py +++ b/app/ui.py @@ -270,6 +270,23 @@ def setManualTags(iban): return updated_entries + @current_app.route('/api/saveRule/', defaults={'rule_type':'rule'}, methods=['POST']) + @current_app.route('/api/saveRule/', methods=['POST']) + def saveRule(rule_type): + """ + Einfügen oder updaten einer Regel in der Datenbank. + Args (json): + rule_type, str: Typ der Regel (rule | parser) + rule, dict: Regel-Objekt + - name, str: Name der Regel + - rule, dict: Regel-Objekt + """ + #TODO: Beide Arten in einer DB speichern, anhand eines Key aber unterscheiden. + # - Das Dict muss hier noch richtig verpackt werden. + # - Das Select muss dafür optimiert werden. + # - Es müssen für alles mit metadata noch tests geschrieben werden. + raise NotImplementedError() + def _set_manual_tag(self, iban, t_id, data): """ Setzt manuell eine Kategorie für einen bestimmten Eintrag. diff --git a/handler/BaseDb.py b/handler/BaseDb.py index d6bdbe28..10527a7e 100644 --- a/handler/BaseDb.py +++ b/handler/BaseDb.py @@ -141,3 +141,26 @@ def _generate_unique(self, tx_entries): return tx_list[0] return tx_list + + def get_metadata(self, key): + """ + Ruft Metadaten aus der Datenbank ab. + + Args: + key (str): Der Schlüssel der Metadaten. + Returns: + dict: Die abgerufenen Metadaten. + """ + raise NotImplementedError() + + def set_metadata(self, key, value): + """ + Speichert oder aktualisiert Metadaten in der Datenbank. + + Args: + key (str): Der Schlüssel der Metadaten. + value (any): Der Wert der Metadaten. + Returns: + dict: Informationen über den Speichervorgang. 
+ """ + raise NotImplementedError() diff --git a/handler/MongoDb.py b/handler/MongoDb.py index 1ac03992..0cef6c41 100644 --- a/handler/MongoDb.py +++ b/handler/MongoDb.py @@ -26,12 +26,24 @@ def __init__(self): def create(self): """ - Erstellt eine Collection je Konto und legt Indexes/Constraints fest + Erstellt eine Collection je Konto und legt Indexes/Constraints fest. + Außerdem wird die Collection für Metadaten erstellt, falls sie noch nicht existiert. """ - self.connection[current_app.config['IBAN']].create_index( - [("uuid", pymongo.TEXT)], unique=True + # Collection für Transaktionen (je Konto) + iban = current_app.config['IBAN'] + if iban not in self.connection.list_collection_names(): + self.connection.create_collection(iban) + self.connection[iban].create_index( + [("uuid", pymongo.TEXT)], unique=True ) + # Collection für Metadaten + if 'metadata' not in self.connection.list_collection_names(): + self.connection.create_collection('metadata') + self.connection['metadata'].create_index( + [("key", pymongo.TEXT)], unique=True + ) + def select(self, collection=None, condition=None, multi='AND'): """ Selektiert Datensätze aus der Datenbank, die die angegebene Bedingung erfüllen. 
@@ -170,6 +182,20 @@ def truncate(self, collection=None): """ return self.delete(collection=collection) + def get_metadata(self, key): + collection = self.connection['metadata'] + result = collection.find_one({'key': key}) + return result + + def set_metadata(self, key, value): + collection = self.connection['metadata'] + result = collection.update_one( + {'key': key}, + {'$set': {'value': value}}, + upsert=True + ) + return {'updated': result.modified_count} + def _form_condition(self, condition): """ Erstellt aus einem Condition-Dict eine entsprechende Query diff --git a/handler/Tags.py b/handler/Tags.py index 7ddbc408..ccffa44d 100644 --- a/handler/Tags.py +++ b/handler/Tags.py @@ -29,11 +29,7 @@ def parse(self, input_data): # RegExes # Der Key wird als Bezeichner für das Ergebnis verwendet. # Jeder RegEx muss genau eine Gruppe matchen. - parse_regexes = { - 'Mandatsreferenz': re.compile(r"Mandatsref\:\s?([A-z0-9]*)"), - 'Gläubiger-ID': re.compile(r"([A-Z]{2}[0-9]{2}[0-9A-Z]{3}[0-9]{11})"), - 'Gläubiger-ID-2': re.compile(r"([A-Z]{2}[0-9]{2}[0-9A-Z]{3}[0-9]{19})"), - } + parse_regexes = self._load_parsers() for d in input_data: for name, regex in parse_regexes.items(): @@ -382,6 +378,20 @@ def _ai_tagging(self, transaction): transaction['guess'] = guess return c, transaction + def _load_parsers(self) -> dict: + """ + Parser ermöglichen das Extrahieren von Kerninformationen aus dem Buchungstext. + Die Ergebnisse können für Entscheidung beim Tagging genutzt werden. + Der Key wird als Bezeichner für das Ergebnis verwendet. + Jeder RegEx muss genau eine Gruppe matchen. + """ + parsers = { + 'Mandatsreferenz': re.compile(r"Mandatsref\:\s?([A-z0-9]*)"), + 'Gläubiger-ID': re.compile(r"([A-Z]{2}[0-9]{2}[0-9A-Z]{3}[0-9]{11})"), + 'Gläubiger-ID-2': re.compile(r"([A-Z]{2}[0-9]{2}[0-9A-Z]{3}[0-9]{19})"), + } + return parsers + def _load_ruleset(self, rule_name=None, namespace='both'): """ Load Rules from the Settings of for the requesting User. 
diff --git a/handler/TinyDb.py b/handler/TinyDb.py index 8c31bf15..819c578b 100644 --- a/handler/TinyDb.py +++ b/handler/TinyDb.py @@ -35,11 +35,15 @@ def __init__(self): def create(self): """ - Erstellt einen Table je Konto und legt Indexes/Constraints fest + Erstellt einen Table je Konto und legt Indexes/Constraints fest. + Außerdem wird der Table für Metadaten erstellt, falls er noch nicht existiert. """ - # Touch Table + # Touch Table für Transaktionen (je Konto) self.connection.table(current_app.config['IBAN']) + # Table für Metadaten + self.connection.table('metadata') + def select(self, collection=None, condition=None, multi='AND'): """ Selektiert Datensätze aus der Datenbank, die die angegebene Bedingung erfüllen. @@ -207,6 +211,20 @@ def truncate(self, collection=None): r = table.remove(lambda x: True) return {'deleted': len(r)} + def get_metadata(self, key): + collection = self.connection.table('metadata') + result = collection.get(Query().key == key) + return result + + def set_metadata(self, key, value): + collection = self.connection.table('metadata') + existing = collection.get(Query().key == key) + if existing: + collection.update({'value': value}, Query().key == key) + else: + collection.insert({'key': key, 'value': value}) + return {'updated': 1} + def _form_where(self, condition): """ Erstellt aus einem Condition-Dict eine entsprechende Query From 6bc0eefbd1f817b20b057c037ae7f4788e5078d4 Mon Sep 17 00:00:00 2001 From: Pitastic Date: Wed, 2 Apr 2025 17:40:13 +0200 Subject: [PATCH 04/17] Zwischenspeicher "Metadaten in DB" --- handler/BaseDb.py | 51 ++++++++++++++++++++++++++++++++++++++-------- handler/MongoDb.py | 29 +++++++++++++++++--------- handler/TinyDb.py | 30 ++++++++++++++++++--------- 3 files changed, 82 insertions(+), 28 deletions(-) diff --git a/handler/BaseDb.py b/handler/BaseDb.py index 10527a7e..365bd601 100644 --- a/handler/BaseDb.py +++ b/handler/BaseDb.py @@ -105,12 +105,13 @@ def truncate(self, collection): """ raise 
NotImplementedError() - def _generate_unique(self, tx_entries): + def _generate_unique(self, tx_entries, salt=""): """ Erstellt einen einmaligen ID für jede Transaktion. Args: tx_entries (dict | list(dict)): Liste mit Transaktionsobjekten + salt (str): Optionaler String, der zur Erstellung der ID verwendet wird. Returns: dict | list(dict): Die um die IDs ('uuid') erweiterte Eingabeliste """ @@ -127,7 +128,8 @@ def _generate_unique(self, tx_entries): tx_text = no_special_chars.sub('', transaction.get('text_tx', '')) combined_string = str(transaction.get('date_tx', '')) + \ str(transaction.get('betrag', '')) + \ - tx_text + tx_text + \ + salt md5_hash.update(combined_string.encode('utf-8')) # Store UUID @@ -142,24 +144,57 @@ def _generate_unique(self, tx_entries): return tx_list - def get_metadata(self, key): + def _generate_unique_meta(self, entry): + """ + Generiert eine eindeutige UUID für Metadaten basierend auf dem Eintrag. + Args: + entry (dict): Eintrag für den die UUID generiert werden soll. + Returns: + dict: Das um die ID ('uuid') erweiterte Dict mit den Metadaten. + """ + no_special_chars = re.compile("[^A-Za-z0-9]") + + # Calculate Hash + md5_hash = hashlib.md5() + uuid_text = f"{entry.get('type', '')}-{entry.get('name', '')}" + uuid_text = no_special_chars.sub('', uuid_text) + md5_hash.update(uuid_text.encode('utf-8')) + + # Store UUID + entry['uuid'] = md5_hash.hexdigest() + + return entry + + def get_metadata(self, uuid): """ Ruft Metadaten aus der Datenbank ab. Args: - key (str): Der Schlüssel der Metadaten. + uuid (str): Unique ID (key). + Returns: + dict: Die abgerufenen Metadaten. + """ + raise NotImplementedError() + + def filter_metadata(self, condition, multi): + """ + Ruft Metadaten aus der Datenbank anhand von Kriterien ab. + + Args: + condition (dict): key-value-Paare für die Filterung der Metadaten. + multi (str) : ['AND' | 'OR'] Wenn 'condition' eine Liste mit conditions ist, + werden diese logisch wie hier angegeben verknüpft. 
Default: 'AND' Returns: dict: Die abgerufenen Metadaten. """ raise NotImplementedError() - def set_metadata(self, key, value): + def set_metadata(self, entry): """ - Speichert oder aktualisiert Metadaten in der Datenbank. + Speichert oder ersetzt Metadaten in der Datenbank. Args: - key (str): Der Schlüssel der Metadaten. - value (any): Der Wert der Metadaten. + entry (dict): Der Eintrag, der gespeichert werden soll. Returns: dict: Informationen über den Speichervorgang. """ diff --git a/handler/MongoDb.py b/handler/MongoDb.py index 0cef6c41..45df1b45 100644 --- a/handler/MongoDb.py +++ b/handler/MongoDb.py @@ -41,7 +41,7 @@ def create(self): if 'metadata' not in self.connection.list_collection_names(): self.connection.create_collection('metadata') self.connection['metadata'].create_index( - [("key", pymongo.TEXT)], unique=True + [("uuid", pymongo.TEXT)], unique=True ) def select(self, collection=None, condition=None, multi='AND'): @@ -182,19 +182,28 @@ def truncate(self, collection=None): """ return self.delete(collection=collection) - def get_metadata(self, key): + def get_metadata(self, uuid): collection = self.connection['metadata'] - result = collection.find_one({'key': key}) + result = collection.find_one({'uuid': uuid}) return result - def set_metadata(self, key, value): + def filter_metadata(self, condition, multi='AND'): collection = self.connection['metadata'] - result = collection.update_one( - {'key': key}, - {'$set': {'value': value}}, - upsert=True - ) - return {'updated': result.modified_count} + query = self._form_complete_query(condition, multi) + return list(collection.find(query)) + + def set_metadata(self, entry): + # Set uuid if not present + if not entry.get('uuid'): + entry = self._generate_unique_meta(entry) + + # Remove Entry if exists + collection = self.connection['metadata'] + result = collection.delete_one({'uuid': entry.get('uuid')}) + + # Insert new Entry + result = collection.insert_one(entry) + return {'inserted': 
result.modified_count} def _form_condition(self, condition): """ diff --git a/handler/TinyDb.py b/handler/TinyDb.py index 819c578b..88eaeceb 100644 --- a/handler/TinyDb.py +++ b/handler/TinyDb.py @@ -123,7 +123,7 @@ def insert(self, data, collection=None): return {'inserted': 0} result = self.connection.table(collection).insert(data) - return {'inserted': 1} + return {'inserted': (1 if result else 0)} def update(self, data, collection=None, condition=None, multi='AND'): """ @@ -211,19 +211,29 @@ def truncate(self, collection=None): r = table.remove(lambda x: True) return {'deleted': len(r)} - def get_metadata(self, key): + def get_metadata(self, uuid): collection = self.connection.table('metadata') - result = collection.get(Query().key == key) + result = collection.get(Query().uuid == uuid) return result - def set_metadata(self, key, value): + def filter_metadata(self, condition, multi='AND'): collection = self.connection.table('metadata') - existing = collection.get(Query().key == key) - if existing: - collection.update({'value': value}, Query().key == key) - else: - collection.insert({'key': key, 'value': value}) - return {'updated': 1} + query = self._form_complete_query(condition, multi) + results = collection.search(query) + return results + + def set_metadata(self, entry): + # Set uuid if not present + if not entry.get('uuid'): + entry = self._generate_unique_meta(entry) + + # Remove Entry if exists + collection = self.connection.table('metadata') + collection.remove(Query().uuid == entry.get('uuid')) + + # Insert new Entry + result = collection.insert(entry) + return {'inserted': (1 if result else 0)} def _form_where(self, condition): """ From 98fb42e68293d2ba3c89e795c8fbddb635a945b5 Mon Sep 17 00:00:00 2001 From: Pitastic Date: Wed, 2 Apr 2025 21:21:44 +0200 Subject: [PATCH 05/17] clean self.data db flush --- app/ui.py | 42 +++++++++---------------------- handler/BaseDb.py | 6 ++--- tests/test_integ_app_protected.py | 31 +++++------------------ 3 files 
changed, 20 insertions(+), 59 deletions(-) diff --git a/app/ui.py b/app/ui.py index ffacbcd6..e3b37dd7 100644 --- a/app/ui.py +++ b/app/ui.py @@ -51,7 +51,6 @@ def __init__(self): self.tagger = Tagger(self.db_handler) # Weitere Attribute - self.data = None self.reader = None #TODO: Usermanagement, #7 @@ -130,7 +129,7 @@ def index(iban) -> str: # - - - - - - - - - - - - - - - - - - - - - - - - - - - - @current_app.route('/api/upload', methods=['POST']) - def upload(): + def uploadIban(): """ Endpunkt für das Annehmen hochgeladener Kontoumsatzdateien. Im Anschluss wird automatisch die Untersuchung der Inhalte angestoßen. @@ -167,11 +166,14 @@ def upload(): } # Read Input and Parse the contents - self._read_input(path, data_format=content_formats.get(content_type)) + parsed_data = self._read_input( + path, data_format=content_formats.get(content_type) + ) # Verarbeitete Kontiumsätze in die DB speichern # und vom Objekt und Dateisystem löschen - inserted = self._flush_to_db() + insert_result = self.db_handler.insert(parsed_data) + inserted = insert_result.get('inserted') os.remove(path) return_code = 201 if inserted else 200 @@ -179,7 +181,7 @@ def upload(): 'size': size, 'filename': input_file.filename, 'content_type': content_type, - 'inserted': inserted + 'inserted': inserted, }, return_code @current_app.route('/api//', methods=['GET']) @@ -332,7 +334,7 @@ def _read_input(self, uri, bank='Generic', data_format=None): bank (str): Bezeichnung der Bank bzw. des einzusetzenden Readers. format (str, optional): Bezeichnung des Ressourcenformats (http, csv, pdf). 
Returns: - int: Anzahl an geparsten Einträgen + list(dict): Geparste und getaggte Kontoumsätze """ # Format if data_format is None: @@ -350,28 +352,8 @@ def _read_input(self, uri, bank='Generic', data_format=None): 'http': self.reader.from_http }.get(data_format) - self.data = parsing_method(uri) - if self.data is not None: - self.data = self._parse(self.data) - return len(self.data) - - return 0 + data = parsing_method(uri) + if data is None: + return [] - def _parse(self, input_data=None): - """Hanlder für den gleichnamigen Methodenaufruf beim Taggers""" - # Parsing Data - #TODO: Daten nicht aus self.data, sondern DB nach Signal, #8 - if input_data is None: - input_data = self.data - return self.tagger.parse(input_data) - - def _flush_to_db(self) -> int: - """ - Speichert die eingelesenen Kontodaten in der Datenbank und bereinigt den Objektspeicher. - - Returns: - int: Die Anzahl der eingefügten Datensätze - """ - inserted_rows = self.db_handler.insert(self.data) - self.data = None - return inserted_rows.get('inserted') + return self.tagger.parse(data) diff --git a/handler/BaseDb.py b/handler/BaseDb.py index 365bd601..8fea7897 100644 --- a/handler/BaseDb.py +++ b/handler/BaseDb.py @@ -105,13 +105,12 @@ def truncate(self, collection): """ raise NotImplementedError() - def _generate_unique(self, tx_entries, salt=""): + def _generate_unique(self, tx_entries): """ Erstellt einen einmaligen ID für jede Transaktion. Args: tx_entries (dict | list(dict)): Liste mit Transaktionsobjekten - salt (str): Optionaler String, der zur Erstellung der ID verwendet wird. 
Returns: dict | list(dict): Die um die IDs ('uuid') erweiterte Eingabeliste """ @@ -128,8 +127,7 @@ def _generate_unique(self, tx_entries, salt=""): tx_text = no_special_chars.sub('', transaction.get('text_tx', '')) combined_string = str(transaction.get('date_tx', '')) + \ str(transaction.get('betrag', '')) + \ - tx_text + \ - salt + tx_text md5_hash.update(combined_string.encode('utf-8')) # Store UUID diff --git a/tests/test_integ_app_protected.py b/tests/test_integ_app_protected.py index 44f91e0c..56f21fb8 100644 --- a/tests/test_integ_app_protected.py +++ b/tests/test_integ_app_protected.py @@ -18,11 +18,13 @@ def test_read_input_csv(test_app): ), bank='Commerzbank', data_format=None) # Check Return Value - assert found_rows == 5, (f'Es wurden {found_rows} statt der ' + found_rows_len = len(found_rows) + assert found_rows_len == 5, (f'Es wurden {found_rows_len} statt der ' 'erwarteten 5 Einträge aus der Datei eingelesen.') - assert len(test_app.host.data) == 5, \ - (f'Es wurden {len(test_app.host.data)} Einträge statt ' - 'der 5 erwarteten in der Instanz UserInterface gespeichert') + # Savev to DB for next Tests + r = test_app.host.db_handler.insert(found_rows, test_app.config['IBAN']) + assert r.get('inserted') == 5, \ + "Es wurden nicht alle Einträge in die DB eingefügt." 
@pytest.mark.skip(reason="Currently not implemented yet") def test_read_input_pdf(): @@ -42,27 +44,6 @@ def test_read_input_html(): """ return -def test_flush_to_db(test_app): - """Testet das Wegschreiben von Daten aus der Instanz in die Datenbank""" - - # Muss Daten in der Instanz haben - # Leeren und 5 Datensätze einlesen - test_app.host.data = None - test_read_input_csv(test_app) - - with test_app.app_context(): - # Methode ausführen - inserted = test_app.host._flush_to_db() # pylint: disable=protected-access - - # Überprüfen - assert inserted == 5, \ - (f'Es wurden {inserted} Einträge statt ' - 'der 5 erwarteten von UserInterface gespeichert') - - r = test_app.host.db_handler.select() - assert len(r) == 5, \ - (f'Es wurden {len(r)} Einträge statt ' - 'der 5 erwarteten von UserInterface gespeichert') def test_set_manual_tag(test_app): """ From 76015d57374df1540d1890251dcf8fdcaeeb9a8d Mon Sep 17 00:00:00 2001 From: Pitastic Date: Wed, 2 Apr 2025 22:45:44 +0200 Subject: [PATCH 06/17] Speichern von Metadaten per File Upload --- Models.md | 19 +++--- app/static/js/index.js | 27 +++++--- app/templates/index.html | 9 ++- app/ui.py | 130 +++++++++++++++++++++++++++++-------- handler/Tags.py | 7 +- tests/test_integ_basics.py | 29 ++++++--- 6 files changed, 161 insertions(+), 60 deletions(-) diff --git a/Models.md b/Models.md index 8f808934..dbf386da 100644 --- a/Models.md +++ b/Models.md @@ -4,7 +4,7 @@ ``` { - 'uuid': str, + 'uuid': str, # (generated) 'date_tx': int, # (UTC) 'text_tx': str, 'betrag': float, @@ -46,19 +46,22 @@ } ``` -### Dictionary eines Rulesets +### Dictionary eines Rulesets (Tag/Parse) ``` { - 'primary': str, - 'regex': r-str( RegEx ), # (optional if parsed) - 'parsed': dict( # (optional if regex) - str( parsed-Key ) : r-str( RegEx ) - ) + 'uuid': str # (generated) + 'metatype': str # (config|regex|parser) + 'name': str, + 'regex': r-str( RegEx ), + + ----------- bei Rules ---------- + + 'primary': str | None, + 'secondary': str | None, ----------- 
optional ----------- - 'secondary': str, 'prioriry': int } ``` diff --git a/app/static/js/index.js b/app/static/js/index.js index 436e0960..87281e7c 100644 --- a/app/static/js/index.js +++ b/app/static/js/index.js @@ -24,6 +24,7 @@ function printResult(result){ * The file is selected via the file input element 'input_file'. */ function uploadFile() { + const iban = document.getElementById('input_iban').value; const fileInput = document.getElementById('input_file'); if (fileInput.files.length === 0) { alert('Please select a file to upload.'); @@ -31,7 +32,7 @@ function uploadFile() { } const params = { file: 'input_file' }; // The key 'file' corresponds to the input element's ID - apiSubmit('upload', params, function (responseText, error) { + apiSubmit('upload/' + iban, params, function (responseText, error) { if (error) { printResult('File upload failed: ' + '(' + error + ')' + responseText); @@ -51,7 +52,7 @@ function uploadFile() { function truncateDB() { const iban = document.getElementById('input_iban').value; - apiGet('truncateDatabase/'+iban, {}, function (responseText, error) { + apiGet(iban + '/truncateDatabase/'+iban, {}, function (responseText, error) { if (error) { printResult('Truncate failed: ' + '(' + error + ')' + responseText); @@ -72,12 +73,13 @@ function truncateDB() { */ function tagEntries() { // TODO: Implement more, complex tagging rules + const iban = document.getElementById('input_iban').value; const rule_name = document.getElementById('input_tagging_name').value; const rules = { 'rule_name': rule_name } - apiSubmit('tag', rules, function (responseText, error) { + apiSubmit(iban + '/tag', rules, function (responseText, error) { if (error) { printResult('Tagging failed: ' + '(' + error + ')' + responseText); @@ -125,9 +127,9 @@ function manualTagEntries() { let api_function; if (t_ids.length == 1) { - api_function = 'setManualTag/'+iban+'/'+t_ids[0]; + api_function = iban+'/setManualTag/'+t_ids[0]; } else { - api_function = 'setManualTags/' + 
iban; + api_function = iban+'/setManualTags'; tags['t_ids'] = t_ids; }; @@ -166,9 +168,16 @@ function getInfo(uuid) { } -function saveRule() { - const rule = document.getElementById('input_rule').value; - apiSubmit('saveRule', {'rule_json': rule}, function (responseText, error) { +function saveMeta() { + const meta_type = document.getElementById('select_meta').value; + const fileInput = document.getElementById('input_file'); + if (fileInput.files.length === 0) { + alert('Please select a file to upload.'); + return; + } + + const params = { file: 'input_file' }; // The key 'file' corresponds to the input element's ID + apiSubmit('upload/metadata/'+meta_type, params, function (responseText, error) { if (error) { printResult('Rule saving failed: ' + '(' + error + ')' + responseText); @@ -176,5 +185,5 @@ function saveRule() { alert('Rule saved successfully!' + responseText); } - }, false); + }, true); } diff --git a/app/templates/index.html b/app/templates/index.html index 12db9f2d..62e8b635 100644 --- a/app/templates/index.html +++ b/app/templates/index.html @@ -64,13 +64,18 @@

Switches to Click

truncate Database tag Entries tag Entries (manually) - save Rule + save Meta
- + +
diff --git a/app/ui.py b/app/ui.py index e3b37dd7..a29b06f1 100644 --- a/app/ui.py +++ b/app/ui.py @@ -3,6 +3,7 @@ import sys import os +import json import logging from datetime import datetime from flask import request, current_app, render_template @@ -128,8 +129,8 @@ def index(iban) -> str: # - API Endpoints - - - - - - - - - - - - - - - - - - - - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @current_app.route('/api/upload', methods=['POST']) - def uploadIban(): + @current_app.route('/api/upload/', methods=['POST']) + def uploadIban(iban): """ Endpunkt für das Annehmen hochgeladener Kontoumsatzdateien. Im Anschluss wird automatisch die Untersuchung der Inhalte angestoßen. @@ -143,19 +144,9 @@ def uploadIban(): if not input_file: return {'error': 'No file provided'}, 400 - content_type = input_file.content_type - size = 0 - path = '/tmp/upload.file' - with open(path, 'wb') as f: - - while True: - data = input_file.read(8192) - - if not data: - break - - size += len(data) - f.write(data) + # Store Upload file to tmp + path = '/tmp/transactions.tmp' + content_type, size = self._mv_fileupload(input_file, path) # Daten einlesen und in Object speichern (Bank und Format default bzw. wird geraten) content_formats = { @@ -172,7 +163,7 @@ def uploadIban(): # Verarbeitete Kontiumsätze in die DB speichern # und vom Objekt und Dateisystem löschen - insert_result = self.db_handler.insert(parsed_data) + insert_result = self.db_handler.insert(parsed_data, iban) inserted = insert_result.get('inserted') os.remove(path) @@ -184,6 +175,26 @@ def uploadIban(): 'inserted': inserted, }, return_code + @current_app.route('/api/upload/metadata/', methods=['POST']) + def uploadRules(metadata): + """ + Endpunkt für das Annehmen hochgeladener Tagging- und Parsingregeln.. 
+ + Args (uri, multipart/form-data): + metadata (str): [regex|parser|config] Type of Metadata to save + input_file (binary): Dateiupload aus Formular-Submit + Returns: + json: Informationen zur Datei und Ergebnis der Untersuchung. + """ + input_file = request.files.get('input_file') + if not input_file: + return {'error': 'No file provided'}, 400 + + # Store Upload file to tmp + path = f'/tmp/{metadata}.tmp' + _ = self._mv_fileupload(input_file, path) + return self._read_settings(path, metatype=metadata) + @current_app.route('/api//', methods=['GET']) def getTx(iban, t_id): """ @@ -203,8 +214,7 @@ def getTx(iban, t_id): ) return tx_details[0], 200 - @current_app.route('/api/truncateDatabase/', defaults={'iban':None}, methods=['DELETE']) - @current_app.route('/api/truncateDatabase/', methods=['DELETE']) + @current_app.route('/api//truncateDatabase', methods=['DELETE']) def truncateDatabase(iban): """ Leert die Datenbank zu einer IBAN @@ -217,8 +227,8 @@ def truncateDatabase(iban): deleted_entries = self.db_handler.truncate(iban) return {'deleted': deleted_entries}, 200 - @current_app.route('/api/tag', methods=['PUT']) - def tag() -> dict: + @current_app.route('/api//tag', methods=['PUT']) + def tag(iban) -> dict: """ Kategorisiert die Kontoumsätze und aktualisiert die Daten in der Instanz. Die Argumente werden nach Prüfung an die Tagger-Klasse weitergegeben. @@ -228,10 +238,10 @@ def tag() -> dict: Returns: json: Informationen zum Ergebnis des Taggings. """ - return self.tagger.tag(**request.json) + return self.tagger.tag(iban, **request.json) - @current_app.route('/api/setManualTag//', methods=['PUT']) - def setManualTag(iban, t_id, data=None): + @current_app.route('/api//setManualTag/', methods=['PUT']) + def setManualTag(iban, t_id): """ Handler für _set_manual_tag() für einzelne Einträge. 
@@ -244,11 +254,10 @@ def setManualTag(iban, t_id, data=None): Returns: dict: updated, int: Anzahl der gespeicherten Datensätzen """ - if data is None: - data = request.json + data = request.json return self._set_manual_tag(iban, t_id, data) - @current_app.route('/api/setManualTags/', methods=['PUT']) + @current_app.route('/api//setManualTags', methods=['PUT']) def setManualTags(iban): """ Handler für _set_manual_tag() für mehrere Einträge. @@ -284,9 +293,10 @@ def saveRule(rule_type): - rule, dict: Regel-Objekt """ #TODO: Beide Arten in einer DB speichern, anhand eines Key aber unterscheiden. - # - Das Dict muss hier noch richtig verpackt werden. - # - Das Select muss dafür optimiert werden. # - Es müssen für alles mit metadata noch tests geschrieben werden. + # - Bisher ist nur der Import via FileUpload möglich + # - Das speichern eines Regexes in JSON ist noch problematisch + # - Laden, Filtern und Auflisten testen raise NotImplementedError() def _set_manual_tag(self, iban, t_id, data): @@ -323,6 +333,31 @@ def _set_manual_tag(self, iban, t_id, data): updated_entries = self.db_handler.update(new_tag_data, iban, condition) return updated_entries + def _mv_fileupload(self, input_file, path): + """ + Verschiebt die hochgeladene Datei in ein temporäres Verzeichnis. + + Args: + input_file (binary): Dateiupload aus Formular-Submit + path (str): Pfad zur temporären Datei + Returns: + str: Content-Type der Datei + """ + content_type = input_file.content_type + size = 0 + with open(path, 'wb') as f: + + while True: + data = input_file.read(8192) + + if not data: + break + + size += len(data) + f.write(data) + + return content_type, size + def _read_input(self, uri, bank='Generic', data_format=None): """ Liest Kontoumsätze aus der Ressource ein. 
Wenn das Format nicht angegeben ist, @@ -357,3 +392,42 @@ def _read_input(self, uri, bank='Generic', data_format=None): return [] return self.tagger.parse(data) + + def _read_settings(self, uri, metatype): + """ + Liest eine Datei mit Metadaten ein, die entweder Konfigurationen, + Regeln für das Tagging oder Regeln für das Parsing enthalten kann. + + Args: + uri (str): Pfad zur JSON mit den Eingabedaten. + metatype (str): [rule|parser|config] Art der Metadaten. + Sie dürfen nicht gemischt vorliegen. + Returns: + list(dict): Geparste Objekte für das Einfügen in die Datenbank. + """ + #with open(uri, 'rb') as infile: + with open(uri, 'r', encoding='utf-8') as infile: + parsed_data = json.load(infile) + + if isinstance(parsed_data, list): + + for entry in parsed_data.keys(): + parsed_data[entry]['metatype'] = metatype + + else: + parsed_data['metatype'] = metatype + parsed_data = [parsed_data] + + # Verarbeitete Metadataen in die DB speichern + # und vom Objekt und Dateisystem löschen + inserted = 0 + for data in parsed_data: + inserted += self.db_handler.set_metadata(data).get('inserted') + + os.remove(uri) + + return_code = 201 if inserted else 200 + return { + 'metatype': metatype, + 'inserted': inserted, + }, return_code diff --git a/handler/Tags.py b/handler/Tags.py index ccffa44d..e605abaa 100644 --- a/handler/Tags.py +++ b/handler/Tags.py @@ -39,7 +39,7 @@ def parse(self, input_data): return input_data - def tag(self, + def tag(self, iban, rule_name: str = None, rule_primary: str = None, rule_secondary: str = None, rule_regex: str = None, rule_parsed_keys: list = (), rule_parsed_vals: list = (), prio: int = 1, prio_set: int = None, dry_run: bool = False) -> dict: @@ -48,6 +48,7 @@ def tag(self, Args: data (dict): Dictionary mit den Parametern für das Tagging: + iban Name der Collection rule_name: Name der anzuwendenden Taggingregel. Reserviertes Keyword 'ai' führt nur das AI Tagging aus. 
Default: Es werden alle Regeln des Benutzers ohne das @@ -117,8 +118,8 @@ def tag(self, raise ValueError('Es existieren noch keine Regeln für den Benutzer') # Benutzer Regeln anwenden - result_rx = self.tag_regex(rules, prio=prio, prio_set=prio_set, dry_run=dry_run) - result_ai = self.tag_ai(dry_run=dry_run) + result_rx = self.tag_regex(rules, iban, prio=prio, prio_set=prio_set, dry_run=dry_run) + result_ai = self.tag_ai(iban, dry_run=dry_run) return {**result_rx, **result_ai} def tag_regex(self, ruleset: dict, collection: str=None, prio: int=1, diff --git a/tests/test_integ_basics.py b/tests/test_integ_basics.py index 3926f595..ee24ee2a 100644 --- a/tests/test_integ_basics.py +++ b/tests/test_integ_basics.py @@ -23,7 +23,7 @@ def test_truncate(test_app): with test_app.app_context(): with test_app.test_client() as client: - result = client.delete('/api/truncateDatabase/') + result = client.delete(f'/api/{test_app.config['IBAN']}/truncateDatabase') assert result.status_code == 200, "Fehler beim Leeren der Datenbank" @@ -53,7 +53,10 @@ def test_upload_csv_commerzbank(test_app): content = get_testfile_contents(EXAMPLE_CSV, binary=True) files = {'input_file': (io.BytesIO(content), 'commerzbank.csv')} # Post File - result = client.post("/api/upload", data=files, content_type='multipart/form-data') + result = client.post( + f"/api/upload/{test_app.config['IBAN']}", + data=files, content_type='multipart/form-data' + ) # Check Response assert result.status_code == 201, \ @@ -130,7 +133,10 @@ def test_double_upload(test_app): content = get_testfile_contents(EXAMPLE_CSV, binary=True) files = {'input_file': (io.BytesIO(content), 'commerzbank.csv')} # Post File 1 - result = client.post("/api/upload", data=files, content_type='multipart/form-data') + result = client.post( + f"/api/upload/{test_app.config['IBAN']}", + data=files, content_type='multipart/form-data' + ) # Check Response assert result.status_code == 201, \ @@ -140,7 +146,10 @@ def test_double_upload(test_app): # 
Post File 2 files = {'input_file': (io.BytesIO(content), 'commerzbank.csv')} - result = client.post("/api/upload", data=files, content_type='multipart/form-data') + result = client.post( + f"/api/upload/{test_app.config['IBAN']}", + data=files, content_type='multipart/form-data' + ) # Check Response (same TX: Keine neuen Einträge angelegt) assert result.status_code == 200, \ @@ -168,7 +177,7 @@ def test_tag_stored(test_app): 'dry_run': True, 'prio': 2 } - result = client.put("/api/tag", json=parameters) + result = client.put(f"/api/{test_app.config['IBAN']}/tag", json=parameters) result = result.json assert result.get('tagged') == 0, \ @@ -183,7 +192,7 @@ def test_tag_stored(test_app): 'rule_name': 'City Tax', 'prio': 2 } - result = client.put("/api/tag", json=parameters) + result = client.put(f"/api/{test_app.config['IBAN']}/tag", json=parameters) result = result.json assert result.get('tagged') == 1, \ @@ -217,7 +226,7 @@ def test_own_rules(test_app): 'rule_regex': r'EDEKA', 'prio': 0, } - result = client.put("/api/tag", json=parameters) + result = client.put(f"/api/{test_app.config['IBAN']}/tag", json=parameters) result = result.json # Es sollte eine Transaktion zutreffen, @@ -238,7 +247,7 @@ def test_own_rules(test_app): 'prio': 9, 'prio_set': 3, } - result = client.put("/api/tag", json=parameters) + result = client.put(f"/api/{test_app.config['IBAN']}/tag", json=parameters) result = result.json assert result.get('tagged') == 1, \ @@ -258,7 +267,7 @@ def test_manual_tagging(test_app): 'secondary_tag': 'Test_SECONDARY' } r = client.put( - f"/api/setManualTag/{test_app.config['IBAN']}/6884802db5e07ee68a68e2c64f9c0cdd", + f"/api/{test_app.config['IBAN']}/setManualTag/6884802db5e07ee68a68e2c64f9c0cdd", json=new_tag ) r = r.json @@ -277,7 +286,7 @@ def test_manual_multi_tagging(test_app): "fdd4649484137572ac642e2c0f34f9af"] } r = client.put( - f"/api/setManualTags/{test_app.config['IBAN']}", + f"/api/{test_app.config['IBAN']}/setManualTags", json=new_tag ) r = 
r.json From 5d420aa1643c15aaa5bedbf80dc6897a241758f8 Mon Sep 17 00:00:00 2001 From: Pitastic Date: Thu, 3 Apr 2025 22:14:32 +0200 Subject: [PATCH 07/17] Rules und Configs in DB; ungetestet --- Models.md | 8 +++-- app/config.py | 15 -------- app/server.py | 10 ++++-- app/static/js/index.js | 2 +- app/templates/index.html | 2 +- app/ui.py | 12 ++++--- configs/config_default.json | 10 ++++++ configs/parser_default.json | 11 ++++++ configs/rule_default.json | 20 +++++++++++ handler/Tags.py | 68 ++++++++++++------------------------- 10 files changed, 86 insertions(+), 72 deletions(-) delete mode 100644 app/config.py create mode 100644 configs/config_default.json create mode 100644 configs/parser_default.json create mode 100644 configs/rule_default.json diff --git a/Models.md b/Models.md index dbf386da..2229c3d9 100644 --- a/Models.md +++ b/Models.md @@ -51,7 +51,7 @@ ``` { 'uuid': str # (generated) - 'metatype': str # (config|regex|parser) + 'metatype': str # (config|rule|parser) 'name': str, 'regex': r-str( RegEx ), @@ -62,6 +62,10 @@ ----------- optional ----------- - 'prioriry': int + 'prioriry': int, + 'parsed': dict( + 'multi': str, # (AND|OR) + 'query': dict # (key=Name, val=Value) + ) } ``` diff --git a/app/config.py b/app/config.py deleted file mode 100644 index 03c406e8..00000000 --- a/app/config.py +++ /dev/null @@ -1,15 +0,0 @@ -LOG_ACCESS_FILE = '/tmp/pynance_access.log' -LOG_ERROR_FILE = '/tmp/pynance_error.log' - -# Options: -DATABASE_BACKEND = 'tiny' -#DATABASE_BACKEND = 'mongo' - -#DATABASE_URI = 'mongodb://testuser:testpassword@localhost:27017' # For mongo (URI) -DATABASE_URI = '/tmp' # For tiny (/path/to/) - -# For tiny: Filename ('testdata.json') -# For mongo: Collection name ('testdata') -DATABASE_NAME = 'testdata.json' - -IBAN = 'DE89370400440532013000' diff --git a/app/server.py b/app/server.py index c67367bb..45090603 100644 --- a/app/server.py +++ b/app/server.py @@ -3,6 +3,7 @@ import os import sys +import json from logging.config import 
dictConfig from flask import Flask @@ -41,7 +42,7 @@ def create_app(config_path: str) -> Flask: ) # Global Config - app.config.from_pyfile(config_path) + app.config.from_file(config_path, load=json.load) if app.config.get('DATABASE_BACKEND') is None: raise IOError(f"Config Pfad '{config_path}' konnte nicht geladen werden !") @@ -52,8 +53,11 @@ def create_app(config_path: str) -> Flask: if __name__ == '__main__': config = os.path.join( - os.path.dirname(os.path.abspath(__file__)), - 'config.py' + os.path.dirname( + os.path.dirname( + os.path.abspath(__file__) + ) + ), 'configs', 'config_default.json' ) application = create_app(config) application.run(host='0.0.0.0', port=8110, debug=True) diff --git a/app/static/js/index.js b/app/static/js/index.js index 87281e7c..6eb1c6b5 100644 --- a/app/static/js/index.js +++ b/app/static/js/index.js @@ -52,7 +52,7 @@ function uploadFile() { function truncateDB() { const iban = document.getElementById('input_iban').value; - apiGet(iban + '/truncateDatabase/'+iban, {}, function (responseText, error) { + apiGet(iban + '/truncateDatabase', {}, function (responseText, error) { if (error) { printResult('Truncate failed: ' + '(' + error + ')' + responseText); diff --git a/app/templates/index.html b/app/templates/index.html index 62e8b635..c724d041 100644 --- a/app/templates/index.html +++ b/app/templates/index.html @@ -72,7 +72,7 @@

Switches to Click

diff --git a/app/ui.py b/app/ui.py index a29b06f1..3c69bd0f 100644 --- a/app/ui.py +++ b/app/ui.py @@ -405,14 +405,18 @@ def _read_settings(self, uri, metatype): Returns: list(dict): Geparste Objekte für das Einfügen in die Datenbank. """ - #with open(uri, 'rb') as infile: with open(uri, 'r', encoding='utf-8') as infile: - parsed_data = json.load(infile) + try: + parsed_data = json.load(infile) + + except json.JSONDecodeError as e: + logging.warning(f"Failed to parse JSON file: {e}") + return {'error': 'Invalid file format (not json)'}, 400 if isinstance(parsed_data, list): - for entry in parsed_data.keys(): - parsed_data[entry]['metatype'] = metatype + for i, _ in enumerate(parsed_data): + parsed_data[i]['metatype'] = metatype else: parsed_data['metatype'] = metatype diff --git a/configs/config_default.json b/configs/config_default.json new file mode 100644 index 00000000..f85d0952 --- /dev/null +++ b/configs/config_default.json @@ -0,0 +1,10 @@ +{ + "name": "default", + "metatype": "config", + "LOG_ACCESS_FILE": "/tmp/pynance_access.log", + "LOG_ERROR_FILE": "/tmp/pynance_error.log", + "DATABASE_BACKEND": "tiny", + "DATABASE_URI": "/tmp", + "DATABASE_NAME": "testdata.json", + "IBAN": "DE89370400440532013000" +} diff --git a/configs/parser_default.json b/configs/parser_default.json new file mode 100644 index 00000000..c8b67aa7 --- /dev/null +++ b/configs/parser_default.json @@ -0,0 +1,11 @@ +[ + { + "name": "Mandatsreferenz", + "metatype": "parser", + "regex": "Mandatsref\\\\:\\\\s?([A-z0-9]*)" + },{ + "name": "Gl\\u00e4ubiger-ID", + "metatype": "parser", + "regex": "([A-Z]{2}[0-9]{2}[0-9A-Z]{3}(?:[0-9]{11}|[0-9]{19}))" + } +] \ No newline at end of file diff --git a/configs/rule_default.json b/configs/rule_default.json new file mode 100644 index 00000000..a7b11cb5 --- /dev/null +++ b/configs/rule_default.json @@ -0,0 +1,20 @@ +[ + { + "name": "Supermarkets", + "metatype": "rule", + "primary": "Lebenserhaltungskosten", + "secondary": "Lebensmittel", + "regex": 
"(EDEKA|Wucherpfennig|Penny|Aldi|Kaufland|netto)" + },{ + "name": "City Tax", + "metatype": "rule", + "primary": "Haus und Grund", + "secondary": "Stadtabgaben", + "parsed": { + "multi": "AND", + "query": { + "Gl\u00e4ubiger-ID": "DE7000100000077777" + } + } + } +] \ No newline at end of file diff --git a/handler/Tags.py b/handler/Tags.py index e605abaa..1b91ad28 100644 --- a/handler/Tags.py +++ b/handler/Tags.py @@ -386,14 +386,14 @@ def _load_parsers(self) -> dict: Der Key wird als Bezeichner für das Ergebnis verwendet. Jeder RegEx muss genau eine Gruppe matchen. """ - parsers = { - 'Mandatsreferenz': re.compile(r"Mandatsref\:\s?([A-z0-9]*)"), - 'Gläubiger-ID': re.compile(r"([A-Z]{2}[0-9]{2}[0-9A-Z]{3}[0-9]{11})"), - 'Gläubiger-ID-2': re.compile(r"([A-Z]{2}[0-9]{2}[0-9A-Z]{3}[0-9]{19})"), - } + raw_parser = self.db_handler.filter_metadata({"metatype":"parser"}) + parsers = {} + for p in raw_parser: + parsers[p.get('name')] = re.compile(p.get('regex')) + return parsers - def _load_ruleset(self, rule_name=None, namespace='both'): + def _load_ruleset(self, rule_name=None): """ Load Rules from the Settings of for the requesting User. 
@@ -408,45 +408,21 @@ def _load_ruleset(self, rule_name=None, namespace='both'): Returns: list(dict): Liste von Filterregeln """ - #TODO: Fake Funktion - system_rules = { - 'Supermarkets': { - 'primary': 'Lebenserhaltungskosten', - 'secondary': 'Lebensmittel', - 'regex': r"(EDEKA|Wucherpfennig|Penny|Aldi|Kaufland|netto)", - }, - } - user_rules = { - 'City Tax': { - 'primary': 'Haus und Grund', - 'secondary': 'Stadtabgaben', - 'parsed': { - 'Gläubiger-ID': r'DE7000100000077777' - }, - } - } - if rule_name: - # Bestimmte Regel laden - if namespace in ['system', 'both']: - # Allgemein - rule = system_rules.get(rule_name) - if namespace == 'both': - # oder speziell (falls vorhanden) - rule = user_rules.get(rule_name, rule) - if namespace == 'user': - # Nur User - rule = user_rules.get(rule_name) - - return {rule_name: rule} - - # Alle Regeln einzelner namespaces - if namespace == 'system': - return system_rules - if namespace == 'user': - return user_rules - - # Alle Regeln aller namespaces - system_rules.update(user_rules) - return system_rules + raw_rule = self.db_handler.filter_metadata( + {"metatype":"rule", "name": rule_name}, + multi='AND' + ) + rule = raw_rule[0] + rule['regex'] = re.compile(rule['regex']) + return {rule_name: raw_rule} + + # Alle Regeln laden + raw_rules = self.db_handler.filter_metadata({"metatype":"rule"}) + rules = {} + for r in raw_rules: + r['regex'] = re.compile(r.get('regex')) + rules[r.get('name')] = r + + return rules From 35018dbf237dd55c1221af0e6a34c57eefd19d72 Mon Sep 17 00:00:00 2001 From: Pitastic Date: Sat, 5 Apr 2025 21:22:04 +0200 Subject: [PATCH 08/17] Metadatenspeicher vorbereitet. 
In Tags noch laden ; Test noch offen --- app/server.py | 10 +- configs/config_default.json | 10 -- ...arser_default.json => parser.default.json} | 0 .../{rule_default.json => rule.default.json} | 0 handler/BaseDb.py | 140 +++++++++++++----- handler/MongoDb.py | 24 ++- handler/TinyDb.py | 24 ++- 7 files changed, 136 insertions(+), 72 deletions(-) delete mode 100644 configs/config_default.json rename configs/{parser_default.json => parser.default.json} (100%) rename configs/{rule_default.json => rule.default.json} (100%) diff --git a/app/server.py b/app/server.py index 45090603..c67367bb 100644 --- a/app/server.py +++ b/app/server.py @@ -3,7 +3,6 @@ import os import sys -import json from logging.config import dictConfig from flask import Flask @@ -42,7 +41,7 @@ def create_app(config_path: str) -> Flask: ) # Global Config - app.config.from_file(config_path, load=json.load) + app.config.from_pyfile(config_path) if app.config.get('DATABASE_BACKEND') is None: raise IOError(f"Config Pfad '{config_path}' konnte nicht geladen werden !") @@ -53,11 +52,8 @@ def create_app(config_path: str) -> Flask: if __name__ == '__main__': config = os.path.join( - os.path.dirname( - os.path.dirname( - os.path.abspath(__file__) - ) - ), 'configs', 'config_default.json' + os.path.dirname(os.path.abspath(__file__)), + 'config.py' ) application = create_app(config) application.run(host='0.0.0.0', port=8110, debug=True) diff --git a/configs/config_default.json b/configs/config_default.json deleted file mode 100644 index f85d0952..00000000 --- a/configs/config_default.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "name": "default", - "metatype": "config", - "LOG_ACCESS_FILE": "/tmp/pynance_access.log", - "LOG_ERROR_FILE": "/tmp/pynance_error.log", - "DATABASE_BACKEND": "tiny", - "DATABASE_URI": "/tmp", - "DATABASE_NAME": "testdata.json", - "IBAN": "DE89370400440532013000" -} diff --git a/configs/parser_default.json b/configs/parser.default.json similarity index 100% rename from 
configs/parser_default.json rename to configs/parser.default.json diff --git a/configs/rule_default.json b/configs/rule.default.json similarity index 100% rename from configs/rule_default.json rename to configs/rule.default.json diff --git a/handler/BaseDb.py b/handler/BaseDb.py index 8fea7897..8e3e9487 100644 --- a/handler/BaseDb.py +++ b/handler/BaseDb.py @@ -3,16 +3,23 @@ import hashlib import re +import os +import logging +import glob +import json class BaseDb(): """Basisklasse für die Vererbung an Datenbankhandler mit allgemeinen Funktionen""" + def __init__(self): + self.create() + self._import_metadata() def create(self): """Erstellen des Datenbankspeichers""" raise NotImplementedError() - def select(self, collection, condition, multi): + def select(self, collection: str, condition: dict|list[dict], multi: str): """ Selektiert Datensätze aus der Datenbank, die die angegebene Bedingung erfüllen. @@ -34,7 +41,7 @@ def select(self, collection, condition, multi): """ raise NotImplementedError() - def insert(self, data, collection): + def insert(self, data: dict|list[dict], collection: str): """ Fügt einen oder mehrere Datensätze in die Datenbank ein. @@ -48,7 +55,7 @@ def insert(self, data, collection): """ raise NotImplementedError() - def update(self, data, collection, condition, multi): + def update(self, data: dict, collection: str, condition: dict|list[dict], multi:str): """ Aktualisiert Datensätze in der Datenbank, die die angegebene Bedingung erfüllen. @@ -71,7 +78,7 @@ def update(self, data, collection, condition, multi): """ raise NotImplementedError() - def delete(self, collection, condition): + def delete(self, collection: str, condition: dict | list[dict]): """ Löscht Datensätze in der Datenbank, die die angegebene Bedingung erfüllen. 
@@ -93,7 +100,7 @@ def delete(self, collection, condition): """ raise NotImplementedError() - def truncate(self, collection): + def truncate(self, collection: str): """Löscht alle Datensätze aus einer Tabelle/Collection Args: @@ -105,7 +112,44 @@ def truncate(self, collection): """ raise NotImplementedError() - def _generate_unique(self, tx_entries): + def get_metadata(self, uuid: str): + """ + Ruft Metadaten aus der Datenbank ab. + + Args: + uuid (str): Unique ID (key). + Returns: + dict: Die abgerufenen Metadaten. + """ + raise NotImplementedError() + + def filter_metadata(self, condition: dict, multi: str): + """ + Ruft Metadaten aus der Datenbank anhand von Kriterien ab. + + Args: + condition (dict): key-value-Paare für die Filterung der Metadaten. + multi (str) : ['AND' | 'OR'] Wenn 'condition' eine Liste mit conditions ist, + werden diese logisch wie hier angegeben verknüpft. Default: 'AND' + Returns: + dict: Die abgerufenen Metadaten. + """ + raise NotImplementedError() + + def set_metadata(self, entry: dict, overwrite: bool=True): + """ + Speichert oder ersetzt Metadaten in der Datenbank. + + Args: + entry (dict): Der Eintrag, der gespeichert werden soll. + overwrite (bool): Overwrite existing metadata with same uuid + if present (default: True) + Returns: + dict: Informationen über den Speichervorgang. + """ + raise NotImplementedError() + + def _generate_unique(self, tx_entries: dict | list[dict]): """ Erstellt einen einmaligen ID für jede Transaktion. @@ -142,7 +186,7 @@ def _generate_unique(self, tx_entries): return tx_list - def _generate_unique_meta(self, entry): + def _generate_unique_meta(self, entry: dict): """ Generiert eine eindeutige UUID für Metadaten basierend auf dem Eintrag. Args: @@ -163,37 +207,51 @@ def _generate_unique_meta(self, entry): return entry - def get_metadata(self, uuid): - """ - Ruft Metadaten aus der Datenbank ab. - - Args: - uuid (str): Unique ID (key). - Returns: - dict: Die abgerufenen Metadaten. 
- """ - raise NotImplementedError() - - def filter_metadata(self, condition, multi): - """ - Ruft Metadaten aus der Datenbank anhand von Kriterien ab. - - Args: - condition (dict): key-value-Paare für die Filterung der Metadaten. - multi (str) : ['AND' | 'OR'] Wenn 'condition' eine Liste mit conditions ist, - werden diese logisch wie hier angegeben verknüpft. Default: 'AND' - Returns: - dict: Die abgerufenen Metadaten. - """ - raise NotImplementedError() - - def set_metadata(self, entry): - """ - Speichert oder ersetzt Metadaten in der Datenbank. - - Args: - entry (dict): Der Eintrag, der gespeichert werden soll. - Returns: - dict: Informationen über den Speichervorgang. - """ - raise NotImplementedError() + def _import_metadata(self): + """Load content from json configs + (config, rules, parsers) into DB""" + config_path = os.path.join( + os.path.dirname( + os.path.dirname( + os.path.abspath(__file__) + ) + ), 'configs' + ) + + # Load given rules & parsers (do not overwrite) + for metatype in ['config', 'parser', 'rule']: + json_glob = os.path.join(config_path, f'{metatype}.*.json') + json_files = glob.glob(json_glob) + + # Load from found metadata files + for json_file in json_files: + + if not os.path.isfile(json_file): + # dead link + continue + + # Parse JSON + logging.info(f"Loading {metatype} from {json_file}") + with open(json_file, 'r', encoding='utf-8') as j: + try: + parsed_data = json.load(j) + + except json.JSONDecodeError as e: + logging.warning(f"Failed to parse JSON file: {e}") + + # Add metadata type and format as list + if isinstance(parsed_data, list): + + for i, _ in enumerate(parsed_data): + parsed_data[i]['metatype'] = metatype + + else: + parsed_data['metatype'] = metatype + parsed_data = [parsed_data] + + # Store in DB (do not overwrite) + inserted = 0 + for data in parsed_data: + inserted += self.set_metadata(data, overwrite=False).get('inserted') + + logging.info(f"Stored {inserted} {metatype} from {json_file}") diff --git 
a/handler/MongoDb.py b/handler/MongoDb.py index 45df1b45..cb2ef5ff 100644 --- a/handler/MongoDb.py +++ b/handler/MongoDb.py @@ -22,7 +22,8 @@ def __init__(self): self.connection = self.client[current_app.config['DATABASE_NAME']] if self.connection is None: raise IOError(f"Store {current_app.config['DATABASE_NAME']} not found !") - self.create() + + super().__init__() def create(self): """ @@ -192,18 +193,27 @@ def filter_metadata(self, condition, multi='AND'): query = self._form_complete_query(condition, multi) return list(collection.find(query)) - def set_metadata(self, entry): + def set_metadata(self, entry, overwrite=True): # Set uuid if not present if not entry.get('uuid'): entry = self._generate_unique_meta(entry) - # Remove Entry if exists collection = self.connection['metadata'] - result = collection.delete_one({'uuid': entry.get('uuid')}) - # Insert new Entry - result = collection.insert_one(entry) - return {'inserted': result.modified_count} + if overwrite: + # Remove Entry if exists + result = collection.delete_one({'uuid': entry.get('uuid')}) + + # Insert new Entry + result = collection.insert_one(entry) + return {'inserted': result.modified_count} + + # Only insert if not exists + if not collection.find({'uuid': entry.get('uuid')}): + result = collection.insert_one(entry) + return {'inserted': result.modified_count} + + return {'inserted': 0} def _form_condition(self, condition): """ diff --git a/handler/TinyDb.py b/handler/TinyDb.py index 88eaeceb..58a3411f 100644 --- a/handler/TinyDb.py +++ b/handler/TinyDb.py @@ -31,7 +31,7 @@ def __init__(self): except IOError as ex: logging.error(f"Fehler beim Verbindungsaufbau zur Datenbank: {ex}") - self.create() + super().__init__() def create(self): """ @@ -222,18 +222,28 @@ def filter_metadata(self, condition, multi='AND'): results = collection.search(query) return results - def set_metadata(self, entry): + def set_metadata(self, entry, overwrite=True): # Set uuid if not present if not entry.get('uuid'): entry 
= self._generate_unique_meta(entry) - # Remove Entry if exists collection = self.connection.table('metadata') - collection.remove(Query().uuid == entry.get('uuid')) - # Insert new Entry - result = collection.insert(entry) - return {'inserted': (1 if result else 0)} + if overwrite: + # Remove Entry if exists + collection.remove(Query().uuid == entry.get('uuid')) + + # Insert new Entry + result = collection.insert(entry) + return {'inserted': (1 if result else 0)} + + # Only insert if not exists + if not collection.search(Query().uuid == entry.get('uuid')): + result = collection.insert(entry) + return {'inserted': (1 if result else 0)} + + return {'inserted': 0} + def _form_where(self, condition): """ From a86c12ee06f3e1aac1c6babc765ebcade420205e Mon Sep 17 00:00:00 2001 From: Pitastic Date: Sat, 5 Apr 2025 22:50:12 +0200 Subject: [PATCH 09/17] =?UTF-8?q?offen:=20Mockdb=20Tests=20anpassen=20und?= =?UTF-8?q?=20eigene=20f=C3=BCr=20Metadata=20erstellen?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/config.py | 15 ++++++++ handler/BaseDb.py | 2 +- handler/Tags.py | 43 ++++++++++++++++------- handler/TinyDb.py | 1 - {configs => settings}/parser.default.json | 4 +-- {configs => settings}/rule.default.json | 0 tests/helper.py | 14 ++++++++ 7 files changed, 62 insertions(+), 17 deletions(-) create mode 100644 app/config.py rename {configs => settings}/parser.default.json (68%) rename {configs => settings}/rule.default.json (100%) diff --git a/app/config.py b/app/config.py new file mode 100644 index 00000000..03c406e8 --- /dev/null +++ b/app/config.py @@ -0,0 +1,15 @@ +LOG_ACCESS_FILE = '/tmp/pynance_access.log' +LOG_ERROR_FILE = '/tmp/pynance_error.log' + +# Options: +DATABASE_BACKEND = 'tiny' +#DATABASE_BACKEND = 'mongo' + +#DATABASE_URI = 'mongodb://testuser:testpassword@localhost:27017' # For mongo (URI) +DATABASE_URI = '/tmp' # For tiny (/path/to/) + +# For tiny: Filename ('testdata.json') +# For mongo: Collection name 
('testdata') +DATABASE_NAME = 'testdata.json' + +IBAN = 'DE89370400440532013000' diff --git a/handler/BaseDb.py b/handler/BaseDb.py index 8e3e9487..33e3a3c5 100644 --- a/handler/BaseDb.py +++ b/handler/BaseDb.py @@ -215,7 +215,7 @@ def _import_metadata(self): os.path.dirname( os.path.abspath(__file__) ) - ), 'configs' + ), 'settings' ) # Load given rules & parsers (do not overwrite) diff --git a/handler/Tags.py b/handler/Tags.py index 1b91ad28..5da14e4f 100644 --- a/handler/Tags.py +++ b/handler/Tags.py @@ -29,10 +29,10 @@ def parse(self, input_data): # RegExes # Der Key wird als Bezeichner für das Ergebnis verwendet. # Jeder RegEx muss genau eine Gruppe matchen. - parse_regexes = self._load_parsers() + parses = self._load_parsers() for d in input_data: - for name, regex in parse_regexes.items(): + for name, regex in parses.items(): re_match = regex.search(d['text_tx']) if re_match: d['parsed'][name] = re_match.group(1) @@ -177,9 +177,12 @@ def tag_regex(self, ruleset: dict, collection: str=None, prio: int=1, }) # -- Add Parsed Values + multi = 'AND' if rule.get('parsed') is not None: parsed_condition = rule.get('parsed') - for key, val in parsed_condition.items(): + multi = parsed_condition.get('multi', 'AND') + + for key, val in parsed_condition.get('query', {}).items(): rule_args['condition'].append({ 'key': {'parsed': key}, 'value': val, @@ -191,7 +194,7 @@ def tag_regex(self, ruleset: dict, collection: str=None, prio: int=1, matched = self.db_handler.select( collection=rule_args.get('collection'), condition=rule_args.get('condition'), - multi=rule_args.get('multi') + multi=multi ) # Nothing to update @@ -386,14 +389,16 @@ def _load_parsers(self) -> dict: Der Key wird als Bezeichner für das Ergebnis verwendet. Jeder RegEx muss genau eine Gruppe matchen. 
""" - raw_parser = self.db_handler.filter_metadata({"metatype":"parser"}) + raw_parser = self.db_handler.filter_metadata( + {"key": "metatype", "value": "parser"} + ) parsers = {} for p in raw_parser: - parsers[p.get('name')] = re.compile(p.get('regex')) + parsers[p['name']] = re.compile(p.get('regex')) return parsers - def _load_ruleset(self, rule_name=None): + def _load_ruleset(self, rule_name=None) -> dict|list[dict]: """ Load Rules from the Settings of for the requesting User. @@ -406,23 +411,35 @@ def _load_ruleset(self, rule_name=None): - user: nur private Regeln - both (default): alle Regeln Returns: - list(dict): Liste von Filterregeln + dict|list(dict): Liste von Filterregeln """ if rule_name: # Bestimmte Regel laden raw_rule = self.db_handler.filter_metadata( - {"metatype":"rule", "name": rule_name}, + [ + {"key": "metatype", "value": "rule"}, + {"key": "name", "value": rule_name} + ], multi='AND' ) rule = raw_rule[0] - rule['regex'] = re.compile(rule['regex']) - return {rule_name: raw_rule} + regex = rule.get('regex') + if regex: + rule['regex'] = re.compile(regex) + + return {rule_name: rule} # Alle Regeln laden - raw_rules = self.db_handler.filter_metadata({"metatype":"rule"}) + raw_rules = self.db_handler.filter_metadata( + {"key": "metatype", "value": "rule"} + ) rules = {} for r in raw_rules: - r['regex'] = re.compile(r.get('regex')) + regex = r.get('regex') + + if regex: + r['regex'] = re.compile(regex) + rules[r.get('name')] = r return rules diff --git a/handler/TinyDb.py b/handler/TinyDb.py index 58a3411f..d56575a8 100644 --- a/handler/TinyDb.py +++ b/handler/TinyDb.py @@ -244,7 +244,6 @@ def set_metadata(self, entry, overwrite=True): return {'inserted': 0} - def _form_where(self, condition): """ Erstellt aus einem Condition-Dict eine entsprechende Query diff --git a/configs/parser.default.json b/settings/parser.default.json similarity index 68% rename from configs/parser.default.json rename to settings/parser.default.json index 
c8b67aa7..9bafadfa 100644 --- a/configs/parser.default.json +++ b/settings/parser.default.json @@ -2,9 +2,9 @@ { "name": "Mandatsreferenz", "metatype": "parser", - "regex": "Mandatsref\\\\:\\\\s?([A-z0-9]*)" + "regex": "Mandatsref\\:\\s?([A-z0-9]*)" },{ - "name": "Gl\\u00e4ubiger-ID", + "name": "Gläubiger-ID", "metatype": "parser", "regex": "([A-Z]{2}[0-9]{2}[0-9A-Z]{3}(?:[0-9]{11}|[0-9]{19}))" } diff --git a/configs/rule.default.json b/settings/rule.default.json similarity index 100% rename from configs/rule.default.json rename to settings/rule.default.json diff --git a/tests/helper.py b/tests/helper.py index d12876e0..a504dbac 100644 --- a/tests/helper.py +++ b/tests/helper.py @@ -249,3 +249,17 @@ def update(self, data, collection=None, condition=None, multi=None): # pylint: d return {'updated': 1} return {'updated': 0} + + def filter_metadata(self, *args, **kwargs): + # [ + # { + # "name": "Mandatsreferenz", + # "metatype": "parser", + # "regex": "Mandatsref\\:\\s?([A-z0-9]*)" + # },{ + # "name": "Gläubiger-ID", + # "metatype": "parser", + # "regex": "([A-Z]{2}[0-9]{2}[0-9A-Z]{3}(?:[0-9]{11}|[0-9]{19}))" + # } + #] + return {} From d2cbdf3286bc4bbcbecd41fbb50c176397338053 Mon Sep 17 00:00:00 2001 From: Pitastic Date: Mon, 7 Apr 2025 21:10:49 +0200 Subject: [PATCH 10/17] Fix all Pytests --- handler/Tags.py | 4 ++-- tests/helper.py | 34 ++++++++++++++++++++------------- tests/test_unit_handler_Tags.py | 28 ++++++++++++++++----------- 3 files changed, 40 insertions(+), 26 deletions(-) diff --git a/handler/Tags.py b/handler/Tags.py index 5da14e4f..37bb58a5 100644 --- a/handler/Tags.py +++ b/handler/Tags.py @@ -398,7 +398,7 @@ def _load_parsers(self) -> dict: return parsers - def _load_ruleset(self, rule_name=None) -> dict|list[dict]: + def _load_ruleset(self, rule_name=None) -> dict: """ Load Rules from the Settings of for the requesting User. 
@@ -411,7 +411,7 @@ def _load_ruleset(self, rule_name=None) -> dict|list[dict]: - user: nur private Regeln - both (default): alle Regeln Returns: - dict|list(dict): Liste von Filterregeln + dict: Verzeichnis nach Namen der Filterregeln """ if rule_name: # Bestimmte Regel laden diff --git a/tests/helper.py b/tests/helper.py index a504dbac..9c252c88 100644 --- a/tests/helper.py +++ b/tests/helper.py @@ -250,16 +250,24 @@ def update(self, data, collection=None, condition=None, multi=None): # pylint: d return {'updated': 0} - def filter_metadata(self, *args, **kwargs): - # [ - # { - # "name": "Mandatsreferenz", - # "metatype": "parser", - # "regex": "Mandatsref\\:\\s?([A-z0-9]*)" - # },{ - # "name": "Gläubiger-ID", - # "metatype": "parser", - # "regex": "([A-Z]{2}[0-9]{2}[0-9A-Z]{3}(?:[0-9]{11}|[0-9]{19}))" - # } - #] - return {} + def filter_metadata(self, condition, *args, **kwargs): # pylint: disable=unused-argument + """Mock der Filtermetadatenabfrage + Args: + condition (dict): Filterkriterien + *args, **kwargs: Weitere Argumente + Returns: + list: Liste der Metadaten + """ + if condition == {"key": "metatype", "value": "parser"}: + return [ + { + "name": "Mandatsreferenz", + "metatype": "parser", + "regex": "Mandatsref\\:\\s?([A-z0-9]*)" + },{ + "name": "Gläubiger-ID", + "metatype": "parser", + "regex": "([A-Z]{2}[0-9]{2}[0-9A-Z]{3}(?:[0-9]{11}|[0-9]{19}))" + } + ] + return [] diff --git a/tests/test_unit_handler_Tags.py b/tests/test_unit_handler_Tags.py index 95c977e3..14267d56 100644 --- a/tests/test_unit_handler_Tags.py +++ b/tests/test_unit_handler_Tags.py @@ -19,21 +19,27 @@ # Test Tagging-Ruleset hinterlegen RULESET = { - 'Supermarkets': { - 'primary': 'Lebenserhaltungskosten', - 'secondary': 'Lebensmittel', - 'regex': r"(EDEKA|Wucherpfennig|Penny|Aldi|Kaufland|netto)", + "Supermarkets" : { + "name": "Supermarkets", + "metatype": "rule", + "primary": "Lebenserhaltungskosten", + "secondary": "Lebensmittel", + "regex": 
r"(EDEKA|Wucherpfennig|Penny|Aldi|Kaufland|netto)" }, - 'City Tax': { - 'primary': 'Haus und Grund', - 'secondary': 'Stadtabgaben', - 'parsed': { - 'Gläubiger-ID': r'DE7000100000077777' - }, + "City Tax": { + "name": "City Tax", + "metatype": "rule", + "primary": "Haus und Grund", + "secondary": "Stadtabgaben", + "parsed": { + "multi": "AND", + "query": { + 'Gläubiger-ID': r'DE7000100000077777' + } + } } } - def test_parsing_regex(test_app): """Testet das Parsen der Datensätze mit den fest hinterlegten RegExes""" with test_app.app_context(): From d319e4af14f7098b073577a92c449dbab92830f2 Mon Sep 17 00:00:00 2001 From: Pitastic Date: Mon, 7 Apr 2025 21:37:26 +0200 Subject: [PATCH 11/17] JSON Settings Struktur angepasst --- handler/BaseDb.py | 7 +++++-- requirements.txt | 3 ++- settings/config/.gitkeep | 0 settings/parser.default.json | 11 ----------- settings/parser/00-default.json | 12 ++++++++++++ .../{rule.default.json => rules/00-default.json} | 4 ++-- tests/requirements.txt | 3 ++- 7 files changed, 23 insertions(+), 17 deletions(-) create mode 100644 settings/config/.gitkeep delete mode 100644 settings/parser.default.json create mode 100644 settings/parser/00-default.json rename settings/{rule.default.json => rules/00-default.json} (87%) diff --git a/handler/BaseDb.py b/handler/BaseDb.py index 33e3a3c5..a4bc1904 100644 --- a/handler/BaseDb.py +++ b/handler/BaseDb.py @@ -7,6 +7,7 @@ import logging import glob import json +from natsort import natsorted class BaseDb(): @@ -210,7 +211,7 @@ def _generate_unique_meta(self, entry: dict): def _import_metadata(self): """Load content from json configs (config, rules, parsers) into DB""" - config_path = os.path.join( + settings_path = os.path.join( os.path.dirname( os.path.dirname( os.path.abspath(__file__) @@ -220,8 +221,10 @@ def _import_metadata(self): # Load given rules & parsers (do not overwrite) for metatype in ['config', 'parser', 'rule']: - json_glob = os.path.join(config_path, f'{metatype}.*.json') + json_path = 
os.path.join(settings_path, metatype) + json_glob = os.path.join(json_path, '*.json') json_files = glob.glob(json_glob) + json_files = natsorted(json_files) # Load from found metadata files for json_file in json_files: diff --git a/requirements.txt b/requirements.txt index f6ed88d9..2eb57e8c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ tinydb==4.7.1 pymongo==4.3.3 -flask==3.1.0 \ No newline at end of file +flask==3.1.0 +natsort==8.4.0 \ No newline at end of file diff --git a/settings/config/.gitkeep b/settings/config/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/settings/parser.default.json b/settings/parser.default.json deleted file mode 100644 index 9bafadfa..00000000 --- a/settings/parser.default.json +++ /dev/null @@ -1,11 +0,0 @@ -[ - { - "name": "Mandatsreferenz", - "metatype": "parser", - "regex": "Mandatsref\\:\\s?([A-z0-9]*)" - },{ - "name": "Gläubiger-ID", - "metatype": "parser", - "regex": "([A-Z]{2}[0-9]{2}[0-9A-Z]{3}(?:[0-9]{11}|[0-9]{19}))" - } -] \ No newline at end of file diff --git a/settings/parser/00-default.json b/settings/parser/00-default.json new file mode 100644 index 00000000..92545c55 --- /dev/null +++ b/settings/parser/00-default.json @@ -0,0 +1,12 @@ +[ + { + "name": "Mandatsreferenz", + "metatype": "parser", + "regex": "Mandatsref\\:\\s?([A-z0-9]*)" + }, + { + "name": "Gläubiger-ID", + "metatype": "parser", + "regex": "([A-Z]{2}[0-9]{2}[0-9A-Z]{3}(?:[0-9]{11}|[0-9]{19}))" + } +] \ No newline at end of file diff --git a/settings/rule.default.json b/settings/rules/00-default.json similarity index 87% rename from settings/rule.default.json rename to settings/rules/00-default.json index a7b11cb5..234d338a 100644 --- a/settings/rule.default.json +++ b/settings/rules/00-default.json @@ -5,7 +5,7 @@ "primary": "Lebenserhaltungskosten", "secondary": "Lebensmittel", "regex": "(EDEKA|Wucherpfennig|Penny|Aldi|Kaufland|netto)" - },{ + }, { "name": "City Tax", "metatype": "rule", "primary": "Haus und 
Grund", @@ -13,7 +13,7 @@ "parsed": { "multi": "AND", "query": { - "Gl\u00e4ubiger-ID": "DE7000100000077777" + "Gläubiger-ID": "DE7000100000077777" } } } diff --git a/tests/requirements.txt b/tests/requirements.txt index 42bbf9dc..206fc69c 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -1,4 +1,5 @@ pytest==7.4.0 requests==2.31.0 beautifulsoup4==4.12.2 -requests_mock==1.12.1 \ No newline at end of file +requests_mock==1.12.1 +natsort==8.4.0 \ No newline at end of file From a9f7e11f1a7a47df779419007d2db289dd822830 Mon Sep 17 00:00:00 2001 From: Pitastic Date: Tue, 8 Apr 2025 21:46:37 +0200 Subject: [PATCH 12/17] fix folder naming --- settings/{rules => rule}/00-default.json | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename settings/{rules => rule}/00-default.json (100%) diff --git a/settings/rules/00-default.json b/settings/rule/00-default.json similarity index 100% rename from settings/rules/00-default.json rename to settings/rule/00-default.json From 72969da74f91cef28af1e092e4f353aaec533eca Mon Sep 17 00:00:00 2001 From: Pitastic Date: Tue, 8 Apr 2025 22:03:15 +0200 Subject: [PATCH 13/17] Metadata test eingebaut --- handler/BaseDb.py | 2 +- tests/test_unit_handler_DB.py | 43 +++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/handler/BaseDb.py b/handler/BaseDb.py index a4bc1904..1b3ee0cf 100644 --- a/handler/BaseDb.py +++ b/handler/BaseDb.py @@ -133,7 +133,7 @@ def filter_metadata(self, condition: dict, multi: str): multi (str) : ['AND' | 'OR'] Wenn 'condition' eine Liste mit conditions ist, werden diese logisch wie hier angegeben verknüpft. Default: 'AND' Returns: - dict: Die abgerufenen Metadaten. + list: Die abgerufenen Metadaten. 
""" raise NotImplementedError() diff --git a/tests/test_unit_handler_DB.py b/tests/test_unit_handler_DB.py index 3f61ecd0..724c0955 100644 --- a/tests/test_unit_handler_DB.py +++ b/tests/test_unit_handler_DB.py @@ -243,3 +243,46 @@ def test_delete(test_app): delete_many = deleted_db.get('deleted') assert delete_many == 4, \ f'Es wurde nicht die richtige Anzahl an Datensätzen gelöscht: {delete_many}' + + +def test_set_metadata(test_app): + """Testet das Setzen von Metadaten""" + with test_app.app_context(): + # Metadaten setzen + metadata = { + "uuid": "1234567890", + "name": "Wild Regex", + "metatype": "test", + "regex": "Mandatsref\\:\\s?([A-z0-9]*)" + } + set_metadata = test_app.host.db_handler.set_metadata(metadata) + assert set_metadata.get('inserted') == 1, "Die Metadaten konnten nicht gesetzt werden" + + # Overwrite with the same entry + set_metadata = test_app.host.db_handler.set_metadata(metadata) + assert set_metadata.get('inserted') == 1, "Die Metadaten wurde nicht überschrieben" + + # Do not overwrite equal uuids + set_metadata = test_app.host.db_handler.set_metadata(metadata, overwrite=False) + assert set_metadata.get('inserted') == 0, "Die Metadaten wurden überschrieben" + + +def test_get_metadata(test_app): + """Testet das Auslesen eines bestimmten Metadatums""" + with test_app.app_context(): + # Metadaten abfragen + metadata = test_app.host.db_handler.get_metadata(uuid='1234567890') + assert metadata is not None, "Es wurden keine Metadaten zurückgegeben" + assert isinstance(metadata, dict), "Metadaten sind keine LisDictte" + assert metadata.get('uuid') == '1234567890', "Es wurden der falsche Eintrag geladen" + + +def test_filter_metadata(test_app): + """Testet das Filtern von Metadaten""" + with test_app.app_context(): + # Metadaten abfragen + metadata = test_app.host.db_handler.filter_metadata({'key': 'name', 'value': 'Wild Regex'}) + assert metadata is not None, "Es wurden keine Metadaten zurückgegeben" + assert isinstance(metadata, list), 
"Metadaten sind keine Liste" + assert len(metadata) == 1, "Es wurden nicht die erwarteten Metadaten zurückgegeben" + assert metadata[0].get('uuid') == '1234567890', "Es wurden der falsche Eintrag geladen" From 818568f002ccb542453abb532db3a0878efc00c1 Mon Sep 17 00:00:00 2001 From: Pitastic Date: Tue, 8 Apr 2025 23:17:34 +0200 Subject: [PATCH 14/17] try to fix routes: getTx catches getMeta --- app/ui.py | 78 ++++++++++++++++++++++++------- handler/BaseDb.py | 44 ++++++++++++++++- tests/test_integ_app_protected.py | 5 -- tests/test_integ_basics.py | 56 ++++++++++++++++++++++ 4 files changed, 159 insertions(+), 24 deletions(-) diff --git a/app/ui.py b/app/ui.py index 3c69bd0f..45e501dd 100644 --- a/app/ui.py +++ b/app/ui.py @@ -129,6 +129,67 @@ def index(iban) -> str: # - API Endpoints - - - - - - - - - - - - - - - - - - - - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + @current_app.route('/api/saveMeta/', defaults={'rule_type':'rule'}, methods=['POST']) + @current_app.route('/api/saveMeta/', methods=['POST']) + def saveMeta(rule_type): + """ + Einfügen oder updaten von Metadaten in der Datenbank. 
+ Args (json / file): + rule_type, str: Typ der Regel (rule | parser) + rule, dict: Regel-Objekt + """ + input_file = request.files.get('input_file') + if not input_file and not request.json: + return {'error': 'No file or json provided'}, 400 + + if input_file: + # Store Upload file to tmp + path = '/tmp/metadata.tmp' + _ = self._mv_fileupload(input_file, path) + r = self.db_handler.import_metadata(path=path, metatype=rule_type) + + else: + entry = request.json + entry['metatype'] = rule_type + r = self.db_handler.set_metadata(entry, overwrite=True) + + if not r.get('inserted'): + return {'error': 'No data inserted', 'reason': r.get('error')}, 400 + + return r, 201 + + @current_app.route('/api/getMeta/', methods=['GET'], + defaults={'rule_type':'-', 'uuid': None}) + @current_app.route('/api/getMeta//uuid', methods=['GET']) + def getMeta(rule_type, uuid): + """ + Auflisten von Metadaten (optional gefilter) + Args (json): + rule_type, str: Typ der Regel (rule | parser | config) + uuid, str: ID des Metadatums + """ + if uuid is not None: + # Select specific Meta + meta = self.db_handler.select( + rule_type, { + 'key': 'uuid', + 'value': uuid + } + ) + return meta[0], 200 + + if rule_type != '-': + # Select specific Meta Type + meta = self.db_handler.select({ + 'key': 'metatype', + 'value': rule_type}) + return meta, 200 + + # Select all Meta + meta = self.db_handler.select(collection='metadata', condition=None) + return meta, 200 + @current_app.route('/api/upload/', methods=['POST']) def uploadIban(iban): """ @@ -281,23 +342,6 @@ def setManualTags(iban): return updated_entries - @current_app.route('/api/saveRule/', defaults={'rule_type':'rule'}, methods=['POST']) - @current_app.route('/api/saveRule/', methods=['POST']) - def saveRule(rule_type): - """ - Einfügen oder updaten einer Regel in der Datenbank. 
- Args (json): - rule_type, str: Typ der Regel (rule | parser) - rule, dict: Regel-Objekt - - name, str: Name der Regel - - rule, dict: Regel-Objekt - """ - #TODO: Beide Arten in einer DB speichern, anhand eines Key aber unterscheiden. - # - Es müssen für alles mit metadata noch tests geschrieben werden. - # - Bisher ist nur der Import via FileUpload möglich - # - Das speichern eines Regexes in JSON ist noch problematisch - # - Laden, Filtern und Auflisten testen - raise NotImplementedError() def _set_manual_tag(self, iban, t_id, data): """ diff --git a/handler/BaseDb.py b/handler/BaseDb.py index 1b3ee0cf..be5ab1fe 100644 --- a/handler/BaseDb.py +++ b/handler/BaseDb.py @@ -14,7 +14,7 @@ class BaseDb(): """Basisklasse für die Vererbung an Datenbankhandler mit allgemeinen Funktionen""" def __init__(self): self.create() - self._import_metadata() + self._load_metadata() def create(self): """Erstellen des Datenbankspeichers""" @@ -208,7 +208,7 @@ def _generate_unique_meta(self, entry: dict): return entry - def _import_metadata(self): + def _load_metadata(self): """Load content from json configs (config, rules, parsers) into DB""" settings_path = os.path.join( @@ -258,3 +258,43 @@ def _import_metadata(self): inserted += self.set_metadata(data, overwrite=False).get('inserted') logging.info(f"Stored {inserted} {metatype} from {json_file}") + + def import_metadata(self, path: str=None, metatype: str='rule'): + """Import metadata from given path + + Args: + path (str): Path to the metadata json file + metatype (str): Type of metadata (default: 'rule') + """ + # Check if path exists + if not os.path.exists(path): + logging.error(f"Path {path} does not exist") + return + + # Parse JSON + with open(path, 'r', encoding='utf-8') as j: + try: + parsed_data = json.load(j) + + except json.JSONDecodeError as e: + error_msg = f"Failed to parse JSON file: {e}" + logging.warning(error_msg) + return {'error': error_msg} + + # Add metadata type and format as list + if 
isinstance(parsed_data, list): + + for i, _ in enumerate(parsed_data): + parsed_data[i]['metatype'] = metatype + + else: + parsed_data['metatype'] = metatype + parsed_data = [parsed_data] + + # Store in DB (do not overwrite) + inserted = 0 + for data in parsed_data: + inserted += self.set_metadata(data, overwrite=True).get('inserted') + + logging.info(f"Stored {inserted} imported metadata from {path}") + return {'inserted': inserted} diff --git a/tests/test_integ_app_protected.py b/tests/test_integ_app_protected.py index 56f21fb8..6a755e49 100644 --- a/tests/test_integ_app_protected.py +++ b/tests/test_integ_app_protected.py @@ -79,11 +79,6 @@ def test_create_user(): """Testet das Anlegen eines Users""" return -@pytest.mark.skip(reason="Currently not implemented yet") -def test_save_rule(): - """Testet das Speichern einer Regel (mehrfach) sowie das Update einer Regel""" - return - def test_load_ruleset_all(test_app): """Testet das Laden aller Regeln für den anfragenden Benutzer""" #TODO: User erkennen und für den Test setzen diff --git a/tests/test_integ_basics.py b/tests/test_integ_basics.py index ee24ee2a..d05bbd5c 100644 --- a/tests/test_integ_basics.py +++ b/tests/test_integ_basics.py @@ -165,6 +165,62 @@ def test_double_upload(test_app): assert len(rows) == 5, f"Es wurden zu viele Einträge ({len(rows)}) angelegt" +def test_save_meta(test_app): + """Testet das Speichern Metadaten""" + with test_app.app_context(): + + with test_app.test_client() as client: + + # Parser in MetadaDB schreiben + parameters = { + 'uuid': '1234567890', + 'name': 'Test Parsing 4 Digits', + 'regex': '[0-9]]{4}' + } + result = client.post("/api/saveMeta/parser", json=parameters) + assert result.status_code == 201, \ + "Der Statuscode war nicht wie erwartet" + + result = result.json + assert result.get('inserted') == 1, "Es wurde nichts eingefügt" + #TODO: Upload Rule with file + + +def test_list_meta(test_app): + """Testet das Speichern Metadaten""" + with test_app.app_context(): + + 
with test_app.test_client() as client: + + # Alle Einträge aus MetadatenDB holen + result = client.get("/api/getMeta") + result = result.json + assert isinstance(result, list), \ + "Die Antwort war keine Liste" + assert len(result) > 0, \ + "Die Liste war leer" + + # Alle Parser aus MetadatenDB holen + result = client.get("/api/getMeta/parser") + result = result.json + assert isinstance(result, list), \ + "Die Antwort war keine Liste" + assert len(result) > 0, \ + "Die Liste war leer" + + # Regel mit Namen aus der UserDB holen + result = client.get("/api/getMeta/-/1234567890") + result = result.json + assert isinstance(result, dict), \ + "Die Antwort war kein Dictionary" + assert result.get('name') == 'Test Parsing 4 Digits', \ + "Die Regel war nicht wie erwartet" + assert result.get('regex') == '[0-9]]{4}', \ + "Die Regel war nicht wie erwartet" + assert result.get('uuid') == '1234567890', \ + "Die Regel war nicht wie erwartet" + + def test_tag_stored(test_app): """Testet das Tagging, wenn es über den API Endpoint angesprochen wird""" with test_app.app_context(): From 2a5924c9b31ccb81e028cefab369c2b2516a3584 Mon Sep 17 00:00:00 2001 From: Pitastic Date: Fri, 11 Apr 2025 22:40:31 +0200 Subject: [PATCH 15/17] =?UTF-8?q?Routing=20fix;=20Tests=20f=C3=BCr=20Metad?= =?UTF-8?q?aten;=20auch=20per=20file=20finish?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/ui.py | 37 ++++++++++++++++--------------------- handler/TinyDb.py | 5 +++++ tests/conftest.py | 15 +++++++-------- tests/test_integ_basics.py | 30 ++++++++++++++++++++++++------ 4 files changed, 52 insertions(+), 35 deletions(-) diff --git a/app/ui.py b/app/ui.py index 45e501dd..30a35f79 100644 --- a/app/ui.py +++ b/app/ui.py @@ -160,34 +160,29 @@ def saveMeta(rule_type): return r, 201 @current_app.route('/api/getMeta/', methods=['GET'], - defaults={'rule_type':'-', 'uuid': None}) - @current_app.route('/api/getMeta//uuid', methods=['GET']) - def getMeta(rule_type, 
uuid): + defaults={'rule_filter':None}) + @current_app.route('/api/getMeta/', methods=['GET']) + def getMeta(rule_filter): """ Auflisten von Metadaten (optional gefilter) Args (json): - rule_type, str: Typ der Regel (rule | parser | config) - uuid, str: ID des Metadatums + rule_filter, str: Typ der Regel (rule | parser | config) oder ID """ - if uuid is not None: - # Select specific Meta - meta = self.db_handler.select( - rule_type, { - 'key': 'uuid', - 'value': uuid - } - ) - return meta[0], 200 - - if rule_type != '-': - # Select specific Meta Type - meta = self.db_handler.select({ - 'key': 'metatype', - 'value': rule_type}) + if rule_filter is not None: + + if rule_filter in ['rule', 'parser', 'config']: + # Select specific Meta Type + meta = self.db_handler.filter_metadata({ + 'key': 'metatype', + 'value': rule_filter}) + return meta, 200 + + # Select specific Meta ID + meta = self.db_handler.get_metadata(rule_filter) return meta, 200 # Select all Meta - meta = self.db_handler.select(collection='metadata', condition=None) + meta = self.db_handler.filter_metadata(condition=None) return meta, 200 @current_app.route('/api/upload/', methods=['POST']) diff --git a/handler/TinyDb.py b/handler/TinyDb.py index d56575a8..1f864efb 100644 --- a/handler/TinyDb.py +++ b/handler/TinyDb.py @@ -218,6 +218,11 @@ def get_metadata(self, uuid): def filter_metadata(self, condition, multi='AND'): collection = self.connection.table('metadata') + if condition is None: + # Return all + return collection.all() + + # Form condition into a query query = self._form_complete_query(condition, multi) results = collection.search(query) return results diff --git a/tests/conftest.py b/tests/conftest.py index 12d25cec..c6c9438d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -2,6 +2,7 @@ import os import sys +import shutil import pytest # Add Parent for importing from Modules @@ -16,8 +17,8 @@ def test_app(): """Managing Test-Flask-App for Tests""" # Creating App - 
#shutil.rmtree("/tmp/pihomie-test", ignore_errors=True) - #os.makedirs("/tmp/pihomie-test", exist_ok=True) + shutil.rmtree("/tmp/pihomie-test", ignore_errors=True) + os.makedirs("/tmp/pihomie-test", exist_ok=True) # Config root_path = os.path.dirname(os.path.realpath(__file__)) @@ -32,15 +33,14 @@ def test_app(): app.host.db_handler.truncate() yield app - #if os.path.isdir("/tmp/pihomie-test"): - # shutil.rmtree("/tmp/pihomie-test") + shutil.rmtree("/tmp/pihomie-test", ignore_errors=True) @pytest.fixture(scope="module") def mocked_db(): """Special Instance with mocked DB""" # Creating App - #shutil.rmtree("/tmp/pihomie-test", ignore_errors=True) - #os.makedirs("/tmp/pihomie-test", exist_ok=True) + shutil.rmtree("/tmp/pihomie-test", ignore_errors=True) + os.makedirs("/tmp/pihomie-test", exist_ok=True) # Config root_path = os.path.dirname(os.path.realpath(__file__)) @@ -55,8 +55,7 @@ def mocked_db(): app.host.db_handler = MockDatabase() yield app - #if os.path.isdir("/tmp/pihomie-test"): - # shutil.rmtree("/tmp/pihomie-test") + shutil.rmtree("/tmp/pihomie-test", ignore_errors=True) def mock_method(*args, **kwargs): """Function for overwriting Method which return 'None'""" diff --git a/tests/test_integ_basics.py b/tests/test_integ_basics.py index d05bbd5c..918cde6d 100644 --- a/tests/test_integ_basics.py +++ b/tests/test_integ_basics.py @@ -1,6 +1,7 @@ #!/usr/bin/python3 # pylint: disable=invalid-name """Basisc Module for easy Imports and Methods""" +import json import os import sys import io @@ -171,9 +172,9 @@ def test_save_meta(test_app): with test_app.test_client() as client: - # Parser in MetadaDB schreiben + # Parser in MetadaDB schreiben (form) parameters = { - 'uuid': '1234567890', + 'uuid': '555', 'name': 'Test Parsing 4 Digits', 'regex': '[0-9]]{4}' } @@ -183,7 +184,24 @@ def test_save_meta(test_app): result = result.json assert result.get('inserted') == 1, "Es wurde nichts eingefügt" - #TODO: Upload Rule with file + + # Parser in MetadaDB schreiben (file 
upload) + parameters = { + 'uuid': '0987654321', + 'name': 'By File', + 'regex': '[0-5]]{4}' + } + parameters = json.dumps(parameters).encode('utf-8') + files = {'input_file': (io.BytesIO(parameters), 'commerzbank.csv')} + result = client.post( + "/api/saveMeta/", + data=files, content_type='multipart/form-data' + ) + assert result.status_code == 201, \ + "Der Statuscode war nicht wie erwartet" + + result = result.json + assert result.get('inserted') == 1, "Es wurde nichts eingefügt" def test_list_meta(test_app): @@ -193,7 +211,7 @@ def test_list_meta(test_app): with test_app.test_client() as client: # Alle Einträge aus MetadatenDB holen - result = client.get("/api/getMeta") + result = client.get("/api/getMeta/") result = result.json assert isinstance(result, list), \ "Die Antwort war keine Liste" @@ -209,7 +227,7 @@ def test_list_meta(test_app): "Die Liste war leer" # Regel mit Namen aus der UserDB holen - result = client.get("/api/getMeta/-/1234567890") + result = client.get("/api/getMeta/555") result = result.json assert isinstance(result, dict), \ "Die Antwort war kein Dictionary" @@ -217,7 +235,7 @@ def test_list_meta(test_app): "Die Regel war nicht wie erwartet" assert result.get('regex') == '[0-9]]{4}', \ "Die Regel war nicht wie erwartet" - assert result.get('uuid') == '1234567890', \ + assert result.get('uuid') == '555', \ "Die Regel war nicht wie erwartet" From fdf48779191dc7c129a020719951d776f71d7b50 Mon Sep 17 00:00:00 2001 From: Pitastic Date: Sat, 12 Apr 2025 23:29:17 +0200 Subject: [PATCH 16/17] New Dev INterace; Removoe Tagging; Metadata --- Models.md | 19 ++++++-- app/server.py | 2 +- app/static/js/index.js | 51 ++++++++++++++++++--- app/templates/index.html | 91 ++++++++++++++++++++++++-------------- app/ui.py | 86 ++++++++++++++++++++++++++++++++--- handler/Tags.py | 13 +++--- tests/test_integ_basics.py | 31 ++++++++++--- 7 files changed, 232 insertions(+), 61 deletions(-) diff --git a/Models.md b/Models.md index 2229c3d9..ace1c7c6 100644 --- 
a/Models.md +++ b/Models.md @@ -1,6 +1,6 @@ -# Models +## Models -## Datenbankeintrag für eine Transaktion +### Datenbankeintrag für eine Transaktion ``` { @@ -27,7 +27,7 @@ } ``` -## Datenbankeintrag für User Settings +### Datenbankeintrag für User Settings ``` { @@ -46,7 +46,7 @@ } ``` -### Dictionary eines Rulesets (Tag/Parse) +#### Dictionary eines Rulesets (Tag/Parse) ``` { @@ -69,3 +69,14 @@ ) } ``` + +## Handling von Prioritäten + +Die Priorität wird zwischen 0 und 100 automatisch gesetzt, kann aber auch angegeben werden. 0 ist unwichtig, 100 ist wichtig. + +Beim Tagging werden nur Einträge selektiert, die eine niedrigere Priorität haben als die aktuelle Regel. + +Es wird beim Taggen entweder die Priorität 1 (automatisches Taggen), die der Regel gesetzt (wenn diese höher ist) oder die explizit übermittelte. Ausnahmen sind: + +- Das manuelle Taggen: Hier wird immer eine Priorität von 99 gesetzt. +- Das automatische Tagging mit einer explizit angegebenen Regel: Hier werden Einträge < 99 selektiert und überschrieben, dann aber wieder die Priorität der Regel (oder 1) gesetzt. 
diff --git a/app/server.py b/app/server.py index c67367bb..9eed5eae 100644 --- a/app/server.py +++ b/app/server.py @@ -56,4 +56,4 @@ def create_app(config_path: str) -> Flask: 'config.py' ) application = create_app(config) - application.run(host='0.0.0.0', port=8110, debug=True) + application.run(host='0.0.0.0', port=8110) diff --git a/app/static/js/index.js b/app/static/js/index.js index 6eb1c6b5..2ccaeb87 100644 --- a/app/static/js/index.js +++ b/app/static/js/index.js @@ -72,11 +72,11 @@ function truncateDB() { * 'input_tagging_name' (more in the Future) */ function tagEntries() { - // TODO: Implement more, complex tagging rules const iban = document.getElementById('input_iban').value; - const rule_name = document.getElementById('input_tagging_name').value; - const rules = { - 'rule_name': rule_name + const rule_name = document.getElementById('tagging_name').value; + let rules = {} + if (rule_name) { + rules['rule_name'] = rule_name } apiSubmit(iban + '/tag', rules, function (responseText, error) { @@ -92,6 +92,47 @@ function tagEntries() { } +function removeTags() { + const iban = document.getElementById('input_iban').value; + const checkboxes = document.querySelectorAll('input[name="entry-select[]"]'); + const t_ids = []; + checkboxes.forEach((checkbox) => { + if (checkbox.checked) { + t_ids.push(checkbox.value); + } + }); + + if (!iban) { + alert('Please provide an IBAN.'); + return; + } + if (!t_ids) { + alert('Please provide a Transaction ID (checkbox).'); + return; + } + + let api_function; + let tags = {}; + if (t_ids.length == 1) { + api_function = iban+'/removeTag/'+t_ids[0]; + } else { + api_function = iban+'/removeTags'; + tags['t_ids'] = t_ids; + }; + + apiSubmit(api_function, tags, function (responseText, error) { + if (error) { + printResult('Tagging failed: ' + '(' + error + ')' + responseText); + + } else { + alert('Entries tagged successfully!' 
+ responseText); + window.location.reload(); + + } + }, false); +} + + /** * Tags the entries in the database in a direct manner (assign Categories, no rules) * Optional Tagging commands are read from the inputs with IDs @@ -170,7 +211,7 @@ function getInfo(uuid) { function saveMeta() { const meta_type = document.getElementById('select_meta').value; - const fileInput = document.getElementById('input_file'); + const fileInput = document.getElementById('input-json'); if (fileInput.files.length === 0) { alert('Please select a file to upload.'); return; diff --git a/app/templates/index.html b/app/templates/index.html index c724d041..eaee9535 100644 --- a/app/templates/index.html +++ b/app/templates/index.html @@ -37,47 +37,70 @@ -
-

Input Forms

-
    -
  • Filename:
  • -
-
    -
  • -
  • -
-
    -
  • -
-
    -
  • -
  • -
- -
-

Switches to Click

- Upload - truncate Database - tag Entries - tag Entries (manually) - save Meta +

Actions for

+ + +
+
+

Datenbank

+ + + +
+
+

Tagging

+ + + + + + +
+
+

Metadata

+ + + +
+
-
- - - - -
diff --git a/app/ui.py b/app/ui.py index 30a35f79..34504cd5 100644 --- a/app/ui.py +++ b/app/ui.py @@ -120,9 +120,18 @@ def index(iban) -> str: 'primary_tag', 'secondary_tag', 'prio', 'parsed'] + # Rules for Selection + rules = self.db_handler.filter_metadata({ + 'key': 'metatype', + 'value': 'rule' + }) + rule_list = [] + for rule in rules: + rule_list.append(rule.get('name')) + return render_template('index.html', iban=iban, table_header=table_header, - table_data=rows) + table_data=rows, rule_list=rule_list) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -270,7 +279,7 @@ def getTx(iban, t_id): ) return tx_details[0], 200 - @current_app.route('/api//truncateDatabase', methods=['DELETE']) + @current_app.route('/api//truncateDatabase/', methods=['DELETE']) def truncateDatabase(iban): """ Leert die Datenbank zu einer IBAN @@ -283,7 +292,7 @@ def truncateDatabase(iban): deleted_entries = self.db_handler.truncate(iban) return {'deleted': deleted_entries}, 200 - @current_app.route('/api//tag', methods=['PUT']) + @current_app.route('/api//tag/', methods=['PUT']) def tag(iban) -> dict: """ Kategorisiert die Kontoumsätze und aktualisiert die Daten in der Instanz. @@ -313,7 +322,7 @@ def setManualTag(iban, t_id): data = request.json return self._set_manual_tag(iban, t_id, data) - @current_app.route('/api//setManualTags', methods=['PUT']) + @current_app.route('/api//setManualTags/', methods=['PUT']) def setManualTags(iban): """ Handler für _set_manual_tag() für mehrere Einträge. 
@@ -329,7 +338,7 @@ def setManualTags(iban): """ data = request.json updated_entries = {'updated': 0} - + #TODO: Frontend prüfen for tx in data.get('t_ids'): updated = self._set_manual_tag(iban, tx, data) @@ -337,6 +346,48 @@ def setManualTags(iban): return updated_entries + @current_app.route('/api//removeTag/', methods=['PUT']) + def removeTag(iban, t_id): + """ + Entfernt gesetzte Tags für einen Eintrag- + + Args (uri/json): + iban, str: IBAN + t_id, str: Datenbank ID der Transaktion, + die bereinigt werden soll. + Returns: + dict: updated, int: Anzahl der gespeicherten Datensätzen + """ + if t_id is None: + return {'error': 'No t_id provided'}, 400 + + return self._remove_tags(iban, t_id) + + @current_app.route('/api//removeTags/', methods=['PUT']) + def removeTags(iban): + """ + Entfernt gesetzte Tags für mehrere Einträge. + + Args (uri/json): + iban, str: IBAN + t_ids, list[str]: Datenbank IDs der Transaktionen, + die bereinigt werden sollen. + Returns: + dict: updated, int: Anzahl der gespeicherten Datensätzen + """ + data = request.json + t_ids = data.get('t_ids') + if t_ids is None: + return {'error': 'No t_id provided'}, 400 + + updated_entries = {'updated': 0} + for t_id in t_ids: + + updated = self._remove_tags(iban, t_id) + updated_entries['updated'] += updated.get('updated') + + return updated_entries + def _set_manual_tag(self, iban, t_id, data): """ @@ -372,6 +423,31 @@ def _set_manual_tag(self, iban, t_id, data): updated_entries = self.db_handler.update(new_tag_data, iban, condition) return updated_entries + def _remove_tags(self, iban, t_id): + """ + Entfernt ein gesetztes Tag für einen Eintrag. + + Args: + iban, str: IBAN + t_id, str: Datenbank ID der Transaktion, + die bereinigt werden soll. 
+ Returns: + dict: updated, int: Anzahl der gespeicherten Datensätzen + """ + new_daata = { + 'prio': 0, + 'primary_tag': None, + 'secondary_tag': None + } + condition = [{ + 'key': 'uuid', + 'value': t_id, + 'compare': '==' + }] + + updated_entries = self.db_handler.update(new_daata, iban, condition) + return updated_entries + def _mv_fileupload(self, input_file, path): """ Verschiebt die hochgeladene Datei in ein temporäres Verzeichnis. diff --git a/handler/Tags.py b/handler/Tags.py index 37bb58a5..77fb4c12 100644 --- a/handler/Tags.py +++ b/handler/Tags.py @@ -49,7 +49,7 @@ def tag(self, iban, Args: data (dict): Dictionary mit den Parametern für das Tagging: iban Name der Collection - rule_name: Name der anzuwendenden Taggingregel. + rule_name: UUID der anzuwendenden Taggingregel. Reserviertes Keyword 'ai' führt nur das AI Tagging aus. Default: Es werden alle Regeln des Benutzers ohne das AI Tagging angewendet. @@ -117,6 +117,12 @@ def tag(self, iban, raise ValueError('Es existieren noch keine Regeln für den Benutzer') + # Benutzer Regeln überschreibt alle autpomatischen Tags, + # setzt aber wieder nur seine eigene Prio. + if rule_name is not None: + prio_set = rules[rule_name].get('prioriry', prio) + prio = 99 + # Benutzer Regeln anwenden result_rx = self.tag_regex(rules, iban, prio=prio, prio_set=prio_set, dry_run=dry_run) result_ai = self.tag_ai(iban, dry_run=dry_run) @@ -405,11 +411,6 @@ def _load_ruleset(self, rule_name=None) -> dict: Args: rule_name (str, optional): Lädt die Regel mit diesem Namen. Default: Es werden alle Regeln geladen. - namespace (str, system|user|both): Unterscheidung aus weclhem Set Regeln - geladen oder gesucht werden soll. 
- - system: nur allgemeine Regeln - - user: nur private Regeln - - both (default): alle Regeln Returns: dict: Verzeichnis nach Namen der Filterregeln """ diff --git a/tests/test_integ_basics.py b/tests/test_integ_basics.py index 918cde6d..3abe8b60 100644 --- a/tests/test_integ_basics.py +++ b/tests/test_integ_basics.py @@ -24,7 +24,7 @@ def test_truncate(test_app): with test_app.app_context(): with test_app.test_client() as client: - result = client.delete(f'/api/{test_app.config['IBAN']}/truncateDatabase') + result = client.delete(f'/api/{test_app.config['IBAN']}/truncateDatabase/') assert result.status_code == 200, "Fehler beim Leeren der Datenbank" @@ -251,7 +251,7 @@ def test_tag_stored(test_app): 'dry_run': True, 'prio': 2 } - result = client.put(f"/api/{test_app.config['IBAN']}/tag", json=parameters) + result = client.put(f"/api/{test_app.config['IBAN']}/tag/", json=parameters) result = result.json assert result.get('tagged') == 0, \ @@ -266,7 +266,7 @@ def test_tag_stored(test_app): 'rule_name': 'City Tax', 'prio': 2 } - result = client.put(f"/api/{test_app.config['IBAN']}/tag", json=parameters) + result = client.put(f"/api/{test_app.config['IBAN']}/tag/", json=parameters) result = result.json assert result.get('tagged') == 1, \ @@ -300,7 +300,7 @@ def test_own_rules(test_app): 'rule_regex': r'EDEKA', 'prio': 0, } - result = client.put(f"/api/{test_app.config['IBAN']}/tag", json=parameters) + result = client.put(f"/api/{test_app.config['IBAN']}/tag/", json=parameters) result = result.json # Es sollte eine Transaktion zutreffen, @@ -321,7 +321,7 @@ def test_own_rules(test_app): 'prio': 9, 'prio_set': 3, } - result = client.put(f"/api/{test_app.config['IBAN']}/tag", json=parameters) + result = client.put(f"/api/{test_app.config['IBAN']}/tag/", json=parameters) result = result.json assert result.get('tagged') == 1, \ @@ -360,7 +360,7 @@ def test_manual_multi_tagging(test_app): "fdd4649484137572ac642e2c0f34f9af"] } r = client.put( - 
f"/api/{test_app.config['IBAN']}/setManualTags", + f"/api/{test_app.config['IBAN']}/setManualTags/", json=new_tag ) r = r.json @@ -383,3 +383,22 @@ def test_get_tx(test_app): result = result.json assert result.get('primary_tag') == 'Tets_PRIMARY_2', \ "Der Primary Tag war nicht wie erwartet" + +def test_remove_tag(test_app): + """Testet das Entfernen eines Tags""" + with test_app.app_context(): + + with test_app.test_client() as client: + # Remove Tag + result = client.put( + f"/api/{test_app.config['IBAN']}/removeTag/6884802db5e07ee68a68e2c64f9c0cdd" + ) + result = result.json + assert result.get('updated') == 1, \ + "Der Tag wurde nicht entfernt" + assert not result.get('primary_tag'), \ + "Der Primary Tag war nicht wie erwartet" + assert not result.get('secondary_tag'), \ + "Der Secondary Tag war nicht wie erwartet" + assert not result.get('prio'), \ + "Die Prio war nicht wie erwartet" From e979c18d0a15910aa269bceb783ea93bfe3ffb09 Mon Sep 17 00:00:00 2001 From: Pitastic Date: Sun, 13 Apr 2025 21:53:42 +0200 Subject: [PATCH 17/17] misc --- Models.md | 2 ++ app/ui.py | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Models.md b/Models.md index ace1c7c6..52a59327 100644 --- a/Models.md +++ b/Models.md @@ -48,6 +48,8 @@ #### Dictionary eines Rulesets (Tag/Parse) +Regeln können Attribute einer Transaktion untersuchen und anhand dessen klassifizieren oder taggen. Zu den Werten kann nicht die primäre Kategorie zählen, wohl aber andere Tags, parsing Informationen oder Regexes auf den Buchungstext (und mehr). + ``` { 'uuid': str # (generated) diff --git a/app/ui.py b/app/ui.py index 34504cd5..44bede99 100644 --- a/app/ui.py +++ b/app/ui.py @@ -338,7 +338,6 @@ def setManualTags(iban): """ data = request.json updated_entries = {'updated': 0} - #TODO: Frontend prüfen for tx in data.get('t_ids'): updated = self._set_manual_tag(iban, tx, data)