From 50e36be6d0dd517fb2f824fe06a43b7b91d11aec Mon Sep 17 00:00:00 2001 From: Aleksey Filippov Date: Fri, 2 Jun 2023 17:41:05 +0400 Subject: [PATCH] Add dirty code for working with the new ontology --- main.py | 2 +- src/myontology.py | 81 +++++++++++++++++++++++++++-------------------- src/nlp.py | 3 +- 3 files changed, 50 insertions(+), 36 deletions(-) diff --git a/main.py b/main.py index 745dc6d..67d7538 100644 --- a/main.py +++ b/main.py @@ -10,7 +10,7 @@ from src.syntax import Syntax def _main(wav_file: str): # text: str = Speech().run_recognition(wav_file) - text: str = 'Можно ли рефинансировать ипотеку?' + text: str = 'Как получить деньги с заблокированной карты?' print(f'Text: {text}') parse_tree: ParseTree = Syntax().get_parse_tree(text) print(f'Parse tree:\n{parse_tree}') diff --git a/src/myontology.py b/src/myontology.py index 3db66c1..047afa3 100644 --- a/src/myontology.py +++ b/src/myontology.py @@ -1,45 +1,58 @@ -from typing import List, Dict +from typing import List +from ordered_set import OrderedSet from owlready2 import get_ontology, Ontology class MyOntology: def __init__(self) -> None: - self.__onto: Ontology = get_ontology("file://./ontology.owl").load() + self.__onto: Ontology = get_ontology("file://./new-ontology.owx").load() - def __find_str_in_list(self, string: str, string_list: List[str]) -> int: - try: - string_list.index(string.replace("_", " ")) - return 1 - except ValueError: - return 0 + def __get_parent_with_hierarchy(self, root, parents): + pdict = {} + for parent in parents: + level = 0 + subclass = parent.is_a[0] + if subclass == self.__onto.Concept and len(pdict.keys()) == 0: + pdict[1] = [parent] + while subclass != self.__onto.Concept: + level += 1 + if subclass == root: + if pdict.get(level) is None: + pdict[level] = [] + pdict[level].append(parent) + subclass = subclass.is_a[0] + keys = sorted(pdict.keys()) + if len(keys) == 0: + return None, None + return keys[-1], pdict[keys[-1]] - def __get_property_value(self, values: [], instance): - if len(values) != 1: - raise ValueError(f'Wrong values in {instance.name}') - return values[0] - - def __get_event_instance(self, instances: []): - events: List[Dict[int, str]] = [] - for instance in instances: - event = self.__get_property_value(instance.hasEvent, instance) - priority = self.__get_property_value(event.hasPriority, event) - events.append({priority: self.__get_property_value(event.hasDescription, event)}) - events.sort(key=lambda item: list(item.keys())[0], reverse=True) - return events - - def __find_instances_by_terms(self, my_terms: List[str]) -> []: - instances = [] - for instance in self.__onto.Concept.instances(): - terms = instance.hasTerm - match: int = 0 - for term in terms: - match = match + self.__find_str_in_list(term.name, my_terms) - if match >= 1: - instances.append(instance) + def __find_instance(self, root, level: int, terms: List[str]) -> OrderedSet[(int, [])]: + level += 1 + instances: OrderedSet[(int, [])] = OrderedSet() + for current_class in root.subclasses(): + for instance in current_class.instances(): + if instance.name == terms[-1] is not None: + plevel, parents = self.__get_parent_with_hierarchy(current_class, instance.is_instance_of) + filtered_terms = list(filter(lambda term: term != terms[-1], terms)) + if parents is None: + plevel = level + parents = [current_class] + if len(filtered_terms) == 0: + instances.append((plevel, current_class)) + return instances + for parent in parents: + result = self.__find_instance(parent, plevel, filtered_terms) + if len(result) == 0: + instances.append((plevel, parent)) + else: + [instances.append(item) for item in result] return instances def get_event_description(self, terms: List[str]) -> str: - instances: [] = self.__find_instances_by_terms(terms) - events = self.__get_event_instance(instances) - return '\n'.join(list(map(lambda item: f'{list(item.keys())[0]}: {list(item.values())[0]}', events))) + instances = OrderedSet() + for term in terms: + for item in self.__find_instance(self.__onto.Concept, 0, term.split(' ')): + instances.append(item) + instances = sorted(list(instances), reverse=True) + return '\n'.join(list(map(lambda instance: ' '.join(instance[1].hasDescription), instances))) diff --git a/src/nlp.py b/src/nlp.py index 530522d..2ebff1c 100644 --- a/src/nlp.py +++ b/src/nlp.py @@ -109,7 +109,8 @@ class NLP: for child in split: self.__tree.add_to_tree(child, term_type, parent_node) - def __merge_terms(self, leaves: List[List[SemanticTreeNode]]) -> List[str]: + @staticmethod + def __merge_terms(leaves: List[List[SemanticTreeNode]]) -> List[str]: terms: List[str] = [] for group in leaves: if len(group) == 0: