Improve semantic analysis

This commit is contained in:
Aleksey Filippov 2023-05-31 23:59:18 +04:00
parent e876220057
commit c0c29d5115
5 changed files with 90 additions and 29 deletions

View File

@ -10,7 +10,7 @@ from src.syntax import Syntax
def _main(wav_file: str):
# text: str = Speech().run_recognition(wav_file)
text: str = 'У меня кредит в другом банке. Можно ли его перевести в ваш банк? '
text: str = 'Можно ли рефинансировать ипотеку?'
print(f'Text: {text}')
parse_tree: ParseTree = Syntax().get_parse_tree(text)
print(f'Parse tree:\n{parse_tree}')

View File

@ -3,4 +3,5 @@ requests==2.27.1
anytree==2.8.0
spacy==3.3.0
https://github.com/explosion/spacy-models/releases/download/ru_core_news_sm-3.3.0/ru_core_news_sm-3.3.0.tar.gz
Owlready2==0.36
Owlready2==0.36
ordered-set==4.1.0

View File

@ -3,14 +3,17 @@ from typing import List, Set
import ru_core_news_sm
from anytree import LevelOrderIter, Resolver, ChildResolverError, LevelOrderGroupIter, \
ResolverError
from ordered_set import OrderedSet
from src.parse_tree.parse_tree import ParseTree
from src.parse_tree.parse_tree_node import ParseTreeNode
from src.semantic_tree.semantic_tree import SemanticTree
from src.semantic_tree.semantic_tree_node import SemanticTreeNode
class NLP:
def __init__(self) -> None:
    """Load the ``ru_core_news_sm`` spaCy model and start with an empty semantic tree."""
    self.__model = ru_core_news_sm.load()
    # The tree is instance state: it is created once here and reused by later calls.
    self.__tree = SemanticTree()
def __lemmatizer(self, text: str):
@ -36,14 +39,15 @@ class NLP:
return nodes
def __get_terms_by_template(self, tree: ParseTree, template: str, reverse: bool = False) -> List[str]:
terms: Set[str] = set()
terms: OrderedSet[str] = OrderedSet()
nodes: list = []
tmplt = template
if '/' in template:
split = template.split('/', 1)
tmplt = f'*/{split.pop(0)}*/{"".join(split)}'
tmplt = f'{split.pop(0)}*/{"".join(split)}'
nodes.extend(self.__get_nodes_by_template(tree, f'/{template}'))
nodes.extend(self.__get_nodes_by_template(tree, tmplt))
nodes.extend(self.__get_nodes_by_template(tree, f'*/{tmplt}'))
for node in nodes:
if node.parent is None:
continue
@ -80,7 +84,10 @@ class NLP:
return terms
def __get_verb_noun(self, tree: ParseTree) -> List[str]:
    """Collect verb/noun terms from *tree* for both template orders.

    Queries ``__get_terms_by_template`` with ``'VERB/NOUN'`` and then with
    ``'NOUN/VERB'`` (both with ``reverse=True``) and concatenates the results
    in that order.  Duplicates across the two queries are not removed.

    Args:
        tree: Parse tree to search.

    Returns:
        Terms matched by the first template followed by those matched by the
        second.
    """
    # An early `return` of the 'VERB/NOUN' result used to sit here, which made
    # the 'NOUN/VERB' query below unreachable.
    terms: List[str] = []
    terms.extend(self.__get_terms_by_template(tree, 'VERB/NOUN', True))
    terms.extend(self.__get_terms_by_template(tree, 'NOUN/VERB', True))
    return terms
def __get_single_terms_by_template(self, tree: ParseTree, template: List[str]) -> List[str]:
nouns: List[ParseTreeNode] = []
@ -90,31 +97,40 @@ class NLP:
nouns.append(node)
return list(set([self.__lemmatizer(noun.lemma) for noun in nouns]))
@staticmethod
def __merge_nouns(nouns: List[str], verb_nouns: List[str]) -> List[str]:
terms: Set[str] = set()
if len(nouns) == 0 or len(verb_nouns) == 0:
return list(terms)
for verb_noun in verb_nouns:
split = verb_noun.split(' ')
current_noun = split.pop()
current_verb = split.pop()
for noun in nouns:
if current_noun in noun.split(' '):
terms.add(f'{current_verb} {noun}')
break
return list(terms)
def __add_to_semantic_tree(self, terms: List[str], term_type: str):
if terms is None or len(terms) == 0:
return
for term in terms:
split = term.split(' ')
if len(split) < 2:
return
noun = split.pop()
parent_node = self.__tree.add_to_tree(noun, 'noun')
for child in split:
self.__tree.add_to_tree(child, term_type, parent_node)
def __merge_terms(self, leaves: List[List[SemanticTreeNode]]) -> List[str]:
terms: List[str] = []
for group in leaves:
if len(group) == 0:
continue
current_term = ' '.join([leaf.name for leaf in group])
current_leaf = group[0]
while current_leaf.parent is not None:
current_leaf = current_leaf.parent
current_term = f'{current_term} {current_leaf.name}'
terms.append(current_term.strip())
return terms
def get_terms(self, tree: ParseTree) -> List[str]:
    """Extract terms from *tree* by way of the semantic tree.

    Noun, verb/noun and adjective/noun terms are collected from the parse
    tree and inserted into the instance's semantic tree (typed ``'noun'``,
    ``'verb'`` and ``'adj'`` respectively).  The tree is printed to stdout and
    its leaf groups are merged into the returned terms.

    The semantic tree is created in ``__init__`` and is not reset here, so
    nodes added by earlier calls are still present.

    Args:
        tree: Parse tree of the analysed text.

    Returns:
        The terms produced by ``__merge_terms`` from the tree's leaf groups.
    """
    # A flat `terms` list used to be assembled before this point and was then
    # overwritten without being read; that dead work has been removed.
    nouns: List[str] = self.__get_nouns(tree)
    self.__add_to_semantic_tree(nouns, 'noun')
    verb_nouns: List[str] = self.__get_verb_noun(tree)
    self.__add_to_semantic_tree(verb_nouns, 'verb')
    adj_nouns: List[str] = self.__get_adj_noun(tree)
    self.__add_to_semantic_tree(adj_nouns, 'adj')
    print(f'Semantic tree:\n{self.__tree}')
    return self.__merge_terms(self.__tree.get_leaves())

View File

@ -0,0 +1,32 @@
from typing import List, Dict
from anytree import RenderTree, PreOrderIter
from src.semantic_tree.semantic_tree_node import SemanticTreeNode
class SemanticTree:
    """Tree of named nodes under an unnamed root, with at most one node per name."""

    def __init__(self) -> None:
        # Unnamed root; nodes added without an explicit parent hang off it.
        self.__tree_root = SemanticTreeNode('')
        # Every node created through add_to_tree, keyed by its name.
        self.__nodes: Dict[str, SemanticTreeNode] = {}

    def add_to_tree(
        self,
        name: str,
        node_type: str,
        parent: SemanticTreeNode = None,
    ) -> SemanticTreeNode:
        """Return the node called *name*, creating it on first use.

        Args:
            name: Node name; also the de-duplication key.
            node_type: Type stored on a newly created node.
            parent: Parent for a newly created node; the root when ``None``.

        Returns:
            The existing node with this name, or the newly created one.  For
            an existing node, *node_type* and *parent* are ignored.
        """
        node = self.__nodes.get(name)
        if node is None:
            attach_to = self.__tree_root if parent is None else parent
            node = SemanticTreeNode(name, node_type, attach_to)
            self.__nodes[name] = node
        return node

    def get_leaves(self) -> List[List[SemanticTreeNode]]:
        """Return the leaf nodes grouped by parent.

        Leaves are visited in pre-order from the root; groups appear in the
        order their parent is first encountered.

        Returns:
            One list of sibling leaves per parent.  Empty when nothing has
            been added to the tree.
        """
        siblings: Dict[SemanticTreeNode, List[SemanticTreeNode]] = {}
        for leaf in PreOrderIter(self.__tree_root, filter_=lambda node: node.is_leaf):
            if leaf.parent is None:
                # Only the root lacks a parent, and it is a leaf only while the
                # tree is empty.  Reporting it would yield a bogus empty term.
                continue
            siblings.setdefault(leaf.parent, []).append(leaf)
        return list(siblings.values())

    def __repr__(self) -> str:
        """Render the tree one node per line, using anytree's ``RenderTree`` prefixes."""
        return '\n'.join(f'{pre}{node}' for pre, _, node in RenderTree(self.__tree_root))

View File

@ -0,0 +1,12 @@
from anytree import NodeMixin
class SemanticTreeNode(NodeMixin):
    """anytree node that carries a name and an optional type label."""

    def __init__(self, name: str, node_type: str = None, parent: NodeMixin = None):
        """Store *name* and *node_type*, then attach the node under *parent* (if any)."""
        super().__init__()
        self.type = node_type
        self.name = name
        # Set the parent last so the node is fully initialised before it joins a tree.
        self.parent = parent

    def __repr__(self) -> str:
        """Return ``'<name> <type>'``, with ``ROOT`` standing in for an empty name."""
        label = self.name or "ROOT"
        return f'{label} {self.type}'