Improve semantic analysis
This commit is contained in:
parent
e876220057
commit
c0c29d5115
2
main.py
2
main.py
@ -10,7 +10,7 @@ from src.syntax import Syntax
|
||||
|
||||
def _main(wav_file: str):
|
||||
# text: str = Speech().run_recognition(wav_file)
|
||||
text: str = 'У меня кредит в другом банке. Можно ли его перевести в ваш банк? '
|
||||
text: str = 'Можно ли рефинансировать ипотеку?'
|
||||
print(f'Text: {text}')
|
||||
parse_tree: ParseTree = Syntax().get_parse_tree(text)
|
||||
print(f'Parse tree:\n{parse_tree}')
|
||||
|
@ -3,4 +3,5 @@ requests==2.27.1
|
||||
anytree==2.8.0
|
||||
spacy==3.3.0
|
||||
https://github.com/explosion/spacy-models/releases/download/ru_core_news_sm-3.3.0/ru_core_news_sm-3.3.0.tar.gz
|
||||
Owlready2==0.36
|
||||
Owlready2==0.36
|
||||
ordered-set==4.1.0
|
70
src/nlp.py
70
src/nlp.py
@ -3,14 +3,17 @@ from typing import List, Set
|
||||
import ru_core_news_sm
|
||||
from anytree import LevelOrderIter, Resolver, ChildResolverError, LevelOrderGroupIter, \
|
||||
ResolverError
|
||||
from ordered_set import OrderedSet
|
||||
|
||||
from src.parse_tree.parse_tree import ParseTree
|
||||
from src.parse_tree.parse_tree_node import ParseTreeNode
|
||||
from src.semantic_tree.semantic_tree import SemanticTree
|
||||
from src.semantic_tree.semantic_tree_node import SemanticTreeNode
|
||||
|
||||
|
||||
class NLP:
|
||||
|
||||
def __init__(self) -> None:
    """Load the Russian spaCy pipeline and start with an empty semantic tree."""
    # Pipeline object produced by the ru_core_news_sm package.
    self.__model = ru_core_news_sm.load()
    # Tree that collects the terms added through SemanticTree.add_to_tree.
    self.__tree = SemanticTree()
|
||||
|
||||
def __lemmatizer(self, text: str):
|
||||
@ -36,14 +39,15 @@ class NLP:
|
||||
return nodes
|
||||
|
||||
def __get_terms_by_template(self, tree: ParseTree, template: str, reverse: bool = False) -> List[str]:
|
||||
terms: Set[str] = set()
|
||||
terms: OrderedSet[str] = OrderedSet()
|
||||
nodes: list = []
|
||||
tmplt = template
|
||||
if '/' in template:
|
||||
split = template.split('/', 1)
|
||||
tmplt = f'*/{split.pop(0)}*/{"".join(split)}'
|
||||
tmplt = f'{split.pop(0)}*/{"".join(split)}'
|
||||
nodes.extend(self.__get_nodes_by_template(tree, f'/{template}'))
|
||||
nodes.extend(self.__get_nodes_by_template(tree, tmplt))
|
||||
nodes.extend(self.__get_nodes_by_template(tree, f'*/{tmplt}'))
|
||||
for node in nodes:
|
||||
if node.parent is None:
|
||||
continue
|
||||
@ -80,7 +84,10 @@ class NLP:
|
||||
return terms
|
||||
|
||||
def __get_verb_noun(self, tree: ParseTree) -> List[str]:
    """Collect the terms matched by the verb/noun templates in both orders.

    Args:
        tree: parse tree that is searched for the templates.

    Returns:
        Terms found for ``VERB/NOUN`` followed by the terms found for
        ``NOUN/VERB``; both lookups are done with ``reverse=True``.
    """
    # No early return here: a leading ``return`` for 'VERB/NOUN' alone made
    # the 'NOUN/VERB' lookup below unreachable.
    terms: List[str] = []
    for template in ('VERB/NOUN', 'NOUN/VERB'):
        terms.extend(self.__get_terms_by_template(tree, template, True))
    return terms
|
||||
|
||||
def __get_single_terms_by_template(self, tree: ParseTree, template: List[str]) -> List[str]:
|
||||
nouns: List[ParseTreeNode] = []
|
||||
@ -90,31 +97,40 @@ class NLP:
|
||||
nouns.append(node)
|
||||
return list(set([self.__lemmatizer(noun.lemma) for noun in nouns]))
|
||||
|
||||
@staticmethod
|
||||
def __merge_nouns(nouns: List[str], verb_nouns: List[str]) -> List[str]:
|
||||
terms: Set[str] = set()
|
||||
if len(nouns) == 0 or len(verb_nouns) == 0:
|
||||
return list(terms)
|
||||
for verb_noun in verb_nouns:
|
||||
split = verb_noun.split(' ')
|
||||
current_noun = split.pop()
|
||||
current_verb = split.pop()
|
||||
for noun in nouns:
|
||||
if current_noun in noun.split(' '):
|
||||
terms.add(f'{current_verb} {noun}')
|
||||
break
|
||||
return list(terms)
|
||||
def __add_to_semantic_tree(self, terms: List[str], term_type: str):
|
||||
if terms is None or len(terms) == 0:
|
||||
return
|
||||
for term in terms:
|
||||
split = term.split(' ')
|
||||
if len(split) < 2:
|
||||
return
|
||||
noun = split.pop()
|
||||
parent_node = self.__tree.add_to_tree(noun, 'noun')
|
||||
for child in split:
|
||||
self.__tree.add_to_tree(child, term_type, parent_node)
|
||||
|
||||
def __merge_terms(self, leaves: List[List[SemanticTreeNode]]) -> List[str]:
|
||||
terms: List[str] = []
|
||||
for group in leaves:
|
||||
if len(group) == 0:
|
||||
continue
|
||||
current_term = ' '.join([leaf.name for leaf in group])
|
||||
current_leaf = group[0]
|
||||
while current_leaf.parent is not None:
|
||||
current_leaf = current_leaf.parent
|
||||
current_term = f'{current_term} {current_leaf.name}'
|
||||
terms.append(current_term.strip())
|
||||
return terms
|
||||
|
||||
def get_terms(self, tree: ParseTree) -> List[str]:
|
||||
terms: List[str] = []
|
||||
terms.extend(self.__get_adj_noun(tree))
|
||||
nouns: List[str] = self.__get_nouns(tree)
|
||||
terms.extend(nouns)
|
||||
# verb_nouns: List[str] = self.__get_verb_noun(tree)
|
||||
# terms.extend(verb_nouns)
|
||||
# merged: List[str] = self.__merge_nouns(nouns, verb_nouns)
|
||||
# if len(merged) > 0:
|
||||
# return merged
|
||||
if len(terms) == 0:
|
||||
terms.extend(self.__get_single_terms_by_template(tree, ['NOUN', 'VERB', 'ADJ']))
|
||||
self.__add_to_semantic_tree(nouns, 'noun')
|
||||
verb_nouns: List[str] = self.__get_verb_noun(tree)
|
||||
self.__add_to_semantic_tree(verb_nouns, 'verb')
|
||||
adj_nouns = self.__get_adj_noun(tree)
|
||||
self.__add_to_semantic_tree(adj_nouns, 'adj')
|
||||
print(f'Semantic tree:\n{self.__tree}')
|
||||
terms: List[str] = self.__merge_terms(self.__tree.get_leaves())
|
||||
# terms.extend(self.__get_single_terms_by_template(tree, ['NOUN', 'VERB', 'ADJ']))
|
||||
return terms
|
||||
|
32
src/semantic_tree/semantic_tree.py
Normal file
32
src/semantic_tree/semantic_tree.py
Normal file
@ -0,0 +1,32 @@
|
||||
from typing import List, Dict
|
||||
|
||||
from anytree import RenderTree, PreOrderIter
|
||||
|
||||
from src.semantic_tree.semantic_tree_node import SemanticTreeNode
|
||||
|
||||
|
||||
class SemanticTree:
    """Tree of named nodes hanging off a single unnamed root node."""

    def __init__(self) -> None:
        # Root node with an empty name; nodes added without a parent attach here.
        self.__tree_root = SemanticTreeNode('')
        # Every node created through add_to_tree, keyed by its name.
        self.__nodes: Dict[str, SemanticTreeNode] = {}

    def add_to_tree(self, name: str, node_type: str, parent: SemanticTreeNode = None) -> SemanticTreeNode:
        """Return the node called ``name``, creating it if it does not exist yet.

        Args:
            name: node name; also the lookup key, so names are unique per tree.
            node_type: type stored on a newly created node.
            parent: parent for a newly created node; the root when ``None``.

        Returns:
            The existing node when ``name`` is already known (``node_type`` and
            ``parent`` are ignored in that case), otherwise the new node.
        """
        node = self.__nodes.get(name)
        if node is None:
            node = SemanticTreeNode(name, node_type,
                                    self.__tree_root if parent is None else parent)
            self.__nodes[name] = node
        return node

    def get_leaves(self) -> List[List[SemanticTreeNode]]:
        """Group the leaves of the tree by their parent node.

        Returns:
            One list of leaves per parent, in pre-order of first appearance.
            An empty list when nothing has been added to the tree.
        """
        grouped: Dict[SemanticTreeNode, List[SemanticTreeNode]] = {}
        # The bare root of an empty tree is itself a leaf; exclude it so callers
        # never receive a group that holds only the unnamed root.
        for leaf in PreOrderIter(self.__tree_root,
                                 filter_=lambda node: node.is_leaf and not node.is_root):
            grouped.setdefault(leaf.parent, []).append(leaf)
        return list(grouped.values())

    def __repr__(self) -> str:
        """Render the tree, one node per line, with anytree's branch prefixes."""
        return '\n'.join(f'{pre}{node}' for pre, _, node in RenderTree(self.__tree_root))
|
12
src/semantic_tree/semantic_tree_node.py
Normal file
12
src/semantic_tree/semantic_tree_node.py
Normal file
@ -0,0 +1,12 @@
|
||||
from anytree import NodeMixin
|
||||
|
||||
|
||||
class SemanticTreeNode(NodeMixin):
    """anytree node that carries a name and a node type."""

    def __init__(self, name: str, node_type: str = None, parent: NodeMixin = None):
        """Store the name and type, then attach the node to ``parent``."""
        super().__init__()
        self.name = name
        self.type = node_type
        # Assigning ``parent`` is what links this node into the tree.
        self.parent = parent

    def __repr__(self) -> str:
        """Return ``"<name> <type>"``; a node with an empty name is shown as ``ROOT``."""
        label = self.name or "ROOT"
        return f'{label} {self.type}'
|
Loading…
Reference in New Issue
Block a user