helper/src/nlp.py

24 lines
696 B
Python

from typing import List
import ru_core_news_sm
from anytree import LevelOrderIter
from src.parse_tree.parse_tree import ParseTree
from src.parse_tree.parse_tree_node import ParseTreeNode
class NLP:
    """Helpers for pulling lemmatized nouns out of a parse tree."""

    # Loaded ``ru_core_news_sm`` pipeline, shared by every call. It is filled
    # in on first use because loading the model is expensive and the original
    # per-call load repeated that cost for every single noun.
    _pipeline = None

    @staticmethod
    def _lemmatizer(text: str) -> str:
        """Return *text* with each token replaced by its lemma.

        The text is processed by the ``ru_core_news_sm`` pipeline and the
        ``lemma_`` of every resulting token is joined with single spaces.

        Args:
            text: The text to lemmatize.

        Returns:
            The space-separated lemmas of the tokens found in *text*.
        """
        if NLP._pipeline is None:
            NLP._pipeline = ru_core_news_sm.load()
        doc = NLP._pipeline(text)
        return ' '.join(token.lemma_ for token in doc)

    def get_nouns(self, tree: ParseTree) -> List[str]:
        """Return the distinct lemmatized nouns found in *tree*.

        Nodes are visited in level order starting from
        ``tree.get_tree_root()``. Every node whose ``upos`` equals ``'NOUN'``
        has its ``lemma`` passed through :meth:`_lemmatizer`.

        Args:
            tree: The parse tree to scan.

        Returns:
            The unique lemmatized nouns, in the order they were first
            encountered during the traversal.
        """
        lemmas = (
            self._lemmatizer(node.lemma)
            for node in LevelOrderIter(tree.get_tree_root())
            if node.upos == 'NOUN'
        )
        # dict.fromkeys drops duplicates while keeping first-seen order, so
        # the result is deterministic (a plain set() would not be).
        return list(dict.fromkeys(lemmas))