from typing import List

import ru_core_news_sm
from anytree import LevelOrderIter

from src.parse_tree.parse_tree import ParseTree
from src.parse_tree.parse_tree_node import ParseTreeNode


class NLP:
    """Lemmatization helpers built on the ``ru_core_news_sm`` pipeline."""

    # Lazily-loaded pipeline shared by all calls; loading it is expensive, so
    # it is done at most once per process instead of once per lemmatized text.
    _model = None

    @staticmethod
    def _lemmatizer(text: str):
        """Return *text* with every token replaced by its lemma.

        The text is run through the ``ru_core_news_sm`` pipeline and the
        ``lemma_`` of each resulting token is joined with single spaces.

        Args:
            text: The text to lemmatize.

        Returns:
            A space-separated string of token lemmas.
        """
        if NLP._model is None:
            NLP._model = ru_core_news_sm.load()
        doc = NLP._model(text)
        return ' '.join(token.lemma_ for token in doc)

    def get_nouns(self, tree: ParseTree) -> List[str]:
        """Collect the unique lemmatized nouns found in a parse tree.

        The tree is walked in level order starting from
        ``tree.get_tree_root()``. Nodes whose ``upos`` equals ``'NOUN'`` are
        kept, and each kept node's ``lemma`` attribute is passed through
        ``_lemmatizer``.

        Args:
            tree: The parse tree to scan.

        Returns:
            The distinct lemmatized nouns. Duplicates are removed through a
            set, so the order of the returned list is not guaranteed.
        """
        nouns: List[ParseTreeNode] = [
            node
            for node in LevelOrderIter(tree.get_tree_root())
            if node.upos == 'NOUN'
        ]
        # The original called ``self.lemmatizer``, which does not exist on
        # this class and raised AttributeError; the helper is ``_lemmatizer``.
        return list({self._lemmatizer(noun.lemma) for noun in nouns})