Delete repeating nouns

This commit is contained in:
Aleksey Filippov 2022-01-25 16:08:53 +04:00
parent 442c0fe483
commit 4020ac2d35

View File

@@ -14,13 +14,10 @@ class NLP:
tokens = [token.lemma_ for token in doc]
return ' '.join(tokens)
def _get_nouns(self, tree: ParseTreeNode) -> List[str]:
def get_nouns(self, tree: ParseTree) -> List[str]:
nouns: List[ParseTreeNode] = []
for node in LevelOrderIter(tree):
for node in LevelOrderIter(tree.get_tree_root()):
if node.upos != 'NOUN':
continue
nouns.append(node)
return [self._lemmatizer(noun.lemma) for noun in nouns]
def get_nouns(self, tree: ParseTree) -> List[str]:
return self._get_nouns(tree.get_tree_root())
return list(set([self.lemmatizer(noun.lemma) for noun in nouns]))