Delete repeating nouns

This commit is contained in:
Aleksey Filippov 2022-01-25 16:08:53 +04:00
parent 442c0fe483
commit 4020ac2d35

View File

@@ -14,13 +14,10 @@ class NLP:
tokens = [token.lemma_ for token in doc] tokens = [token.lemma_ for token in doc]
return ' '.join(tokens) return ' '.join(tokens)
def _get_nouns(self, tree: ParseTreeNode) -> List[str]: def get_nouns(self, tree: ParseTree) -> List[str]:
nouns: List[ParseTreeNode] = [] nouns: List[ParseTreeNode] = []
for node in LevelOrderIter(tree): for node in LevelOrderIter(tree.get_tree_root()):
if node.upos != 'NOUN': if node.upos != 'NOUN':
continue continue
nouns.append(node) nouns.append(node)
return [self._lemmatizer(noun.lemma) for noun in nouns] return list(set([self.lemmatizer(noun.lemma) for noun in nouns]))
def get_nouns(self, tree: ParseTree) -> List[str]:
return self._get_nouns(tree.get_tree_root())