From 4020ac2d35f7cd48709daa2c284dd36589627cec Mon Sep 17 00:00:00 2001 From: Aleksey Filippov Date: Tue, 25 Jan 2022 16:08:53 +0400 Subject: [PATCH] Delete repeating nouns --- src/nlp.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/nlp.py b/src/nlp.py index 70c1e8d..c11c951 100644 --- a/src/nlp.py +++ b/src/nlp.py @@ -14,13 +14,10 @@ class NLP: tokens = [token.lemma_ for token in doc] return ' '.join(tokens) - def _get_nouns(self, tree: ParseTreeNode) -> List[str]: + def get_nouns(self, tree: ParseTree) -> List[str]: nouns: List[ParseTreeNode] = [] - for node in LevelOrderIter(tree): + for node in LevelOrderIter(tree.get_tree_root()): if node.upos != 'NOUN': continue nouns.append(node) - return [self._lemmatizer(noun.lemma) for noun in nouns] - - def get_nouns(self, tree: ParseTree) -> List[str]: - return self._get_nouns(tree.get_tree_root()) + return list(set([self.lemmatizer(noun.lemma) for noun in nouns]))