diff --git a/src/main.py b/src/main.py index 35fc5a4..3a8ea5e 100644 --- a/src/main.py +++ b/src/main.py @@ -10,16 +10,10 @@ from src.speech import Speech from syntax import Syntax -def _main(): - if len(sys.argv) < 2: - print(f'Usage: {sys.argv[0]} FILE') - exit(1) - wav_file: str = sys.argv[1] - speech_server: str = 'http://vosk.athene.tech' - text: str = Speech().run_recognition(wav_file, speech_server) +def _main(wav_file: str): + text: str = Speech().run_recognition(wav_file) print(f'Text: {text}') - syntax_server: str = 'http://syntaxnet.athene.tech' - parse_tree: ParseTree = Syntax().get_parse_tree(text, syntax_server) + parse_tree: ParseTree = Syntax().get_parse_tree(text) print(f'Parse tree:\n{parse_tree}') nouns: List[str] = NLP().get_nouns(parse_tree) print(f'Nouns:\n{" ".join([noun for noun in nouns])}') @@ -28,4 +22,7 @@ def _main(): if __name__ == '__main__': - _main() + if len(sys.argv) < 2: + print(f'Usage: {sys.argv[0]} FILE') + exit(1) + _main(sys.argv[1]) diff --git a/src/myontology.py b/src/myontology.py index 82389d0..ff7aec5 100644 --- a/src/myontology.py +++ b/src/myontology.py @@ -5,45 +5,45 @@ from owlready2 import get_ontology, Ontology class MyOntology: def __init__(self) -> None: - self._onto: Ontology = get_ontology("file://./ontology.owl").load() + self.__onto: Ontology = get_ontology("file://./ontology.owl").load() - def _find(self, string: str, string_list: List[str]) -> int: + def __find_str_in_list(self, string: str, string_list: List[str]) -> int: try: string_list.index(string) return 1 except ValueError: return 0 - def _get_property_value(self, values: [], instance): + def __get_property_value(self, values: [], instance): if len(values) != 1: raise ValueError(f'Wrong values in {instance.name}') return values[0] - def _get_event_instance(self, instances: []): + def __get_event_instance(self, instances: []): max_instance: Optional = None max_priority: int = 0 for instance in instances: - event = self._get_property_value(instance.hasEvent, instance) - priority = self._get_property_value(event.hasPriority, event) + event = self.__get_property_value(instance.hasEvent, instance) + priority = self.__get_property_value(event.hasPriority, event) if priority > max_priority: max_instance = event max_priority = priority return max_instance - def _find_instances_by_terms(self, nouns: List[str]) -> []: + def __find_instances_by_terms(self, nouns: List[str]) -> []: instances = [] - for instance in self._onto.Concept.instances(): + for instance in self.__onto.Concept.instances(): terms = instance.hasTerm match: int = 0 for term in terms: - match = match + self._find(term.name, nouns) + match = match + self.__find_str_in_list(term.name, nouns) if match >= len(terms) * 0.5: instances.append(instance) return instances def get_event_description(self, nouns: List[str]) -> str: - instances: [] = self._find_instances_by_terms(nouns) - event = self._get_event_instance(instances) + instances: [] = self.__find_instances_by_terms(nouns) + event = self.__get_event_instance(instances) if event is None: return '' - return self._get_property_value(event.hasDescription, event) + return self.__get_property_value(event.hasDescription, event) diff --git a/src/nlp.py b/src/nlp.py index aeababc..1b55aa7 100644 --- a/src/nlp.py +++ b/src/nlp.py @@ -8,8 +8,7 @@ from src.parse_tree.parse_tree_node import ParseTreeNode class NLP: - @staticmethod - def _lemmatizer(text: str): + def lemmatizer(self, text: str): doc = ru_core_news_sm.load()(text) tokens = [token.lemma_ for token in doc] return ' '.join(tokens) @@ -20,4 +19,4 @@ class NLP: if node.upos != 'NOUN': continue nouns.append(node) - return list(set([self._lemmatizer(noun.lemma) for noun in nouns])) + return list(set([self.lemmatizer(noun.lemma) for noun in nouns])) diff --git a/src/parse_tree/parse_tree.py b/src/parse_tree/parse_tree.py index 7a04deb..62ad7c9 100644 --- a/src/parse_tree/parse_tree.py +++ b/src/parse_tree/parse_tree.py @@ -8,7 +8,7 @@ from src.parse_tree.parse_tree_node import ParseTreeNode class ParseTree: def __init__(self, raw_tree: str): - self._tree: ParseTreeNode = self.__create_tree(self.__create_nodes_array(raw_tree)) + self.__tree: ParseTreeNode = self.__create_tree(self.__create_nodes_array(raw_tree)) @staticmethod def __parse_raw_tree_line(raw_tree_line: str) -> Optional[ParseTreeNode]: @@ -45,8 +45,8 @@ class ParseTree: break return root - def __repr__(self) -> str: - return '\n'.join([f'{pre}{node}' for pre, fill, node in RenderTree(self._tree)]) - def get_tree_root(self) -> ParseTreeNode: - return self._tree + return self.__tree + + def __repr__(self) -> str: + return '\n'.join([f'{pre}{node}' for pre, fill, node in RenderTree(self.__tree)]) diff --git a/src/speech.py b/src/speech.py index daa02db..97aeb64 100644 --- a/src/speech.py +++ b/src/speech.py @@ -1,8 +1,11 @@ +from typing import Final + import requests from scipy.io import wavfile class Speech: + __server: Final[str] = 'http://vosk.athene.tech' @staticmethod def __check_wav(wav_file): @@ -23,10 +26,9 @@ class Speech: result = file.read() return result - @staticmethod - def __stt(wav_file, server): - print(f'Connecting to \'{server}\'...') - response = requests.post(url=f'{server}/stt', + def __stt(self, wav_file): + print(f'Connecting to \'{self.__server}\'...') + response = requests.post(url=f'{self.__server}/stt', data=Speech.__load_wav(wav_file), headers={'Content-Type': 'audio/wav'}) result = response.json() @@ -36,5 +38,5 @@ class Speech: return result['text'] if not result['code'] else f'Server error: {result}' - def run_recognition(self, wav_file: str, server: str) -> str: - return self.__stt(wav_file, server) + def run_recognition(self, wav_file: str) -> str: + return self.__stt(wav_file) diff --git a/src/syntax.py b/src/syntax.py index a416bc2..62a3d06 100644 --- a/src/syntax.py +++ b/src/syntax.py @@ -1,14 +1,16 @@ +from typing import Final + import requests from src.parse_tree.parse_tree import ParseTree class Syntax: + __server: Final[str] = 'http://syntaxnet.athene.tech' - @staticmethod - def __parsey(text, server): - print(f'Connecting to \'{server}\'...') - response = requests.post(url=f'{server}/v1/parsey-universal-full', + def __parsey(self, text): + print(f'Connecting to \'{self.__server}\'...') + response = requests.post(url=f'{self.__server}/v1/parsey-universal-full', data=text.encode('utf-8'), headers={ 'Content-Type': 'text/plain; charset=utf-8', @@ -20,5 +22,5 @@ class Syntax: return result - def get_parse_tree(self, text: str, server: str) -> ParseTree: - return ParseTree(self.__parsey(text, server)) + def get_parse_tree(self, text: str) -> ParseTree: + return ParseTree(self.__parsey(text))