helper/src/speech.py

from typing import Final

import requests
from scipy.io import wavfile


class Speech:
    """Client that sends a WAV file to a speech-to-text server over HTTP.

    A file is validated locally (16000 Hz, mono, ``int16`` samples) before
    its raw bytes are POSTed to the server's ``/stt`` endpoint.
    """

    # Base URL of the recognition server; audio is POSTed to ``<server>/stt``.
    __server: Final[str] = 'http://vosk.athene.tech'

    @staticmethod
    def __check_wav(wav_file) -> None:
        """Validate the format of ``wav_file``.

        Args:
            wav_file: Anything ``scipy.io.wavfile.read`` accepts (a path or
                an open binary file object).

        Raises:
            ValueError: If the sample rate is not 16000, the audio is not
                mono, or the sample dtype is not ``int16``.
        """
        sample_rate, sig = wavfile.read(wav_file)
        # scipy returns a 1-D array for mono audio and a 2-D array of shape
        # (n_samples, n_channels) otherwise, so the channel count lives in
        # the second axis -- not in the number of dimensions.
        channels = 1 if sig.ndim == 1 else sig.shape[1]
        bits = sig.dtype.base.name
        if sample_rate != 16000:
            raise ValueError(f'Sample rate is not 16000: {sample_rate}')
        if channels != 1:
            raise ValueError(
                f'Number of Channels is not 1 (Not mono): {channels}')
        if bits != 'int16':
            raise ValueError(f'Bits per sample is not 16: {bits}')

    @staticmethod
    def __load_wav(wav_file: str) -> bytes:
        """Validate ``wav_file`` and return its complete contents as bytes.

        Raises:
            ValueError: If the file fails the format validation.
        """
        Speech.__check_wav(wav_file)
        with open(wav_file, 'rb') as file:
            return file.read()

    def __stt(self, wav_file: str) -> str:
        """POST the WAV file to the server and return the recognition result.

        Returns:
            The ``text`` field of the JSON reply when its ``code`` field is
            falsy, otherwise a ``'Server error: ...'`` string containing the
            whole reply.

        Raises:
            requests.HTTPError: If the server answers with a 4xx/5xx status.
            ValueError: If the file fails the format validation.
        """
        print(f'Connecting to \'{self.__server}\'...')
        # NOTE(review): no timeout is passed, so this call can block
        # indefinitely if the server never answers -- consider adding one.
        response = requests.post(url=f'{self.__server}/stt',
                                 data=Speech.__load_wav(wav_file),
                                 headers={'Content-Type': 'audio/wav'})

        # Check the HTTP status before decoding: an error page is often not
        # JSON, and decoding it first would hide the real HTTP failure.
        response.raise_for_status()
        result = response.json()

        return result['text'] if not result['code'] else f'Server error: {result}'

    def run_recognition(self, wav_file: str) -> str:
        """Recognize the speech in ``wav_file`` and return the server's text.

        Args:
            wav_file: Path to a 16000 Hz, mono, 16-bit WAV file.

        Returns:
            The recognized text, or a ``'Server error: ...'`` string when the
            server reports a non-zero ``code``.
        """
        return self.__stt(wav_file)
Simplify main.py, change protected access modifiers to private 2022-01-27 23:15:40 +04:00			`from typing import Final`

Migrate to requests library 2022-01-14 17:41:27 +04:00			`import requests`
Move speech code to separate file 2022-01-14 16:58:50 +04:00			`from scipy.io import wavfile`


			`class Speech:`
Simplify main.py, change protected access modifiers to private 2022-01-27 23:15:40 +04:00			`__server: Final[str] = 'http://vosk.athene.tech'`
Move parse_tree_node.py to NodeMixin class 2022-01-22 11:58:15 +04:00
Move speech code to separate file 2022-01-14 16:58:50 +04:00			`@staticmethod`
			`def __check_wav(wav_file):`
			`sample_rate, sig = wavfile.read(wav_file)`
			`channels = len(sig.shape)`
			`bits = sig.dtype.base.name`
			`if sample_rate != 16000:`
			`raise Exception(f'Sample rate is not 16000: {sample_rate}')`
			`if channels != 1:`
			`raise Exception(f'Number of Channels is not 1 (Not mono): {channels}')`
			`if bits != 'int16':`
			`raise Exception(f'Bits per sample 16: {bits}')`

			`@staticmethod`
			`def __load_wav(wav_file):`
			`Speech.__check_wav(wav_file)`
			`with open(wav_file, 'rb') as file:`
			`result = file.read()`
			`return result`

Simplify main.py, change protected access modifiers to private 2022-01-27 23:15:40 +04:00			`def __stt(self, wav_file):`
			`print(f'Connecting to \'{self.__server}\'...')`
			`response = requests.post(url=f'{self.__server}/stt',`
Migrate to requests library 2022-01-14 17:41:27 +04:00			`data=Speech.__load_wav(wav_file),`
			`headers={'Content-Type': 'audio/wav'})`
			`result = response.json()`

			`if response.status_code != requests.codes.ok:`
			`response.raise_for_status()`
Move speech code to separate file 2022-01-14 16:58:50 +04:00
			`return result['text'] if not result['code'] else f'Server error: {result}'`

Simplify main.py, change protected access modifiers to private 2022-01-27 23:15:40 +04:00			`def run_recognition(self, wav_file: str) -> str:`
			`return self.__stt(wav_file)`