# helper/speech.py

import json
from urllib.request import Request, urlopen

from scipy.io import wavfile


class Speech:
    """Client for a speech-to-text HTTP service.

    A WAV file is validated locally (16 kHz, mono, 16-bit integer samples),
    then posted as-is to ``<server>/stt``; the server is expected to answer
    with a JSON object holding ``code`` and ``text`` keys.
    """

    @staticmethod
    def __check_wav(wav_file) -> None:
        """Validate that *wav_file* is 16 kHz, mono, 16-bit integer audio.

        Args:
            wav_file: Path (or open file object) accepted by
                ``scipy.io.wavfile.read``.

        Raises:
            ValueError: If the sample rate, channel count, or sample type
                does not match the required format.
        """
        sample_rate, sig = wavfile.read(wav_file)
        # wavfile.read yields a 1-D array for mono data and a 2-D
        # (samples, channels) array otherwise, so the channel count is the
        # second dimension -- not the number of dimensions.
        channels = 1 if sig.ndim == 1 else sig.shape[1]
        bits = sig.dtype.base.name
        if sample_rate != 16000:
            raise ValueError(f'Sample rate is not 16000: {sample_rate}')
        if channels != 1:
            raise ValueError(
                f'Number of Channels is not 1 (Not mono): {channels}')
        if bits != 'int16':
            raise ValueError(f'Bits per sample is not 16: {bits}')

    @staticmethod
    def __load_wav(wav_file) -> bytes:
        """Validate *wav_file* and return its raw bytes.

        Args:
            wav_file: Path of the WAV file to load.

        Returns:
            The complete file contents, header included.

        Raises:
            ValueError: If the file fails the format validation.
        """
        Speech.__check_wav(wav_file)
        with open(wav_file, 'rb') as file:
            return file.read()

    @staticmethod
    def __stt(wav_file: str, server: str) -> str:
        """Send *wav_file* to the server and return the recognised text.

        Args:
            wav_file: Path of the WAV file to transcribe.
            server: Base URL of the service; ``/stt`` is appended to it.

        Returns:
            The ``text`` field of the reply when ``code`` is falsy, otherwise
            a ``'Server error: ...'`` string describing the whole reply.

        Raises:
            ValueError: If the WAV file is not in the required format.
            RuntimeError: If the reply is not a JSON object containing both
                ``code`` and ``text``.
        """
        print(f'Connecting to \'{server}\'...')
        # Supplying ``data`` makes this a POST request. A trailing slash on
        # the base URL is dropped so the path does not become '//stt'.
        request = Request(url=f'{server.rstrip("/")}/stt',
                          data=Speech.__load_wav(wav_file),
                          headers={'Content-Type': 'audio/wav'})
        # Close the HTTP response deterministically instead of leaking it.
        with urlopen(request) as response:
            result = json.loads(response.read().decode('utf-8'))

        # Guard against non-object JSON (number, null, ...) for which the
        # membership test would raise TypeError rather than RuntimeError.
        if not (isinstance(result, dict)
                and 'code' in result and 'text' in result):
            raise RuntimeError(f'Wrong reply from server: {result}')
        return result['text'] if not result['code'] else f'Server error: {result}'

    def run(self, wav_file, server):
        """Transcribe *wav_file* using the service at *server*.

        Args:
            wav_file: Path of the WAV file to transcribe.
            server: Base URL of the speech-to-text service.

        Returns:
            The recognised text, or a ``'Server error: ...'`` string when the
            server reports a non-zero code.
        """
        return self.__stt(wav_file, server)