import requests
from scipy.io import wavfile


class Speech:
    """Client that uploads a WAV file to a speech-to-text server.

    The audio is validated locally (16 kHz, mono, 16-bit samples) and then
    POSTed as raw bytes to the ``/stt`` endpoint of the given server.
    """

    @staticmethod
    def __check_wav(wav_file):
        """Validate the format of *wav_file*.

        Args:
            wav_file: Path (or file-like object) readable by
                ``scipy.io.wavfile.read``.

        Raises:
            ValueError: If the sample rate is not 16000, the audio is not
                mono, or the sample dtype is not ``int16``.
        """
        sample_rate, sig = wavfile.read(wav_file)
        # scipy returns a 1-D array for mono audio and an array of shape
        # (n_samples, n_channels) otherwise, so the channel count lives in
        # the second dimension rather than in the array rank.
        channels = sig.shape[1] if sig.ndim > 1 else 1
        bits = sig.dtype.base.name

        if sample_rate != 16000:
            raise ValueError(f'Sample rate is not 16000: {sample_rate}')
        if channels != 1:
            raise ValueError(
                f'Number of Channels is not 1 (Not mono): {channels}'
            )
        if bits != 'int16':
            raise ValueError(f'Bits per sample is not 16: {bits}')

    @staticmethod
    def __load_wav(wav_file):
        """Validate *wav_file* and return its raw bytes.

        Raises:
            ValueError: If the file fails the format checks.
        """
        Speech.__check_wav(wav_file)
        with open(wav_file, 'rb') as file:
            return file.read()

    @staticmethod
    def __stt(wav_file, server, timeout=None):
        """POST the WAV file to ``{server}/stt`` and return the transcript.

        Args:
            wav_file: Path of the WAV file to send.
            server: Base URL of the server; a trailing ``/`` is ignored.
            timeout: Passed through to ``requests.post``. ``None`` (the
                default) waits indefinitely.

        Returns:
            The ``text`` field of the JSON reply when its ``code`` field is
            falsy; otherwise a ``'Server error: ...'`` string containing the
            whole reply.

        Raises:
            ValueError: If the WAV file fails the format checks.
            requests.HTTPError: If the server answers with a 4xx/5xx status.
        """
        print(f"Connecting to '{server}'...")
        # Strip a trailing slash so 'http://host/' does not become '//stt'.
        url = f"{server.rstrip('/')}/stt"
        response = requests.post(
            url=url,
            data=Speech.__load_wav(wav_file),
            headers={'Content-Type': 'audio/wav'},
            timeout=timeout,
        )
        # Check the HTTP status before decoding: an error page is often not
        # JSON, and decoding it first would hide the real HTTP failure.
        response.raise_for_status()
        result = response.json()
        if result['code']:
            return f'Server error: {result}'
        return result['text']

    def run_recognition(self, wav_file: str, server: str, *, timeout=None) -> str:
        """Recognise speech in *wav_file* using the given server.

        Args:
            wav_file: Path of a 16 kHz, mono, 16-bit WAV file.
            server: Base URL of the speech-to-text server.
            timeout: Optional timeout for the HTTP request, in the form
                accepted by ``requests``. Defaults to ``None`` (no timeout).

        Returns:
            The recognised text, or a ``'Server error: ...'`` string when the
            server reports a non-zero ``code``.

        Raises:
            ValueError: If the WAV file fails the format checks.
            requests.HTTPError: If the server answers with a 4xx/5xx status.
        """
        return self.__stt(wav_file, server, timeout)