2022-01-14 17:41:27 +04:00
|
|
|
import requests
|
2022-01-14 16:58:50 +04:00
|
|
|
from scipy.io import wavfile
|
|
|
|
|
|
|
|
|
|
|
|
class Speech:
    """Client for a speech-to-text HTTP server.

    A WAV file is validated (16 kHz, mono, 16-bit integer samples) and its
    raw bytes are POSTed to the ``/stt`` endpoint of the given server.
    """

    @staticmethod
    def __check_wav(wav_file):
        """Verify that ``wav_file`` has the audio format this client sends.

        Args:
            wav_file: Path of the WAV file to inspect.

        Raises:
            ValueError: If the sample rate is not 16000, the audio is not
                mono, or the samples are not ``int16``. ``ValueError`` is a
                subclass of ``Exception``, so ``except Exception`` handlers
                keep working.
        """
        sample_rate, sig = wavfile.read(wav_file)
        # wavfile.read yields a 1-D array for mono data and a
        # (samples, channels) array otherwise, so the real channel count
        # lives in the second dimension rather than in the array rank.
        channels = 1 if sig.ndim == 1 else sig.shape[1]
        bits = sig.dtype.base.name

        if sample_rate != 16000:
            raise ValueError(f'Sample rate is not 16000: {sample_rate}')
        if channels != 1:
            raise ValueError(
                f'Number of Channels is not 1 (Not mono): {channels}')
        if bits != 'int16':
            raise ValueError(f'Bits per sample is not 16: {bits}')

    @staticmethod
    def __load_wav(wav_file):
        """Validate ``wav_file`` and return its complete contents as bytes.

        Args:
            wav_file: Path of the WAV file to read.

        Returns:
            The raw bytes of the file, header included.

        Raises:
            ValueError: If the file fails the format check.
        """
        Speech.__check_wav(wav_file)
        with open(wav_file, 'rb') as file:
            return file.read()

    @staticmethod
    def __stt(wav_file, server, timeout=None):
        """Send ``wav_file`` to ``server`` and return the recognised text.

        Args:
            wav_file: Path of the WAV file to transcribe.
            server: Base URL of the server; ``/stt`` is appended to it.
            timeout: Optional timeout passed to ``requests.post``. ``None``
                (the default) waits indefinitely, as before.

        Returns:
            The ``'text'`` field of the JSON reply when its ``'code'`` field
            is falsy, otherwise a ``'Server error: ...'`` string holding the
            whole reply.

        Raises:
            requests.HTTPError: If the server answers with an error status.
            ValueError: If the WAV file fails the format check.
        """
        print(f'Connecting to \'{server}\'...')
        # Drop trailing slashes so 'http://host/' does not become '//stt'.
        endpoint = server.rstrip('/') + '/stt'
        response = requests.post(url=endpoint,
                                 data=Speech.__load_wav(wav_file),
                                 headers={'Content-Type': 'audio/wav'},
                                 timeout=timeout)

        # Check the HTTP status before decoding: an error reply may not be
        # JSON, and decoding it first would mask the real HTTP failure.
        response.raise_for_status()
        result = response.json()

        return result['text'] if not result['code'] else f'Server error: {result}'

    def run_recognition(self, wav_file: str, server: str, timeout=None) -> str:
        """Transcribe ``wav_file`` using the speech-to-text ``server``.

        Args:
            wav_file: Path of a 16 kHz, mono, 16-bit WAV file.
            server: Base URL of the speech-to-text server.
            timeout: Optional request timeout forwarded to ``requests.post``;
                ``None`` keeps the previous behaviour of no timeout.

        Returns:
            The recognised text, or a ``'Server error: ...'`` string when the
            server reports a non-zero result code.
        """
        return self.__stt(wav_file, server, timeout)
|