#!/usr/bin/env python3
"""Command-line client that posts a WAV file to an HTTP ``/stt`` endpoint.

Usage: ``SCRIPT FILE [URL]``. The file must be 16000 Hz, mono, 16-bit PCM;
the server's JSON reply is printed as ``Result: ...``.
"""

import json
import struct
import sys
import wave
from io import BytesIO
from urllib.request import Request, urlopen

from scipy.io import wavfile
from scipy.io.wavfile import read as read_wav


def stt(wav_file: str, url: str) -> str:
    """Send *wav_file* to ``<url>/stt`` and return the recognized text.

    Args:
        wav_file: Path to a WAV file accepted by ``_check_wav``.
        url: Base URL of the server; ``/stt`` is appended to it.

    Returns:
        The reply's ``text`` field when its ``code`` field is falsy,
        otherwise a ``'Server error: [code]: text'`` string.

    Raises:
        RuntimeError: If the reply is not a JSON object containing both
            ``code`` and ``text``.
        ValueError: If the WAV file does not have the required format.
    """
    print(f"Connecting to '{url}'...")
    request = Request(
        f'{url}/stt',
        data=_load_wav(wav_file),
        headers={'Content-Type': 'audio/wav'},
    )
    # Close the HTTP response deterministically instead of leaking it.
    with urlopen(request) as response:
        result = json.loads(response.read().decode('utf-8'))
    # The isinstance guard keeps a non-object reply (e.g. a bare number) from
    # surfacing as a TypeError out of the ``in`` checks.
    if not (isinstance(result, dict) and 'code' in result and 'text' in result):
        raise RuntimeError('Wrong reply from server: {}'.format(result))
    if result['code']:
        return 'Server error: [{code}]: {text}'.format(**result)
    return result['text']


def _load_wav(wav_file, convert_rate=16000, convert_width=2, channels=1):
    """Return *wav_file* re-wrapped as in-memory WAV bytes.

    The file is validated with ``_check_wav`` first. Its frames are then
    copied unchanged into a new WAV container whose header carries
    *convert_rate*, *convert_width* and *channels*. No resampling or sample
    conversion is performed, so values other than the defaults would produce
    a header that does not describe the copied data.

    Raises:
        ValueError: If the file is not 16000 Hz, mono, 16-bit.
    """
    _check_wav(wav_file)
    with wave.open(wav_file, 'rb') as in_:
        src_data = in_.readframes(in_.getnframes())
    with BytesIO() as file:
        with wave.open(file, 'wb') as out:
            out.setframerate(convert_rate)
            out.setsampwidth(convert_width)
            out.setnchannels(channels)
            out.writeframes(src_data)
        # Read the buffer only after the writer is closed, so that the header
        # sizes have been finalized.
        result = file.getvalue()
    return result


def _check_wav(wav_file):
    """Validate that *wav_file* is 16000 Hz, mono, with int16 samples.

    Raises:
        ValueError: Naming the first property that does not match.
    """
    sample_rate, sig = wavfile.read(wav_file)
    # wavfile.read returns a 1-D array for mono and (frames, channels) for
    # multi-channel data, so the channel count is the second dimension and
    # not the number of dimensions.
    channels = 1 if sig.ndim == 1 else sig.shape[1]
    bits = sig.dtype.base.name
    if sample_rate != 16000:
        raise ValueError(f'Sample rate is not 16000: {sample_rate}')
    if channels != 1:
        raise ValueError(f'Number of Channels is not 1 (Not mono): {channels}')
    if bits != 'int16':
        raise ValueError(f'Bits per sample is not 16: {bits}')


def _main():
    """Parse ``sys.argv``, run the recognition and print the result."""
    if len(sys.argv) < 2:
        print('Usage: {} FILE [URL]'.format(sys.argv[0]))
        # sys.exit is always available; the bare ``exit`` builtin is injected
        # by the ``site`` module and may be missing.
        sys.exit(1)
    file = sys.argv[1]
    # NOTE(review): with no URL argument the request URL becomes '/stt',
    # which urllib rejects as an unknown url type - confirm the intended
    # default server address.
    server = '' if len(sys.argv) < 3 else sys.argv[2]
    print('Result: {}'.format(stt(file, server)))


if __name__ == '__main__':
    _main()