#!/usr/bin/env python3.8
"""Voice-control client.

Reads audio from the microphone, streams the voiced parts to a speech
recognition server over a websocket, waits for a configured key phrase and
then forwards the recognised command to a REST endpoint.  Spoken feedback is
fetched as MP3 from a text-to-speech URL and played through PyAudio.

Usage: the single optional command-line argument is the path of the INI
configuration file (default: ``~/.config/voicecontrol.ini``).
"""
import asyncio
import base64
import configparser
import json
import sys
from contextlib import AsyncExitStack, asynccontextmanager, contextmanager
from os.path import expanduser
from time import sleep, time
from typing import AsyncGenerator, Generator
from urllib.parse import quote, urlencode

import urllib3
import webrtcvad
import websockets
from pyaudio import PyAudio, Stream, paInt16
from streamp3 import MP3Decoder

# Module-level state shared by the functions below.
# `config` and `vad` are filled in by main(); `last_time` is the timestamp of
# the most recent recognised phrase / finished playback.
config = {}
vad = None
last_time = time()


@contextmanager
def _pyaudio() -> Generator[PyAudio, None, None]:
    """Yield a PyAudio instance and terminate it on exit."""
    p = PyAudio()
    try:
        yield p
    finally:
        print('Terminating PyAudio object')
        p.terminate()


@contextmanager
def _pyaudio_open_stream(p: PyAudio, *args, **kwargs) -> Generator[Stream, None, None]:
    """Open a stream on *p* (arguments are passed to ``p.open``) and close it on exit."""
    s = p.open(*args, **kwargs)
    try:
        yield s
    finally:
        print('Closing PyAudio Stream')
        s.close()


@asynccontextmanager
async def _polite_websocket(ws: websockets.WebSocketClientProtocol) -> AsyncGenerator[websockets.WebSocketClientProtocol, None]:
    """Yield *ws* and, on exit, send the ``eof`` message and print the reply.

    If the connection is already closed the farewell is skipped, so that the
    exception which ended the session is not replaced by a second one raised
    from this cleanup.
    """
    try:
        yield ws
    finally:
        print('Terminating connection')
        try:
            await ws.send('{"eof" : 1}')
            print(await ws.recv())
        except websockets.exceptions.ConnectionClosed as err:
            print(f'Connection already closed: {err}')


def SkipSource(source, seconds):
    """Read and discard about *seconds* of audio from the input stream *source*.

    Best effort: read errors are not propagated (they are printed in debug
    mode).  KeyboardInterrupt is not intercepted.
    """
    if config["debug"]:
        print("Skipping: ", seconds)
    try:
        frames = source._frames_per_buffer
        bufs = int(seconds * source._rate / frames)
        for _ in range(bufs):
            source.read(frames)
    except Exception as err:
        if config["debug"]:
            print("Skipping failed: ", err)


def PlayBack(pyaud, text, mic=None):
    """Speak *text* through the configured text-to-speech URL.

    The MP3 returned by the TTS service is decoded and written to a PyAudio
    output stream opened on *pyaud*.  After playback, if *mic* is given, the
    audio captured since the last recognised phrase (plus 0.5 s) is discarded
    from it with SkipSource.

    Returns:
        0 when no TTS URL is configured or *text* is empty, the number of
        seconds elapsed since ``last_time`` after a successful playback, and
        None when playback failed (the error is printed, not raised).
    """
    global last_time
    playback_url = config["tts_url"]
    playback_param = config["tts_param"]
    if not (playback_url and text):
        return 0

    http = urllib3.PoolManager()
    req = None
    speaker = None
    try:
        if playback_param:
            url = playback_url.format(urlencode({playback_param: text}))
        else:
            url = playback_url + quote(text)
        req = http.request('GET', url, preload_content=False)
        decoder = MP3Decoder(req)
        speaker = pyaud.open(output=True, format=paInt16,
                             channels=decoder.num_channels,
                             rate=decoder.sample_rate)
        for chunk in decoder:
            speaker.write(chunk)
        sleep(0.1)
        speaker.stop_stream()
    except Exception as err:
        # Feedback is best effort: a TTS failure must not stop the assistant.
        print(f'Playback failed: {err}')
        return None
    finally:
        # Release the audio device and the HTTP connection on every path.
        if speaker is not None:
            speaker.close()
        if req is not None:
            req.release_conn()

    elapsed = time() - last_time
    last_time = time()
    if mic is not None:
        SkipSource(mic, elapsed + 0.5)
    return elapsed


def _auth_headers():
    """Return request headers with basic auth when an API user is configured."""
    user = config["api_user"]
    if user:
        return urllib3.util.make_headers(basic_auth=f'{user}:{config["api_pwd"]}')
    return urllib3.util.make_headers()


def _poll_reply(http, reply_url, attempts):
    """Poll *reply_url* up to *attempts* times for a ``state`` other than "NULL".

    Returns the stripped state string, or "NULL" if no attempt produced one.
    A one second pause follows every unsuccessful attempt.
    """
    res = "NULL"
    headers = _auth_headers()
    for _ in range(attempts):
        try:
            body = http.request('GET', reply_url, headers=headers).data
            res = json.loads(body)['state'].strip()
            if config["debug"]:
                print(res)
            if res != 'NULL':
                break
        except Exception as err:
            if config["debug"]:
                print("Reply request failed: ", err)
        sleep(1)
    return res


def RunCommand(command, pyaud, mic=None):
    """POST *command* to the command URL and speak the server's reply.

    When a reply URL is configured it is polled for the answer, which is then
    played back through *pyaud* (a fixed phrase is played if no answer
    arrives).  Finally an empty body is POSTed to the command URL.  Errors are
    printed, not raised; KeyboardInterrupt is not intercepted.
    """
    command_url = config["command_url"]
    reply_url = config["reply_url"]
    if not command_url:
        return

    http = urllib3.PoolManager()
    post_headers = _auth_headers()
    post_headers['Content-Type'] = 'text/plain'
    post_headers['Accept'] = 'application/json'
    try:
        http.request('POST', command_url, headers=post_headers,
                     body=command.encode('UTF-8'))
        if reply_url:
            sleep(0.5)
            res = _poll_reply(http, reply_url, config["api_attempts"])
            if res == "NULL":
                PlayBack(pyaud, "Сервер не ответил", mic=mic)
            elif res:
                PlayBack(pyaud, res, mic=mic)
    except Exception as err:
        print(f'Command failed: {err}')

    # Always finish by sending an empty body to the command URL, using the
    # same headers as the command itself.
    try:
        http.request('POST', command_url, headers=post_headers, body="")
    except Exception as err:
        if config["debug"]:
            print("Command reset failed: ", err)


async def ListenPhrase(mic, server):
    """Stream voiced microphone audio to *server* until a phrase is recognised.

    Audio is read from *mic* in 30 ms frames; the module-level ``vad`` decides
    whether a frame is speech, and frames are forwarded only while the
    ``voice`` flag is set.  Each server reply is parsed as JSON; the loop ends
    at the first reply with a non-empty ``text``.

    Returns:
        ``(phrase, confidence)`` where confidence is the lowest ``conf`` among
        the reply's ``result`` entries, or 0 when the reply carries none.
        ``('', 0)`` is returned if the microphone yields no data.

    Raises:
        Whatever the stream, the VAD or the websocket raise (for example
        ``websockets.exceptions.ConnectionClosedError``).
    """
    global last_time
    frame = 30 / 1000  # 30 ms
    pause = 2          # seconds of non-speech after which forwarding stops
    frame_samples = int(mic._rate * frame)
    silence_limit = int(pause / frame)

    phrase = ""
    confidence = 0
    voice = False
    silent_frames = 0
    while not phrase:
        data = mic.read(frame_samples)
        if len(data) == 0:
            break
        is_speech = vad.is_speech(data, mic._rate)
        if is_speech and not voice:
            voice = True
            if config["debug"]:
                print("+", end="")
            silent_frames = 0
        if voice and not is_speech:
            # NOTE(review): the counter is not reset when speech resumes, so
            # it measures cumulative rather than consecutive silence -
            # confirm whether that is intended.
            silent_frames += 1
            if silent_frames > silence_limit:
                silent_frames = 0
                voice = False
                if config["debug"]:
                    print("-")
        if voice:
            print("*", end="")
            await server.send(data)
            reply = json.loads(await server.recv())
            try:
                phrase = reply["text"]
                confidence = min(word["conf"] for word in reply["result"])
            except (KeyError, TypeError, ValueError):
                # Replies without "text"/"result" (or with an empty result
                # list) carry no usable confidence; keep what we have.
                pass
    last_time = time()
    return phrase, confidence


async def hello(uri):
    """Run the listen / recognise / execute loop against the server at *uri*.

    Connects to the websocket, sends the recogniser configuration, opens the
    microphone (16 kHz, mono, 16-bit) and then loops forever: once the key
    phrase is heard with sufficient confidence, up to ``rec_attempts`` phrases
    are taken as command candidates and the first acceptable one is passed to
    RunCommand.  Returns only by raising.
    """
    keyphrase = config["keyphrase"]
    confidence_treshold = config["confidence_treshold"]
    rec_attempts = config["rec_attempts"]
    commands = config["commands"]

    async with AsyncExitStack() as stack:
        ws = await stack.enter_async_context(websockets.connect(uri))
        print('Type Ctrl-C to exit')

        recognizer_config = {}
        if commands:
            # Build a new list: appending to config["commands"] itself would
            # make the key phrase a valid command and grow the list on every
            # reconnect.
            recognizer_config["phrase_list"] = list(commands) + [keyphrase]
        # With no command list any phrase is accepted as a command below, so
        # no phrase list is sent (presumably leaving recognition
        # unrestricted - verify against the server).
        recognizer_config["sample_rate"] = 16000.0
        await ws.send(json.dumps({"config": recognizer_config}, ensure_ascii=False))

        ws = await stack.enter_async_context(_polite_websocket(ws))
        p = stack.enter_context(_pyaudio())
        s = stack.enter_context(_pyaudio_open_stream(
            p, format=paInt16, channels=1, rate=16000, input=True,
            frames_per_buffer=2000))

        while True:
            phrase, confidence = await ListenPhrase(s, ws)
            if config["debug"]:
                print(phrase, confidence)
            if not (phrase == keyphrase and confidence >= confidence_treshold):
                continue

            PlayBack(p, "Я жду команду", mic=s)
            command = ""
            for _ in range(rec_attempts):
                phrase, confidence = await ListenPhrase(s, ws)
                if config["debug"]:
                    print(phrase, confidence)
                if confidence <= confidence_treshold:
                    PlayBack(p, "Не поняла, слишком неразборчиво", mic=s)
                elif (not commands) or (phrase in commands):
                    if config["debug"]:
                        print("Command: ", phrase)
                    command = phrase
                    RunCommand(command, p, s)
                    break
                else:
                    PlayBack(p, "Не знаю такой команды: " + phrase, mic=s)
            if not command:
                PlayBack(p, "Так команду и не поняла...", mic=s)


def _read_option(parser, section, name, default=None, convert=str):
    """Return ``convert(parser[section][name])``, or *default* if missing or invalid."""
    try:
        return convert(parser[section][name])
    except (KeyError, ValueError, configparser.Error):
        return default


def get_config(path):
    """Load the INI file at *path* and return the settings as a dict.

    ``[vosk] keyphrase`` and ``[vosk] server`` are mandatory; every other
    option falls back to a default when it is missing or cannot be converted.
    When ``[commands] command_file`` is set, that file is read and its lines
    become the list of accepted commands; otherwise ``commands`` is None.

    Raises:
        KeyError: a mandatory option is missing (a message is printed first).
        OSError: the configured command file cannot be read.
    """
    parser = configparser.ConfigParser()
    parser.read(path)

    try:
        keyphrase = parser['vosk']['keyphrase']
    except KeyError:
        print("Обязательный параметр - ключевое слово - не задан!")
        raise
    try:
        vosk_server = parser['vosk']['server']
    except KeyError:
        print("Обязательный параметр - сервер распознавания - не задан!")
        raise

    # Credentials are only used as a pair.
    api_user = _read_option(parser, 'rest', 'api_user')
    api_pwd = _read_option(parser, 'rest', 'api_pwd')
    if api_user is None or api_pwd is None:
        api_user = api_pwd = None

    commands = None
    command_file = _read_option(parser, 'commands', 'command_file')
    if command_file:
        with open(command_file) as file:
            commands = file.read().splitlines()

    return {
        "asr_server": vosk_server,
        "keyphrase": keyphrase,
        "rec_attempts": _read_option(parser, 'vosk', 'attempts', 4, int),
        "confidence_treshold": _read_option(
            parser, 'vosk', 'confidence_treshold', 0.4, float),
        "tts_url": _read_option(parser, 'rest', 'tts_url'),
        "tts_param": _read_option(parser, 'rest', 'tts_param'),
        "api_attempts": _read_option(parser, 'rest', 'attempts', 2, int),
        "api_user": api_user,
        "api_pwd": api_pwd,
        "command_url": _read_option(parser, 'rest', 'command_url'),
        "reply_url": _read_option(parser, 'rest', 'reply_url'),
        "debug": _read_option(
            parser, 'system', 'debug', False,
            lambda value: value.lower() == "true"),
        "commands": commands,
        # Converted to int so a configured value has the same type as the
        # default; the option name's spelling is kept for compatibility with
        # existing configuration files.
        "vad_mode": _read_option(parser, 'vad', 'agressive', 3, int),
    }


def main():
    """Load the configuration and run hello() forever, restarting on errors.

    Ctrl-C ends the program with exit status 0; any other exception is
    printed and the session is restarted after 10 seconds.
    """
    global config, vad
    if len(sys.argv) == 2:
        conf_file = sys.argv[1]
    else:
        conf_file = expanduser("~") + "/.config/voicecontrol.ini"
    config = get_config(conf_file)
    server = config['asr_server']
    vad = webrtcvad.Vad(config['vad_mode'])

    loop = asyncio.get_event_loop()
    while True:
        try:
            loop.run_until_complete(hello('ws://' + server))
        except KeyboardInterrupt:
            loop.run_until_complete(loop.shutdown_asyncgens())
            loop.stop()
            print('Bye')
            sys.exit(0)
        except Exception as e:
            loop.run_until_complete(loop.shutdown_asyncgens())
            print(f'Oops! {e}')
            print('Restarting process...')
            sleep(10)


if __name__ == "__main__":
    main()