X-Git-Url: https://git.rvb.name/voicecontrol.git/blobdiff_plain/fa707ac30c7a66e4888012415590917f6e5cfa34..HEAD:/voicecontrol diff --git a/voicecontrol b/voicecontrol index fafda2c..6b4f17b 100755 --- a/voicecontrol +++ b/voicecontrol @@ -1,9 +1,9 @@ -#!/usr/bin/env python3.8 +#!/usr/bin/env python3 import websockets,asyncio import sys from pyaudio import PyAudio, Stream, paInt16 -from contextlib import asynccontextmanager, contextmanager, AsyncExitStack +from contextlib import asynccontextmanager, contextmanager, AsyncExitStack, ExitStack from typing import AsyncGenerator, Generator from urllib.parse import urlencode, quote @@ -17,6 +17,8 @@ from time import time, sleep import webrtcvad +from pprint import pprint + @contextmanager def _pyaudio() -> Generator[PyAudio, None, None]: p = PyAudio() @@ -48,7 +50,7 @@ def SkipSource(source,seconds): global config try: if config["debug"]: - print("Skipping: ", seconds) + print("Skipping: ", seconds) bufs = int((seconds)*source._rate/source._frames_per_buffer) for i in range(bufs): buffer = source.read(source._frames_per_buffer) @@ -78,6 +80,7 @@ def PlayBack(pyaud, text, mic = None): decoder = MP3Decoder(req) speaker = pyaud.open(output=True, format=paInt16, channels=decoder.num_channels, rate=decoder.sample_rate) + pprint(speaker) for chunk in decoder: speaker.write(chunk) @@ -90,7 +93,7 @@ def PlayBack(pyaud, text, mic = None): last_time = time() if mic: - SkipSource(mic, elapsed + 0.5) + SkipSource(mic, elapsed + 0.2) return elapsed @@ -98,7 +101,7 @@ def PlayBack(pyaud, text, mic = None): raise except: - pass + raise else: return 0 @@ -117,42 +120,68 @@ def RunCommand(command, pyaud, mic = None): if command_url: try: + if config["debug"]: + print('Preparing command') if command_user: my_headers = urllib3.util.make_headers(basic_auth=command_user+':'+command_pwd) else: my_headers = urllib3.util.make_headers() my_headers['Content-Type']='text/plain' my_headers['Accept']='apllication/json' - http.request('POST',command_url,headers=my_headers,body=command.encode('UTF-8')) - if reply_url: - sleep(0.5) - res="NULL" - for i in range(api_attempts): - try: - if command_user: - my_headers = urllib3.util.make_headers(basic_auth=command_user+':'+command_pwd) - else: - my_headers = urllib3.util.make_headers() - req=http.request('GET',reply_url,headers=my_headers).data - res = json.loads(req)['state'].strip() - if config["debug"]: - print(res) - if not(res == 'NULL'): - break - sleep(1) - except KeyboardInterrupt: - raise - except: - sleep(1) - if res and not(res=="NULL"): - PlayBack(pyaud, res, mic=mic) - elif res=="NULL": - PlayBack(pyaud, "Сервер не ответил", mic=mic) - http.request('POST',command_url, headers=my_headers, body="") + if config["debug"]: + print('Sending command') + sent = False + for i in range(api_attempts): + try: + http.request('POST',command_url,headers=my_headers,body=command.encode('UTF-8')) + sent = True + break + except Exception as e: + print('Exception: '+str(e)) + sleep(0.5) + if sent: + if config["debug"]: + print('Command sent') + if reply_url: + sleep(0.5) + res="NULL" + for i in range(api_attempts): + try: + if command_user: + my_headers = urllib3.util.make_headers(basic_auth=command_user+':'+command_pwd) + else: + my_headers = urllib3.util.make_headers() + req=http.request('GET',reply_url,headers=my_headers).data + res = json.loads(req)['state'].strip() + if config["debug"]: + print(res) + if not(res == 'NULL'): + break + sleep(1) + except KeyboardInterrupt: + raise + except Exception as e: + print('Exception: '+str(e)) + sleep(1) + if res and not(res=="NULL"): + PlayBack(pyaud, res, mic=mic) + elif res=="NULL": + PlayBack(pyaud, "Сервер не ответил", mic=mic) + if command_user: + my_headers = urllib3.util.make_headers(basic_auth=command_user+':'+command_pwd) + else: + my_headers = urllib3.util.make_headers() + my_headers['Content-Type']='text/plain' + my_headers['Accept']='apllication/json' + command="" + http.request('POST',command_url, headers=my_headers, body=command.encode('UTF-8')) + else: + PlayBack(pyaud, "Сервер недоступен", mic=mic) except KeyboardInterrupt: raise - except: + except Exception as e: try: + print('Exception: '+str(e)) http.request('POST',command_url, headers=my_headers, body="") except: pass @@ -165,104 +194,101 @@ async def ListenPhrase(mic, server): sz = int(mic._rate*frame) sp = int(pause/frame) - try: - - phrase = "" - voice = False + phrase = "" + voice = False - while not phrase: - data = mic.read(sz) - if len(data) == 0: - break - vd = vad.is_speech(data, mic._rate) - if vd and not voice: - voice = True - if config["debug"]: - print("+", end="") + while not phrase: + data = mic.read(sz) + if len(data) == 0: + break + vd = vad.is_speech(data, mic._rate) + if vd and not voice: + voice = True + if config["debug"]: + print("+", end="") + cnt = 0 + if voice and not vd: + cnt = cnt + 1 + if cnt > sp: cnt = 0 - if voice and not vd: - cnt = cnt + 1 - if cnt > sp: - cnt = 0 - voice = False - if config["debug"]: - print("-") - if voice: - print("*",end="") - await server.send(data) - datatxt = await server.recv() - data = json.loads(datatxt) - try: - phrase = data["text"] - confidence = min(map(lambda x: x["conf"], data["result"])) - except: - pass + voice = False + if config["debug"]: + print("-") + if voice: + print("*",end="") + await server.send(data) + datatxt = await server.recv() + data = json.loads(datatxt) + try: + phrase = data["text"] + except: + pass - last_time = time() - - return phrase, confidence - - except KeyboardInterrupt: - raise - except websockets.exceptions.ConnectionClosedError: - raise - except: - raise - return '',0 + last_time = time() + return phrase -async def hello(uri): +async def main_loop(uri): global config, last_time keyphrase = config["keyphrase"] - confidence_treshold = config["confidence_treshold"] rec_attempts = config["rec_attempts"] commands = config["commands"] - async with AsyncExitStack() as stack: - ws = await stack.enter_async_context(websockets.connect(uri)) - print('Type Ctrl-C to exit') - phrases = config["commands"] - phrases.append(config["keyphrase"]) - phrases = json.dumps(phrases, ensure_ascii=False) - await ws.send('{"config" : { "phrase_list" : '+phrases+', "sample_rate" : 16000.0}}') - - ws = await stack.enter_async_context(_polite_websocket(ws)) - p = stack.enter_context(_pyaudio()) - s = stack.enter_context(_pyaudio_open_stream(p, + + with ExitStack() as audio_stack: + p = audio_stack.enter_context(_pyaudio()) + + s = audio_stack.enter_context(_pyaudio_open_stream(p, format = paInt16, channels = 1, rate = 16000, input = True, frames_per_buffer = 2000)) - while True: - phrase, confidence = await ListenPhrase(s, ws) - if config["debug"]: - print(phrase,confidence) - if phrase == keyphrase and confidence>=confidence_treshold : - PlayBack(p, "Я жду команду", mic=s) - command = "" - - for i in range(rec_attempts): - phrase, confidence = await ListenPhrase(s, ws) - if config["debug"]: - print(phrase,confidence) - if confidence > confidence_treshold: - if (not commands) or (phrase in commands): - if config["debug"]: - print("Command: ", phrase) - command = phrase - RunCommand(command, p, s) - break - else: - PlayBack(p, "Не знаю такой команды: "+phrase, mic=s) - else: - PlayBack(p, "Не поняла, слишком неразборчиво", mic=s) - - if not command: - PlayBack(p, "Так команду и не поняла...", mic=s) + while True: + try: + async with AsyncExitStack() as web_stack: + ws = await web_stack.enter_async_context(websockets.connect(uri)) + print('Type Ctrl-C to exit') + phrases = [] + config["commands"] + phrases.append(config["keyphrase"]) + phrases = json.dumps(phrases, ensure_ascii=False) + await ws.send('{"config" : { "phrase_list" : '+phrases+', "sample_rate" : 16000.0}}') + + ws = await web_stack.enter_async_context(_polite_websocket(ws)) + while True: + phrase = await ListenPhrase(s, ws) + if config["debug"]: + print(phrase) + if phrase == keyphrase : + print("COMMAND!") + PlayBack(p, "Слушаю!", mic=s) + command = "" + + for i in range(rec_attempts): + phrase = await ListenPhrase(s, ws) + if config["debug"]: + print(phrase) + if (not commands) or (phrase in commands): + if config["debug"]: + print("Command: ", phrase) + command = phrase + RunCommand(command, p, s) + break + else: + PlayBack(p, "Не знаю такой команды: "+phrase, mic=s) + else: + PlayBack(p, "Не поняла, слишком неразборчиво", mic=s) + + if not command: + PlayBack(p, "Так команду и не поняла...", mic=s) + except KeyboardInterrupt: + raise + except Exception as e: + print('Exception: '+str(e)) + pass def get_config(path): @@ -280,11 +306,6 @@ def get_config(path): except: rec_attempts = 4 - try: - confidence_treshold = float(config['vosk']['confidence_treshold']) - except: - confidence_treshold = 0.4 - try: vosk_server = config['vosk']['server'] except: @@ -348,7 +369,6 @@ def get_config(path): "asr_server": vosk_server, "keyphrase": keyphrase, "rec_attempts": rec_attempts, - "confidence_treshold": confidence_treshold, "tts_url": tts_url, "tts_param": tts_param, "api_attempts": api_attempts, @@ -372,6 +392,7 @@ config = get_config(conf_file) server = config['asr_server'] vad = webrtcvad.Vad(config['vad_mode']) +last_time = time() while True: @@ -379,9 +400,10 @@ while True: loop = asyncio.get_event_loop() loop.run_until_complete( - hello(f'ws://' + server)) + main_loop(f'ws://' + server)) except (Exception, KeyboardInterrupt) as e: + raise loop.run_until_complete( loop.shutdown_asyncgens()) if isinstance(e, KeyboardInterrupt):