From 44bc308cdcb0da8d84fccaada79c82ee853de32f Mon Sep 17 00:00:00 2001 From: Roman Bazalevskiy Date: Fri, 3 Nov 2023 08:56:00 +0300 Subject: [PATCH] =?utf8?q?=D0=9F=D0=B5=D1=80=D0=B5=D0=B4=D0=B5=D0=BB=D0=B0?= =?utf8?q?=D0=BD=D0=BE=20=D0=BF=D0=BE=D0=B4=20=D1=81=D0=B2=D0=B5=D0=B6?= =?utf8?q?=D0=B8=D0=B9=20Python=20=D0=B8=20Vosk?= MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit --- voicecontrol | 125 ++++++++++++++++++++++----------------------------- 1 file changed, 53 insertions(+), 72 deletions(-) diff --git a/voicecontrol b/voicecontrol index 7216fef..6b4f17b 100755 --- a/voicecontrol +++ b/voicecontrol @@ -17,6 +17,8 @@ from time import time, sleep import webrtcvad +from pprint import pprint + @contextmanager def _pyaudio() -> Generator[PyAudio, None, None]: p = PyAudio() @@ -57,12 +59,6 @@ def SkipSource(source,seconds): except: pass -def Silence(speaker, seconds): - buf = bytes(speaker._frames_per_buffer) - bufs = int((seconds)*speaker._rate/speaker._frames_per_buffer) - for i in range(bufs): - speaker.write(buf) - def PlayBack(pyaud, text, mic = None): global config, last_time @@ -84,7 +80,7 @@ def PlayBack(pyaud, text, mic = None): decoder = MP3Decoder(req) speaker = pyaud.open(output=True, format=paInt16, channels=decoder.num_channels, rate=decoder.sample_rate) - Silence(speaker, 0.3) + pprint(speaker) for chunk in decoder: speaker.write(chunk) @@ -97,7 +93,7 @@ def PlayBack(pyaud, text, mic = None): last_time = time() if mic: - SkipSource(mic, elapsed + 0.5) + SkipSource(mic, elapsed + 0.2) return elapsed @@ -105,7 +101,7 @@ def PlayBack(pyaud, text, mic = None): raise except: - pass + raise else: return 0 @@ -198,49 +194,38 @@ async def ListenPhrase(mic, server): sz = int(mic._rate*frame) sp = int(pause/frame) - try: - - phrase = "" - voice = False + phrase = "" + voice = False - while not phrase: - data = mic.read(sz) - if len(data) == 0: - break - vd = vad.is_speech(data, mic._rate) - if vd and not voice: - voice = True - if config["debug"]: - print("+", end="") + while not phrase: + data = mic.read(sz) + if len(data) == 0: + break + vd = vad.is_speech(data, mic._rate) + if vd and not voice: + voice = True + if config["debug"]: + print("+", end="") + cnt = 0 + if voice and not vd: + cnt = cnt + 1 + if cnt > sp: cnt = 0 - if voice and not vd: - cnt = cnt + 1 - if cnt > sp: - cnt = 0 - voice = False - if config["debug"]: - print("-") - if voice: - print("*",end="") - await server.send(data) - datatxt = await server.recv() - data = json.loads(datatxt) - try: - phrase = data["text"] - confidence = min(map(lambda x: x["conf"], data["result"])) - except: - pass + voice = False + if config["debug"]: + print("-") + if voice: + print("*",end="") + await server.send(data) + datatxt = await server.recv() + data = json.loads(datatxt) + try: + phrase = data["text"] + except: + pass - last_time = time() - - return phrase, confidence - - except KeyboardInterrupt: - raise - except websockets.exceptions.ConnectionClosedError: - raise - except: - return '',0 + last_time = time() + return phrase async def main_loop(uri): @@ -248,13 +233,13 @@ async def main_loop(uri): global config, last_time keyphrase = config["keyphrase"] - confidence_treshold = config["confidence_treshold"] rec_attempts = config["rec_attempts"] commands = config["commands"] with ExitStack() as audio_stack: p = audio_stack.enter_context(_pyaudio()) + s = audio_stack.enter_context(_pyaudio_open_stream(p, format = paInt16, channels = 1, @@ -274,28 +259,28 @@ async def main_loop(uri): ws = await web_stack.enter_async_context(_polite_websocket(ws)) while True: - phrase, confidence = await ListenPhrase(s, ws) + phrase = await ListenPhrase(s, ws) if config["debug"]: - print(phrase,confidence) - if phrase == keyphrase and confidence>=confidence_treshold : - PlayBack(p, "Я жду команду", mic=s) + print(phrase) + if phrase == keyphrase : + print("COMMAND!") + PlayBack(p, "Слушаю!", mic=s) command = "" for i in range(rec_attempts): - phrase, confidence = await ListenPhrase(s, ws) + phrase = await ListenPhrase(s, ws) if config["debug"]: - print(phrase,confidence) - if confidence > confidence_treshold: - if (not commands) or (phrase in commands): - if config["debug"]: - print("Command: ", phrase) - command = phrase - RunCommand(command, p, s) - break - else: - PlayBack(p, "Не знаю такой команды: "+phrase, mic=s) + print(phrase) + if (not commands) or (phrase in commands): + if config["debug"]: + print("Command: ", phrase) + command = phrase + RunCommand(command, p, s) + break else: - PlayBack(p, "Не поняла, слишком неразборчиво", mic=s) + PlayBack(p, "Не знаю такой команды: "+phrase, mic=s) + else: + PlayBack(p, "Не поняла, слишком неразборчиво", mic=s) if not command: PlayBack(p, "Так команду и не поняла...", mic=s) @@ -321,11 +306,6 @@ def get_config(path): except: rec_attempts = 4 - try: - confidence_treshold = float(config['vosk']['confidence_treshold']) - except: - confidence_treshold = 0.4 - try: vosk_server = config['vosk']['server'] except: @@ -389,7 +369,6 @@ def get_config(path): "asr_server": vosk_server, "keyphrase": keyphrase, "rec_attempts": rec_attempts, - "confidence_treshold": confidence_treshold, "tts_url": tts_url, "tts_param": tts_param, "api_attempts": api_attempts, @@ -413,6 +392,7 @@ config = get_config(conf_file) server = config['asr_server'] vad = webrtcvad.Vad(config['vad_mode']) +last_time = time() while True: @@ -423,6 +403,7 @@ while True: main_loop(f'ws://' + server)) except (Exception, KeyboardInterrupt) as e: + raise loop.run_until_complete( loop.shutdown_asyncgens()) if isinstance(e, KeyboardInterrupt): -- 2.34.1