From fa707ac30c7a66e4888012415590917f6e5cfa34 Mon Sep 17 00:00:00 2001 From: Roman Bazalevsky Date: Sat, 28 Nov 2020 10:50:27 +0300 Subject: [PATCH 1/4] =?utf8?q?=D0=93=D0=BE=D0=BB=D0=BE=D1=81=D0=BE=D0=B2?= =?utf8?q?=D0=BE=D0=B5=20=D1=83=D0=BF=D1=80=D0=B0=D0=B2=D0=BB=D0=B5=D0=BD?= =?utf8?q?=D0=B8=D0=B5:?= MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit - на сервере устанавливается vosk-api, голосовая модель и запускается vosk-server - на клиенте устанавливается WebRTC VAD и запускается vosk-client Из-за некоторых особенностей OpenHAB используется такой протокол: - при обнаружении ключевой фразы клиент принимает команду и через REST API передает ее исполнителю - исполнитель обрабатывает команду и по завершении отвечает клиенту на запрос по другому URL - клиент передает пустую команду, после этого ответ тоже начинает отдаваться пустым Каждый клиентский хост использует свой URL, соответствующий Item'у в OpenHAB. Сервер, распознающий речь, - общий на локальную сеть. 
--- voicecontrol | 394 ++++++++++++++++++++++++++++++++++++++++++++ voicecontrol.ini | 21 +++ vosk-client.service | 13 ++ vosk-server | 83 ++++++++++ vosk-server.service | 12 ++ 5 files changed, 523 insertions(+) create mode 100755 voicecontrol create mode 100644 voicecontrol.ini create mode 100644 vosk-client.service create mode 100755 vosk-server create mode 100644 vosk-server.service diff --git a/voicecontrol b/voicecontrol new file mode 100755 index 0000000..fafda2c --- /dev/null +++ b/voicecontrol @@ -0,0 +1,394 @@ +#!/usr/bin/env python3.8 + +import websockets,asyncio +import sys +from pyaudio import PyAudio, Stream, paInt16 +from contextlib import asynccontextmanager, contextmanager, AsyncExitStack +from typing import AsyncGenerator, Generator + +from urllib.parse import urlencode, quote +import urllib3, base64, json + +import configparser +from os.path import expanduser +from streamp3 import MP3Decoder + +from time import time, sleep + +import webrtcvad + +@contextmanager +def _pyaudio() -> Generator[PyAudio, None, None]: + p = PyAudio() + try: + yield p + finally: + print('Terminating PyAudio object') + p.terminate() + +@contextmanager +def _pyaudio_open_stream(p: PyAudio, *args, **kwargs) -> Generator[Stream, None, None]: + s = p.open(*args, **kwargs) + try: + yield s + finally: + print('Closing PyAudio Stream') + s.close() + +@asynccontextmanager +async def _polite_websocket(ws: websockets.WebSocketClientProtocol) -> AsyncGenerator[websockets.WebSocketClientProtocol, None]: + try: + yield ws + finally: + print('Terminating connection') + await ws.send('{"eof" : 1}') + print(await ws.recv()) + +def SkipSource(source,seconds): + global config + try: + if config["debug"]: + print("Skipping: ", seconds) + bufs = int((seconds)*source._rate/source._frames_per_buffer) + for i in range(bufs): + buffer = source.read(source._frames_per_buffer) + except KeyboardInterrupt: + raise + except: + pass + +def PlayBack(pyaud, text, mic = None): + global config, last_time 
+ + http = urllib3.PoolManager() + + playback_url = config["tts_url"] + playback_param = config["tts_param"] + + if playback_url and text: + + try: + + if playback_param: + url = playback_url.format(urlencode({playback_param:text})) + else: + url = playback_url+quote(text) + + req = http.request('GET', url, preload_content=False) + decoder = MP3Decoder(req) + + speaker = pyaud.open(output=True, format=paInt16, channels=decoder.num_channels, rate=decoder.sample_rate) + + for chunk in decoder: + speaker.write(chunk) + + sleep(0.1) + speaker.stop_stream() + speaker.close() + + elapsed = time() - last_time + last_time = time() + + if mic: + SkipSource(mic, elapsed + 0.5) + + return elapsed + + except KeyboardInterrupt: + raise + + except: + pass + + else: + return 0 + +def RunCommand(command, pyaud, mic = None): + + global config + + http = urllib3.PoolManager() + + command_url = config["command_url"] + reply_url = config["reply_url"] + command_user = config["api_user"] + command_pwd = config["api_pwd"] + api_attempts = config["api_attempts"] + + if command_url: + try: + if command_user: + my_headers = urllib3.util.make_headers(basic_auth=command_user+':'+command_pwd) + else: + my_headers = urllib3.util.make_headers() + my_headers['Content-Type']='text/plain' + my_headers['Accept']='apllication/json' + http.request('POST',command_url,headers=my_headers,body=command.encode('UTF-8')) + if reply_url: + sleep(0.5) + res="NULL" + for i in range(api_attempts): + try: + if command_user: + my_headers = urllib3.util.make_headers(basic_auth=command_user+':'+command_pwd) + else: + my_headers = urllib3.util.make_headers() + req=http.request('GET',reply_url,headers=my_headers).data + res = json.loads(req)['state'].strip() + if config["debug"]: + print(res) + if not(res == 'NULL'): + break + sleep(1) + except KeyboardInterrupt: + raise + except: + sleep(1) + if res and not(res=="NULL"): + PlayBack(pyaud, res, mic=mic) + elif res=="NULL": + PlayBack(pyaud, "Сервер не ответил", 
mic=mic) + http.request('POST',command_url, headers=my_headers, body="") + except KeyboardInterrupt: + raise + except: + try: + http.request('POST',command_url, headers=my_headers, body="") + except: + pass + +async def ListenPhrase(mic, server): + global config,last_time, vad + + frame = 30/1000 # 30 ms + pause = 2 + sz = int(mic._rate*frame) + sp = int(pause/frame) + + try: + + phrase = "" + voice = False + + while not phrase: + data = mic.read(sz) + if len(data) == 0: + break + vd = vad.is_speech(data, mic._rate) + if vd and not voice: + voice = True + if config["debug"]: + print("+", end="") + cnt = 0 + if voice and not vd: + cnt = cnt + 1 + if cnt > sp: + cnt = 0 + voice = False + if config["debug"]: + print("-") + if voice: + print("*",end="") + await server.send(data) + datatxt = await server.recv() + data = json.loads(datatxt) + try: + phrase = data["text"] + confidence = min(map(lambda x: x["conf"], data["result"])) + except: + pass + + last_time = time() + + return phrase, confidence + + except KeyboardInterrupt: + raise + except websockets.exceptions.ConnectionClosedError: + raise + except: + raise + return '',0 + + +async def hello(uri): + + global config, last_time + + keyphrase = config["keyphrase"] + confidence_treshold = config["confidence_treshold"] + rec_attempts = config["rec_attempts"] + commands = config["commands"] + + async with AsyncExitStack() as stack: + ws = await stack.enter_async_context(websockets.connect(uri)) + print('Type Ctrl-C to exit') + phrases = config["commands"] + phrases.append(config["keyphrase"]) + phrases = json.dumps(phrases, ensure_ascii=False) + await ws.send('{"config" : { "phrase_list" : '+phrases+', "sample_rate" : 16000.0}}') + + ws = await stack.enter_async_context(_polite_websocket(ws)) + p = stack.enter_context(_pyaudio()) + s = stack.enter_context(_pyaudio_open_stream(p, + format = paInt16, + channels = 1, + rate = 16000, + input = True, + frames_per_buffer = 2000)) + while True: + phrase, confidence = await 
ListenPhrase(s, ws) + if config["debug"]: + print(phrase,confidence) + if phrase == keyphrase and confidence>=confidence_treshold : + PlayBack(p, "Я жду команду", mic=s) + command = "" + + for i in range(rec_attempts): + phrase, confidence = await ListenPhrase(s, ws) + if config["debug"]: + print(phrase,confidence) + if confidence > confidence_treshold: + if (not commands) or (phrase in commands): + if config["debug"]: + print("Command: ", phrase) + command = phrase + RunCommand(command, p, s) + break + else: + PlayBack(p, "Не знаю такой команды: "+phrase, mic=s) + else: + PlayBack(p, "Не поняла, слишком неразборчиво", mic=s) + + if not command: + PlayBack(p, "Так команду и не поняла...", mic=s) + + +def get_config(path): + + config = configparser.ConfigParser() + config.read(path) + + try: + keyphrase = config['vosk']['keyphrase'] + except: + print ("Обязательный параметр - ключевое слово - не задан!") + raise + + try: + rec_attempts = int(config['vosk']['attempts']) + except: + rec_attempts = 4 + + try: + confidence_treshold = float(config['vosk']['confidence_treshold']) + except: + confidence_treshold = 0.4 + + try: + vosk_server = config['vosk']['server'] + except: + print ("Обязательный параметр - сервер распознавания - не задан!") + raise + + try: + command_file=config['commands']['command_file'] + with open(command_file) as file: + commands = file.read().splitlines() + except: + commands = None + + try: + tts_url=config['rest']['tts_url'] + except: + tts_url = None + + try: + tts_param=config['rest']['tts_param'] + except: + tts_param = None + + try: + api_attempts=int(config['rest']['attempts']) + except: + api_attempts = 2 + + try: + api_user=config['rest']['api_user'] + api_pwd=config['rest']['api_pwd'] + except: + api_user = None + api_pwd = None + + try: + command_url=config['rest']['command_url'] + except: + command_url = None + + try: + reply_url=config['rest']['reply_url'] + except: + reply_url = None + + try: + vad_mode=config['vad']['agressive'] + 
except: + vad_mode = 3 + + try: + debug = (config['system']['debug'].lower() == "true") + except: + debug = False + + if command_file: + with open(command_file) as file: + commands = file.read().splitlines() + + return { + "asr_server": vosk_server, + "keyphrase": keyphrase, + "rec_attempts": rec_attempts, + "confidence_treshold": confidence_treshold, + "tts_url": tts_url, + "tts_param": tts_param, + "api_attempts": api_attempts, + "api_user": api_user, + "api_pwd": api_pwd, + "command_url": command_url, + "reply_url": reply_url, + "debug": debug, + "commands": commands, + "vad_mode": vad_mode + } + + +if len(sys.argv) == 2: + conf_file = sys.argv[1] +else: + conf_file = expanduser("~")+"/.config/voicecontrol.ini" + +config = get_config(conf_file) + +server = config['asr_server'] + +vad = webrtcvad.Vad(config['vad_mode']) + +while True: + + try: + + loop = asyncio.get_event_loop() + loop.run_until_complete( + hello(f'ws://' + server)) + + except (Exception, KeyboardInterrupt) as e: + loop.run_until_complete( + loop.shutdown_asyncgens()) + if isinstance(e, KeyboardInterrupt): + loop.stop() + print('Bye') + exit(0) + else: + print(f'Oops! 
{e}') + print('Restarting process...') + sleep(10) diff --git a/voicecontrol.ini b/voicecontrol.ini new file mode 100644 index 0000000..bceffb3 --- /dev/null +++ b/voicecontrol.ini @@ -0,0 +1,21 @@ +[vosk] +keyphrase = окей гестия +attempts = 3 +confidence_threshold = 0.6 +server = 192.168.1.100:2700 + +[commands] +command_file = /etc/vosk/commands.txt + +[rest] +attempts = 5 +tts_url = http://estia.rvb-home.lan/festival?{0} +tts_param = q + +api_user = openhabrest +api_pwd = somestrictpwd +command_url = https://openhab.rvb.name/rest/items/Command +reply_url = https://openhab.rvb.name/rest/items/Reply + +[system] +debug = True diff --git a/vosk-client.service b/vosk-client.service new file mode 100644 index 0000000..5bfb7b2 --- /dev/null +++ b/vosk-client.service @@ -0,0 +1,13 @@ +[Unit] +Description=Vosk ASR service client + +[Service] +Environment=PULSE_SERVER=localhost +Type=simple +ExecStartPre=/usr/bin/pactl set-source-volume alsa_input.usb-TP6920_TP6920_0621-00.analog-mono 135% +ExecStart=/usr/local/bin/voicecontrol /etc/vosk/voicecontrol.ini +Restart=always +RestartSec=30s + +[Install] +WantedBy=multi-user.target diff --git a/vosk-server b/vosk-server new file mode 100755 index 0000000..78f1210 --- /dev/null +++ b/vosk-server @@ -0,0 +1,83 @@ +#!/usr/bin/env python3 + +import json +import os +import sys +import asyncio +import pathlib +import websockets +import concurrent.futures +import logging +from vosk import Model, KaldiRecognizer + +from pprint import pprint + +# Enable loging if needed +# +# logger = logging.getLogger('websockets') +# logger.setLevel(logging.INFO) +# logger.addHandler(logging.StreamHandler()) + +vosk_interface = os.environ.get('VOSK_SERVER_INTERFACE', '0.0.0.0') +vosk_port = int(os.environ.get('VOSK_SERVER_PORT', 2700)) +vosk_model_path = os.environ.get('VOSK_MODEL_PATH', 'model') +vosk_sample_rate = float(os.environ.get('VOSK_SAMPLE_RATE', 8000)) + +if len(sys.argv) > 1: + vosk_model_path = sys.argv[1] + +# Gpu part, uncomment if 
vosk-api has gpu support +# +# from vosk import GpuInit, GpuInstantiate +# GpuInit() +# def thread_init(): +# GpuInstantiate() +# pool = concurrent.futures.ThreadPoolExecutor(initializer=thread_init) + +model = Model(vosk_model_path) +pool = concurrent.futures.ThreadPoolExecutor((os.cpu_count() or 1)) +loop = asyncio.get_event_loop() + +def process_chunk(rec, message): + if message == '{"eof" : 1}': + return rec.FinalResult(), True + elif rec.AcceptWaveform(message): + return rec.Result(), False + else: + return rec.PartialResult(), False + +async def recognize(websocket, path): + + rec = None + phrase_list = None + sample_rate = vosk_sample_rate + + while True: + + message = await websocket.recv() + + # Load configuration if provided + if isinstance(message, str) and 'config' in message: + jobj = json.loads(message)['config'] + if 'phrase_list' in jobj: + phrase_list = jobj['phrase_list'] + if 'sample_rate' in jobj: + sample_rate = float(jobj['sample_rate']) + continue + + # Create the recognizer, word list is temporary disabled since not every model supports it + if not rec: + if phrase_list: + rec = KaldiRecognizer(model, sample_rate, json.dumps(phrase_list, ensure_ascii=False)) + else: + rec = KaldiRecognizer(model, sample_rate) + + response, stop = await loop.run_in_executor(pool, process_chunk, rec, message) + await websocket.send(response) + if stop: break + +start_server = websockets.serve( + recognize, vosk_interface, vosk_port) + +loop.run_until_complete(start_server) +loop.run_forever() diff --git a/vosk-server.service b/vosk-server.service new file mode 100644 index 0000000..a8957d2 --- /dev/null +++ b/vosk-server.service @@ -0,0 +1,12 @@ +[Unit] +Description=Vosk ASR service + +[Service] +Environment=VOSK_MODEL_PATH=/opt/vosk/vosk-model-ru-0.10-lgraph +Type=simple +ExecStart=/usr/local/bin/vosk-server +Restart=always +RestartSec=30s + +[Install] +WantedBy=multi-user.target -- 2.34.1 From 3edf2645cdd55eb3b6677e37d182fe4f905c92f7 Mon Sep 17 00:00:00 2001 
From: Roman Bazalevskiy Date: Tue, 9 Nov 2021 09:29:34 +0300 Subject: [PATCH 2/4] Minor API fixes for OH3 --- voicecontrol | 107 ++++++++++++++++++++++++++++++--------------------- 1 file changed, 64 insertions(+), 43 deletions(-) diff --git a/voicecontrol b/voicecontrol index fafda2c..703e109 100755 --- a/voicecontrol +++ b/voicecontrol @@ -1,9 +1,9 @@ -#!/usr/bin/env python3.8 +#!/usr/bin/env python3 import websockets,asyncio import sys from pyaudio import PyAudio, Stream, paInt16 -from contextlib import asynccontextmanager, contextmanager, AsyncExitStack +from contextlib import asynccontextmanager, contextmanager, AsyncExitStack, ExitStack from typing import AsyncGenerator, Generator from urllib.parse import urlencode, quote @@ -57,6 +57,12 @@ def SkipSource(source,seconds): except: pass +def Silence(speaker, seconds): + buf = bytes(speaker._frames_per_buffer) + bufs = int((seconds)*speaker._rate/speaker._frames_per_buffer) + for i in range(bufs): + speaker.write(buf) + def PlayBack(pyaud, text, mic = None): global config, last_time @@ -78,6 +84,7 @@ def PlayBack(pyaud, text, mic = None): decoder = MP3Decoder(req) speaker = pyaud.open(output=True, format=paInt16, channels=decoder.num_channels, rate=decoder.sample_rate) + Silence(speaker, 0.3) for chunk in decoder: speaker.write(chunk) @@ -148,7 +155,14 @@ def RunCommand(command, pyaud, mic = None): PlayBack(pyaud, res, mic=mic) elif res=="NULL": PlayBack(pyaud, "Сервер не ответил", mic=mic) - http.request('POST',command_url, headers=my_headers, body="") + if command_user: + my_headers = urllib3.util.make_headers(basic_auth=command_user+':'+command_pwd) + else: + my_headers = urllib3.util.make_headers() + my_headers['Content-Type']='text/plain' + my_headers['Accept']='apllication/json' + command="" + http.request('POST',command_url, headers=my_headers, body=command.encode('UTF-8')) except KeyboardInterrupt: raise except: @@ -207,11 +221,10 @@ async def ListenPhrase(mic, server): except 
websockets.exceptions.ConnectionClosedError: raise except: - raise return '',0 -async def hello(uri): +async def main_loop(uri): global config, last_time @@ -220,49 +233,57 @@ async def hello(uri): rec_attempts = config["rec_attempts"] commands = config["commands"] - async with AsyncExitStack() as stack: - ws = await stack.enter_async_context(websockets.connect(uri)) - print('Type Ctrl-C to exit') - phrases = config["commands"] - phrases.append(config["keyphrase"]) - phrases = json.dumps(phrases, ensure_ascii=False) - await ws.send('{"config" : { "phrase_list" : '+phrases+', "sample_rate" : 16000.0}}') - - ws = await stack.enter_async_context(_polite_websocket(ws)) - p = stack.enter_context(_pyaudio()) - s = stack.enter_context(_pyaudio_open_stream(p, + + with ExitStack() as audio_stack: + p = audio_stack.enter_context(_pyaudio()) + s = audio_stack.enter_context(_pyaudio_open_stream(p, format = paInt16, channels = 1, rate = 16000, input = True, frames_per_buffer = 2000)) - while True: - phrase, confidence = await ListenPhrase(s, ws) - if config["debug"]: - print(phrase,confidence) - if phrase == keyphrase and confidence>=confidence_treshold : - PlayBack(p, "Я жду команду", mic=s) - command = "" - - for i in range(rec_attempts): - phrase, confidence = await ListenPhrase(s, ws) - if config["debug"]: - print(phrase,confidence) - if confidence > confidence_treshold: - if (not commands) or (phrase in commands): - if config["debug"]: - print("Command: ", phrase) - command = phrase - RunCommand(command, p, s) - break - else: - PlayBack(p, "Не знаю такой команды: "+phrase, mic=s) - else: - PlayBack(p, "Не поняла, слишком неразборчиво", mic=s) - - if not command: - PlayBack(p, "Так команду и не поняла...", mic=s) + while True: + try: + async with AsyncExitStack() as web_stack: + ws = await web_stack.enter_async_context(websockets.connect(uri)) + print('Type Ctrl-C to exit') + phrases = [] + config["commands"] + phrases.append(config["keyphrase"]) + phrases = 
json.dumps(phrases, ensure_ascii=False) + await ws.send('{"config" : { "phrase_list" : '+phrases+', "sample_rate" : 16000.0}}') + + ws = await web_stack.enter_async_context(_polite_websocket(ws)) + while True: + phrase, confidence = await ListenPhrase(s, ws) + if config["debug"]: + print(phrase,confidence) + if phrase == keyphrase and confidence>=confidence_treshold : + PlayBack(p, "Я жду команду", mic=s) + command = "" + + for i in range(rec_attempts): + phrase, confidence = await ListenPhrase(s, ws) + if config["debug"]: + print(phrase,confidence) + if confidence > confidence_treshold: + if (not commands) or (phrase in commands): + if config["debug"]: + print("Command: ", phrase) + command = phrase + RunCommand(command, p, s) + break + else: + PlayBack(p, "Не знаю такой команды: "+phrase, mic=s) + else: + PlayBack(p, "Не поняла, слишком неразборчиво", mic=s) + + if not command: + PlayBack(p, "Так команду и не поняла...", mic=s) + except KeyboardInterrupt: + raise + except: + pass def get_config(path): @@ -379,7 +400,7 @@ while True: loop = asyncio.get_event_loop() loop.run_until_complete( - hello(f'ws://' + server)) + main_loop(f'ws://' + server)) except (Exception, KeyboardInterrupt) as e: loop.run_until_complete( -- 2.34.1 From 40d3028fd97291c60a966792ef2efdde114b8e71 Mon Sep 17 00:00:00 2001 From: Roman Bazalevskiy Date: Mon, 15 Nov 2021 10:52:46 +0300 Subject: [PATCH 3/4] =?utf8?q?=D0=9E=D0=B1=D1=80=D0=B0=D0=B1=D0=BE=D1=82?= =?utf8?q?=D0=BA=D0=B0=20=D0=BE=D1=88=D0=B8=D0=B1=D0=BE=D0=BA=20=D0=BE?= =?utf8?q?=D0=B1=D1=80=D1=8B=D0=B2=D0=B0=20=D1=81=D0=BE=D0=B5=D0=B4=D0=B8?= =?utf8?q?=D0=BD=D0=B5=D0=BD=D0=B8=D1=8F.?= MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit --- voicecontrol | 90 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 55 insertions(+), 35 deletions(-) diff --git a/voicecontrol b/voicecontrol index 703e109..7216fef 100755 --- a/voicecontrol +++ b/voicecontrol @@ -48,7 +48,7 @@ def 
SkipSource(source,seconds): global config try: if config["debug"]: - print("Skipping: ", seconds) + print("Skipping: ", seconds) bufs = int((seconds)*source._rate/source._frames_per_buffer) for i in range(bufs): buffer = source.read(source._frames_per_buffer) @@ -124,49 +124,68 @@ def RunCommand(command, pyaud, mic = None): if command_url: try: + if config["debug"]: + print('Preparing command') if command_user: my_headers = urllib3.util.make_headers(basic_auth=command_user+':'+command_pwd) else: my_headers = urllib3.util.make_headers() my_headers['Content-Type']='text/plain' my_headers['Accept']='apllication/json' - http.request('POST',command_url,headers=my_headers,body=command.encode('UTF-8')) - if reply_url: - sleep(0.5) - res="NULL" - for i in range(api_attempts): - try: - if command_user: - my_headers = urllib3.util.make_headers(basic_auth=command_user+':'+command_pwd) - else: - my_headers = urllib3.util.make_headers() - req=http.request('GET',reply_url,headers=my_headers).data - res = json.loads(req)['state'].strip() - if config["debug"]: - print(res) - if not(res == 'NULL'): - break - sleep(1) - except KeyboardInterrupt: - raise - except: - sleep(1) - if res and not(res=="NULL"): - PlayBack(pyaud, res, mic=mic) - elif res=="NULL": - PlayBack(pyaud, "Сервер не ответил", mic=mic) - if command_user: - my_headers = urllib3.util.make_headers(basic_auth=command_user+':'+command_pwd) + if config["debug"]: + print('Sending command') + sent = False + for i in range(api_attempts): + try: + http.request('POST',command_url,headers=my_headers,body=command.encode('UTF-8')) + sent = True + break + except Exception as e: + print('Exception: '+str(e)) + sleep(0.5) + if sent: + if config["debug"]: + print('Command sent') + if reply_url: + sleep(0.5) + res="NULL" + for i in range(api_attempts): + try: + if command_user: + my_headers = urllib3.util.make_headers(basic_auth=command_user+':'+command_pwd) + else: + my_headers = urllib3.util.make_headers() + 
req=http.request('GET',reply_url,headers=my_headers).data + res = json.loads(req)['state'].strip() + if config["debug"]: + print(res) + if not(res == 'NULL'): + break + sleep(1) + except KeyboardInterrupt: + raise + except Exception as e: + print('Exception: '+str(e)) + sleep(1) + if res and not(res=="NULL"): + PlayBack(pyaud, res, mic=mic) + elif res=="NULL": + PlayBack(pyaud, "Сервер не ответил", mic=mic) + if command_user: + my_headers = urllib3.util.make_headers(basic_auth=command_user+':'+command_pwd) + else: + my_headers = urllib3.util.make_headers() + my_headers['Content-Type']='text/plain' + my_headers['Accept']='apllication/json' + command="" + http.request('POST',command_url, headers=my_headers, body=command.encode('UTF-8')) else: - my_headers = urllib3.util.make_headers() - my_headers['Content-Type']='text/plain' - my_headers['Accept']='apllication/json' - command="" - http.request('POST',command_url, headers=my_headers, body=command.encode('UTF-8')) + PlayBack(pyaud, "Сервер недоступен", mic=mic) except KeyboardInterrupt: raise - except: + except Exception as e: try: + print('Exception: '+str(e)) http.request('POST',command_url, headers=my_headers, body="") except: pass @@ -282,7 +301,8 @@ async def main_loop(uri): PlayBack(p, "Так команду и не поняла...", mic=s) except KeyboardInterrupt: raise - except: + except Exception as e: + print('Exception: '+str(e)) pass def get_config(path): -- 2.34.1 From 44bc308cdcb0da8d84fccaada79c82ee853de32f Mon Sep 17 00:00:00 2001 From: Roman Bazalevskiy Date: Fri, 3 Nov 2023 08:56:00 +0300 Subject: [PATCH 4/4] =?utf8?q?=D0=9F=D0=B5=D1=80=D0=B5=D0=B4=D0=B5=D0=BB?= =?utf8?q?=D0=B0=D0=BD=D0=BE=20=D0=BF=D0=BE=D0=B4=20=D1=81=D0=B2=D0=B5?= =?utf8?q?=D0=B6=D0=B8=D0=B9=20Python=20=D0=B8=20Vosk?= MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit --- voicecontrol | 125 ++++++++++++++++++++++----------------------------- 1 file changed, 53 insertions(+), 72 deletions(-) diff --git 
a/voicecontrol b/voicecontrol index 7216fef..6b4f17b 100755 --- a/voicecontrol +++ b/voicecontrol @@ -17,6 +17,8 @@ from time import time, sleep import webrtcvad +from pprint import pprint + @contextmanager def _pyaudio() -> Generator[PyAudio, None, None]: p = PyAudio() @@ -57,12 +59,6 @@ def SkipSource(source,seconds): except: pass -def Silence(speaker, seconds): - buf = bytes(speaker._frames_per_buffer) - bufs = int((seconds)*speaker._rate/speaker._frames_per_buffer) - for i in range(bufs): - speaker.write(buf) - def PlayBack(pyaud, text, mic = None): global config, last_time @@ -84,7 +80,7 @@ def PlayBack(pyaud, text, mic = None): decoder = MP3Decoder(req) speaker = pyaud.open(output=True, format=paInt16, channels=decoder.num_channels, rate=decoder.sample_rate) - Silence(speaker, 0.3) + pprint(speaker) for chunk in decoder: speaker.write(chunk) @@ -97,7 +93,7 @@ def PlayBack(pyaud, text, mic = None): last_time = time() if mic: - SkipSource(mic, elapsed + 0.5) + SkipSource(mic, elapsed + 0.2) return elapsed @@ -105,7 +101,7 @@ def PlayBack(pyaud, text, mic = None): raise except: - pass + raise else: return 0 @@ -198,49 +194,38 @@ async def ListenPhrase(mic, server): sz = int(mic._rate*frame) sp = int(pause/frame) - try: - - phrase = "" - voice = False + phrase = "" + voice = False - while not phrase: - data = mic.read(sz) - if len(data) == 0: - break - vd = vad.is_speech(data, mic._rate) - if vd and not voice: - voice = True - if config["debug"]: - print("+", end="") + while not phrase: + data = mic.read(sz) + if len(data) == 0: + break + vd = vad.is_speech(data, mic._rate) + if vd and not voice: + voice = True + if config["debug"]: + print("+", end="") + cnt = 0 + if voice and not vd: + cnt = cnt + 1 + if cnt > sp: cnt = 0 - if voice and not vd: - cnt = cnt + 1 - if cnt > sp: - cnt = 0 - voice = False - if config["debug"]: - print("-") - if voice: - print("*",end="") - await server.send(data) - datatxt = await server.recv() - data = json.loads(datatxt) - try: - 
phrase = data["text"] - confidence = min(map(lambda x: x["conf"], data["result"])) - except: - pass + voice = False + if config["debug"]: + print("-") + if voice: + print("*",end="") + await server.send(data) + datatxt = await server.recv() + data = json.loads(datatxt) + try: + phrase = data["text"] + except: + pass - last_time = time() - - return phrase, confidence - - except KeyboardInterrupt: - raise - except websockets.exceptions.ConnectionClosedError: - raise - except: - return '',0 + last_time = time() + return phrase async def main_loop(uri): @@ -248,13 +233,13 @@ async def main_loop(uri): global config, last_time keyphrase = config["keyphrase"] - confidence_treshold = config["confidence_treshold"] rec_attempts = config["rec_attempts"] commands = config["commands"] with ExitStack() as audio_stack: p = audio_stack.enter_context(_pyaudio()) + s = audio_stack.enter_context(_pyaudio_open_stream(p, format = paInt16, channels = 1, @@ -274,28 +259,28 @@ async def main_loop(uri): ws = await web_stack.enter_async_context(_polite_websocket(ws)) while True: - phrase, confidence = await ListenPhrase(s, ws) + phrase = await ListenPhrase(s, ws) if config["debug"]: - print(phrase,confidence) - if phrase == keyphrase and confidence>=confidence_treshold : - PlayBack(p, "Я жду команду", mic=s) + print(phrase) + if phrase == keyphrase : + print("COMMAND!") + PlayBack(p, "Слушаю!", mic=s) command = "" for i in range(rec_attempts): - phrase, confidence = await ListenPhrase(s, ws) + phrase = await ListenPhrase(s, ws) if config["debug"]: - print(phrase,confidence) - if confidence > confidence_treshold: - if (not commands) or (phrase in commands): - if config["debug"]: - print("Command: ", phrase) - command = phrase - RunCommand(command, p, s) - break - else: - PlayBack(p, "Не знаю такой команды: "+phrase, mic=s) + print(phrase) + if (not commands) or (phrase in commands): + if config["debug"]: + print("Command: ", phrase) + command = phrase + RunCommand(command, p, s) + break 
else: - PlayBack(p, "Не поняла, слишком неразборчиво", mic=s) + PlayBack(p, "Не знаю такой команды: "+phrase, mic=s) + else: + PlayBack(p, "Не поняла, слишком неразборчиво", mic=s) if not command: PlayBack(p, "Так команду и не поняла...", mic=s) @@ -321,11 +306,6 @@ def get_config(path): except: rec_attempts = 4 - try: - confidence_treshold = float(config['vosk']['confidence_treshold']) - except: - confidence_treshold = 0.4 - try: vosk_server = config['vosk']['server'] except: @@ -389,7 +369,6 @@ def get_config(path): "asr_server": vosk_server, "keyphrase": keyphrase, "rec_attempts": rec_attempts, - "confidence_treshold": confidence_treshold, "tts_url": tts_url, "tts_param": tts_param, "api_attempts": api_attempts, @@ -413,6 +392,7 @@ config = get_config(conf_file) server = config['asr_server'] vad = webrtcvad.Vad(config['vad_mode']) +last_time = time() while True: @@ -423,6 +403,7 @@ while True: main_loop(f'ws://' + server)) except (Exception, KeyboardInterrupt) as e: + raise loop.run_until_complete( loop.shutdown_asyncgens()) if isinstance(e, KeyboardInterrupt): -- 2.34.1