+#!/usr/bin/env python3.8
+import websockets,asyncio
+import sys
+from pyaudio import PyAudio, Stream, paInt16
+from contextlib import asynccontextmanager, contextmanager, AsyncExitStack
+from typing import AsyncGenerator, Generator
+from urllib.parse import urlencode, quote
+import urllib3, base64, json
+import configparser
+from os.path import expanduser
+from streamp3 import MP3Decoder
+from time import time, sleep
+import webrtcvad
@contextmanager
def _pyaudio() -> "Generator[PyAudio, None, None]":
    """Context manager that creates a PyAudio object and terminates it on exit.

    The ``@contextmanager`` decorator is required: the caller enters this
    with ``ExitStack.enter_context()``, which needs a real context manager,
    not a bare generator.

    Yields:
        The freshly created ``PyAudio`` instance.
    """
    p = PyAudio()
    try:
        yield p
    finally:
        # Runs on normal exit and when an exception is unwinding.
        print('Terminating PyAudio object')
        p.terminate()
@contextmanager
def _pyaudio_open_stream(p: "PyAudio", *args, **kwargs) -> "Generator[Stream, None, None]":
    """Context manager that opens a stream on ``p`` and closes it on exit.

    The ``@contextmanager`` decorator is required: the caller enters this
    with ``ExitStack.enter_context()``, which needs a real context manager,
    not a bare generator.

    Args:
        p: Object whose ``open()`` method creates the stream.
        *args: Positional arguments forwarded unchanged to ``p.open()``.
        **kwargs: Keyword arguments forwarded unchanged to ``p.open()``.

    Yields:
        The stream returned by ``p.open()``.
    """
    s = p.open(*args, **kwargs)
    try:
        yield s
    finally:
        # Runs on normal exit and when an exception is unwinding.
        print('Closing PyAudio Stream')
        s.close()
@asynccontextmanager
async def _polite_websocket(ws: "websockets.WebSocketClientProtocol") -> "AsyncGenerator[websockets.WebSocketClientProtocol, None]":
    """Async context manager that says goodbye to the server on exit.

    The ``@asynccontextmanager`` decorator is required: the caller enters
    this with ``AsyncExitStack.enter_async_context()``, which needs a real
    async context manager, not a bare async generator.

    Args:
        ws: An already connected websocket.

    Yields:
        The same ``ws`` object, unchanged.
    """
    try:
        yield ws
    finally:
        # Send the end-of-stream marker and print whatever the server
        # answers with.
        print('Terminating connection')
        await ws.send('{"eof" : 1}')
        print(await ws.recv())
def SkipSource(source,seconds):
    """Read and discard about ``seconds`` of audio from ``source``.

    Used after playback so that audio captured while the speaker was active
    is not handed to the recognizer.

    Args:
        source: Input stream exposing ``_rate``, ``_frames_per_buffer`` and
            ``read(n)``.
        seconds: Amount of audio to drop, in seconds.

    This is best effort: any ordinary error while reading is ignored.
    ``KeyboardInterrupt`` and ``SystemExit`` are not ``Exception`` subclasses,
    so they still propagate.
    """
    try:
        if config["debug"]:
            print("Skipping: ", seconds)
        frames = source._frames_per_buffer
        # Number of whole buffers that cover the requested duration.
        for _ in range(int(seconds * source._rate / frames)):
            source.read(frames)
    except Exception:
        # Skipping is only an optimisation; a failed read must not abort
        # the caller.
        pass
def PlayBack(pyaud, text, mic = None):
    """Speak ``text`` through the configured text-to-speech URL.

    The MP3 returned by the TTS service is decoded chunk by chunk and written
    to an output stream opened on ``pyaud``. Afterwards the time elapsed since
    ``last_time`` is measured, ``last_time`` is reset, and (when ``mic`` is
    given) that much audio plus half a second is dropped from the microphone.

    Args:
        pyaud: Object whose ``open()`` creates the output stream.
        text: Phrase to speak.
        mic: Optional input stream to drain after playback.

    Returns:
        Seconds elapsed since ``last_time``, or ``0`` when nothing was played
        (no TTS URL, empty text, or playback failed).
    """
    global last_time
    playback_url = config["tts_url"]
    playback_param = config["tts_param"]
    if not (playback_url and text):
        return 0
    try:
        if playback_param:
            # The URL is a template; the encoded "param=text" pair fills it.
            url = playback_url.format(urlencode({playback_param: text}))
        else:
            url = playback_url + quote(text)
        http = urllib3.PoolManager()
        req = http.request('GET', url, preload_content=False)
        try:
            decoder = MP3Decoder(req)
            speaker = pyaud.open(output=True, format=paInt16, channels=decoder.num_channels, rate=decoder.sample_rate)
            try:
                for chunk in decoder:
                    speaker.write(chunk)
                # NOTE(review): the source indentation was lost; the pause is
                # assumed to follow the loop rather than each chunk.
                sleep(0.1)
                speaker.stop_stream()
            finally:
                # Close the output stream even when decoding or writing fails.
                speaker.close()
        finally:
            # A streamed response must be released back to the pool.
            req.release_conn()
        elapsed = time() - last_time
        last_time = time()
        if mic:
            SkipSource(mic, elapsed + 0.5)
        return elapsed
    except Exception as err:
        # Playback is best effort; report the failure only in debug mode.
        if config.get("debug"):
            print("PlayBack failed:", err)
        return 0
def _auth_headers(user, pwd):
    """Return urllib3 request headers, with HTTP basic auth when ``user`` is set."""
    if user:
        return urllib3.util.make_headers(basic_auth=user + ':' + pwd)
    return urllib3.util.make_headers()


def RunCommand(command, pyaud, mic = None):
    """Post ``command`` to the command URL and speak the reply, if any.

    The command is POSTed as plain text. When a reply URL is configured it is
    polled up to ``api_attempts`` times for a JSON document whose ``state`` is
    not ``"NULL"``. That state is spoken, or a "server did not answer" message
    is spoken when every attempt yields ``"NULL"``. Finally an empty body is
    POSTed to the command URL.

    Args:
        command: Recognized command text.
        pyaud: Audio object forwarded to ``PlayBack``.
        mic: Optional input stream forwarded to ``PlayBack``.

    Returns:
        None. Ordinary errors are handled internally. On failure the empty
        POST is still attempted, best effort.
    """
    command_url = config["command_url"]
    if not command_url:
        return
    reply_url = config["reply_url"]
    command_user = config["api_user"]
    command_pwd = config["api_pwd"]
    api_attempts = config["api_attempts"]
    http = urllib3.PoolManager()
    # Built before the try block so the error handler can always use it.
    post_headers = {}
    try:
        post_headers = _auth_headers(command_user, command_pwd)
        post_headers['Content-Type'] = 'text/plain'
        post_headers['Accept'] = 'application/json'
        http.request('POST', command_url, headers=post_headers, body=command.encode('UTF-8'))
        if reply_url:
            sleep(0.5)
            res = "NULL"
            for _ in range(api_attempts):
                try:
                    raw = http.request('GET', reply_url, headers=_auth_headers(command_user, command_pwd)).data
                    res = json.loads(raw)['state'].strip()
                    if config["debug"]:
                        print(res)
                except Exception as err:
                    # A failed poll counts as "no answer yet".
                    if config["debug"]:
                        print("Reply poll failed:", err)
                else:
                    if res != 'NULL':
                        break
                sleep(1)
            if res and res != "NULL":
                PlayBack(pyaud, res, mic=mic)
            elif res == "NULL":
                PlayBack(pyaud, "Сервер не ответил", mic=mic)
        # NOTE(review): the source indentation was lost; the empty POST is
        # assumed to run whether or not a reply URL is configured, matching
        # the error path below. Confirm against the original.
        http.request('POST', command_url, headers=post_headers, body="")
    except Exception:
        try:
            http.request('POST', command_url, headers=post_headers, body="")
        except Exception:
            pass
async def ListenPhrase(mic, server):
    """Listen on ``mic`` until the recognizer returns a non-empty phrase.

    Audio is read in 30 ms frames and classified with the module-level
    ``vad``. Frames are sent to ``server`` only while voice is considered
    active. Voice becomes active on the first speech frame and is switched
    off once more than two seconds' worth of non-speech frames have been
    counted. Each server answer is parsed as JSON. The loop ends when an
    answer carries a non-empty ``text``, or when the microphone returns no
    data. ``last_time`` is set to the current time before returning.

    Args:
        mic: Input stream exposing ``_rate`` and ``read(n)``.
        server: Websocket exposing awaitable ``send()`` and ``recv()``.

    Returns:
        ``(phrase, confidence)``. ``confidence`` is the lowest per-word
        ``conf`` in the result, or ``0`` when none is available. ``phrase`` is
        ``""`` when the microphone ran dry.

    Raises:
        Whatever the microphone, VAD or websocket raise; nothing is swallowed
        except a missing or malformed ``text`` / ``result`` in an answer.
    """
    global last_time
    frame = 30/1000  # 30 ms
    pause = 2
    sz = int(mic._rate*frame)
    sp = int(pause/frame)
    phrase = ""
    # Default so the return below is always defined.
    confidence = 0
    voice = False
    cnt = 0
    while not phrase:
        data = mic.read(sz)
        if len(data) == 0:
            break
        vd = vad.is_speech(data, mic._rate)
        if vd and not voice:
            voice = True
            if config["debug"]:
                print("+", end="")
            cnt = 0
        if voice and not vd:
            cnt = cnt + 1
            if cnt > sp:
                cnt = 0
                voice = False
                if config["debug"]:
                    print("-")
        if voice:
            print("*",end="")
            await server.send(data)
            answer = json.loads(await server.recv())
            try:
                phrase = answer["text"]
                confidence = min(word["conf"] for word in answer["result"])
            except (KeyError, TypeError, ValueError):
                # Partial answers have no "text"; final ones may lack a
                # usable "result" list. Keep what was obtained so far.
                pass
    last_time = time()
    return phrase, confidence
async def hello(uri):
    """Run the voice-control session against the recognizer at ``uri``.

    Connects to the websocket, sends the recognizer configuration, opens a
    16 kHz mono microphone stream and then loops forever. It waits for the
    keyphrase, prompts for a command, and gives the user up to
    ``rec_attempts`` tries to say a command that is recognized with enough
    confidence and, when a command list is configured, is on that list. An
    accepted command is passed to ``RunCommand``.

    Args:
        uri: Websocket URI of the recognition server.

    The function returns only by raising (connection loss, Ctrl-C and so on).
    The exit stack then closes the stream, terminates PyAudio and sends the
    end-of-stream marker.
    """
    keyphrase = config["keyphrase"]
    confidence_treshold = config["confidence_treshold"]
    rec_attempts = config["rec_attempts"]
    commands = config["commands"]
    async with AsyncExitStack() as stack:
        ws = await stack.enter_async_context(websockets.connect(uri))
        print('Type Ctrl-C to exit')
        recognizer_config = {"sample_rate": 16000.0}
        if commands:
            # Build a new list: appending to config["commands"] would add the
            # keyphrase again on every reconnect and make it a valid command.
            recognizer_config["phrase_list"] = list(commands) + [keyphrase]
        # Without a command list no phrase_list is sent, so the recognizer
        # is not restricted to a fixed vocabulary.
        await ws.send(json.dumps({"config": recognizer_config}, ensure_ascii=False))
        ws = await stack.enter_async_context(_polite_websocket(ws))
        p = stack.enter_context(_pyaudio())
        s = stack.enter_context(_pyaudio_open_stream(p,
            format = paInt16,
            channels = 1,
            rate = 16000,
            input = True,
            frames_per_buffer = 2000))
        while True:
            phrase, confidence = await ListenPhrase(s, ws)
            if config["debug"]:
                print(phrase,confidence)
            if not (phrase == keyphrase and confidence >= confidence_treshold):
                continue
            PlayBack(p, "Я жду команду", mic=s)
            command = ""
            for _ in range(rec_attempts):
                phrase, confidence = await ListenPhrase(s, ws)
                if config["debug"]:
                    print(phrase,confidence)
                if confidence > confidence_treshold:
                    if (not commands) or (phrase in commands):
                        if config["debug"]:
                            print("Command: ", phrase)
                        command = phrase
                        RunCommand(command, p, s)
                        break
                    PlayBack(p, "Не знаю такой команды: "+phrase, mic=s)
                else:
                    PlayBack(p, "Не поняла, слишком неразборчиво", mic=s)
            if not command:
                PlayBack(p, "Так команду и не поняла...", mic=s)
def get_config(path):
    """Load the voice-control settings from the INI file at ``path``.

    Required options (``KeyError`` is re-raised after printing a message):
        ``[vosk] keyphrase`` and ``[vosk] server``.

    Optional options and their defaults:
        ``[vosk] attempts`` (4), ``[vosk] confidence_treshold`` (0.4),
        ``[commands] command_file`` (none), ``[rest] tts_url``,
        ``tts_param``, ``command_url``, ``reply_url`` (all none),
        ``[rest] attempts`` (2), ``[rest] api_user`` / ``api_pwd`` (none
        unless both are present), ``[vad] agressive`` (3),
        ``[system] debug`` (false; true only for the text "true" in any case).

    A missing, unparsable or non-interpolable optional value falls back to
    its default.

    Args:
        path: Path of the configuration file.

    Returns:
        A dict with the keys ``asr_server``, ``keyphrase``, ``rec_attempts``,
        ``confidence_treshold``, ``tts_url``, ``tts_param``, ``api_attempts``,
        ``api_user``, ``api_pwd``, ``command_url``, ``reply_url``, ``debug``,
        ``commands`` (list of lines of the command file, or ``None``) and
        ``vad_mode`` (int).

    Raises:
        KeyError: A required option is missing.
        OSError: A command file is configured but cannot be read.
    """
    parser = configparser.ConfigParser()
    parser.read(path)

    def option(section, name, default=None, cast=str):
        """Return the converted option value, or ``default`` when unusable."""
        try:
            return cast(parser[section][name])
        except (KeyError, ValueError, configparser.Error):
            return default

    try:
        keyphrase = parser['vosk']['keyphrase']
    except KeyError:
        print ("Обязательный параметр - ключевое слово - не задан!")
        raise
    try:
        vosk_server = parser['vosk']['server']
    except KeyError:
        print ("Обязательный параметр - сервер распознавания - не задан!")
        raise

    # Credentials are only meaningful as a pair.
    api_user = option('rest', 'api_user')
    api_pwd = option('rest', 'api_pwd')
    if api_user is None or api_pwd is None:
        api_user = None
        api_pwd = None

    # Read the command list once, and only when a file is configured.
    commands = None
    command_file = option('commands', 'command_file')
    if command_file:
        with open(command_file) as file:
            commands = file.read().splitlines()

    return {
        "asr_server": vosk_server,
        "keyphrase": keyphrase,
        "rec_attempts": option('vosk', 'attempts', default=4, cast=int),
        "confidence_treshold": option('vosk', 'confidence_treshold', default=0.4, cast=float),
        "tts_url": option('rest', 'tts_url'),
        "tts_param": option('rest', 'tts_param'),
        "api_attempts": option('rest', 'attempts', default=2, cast=int),
        "api_user": api_user,
        "api_pwd": api_pwd,
        "command_url": option('rest', 'command_url'),
        "reply_url": option('rest', 'reply_url'),
        "debug": option('system', 'debug', default=False, cast=lambda value: value.lower() == "true"),
        "commands": commands,
        # Converted to int because the value is handed to webrtcvad.Vad().
        "vad_mode": option('vad', 'agressive', default=3, cast=int)
    }
# Configuration file: the single command-line argument when given, otherwise
# the per-user default. The "else" matters, because without it the default
# always overwrote the argument.
if len(sys.argv) == 2:
    conf_file = sys.argv[1]
else:
    conf_file = expanduser("~")+"/.config/voicecontrol.ini"
config = get_config(conf_file)
server = config['asr_server']
# int() also accepts a mode that was read from the file as text.
vad = webrtcvad.Vad(int(config['vad_mode']))
# Obtained once, outside the try block, so the handlers can always use it.
loop = asyncio.get_event_loop()
while True:
    try:
        loop.run_until_complete(
            hello('ws://' + server))
    except KeyboardInterrupt:
        # Ctrl-C: finalize pending async generators, then leave.
        loop.run_until_complete(
            loop.shutdown_asyncgens())
        print('Bye')
        sys.exit(0)
    except Exception as e:
        # Any other failure: clean up, wait, and start a new session.
        loop.run_until_complete(
            loop.shutdown_asyncgens())
        print(f'Oops! {e}')
        print('Restarting process...')
        sleep(10)
+#!/usr/bin/env python3
+import json
+import os
+import sys
+import asyncio
+import pathlib
+import websockets
+import concurrent.futures
+import logging
+from vosk import Model, KaldiRecognizer
+from pprint import pprint
# Enable logging if needed
+# logger = logging.getLogger('websockets')
+# logger.setLevel(logging.INFO)
+# logger.addHandler(logging.StreamHandler())
# Server settings are taken from the environment, each with a built-in default.
vosk_interface = os.getenv('VOSK_SERVER_INTERFACE', '')
vosk_port = int(os.getenv('VOSK_SERVER_PORT', 2700))
vosk_sample_rate = float(os.getenv('VOSK_SAMPLE_RATE', 8000))
# A model path given as the first command-line argument takes precedence
# over the VOSK_MODEL_PATH environment variable.
vosk_model_path = sys.argv[1] if len(sys.argv) > 1 else os.getenv('VOSK_MODEL_PATH', 'model')
# Gpu part, uncomment if vosk-api has gpu support
# from vosk import GpuInit, GpuInstantiate
# GpuInit()
# def thread_init():
# GpuInstantiate()
# pool = concurrent.futures.ThreadPoolExecutor(initializer=thread_init)
model = Model(vosk_model_path)
# One worker thread per CPU reported by the OS, or a single one when the
# count is unknown.
pool = concurrent.futures.ThreadPoolExecutor(max_workers=(os.cpu_count() or 1))
loop = asyncio.get_event_loop()
def process_chunk(rec, message):
    """Feed one client message to the recognizer and return its answer.

    Args:
        rec: Recognizer exposing ``AcceptWaveform()``, ``Result()``,
            ``PartialResult()`` and ``FinalResult()``.
        message: Either the exact end-of-stream marker ``'{"eof" : 1}'`` or a
            chunk handed to ``AcceptWaveform()``.

    Returns:
        ``(response, stop)``. ``stop`` is ``True`` only for the end-of-stream
        marker, in which case ``response`` is the final result. Otherwise
        ``response`` is the full result when ``AcceptWaveform()`` reports
        truthy, else the partial result.
    """
    end_of_stream = message == '{"eof" : 1}'
    if end_of_stream:
        return rec.FinalResult(), True
    accepted = rec.AcceptWaveform(message)
    response = rec.Result() if accepted else rec.PartialResult()
    return response, False
async def recognize(websocket, path):
    """Serve one websocket client: configure, recognize, answer.

    Text messages containing ``config`` are parsed as JSON and may set the
    phrase list and the sample rate. They produce no answer. Every other
    message is run through ``process_chunk`` on the thread pool and the
    response is sent back. The recognizer is created lazily on the first such
    message, so configuration received after that point does not affect it.
    The loop ends once ``process_chunk`` reports the end of the stream.

    Args:
        websocket: Connection exposing awaitable ``recv()`` and ``send()``.
        path: Not used by this handler.
    """
    recognizer = None
    phrase_list = None
    sample_rate = vosk_sample_rate
    finished = False
    while not finished:
        message = await websocket.recv()

        # Configuration message: update the settings and wait for the next one.
        if isinstance(message, str) and 'config' in message:
            settings = json.loads(message)['config']
            if 'phrase_list' in settings:
                phrase_list = settings['phrase_list']
            if 'sample_rate' in settings:
                sample_rate = float(settings['sample_rate'])
            continue

        # First data message: build the recognizer, restricted to the phrase
        # list when one was supplied.
        if not recognizer:
            grammar = [json.dumps(phrase_list, ensure_ascii=False)] if phrase_list else []
            recognizer = KaldiRecognizer(model, sample_rate, *grammar)

        response, finished = await loop.run_in_executor(pool, process_chunk, recognizer, message)
        await websocket.send(response)
# Build the websocket server object for the configured interface and port,
# with recognize() as the connection handler. Nothing in the visible code
# awaits or runs it yet.
start_server = websockets.serve(recognize, host=vosk_interface, port=vosk_port)