3 import websockets,asyncio
5 from pyaudio import PyAudio, Stream, paInt16
6 from contextlib import asynccontextmanager, contextmanager, AsyncExitStack, ExitStack
7 from typing import AsyncGenerator, Generator
9 from urllib.parse import urlencode, quote
10 import urllib3, base64, json
13 from os.path import expanduser
14 from streamp3 import MP3Decoder
16 from time import time, sleep
# _pyaudio: generator annotated as yielding a PyAudio object.
# NOTE(review): the embedded listing numbers jump from 21 to 26, so the
# statements that create, yield and release the PyAudio object are not shown
# here; presumably this is decorated with @contextmanager (the decorator line
# is also not visible) -- confirm against the full file.
21 def _pyaudio() -> Generator[PyAudio, None, None]:
# Progress message printed as part of the teardown path.
26 print('Terminating PyAudio object')
# _pyaudio_open_stream: generator annotated as yielding a PyAudio Stream.
#   p              -- the PyAudio object the stream is opened on
#   *args/**kwargs -- forwarded unchanged to p.open()
# NOTE(review): the yield and the stream-closing statements fall on lines
# that are missing from this listing (numbers skip 31 -> 35); presumably this
# is a @contextmanager -- confirm against the full file.
30 def _pyaudio_open_stream(p: PyAudio, *args, **kwargs) -> Generator[Stream, None, None]:
# Open the stream with exactly the arguments the caller supplied.
31 s = p.open(*args, **kwargs)
# Progress message printed as part of the teardown path.
35 print('Closing PyAudio Stream')
# _polite_websocket: async generator wrapping an already-connected websocket.
#   ws -- the websocket client connection to wrap
# Visible teardown steps: print a message, send an {"eof": 1} JSON message,
# then print the server's reply to it.
# NOTE(review): the yield / try / finally lines are not shown in this listing
# (numbers skip 39 -> 43); presumably decorated with @asynccontextmanager.
39 async def _polite_websocket(ws: websockets.WebSocketClientProtocol) -> AsyncGenerator[websockets.WebSocketClientProtocol, None]:
43 print('Terminating connection')
# Presumably the recognizer's end-of-stream marker -- confirm with the server protocol.
44 await ws.send('{"eof" : 1}')
# Wait for and echo whatever the server sends back after the eof message.
45 print(await ws.recv())
# SkipSource: reads audio from `source` so that about `seconds` of input is
# consumed.
#   source  -- input stream; its private _rate and _frames_per_buffer
#              attributes are read directly
#   seconds -- amount of audio to consume, in seconds
# NOTE(review): the loop header and the KeyboardInterrupt handler body are on
# lines missing from this listing; the read below presumably runs `bufs`
# times and its result is presumably discarded -- confirm.
47 def SkipSource(source,seconds):
51 print("Skipping: ", seconds)
# Number of whole buffers covering `seconds`: seconds * rate / frames-per-buffer, truncated.
52 bufs = int((seconds)*source._rate/source._frames_per_buffer)
# Read one buffer's worth of frames from the source.
54 buffer = source.read(source._frames_per_buffer)
55 except KeyboardInterrupt:
# Silence: prepares a zero-filled buffer and a buffer count for `seconds` of
# output on `speaker`.
#   speaker -- output stream; its private _rate and _frames_per_buffer
#              attributes are read directly
#   seconds -- duration in seconds
# NOTE(review): the statements that actually write the buffer are not shown
# in this listing (it ends at embedded line 62).
60 def Silence(speaker, seconds):
# bytes(n) yields n zero bytes.
# NOTE(review): the length is a frame count used as a byte count; with a
# 16-bit sample format each frame is at least 2 bytes, so this buffer would
# hold fewer frames than _frames_per_buffer -- confirm the intended duration.
61 buf = bytes(speaker._frames_per_buffer)
# Number of whole buffers covering `seconds`, truncated.
62 bufs = int((seconds)*speaker._rate/speaker._frames_per_buffer)
# PlayBack: fetches synthesized speech for `text` from the TTS URL in the
# module-level `config` and plays it on a PyAudio output stream.
#   pyaud -- PyAudio object used to open the output stream
#   text  -- phrase to speak; nothing is requested when it is empty
#   mic   -- optional input stream passed to SkipSource after playback timing
#            is computed
# NOTE(review): many lines of this function are missing from the listing
# (embedded numbers skip, e.g. 86 -> 96); the decode/write loop, the branch
# conditions and any cleanup are not visible. Comments below cover only the
# statements that are shown.
66 def PlayBack(pyaud, text, mic = None):
67 global config, last_time
# A new connection pool is created on every call.
69 http = urllib3.PoolManager()
71 playback_url = config["tts_url"]
72 playback_param = config["tts_param"]
# Only proceed when both a TTS URL is configured and there is text to speak.
74 if playback_url and text:
# Two ways of building the request URL: substitute a url-encoded
# "param=text" pair into a format placeholder, or append the percent-quoted
# text to the URL. The condition selecting between them is not visible --
# presumably it depends on whether tts_param is set.
79 url = playback_url.format(urlencode({playback_param:text}))
81 url = playback_url+quote(text)
# preload_content=False leaves the body unread so the decoder can stream it.
# NOTE(review): no release/close of this response is visible -- confirm the
# connection is returned to the pool.
83 req = http.request('GET', url, preload_content=False)
84 decoder = MP3Decoder(req)
# Output stream parameters are taken from the decoded MP3 header.
# NOTE(review): closing of `speaker` is not visible in this listing -- confirm.
86 speaker = pyaud.open(output=True, format=paInt16, channels=decoder.num_channels, rate=decoder.sample_rate)
# Time elapsed since the global `last_time` timestamp (set on a line not shown).
96 elapsed = time() - last_time
# Consume that much microphone input plus half a second; presumably so the
# played-back speech is not fed to the recognizer -- confirm.
100 SkipSource(mic, elapsed + 0.5)
104 except KeyboardInterrupt:
# RunCommand: POSTs the recognized `command` text to config["command_url"],
# polls config["reply_url"] for an answer and speaks the result via PlayBack.
#   command -- command text; sent UTF-8 encoded as the request body
#   pyaud   -- PyAudio object forwarded to PlayBack
#   mic     -- optional input stream forwarded to PlayBack
# NOTE(review): the embedded listing numbers skip throughout this function,
# so branch conditions, try headers, delays and loop exits are not shown.
# Comments below describe only the visible statements.
113 def RunCommand(command, pyaud, mic = None):
# A new connection pool is created on every call.
117 http = urllib3.PoolManager()
# Endpoints, credentials and retry count come from the module-level `config`.
119 command_url = config["command_url"]
120 reply_url = config["reply_url"]
121 command_user = config["api_user"]
122 command_pwd = config["api_pwd"]
123 api_attempts = config["api_attempts"]
# Two alternative header sets: HTTP basic auth built from "user:password", or
# default headers. The condition choosing between them is not visible.
128 my_headers = urllib3.util.make_headers(basic_auth=command_user+':'+command_pwd)
130 my_headers = urllib3.util.make_headers()
131 my_headers['Content-Type']='text/plain'
# NOTE(review): 'apllication/json' looks like a typo for 'application/json';
# as written the Accept header names an unknown media type -- confirm and fix.
132 my_headers['Accept']='apllication/json'
# Submit the command text.
133 http.request('POST',command_url,headers=my_headers,body=command.encode('UTF-8'))
# Poll for the reply, at most `api_attempts` times.
137 for i in range(api_attempts):
140 my_headers = urllib3.util.make_headers(basic_auth=command_user+':'+command_pwd)
142 my_headers = urllib3.util.make_headers()
# The reply body is parsed as JSON; its 'state' field is whitespace-stripped.
143 req=http.request('GET',reply_url,headers=my_headers).data
144 res = json.loads(req)['state'].strip()
# The literal string 'NULL' is treated as "no answer"; the statement under
# this `if` is not shown (presumably it leaves the polling loop).
147 if not(res == 'NULL'):
150 except KeyboardInterrupt:
# Speak a non-empty, non-'NULL' reply; the other PlayBack call speaks the
# fallback phrase (Russian for "The server did not answer").
154 if res and not(res=="NULL"):
155 PlayBack(pyaud, res, mic=mic)
157 PlayBack(pyaud, "Сервер не ответил", mic=mic)
# Headers are rebuilt and the command is POSTed to command_url again.
# NOTE(review): the enclosing branch is not visible -- confirm this second
# POST of the same command is intended.
159 my_headers = urllib3.util.make_headers(basic_auth=command_user+':'+command_pwd)
161 my_headers = urllib3.util.make_headers()
162 my_headers['Content-Type']='text/plain'
# NOTE(review): same 'apllication/json' spelling as on the first request.
163 my_headers['Accept']='apllication/json'
165 http.request('POST',command_url, headers=my_headers, body=command.encode('UTF-8'))
166 except KeyboardInterrupt:
# POST with an empty body to the command endpoint; presumably resets the
# pending command on the server -- confirm.
170 http.request('POST',command_url, headers=my_headers, body="")
# ListenPhrase: coroutine that sends microphone audio to the recognition
# server and returns a (phrase, confidence) tuple built from its JSON reply.
#   mic    -- input stream; its private _rate attribute is read directly
#   server -- websocket connection used with send()/recv()
# NOTE(review): most of the capture / voice-activity loop is on lines missing
# from this listing (embedded numbers skip, e.g. 180 -> 191 -> 206), so how
# `data` is read and how `vd` is used is not visible.
174 async def ListenPhrase(mic, server):
175 global config,last_time, vad
177 frame = 30/1000 # 30 ms
# Samples per 30 ms frame at the microphone's sample rate.
179 sz = int(mic._rate*frame)
# Number of frames that fit in `pause`; `pause` is assigned on a line not
# shown in this listing.
180 sp = int(pause/frame)
# Voice-activity decision for the current chunk, using the global `vad` object.
191 vd = vad.is_speech(data, mic._rate)
# Send audio to the server and parse its text reply as JSON; note that `data`
# is rebound from the audio chunk to the parsed reply.
206 await server.send(data)
207 datatxt = await server.recv()
208 data = json.loads(datatxt)
210 phrase = data["text"]
# Overall confidence is the lowest "conf" among the entries of "result".
# NOTE(review): min() raises ValueError on an empty sequence; any guard for an
# empty "result" is not visible here -- confirm.
211 confidence = min(map(lambda x: x["conf"], data["result"]))
217 return phrase, confidence
# Handler bodies for both exceptions are on lines not shown.
219 except KeyboardInterrupt:
221 except websockets.exceptions.ConnectionClosedError:
# main_loop: coroutine that opens the microphone, connects to the recognition
# server at `uri`, waits for the key phrase and then for a command, and hands
# accepted commands to RunCommand.
#   uri -- websocket URI passed to websockets.connect()
# NOTE(review): loop headers, else branches and try lines are missing from
# this listing (embedded numbers skip), so the exact nesting of the statements
# below is not visible; comments describe only what is shown.
# NOTE(review): PlayBack and RunCommand are plain (non-async) functions called
# from this coroutine, so the event loop does not run while they execute.
227 async def main_loop(uri):
229 global config, last_time
# Settings read once from the module-level `config` mapping.
231 keyphrase = config["keyphrase"]
232 confidence_treshold = config["confidence_treshold"]
233 rec_attempts = config["rec_attempts"]
234 commands = config["commands"]
# Audio resources are registered on an ExitStack; the remaining arguments of
# the stream-open call are on lines not shown (only frames_per_buffer is visible).
237 with ExitStack() as audio_stack:
238 p = audio_stack.enter_context(_pyaudio())
239 s = audio_stack.enter_context(_pyaudio_open_stream(p,
244 frames_per_buffer = 2000))
# Websocket resources are registered on an AsyncExitStack.
248 async with AsyncExitStack() as web_stack:
249 ws = await web_stack.enter_async_context(websockets.connect(uri))
250 print('Type Ctrl-C to exit')
# Build the phrase list (a copy of the configured commands plus the key
# phrase) and send it to the server as JSON; ensure_ascii=False keeps
# non-ASCII phrases unescaped.
251 phrases = [] + config["commands"]
252 phrases.append(config["keyphrase"])
253 phrases = json.dumps(phrases, ensure_ascii=False)
254 await ws.send('{"config" : { "phrase_list" : '+phrases+', "sample_rate" : 16000.0}}')
# Wrap the connection only after the config message has been sent.
256 ws = await web_stack.enter_async_context(_polite_websocket(ws))
258 phrase, confidence = await ListenPhrase(s, ws)
260 print(phrase,confidence)
# Key phrase heard with sufficient confidence: speak the prompt
# (Russian for "I am waiting for a command").
261 if phrase == keyphrase and confidence>=confidence_treshold :
262 PlayBack(p, "Я жду команду", mic=s)
# Up to `rec_attempts` tries at recognizing a command.
265 for i in range(rec_attempts):
266 phrase, confidence = await ListenPhrase(s, ws)
268 print(phrase,confidence)
# NOTE(review): this comparison is strict (>) while the key-phrase check
# uses >= -- confirm the difference is intended.
269 if confidence > confidence_treshold:
# An empty command list accepts any phrase.
270 if (not commands) or (phrase in commands):
272 print("Command: ", phrase)
# `command` is assigned on a line not shown in this listing.
274 RunCommand(command, p, s)
# Spoken feedback, in Russian: "I don't know such a command: <phrase>",
# "Didn't understand, too unintelligible", and "Never did understand the
# command...". The branches they belong to are not visible.
277 PlayBack(p, "Не знаю такой команды: "+phrase, mic=s)
279 PlayBack(p, "Не поняла, слишком неразборчиво", mic=s)
282 PlayBack(p, "Так команду и не поняла...", mic=s)
283 except KeyboardInterrupt:
# get_config: reads settings with configparser and collects them into the
# key/value entries listed at the end of this block.
#   path -- configuration file path (the statement that reads it is on a line
#           not shown in this listing)
# NOTE(review): try/except lines around each lookup, the fallback assignments
# and the enclosing `return {...}` are missing from the listing (embedded
# numbers skip); comments describe only the visible statements.
288 def get_config(path):
290 config = configparser.ConfigParser()
# [vosk] section: key phrase, attempt count, confidence threshold, server.
294 keyphrase = config['vosk']['keyphrase']
# Message is Russian for "Required parameter - key word - is not set!".
296 print ("Обязательный параметр - ключевое слово - не задан!")
300 rec_attempts = int(config['vosk']['attempts'])
305 confidence_treshold = float(config['vosk']['confidence_treshold'])
# Presumably the default used when the option is absent -- confirm.
307 confidence_treshold = 0.4
310 vosk_server = config['vosk']['server']
# Message is Russian for "Required parameter - recognition server - is not set!".
312 print ("Обязательный параметр - сервер распознавания - не задан!")
# [commands] section: a file with one command phrase per line.
# NOTE(review): open() is called without an explicit encoding, so decoding
# depends on the platform default -- confirm this suits non-ASCII phrases.
316 command_file=config['commands']['command_file']
317 with open(command_file) as file:
318 commands = file.read().splitlines()
# [rest] section: TTS and command API settings.
323 tts_url=config['rest']['tts_url']
328 tts_param=config['rest']['tts_param']
333 api_attempts=int(config['rest']['attempts'])
338 api_user=config['rest']['api_user']
339 api_pwd=config['rest']['api_pwd']
345 command_url=config['rest']['command_url']
350 reply_url=config['rest']['reply_url']
# NOTE(review): the value is kept as the string read from the file; no int()
# conversion is visible here -- confirm what the VAD constructor expects.
355 vad_mode=config['vad']['agressive']
# Debug flag is true only for the (case-insensitive) text "true".
360 debug = (config['system']['debug'].lower() == "true")
# NOTE(review): the command file is opened and read a second time here; the
# enclosing condition is not visible -- confirm this is not a duplicate.
365 with open(command_file) as file:
366 commands = file.read().splitlines()
# Entries of the resulting mapping; further entries fall on lines not shown.
369 "asr_server": vosk_server,
370 "keyphrase": keyphrase,
371 "rec_attempts": rec_attempts,
372 "confidence_treshold": confidence_treshold,
374 "tts_param": tts_param,
375 "api_attempts": api_attempts,
376 "api_user": api_user,
378 "command_url": command_url,
379 "reply_url": reply_url,
381 "commands": commands,
# Script entry: load the configuration, create the voice-activity detector
# and run main_loop on the asyncio event loop.
# NOTE(review): else / try / loop lines are missing from this listing
# (embedded numbers skip), so the restart logic around the final print is not
# visible; comments describe only what is shown.
# The configuration path is the single command-line argument when exactly one
# is given; the other visible assignment uses ~/.config/voicecontrol.ini.
386 if len(sys.argv) == 2:
387 conf_file = sys.argv[1]
389 conf_file = expanduser("~")+"/.config/voicecontrol.ini"
391 config = get_config(conf_file)
393 server = config['asr_server']
# Module-level `vad`, read by ListenPhrase through its `global` declaration.
395 vad = webrtcvad.Vad(config['vad_mode'])
# NOTE(review): asyncio.get_event_loop() without a running loop is deprecated
# in recent Python versions -- consider an explicitly created loop.
401 loop = asyncio.get_event_loop()
# NOTE(review): the f-string below has no placeholders, so the f prefix is
# redundant.
402 loop.run_until_complete(
403 main_loop(f'ws://' + server))
# On any exception or Ctrl-C, finalize outstanding async generators before
# deciding what to do next.
405 except (Exception, KeyboardInterrupt) as e:
406 loop.run_until_complete(
407 loop.shutdown_asyncgens())
# The body of this check is not shown; presumably Ctrl-C ends the program
# while other errors lead to the restart message below -- confirm.
408 if isinstance(e, KeyboardInterrupt):
414 print('Restarting process...')