1 #!/usr/bin/env python3.8
3 import websockets,asyncio
5 from pyaudio import PyAudio, Stream, paInt16
6 from contextlib import asynccontextmanager, contextmanager, AsyncExitStack
7 from typing import AsyncGenerator, Generator
9 from urllib.parse import urlencode, quote
10 import urllib3, base64, json
13 from os.path import expanduser
14 from streamp3 import MP3Decoder
16 from time import time, sleep
# _pyaudio: generator annotated as yielding a PyAudio object. hello() consumes it
# through stack.enter_context(), i.e. it is used as a context manager.
# NOTE(review): the decorator, the PyAudio construction, the yield and the actual
# terminate call are not visible in this excerpt -- confirm against the full file.
21 def _pyaudio() -> Generator[PyAudio, None, None]:
# Trace message emitted on the teardown path.
26 print('Terminating PyAudio object')
# _pyaudio_open_stream: opens a stream on the PyAudio object `p`, forwarding every
# positional and keyword argument unchanged to p.open(). Annotated as a generator
# yielding the Stream; hello() enters it via stack.enter_context().
# NOTE(review): the yield and the stream close/stop calls are not visible in this
# excerpt -- confirm the stream is closed on the teardown path.
30 def _pyaudio_open_stream(p: PyAudio, *args, **kwargs) -> Generator[Stream, None, None]:
31 s = p.open(*args, **kwargs)
# Trace message emitted on the teardown path.
35 print('Closing PyAudio Stream')
# _polite_websocket: async generator wrapping an already-connected websocket `ws`;
# hello() enters it via stack.enter_async_context().
# NOTE(review): the decorator and the yield are not visible in this excerpt.
39 async def _polite_websocket(ws: websockets.WebSocketClientProtocol) -> AsyncGenerator[websockets.WebSocketClientProtocol, None]:
43 print('Terminating connection')
# Send the '{"eof" : 1}' message (presumably the server's end-of-stream signal --
# confirm against the ASR server protocol) and print the server's reply to it.
44 await ws.send('{"eof" : 1}')
45 print(await ws.recv())
# SkipSource: reads buffers from the input stream `source` to skip roughly
# `seconds` seconds of audio; the read data is assigned to a local and is not
# used on any visible line.
# Relies on the stream's private attributes `_rate` and `_frames_per_buffer`.
# NOTE(review): the loop header that repeats the read `bufs` times and the body of
# the KeyboardInterrupt handler are not visible in this excerpt.
47 def SkipSource(source,seconds):
51 print("Skipping: ", seconds)
# Number of whole buffers covering `seconds`: seconds * sample_rate / frames_per_buffer.
52 bufs = int((seconds)*source._rate/source._frames_per_buffer)
# Read one buffer's worth of frames.
54 buffer = source.read(source._frames_per_buffer)
55 except KeyboardInterrupt:
# PlayBack: fetches synthesized speech for `text` from the configured TTS URL and
# opens an output stream on `pyaud` for it, then skips microphone input via SkipSource().
#   pyaud -- object whose .open() creates the output stream
#   text  -- text to speak; nothing is requested when it is empty
#   mic   -- input stream passed to SkipSource() (default None)
# Reads the module-level globals `config` and `last_time`.
# NOTE(review): the condition choosing between the two URL forms, the loop that
# writes decoded audio to `speaker`, and the handler bodies are not visible here.
60 def PlayBack(pyaud, text, mic = None):
61 global config, last_time
63 http = urllib3.PoolManager()
65 playback_url = config["tts_url"]
66 playback_param = config["tts_param"]
68 if playback_url and text:
# Form 1: the URL is a format template; the text is inserted as an url-encoded
# "<tts_param>=<text>" pair.
73 url = playback_url.format(urlencode({playback_param:text}))
# Form 2: the percent-quoted text is simply appended to the URL.
75 url = playback_url+quote(text)
# preload_content=False keeps the response as a stream, which is handed to the
# MP3 decoder directly.
77 req = http.request('GET', url, preload_content=False)
78 decoder = MP3Decoder(req)
# Output stream parameters are taken from the decoded MP3 header.
80 speaker = pyaud.open(output=True, format=paInt16, channels=decoder.num_channels, rate=decoder.sample_rate)
# Time elapsed since `last_time`; that much microphone input plus 0.5 s is skipped.
# NOTE(review): presumably this discards the program's own speech picked up by
# the microphone -- confirm where last_time is set.
89 elapsed = time() - last_time
93 SkipSource(mic, elapsed + 0.5)
97 except KeyboardInterrupt:
106 def RunCommand(command, pyaud, mic = None):
110 http = urllib3.PoolManager()
112 command_url = config["command_url"]
113 reply_url = config["reply_url"]
114 command_user = config["api_user"]
115 command_pwd = config["api_pwd"]
116 api_attempts = config["api_attempts"]
121 my_headers = urllib3.util.make_headers(basic_auth=command_user+':'+command_pwd)
123 my_headers = urllib3.util.make_headers()
124 my_headers['Content-Type']='text/plain'
125 my_headers['Accept']='apllication/json'
126 http.request('POST',command_url,headers=my_headers,body=command.encode('UTF-8'))
130 for i in range(api_attempts):
133 my_headers = urllib3.util.make_headers(basic_auth=command_user+':'+command_pwd)
135 my_headers = urllib3.util.make_headers()
136 req=http.request('GET',reply_url,headers=my_headers).data
137 res = json.loads(req)['state'].strip()
140 if not(res == 'NULL'):
143 except KeyboardInterrupt:
147 if res and not(res=="NULL"):
148 PlayBack(pyaud, res, mic=mic)
150 PlayBack(pyaud, "Сервер не ответил", mic=mic)
151 http.request('POST',command_url, headers=my_headers, body="")
152 except KeyboardInterrupt:
156 http.request('POST',command_url, headers=my_headers, body="")
# ListenPhrase: reads audio from `mic`, runs it through the global voice-activity
# detector `vad`, sends audio to the recognition `server` websocket and returns
# the recognized phrase together with its confidence.
#   mic    -- input stream; its private `_rate` attribute is used as the sample rate
#   server -- websocket with send()/recv()
# Returns a (phrase, confidence) tuple.
# NOTE(review): the read loop, the definition of `pause`, the speech/silence state
# handling and the exception handler bodies are not visible in this excerpt.
160 async def ListenPhrase(mic, server):
161 global config,last_time, vad
163 frame = 30/1000 # 30 ms
# Samples per 30 ms frame at the microphone's sample rate.
165 sz = int(mic._rate*frame)
# Number of frames that fit into `pause`.
166 sp = int(pause/frame)
# Voice-activity decision for the current chunk of audio.
177 vd = vad.is_speech(data, mic._rate)
# Send audio to the recognizer and parse its JSON answer.
192 await server.send(data)
193 datatxt = await server.recv()
194 data = json.loads(datatxt)
196 phrase = data["text"]
# Overall confidence is the lowest "conf" among the entries of data["result"].
197 confidence = min(map(lambda x: x["conf"], data["result"]))
203 return phrase, confidence
205 except KeyboardInterrupt:
207 except websockets.exceptions.ConnectionClosedError:
214 async def hello(uri):
216 global config, last_time
218 keyphrase = config["keyphrase"]
219 confidence_treshold = config["confidence_treshold"]
220 rec_attempts = config["rec_attempts"]
221 commands = config["commands"]
223 async with AsyncExitStack() as stack:
224 ws = await stack.enter_async_context(websockets.connect(uri))
225 print('Type Ctrl-C to exit')
226 phrases = config["commands"]
227 phrases.append(config["keyphrase"])
228 phrases = json.dumps(phrases, ensure_ascii=False)
229 await ws.send('{"config" : { "phrase_list" : '+phrases+', "sample_rate" : 16000.0}}')
231 ws = await stack.enter_async_context(_polite_websocket(ws))
232 p = stack.enter_context(_pyaudio())
233 s = stack.enter_context(_pyaudio_open_stream(p,
238 frames_per_buffer = 2000))
240 phrase, confidence = await ListenPhrase(s, ws)
242 print(phrase,confidence)
243 if phrase == keyphrase and confidence>=confidence_treshold :
244 PlayBack(p, "Я жду команду", mic=s)
247 for i in range(rec_attempts):
248 phrase, confidence = await ListenPhrase(s, ws)
250 print(phrase,confidence)
251 if confidence > confidence_treshold:
252 if (not commands) or (phrase in commands):
254 print("Command: ", phrase)
256 RunCommand(command, p, s)
259 PlayBack(p, "Не знаю такой команды: "+phrase, mic=s)
261 PlayBack(p, "Не поняла, слишком неразборчиво", mic=s)
264 PlayBack(p, "Так команду и не поняла...", mic=s)
# get_config: reads settings through a configparser.ConfigParser and collects them
# into a plain dict (keys visible below).
#   path -- NOTE(review): the line that reads `path` is not visible in this excerpt.
# Sections/options read: [vosk] keyphrase, attempts, confidence_treshold, server;
# [commands] command_file; [rest] tts_url, tts_param, attempts, api_user, api_pwd,
# command_url, reply_url; [vad] agressive; [system] debug.
# NOTE(review): the try/except blocks that supply defaults or abort on missing
# mandatory options are on lines not visible here.
267 def get_config(path):
269 config = configparser.ConfigParser()
# Mandatory: the wake-up keyphrase.
273 keyphrase = config['vosk']['keyphrase']
275 print ("Обязательный параметр - ключевое слово - не задан!")
279 rec_attempts = int(config['vosk']['attempts'])
284 confidence_treshold = float(config['vosk']['confidence_treshold'])
# Fallback value assigned for the confidence threshold.
286 confidence_treshold = 0.4
# Mandatory: address of the recognition server.
289 vosk_server = config['vosk']['server']
291 print ("Обязательный параметр - сервер распознавания - не задан!")
# Allowed commands: one per line in the command file.
295 command_file=config['commands']['command_file']
296 with open(command_file) as file:
297 commands = file.read().splitlines()
302 tts_url=config['rest']['tts_url']
307 tts_param=config['rest']['tts_param']
312 api_attempts=int(config['rest']['attempts'])
317 api_user=config['rest']['api_user']
318 api_pwd=config['rest']['api_pwd']
324 command_url=config['rest']['command_url']
329 reply_url=config['rest']['reply_url']
# NOTE(review): configparser returns a string here; no int conversion is visible
# in this excerpt -- confirm one exists before the value reaches webrtcvad.Vad().
334 vad_mode=config['vad']['agressive']
339 debug = (config['system']['debug'].lower() == "true")
# NOTE(review): the command file is opened and read a second time here, same as
# above -- looks redundant; confirm against the surrounding lines.
344 with open(command_file) as file:
345 commands = file.read().splitlines()
# Entries of the returned settings dict (those visible in this excerpt).
348 "asr_server": vosk_server,
349 "keyphrase": keyphrase,
350 "rec_attempts": rec_attempts,
351 "confidence_treshold": confidence_treshold,
353 "tts_param": tts_param,
354 "api_attempts": api_attempts,
355 "api_user": api_user,
357 "command_url": command_url,
358 "reply_url": reply_url,
360 "commands": commands,
# Script entry section: load the configuration, create the voice-activity
# detector and run hello() on the asyncio event loop.
# NOTE(review): the enclosing loop/try lines and the else-branches are not visible
# in this excerpt.
# Configuration file: first command-line argument if exactly one is given ...
365 if len(sys.argv) == 2:
366 conf_file = sys.argv[1]
# ... otherwise ~/.config/voicecontrol.ini.
368 conf_file = expanduser("~")+"/.config/voicecontrol.ini"
370 config = get_config(conf_file)
372 server = config['asr_server']
# Module-level VAD instance, used by ListenPhrase() through `global vad`.
374 vad = webrtcvad.Vad(config['vad_mode'])
380 loop = asyncio.get_event_loop()
381 loop.run_until_complete(
# NOTE(review): the f-prefix has no placeholders; the URI is built by concatenation.
382 hello(f'ws://' + server))
# On any exception or Ctrl-C: shut down the loop's async generators first.
384 except (Exception, KeyboardInterrupt) as e:
385 loop.run_until_complete(
386 loop.shutdown_asyncgens())
387 if isinstance(e, KeyboardInterrupt):
393 print('Restarting process...')