Переделано под свежий Python и Vosk
[voicecontrol.git] / voicecontrol
1 #!/usr/bin/env python3
2
3 import websockets,asyncio
4 import sys
5 from pyaudio import PyAudio, Stream, paInt16
6 from contextlib import asynccontextmanager, contextmanager, AsyncExitStack, ExitStack
7 from typing import AsyncGenerator, Generator
8
9 from urllib.parse import urlencode, quote
10 import urllib3, base64, json
11
12 import configparser 
13 from os.path import expanduser
14 from streamp3 import MP3Decoder
15
16 from time import time, sleep
17
18 import webrtcvad
19
20 from pprint import pprint
21
@contextmanager
def _pyaudio() -> Generator[PyAudio, None, None]:
    """Yield a freshly created ``PyAudio`` instance and terminate it on exit.

    The instance is terminated whether the managed block finishes normally
    or raises.
    """
    audio = PyAudio()
    try:
        yield audio
    finally:
        # Announce the teardown first, then release the audio backend.
        print('Terminating PyAudio object')
        audio.terminate()
30
@contextmanager
def _pyaudio_open_stream(p: PyAudio, *args, **kwargs) -> Generator[Stream, None, None]:
    """Open a stream on ``p`` and close it when the managed block exits.

    All positional and keyword arguments are forwarded unchanged to
    ``p.open``. The stream is closed even if the managed block raises.
    """
    stream = p.open(*args, **kwargs)
    try:
        yield stream
    finally:
        # Announce the teardown first, then close the stream.
        print('Closing PyAudio Stream')
        stream.close()
39
@asynccontextmanager
async def _polite_websocket(ws: websockets.WebSocketClientProtocol) -> AsyncGenerator[websockets.WebSocketClientProtocol, None]:
    """Yield ``ws`` and say goodbye to the recognizer when the block exits.

    On exit an ``{"eof" : 1}`` message is sent and the server's reply is
    printed. The farewell is best-effort: if the connection is already
    closed (often the very reason the block is exiting) the failure is
    reported and suppressed, so it does not mask the exception that ended
    the managed block.
    """
    try:
        yield ws
    finally:
        print('Terminating connection')
        try:
            await ws.send('{"eof" : 1}')
            print(await ws.recv())
        except websockets.ConnectionClosed as e:
            # Nothing left to say goodbye to; keep the original error visible.
            print('Exception: ' + str(e))
48
def SkipSource(source,seconds):
    """Read and discard roughly ``seconds`` of audio from ``source``.

    ``source`` must expose ``_rate``, ``_frames_per_buffer`` and
    ``read(frames)``. The number of whole buffers covering ``seconds`` is
    read and thrown away.

    Skipping is best-effort: ordinary errors are reported in debug mode and
    otherwise ignored. ``KeyboardInterrupt`` and ``SystemExit`` are not
    intercepted and propagate to the caller.
    """
    try:
        if config["debug"]:
            print("Skipping: ", seconds)
        frames = source._frames_per_buffer
        buffers = int(seconds * source._rate / frames)
        for _ in range(buffers):
            source.read(frames)
    except Exception as e:
        # Failing to drop stale audio is not fatal, but do not hide it
        # completely when debugging.
        if config.get("debug"):
            print('Exception: ' + str(e))
61
def PlayBack(pyaud, text, mic = None):
    """Speak ``text`` through the configured text-to-speech service.

    The MP3 returned by the TTS URL is decoded and written to an output
    stream opened on ``pyaud``. When ``tts_param`` is configured, ``text``
    is URL-encoded as that query parameter and substituted into
    ``tts_url`` with ``str.format``; otherwise the quoted text is appended
    to ``tts_url``.

    Args:
        pyaud: object whose ``open`` method creates the output stream.
        text: phrase to speak; nothing happens when it is empty.
        mic: optional input stream; when given, the audio it captured while
            the phrase was being played is discarded via ``SkipSource``.

    Returns:
        Seconds elapsed since the module-level ``last_time`` stamp, or 0
        when no TTS URL is configured or ``text`` is empty.

    Any error raised by the HTTP request, the decoder or the audio stream
    propagates to the caller; the speaker stream and the HTTP connection
    are released in every case.
    """
    global last_time

    playback_url = config["tts_url"]
    playback_param = config["tts_param"]

    if not (playback_url and text):
        return 0

    if playback_param:
        url = playback_url.format(urlencode({playback_param: text}))
    else:
        url = playback_url + quote(text)

    http = urllib3.PoolManager()
    speaker = None
    response = None
    try:
        response = http.request('GET', url, preload_content=False)
        decoder = MP3Decoder(response)

        speaker = pyaud.open(output=True, format=paInt16,
                             channels=decoder.num_channels,
                             rate=decoder.sample_rate)
        if config["debug"]:
            pprint(speaker)

        for chunk in decoder:
            speaker.write(chunk)

        # Short grace period before the stream is stopped.
        sleep(0.1)
        speaker.stop_stream()
    finally:
        if speaker is not None:
            speaker.close()
        if response is not None:
            response.release_conn()
        http.clear()

    elapsed = time() - last_time
    last_time = time()

    if mic:
        # Drop what the microphone recorded meanwhile, plus a small margin.
        SkipSource(mic, elapsed + 0.2)

    return elapsed
108
def _api_headers(user, password, with_body=False):
    """Build HTTP headers for the command API, with basic auth if ``user`` is set.

    When ``with_body`` is true the plain-text Content-Type and JSON Accept
    headers used for command POSTs are added.
    """
    if user:
        headers = urllib3.util.make_headers(basic_auth=user + ':' + password)
    else:
        headers = urllib3.util.make_headers()
    if with_body:
        headers['Content-Type'] = 'text/plain'
        headers['Accept'] = 'application/json'
    return headers


def RunCommand(command, pyaud, mic = None):
    """Send ``command`` to the configured REST endpoint and speak the reply.

    Steps:
      1. POST the command text to ``command_url`` (up to ``api_attempts``
         tries, 0.5 s apart).
      2. If ``reply_url`` is configured, poll it (up to ``api_attempts``
         tries) until the JSON ``state`` field is something other than
         ``"NULL"``, then speak it via ``PlayBack``.
      3. POST an empty body to ``command_url`` to clear the command.

    Args:
        command: text of the recognized command.
        pyaud: passed through to ``PlayBack`` for audio output.
        mic: optional input stream passed through to ``PlayBack``.

    Returns:
        None. Does nothing when no ``command_url`` is configured.

    ``KeyboardInterrupt`` propagates; any other error is printed and
    followed by a best-effort attempt to clear the command on the server.
    """
    command_url = config["command_url"]
    if not command_url:
        return

    reply_url = config["reply_url"]
    command_user = config["api_user"]
    command_pwd = config["api_pwd"]
    api_attempts = config["api_attempts"]
    debug = config["debug"]

    http = urllib3.PoolManager()

    try:
        if debug:
            print('Preparing command')
        post_headers = _api_headers(command_user, command_pwd, with_body=True)
        if debug:
            print('Sending command')

        sent = False
        for _ in range(api_attempts):
            try:
                http.request('POST', command_url, headers=post_headers,
                             body=command.encode('UTF-8'))
                sent = True
                break
            except Exception as e:
                print('Exception: ' + str(e))
                sleep(0.5)

        if not sent:
            PlayBack(pyaud, "Сервер недоступен", mic=mic)
            return

        if debug:
            print('Command sent')

        if reply_url:
            # Wait briefly before the first poll of the reply endpoint.
            sleep(0.5)
            res = "NULL"
            get_headers = _api_headers(command_user, command_pwd)
            for _ in range(api_attempts):
                try:
                    req = http.request('GET', reply_url, headers=get_headers).data
                    res = json.loads(req)['state'].strip()
                    if debug:
                        print(res)
                    if res != 'NULL':
                        break
                    sleep(1)
                except Exception as e:
                    print('Exception: ' + str(e))
                    sleep(1)
            if res and res != "NULL":
                PlayBack(pyaud, res, mic=mic)
            elif res == "NULL":
                PlayBack(pyaud, "Сервер не ответил", mic=mic)

        # Clear the command on the server once it has been handled.
        http.request('POST', command_url, headers=post_headers, body=b"")
    except Exception as e:
        print('Exception: ' + str(e))
        try:
            # Best-effort reset; headers are rebuilt because the failure may
            # have happened before they were first created.
            http.request('POST', command_url,
                         headers=_api_headers(command_user, command_pwd, with_body=True),
                         body="")
        except Exception:
            pass
188
async def ListenPhrase(mic, server):
    """Listen on ``mic`` until the recognizer returns a non-empty phrase.

    Audio is read in 30 ms frames. A frame is forwarded to ``server`` only
    while the voice-activity detector (module-level ``vad``) considers an
    utterance to be in progress; each forwarded frame is followed by one
    ``recv`` whose JSON reply may carry the final ``"text"``.

    Args:
        mic: input stream exposing ``_rate`` and ``read(frames)``.
        server: connection exposing awaitable ``send`` and ``recv``.

    Returns:
        The recognized phrase, or an empty string when ``mic.read`` returns
        no data.

    Side effect: updates the module-level ``last_time`` stamp on return.
    """
    global last_time

    frame = 30/1000 # 30 ms
    pause = 2
    frame_size = int(mic._rate*frame)
    max_silent_frames = int(pause/frame)

    phrase = ""
    voice = False
    silent_frames = 0

    while not phrase:
        audio = mic.read(frame_size)
        if len(audio) == 0:
            break
        speech = vad.is_speech(audio, mic._rate)
        if speech and not voice:
            # Start of an utterance.
            voice = True
            if config["debug"]:
                print("+", end="")
            silent_frames = 0
        if voice and not speech:
            # NOTE(review): the counter is not reset when speech resumes, so
            # silent frames accumulate over the whole utterance rather than
            # measuring one continuous pause - confirm this is intended.
            silent_frames = silent_frames + 1
            if silent_frames > max_silent_frames:
                silent_frames = 0
                voice = False
                if config["debug"]:
                    print("-")
        if voice:
            print("*",end="")
            await server.send(audio)
            reply = json.loads(await server.recv())
            try:
                phrase = reply["text"]
            except (KeyError, TypeError):
                # Reply without a "text" field, or not a JSON object: keep
                # listening.
                pass

    last_time = time()
    return phrase
229
230
async def main_loop(uri):
    """Run the listen / recognize / execute cycle against the server at ``uri``.

    A 16 kHz mono input stream is opened once and kept for the lifetime of
    the coroutine. The websocket connection is re-established after any
    ordinary error. For every connection the recognizer is configured with
    the known commands plus the key phrase. When the key phrase is heard,
    up to ``rec_attempts`` phrases are taken as command candidates; the
    first acceptable one is passed to ``RunCommand``.

    Args:
        uri: websocket URI of the recognition server.

    Never returns normally. ``KeyboardInterrupt`` and errors raised while
    opening the audio device propagate to the caller.
    """
    keyphrase = config["keyphrase"]
    rec_attempts = config["rec_attempts"]
    commands = config["commands"]

    # ``commands`` may be None (no command file), meaning "accept anything";
    # the recognizer is then configured with the key phrase only.
    phrase_list = list(commands or [])
    phrase_list.append(keyphrase)
    setup_message = ('{"config" : { "phrase_list" : '
                     + json.dumps(phrase_list, ensure_ascii=False)
                     + ', "sample_rate" : 16000.0}}')

    with _pyaudio() as p, _pyaudio_open_stream(p,
            format = paInt16,
            channels = 1,
            rate = 16000,
            input = True,
            frames_per_buffer = 2000) as s:

        while True:
            try:
                async with websockets.connect(uri) as ws:
                    print('Type Ctrl-C to exit')
                    await ws.send(setup_message)

                    async with _polite_websocket(ws):
                        while True:
                            phrase = await ListenPhrase(s, ws)
                            if config["debug"]:
                                print(phrase)
                            if phrase != keyphrase:
                                continue

                            print("COMMAND!")
                            PlayBack(p, "Слушаю!", mic=s)
                            command = ""

                            for _ in range(rec_attempts):
                                phrase = await ListenPhrase(s, ws)
                                if config["debug"]:
                                    print(phrase)
                                if (not commands) or (phrase in commands):
                                    if config["debug"]:
                                        print("Command: ", phrase)
                                    command = phrase
                                    RunCommand(command, p, s)
                                    break
                                else:
                                    PlayBack(p, "Не знаю такой команды: "+phrase, mic=s)
                            else:
                                # Every attempt was rejected.
                                PlayBack(p, "Не поняла, слишком неразборчиво", mic=s)

                            if not command:
                                PlayBack(p, "Так команду и не поняла...", mic=s)
            except Exception as e:
                print('Exception: '+str(e))
                # Pause before reconnecting so an unreachable server does not
                # turn this loop into a busy spin.
                await asyncio.sleep(1)
292              
def get_config(path):
    """Read the INI file at ``path`` and return the settings as a dict.

    Required options (a message is printed and the lookup error re-raised
    when one is missing):
      * ``[vosk] keyphrase`` - wake phrase
      * ``[vosk] server``    - recognition server address

    Optional options and their defaults:
      * ``[vosk] attempts`` (int, 4)
      * ``[commands] command_file`` - text file with one command per line;
        when absent or unreadable ``commands`` is None
      * ``[rest] tts_url``, ``tts_param``, ``command_url``, ``reply_url`` (None)
      * ``[rest] attempts`` (int, 2)
      * ``[rest] api_user`` / ``api_pwd`` (None unless both are present)
      * ``[vad] agressive`` (int, 3)
      * ``[system] debug`` (True only for the text "true", case-insensitive)

    Returns:
        dict with keys ``asr_server``, ``keyphrase``, ``rec_attempts``,
        ``tts_url``, ``tts_param``, ``api_attempts``, ``api_user``,
        ``api_pwd``, ``command_url``, ``reply_url``, ``debug``,
        ``commands`` and ``vad_mode``.
    """
    parser = configparser.ConfigParser()
    parser.read(path)

    def required(section, key, message):
        """Return a mandatory option; print ``message`` and re-raise if absent."""
        try:
            return parser[section][key]
        except (KeyError, configparser.Error):
            print(message)
            raise

    def optional(section, key, default=None, convert=None):
        """Return an option (optionally converted), or ``default`` if unusable."""
        try:
            value = parser[section][key]
            return convert(value) if convert else value
        except (KeyError, ValueError, configparser.Error):
            return default

    keyphrase = required('vosk', 'keyphrase',
                         "Обязательный параметр - ключевое слово - не задан!")
    rec_attempts = optional('vosk', 'attempts', 4, int)
    vosk_server = required('vosk', 'server',
                           "Обязательный параметр - сервер распознавания - не задан!")

    commands = None
    command_file = optional('commands', 'command_file')
    if command_file:
        try:
            with open(command_file) as file:
                lines = file.read().splitlines()
            # Blank lines are not commands; surrounding whitespace would
            # prevent a match against recognized phrases.
            commands = [line.strip() for line in lines if line.strip()]
        except (OSError, UnicodeError) as e:
            print('Exception: ' + str(e))
            commands = None

    tts_url = optional('rest', 'tts_url')
    tts_param = optional('rest', 'tts_param')
    api_attempts = optional('rest', 'attempts', 2, int)

    # Credentials are only meaningful as a pair.
    api_user = optional('rest', 'api_user')
    api_pwd = optional('rest', 'api_pwd')
    if api_user is None or api_pwd is None:
        api_user = None
        api_pwd = None

    command_url = optional('rest', 'command_url')
    reply_url = optional('rest', 'reply_url')

    # The option name keeps its historical spelling so existing config files
    # continue to work; the value is converted to int before it is returned.
    vad_mode = optional('vad', 'agressive', 3, int)

    debug = optional('system', 'debug', False, lambda value: value.lower() == "true")

    return {
        "asr_server": vosk_server,
        "keyphrase": keyphrase,
        "rec_attempts": rec_attempts,
        "tts_url": tts_url,
        "tts_param": tts_param,
        "api_attempts": api_attempts,
        "api_user": api_user,
        "api_pwd": api_pwd,
        "command_url": command_url,
        "reply_url": reply_url,
        "debug": debug,
        "commands": commands,
        "vad_mode": vad_mode,
    }
383
384
# Configuration file: the single command-line argument, or the per-user default.
if len(sys.argv) == 2:
    conf_file = sys.argv[1]
else:
    conf_file = expanduser("~")+"/.config/voicecontrol.ini"

config = get_config(conf_file)

server = config['asr_server']

# The value read from the config file may be text, so convert it to int here.
vad = webrtcvad.Vad(int(config['vad_mode']))
last_time = time()

# Supervisor loop: Ctrl-C exits cleanly, any other failure is reported and
# followed by a restart after a pause.
while True:

    try:

        # asyncio.run creates the event loop, shuts down async generators
        # and closes the loop when main_loop finishes or fails.
        asyncio.run(main_loop(f'ws://{server}'))

    except KeyboardInterrupt:
        print('Bye')
        sys.exit(0)

    except Exception as e:
        print(f'Oops! {e}')
        print('Restarting process...')
        sleep(10)