703e109f9920b3858979ebf37d5ea5b0b7835bc8
[voicecontrol.git] / voicecontrol
1 #!/usr/bin/env python3
2
3 import websockets,asyncio
4 import sys
5 from pyaudio import PyAudio, Stream, paInt16
6 from contextlib import asynccontextmanager, contextmanager, AsyncExitStack, ExitStack
7 from typing import AsyncGenerator, Generator
8
9 from urllib.parse import urlencode, quote
10 import urllib3, base64, json
11
12 import configparser 
13 from os.path import expanduser
14 from streamp3 import MP3Decoder
15
16 from time import time, sleep
17
18 import webrtcvad
19
@contextmanager
def _pyaudio() -> Generator[PyAudio, None, None]:
    """Provide a ``PyAudio`` instance for the duration of a ``with`` block.

    The instance is created on entry and ``terminate()`` is called on it
    when the block is left, whether normally or through an exception.
    """
    audio = PyAudio()
    try:
        yield audio
    finally:
        # Announce the teardown first, then release the audio backend.
        print('Terminating PyAudio object')
        audio.terminate()
28
@contextmanager
def _pyaudio_open_stream(p: PyAudio, *args, **kwargs) -> Generator[Stream, None, None]:
    """Open a stream on *p* and close it when the ``with`` block ends.

    All positional and keyword arguments are forwarded unchanged to
    ``p.open``.  The resulting stream is yielded to the caller and
    ``close()`` is called on it on exit, even if the block raised.
    """
    stream = p.open(*args, **kwargs)
    try:
        yield stream
    finally:
        print('Closing PyAudio Stream')
        stream.close()
37
@asynccontextmanager
async def _polite_websocket(ws: websockets.WebSocketClientProtocol) -> AsyncGenerator[websockets.WebSocketClientProtocol, None]:
    """Yield *ws* and say goodbye to the recognition server on exit.

    On leaving the ``async with`` block an ``{"eof" : 1}`` message is sent
    and the server's final reply is printed.

    If the connection is already closed, the farewell is skipped instead of
    raising.  Raising from the ``finally`` block would replace the exception
    that is currently propagating (for example a ``KeyboardInterrupt``) with
    a ``ConnectionClosed`` error.
    """
    try:
        yield ws
    finally:
        print('Terminating connection')
        try:
            await ws.send('{"eof" : 1}')
            print(await ws.recv())
        except websockets.exceptions.ConnectionClosed:
            # Nothing to say goodbye to; keep the original exception intact.
            print('Connection already closed')
46
def SkipSource(source, seconds):
    """Read and discard roughly *seconds* of audio from *source*.

    *source* must expose ``_rate``, ``_frames_per_buffer`` and
    ``read(num_frames)``.  Only whole buffers are read, so the skipped
    duration is rounded down to a multiple of the buffer length.

    Skipping is best effort: an ordinary error raised while reading stops
    the skipping silently (it is reported only in debug mode).
    ``KeyboardInterrupt`` and ``SystemExit`` are not intercepted.
    """
    debug = config.get("debug")
    try:
        if debug:
            print("Skipping: ", seconds)
        frames = source._frames_per_buffer
        for _ in range(int(seconds * source._rate / frames)):
            source.read(frames)
    except Exception as exc:
        if debug:
            print("Skipping aborted: ", exc)
59
def Silence(speaker, seconds):
    """Write roughly *seconds* of digital silence to *speaker*.

    *speaker* must expose ``_rate``, ``_frames_per_buffer`` and
    ``write(data)``.  Silence is written in whole buffers, so the duration
    is rounded down to a multiple of the buffer length.

    Each buffer holds ``frames * channels * sample_width`` zero bytes.
    Sizing it as just ``frames`` bytes would yield only a fraction of the
    requested duration.
    """
    # NOTE(review): assumes 16-bit samples (paInt16), the only format this
    # script opens output streams with -- confirm if other formats are added.
    sample_width = 2
    channels = getattr(speaker, "_channels", 1) or 1
    # Fall back to a fixed chunk size if the stream reports no buffer size,
    # which would otherwise cause a division by zero below.
    frames = speaker._frames_per_buffer or 1024

    chunk = bytes(frames * channels * sample_width)
    for _ in range(int(seconds * speaker._rate / frames)):
        speaker.write(chunk)
65
def PlayBack(pyaud, text, mic = None):
    """Speak *text* through the configured text-to-speech HTTP service.

    The MP3 returned by the service is streamed, decoded and written to an
    output stream opened on *pyaud*.  The request URL is built from
    ``config["tts_url"]``: when ``config["tts_param"]`` is set, the URL is
    treated as a ``str.format`` template that receives the url-encoded
    ``param=text`` pair; otherwise the quoted text is appended to it.

    After playback the global ``last_time`` is reset and, if *mic* is given,
    ``SkipSource`` is asked to discard the audio captured meanwhile.

    Returns the seconds elapsed since ``last_time``, or 0 when there is
    nothing to play or playback failed.  Failures are best effort and are
    reported only in debug mode.  The output stream and the HTTP connection
    are released on every path.
    """
    global last_time

    playback_url = config["tts_url"]
    playback_param = config["tts_param"]

    if not (playback_url and text):
        return 0

    http = urllib3.PoolManager()

    try:
        if playback_param:
            url = playback_url.format(urlencode({playback_param: text}))
        else:
            url = playback_url + quote(text)

        # preload_content=False lets the decoder pull the body as a stream,
        # so the connection has to be released explicitly afterwards.
        req = http.request('GET', url, preload_content=False)
        try:
            decoder = MP3Decoder(req)
            speaker = pyaud.open(output=True, format=paInt16,
                                 channels=decoder.num_channels,
                                 rate=decoder.sample_rate)
            try:
                Silence(speaker, 0.3)
                for chunk in decoder:
                    speaker.write(chunk)
                sleep(0.1)
                speaker.stop_stream()
            finally:
                speaker.close()
        finally:
            req.release_conn()

        elapsed = time() - last_time
        last_time = time()

        if mic:
            SkipSource(mic, elapsed + 0.5)

        return elapsed

    except Exception as exc:
        # Speech output is optional: report in debug mode and carry on.
        if config["debug"]:
            print("Playback failed: ", exc)
        return 0
112
def _api_headers(user, password, with_body=False):
    """Build request headers for the command API, with basic auth if *user* is set."""
    if user:
        headers = urllib3.util.make_headers(basic_auth=f"{user}:{password or ''}")
    else:
        headers = urllib3.util.make_headers()
    if with_body:
        headers['Content-Type'] = 'text/plain'
        headers['Accept'] = 'application/json'
    return headers


def _poll_reply(http, reply_url, headers, attempts):
    """Poll *reply_url* up to *attempts* times; return the reply state or "NULL"."""
    res = "NULL"
    for _ in range(attempts):
        try:
            payload = http.request('GET', reply_url, headers=headers).data
            res = json.loads(payload)['state'].strip()
            if config["debug"]:
                print(res)
            if res != 'NULL':
                break
        except Exception as exc:
            if config["debug"]:
                print("Reply poll failed: ", exc)
        sleep(1)
    return res


def RunCommand(command, pyaud, mic = None):
    """Send *command* to the configured REST endpoint and speak its reply.

    The command text is POSTed to ``config["command_url"]``.  When
    ``config["reply_url"]`` is set, it is polled up to
    ``config["api_attempts"]`` times for a JSON object whose ``state`` is
    not ``"NULL"``.  That state is spoken via ``PlayBack``; if only
    ``"NULL"`` was obtained, a "server did not answer" message is spoken.
    Finally an empty command is POSTed to the command URL.

    Nothing happens when no command URL is configured.  Ordinary errors are
    best effort: they are reported in debug mode only, and the empty command
    is still attempted.  ``KeyboardInterrupt`` is not intercepted.
    """
    command_url = config["command_url"]
    reply_url = config["reply_url"]
    command_user = config["api_user"]
    command_pwd = config["api_pwd"]
    api_attempts = config["api_attempts"]

    if not command_url:
        return

    http = urllib3.PoolManager()
    # Built before any request so the reset below can always use them.
    post_headers = _api_headers(command_user, command_pwd, with_body=True)

    try:
        http.request('POST', command_url, headers=post_headers,
                     body=command.encode('UTF-8'))
        if reply_url:
            sleep(0.5)
            res = _poll_reply(http, reply_url,
                              _api_headers(command_user, command_pwd),
                              api_attempts)
            if res == "NULL":
                PlayBack(pyaud, "Сервер не ответил", mic=mic)
            elif res:
                PlayBack(pyaud, res, mic=mic)
    except Exception as exc:
        if config["debug"]:
            print("Command failed: ", exc)

    # Post an empty command whether or not the exchange above succeeded.
    try:
        http.request('POST', command_url, headers=post_headers, body=b"")
    except Exception as exc:
        if config["debug"]:
            print("Command reset failed: ", exc)
173
async def ListenPhrase(mic, server):
    """Listen on *mic* until the recognition server returns a phrase.

    Audio is read in 30 ms frames and classified by the global ``vad``.
    Once speech is detected, every frame is sent to *server* and one JSON
    reply is read per frame.  Sending stops after more than two seconds'
    worth of non-speech frames have been counted since the speech began.
    The loop ends when a reply carries a non-empty ``"text"`` or the
    microphone returns no data.

    Returns ``(phrase, confidence)``.  The confidence is the lowest
    per-word ``"conf"`` in the reply's ``"result"`` list, or 0 when the
    reply has none.  On an ordinary error ``('', 0)`` is returned.

    Any ``websockets.exceptions.ConnectionClosed`` is re-raised, including
    a clean close, so the caller can reconnect instead of calling again on
    a dead socket.  ``KeyboardInterrupt`` is not intercepted.  The global
    ``last_time`` is updated when listening finishes.
    """
    global last_time

    frame = 30/1000  # 30 ms
    pause = 2        # seconds of non-speech that end an utterance
    sz = int(mic._rate*frame)
    sp = int(pause/frame)

    phrase = ""
    # Initialised up front: a reply may carry "text" without "result".
    confidence = 0
    voice = False
    cnt = 0

    try:
        while not phrase:
            audio = mic.read(sz)
            if len(audio) == 0:
                break

            vd = vad.is_speech(audio, mic._rate)
            if vd and not voice:
                voice = True
                if config["debug"]:
                    print("+", end="")
                cnt = 0
            if voice and not vd:
                cnt = cnt + 1
                if cnt > sp:
                    cnt = 0
                    voice = False
                    if config["debug"]:
                        print("-")

            if voice:
                print("*", end="")
                await server.send(audio)
                reply = json.loads(await server.recv())
                try:
                    phrase = reply["text"]
                    confidence = min(word["conf"] for word in reply["result"])
                except (KeyError, TypeError, ValueError):
                    # Replies lacking "text" or a usable "result" list are
                    # skipped; whatever was already assigned is kept.
                    pass

        last_time = time()

        return phrase, confidence

    except websockets.exceptions.ConnectionClosed:
        raise
    except Exception as exc:
        if config["debug"]:
            print("Listening failed: ", exc)
        return '', 0
225
226
async def _await_command(p, s, ws, commands, confidence_treshold, rec_attempts):
    """Listen for a command up to *rec_attempts* times; return the one run, or ''."""
    for _ in range(rec_attempts):
        phrase, confidence = await ListenPhrase(s, ws)
        if config["debug"]:
            print(phrase, confidence)
        if confidence <= confidence_treshold:
            PlayBack(p, "Не поняла, слишком неразборчиво", mic=s)
            continue
        if commands and phrase not in commands:
            PlayBack(p, "Не знаю такой команды: "+phrase, mic=s)
            continue
        if config["debug"]:
            print("Command: ", phrase)
        RunCommand(phrase, p, s)
        return phrase
    return ""


async def main_loop(uri):
    """Run the voice-control loop against the recognition server at *uri*.

    A 16 kHz mono input stream is opened once.  The function then connects
    to the server, sends the recognition config (the command phrases plus
    the key phrase) and waits for the key phrase.  When it is heard with at
    least the configured confidence, a prompt is spoken and up to
    ``config["rec_attempts"]`` phrases are tried as commands.

    If the connection or the exchange fails, the error is printed and the
    function reconnects after a short pause.  ``KeyboardInterrupt`` is not
    intercepted.  The function does not return normally.
    """
    keyphrase = config["keyphrase"]
    confidence_treshold = config["confidence_treshold"]
    rec_attempts = config["rec_attempts"]
    commands = config["commands"]

    sample_rate = 16000
    reconnect_delay = 1  # seconds to wait before reconnecting after a failure

    # `commands` is None when no command list is configured.
    phrase_list = list(commands or []) + [keyphrase]
    greeting = json.dumps(
        {"config": {"phrase_list": phrase_list, "sample_rate": float(sample_rate)}},
        ensure_ascii=False)

    with _pyaudio() as p, _pyaudio_open_stream(
            p,
            format=paInt16,
            channels=1,
            rate=sample_rate,
            input=True,
            frames_per_buffer=2000) as s:

        while True:
            try:
                async with websockets.connect(uri) as ws:
                    print('Type Ctrl-C to exit')
                    await ws.send(greeting)

                    async with _polite_websocket(ws):
                        while True:
                            phrase, confidence = await ListenPhrase(s, ws)
                            if config["debug"]:
                                print(phrase, confidence)
                            if phrase == keyphrase and confidence >= confidence_treshold:
                                PlayBack(p, "Я жду команду", mic=s)
                                command = await _await_command(
                                    p, s, ws, commands,
                                    confidence_treshold, rec_attempts)
                                if not command:
                                    PlayBack(p, "Так команду и не поняла...", mic=s)
            except Exception as exc:
                # Covers a lost connection and any other ordinary failure.
                # Pause so an unreachable server is not hammered in a tight
                # loop.
                print(f'Connection problem: {exc}; reconnecting...')
                await asyncio.sleep(reconnect_delay)
287              
def _config_value(parser, section, option, default=None, convert=str):
    """Return ``convert(parser[section][option])``, or *default* if missing or invalid."""
    try:
        return convert(parser[section][option])
    except (KeyError, ValueError):
        return default


def get_config(path):
    """Read the INI file at *path* and return the settings as a dict.

    ``[vosk] keyphrase`` and ``[vosk] server`` are mandatory: when either is
    missing a message is printed and ``KeyError`` is raised.  Every other
    option falls back to a default when it is absent or cannot be converted:

    * ``[vosk] attempts`` -> ``rec_attempts`` (int, default 4)
    * ``[vosk] confidence_treshold`` (float, default 0.4)
    * ``[commands] command_file`` -> ``commands``: the non-blank lines of
      that file, stripped, or ``None`` when it is not set or unreadable
    * ``[rest] tts_url``, ``tts_param``, ``command_url``, ``reply_url``
      (default ``None``)
    * ``[rest] attempts`` -> ``api_attempts`` (int, default 2)
    * ``[rest] api_user`` / ``api_pwd`` (both ``None`` unless both are set)
    * ``[vad] agressive`` -> ``vad_mode`` (int, default 3)
    * ``[system] debug`` (``True`` only for the text "true", any case)
    """
    parser = configparser.ConfigParser()
    parser.read(path)

    try:
        keyphrase = parser['vosk']['keyphrase']
    except KeyError:
        print ("Обязательный параметр - ключевое слово - не задан!")
        raise

    rec_attempts = _config_value(parser, 'vosk', 'attempts', 4, int)
    confidence_treshold = _config_value(
        parser, 'vosk', 'confidence_treshold', 0.4, float)

    try:
        vosk_server = parser['vosk']['server']
    except KeyError:
        print ("Обязательный параметр - сервер распознавания - не задан!")
        raise

    # The command list is optional; a missing or unreadable file means
    # "no list" rather than a startup failure.
    commands = None
    command_file = _config_value(parser, 'commands', 'command_file')
    if command_file:
        try:
            with open(command_file) as file:
                commands = [line.strip() for line in file if line.strip()]
        except OSError as exc:
            print(f"Cannot read command file {command_file}: {exc}")

    # Credentials are only used as a pair.
    try:
        api_user = parser['rest']['api_user']
        api_pwd = parser['rest']['api_pwd']
    except KeyError:
        api_user = None
        api_pwd = None

    return {
        "asr_server": vosk_server,
        "keyphrase": keyphrase,
        "rec_attempts": rec_attempts,
        "confidence_treshold": confidence_treshold,
        "tts_url": _config_value(parser, 'rest', 'tts_url'),
        "tts_param": _config_value(parser, 'rest', 'tts_param'),
        "api_attempts": _config_value(parser, 'rest', 'attempts', 2, int),
        "api_user": api_user,
        "api_pwd": api_pwd,
        "command_url": _config_value(parser, 'rest', 'command_url'),
        "reply_url": _config_value(parser, 'rest', 'reply_url'),
        "debug": _config_value(parser, 'system', 'debug', False,
                               lambda value: value.lower() == "true"),
        "commands": commands,
        # Converted to int so it can be handed to the VAD as a number.
        "vad_mode": _config_value(parser, 'vad', 'agressive', 3, int),
    }
384
385
# Script entry point: load the configuration, set up the voice activity
# detector and keep the recognition loop running until Ctrl-C.
if len(sys.argv) == 2:
    conf_file = sys.argv[1]
else:
    conf_file = expanduser("~")+"/.config/voicecontrol.ini"

config = get_config(conf_file)

server = config['asr_server']

# The VAD mode is coerced to int in case the config holds it as text.
vad = webrtcvad.Vad(int(config['vad_mode']))

# Reference point for PlayBack's elapsed-time calculation, so it is defined
# even before the first phrase has been heard.
last_time = time()

# The loop is created once, before the `try`, so the handlers below can
# always refer to it.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)

while True:

    try:
        loop.run_until_complete(main_loop('ws://' + server))

    except KeyboardInterrupt:
        loop.run_until_complete(loop.shutdown_asyncgens())
        loop.close()
        print('Bye')
        sys.exit(0)

    except Exception as e:
        loop.run_until_complete(loop.shutdown_asyncgens())
        print(f'Oops! {e}')
        print('Restarting process...')
        sleep(10)