fafda2c756ffc60ec5848b31be73698823cfb622
[voicecontrol.git] / voicecontrol
1 #!/usr/bin/env python3.8
2
3 import websockets,asyncio
4 import sys
5 from pyaudio import PyAudio, Stream, paInt16
6 from contextlib import asynccontextmanager, contextmanager, AsyncExitStack
7 from typing import AsyncGenerator, Generator
8
9 from urllib.parse import urlencode, quote
10 import urllib3, base64, json
11
12 import configparser 
13 from os.path import expanduser
14 from streamp3 import MP3Decoder
15
16 from time import time, sleep
17
18 import webrtcvad
19
@contextmanager
def _pyaudio() -> Generator[PyAudio, None, None]:
    """Yield a freshly created ``PyAudio`` object and terminate it on exit.

    ``terminate()`` is called whether the ``with`` body completes normally
    or raises.
    """
    audio = PyAudio()
    try:
        yield audio
    finally:
        print('Terminating PyAudio object')
        audio.terminate()
28
@contextmanager
def _pyaudio_open_stream(p: PyAudio, *args, **kwargs) -> Generator[Stream, None, None]:
    """Open a stream on *p* and close it when the ``with`` block ends.

    All positional and keyword arguments are forwarded unchanged to
    ``p.open``. The stream is closed even if the body raises.
    """
    stream = p.open(*args, **kwargs)
    try:
        yield stream
    finally:
        print('Closing PyAudio Stream')
        stream.close()
37
@asynccontextmanager
async def _polite_websocket(ws: websockets.WebSocketClientProtocol) -> AsyncGenerator[websockets.WebSocketClientProtocol, None]:
    """Yield *ws* unchanged and say goodbye to the server on exit.

    On leaving the ``async with`` block an ``eof`` message is sent and the
    reply that comes back is printed.
    """
    try:
        yield ws
    finally:
        print('Terminating connection')
        await ws.send('{"eof" : 1}')
        farewell = await ws.recv()
        print(farewell)
46
def SkipSource(source, seconds):
    """Read and discard roughly *seconds* worth of audio from *source*.

    The number of buffers to drop is derived from the stream's ``_rate`` and
    ``_frames_per_buffer`` attributes. This is best-effort: any ordinary
    error while reading ends the skip quietly (reported only in debug mode).
    ``KeyboardInterrupt`` and ``SystemExit`` still propagate.

    Args:
        source: input stream exposing ``_rate``, ``_frames_per_buffer`` and
            ``read(num_frames)``.
        seconds: amount of audio to discard, in seconds.
    """
    debug = False
    try:
        debug = config["debug"]
        if debug:
            print("Skipping: ", seconds)
        frames = source._frames_per_buffer
        for _ in range(int(seconds * source._rate / frames)):
            source.read(frames)
    except Exception as err:
        # Deliberately non-fatal: failing to flush the microphone must not
        # stop the assistant.
        if debug:
            print("Skipping aborted: ", err)
59
def PlayBack(pyaud, text, mic = None):
    """Speak *text* through the configured text-to-speech HTTP service.

    The MP3 returned by the service is decoded on the fly and written to a
    PyAudio output stream. Afterwards, if *mic* is given, the audio captured
    since ``last_time`` (plus half a second) is discarded via ``SkipSource``.

    Args:
        pyaud: PyAudio object used to open the output stream.
        text: phrase to speak; nothing happens when it is empty.
        mic: optional input stream to flush after playback.

    Returns:
        Seconds elapsed since ``last_time`` on success; ``0`` when TTS is not
        configured, *text* is empty, or playback failed.
    """
    global last_time

    playback_url = config["tts_url"]
    playback_param = config["tts_param"]

    if not (playback_url and text):
        return 0

    http = urllib3.PoolManager()
    req = None
    try:
        if playback_param:
            # The URL template carries a ``{}`` slot for the encoded query.
            url = playback_url.format(urlencode({playback_param: text}))
        else:
            url = playback_url + quote(text)

        req = http.request('GET', url, preload_content=False)
        decoder = MP3Decoder(req)

        speaker = pyaud.open(output=True, format=paInt16,
                             channels=decoder.num_channels,
                             rate=decoder.sample_rate)
        try:
            for chunk in decoder:
                speaker.write(chunk)
            sleep(0.1)
            speaker.stop_stream()
        finally:
            # Close the output stream even when decoding or writing fails.
            speaker.close()

        elapsed = time() - last_time
        last_time = time()

        if mic:
            SkipSource(mic, elapsed + 0.5)

        return elapsed

    except Exception as err:
        # Playback is best-effort; KeyboardInterrupt is not an Exception
        # subclass and therefore still propagates.
        if config["debug"]:
            print("Playback failed: ", err)
        return 0

    finally:
        # A streamed response (preload_content=False) must be handed back to
        # the pool explicitly.
        if req is not None:
            req.release_conn()
105
def RunCommand(command, pyaud, mic = None):
    """Send *command* to the command endpoint and speak the reply, if any.

    The command text is POSTed to ``command_url``. When ``reply_url`` is
    configured it is polled up to ``api_attempts`` times for a JSON object
    whose ``state`` is not ``"NULL"``; that state (or a "server did not
    answer" message) is played back. Finally an empty body is POSTed to
    ``command_url``, whether or not the exchange succeeded.

    Network and parsing errors are non-fatal; ``KeyboardInterrupt``
    propagates.

    Args:
        command: recognised command text.
        pyaud: PyAudio object passed on to ``PlayBack``.
        mic: optional input stream passed on to ``PlayBack``.
    """
    command_url = config["command_url"]
    reply_url = config["reply_url"]
    command_user = config["api_user"]
    command_pwd = config["api_pwd"]
    api_attempts = config["api_attempts"]
    debug = config["debug"]

    if not command_url:
        return

    http = urllib3.PoolManager()

    # Build the headers once, up front, so the reset request below can
    # always use them.
    if command_user:
        auth_headers = urllib3.util.make_headers(
            basic_auth=command_user + ':' + (command_pwd or ''))
    else:
        auth_headers = urllib3.util.make_headers()
    post_headers = dict(auth_headers)
    post_headers['Content-Type'] = 'text/plain'
    post_headers['Accept'] = 'application/json'

    try:
        http.request('POST', command_url, headers=post_headers,
                     body=command.encode('UTF-8'))
        if reply_url:
            sleep(0.5)
            res = "NULL"
            for _ in range(api_attempts):
                try:
                    raw = http.request('GET', reply_url, headers=auth_headers).data
                    res = json.loads(raw)['state'].strip()
                    if debug:
                        print(res)
                    if res != 'NULL':
                        break
                except Exception as err:
                    # A failed poll counts as "no answer yet".
                    if debug:
                        print("Reply poll failed: ", err)
                sleep(1)
            if res and res != "NULL":
                PlayBack(pyaud, res, mic=mic)
            elif res == "NULL":
                PlayBack(pyaud, "Сервер не ответил", mic=mic)
    except Exception as err:
        if debug:
            print("Command request failed: ", err)

    # POST an empty body after every attempt, successful or not.
    try:
        http.request('POST', command_url, headers=post_headers, body="")
    except Exception as err:
        if debug:
            print("Command reset failed: ", err)
159
async def ListenPhrase(mic, server):
    """Capture one spoken phrase from *mic* and have *server* recognise it.

    Audio is read in 30 ms frames and classified by the module-level ``vad``.
    Once speech starts, every frame is sent to *server* and one reply is
    awaited per frame. More than two seconds' worth of non-speech frames
    ends the current utterance. The loop stops when a reply carries a
    non-empty ``text`` or when the microphone returns no data.

    Args:
        mic: input stream exposing ``_rate`` and ``read(num_frames)``.
        server: connection exposing awaitable ``send`` and ``recv``.

    Returns:
        ``(phrase, confidence)``. *confidence* is the lowest per-word
        ``conf`` in the reply, or ``0`` when none is available. *phrase* is
        ``""`` when the microphone ran dry before anything was recognised.

    Side effects:
        Sets the module-level ``last_time`` to the moment listening ended.
    """
    global last_time

    frame = 30/1000 # 30 ms
    pause = 2
    frame_samples = int(mic._rate*frame)
    max_silent_frames = int(pause/frame)

    phrase = ""
    confidence = 0  # always bound, so the return below cannot fail
    voice = False
    silent_frames = 0

    while not phrase:
        audio = mic.read(frame_samples)
        if len(audio) == 0:
            break
        is_speech = vad.is_speech(audio, mic._rate)
        if is_speech and not voice:
            voice = True
            if config["debug"]:
                print("+", end="")
            silent_frames = 0
        if voice and not is_speech:
            silent_frames += 1
            if silent_frames > max_silent_frames:
                silent_frames = 0
                voice = False
                if config["debug"]:
                    print("-")
        if voice:
            print("*", end="")
            await server.send(audio)
            reply = json.loads(await server.recv())
            try:
                phrase = reply["text"]
                confidence = min(word["conf"] for word in reply["result"])
            except (KeyError, TypeError, ValueError):
                # No "text" or usable "result" in this reply: keep the
                # values gathered so far.
                pass

    last_time = time()

    return phrase, confidence
212
213
async def hello(uri):
    """Run the keyphrase / command dialogue against the server at *uri*.

    Connects to the websocket, sends the recogniser configuration (phrase
    list and sample rate), opens the microphone and then loops forever:
    wait for the keyphrase, prompt the user, and give them ``rec_attempts``
    tries to say a command, which is passed to ``RunCommand``.

    Args:
        uri: websocket URI of the recognition server.
    """
    keyphrase = config["keyphrase"]
    confidence_treshold = config["confidence_treshold"]
    rec_attempts = config["rec_attempts"]
    commands = config["commands"]

    # Build the phrase list on a copy. Appending to the configured list
    # would make the keyphrase an accepted command and grow the list on
    # every reconnect. ``commands`` may be None when no command file is set.
    # NOTE(review): with no commands configured the list holds only the
    # keyphrase - confirm the server still recognises free-form commands.
    phrase_list = list(commands or [])
    phrase_list.append(keyphrase)

    async with AsyncExitStack() as stack:
        ws = await stack.enter_async_context(websockets.connect(uri))
        print('Type Ctrl-C to exit')
        await ws.send(json.dumps(
            {"config": {"phrase_list": phrase_list, "sample_rate": 16000.0}},
            ensure_ascii=False))

        ws = await stack.enter_async_context(_polite_websocket(ws))
        p = stack.enter_context(_pyaudio())
        s = stack.enter_context(_pyaudio_open_stream(
            p,
            format=paInt16,
            channels=1,
            rate=16000,
            input=True,
            frames_per_buffer=2000))

        while True:
            phrase, confidence = await ListenPhrase(s, ws)
            if config["debug"]:
                print(phrase, confidence)
            if not (phrase == keyphrase and confidence >= confidence_treshold):
                continue

            PlayBack(p, "Я жду команду", mic=s)
            command = ""

            for _ in range(rec_attempts):
                phrase, confidence = await ListenPhrase(s, ws)
                if config["debug"]:
                    print(phrase, confidence)
                if confidence > confidence_treshold:
                    if (not commands) or (phrase in commands):
                        if config["debug"]:
                            print("Command: ", phrase)
                        command = phrase
                        RunCommand(command, p, s)
                        break
                    else:
                        PlayBack(p, "Не знаю такой команды: "+phrase, mic=s)
                else:
                    PlayBack(p, "Не поняла, слишком неразборчиво", mic=s)

            if not command:
                PlayBack(p, "Так команду и не поняла...", mic=s)
265
266              
def get_config(path):
    """Load the voice-control settings from the INI file at *path*.

    Required options are ``[vosk] keyphrase`` and ``[vosk] server``. Every
    other option falls back to a default when it is missing or malformed.

    Args:
        path: path of the INI file. A missing file reads as empty, so the
            required-option check fails.

    Returns:
        dict with the keys ``asr_server``, ``keyphrase``, ``rec_attempts``,
        ``confidence_treshold``, ``tts_url``, ``tts_param``,
        ``api_attempts``, ``api_user``, ``api_pwd``, ``command_url``,
        ``reply_url``, ``debug``, ``commands`` and ``vad_mode``.
        ``commands`` is a list of lines from the command file, or ``None``
        when no command file is configured. ``vad_mode`` is always an int.

    Raises:
        KeyError: a required option is missing (a message is printed first).
        OSError: a command file is configured but cannot be read.
    """
    parser = configparser.ConfigParser()
    parser.read(path)

    def required(section, name, complaint):
        # Fetch a mandatory option; announce what is missing, then re-raise.
        try:
            return parser[section][name]
        except Exception:
            print (complaint)
            raise

    def option(section, name, default=None, convert=str):
        # Fetch an optional value, falling back when absent or malformed.
        try:
            return convert(parser[section][name])
        except (KeyError, ValueError, configparser.Error):
            return default

    keyphrase = required('vosk', 'keyphrase',
                         "Обязательный параметр - ключевое слово - не задан!")
    rec_attempts = option('vosk', 'attempts', 4, int)
    confidence_treshold = option('vosk', 'confidence_treshold', 0.4, float)
    vosk_server = required('vosk', 'server',
                           "Обязательный параметр - сервер распознавания - не задан!")

    # Read the command list once. No configured file means no restriction.
    command_file = option('commands', 'command_file')
    commands = None
    if command_file:
        with open(command_file) as file:
            commands = file.read().splitlines()

    # Credentials are only used as a pair: either both are set or neither.
    try:
        api_user = parser['rest']['api_user']
        api_pwd = parser['rest']['api_pwd']
    except (KeyError, configparser.Error):
        api_user = None
        api_pwd = None

    return {
        "asr_server": vosk_server,
        "keyphrase": keyphrase,
        "rec_attempts": rec_attempts,
        "confidence_treshold": confidence_treshold,
        "tts_url": option('rest', 'tts_url'),
        "tts_param": option('rest', 'tts_param'),
        "api_attempts": option('rest', 'attempts', 2, int),
        "api_user": api_user,
        "api_pwd": api_pwd,
        "command_url": option('rest', 'command_url'),
        "reply_url": option('rest', 'reply_url'),
        "debug": option('system', 'debug', '').lower() == "true",
        "commands": commands,
        # The option name keeps its historical spelling ("agressive").
        "vad_mode": option('vad', 'agressive', 3, int),
    }
363
364
# Configuration file: the single command-line argument if given, otherwise
# the per-user default location.
if len(sys.argv) == 2:
    conf_file = sys.argv[1]
else:
    conf_file = expanduser("~")+"/.config/voicecontrol.ini"

config = get_config(conf_file)

server = config['asr_server']

# The mode read from the INI file may be a string; convert it so Vad always
# receives an integer.
vad = webrtcvad.Vad(int(config['vad_mode']))

# Obtain the loop before entering the retry loop so the handlers below can
# always refer to it.
loop = asyncio.get_event_loop()

while True:

    try:
        loop.run_until_complete(hello('ws://' + server))

    except KeyboardInterrupt:
        loop.run_until_complete(loop.shutdown_asyncgens())
        loop.stop()
        print('Bye')
        sys.exit(0)

    except Exception as e:
        # Any other failure: clean up, wait, then reconnect.
        loop.run_until_complete(loop.shutdown_asyncgens())
        print(f'Oops! {e}')
        print('Restarting process...')
        sleep(10)