voicecontrol

   1 #!/usr/bin/env python3
   2
   3 import websockets,asyncio
   4 import sys
   5 from pyaudio import PyAudio, Stream, paInt16
   6 from contextlib import asynccontextmanager, contextmanager, AsyncExitStack, ExitStack
   7 from typing import AsyncGenerator, Generator
   8
   9 from urllib.parse import urlencode, quote
  10 import urllib3, base64, json
  11
  12 import configparser
  13 from os.path import expanduser
  14 from streamp3 import MP3Decoder
  15
  16 from time import time, sleep
  17
  18 import webrtcvad
  19
  20 from pprint import pprint
  21
  @contextmanager
  def _pyaudio() -> Generator[PyAudio, None, None]:
      """Yield a new ``PyAudio`` instance and terminate it on exit.

      ``terminate()`` is called whether the ``with`` body completes normally
      or raises.
      """
      audio = PyAudio()
      try:
          yield audio
      finally:
          print('Terminating PyAudio object')
          audio.terminate()
  30
  @contextmanager
  def _pyaudio_open_stream(p: PyAudio, *args, **kwargs) -> Generator[Stream, None, None]:
      """Open a stream on ``p`` and close it when the ``with`` block exits.

      All positional and keyword arguments are passed unchanged to ``p.open``.
      """
      stream = p.open(*args, **kwargs)
      try:
          yield stream
      finally:
          print('Closing PyAudio Stream')
          stream.close()
  39
  @asynccontextmanager
  async def _polite_websocket(ws: websockets.WebSocketClientProtocol) -> AsyncGenerator[websockets.WebSocketClientProtocol, None]:
      """Yield ``ws`` and send the end-of-stream message when the block exits.

      On exit an ``{"eof" : 1}`` message is sent and the server's reply is
      printed. The farewell is best effort: a failure while sending or
      receiving it is printed and suppressed, so that it cannot replace the
      exception that caused the block to exit.
      """
      try:
          yield ws
      finally:
          print('Terminating connection')
          try:
              await ws.send('{"eof" : 1}')
              print(await ws.recv())
          except Exception as e:
              # The connection may already be gone; raising from here would
              # hide the original error from the caller.
              print('Exception: '+str(e))
  48
  def SkipSource(source,seconds):
    """Read and discard about ``seconds`` seconds of audio from ``source``.

    The number of reads is derived from the stream's ``_rate`` and
    ``_frames_per_buffer`` attributes. Skipping is best effort: an error
    raised while reading stops the skip and is reported only in debug mode.
    ``KeyboardInterrupt`` is not intercepted.
    """
    global config

    debug = config["debug"]
    if debug:
      print("Skipping: ", seconds)

    try:
      frames = source._frames_per_buffer
      for _ in range(int((seconds)*source._rate/frames)):
        source.read(frames)
    except Exception as e:
      if debug:
        print('Exception: '+str(e))
  61
  def PlayBack(pyaud, text, mic = None):
    """Speak ``text`` through the configured text-to-speech service.

    The request URL is built from ``config["tts_url"]``: when
    ``config["tts_param"]`` is set, the url-encoded ``param=text`` pair is
    substituted into the URL with ``str.format``; otherwise the quoted text is
    appended. The response is decoded with ``MP3Decoder`` and written to an
    output stream opened on ``pyaud``.

    After playback the global ``last_time`` is refreshed and, when ``mic`` is
    given, ``elapsed + 0.2`` seconds of microphone input are discarded with
    ``SkipSource`` (presumably so the program does not hear its own voice).

    Returns the seconds elapsed since ``last_time`` was last set, or 0 when
    no TTS URL is configured or ``text`` is empty. Errors from the HTTP
    request, the decoder or the audio device propagate to the caller.
    """
    global config, last_time

    playback_url = config["tts_url"]
    playback_param = config["tts_param"]

    if not (playback_url and text):
      return 0

    if playback_param:
      url = playback_url.format(urlencode({playback_param:text}))
    else:
      url = playback_url+quote(text)

    http = urllib3.PoolManager()
    req = http.request('GET', url, preload_content=False)
    try:
      decoder = MP3Decoder(req)
      speaker = pyaud.open(output=True, format=paInt16, channels=decoder.num_channels, rate=decoder.sample_rate)
      try:
        if config["debug"]:
          pprint(speaker)

        for chunk in decoder:
          speaker.write(chunk)

        # Short pause before stopping the stream, kept from the original code.
        sleep(0.1)
        speaker.stop_stream()
      finally:
        # Close the output stream even when decoding or writing fails.
        speaker.close()
    finally:
      # A streamed response keeps its connection until it is released.
      req.release_conn()

    elapsed = time() - last_time
    last_time = time()

    if mic:
      SkipSource(mic, elapsed + 0.2)

    return elapsed
 108
 def RunCommand(command, pyaud, mic = None):
   """Send ``command`` to the REST API and speak the outcome.

   Does nothing when ``config["command_url"]`` is not set. Otherwise:

   1. POST ``command`` to the command URL, trying up to
      ``config["api_attempts"]`` times. If every attempt fails, a
      "server unavailable" message is spoken and the function returns.
   2. When ``config["reply_url"]`` is set, poll it (same number of attempts)
      for a JSON object whose ``state`` field is not ``"NULL"`` and speak
      that value, or a "server did not answer" message if none arrived.
   3. POST an empty body to the command URL (presumably this resets the
      command on the server -- confirm against the API).

   Exceptions other than ``KeyboardInterrupt`` are printed, an empty-body
   POST is attempted once more, and nothing is raised to the caller.
   """
   global config

   command_url = config["command_url"]
   if not command_url:
     return

   reply_url = config["reply_url"]
   command_user = config["api_user"]
   command_pwd = config["api_pwd"]
   api_attempts = config["api_attempts"]
   debug = config["debug"]

   http = urllib3.PoolManager()

   def auth_headers():
     # Basic-auth headers when a user is configured, urllib3 defaults otherwise.
     if command_user:
       return urllib3.util.make_headers(basic_auth=command_user+':'+command_pwd)
     return urllib3.util.make_headers()

   def post_headers():
     # Headers for the plain-text POST requests to the command URL.
     headers = auth_headers()
     headers['Content-Type']='text/plain'
     headers['Accept']='application/json'
     return headers

   def post_empty_command():
     # Empty-body POST issued after a command has been handled.
     http.request('POST', command_url, headers=post_headers(), body=b"")

   try:
     if debug:
       print('Preparing command')
     headers = post_headers()

     if debug:
       print('Sending command')
     sent = False
     for _ in range(api_attempts):
       try:
         http.request('POST', command_url, headers=headers, body=command.encode('UTF-8'))
         sent = True
         break
       except Exception as e:
         print('Exception: '+str(e))
         sleep(0.5)

     if not sent:
       PlayBack(pyaud, "Сервер недоступен", mic=mic)
       return

     if debug:
       print('Command sent')

     if reply_url:
       sleep(0.5)
       res = "NULL"
       for _ in range(api_attempts):
         try:
           req = http.request('GET', reply_url, headers=auth_headers()).data
           res = json.loads(req)['state'].strip()
           if debug:
             print(res)
           if res != 'NULL':
             break
           sleep(1)
         except Exception as e:
           print('Exception: '+str(e))
           sleep(1)
       if res and res != "NULL":
         PlayBack(pyaud, res, mic=mic)
       elif res == "NULL":
         PlayBack(pyaud, "Сервер не ответил", mic=mic)

     post_empty_command()

   except Exception as e:
     print('Exception: '+str(e))
     try:
       post_empty_command()
     except Exception as reset_error:
       # Best effort only: the server may be the thing that is failing.
       print('Exception: '+str(reset_error))
 188
 async def ListenPhrase(mic, server):
   """Listen on ``mic`` until the recognizer returns a non-empty phrase.

   Audio is read in 30 ms frames and classified with the global ``vad``.
   While voice is considered active, each frame is sent to ``server`` and one
   JSON reply is read back; the first reply with a non-empty ``"text"`` field
   ends the loop. Voice becomes active on the first speech frame and inactive
   again once more than ``pause`` seconds' worth of non-speech frames have
   been counted.

   The global ``last_time`` is set to the current time before returning.

   Returns the recognized text, or ``""`` if ``mic.read`` returned no data.
   """
   global config,last_time, vad

   frame = 30/1000 # 30 ms
   pause = 2
   sz = int(mic._rate*frame)   # frames per read
   sp = int(pause/frame)       # non-speech frames that end an utterance
   debug = config["debug"]

   phrase = ""
   voice = False
   cnt = 0

   while not phrase:
     data = mic.read(sz)
     if len(data) == 0:
       break

     vd = vad.is_speech(data, mic._rate)

     if vd and not voice:
       voice = True
       cnt = 0
       if debug:
         print("+", end="")

     if voice and not vd:
       # NOTE(review): cnt is not reset when speech resumes, so this counts
       # total rather than consecutive non-speech frames -- confirm intended.
       cnt += 1
       if cnt > sp:
         cnt = 0
         voice = False
         if debug:
           print("-")

     if voice:
       if debug:
         print("*", end="")
       await server.send(data)
       reply = json.loads(await server.recv())
       # Replies without a "text" field leave the phrase empty, so the loop
       # keeps listening.
       phrase = reply.get("text", "") if isinstance(reply, dict) else ""

   last_time = time()
   return phrase
 229
 230
 async def main_loop(uri):
   """Run the listen / recognize / execute cycle against the server at ``uri``.

   Opens the microphone once, then connects to the websocket server and
   reconnects after any error. On each connection a ``config`` message with
   the sample rate is sent. When a command list is configured, the message
   also carries a ``phrase_list`` made of the commands plus the keyphrase;
   without a command list no ``phrase_list`` is sent.

   After the keyphrase is heard, up to ``config["rec_attempts"]`` phrases are
   taken as command candidates. A phrase is accepted when no command list is
   configured or when it is in the list; it is then passed to ``RunCommand``.

   Runs until interrupted; exceptions other than ``KeyboardInterrupt`` are
   printed and followed by a reconnect.
   """
   global config

   keyphrase = config["keyphrase"]
   rec_attempts = config["rec_attempts"]
   commands = config["commands"]
   debug = config["debug"]
   sample_rate = 16000

   # Build the recognizer setup once; json.dumps takes care of quoting.
   recognizer_config = {"sample_rate": float(sample_rate)}
   if commands:
     recognizer_config["phrase_list"] = list(commands) + [keyphrase]
   setup_message = json.dumps({"config": recognizer_config}, ensure_ascii=False)

   with _pyaudio() as p, _pyaudio_open_stream(p,
           format = paInt16,
           channels = 1,
           rate = sample_rate,
           input = True,
           frames_per_buffer = 2000) as s:

     while True:
       try:
         async with websockets.connect(uri) as ws:
           print('Type Ctrl-C to exit')
           await ws.send(setup_message)

           async with _polite_websocket(ws):
             while True:
               phrase = await ListenPhrase(s, ws)
               if debug:
                 print(phrase)
               if phrase != keyphrase:
                 continue

               print("COMMAND!")
               PlayBack(p, "Слушаю!", mic=s)
               command = ""

               for _ in range(rec_attempts):
                 phrase = await ListenPhrase(s, ws)
                 if debug:
                   print(phrase)
                 if (not commands) or (phrase in commands):
                   if debug:
                     print("Command: ", phrase)
                   command = phrase
                   RunCommand(command, p, s)
                   break
                 else:
                   PlayBack(p, "Не знаю такой команды: "+phrase, mic=s)
               else:
                 PlayBack(p, "Не поняла, слишком неразборчиво", mic=s)

               if not command:
                 PlayBack(p, "Так команду и не поняла...", mic=s)
       except Exception as e:
         print('Exception: '+str(e))
         # Pause before reconnecting so an unreachable server does not turn
         # this loop into a busy spin.
         await asyncio.sleep(1)
 292
 def get_config(path):
   """Read the INI file at ``path`` and return the settings as a dict.

   Required options (a message is printed and ``KeyError`` is raised when
   one is missing):
     ``[vosk] keyphrase``, ``[vosk] server``

   Optional options and their defaults:
     ``[vosk] attempts`` (4), ``[commands] command_file`` (no command list),
     ``[rest] tts_url`` / ``tts_param`` / ``command_url`` / ``reply_url``
     (``None``), ``[rest] attempts`` (2), ``[rest] api_user`` + ``api_pwd``
     (both ``None`` unless both are present), ``[vad] agressive`` (3),
     ``[system] debug`` (``False``; only the value "true", in any letter
     case, enables it).

   Integer options that are missing or not valid integers use their default.
   When ``command_file`` is set, the file is read and its lines become the
   ``commands`` list; an unreadable file raises ``OSError``.
   """
   parser = configparser.ConfigParser()
   parser.read(path)

   def option(section, name, default=None):
     # Value of an optional setting, or the default when it cannot be read.
     try:
       return parser[section][name]
     except (KeyError, configparser.Error):
       return default

   def int_option(section, name, default):
     # Integer value of an optional setting, or the default when it is
     # missing or not a valid integer.
     try:
       return int(parser[section][name])
     except (KeyError, ValueError, configparser.Error):
       return default

   try:
     keyphrase = parser['vosk']['keyphrase']
   except KeyError:
     print ("Обязательный параметр - ключевое слово - не задан!")
     raise

   rec_attempts = int_option('vosk', 'attempts', 4)

   try:
     vosk_server = parser['vosk']['server']
   except KeyError:
     print ("Обязательный параметр - сервер распознавания - не задан!")
     raise

   # The command list is optional; read the file once, and only when set.
   command_file = option('commands', 'command_file')
   commands = None
   if command_file:
     with open(command_file) as file:
       commands = file.read().splitlines()

   # Credentials are only used as a pair.
   api_user = option('rest', 'api_user')
   api_pwd = option('rest', 'api_pwd')
   if api_user is None or api_pwd is None:
     api_user = None
     api_pwd = None

   return {
       "asr_server": vosk_server,
       "keyphrase": keyphrase,
       "rec_attempts": rec_attempts,
       "tts_url": option('rest', 'tts_url'),
       "tts_param": option('rest', 'tts_param'),
       "api_attempts": int_option('rest', 'attempts', 2),
       "api_user": api_user,
       "api_pwd": api_pwd,
       "command_url": option('rest', 'command_url'),
       "reply_url": option('rest', 'reply_url'),
       "debug": option('system', 'debug', '').lower() == "true",
       "commands": commands,
       # Returned as an int; the option name keeps its existing spelling.
       "vad_mode": int_option('vad', 'agressive', 3)
     }
 383
 384
 # Configuration file: the single command-line argument, or the per-user default.
 if len(sys.argv) == 2:
     conf_file = sys.argv[1]
 else:
     conf_file = expanduser("~")+"/.config/voicecontrol.ini"

 # Module-level state read by the functions above through ``global``.
 config = get_config(conf_file)

 server = config['asr_server']

 # webrtcvad expects an integer mode; a value read from the INI file may
 # arrive as a string.
 vad = webrtcvad.Vad(int(config['vad_mode']))
 last_time = time()

 # Run until Ctrl-C. Any other error that escapes main_loop restarts it after
 # a pause. asyncio.run creates a fresh event loop for every attempt and
 # shuts down async generators itself.
 while True:

   try:

     asyncio.run(main_loop(f'ws://{server}'))

   except KeyboardInterrupt:
     print('Bye')
     sys.exit(0)

   except Exception as e:
     print(f'Oops! {e}')
     print('Restarting process...')
     sleep(10)