Переделано под свежий Python и Vosk

[voicecontrol.git] / voicecontrol
diff --git a/voicecontrol b/voicecontrol

index fafda2c756ffc60ec5848b31be73698823cfb622..6b4f17bc8417c419a4223883209155355711b248 100755 (executable)
--- a/voicecontrol
+++ b/voicecontrol
@@ -1,9 +1,9 @@
-#!/usr/bin/env python3.8
+#!/usr/bin/env python3
  
  import websockets,asyncio
  import sys
  from pyaudio import PyAudio, Stream, paInt16
-from contextlib import asynccontextmanager, contextmanager, AsyncExitStack
+from contextlib import asynccontextmanager, contextmanager, AsyncExitStack, ExitStack
  from typing import AsyncGenerator, Generator
  
  from urllib.parse import urlencode, quote
@@ -17,6 +17,8 @@ from time import time, sleep
  
  import webrtcvad
  
+from pprint import pprint
+
  @contextmanager
  def _pyaudio() -> Generator[PyAudio, None, None]:
      p = PyAudio()
@@ -48,7 +50,7 @@ def SkipSource(source,seconds):
    global config
    try:
      if config["debug"]:
-     print("Skipping: ", seconds)
+      print("Skipping: ", seconds)
      bufs = int((seconds)*source._rate/source._frames_per_buffer)
      for i in range(bufs):
        buffer = source.read(source._frames_per_buffer)
@@ -78,6 +80,7 @@ def PlayBack(pyaud, text, mic = None):
        decoder = MP3Decoder(req)
  
        speaker = pyaud.open(output=True, format=paInt16, channels=decoder.num_channels, rate=decoder.sample_rate)
+      pprint(speaker)
  
        for chunk in decoder:
          speaker.write(chunk)
@@ -90,7 +93,7 @@ def PlayBack(pyaud, text, mic = None):
        last_time = time()
  
        if mic:
-        SkipSource(mic, elapsed + 0.5)
+        SkipSource(mic, elapsed + 0.2)
  
        return elapsed
  
@@ -98,7 +101,7 @@ def PlayBack(pyaud, text, mic = None):
        raise
  
      except:
-      pass
+      raise
  
    else:
      return 0
@@ -117,42 +120,68 @@ def RunCommand(command, pyaud, mic = None):
  
    if command_url:
      try:
+      if config["debug"]:
+        print('Preparing command')
        if command_user:
          my_headers = urllib3.util.make_headers(basic_auth=command_user+':'+command_pwd)
        else:
          my_headers = urllib3.util.make_headers()  
        my_headers['Content-Type']='text/plain'
        my_headers['Accept']='apllication/json'
-      http.request('POST',command_url,headers=my_headers,body=command.encode('UTF-8'))
-      if reply_url:
-        sleep(0.5)
-        res="NULL"
-        for i in range(api_attempts):
-          try:
-            if command_user:
-              my_headers = urllib3.util.make_headers(basic_auth=command_user+':'+command_pwd)
-            else:
-              my_headers = urllib3.util.make_headers()  
-            req=http.request('GET',reply_url,headers=my_headers).data
-            res = json.loads(req)['state'].strip()
-            if config["debug"]:
-              print(res)
-            if not(res == 'NULL'):
-              break
-            sleep(1)  
-          except KeyboardInterrupt:
-            raise
-          except:  
-            sleep(1)
-        if res and not(res=="NULL"):
-          PlayBack(pyaud, res, mic=mic)
-        elif res=="NULL":
-          PlayBack(pyaud, "Сервер не ответил", mic=mic)  
-      http.request('POST',command_url, headers=my_headers, body="")
+      if config["debug"]:
+        print('Sending command')
+      sent = False
+      for i in range(api_attempts):
+        try:
+          http.request('POST',command_url,headers=my_headers,body=command.encode('UTF-8'))
+          sent = True
+          break
+        except Exception as e:
+          print('Exception: '+str(e))
+          sleep(0.5)
+      if sent:
+        if config["debug"]:
+          print('Command sent')
+        if reply_url:
+          sleep(0.5)
+          res="NULL"
+          for i in range(api_attempts):
+            try:
+              if command_user:
+                my_headers = urllib3.util.make_headers(basic_auth=command_user+':'+command_pwd)
+              else:
+                my_headers = urllib3.util.make_headers()  
+              req=http.request('GET',reply_url,headers=my_headers).data
+              res = json.loads(req)['state'].strip()
+              if config["debug"]:
+                print(res)
+              if not(res == 'NULL'):
+                break
+              sleep(1)  
+            except KeyboardInterrupt:
+              raise
+            except Exception as e:
+              print('Exception: '+str(e))
+              sleep(1)
+          if res and not(res=="NULL"):
+            PlayBack(pyaud, res, mic=mic)
+          elif res=="NULL":
+            PlayBack(pyaud, "Сервер не ответил", mic=mic)  
+        if command_user:
+          my_headers = urllib3.util.make_headers(basic_auth=command_user+':'+command_pwd)
+        else:
+          my_headers = urllib3.util.make_headers()  
+        my_headers['Content-Type']='text/plain'
+        my_headers['Accept']='apllication/json'
+        command=""
+        http.request('POST',command_url, headers=my_headers, body=command.encode('UTF-8'))
+      else:
+        PlayBack(pyaud, "Сервер недоступен", mic=mic)
      except KeyboardInterrupt:
        raise
-    except:
+    except Exception as e:
        try:
+        print('Exception: '+str(e))
          http.request('POST',command_url, headers=my_headers, body="")
        except:  
          pass
@@ -165,104 +194,101 @@ async def ListenPhrase(mic, server):
    sz = int(mic._rate*frame)
    sp = int(pause/frame)
  
-  try:
-
-    phrase = ""
-    voice = False
+  phrase = ""
+  voice = False
  
-    while not phrase:
-      data = mic.read(sz)
-      if len(data) == 0:
-        break
-      vd = vad.is_speech(data, mic._rate)
-      if vd and not voice:
-        voice = True
-        if config["debug"]:
-          print("+", end="")
+  while not phrase:
+    data = mic.read(sz)
+    if len(data) == 0:
+      break
+    vd = vad.is_speech(data, mic._rate)
+    if vd and not voice:
+      voice = True
+      if config["debug"]:
+        print("+", end="")
+      cnt = 0
+    if voice and not vd:
+      cnt = cnt + 1
+      if cnt > sp:
          cnt = 0
-      if voice and not vd:
-        cnt = cnt + 1
-        if cnt > sp:
-          cnt = 0
-          voice = False
-          if config["debug"]:
-            print("-")
-      if voice:
-        print("*",end="")
-        await server.send(data)
-        datatxt = await server.recv()
-        data = json.loads(datatxt)
-        try:
-          phrase = data["text"]
-          confidence = min(map(lambda x: x["conf"], data["result"]))
-        except:
-          pass  
+        voice = False
+        if config["debug"]:
+          print("-")
+    if voice:
+      print("*",end="")
+      await server.send(data)
+      datatxt = await server.recv()
+      data = json.loads(datatxt)
+      try:
+        phrase = data["text"]
+      except:
+        pass  
    
-    last_time = time()
-
-    return phrase, confidence
-
-  except KeyboardInterrupt:
-    raise
-  except websockets.exceptions.ConnectionClosedError:
-    raise  
-  except:
-    raise
-    return '',0
+  last_time = time()
+  return phrase
  
  
-async def hello(uri):
+async def main_loop(uri):
  
    global config, last_time
  
    keyphrase = config["keyphrase"]
-  confidence_treshold = config["confidence_treshold"]
    rec_attempts = config["rec_attempts"]
    commands = config["commands"]
  
-  async with AsyncExitStack() as stack:
-    ws = await stack.enter_async_context(websockets.connect(uri))
-    print('Type Ctrl-C to exit')
-    phrases = config["commands"]
-    phrases.append(config["keyphrase"])
-    phrases = json.dumps(phrases, ensure_ascii=False)
-    await ws.send('{"config" : { "phrase_list" : '+phrases+', "sample_rate" : 16000.0}}')
-
-    ws = await stack.enter_async_context(_polite_websocket(ws))
-    p = stack.enter_context(_pyaudio())
-    s = stack.enter_context(_pyaudio_open_stream(p,
+  
+  with ExitStack() as audio_stack:
+    p = audio_stack.enter_context(_pyaudio())
+
+    s = audio_stack.enter_context(_pyaudio_open_stream(p,
              format = paInt16, 
              channels = 1,
              rate = 16000,
              input = True, 
              frames_per_buffer = 2000))
-    while True:
-      phrase, confidence = await ListenPhrase(s, ws)
-      if config["debug"]:
-        print(phrase,confidence)
-      if phrase == keyphrase and confidence>=confidence_treshold :
-        PlayBack(p, "Я жду команду", mic=s)
-        command = ""
-
-        for i in range(rec_attempts):
-          phrase, confidence = await ListenPhrase(s, ws)
-          if config["debug"]:
-            print(phrase,confidence)
-          if confidence > confidence_treshold:
-            if (not commands) or (phrase in commands):
-              if config["debug"]:
-                print("Command: ", phrase)
-              command = phrase
-              RunCommand(command, p, s)
-              break
-            else:
-              PlayBack(p, "Не знаю такой команды: "+phrase, mic=s)
-          else:
-            PlayBack(p, "Не поняла, слишком неразборчиво", mic=s)
-
-        if not command:
-          PlayBack(p, "Так команду и не поняла...", mic=s)
  
+    while True:
+      try:    
+        async with AsyncExitStack() as web_stack:
+          ws = await web_stack.enter_async_context(websockets.connect(uri))
+          print('Type Ctrl-C to exit')
+          phrases = [] + config["commands"]
+          phrases.append(config["keyphrase"])
+          phrases = json.dumps(phrases, ensure_ascii=False)
+          await ws.send('{"config" : { "phrase_list" : '+phrases+', "sample_rate" : 16000.0}}')
+
+          ws = await web_stack.enter_async_context(_polite_websocket(ws))
+          while True:
+            phrase = await ListenPhrase(s, ws)
+            if config["debug"]:
+              print(phrase)
+            if phrase == keyphrase :
+              print("COMMAND!")
+              PlayBack(p, "Слушаю!", mic=s)
+              command = ""
+  
+              for i in range(rec_attempts):
+                phrase = await ListenPhrase(s, ws)
+                if config["debug"]:
+                  print(phrase)
+                if (not commands) or (phrase in commands):
+                  if config["debug"]:
+                    print("Command: ", phrase)
+                  command = phrase
+                  RunCommand(command, p, s)
+                  break
+                else:
+                  PlayBack(p, "Не знаю такой команды: "+phrase, mic=s)
+              else:
+                PlayBack(p, "Не поняла, слишком неразборчиво", mic=s)
+
+              if not command:
+                PlayBack(p, "Так команду и не поняла...", mic=s)
+      except KeyboardInterrupt:
+        raise
+      except Exception as e:
+        print('Exception: '+str(e))
+        pass
               
  def get_config(path):
    
@@ -280,11 +306,6 @@ def get_config(path):
    except:
      rec_attempts = 4
  
-  try:  
-    confidence_treshold = float(config['vosk']['confidence_treshold'])
-  except:
-    confidence_treshold = 0.4
-
    try:
      vosk_server = config['vosk']['server']
    except:
@@ -348,7 +369,6 @@ def get_config(path):
        "asr_server": vosk_server,
        "keyphrase": keyphrase,
        "rec_attempts": rec_attempts,
-      "confidence_treshold": confidence_treshold,
        "tts_url": tts_url,
        "tts_param": tts_param,
        "api_attempts": api_attempts,
@@ -372,6 +392,7 @@ config = get_config(conf_file)
  server = config['asr_server']
  
  vad = webrtcvad.Vad(config['vad_mode'])
+last_time = time()
  
  while True:
  
@@ -379,9 +400,10 @@ while True:
  
      loop = asyncio.get_event_loop()
      loop.run_until_complete(
-        hello(f'ws://' + server))
+        main_loop(f'ws://' + server))
  
    except (Exception, KeyboardInterrupt) as e:
+    raise
      loop.run_until_complete(
        loop.shutdown_asyncgens())
      if isinstance(e, KeyboardInterrupt):