From 18d594e73c67a54b2d46fe8d520bbbca3927f8b2 Mon Sep 17 00:00:00 2001
From: jwansek <eddie.atten.ea29@gmail.com>
Date: Fri, 1 Mar 2024 14:25:52 +0000
Subject: Added not listening by default, calling the ollama service after a
 transcription

---
 noetic-llama/src/ollamamessages/CMakeLists.txt     |  1 +
 .../src/ollamamessages/msg/WhisperListening.msg    |  1 +
 .../src/ollamawrapper/src/ollamawrapper.py         |  2 +-
 .../src/whisperwrapper/src/whisperwrapper.py       | 28 +++++++++++++++++++++-
 4 files changed, 30 insertions(+), 2 deletions(-)
 create mode 100644 noetic-llama/src/ollamamessages/msg/WhisperListening.msg

(limited to 'noetic-llama/src')

diff --git a/noetic-llama/src/ollamamessages/CMakeLists.txt b/noetic-llama/src/ollamamessages/CMakeLists.txt
index 7f1e55c..b67d181 100644
--- a/noetic-llama/src/ollamamessages/CMakeLists.txt
+++ b/noetic-llama/src/ollamamessages/CMakeLists.txt
@@ -51,6 +51,7 @@ find_package(catkin REQUIRED COMPONENTS
 add_message_files(
   FILES
   WhisperTranscription.msg
+  WhisperListening.msg
 )
 
 ## Generate services in the 'srv' folder
diff --git a/noetic-llama/src/ollamamessages/msg/WhisperListening.msg b/noetic-llama/src/ollamamessages/msg/WhisperListening.msg
new file mode 100644
index 0000000..e2a1506
--- /dev/null
+++ b/noetic-llama/src/ollamamessages/msg/WhisperListening.msg
@@ -0,0 +1 @@
+bool listening
\ No newline at end of file
diff --git a/noetic-llama/src/ollamawrapper/src/ollamawrapper.py b/noetic-llama/src/ollamawrapper/src/ollamawrapper.py
index 627fa58..66843f8 100644
--- a/noetic-llama/src/ollamawrapper/src/ollamawrapper.py
+++ b/noetic-llama/src/ollamawrapper/src/ollamawrapper.py
@@ -15,7 +15,7 @@ import capabilities
 from capabilities import *
 
 ollama_api_url = rospy.get_param("/stt/ollama_api_url", "192.168.122.1:11434")
-base_ollama_model = rospy.get_param("/stt/ollama_base_model", "nexusraven:13b-q3_K_S")
+base_ollama_model = rospy.get_param("/stt/ollama_base_model", "nexusraven:13b-v2-q2_K")
 
 @dataclass
 class FunctionCapability:
diff --git a/noetic-llama/src/whisperwrapper/src/whisperwrapper.py b/noetic-llama/src/whisperwrapper/src/whisperwrapper.py
index 4ebaebf..439c8de 100644
--- a/noetic-llama/src/whisperwrapper/src/whisperwrapper.py
+++ b/noetic-llama/src/whisperwrapper/src/whisperwrapper.py
@@ -1,8 +1,10 @@
 #!/usr/bin/env python3
 
-from ollamamessages.msg import WhisperTranscription
+from ollamamessages.msg import WhisperTranscription, WhisperListening
+from ollamamessages.srv import OllamaCall, OllamaCallResponse
 
 import speech_recognition as sr
+import threading
 import tempfile
 import requests
 import rospy 
@@ -15,13 +17,22 @@ pause = rospy.get_param("/stt/speech_recogn_pause_time", 0.8)
 energy = rospy.get_param("/stt/speech_recogn_energy", 400) 
 dynamic_energy = rospy.get_param("/stt/speech_recogn_dyn_energy_flag", False)
 microphone_device = rospy.get_param("/stt/microphone_device", 1)
+no_speech_thresh = rospy.get_param("/stt/speech_confidence_thresh", 0.1)  # a transcription is forwarded to ollama only when its first segment's no_speech_prob is below this
 
 class WhisperWrapper:
+
+    listening = False  # default state; listening_sub_cb overwrites it from /stt/listening messages, record_audio skips audio while it is False
+
     def __init__(self) -> None:
         self.transcription_pub = rospy.Publisher("/stt/transcription", WhisperTranscription, queue_size = 1)
+        self.listening_sub = rospy.Subscriber("/stt/listening", WhisperListening, self.listening_sub_cb)  # each message's flag is stored in self.listening by listening_sub_cb
 
         self.record_audio(pause, energy, dynamic_energy, microphone_device)
 
+    def listening_sub_cb(self, set_listening):  # subscriber callback for /stt/listening: log the requested flag, then store it
+        rospy.loginfo("Set listening = %s", set_listening.listening)
+        self.listening = set_listening.listening
+
     def record_audio(self, pause, energy, dynamic_energy, microphone_device):
         recogniser = sr.Recognizer()
         recogniser.energy_threshold = energy
@@ -33,6 +44,10 @@ class WhisperWrapper:
             while True and not rospy.is_shutdown():
                 audio = recogniser.listen(microphone)
 
+                if not self.listening:
+                    rospy.loginfo("I heard something but I'm stopping here because we've been set to not listen")
+                    continue
+
                 with tempfile.NamedTemporaryFile(mode = "wb", suffix = ".wav", delete = False) as f:
                     audio_path = f.name
                     f.write(audio.get_wav_data())
@@ -55,6 +70,17 @@ class WhisperWrapper:
                         no_speech_prob = o["segments"][0]["no_speech_prob"]
                     )
 
+                    if o["segments"][0]["no_speech_prob"] < no_speech_thresh:
+                        self.run_ollama(o["text"])
+                    else:
+                        rospy.loginfo("Skipped due to low confidence it's actually speech.")
+
+    def run_ollama(self, text):
+        """Send `text` to the /stt/ollamacall service and log its response (or the failure)."""
+        try:
+            rospy.loginfo(rospy.ServiceProxy("/stt/ollamacall", OllamaCall)(input = text))
+        except rospy.ServiceException as e:  # a failed call must not kill the recording loop
+            rospy.logerr("Ollama service call failed: %s" % str(e))
                     
 
 if __name__ == "__main__":
-- 
cgit v1.2.3