diff --git a/main.py b/main.py index e5bbdf1..0a284b7 100644 --- a/main.py +++ b/main.py @@ -11,8 +11,11 @@ import json from vosk import Model, KaldiRecognizer import time +latest_partial = {"partial": ""} +latest_result = {"text": ""} + def speech_loop(): - global latest_speech + global latest_partial, latest_result model = Model("./vosk-model-small-en-us-0.15") rec = KaldiRecognizer(model, 16000) q = queue.Queue() @@ -27,13 +30,14 @@ def speech_loop(): while True: data = q.get() if rec.AcceptWaveform(data): - result = json.loads(rec.Result()) - latest_speech = result + result = json.loads(rec.Result()) + latest_result = result print(".", result) else: partial = json.loads(rec.PartialResult()) - latest_speech = partial - print("...", partial, end='\r') + latest_partial = partial + print("...", partial.get("partial", ""), end='\r') + # --- RetinaFace Utilities --- @@ -172,14 +176,14 @@ def background_loop(): conf = data[4] box_center = np.array([(x1 + x2) / 2, (y1 + y2) / 2]) offset = box_center - frame_center - # face_data.append({ - # "box": [x1, y1, x2, y2], - # "confidence": float(conf), - # "offset_from_center": { - # "x": float(offset[0]), - # "y": float(offset[1]) - # } - # }) + face_data.append({ + "box": [x1, y1, x2, y2], + "confidence": float(conf), + "offset_from_center": { + "x": float(offset[0]), + "y": float(offset[1]) + } + }) cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 255), 2) cv2.putText(frame, f'{conf:.4f}', (x1, y1 + 12), cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255)) for j in range(5): @@ -201,29 +205,57 @@ app = Flask(__name__) @app.route('/') def index(): return ''' - - RetinaFace + Speech - -

Live Stream

- -

Live Speech

-
- - - +

Little Sophias Inner Thoughts

+ + +

Voice-To-Text

+
Partial:
+
Final:
+ +

Face Detection

+
Count:
+
Offsets: + +
+ + ''' +@app.route('/video_feed') +def video_feed(): + return Response(stream_frames(), mimetype='multipart/x-mixed-replace; boundary=frame') + + def stream_frames(): while True: if latest_frame: @@ -236,7 +268,11 @@ def get_faces(): @app.route('/speech') def get_speech(): - return jsonify(latest_speech) + return jsonify({ + "partial": latest_partial.get("partial", ""), + "text": latest_result.get("text", "") + }) + # --- Start Background Thread ---