# Offline transcription of a WAV file with Vosk; prints the recognized text.
#
# References:
#   How to use Vosk -- the Offline Speech Recognition Library for Python
#     https://www.youtube.com/watch?v=3Mga7_8bYpw
#   How to load a WAV file
#     https://towardsdatascience.com/transcribe-large-audio-files-offline-with-vosk-a77ee8f7aa28
import json
import os
import ssl
import sys
import wave

import pyaudio
from vosk import Model, KaldiRecognizer, SetLogLevel

# NOTE(review): this disables HTTPS certificate verification for the whole
# process. It is kept because the original script sets it, but it is a
# security risk and should be removed once it is confirmed to be unnecessary.
ssl._create_default_https_context = ssl._create_unverified_context

# Audio file to transcribe.
wav_file = "00_00_00_Nameless_Neutral__Yes,[breath] I'm planning to get out of here, [breath]and if you want to join me, come down to the harbor. We'll meet at the ship.wav"

# Directory containing the unpacked Vosk model.
theModelDir = r"E:\Amo_Files\ZDIECIA\Reaction_FACES\ponyface\ai\_whl\vosk-model-en-us-0.22"

# Number of audio frames handed to the recognizer per iteration.
FRAMES_PER_CHUNK = 4000

transcription = []

# The file is opened before the model is loaded so that a bad path fails
# immediately; the `with` block guarantees the handle is closed afterwards.
with wave.open(wav_file, "rb") as wf:
    # Vosk's own examples require mono 16-bit PCM input. Warn instead of
    # aborting so the script still produces whatever output it can.
    if wf.getnchannels() != 1 or wf.getsampwidth() != 2 or wf.getcomptype() != "NONE":
        print(
            "Warning: audio should be mono 16-bit PCM WAV; "
            "recognition results may be unreliable.",
            file=sys.stderr,
        )

    model = Model(theModelDir, model_name=None, lang=None)
    # Use the sample rate stored in the file rather than a hard-coded value.
    rec = KaldiRecognizer(model, wf.getframerate())

    while True:
        data = wf.readframes(FRAMES_PER_CHUNK)
        if not data:
            break
        # AcceptWaveform returns a truthy value once an utterance is complete;
        # only then is a result fetched from the recognizer.
        if rec.AcceptWaveform(data):
            # Convert the JSON output to a dict and keep its "text" value.
            result_dict = json.loads(rec.Result())
            text = result_dict.get("text", "")
            if text:  # skip empty segments so the join adds no stray spaces
                transcription.append(text)

    # Fetch whatever the recognizer still holds after the last chunk.
    final_result = json.loads(rec.FinalResult())
    final_text = final_result.get("text", "")
    if final_text:
        transcription.append(final_text)

# Merge all recognized segments into one string.
transcription_text = ' '.join(transcription)
print(transcription_text)

# Earlier microphone-streaming variant, kept for reference only (not executed):
#
#   while True:
#       data = stream.read(4096)
#       #if len(data) == 0:
#           #break
#       if recognizer.AcceptWaveform(data):
#           text = recognizer.Result()
#           print(text)
#           print(text[14:-3])