dragonfire/sr/kaldi.py
# -*- coding: UTF-8 -*-
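"""Kaldi-based speech recognition backend for Dragonfire.

Captures microphone audio with PyAudio and streams it into a Kaldi
GStreamer DecoderPipeline; recognized utterances are handed off to the
VirtualAssistant as commands.
"""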
from __future__ import absolute_import
# Operates on sound fragments consisting of signed integer samples 8, 16
# or 32 bits wide, stored in Python strings.
import audioop
import os
import time
from ctypes import CFUNCTYPE, c_char_p, c_int
from threading import Thread
import pyaudio # Provides Python bindings for PortAudio, the cross-platform audio API
from dragonfire import VirtualAssistant
from dragonfire.sr import noalsaerr
from gi.repository import GObject
from .decoder import DecoderPipeline
CHUNK = 8000 # Frames per buffer read; 0.5 seconds of audio at 16 kHz
FORMAT = pyaudio.paInt16 # Sample format: 16-bit signed integers
CHANNELS = 1 # Number of channels: mono
RATE = 16000 # Sample rate of the audio stream in Hz
THRESHOLD = 1000 # RMS threshold above which a chunk counts as speech
SILENCE_DETECTION = 5 # Consecutive quiet chunks needed to declare silence
LISTENING = False # When True, echo microphone input back to the output
ENGLISH_MODEL_PATH = os.path.dirname(
    os.path.realpath(__file__)) + "/models/english/"


class KaldiRecognizer(object):
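    """Speech recognizer built on a Kaldi GStreamer DecoderPipeline.

    The pipeline's word and end-of-stream handlers store their results on
    the class (``words`` and ``finished``), which recognize() polls.
    """
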
    def __init__(self):
        # logging.basicConfig(level=logging.INFO)
        # Configuration of the voxforge/tri2b_mmi_b0.05 English model:
        decoder_conf = {
            "model": ENGLISH_MODEL_PATH + "final.mdl",
            "lda-mat": ENGLISH_MODEL_PATH + "final.mat",
            "word-syms": ENGLISH_MODEL_PATH + "words.txt",
            "fst": ENGLISH_MODEL_PATH + "HCLG.fst",
            "silence-phones": "6"
        }
        self.decoder_pipeline = DecoderPipeline({"decoder": decoder_conf})
        self.__class__.words = []
        self.__class__.finished = False
        self.decoder_pipeline.set_word_handler(self.word_getter)
        self.decoder_pipeline.set_eos_handler(self.set_finished, self.finished)
        GObject.threads_init() # Unnecessary on PyGObject >= 3.10 but harmless
        self.loop = GObject.MainLoop()
        self.gi_thread = Thread(target=self.loop.run, args=())
        self.gi_thread.start()

    @classmethod
    def word_getter(cls, word):
        """Word handler: collect each word emitted by the decoder."""
        cls.words.append(word)

    @classmethod
    def set_finished(cls, finished):
        """End-of-stream handler: mark the current utterance as decoded."""
        cls.finished = True

    def reset(self):
        """Clear the collected words and the end-of-stream flag."""
        self.__class__.words = []
        self.__class__.finished = False

    def recognize(self, args, userin, user_full_name, user_prefix):
        """Listen to the microphone and decode speech indefinitely.

        A chunk whose RMS level reaches THRESHOLD opens a recognition
        request; audio is streamed to the decoder until SILENCE_DETECTION
        consecutive quiet chunks are heard. The recognized words are then
        joined into a command and dispatched to a VirtualAssistant on a
        separate thread.
        """
        with noalsaerr():
            p = pyaudio.PyAudio() # Create a PyAudio session
            # Create a full-duplex stream on the default device
            stream = p.open(
                format=FORMAT,
                channels=CHANNELS,
                rate=RATE,
                input=True,
                output=True,
                frames_per_buffer=CHUNK)
        try:
            data = stream.read(CHUNK) # Get the first chunk from the microphone
            while data: # Loop over the chunks of the audio stream
                rms = audioop.rms(data, 2) # Root Mean Square of the chunk
                if rms >= THRESHOLD: # Loud enough to be speech
                    self.decoder_pipeline.init_request(
                        "recognize",
                        "audio/x-raw, layout=(string)interleaved, rate=(int)16000, format=(string)S16LE, channels=(int)1"
                    )
                    self.decoder_pipeline.process_data(data)
                    silence_counter = 0 # Counts consecutive quiet chunks
                    while silence_counter < SILENCE_DETECTION:
                        data = stream.read(CHUNK) # Read a new chunk
                        if LISTENING:
                            stream.write(data, CHUNK)
                        self.decoder_pipeline.process_data(data)
                        rms = audioop.rms(data, 2)
                        if rms < THRESHOLD: # Quiet chunk
                            silence_counter += 1
                        else: # Speech resumed; start counting over
                            silence_counter = 0
                    stream.stop_stream()
                    self.decoder_pipeline.end_request()
                    while not self.finished: # Wait for the EOS handler to fire
                        time.sleep(0.1)
                    stream.start_stream()
                    # Drop the decoder's sentence-boundary symbol
                    words = [x for x in self.words if x != '<#s>']
                    com = ' '.join(words)
                    her = VirtualAssistant(args, userin, user_full_name,
                                           user_prefix)
                    t = Thread(target=her.command, args=(com,))
                    t.start()
                    self.reset()
                data = stream.read(CHUNK) # Read the next chunk from the stream
                if LISTENING:
                    stream.write(data, CHUNK)
        except KeyboardInterrupt:
            stream.stop_stream()
            stream.close()
            p.terminate()
            self.loop.quit()
            raise KeyboardInterrupt
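

# ctypes plumbing for a no-op ALSA error handler (the same trick used by the
# noalsaerr() helper imported from dragonfire.sr): passing c_error_handler to
# libasound's snd_lib_error_set_handler silences ALSA's stderr diagnostics.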
ERROR_HANDLER_FUNC = CFUNCTYPE(None, c_char_p, c_int, c_char_p, c_int, c_char_p)


def py_error_handler(filename, line, function, err, fmt):
    pass


c_error_handler = ERROR_HANDLER_FUNC(py_error_handler)


if __name__ == '__main__':
    recognizer = KaldiRecognizer()
    # recognize() needs the parsed CLI arguments and user details that
    # Dragonfire's entry point normally supplies; None placeholders (an
    # assumption here) are only enough for a quick microphone smoke test.
    recognizer.recognize(None, None, None, None)