A minimal script using this package is shown below:
# Create a new Snack sound object
sound snd -frequency 8000
pack [ button .a -text Record -command {snd record} ]
pack [ button .b -text Stop -command {snd stop} ]
# Create a recognizer linked to the sound object with a simple callback (puts) that just prints the recognizer output each time the sound changes.
recognizer test.LEX tri.HMM snd puts
snd read sentence.wav
When the last command is executed, the file sentence.wav
is read and processed, and the resulting output of the speech recognizer
is printed.
It is possible to set up
several speech recognizers in parallel, all tracking the same sound object.
These recognizers can have different grammars, or even acoustic models trained
on different languages.
/*
 * Per-recognizer state. One instance is allocated for every recognizer
 * object command and is handed both to the object command and to the
 * Snack sound callback as their client data.
 */
typedef struct Recog {
RTStarEngine *engine;       /* Engine returned by InitRTStarEngine(). */
RTStarUtterance *utterance; /* Utterance being processed; created when the
                             * callback sees SNACK_NEW_SOUND and closed
                             * once the sound status is IDLE. */
Sound *snd;                 /* Snack sound the recognizer tracks. */
int id;                     /* Value returned by Snack_AddCallback(), later
                             * passed to Snack_RemoveCallback(). */
Tcl_Obj *cmdPtr;            /* Callback command; the recognizer output is
                             * appended to a copy of it before evaluation. */
Tcl_Interp *interp;         /* Interpreter the callback is evaluated in. */
int forwardPos;             /* Sound length already passed to
                             * RTStarForward(). */
int nBest;                  /* Passed to RTStarOutput(); initialized to 1,
                             * changed with "configure NBEST". */
int outputFormat;           /* RTO_* value passed to RTStarOutput();
                             * initialized to RTO_WORDS, changed with
                             * "configure OUTFORMAT". */
} Recog;
/*
 * engine_cmd --
 *
 *      Handles the sub-commands of a recognizer object command:
 *
 *          <recognizer> configure option value
 *          <recognizer> destroy
 *
 *      "configure NBEST n" stores n in the recognizer, "configure
 *      OUTFORMAT fmt" selects one of the RTO_* output formats by name,
 *      and any other option/value pair is forwarded to RTStarControl().
 *      "destroy" deletes the object command, removes the sound callback,
 *      closes the engine and frees the Recog record.
 *
 * Parameters:
 *      clientData  The Recog record of this recognizer.
 *      interp      Interpreter used for error reporting.
 *      objc, objv  Command words; objv[0] is the object command name.
 *
 * Results:
 *      TCL_OK on success. TCL_ERROR, with a message left in the
 *      interpreter, on a wrong number of arguments, an unknown
 *      sub-command, a non-integer NBEST value or an unknown OUTFORMAT
 *      name.
 */
static int
engine_cmd(ClientData clientData, Tcl_Interp *interp, int objc,
           Tcl_Obj *CONST objv[])
{
    Recog *r = (Recog *) clientData;
    int index;
    static char *optionStrings[] = {
        "configure", "destroy", NULL
    };
    enum options {
        CONFIG, DESTROY
    };

    if (objc < 2) {
        Tcl_WrongNumArgs(interp, 1, objv, "option ?arg ...?");
        return TCL_ERROR;
    }

    if (Tcl_GetIndexFromObj(interp, objv[1], optionStrings, "option", 0,
                            &index) != TCL_OK) {
        return TCL_ERROR;
    }

    switch ((enum options) index) {
    case CONFIG:
    {
        char *key;
        char *value;

        /* Both objv[2] and objv[3] are read below; make sure they exist. */
        if (objc < 4) {
            Tcl_WrongNumArgs(interp, 2, objv, "option value");
            return TCL_ERROR;
        }
        key = Tcl_GetStringFromObj(objv[2], NULL);
        value = Tcl_GetStringFromObj(objv[3], NULL);

        if (strcmp(key, "NBEST") == 0) {
            if (Tcl_GetIntFromObj(interp, objv[3], &r->nBest) != TCL_OK) {
                return TCL_ERROR;
            }
        } else if (strcmp(key, "OUTFORMAT") == 0) {
            if (strcmp(value, "RTO_WORDS") == 0) {
                r->outputFormat = RTO_WORDS;
            } else if (strcmp(value, "RTO_WnS") == 0) {
                r->outputFormat = RTO_WnS;
            } else if (strcmp(value, "RTO_MIX") == 0) {
                r->outputFormat = RTO_MIX;
            } else if (strcmp(value, "RTO_WAVES") == 0) {
                r->outputFormat = RTO_WAVES;
            } else if (strcmp(value, "RTO_GRAPH") == 0) {
                r->outputFormat = RTO_GRAPH;
            } else if (strcmp(value, "RTO_MINIMAL_GRAPH") == 0) {
                r->outputFormat = RTO_MINIMAL_GRAPH;
            } else {
                /* Report a misspelled format instead of ignoring it. */
                Tcl_AppendResult(interp, "unknown output format \"", value,
                                 "\": must be RTO_WORDS, RTO_WnS, RTO_MIX, "
                                 "RTO_WAVES, RTO_GRAPH, or RTO_MINIMAL_GRAPH",
                                 (char *) NULL);
                return TCL_ERROR;
            }
        } else {
            /* Anything else is passed straight to the engine. */
            RTStarControl(r->engine, key, value);
        }
        break;
    }
    case DESTROY:
    {
        char *cmdName = Tcl_GetStringFromObj(objv[0], NULL);

        Tcl_DeleteCommand(interp, cmdName);
        Snack_RemoveCallback(r->snd, r->id);
        CloseRTStarEngine(r->engine);
        /* Drop the reference taken on the callback when the recognizer
         * was created. */
        Tcl_DecrRefCount(r->cmdPtr);
        ckfree((char *) r);
        break;
    }
    }

    return TCL_OK;
}
/*
 * ProcessUtterance --
 *
 *      Snack callback executed whenever the tracked sound changes.
 *      Feeds the sound data that has not yet been seen to the
 *      recognizer and, once the sound is idle, back-traces the
 *      utterance and evaluates the user callback with the recognizer
 *      output appended as a braced argument.
 *
 * Parameters:
 *      clientData  The Recog record registered with Snack_AddCallback().
 *      flag        Change notification; SNACK_NEW_SOUND starts a new
 *                  utterance.
 *
 * Side effects:
 *      Updates r->utterance and r->forwardPos. Errors raised by the
 *      callback script are reported through Tcl_BackgroundError().
 */
static void
ProcessUtterance(ClientData clientData, int flag)
{
    Recog *r = (Recog *) clientData;
    Sound *s = r->snd;
    int len = Snack_GetLength(s) - r->forwardPos;
    char *res = NULL;
    Tcl_Obj *cmd = NULL;
    short *buffer = NULL;

    /*
     * Sound object has changed as a result of, e.g., record or read.
     * Initialize processing of the sound here.
     */
    if (flag == SNACK_NEW_SOUND) {
        r->utterance = InitRTStarUtterance(r->engine);
    }

    /*
     * No utterance is open: it was closed below after the previous
     * sound went idle, or it could not be created. There is nothing to
     * feed or back-trace.
     */
    if (r->utterance == NULL) {
        return;
    }

    /*
     * Process all new sound data. Might be a small recorded chunk or
     * all of the sound contents.
     */
    if (len > 0) {
        if ((buffer = (short *) ckalloc(len * sizeof(short))) == NULL) {
            return;
        }
        /* NOTE(review): the last argument is a byte count here; confirm
         * that Snack_GetSoundData() does not expect a sample count. */
        Snack_GetSoundData(s, r->forwardPos, buffer, len * sizeof(short));
        RTStarForward(r->utterance, buffer, len);
        ckfree((char *) buffer);
    }
    r->forwardPos = Snack_GetLength(s);

    /*
     * If Snack_GetSoundStatus() returns IDLE, either stop was called or
     * the sound changed contents through, e.g., a read command. Do
     * postprocessing.
     */
    if (Snack_GetSoundStatus(s) == IDLE) {
        RTStarBackTrace(r->utterance);
        res = RTStarOutput(r->utterance, r->nBest, r->outputFormat);

        Tcl_Preserve((ClientData) r->interp);

        /*
         * Build "<callback> {<output>}". A NULL output would end the
         * variadic argument list early and leave an unbalanced brace,
         * so substitute an empty string.
         */
        cmd = Tcl_DuplicateObj(r->cmdPtr);
        Tcl_IncrRefCount(cmd);
        Tcl_AppendStringsToObj(cmd, " {", (res != NULL) ? res : "", "}",
                               (char *) NULL);

        /*
         * Execute the callback.
         */
        if (Tcl_GlobalEvalObj(r->interp, cmd) != TCL_OK) {
            Tcl_AddErrorInfo(r->interp, "\n    (\"command\" script)");
            Tcl_BackgroundError(r->interp);
        }
        Tcl_DecrRefCount(cmd);
        Tcl_Release((ClientData) r->interp);

        /* The utterance is finished; forget the pointer so that it is
         * not handed to the recognizer again after being closed. */
        CloseRTStarUtterance(r->utterance);
        r->utterance = NULL;
        r->forwardPos = 0;
    }
}
/*
 * RTStarEngineCmd --
 *
 *      Implements the "recognizer" command:
 *
 *          recognizer lexnet annfile sound callback
 *
 *      Creates a recognition engine from the two files, attaches it to
 *      the named Snack sound through a change callback, and creates a
 *      new object command "recognizerN" that controls it.
 *
 * Parameters:
 *      cdata       Unused.
 *      interp      Interpreter in which the object command is created
 *                  and in which the callback is later evaluated.
 *      objc, objv  Command words as described above.
 *
 * Results:
 *      TCL_OK with the name of the new object command as the
 *      interpreter result. TCL_ERROR if too few arguments are given,
 *      the engine cannot be created, or the sound cannot be found; in
 *      that case nothing is left allocated.
 */
int
RTStarEngineCmd(ClientData cdata, Tcl_Interp *interp, int objc,
                Tcl_Obj *CONST objv[])
{
    static int id = 0;
    /* "recognizer" (10) + up to 11 characters of int + NUL fits easily. */
    char name[32];
    char *lexnet_name = NULL;
    char *annfile_name = NULL;
    char *sound_name = NULL;
    RTStarEngine *engine = NULL;
    Sound *snd = NULL;
    Recog *r = NULL;

    /* objv[1] .. objv[4] are all read below. */
    if (objc < 5) {
        Tcl_WrongNumArgs(interp, 1, objv, "lexnet annfile sound callback");
        return TCL_ERROR;
    }

    lexnet_name = Tcl_GetStringFromObj(objv[1], NULL);
    annfile_name = Tcl_GetStringFromObj(objv[2], NULL);
    sound_name = Tcl_GetStringFromObj(objv[3], NULL);

    /*
     * Initialize a new speech recognition engine.
     */
    engine = InitRTStarEngine(lexnet_name, annfile_name, HMMCLASSIFIER);
    if (engine == NULL) {
        Tcl_AppendResult(interp,
                         "Error creating RTStarEngine, check filenames",
                         (char *) NULL);
        return TCL_ERROR;
    }

    snd = Snack_GetSound(interp, sound_name);
    if (snd == NULL) {
        /* Do not leak the engine that was just created. */
        CloseRTStarEngine(engine);
        return TCL_ERROR;
    }

    /*
     * Create a struct with info related to the engine. It is allocated
     * only after every step that can fail, so the error paths above
     * have nothing else to release.
     */
    r = (Recog *) ckalloc(sizeof(Recog));
    r->engine = engine;
    r->utterance = NULL;        /* No utterance until the sound changes. */
    r->snd = snd;
    r->cmdPtr = objv[4];
    Tcl_IncrRefCount(r->cmdPtr);
    r->interp = interp;
    r->forwardPos = 0;
    r->nBest = 1;
    r->outputFormat = RTO_WORDS;
    r->id = Snack_AddCallback(r->snd, ProcessUtterance, (ClientData) r);

    /*
     * Create an object command for this engine.
     */
    sprintf(name, "recognizer%d", ++id);
    Tcl_CreateObjCommand(interp, name, engine_cmd, (ClientData) r,
                         (Tcl_CmdDeleteProc *) NULL);
    Tcl_SetObjResult(interp, Tcl_NewStringObj(name, -1));

    return TCL_OK;
}
/*
 * Package initialization: announces package "starlite" version 1.1 and,
 * if that succeeds, registers the "recognizer" command. Returns the
 * status of Tcl_PkgProvide().
 */
EXPORT(int, Starlite_Init)(Tcl_Interp *interp)
{
    int status;

    status = Tcl_PkgProvide(interp, "starlite", "1.1");
    if (status == TCL_OK) {
        Tcl_CreateObjCommand(interp, "recognizer", RTStarEngineCmd, NULL,
                             (Tcl_CmdDeleteProc *) NULL);
    }
    return status;
}
/*
 * Initialization entry point for safe interpreters; performs exactly the
 * same setup as Starlite_Init() and returns its status.
 */
EXPORT(int, Starlite_SafeInit)(Tcl_Interp *interp)
{
    int status = Starlite_Init(interp);

    return status;
}