47 #include <sphinxbase/err.h>
48 #include <sphinxbase/strfuncs.h>
49 #include <sphinxbase/filename.h>
50 #include <sphinxbase/pio.h>
51 #include <sphinxbase/jsgf.h>
52 #include <sphinxbase/hash_table.h>
55 #include "cmdln_macro.h"
60 #include "kws_search.h"
61 #include "fsg_search_internal.h"
65 #include "allphone_search.h"
67 static const arg_t ps_args_def[] = {
74 file_exists(
const char *path)
78 tmp = fopen(path,
"rb");
85 hmmdir_exists(
const char *path)
88 char *mdef = string_join(path,
"/mdef", NULL);
90 tmp = fopen(mdef,
"rb");
99 const char *hmmdir,
const char *file)
101 char *tmp = string_join(hmmdir,
"/", file, NULL);
103 if (cmd_ln_str_r(ps->
config, arg) == NULL && file_exists(tmp))
104 cmd_ln_set_str_r(ps->
config, arg, tmp);
109 static const arg_t feat_defn[] = {
110 waveform_to_cepstral_command_line_macro(),
111 cepstral_to_feature_command_line_macro(),
118 char const *hmmdir, *featparams;
121 #ifdef __ADSPBLACKFIN__
122 E_INFO(
"Will not use mmap() on uClinux/Blackfin.");
123 cmd_ln_set_boolean_r(ps->
config,
"-mmap", FALSE);
127 if ((hmmdir = cmd_ln_str_r(ps->
config,
"-hmm")) != NULL) {
128 ps_add_file(ps,
"-mdef", hmmdir,
"mdef");
129 ps_add_file(ps,
"-mean", hmmdir,
"means");
130 ps_add_file(ps,
"-var", hmmdir,
"variances");
131 ps_add_file(ps,
"-tmat", hmmdir,
"transition_matrices");
132 ps_add_file(ps,
"-mixw", hmmdir,
"mixture_weights");
133 ps_add_file(ps,
"-sendump", hmmdir,
"sendump");
134 ps_add_file(ps,
"-fdict", hmmdir,
"noisedict");
135 ps_add_file(ps,
"-lda", hmmdir,
"feature_transform");
136 ps_add_file(ps,
"-featparams", hmmdir,
"feat.params");
137 ps_add_file(ps,
"-senmgau", hmmdir,
"senmgau");
141 if ((featparams = cmd_ln_str_r(ps->
config,
"-featparams"))) {
143 cmd_ln_parse_file_r(ps->
config, feat_defn, featparams, FALSE))
144 E_INFO(
"Parsed model-specific feature parameters from %s\n",
149 if (err_get_logfp() != NULL) {
150 cmd_ln_print_values_r(ps->
config, err_get_logfp(),
ps_args());
158 hash_iter_t *search_it;
159 for (search_it = hash_table_iter(ps->
searches); search_it;
160 search_it = hash_table_iter_next(search_it)) {
161 ps_search_free(hash_entry_val(search_it->ent));
174 hash_table_lookup(ps->
searches, name, &search);
184 const char *hmmdir = cmd_ln_str_r(config,
"-hmm");
185 if (hmmdir == NULL && hmmdir_exists(MODELDIR
"/en-us/en-us")) {
186 hmmdir = MODELDIR
"/en-us/en-us";
187 cmd_ln_set_str_r(config,
"-hmm", hmmdir);
190 const char *lmfile = cmd_ln_str_r(config,
"-lm");
192 if (lmfile == NULL && !cmd_ln_str_r(config,
"-fsg")
193 && !cmd_ln_str_r(config,
"-jsgf")
194 && !cmd_ln_str_r(config,
"-lmctl")
195 && !cmd_ln_str_r(config,
"-kws")
196 && !cmd_ln_str_r(config,
"-keyphrase")
197 && file_exists(MODELDIR
"/en-us/en-us.lm.bin")) {
198 lmfile = MODELDIR
"/en-us/en-us.lm.bin";
199 cmd_ln_set_str_r(config,
"-lm", lmfile);
202 const char *dictfile = cmd_ln_str_r(config,
"-dict");
203 if (dictfile == NULL && file_exists(MODELDIR
"/en-us/cmudict-en-us.dict")) {
204 dictfile = MODELDIR
"/en-us/cmudict-en-us.dict";
205 cmd_ln_set_str_r(config,
"-dict", dictfile);
210 if (hmmdir && !path_is_absolute(hmmdir) && !hmmdir_exists(hmmdir)) {
211 char *tmphmm = string_join(MODELDIR
"/hmm/", hmmdir, NULL);
212 if (hmmdir_exists(tmphmm)) {
213 cmd_ln_set_str_r(config,
"-hmm", tmphmm);
215 E_ERROR(
"Failed to find mdef file inside the model folder "
216 "specified with -hmm `%s'\n", hmmdir);
220 if (lmfile && !path_is_absolute(lmfile) && !file_exists(lmfile)) {
221 char *tmplm = string_join(MODELDIR
"/lm/", lmfile, NULL);
222 cmd_ln_set_str_r(config,
"-lm", tmplm);
225 if (dictfile && !path_is_absolute(dictfile) && !file_exists(dictfile)) {
226 char *tmpdict = string_join(MODELDIR
"/lm/", dictfile, NULL);
227 cmd_ln_set_str_r(config,
"-dict", tmpdict);
237 const char *keyphrase;
240 if (config && config != ps->
config) {
241 cmd_ln_free_r(ps->
config);
242 ps->
config = cmd_ln_retain(config);
245 err_set_debug_level(cmd_ln_int32_r(ps->
config,
"-debug"));
248 if (config && cmd_ln_str_r(ps->
config,
"-logfn")) {
249 if (err_set_logfile(cmd_ln_str_r(ps->
config,
"-logfn")) < 0) {
250 E_ERROR(
"Cannot redirect log output\n");
260 ps_expand_model_config(ps);
263 ps_free_searches(ps);
264 ps->
searches = hash_table_new(3, HASH_CASE_YES);
279 if (ps->
lmath == NULL
280 || (logmath_get_base(ps->
lmath) !=
281 (float64)cmd_ln_float32_r(ps->
config,
"-logbase"))) {
283 logmath_free(ps->
lmath);
284 ps->
lmath = logmath_init
285 ((float64)cmd_ln_float32_r(ps->
config,
"-logbase"), 0,
286 cmd_ln_boolean_r(ps->
config,
"-bestpath"));
296 if (cmd_ln_int32_r(ps->
config,
"-pl_window") > 0) {
314 lw = cmd_ln_float32_r(ps->
config,
"-lw");
320 if ((keyphrase = cmd_ln_str_r(ps->
config,
"-keyphrase"))) {
326 if ((path = cmd_ln_str_r(ps->
config,
"-kws"))) {
333 if ((path = cmd_ln_str_r(ps->
config,
"-fsg"))) {
334 fsg_model_t *fsg = fsg_model_readfile(path, ps->
lmath, lw);
346 if ((path = cmd_ln_str_r(ps->
config,
"-jsgf"))) {
352 if ((path = cmd_ln_str_r(ps->
config,
"-allphone"))) {
358 if ((path = cmd_ln_str_r(ps->
config,
"-lm")) &&
359 !cmd_ln_boolean_r(ps->
config,
"-allphone")) {
365 if ((path = cmd_ln_str_r(ps->
config,
"-lmctl"))) {
367 ngram_model_t *lmset;
368 ngram_model_set_iter_t *lmset_it;
370 if (!(lmset = ngram_model_set_read(ps->
config, path, ps->
lmath))) {
371 E_ERROR(
"Failed to read language model control file: %s\n", path);
375 for(lmset_it = ngram_model_set_iter(lmset);
376 lmset_it; lmset_it = ngram_model_set_iter_next(lmset_it)) {
377 ngram_model_t *lm = ngram_model_set_iter_model(lmset_it, &name);
378 E_INFO(
"adding search %s\n", name);
380 ngram_model_set_iter_free(lmset_it);
381 ngram_model_free(lmset);
385 ngram_model_free(lmset);
387 name = cmd_ln_str_r(ps->
config,
"-lmname");
391 E_ERROR(
"No default LM name (-lmname) for `-lmctl'\n");
397 ps->
perf.name =
"decode";
398 ptmr_init(&ps->
perf);
409 E_ERROR(
"No configuration specified");
413 ps = ckd_calloc(1,
sizeof(*ps));
442 ps_free_searches(ps);
446 logmath_free(ps->
lmath);
447 cmd_ln_free_r(ps->
config);
491 if (!strcmp(PS_SEARCH_TYPE_NGRAM, ps_search_type(search))) {
503 hash_iter_t *search_it;
504 const char* name = NULL;
505 for (search_it = hash_table_iter(ps->
searches); search_it;
506 search_it = hash_table_iter_next(search_it)) {
507 if (hash_entry_val(search_it->ent) == ps->
search) {
508 name = hash_entry_key(search_it->ent);
523 ps_search_free(search);
542 return (
const char*)(((hash_iter_t *)itor)->ent->key);
548 hash_table_iter_free((hash_iter_t *)itor);
555 if (search && strcmp(PS_SEARCH_TYPE_NGRAM, ps_search_type(search)))
564 if (search && strcmp(PS_SEARCH_TYPE_FSG, ps_search_type(search)))
573 if (search && strcmp(PS_SEARCH_TYPE_KWS, ps_search_type(search)))
575 return search ? kws_search_get_keywords(search) : NULL;
587 old_search = (
ps_search_t *) hash_table_replace(ps->
searches, ps_search_name(search), search);
588 if (old_search != search)
589 ps_search_free(old_search);
599 return set_search_internal(ps, search);
608 lm = ngram_model_read(ps->
config, path, NGRAM_AUTO, ps->
lmath);
613 ngram_model_free(lm);
622 return set_search_internal(ps, search);
633 lm = ngram_model_read(ps->
config, path, NGRAM_AUTO, ps->
lmath);
636 ngram_model_free(lm);
644 search = kws_search_init(name, NULL, keyfile, ps->
config, ps->
acmod, ps->
dict, ps->
d2p);
645 return set_search_internal(ps, search);
652 search = kws_search_init(name, keyphrase, NULL, ps->
config, ps->
acmod, ps->
dict, ps->
d2p);
653 return set_search_internal(ps, search);
661 return set_search_internal(ps, search);
670 jsgf_t *jsgf = jsgf_parse_file(path, NULL);
679 if ((toprule = cmd_ln_str_r(ps->
config,
"-toprule"))) {
680 rule = jsgf_get_rule(jsgf, toprule);
682 E_ERROR(
"Start rule %s not found\n", toprule);
683 jsgf_grammar_free(jsgf);
687 rule = jsgf_get_public_rule(jsgf);
689 E_ERROR(
"No public rules found in %s\n", path);
690 jsgf_grammar_free(jsgf);
695 lw = cmd_ln_float32_r(ps->
config,
"-lw");
696 fsg = jsgf_build_fsg(jsgf, rule, ps->
lmath, lw);
699 jsgf_grammar_free(jsgf);
709 jsgf_t *jsgf = jsgf_parse_string(jsgf_string, NULL);
718 if ((toprule = cmd_ln_str_r(ps->
config,
"-toprule"))) {
719 rule = jsgf_get_rule(jsgf, toprule);
721 E_ERROR(
"Start rule %s not found\n", toprule);
725 rule = jsgf_get_public_rule(jsgf);
727 E_ERROR(
"No public rules found in input string\n");
732 lw = cmd_ln_float32_r(ps->
config,
"-lw");
733 fsg = jsgf_build_fsg(jsgf, rule, ps->
lmath, lw);
742 char const *fdictfile,
char const *format)
747 hash_iter_t *search_it;
751 newconfig = cmd_ln_init(NULL,
ps_args(), TRUE, NULL);
752 cmd_ln_set_boolean_r(newconfig,
"-dictcase",
753 cmd_ln_boolean_r(ps->
config,
"-dictcase"));
754 cmd_ln_set_str_r(newconfig,
"-dict", dictfile);
756 cmd_ln_set_str_r(newconfig,
"-fdict", fdictfile);
758 cmd_ln_set_str_r(newconfig,
"-fdict",
759 cmd_ln_str_r(ps->
config,
"-fdict"));
763 cmd_ln_free_r(newconfig);
769 cmd_ln_free_r(newconfig);
775 cmd_ln_free_r(newconfig);
776 cmd_ln_set_str_r(ps->
config,
"-dict", dictfile);
778 cmd_ln_set_str_r(ps->
config,
"-fdict", fdictfile);
785 for (search_it = hash_table_iter(ps->
searches); search_it;
786 search_it = hash_table_iter_next(search_it)) {
787 if (ps_search_reinit(hash_entry_val(search_it->ent), dict, d2p) < 0) {
788 hash_table_iter_free(search_it);
811 hash_iter_t *search_it;
812 char **phonestr, *tmp;
816 tmp = ckd_salloc(phones);
817 np = str2words(tmp, NULL, 0);
818 phonestr = ckd_calloc(np,
sizeof(*phonestr));
819 str2words(tmp, phonestr, np);
820 pron = ckd_calloc(np,
sizeof(*pron));
821 for (i = 0; i < np; ++i) {
824 E_ERROR(
"Unknown phone %s in phone string %s\n",
848 for (search_it = hash_table_iter(ps->
searches); search_it;
849 search_it = hash_table_iter_next(search_it)) {
850 ps_search_t *search = hash_entry_val(search_it->ent);
851 if (!strcmp(PS_SEARCH_TYPE_NGRAM, ps_search_type(search))) {
853 if (ngram_model_add_word(lmset, word, 1.0) == NGRAM_INVALID_WID) {
854 hash_table_iter_free(search_it);
860 if ((rv = ps_search_reinit(search, ps->
dict, ps->
d2p) < 0)) {
861 hash_table_iter_free(search_it);
883 for (phlen = j = 0; j < dict_pronlen(dict, wid); ++j)
885 phones = ckd_calloc(1, phlen);
886 for (j = 0; j < dict_pronlen(dict, wid); ++j) {
888 if (j != dict_pronlen(dict, wid) - 1)
899 long total, pos, endpos;
906 if (maxsamps != -1) {
907 data = ckd_calloc(maxsamps,
sizeof(*data));
908 total = fread(data,
sizeof(*data), maxsamps, rawfh);
911 }
else if ((pos = ftell(rawfh)) >= 0) {
912 fseek(rawfh, 0, SEEK_END);
913 endpos = ftell(rawfh);
914 fseek(rawfh, pos, SEEK_SET);
915 maxsamps = endpos - pos;
917 data = ckd_calloc(maxsamps,
sizeof(*data));
918 total = fread(data,
sizeof(*data), maxsamps, rawfh);
924 while (!feof(rawfh)) {
928 nread = fread(data,
sizeof(*data),
sizeof(data)/
sizeof(*data), rawfh);
951 E_ERROR(
"Utterance already started\n");
956 E_ERROR(
"No search module is selected, did you forget to "
957 "specify a language model or grammar?\n");
961 ptmr_reset(&ps->
perf);
962 ptmr_start(&ps->
perf);
964 sprintf(uttid,
"%09u", ps->
uttno);
979 char *logfn = string_join(ps->
mfclogdir,
"/",
980 uttid,
".mfc", NULL);
982 E_INFO(
"Writing MFCC log file: %s\n", logfn);
983 if ((mfcfh = fopen(logfn,
"wb")) == NULL) {
984 E_ERROR_SYSTEM(
"Failed to open MFCC log file %s", logfn);
992 char *logfn = string_join(ps->
rawlogdir,
"/",
993 uttid,
".raw", NULL);
995 E_INFO(
"Writing raw audio log file: %s\n", logfn);
996 if ((rawfh = fopen(logfn,
"wb")) == NULL) {
997 E_ERROR_SYSTEM(
"Failed to open raw audio log file %s", logfn);
1005 char *logfn = string_join(ps->
senlogdir,
"/",
1006 uttid,
".sen", NULL);
1008 E_INFO(
"Writing senone score log file: %s\n", logfn);
1009 if ((senfh = fopen(logfn,
"wb")) == NULL) {
1010 E_ERROR_SYSTEM(
"Failed to open senone score log file %s", logfn);
1022 return ps_search_start(ps->
search);
1037 if ((k = ps_search_step(ps->
search,
1050 int nfr, n_searchfr;
1056 if ((nfr = ps_search_forward(ps)) < 0) {
1078 E_ERROR(
"Failed to process data, utterance is not started. Use start_utt to start it\n");
1090 &n_samples, full_utt)) < 0)
1096 if ((nfr = ps_search_forward(ps)) < 0)
1121 &n_frames, full_utt)) < 0)
1127 if ((nfr = ps_search_forward(ps)) < 0)
1141 E_ERROR(
"Utterance is not started\n");
1147 if ((rv = ps_search_forward(ps)) < 0) {
1148 ptmr_stop(&ps->
perf);
1153 if ((rv = ps_search_finish(ps->
phone_loop)) < 0) {
1154 ptmr_stop(&ps->
perf);
1161 i < ps->acmod->output_frame; ++i)
1162 ps_search_step(ps->
search, i);
1165 if ((rv = ps_search_finish(ps->
search)) < 0) {
1166 ptmr_stop(&ps->
perf);
1169 ptmr_stop(&ps->
perf);
1172 if (cmd_ln_boolean_r(ps->
config,
"-backtrace")) {
1180 E_INFO(
"%s (%d)\n", hyp, score);
1181 E_INFO_NOFN(
"%-20s %-5s %-5s %-5s %-10s %-10s %-3s\n",
1182 "word",
"start",
"end",
"pprob",
"ascr",
"lscr",
"lback");
1187 int32 post, lscr, ascr, lback;
1192 E_INFO_NOFN(
"%-20s %-5d %-5d %-1.3f %-10d %-10d %-3d\n",
1206 ptmr_start(&ps->
perf);
1207 hyp = ps_search_hyp(ps->
search, out_best_score, NULL);
1208 ptmr_stop(&ps->
perf);
1217 ptmr_start(&ps->
perf);
1218 hyp = ps_search_hyp(ps->
search, NULL, out_is_final);
1219 ptmr_stop(&ps->
perf);
1229 ptmr_start(&ps->
perf);
1230 prob = ps_search_prob(ps->
search);
1231 ptmr_stop(&ps->
perf);
1240 ptmr_start(&ps->
perf);
1241 itor = ps_search_seg_iter(ps->
search, out_best_score);
1242 ptmr_stop(&ps->
perf);
1249 return ps_search_seg_next(seg);
1263 if (out_sf) *out_sf = seg->
sf + uf;
1264 if (out_ef) *out_ef = seg->
ef + uf;
1270 if (out_ascr) *out_ascr = seg->
ascr;
1271 if (out_lscr) *out_lscr = seg->
lscr;
1272 if (out_lback) *out_lback = seg->
lback;
1279 ps_search_seg_free(seg);
1285 return ps_search_lattice(ps->
search);
1290 char const *ctx1,
char const *ctx2)
1293 ngram_model_t *lmset;
1306 if (0 != strcmp(ps_search_type(ps->
search), PS_SEARCH_TYPE_NGRAM)) {
1343 assert(nbest != NULL);
1345 if (nbest->top == NULL)
1347 if (out_score) *out_score = nbest->top->
score;
1354 if (nbest->top == NULL)
1356 if (out_score) *out_score = nbest->top->
score;
1368 double *out_ncpu,
double *out_nwall)
1372 frate = cmd_ln_int32_r(ps->
config,
"-frate");
1374 *out_ncpu = ps->
perf.t_cpu;
1375 *out_nwall = ps->
perf.t_elapsed;
1380 double *out_ncpu,
double *out_nwall)
1384 frate = cmd_ln_int32_r(ps->
config,
"-frate");
1385 *out_nspeech = (double)ps->
n_frame / frate;
1386 *out_ncpu = ps->
perf.t_tot_cpu;
1387 *out_nwall = ps->
perf.t_tot_elapsed;
1393 return fe_get_vad_state(ps->
acmod->
fe);
1404 search->name = ckd_salloc(name);
1405 search->type = ckd_salloc(type);
1408 search->
acmod = acmod;
1415 search->
start_wid = dict_startwid(dict);
1421 search->
dict = NULL;
1432 ckd_free(search->name);
1433 ckd_free(search->type);
1449 search->
start_wid = dict_startwid(dict);
1455 search->
dict = NULL;
void acmod_get_rawdata(acmod_t *acmod, int16 **buffer, int32 *size)
Retrieves the raw data collected during utterance decoding.
Implementation of FSG search (and "FSG set") structure.
ptmr_t perf
Performance counter for all of decoding.
dict_t * dict_init(cmd_ln_t *config, bin_mdef_t *mdef)
Initialize a new dictionary.
Internal implementation of PocketSphinx decoder.
POCKETSPHINX_EXPORT void ps_get_utt_time(ps_decoder_t *ps, double *out_nspeech, double *out_ncpu, double *out_nwall)
Get performance information for the current utterance.
POCKETSPHINX_EXPORT feat_t * ps_get_feat(ps_decoder_t *ps)
Get the dynamic feature computation object for this decoder.
int acmod_read_scores(acmod_t *acmod)
Read one frame of scores from senone score dump file.
void ps_astar_finish(ps_astar_t *nbest)
Finish N-best search, releasing resources associated with it.
POCKETSPHINX_EXPORT void ps_set_rawdata_size(ps_decoder_t *ps, int32 size)
Sets the limit of the raw audio data to store in the decoder so that it can be retrieved later with ps_get_rawdata.
POCKETSPHINX_EXPORT ngram_model_t * ps_get_lm(ps_decoder_t *ps, const char *name)
Get the language model set object for this decoder.
ps_latpath_t * ps_astar_next(ps_astar_t *nbest)
Find next best hypothesis of A* on a word graph.
char const * ps_astar_hyp(ps_astar_t *nbest, ps_latpath_t *path)
Get hypothesis string from A* search.
POCKETSPHINX_EXPORT ps_seg_t * ps_seg_iter(ps_decoder_t *ps, int32 *out_best_score)
Get an iterator over the word segmentation for the best hypothesis.
POCKETSPHINX_EXPORT int ps_process_cep(ps_decoder_t *ps, mfcc_t **data, int n_frames, int no_search, int full_utt)
Decode acoustic feature data.
Base structure for search module.
POCKETSPHINX_EXPORT arg_t const * ps_args(void)
Returns the argument definitions used in ps_init().
Utterance started, no data yet.
dict_t * dict
Pronunciation dictionary.
int acmod_set_insenfh(acmod_t *acmod, FILE *senfh)
Set up a senone score dump file for input.
POCKETSPHINX_EXPORT const char * ps_get_kws(ps_decoder_t *ps, const char *name)
Get the current Key phrase to spot.
int32 acmod_stream_offset(acmod_t *acmod)
Get the offset of the utterance start of the current stream, helpful for stream-wide timing...
POCKETSPHINX_EXPORT int ps_set_fsg(ps_decoder_t *ps, const char *name, fsg_model_t *fsg)
Adds new search based on finite state grammar.
POCKETSPHINX_EXPORT s3wid_t dict_wordid(dict_t *d, const char *word)
Return word id for given word string if present.
ps_mllr_t * acmod_update_mllr(acmod_t *acmod, ps_mllr_t *mllr)
Adapt acoustic model using a linear transform.
int acmod_process_cep(acmod_t *acmod, mfcc_t ***inout_cep, int *inout_n_frames, int full_utt)
Feed acoustic feature data into the acoustic model for scoring.
POCKETSPHINX_EXPORT int ps_load_dict(ps_decoder_t *ps, char const *dictfile, char const *fdictfile, char const *format)
Reload the pronunciation dictionary from a file.
POCKETSPHINX_EXPORT void ps_nbest_free(ps_nbest_t *nbest)
Finish N-best search early, releasing resources.
int32 silence_wid
Silence word ID.
void ps_search_base_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p)
Re-initialize base structure with new dictionary.
acmod_t * acmod
Acoustic model.
int dict_free(dict_t *d)
Release a pointer to a dictionary.
POCKETSPHINX_EXPORT ps_nbest_t * ps_nbest(ps_decoder_t *ps, int sf, int ef, char const *ctx1, char const *ctx2)
Get an iterator over the best hypotheses, optionally within a selected region of the utterance...
Main header file for the PocketSphinx decoder.
POCKETSPHINX_EXPORT ps_seg_t * ps_nbest_seg(ps_nbest_t *nbest, int32 *out_score)
Get the word segmentation from an N-best list iterator.
void acmod_set_rawdata_size(acmod_t *acmod, int32 size)
Sets the limit of the raw audio data to store.
POCKETSPHINX_EXPORT void ps_seg_frames(ps_seg_t *seg, int *out_sf, int *out_ef)
Get inclusive start and end frames from a segmentation iterator.
void acmod_start_stream(acmod_t *acmod)
Reset the current stream.
POCKETSPHINX_EXPORT ps_mllr_t * ps_update_mllr(ps_decoder_t *ps, ps_mllr_t *mllr)
Adapt current acoustic model using a linear transform.
int32 finish_wid
Finish word ID.
fe_t * fe
Acoustic feature computation.
int bin_mdef_ciphone_id(bin_mdef_t *m, const char *ciphone)
Context-independent phone lookup.
int32 lscr
Language model score.
int32 n_words
Number of words known to search (may be less than in the dictionary)
POCKETSPHINX_EXPORT int ps_set_kws(ps_decoder_t *ps, const char *name, const char *keyfile)
Adds keywords from a file for spotting.
POCKETSPHINX_EXPORT uint8 ps_get_in_speech(ps_decoder_t *ps)
Checks if the last fed audio buffer contained speech.
Utterance ended, still buffering.
POCKETSPHINX_EXPORT int ps_save_dict(ps_decoder_t *ps, char const *dictfile, char const *format)
Dump the current pronunciation dictionary to a file.
#define BAD_S3WID
Dictionary word id.
POCKETSPHINX_EXPORT ps_search_iter_t * ps_search_iter(ps_decoder_t *ps)
Returns iterator over current searches.
char const * mfclogdir
Log directory for MFCC files.
POCKETSPHINX_EXPORT void ps_default_search_args(cmd_ln_t *)
Sets default grammar and language model if they are not set explicitly and are present in the default...
Word graph search implementation.
POCKETSPHINX_EXPORT char const * ps_nbest_hyp(ps_nbest_t *nbest, int32 *out_score)
Get the hypothesis string from an N-best list iterator.
POCKETSPHINX_EXPORT char * ps_lookup_word(ps_decoder_t *ps, const char *word)
Look up the word in the dictionary and return the phone transcription for it.
int refcount
Reference count.
int dict2pid_free(dict2pid_t *d2p)
Free the memory of the dict2pid structure.
const char * dict_ciphone_str(dict_t *d, s3wid_t wid, int32 pos)
Return value: CI phone string for the given word, phone position.
POCKETSPHINX_EXPORT ps_lattice_t * ps_get_lattice(ps_decoder_t *ps)
Get word lattice.
POCKETSPHINX_EXPORT ps_nbest_t * ps_nbest_next(ps_nbest_t *nbest)
Move an N-best list iterator forward.
int32 prob
Log posterior probability.
dict_t * dict_retain(dict_t *d)
Retain a pointer to a dict_t.
POCKETSPHINX_EXPORT long ps_decode_raw(ps_decoder_t *ps, FILE *rawfh, long maxsamps)
Decode a raw audio stream.
char const * word
Word string (pointer into dictionary hash)
int acmod_set_senfh(acmod_t *acmod, FILE *logfh)
Start logging senone scores to a filehandle.
acmod_t * acmod
Acoustic model.
ps_search_t * search
Search object from whence this came.
ps_search_t * search
Currently active search module.
Lexicon tree based Viterbi search.
hash_table_t * searches
Set of search modules.
int acmod_set_rawfh(acmod_t *acmod, FILE *logfh)
Start logging raw audio to a filehandle.
void ps_search_init(ps_search_t *search, ps_searchfuncs_t *vt, const char *type, const char *name, cmd_ln_t *config, acmod_t *acmod, dict_t *dict, dict2pid_t *d2p)
Initialize base structure.
int dict_write(dict_t *dict, char const *filename, char const *format)
Write dictionary to a file.
POCKETSPHINX_EXPORT int32 ps_get_prob(ps_decoder_t *ps)
Get posterior probability.
logmath_t * lmath
Log math computation.
int32 start_wid
Start word ID.
POCKETSPHINX_EXPORT ps_seg_t * ps_seg_next(ps_seg_t *seg)
Get the next segment in a word segmentation.
N-Gram search module structure.
POCKETSPHINX_EXPORT int ps_set_lm(ps_decoder_t *ps, const char *name, ngram_model_t *lm)
Adds new search based on N-gram language model.
dict2pid_t * d2p
Dictionary to senone mappings.
POCKETSPHINX_EXPORT char const * ps_get_hyp(ps_decoder_t *ps, int32 *out_best_score)
Get hypothesis string and path score.
char const * rawlogdir
Log directory for audio files.
void ps_search_base_free(ps_search_t *search)
Free search.
POCKETSPHINX_EXPORT int ps_set_jsgf_string(ps_decoder_t *ps, const char *name, const char *jsgf_string)
Adds new search using JSGF model.
frame_idx_t output_frame
Index of next frame of dynamic features.
POCKETSPHINX_EXPORT int ps_free(ps_decoder_t *ps)
Finalize the decoder.
int16 s3cipid_t
Size definitions for more semantically meaningful units.
N-Gram based multi-pass search ("FBS")
POCKETSPHINX_EXPORT void ps_seg_free(ps_seg_t *seg)
Finish iterating over a word segmentation early, freeing resources.
int32 ascr
Acoustic score.
cmd_ln_t * config
Configuration.
int acmod_end_utt(acmod_t *acmod)
Mark the end of an utterance.
POCKETSPHINX_EXPORT ps_decoder_t * ps_retain(ps_decoder_t *ps)
Retain a pointer to the decoder.
int dict2pid_add_word(dict2pid_t *d2p, int32 wid)
Add a word to the dict2pid structure (after adding it to dict).
int acmod_advance(acmod_t *acmod)
Advance the frame index.
uint8 state
State of utterance processing.
int acmod_set_mfcfh(acmod_t *acmod, FILE *logfh)
Start logging MFCCs to a filehandle.
void acmod_free(acmod_t *acmod)
Finalize an acoustic model.
POCKETSPHINX_EXPORT cmd_ln_t * ps_get_config(ps_decoder_t *ps)
Get the configuration object for this decoder.
POCKETSPHINX_EXPORT int ps_set_lm_file(ps_decoder_t *ps, const char *name, const char *path)
Adds new search based on N-gram language model.
ps_search_t * phone_loop
Phone loop search for lookahead.
Feature space linear transform structure.
POCKETSPHINX_EXPORT int ps_process_raw(ps_decoder_t *ps, int16 const *data, size_t n_samples, int no_search, int full_utt)
Decode raw audio data.
POCKETSPHINX_EXPORT int ps_set_allphone_file(ps_decoder_t *ps, const char *name, const char *path)
Adds new search based on phone N-gram language model.
a structure for a dictionary.
feat_t * fcb
Dynamic feature computation.
Word graph structure used in bestpath/nbest search.
char const * senlogdir
Log directory for senone score files.
ps_searchfuncs_t * vt
V-table of search methods.
POCKETSPHINX_EXPORT int ps_unset_search(ps_decoder_t *ps, const char *name)
Unsets the search and releases related resources.
ps_astar_t * ps_astar_start(ps_lattice_t *dag, ngram_model_t *lmset, float32 lwf, int sf, int ef, int w1, int w2)
Begin N-Gram based A* search on a word graph.
uint32 n_frame
Total number of frames processed.
POCKETSPHINX_EXPORT ps_search_iter_t * ps_search_iter_next(ps_search_iter_t *itor)
Updates search iterator to point to the next position.
POCKETSPHINX_EXPORT int ps_reinit(ps_decoder_t *ps, cmd_ln_t *config)
Reinitialize the decoder with updated configuration.
POCKETSPHINX_EXPORT int ps_set_jsgf_file(ps_decoder_t *ps, const char *name, const char *path)
Adds new search using JSGF model.
Fast and rough context-independent phoneme loop search.
dict2pid_t * d2p
Dictionary to senone mapping.
POCKETSPHINX_EXPORT fsg_model_t * ps_get_fsg(ps_decoder_t *ps, const char *name)
Get the finite-state grammar set object for this decoder.
dict2pid_t * dict2pid_retain(dict2pid_t *d2p)
Retain a pointer to dict2pid.
POCKETSPHINX_EXPORT int ps_end_utt(ps_decoder_t *ps)
End utterance processing.
int32 post
Utterance posterior probability.
char * hyp_str
Current hypothesis string.
Partial path structure used in N-best (A*) search.
dict_t * dict
Pronunciation dictionary.
int acmod_process_raw(acmod_t *acmod, int16 const **inout_raw, size_t *inout_n_samps, int full_utt)
TODO: Set queue length for utterance processing.
Flat lexicon based Viterbi search.
POCKETSPHINX_EXPORT int ps_add_word(ps_decoder_t *ps, char const *word, char const *phones, int update)
Add a word to the pronunciation dictionary.
int32 lback
Language model backoff.
POCKETSPHINX_EXPORT void ps_get_all_time(ps_decoder_t *ps, double *out_nspeech, double *out_ncpu, double *out_nwall)
Get overall performance information.
POCKETSPHINX_EXPORT logmath_t * ps_get_logmath(ps_decoder_t *ps)
Get the log-math computation object for this decoder.
int acmod_start_utt(acmod_t *acmod)
Mark the start of an utterance.
POCKETSPHINX_EXPORT int ps_set_keyphrase(ps_decoder_t *ps, const char *name, const char *keyphrase)
Adds new keyword to spot.
POCKETSPHINX_EXPORT int32 ps_seg_prob(ps_seg_t *seg, int32 *out_ascr, int32 *out_lscr, int32 *out_lback)
Get language, acoustic, and posterior probabilities from a segmentation iterator. ...
POCKETSPHINX_EXPORT char const * ps_get_hyp_final(ps_decoder_t *ps, int32 *out_is_final)
Get hypothesis string and final flag.
s3wid_t dict_add_word(dict_t *d, char const *word, s3cipid_t const *p, int32 np)
Add a word with the given ciphone pronunciation list to the dictionary.
bin_mdef_t * mdef
Model definition.
POCKETSPHINX_EXPORT void ps_search_iter_free(ps_search_iter_t *itor)
Delete an unfinished search iterator.
ps_latlink_t * last_link
Final link in best path.
dict2pid_t * dict2pid_build(bin_mdef_t *mdef, dict_t *dict)
Build the dict2pid structure for the given model/dictionary.
POCKETSPHINX_EXPORT int ps_start_stream(ps_decoder_t *ps)
Start processing of the stream of speech.
POCKETSPHINX_EXPORT int ps_set_allphone(ps_decoder_t *ps, const char *name, ngram_model_t *lm)
Adds new search based on phone N-gram language model.
acmod_t * acmod_init(cmd_ln_t *config, logmath_t *lmath, fe_t *fe, feat_t *fcb)
Initialize an acoustic model.
frame_idx_t n_feat_frame
Number of frames active in feat_buf.
POCKETSPHINX_EXPORT fe_t * ps_get_fe(ps_decoder_t *ps)
Get the feature extraction object for this decoder.
V-table for search algorithm.
POCKETSPHINX_EXPORT int ps_start_utt(ps_decoder_t *ps)
Start utterance processing.
ps_search_t * pls
Phoneme loop for lookahead.
ps_seg_t * ps_astar_seg_iter(ps_astar_t *astar, ps_latpath_t *path, float32 lwf)
Get hypothesis segmentation from A* search.
ps_lattice_t * dag
Current hypothesis word graph.
POCKETSPHINX_EXPORT ps_decoder_t * ps_init(cmd_ln_t *config)
Initialize the decoder from a configuration object.
Base structure for hypothesis segmentation iterator.
cmd_ln_t * config
Configuration.
#define dict_size(d)
Packaged macro access to dictionary members.
int32 score
Exact score from start node up to node->sf.
POCKETSPHINX_EXPORT int ps_lattice_free(ps_lattice_t *dag)
Free a lattice.
POCKETSPHINX_EXPORT const char * ps_search_iter_val(ps_search_iter_t *itor)
Retrieves the name of the search the iterator points to.
POCKETSPHINX_EXPORT int ps_get_n_frames(ps_decoder_t *ps)
Get the number of frames of data searched.
ps_search_t * ngram_search_init(const char *name, ngram_model_t *lm, cmd_ln_t *config, acmod_t *acmod, dict_t *dict, dict2pid_t *d2p)
Initialize the N-Gram search module.
Acoustic model structure.
Building composite triphone (as well as word internal triphones) with the dictionary.
int acmod_set_grow(acmod_t *acmod, int grow_feat)
Set memory allocation policy for utterance processing.
POCKETSPHINX_EXPORT int ps_decode_senscr(ps_decoder_t *ps, FILE *senfh)
Decode a senone score dump file.
POCKETSPHINX_EXPORT char const * ps_seg_word(ps_seg_t *seg)
Get word string from a segmentation iterator.
uint32 uttno
Utterance counter.
frame_idx_t sf
Start frame.
POCKETSPHINX_EXPORT const char * ps_get_search(ps_decoder_t *ps)
Returns the name of the current search in the decoder.
int pl_window
Window size for phoneme lookahead.
POCKETSPHINX_EXPORT void ps_get_rawdata(ps_decoder_t *ps, int16 **buffer, int32 *size)
Retrieves the raw data collected during utterance decoding.
POCKETSPHINX_EXPORT int ps_set_search(ps_decoder_t *ps, const char *name)
Activates the search with the provided name.