47 #include <sphinxbase/ckd_alloc.h>
48 #include <sphinxbase/listelem_alloc.h>
49 #include <sphinxbase/err.h>
56 #define __CHAN_DUMP__ 0
58 #define chan_v_eval(chan) hmm_dump_vit_eval(&(chan)->hmm, stderr)
60 #define chan_v_eval(chan) hmm_vit_eval(&(chan)->hmm)
70 int32 w, ndiph, i, n_words, n_ci;
71 dict_t *dict = ps_search_dict(ngs);
74 n_words = ps_search_n_words(ngs);
80 n_ci = bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef);
82 dimap = bitvec_alloc(n_ci * n_ci);
83 for (w = 0; w < n_words; w++) {
86 if (dict_is_single_phone(dict, w))
90 ph0 = dict_first_phone(dict, w);
91 ph1 = dict_second_phone(dict, w);
93 if (bitvec_is_clear(dimap, ph0 * n_ci + ph1)) {
94 bitvec_set(dimap, ph0 * n_ci + ph1);
99 E_INFO(
"%d unique initial diphones\n", ndiph);
107 for (w = 0; w < n_words; ++w) {
110 if (!dict_is_single_phone(dict, w)) {
111 E_WARN(
"Filler word %d = %s has more than one phone, ignoring it.\n",
112 w, dict_wordstr(dict, w));
130 for (w = 0; w < n_words; w++) {
131 if (!dict_is_single_phone(dict, w))
137 bin_mdef_pid2ssid(ps_search_acmod(ngs)->mdef, ngs->
rhmm_1ph[i].
ciphone),
138 bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef, ngs->
rhmm_1ph[i].
ciphone));
147 E_INFO(
"%d root, %d non-root channels, %d single-phone words\n",
179 int32 w, i, j, p, ph, tmatid;
181 dict_t *dict = ps_search_dict(ngs);
184 n_words = ps_search_n_words(ngs);
186 E_INFO(
"Creating search tree\n");
188 for (w = 0; w < n_words; w++)
191 E_INFO(
"before: %d root, %d non-root channels, %d single-phone words\n",
198 for (w = 0; w < n_words; w++) {
199 int ciphone, ci2phone;
202 if (!ngram_model_set_known_wid(ngs->
lmset, dict_basewid(dict, w)))
206 if (dict_is_single_phone(dict, w)) {
207 E_DEBUG(1,(
"single_phone_wid[%d] = %s\n",
215 ciphone = dict_first_phone(dict, w);
216 ci2phone = dict_second_phone(dict, w);
224 rhmm->
hmm.tmatid = bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef, ciphone);
226 hmm_mpx_ssid(&rhmm->
hmm, 0) =
227 bin_mdef_pid2ssid(ps_search_acmod(ngs)->mdef, ciphone);
235 E_DEBUG(3,(
"word %s rhmm %d\n", dict_wordstr(dict, w), rhmm - ngs->
root_chan));
237 if (dict_pronlen(dict, w) == 2) {
239 if ((j = rhmm->penult_phn_wid) < 0)
240 rhmm->penult_phn_wid = w;
249 tmatid = bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef,
dict_pron(dict, w, 1));
253 init_nonroot_chan(ngs, hmm, ph,
dict_pron(dict, w, 1), tmatid);
259 for (; hmm && (hmm_nonmpx_ssid(&hmm->
hmm) != ph); hmm = hmm->
alt)
263 init_nonroot_chan(ngs, hmm, ph,
dict_pron(dict, w, 1), tmatid);
267 E_DEBUG(3,(
"phone %s = %d\n",
269 dict_second_phone(dict, w)), ph));
270 for (p = 2; p < dict_pronlen(dict, w) - 1; p++) {
272 tmatid = bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef,
dict_pron(dict, w, p));
276 init_nonroot_chan(ngs, hmm, ph,
dict_pron(dict, w, p), tmatid);
282 for (hmm = hmm->next; hmm && (hmm_nonmpx_ssid(&hmm->hmm) != ph);
287 init_nonroot_chan(ngs, hmm, ph,
dict_pron(dict, w, p), tmatid);
291 E_DEBUG(3,(
"phone %s = %d\n",
297 if ((j = hmm->info.penult_phn_wid) < 0)
298 hmm->info.penult_phn_wid = w;
309 for (w = 0; w < n_words; ++w) {
311 if (!dict_is_single_phone(dict, w))
316 if (ngram_model_set_known_wid(ngs->
lmset, dict_basewid(dict, w)))
318 E_DEBUG(1,(
"single_phone_wid[%d] = %s\n",
336 E_ERROR(
"No word from the language model has pronunciation in the dictionary\n");
338 E_INFO(
"after: %d root, %d non-root channels, %d single-phone words\n",
348 for (child = hmm->
next; child; child = sibling) {
349 sibling = child->
alt;
350 reinit_search_subtree(ngs, child);
373 reinit_search_subtree(ngs, hmm);
387 ngs->bestbp_rc = ckd_calloc(bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef),
388 sizeof(*ngs->bestbp_rc));
389 ngs->lastphn_cand = ckd_calloc(ps_search_n_words(ngs),
390 sizeof(*ngs->lastphn_cand));
391 init_search_tree(ngs);
392 create_search_tree(ngs);
400 n_words = ps_search_n_words(ngs);
405 for (i = w = 0; w < n_words; ++w) {
406 if (!dict_is_single_phone(ps_search_dict(ngs), w))
427 double n_speech = (double)ngs->n_tot_frame
428 / cmd_ln_int32_r(ps_search_config(ngs),
"-frate");
430 E_INFO(
"TOTAL fwdtree %.2f CPU %.3f xRT\n",
431 ngs->fwdtree_perf.t_tot_cpu,
432 ngs->fwdtree_perf.t_tot_cpu / n_speech);
433 E_INFO(
"TOTAL fwdtree %.2f wall %.3f xRT\n",
434 ngs->fwdtree_perf.t_tot_elapsed,
435 ngs->fwdtree_perf.t_tot_elapsed / n_speech);
438 reinit_search_tree(ngs);
440 deinit_search_tree(ngs);
445 ckd_free(ngs->cand_sf);
447 ckd_free(ngs->bestbp_rc);
448 ngs->bestbp_rc = NULL;
449 ckd_free(ngs->lastphn_cand);
450 ngs->lastphn_cand = NULL;
457 reinit_search_tree(ngs);
459 deinit_search_tree(ngs);
461 ckd_free(ngs->lastphn_cand);
462 ngs->lastphn_cand = ckd_calloc(ps_search_n_words(ngs),
463 sizeof(*ngs->lastphn_cand));
465 ngs->
word_chan = ckd_calloc(ps_search_n_words(ngs),
468 init_search_tree(ngs);
469 create_search_tree(ngs);
480 n_words = ps_search_n_words(ngs);
483 memset(&ngs->
st, 0,
sizeof(ngs->
st));
484 ptmr_reset(&ngs->fwdtree_perf);
485 ptmr_start(&ngs->fwdtree_perf);
492 for (i = 0; i < n_words; ++i)
493 ngs->word_lat_idx[i] = NO_BP;
501 ngs->renormalized = 0;
504 for (i = 0; i < n_words; i++)
505 ngs->last_ltrans[i].sf = -1;
541 if (hmm_frame(&rhmm->
hmm) == frame_idx)
548 for (hmm = *(acl++); i > 0; --i, hmm = *(acl++)) {
555 for (w = *(awl++); i > 0; --i, w = *(awl++)) {
564 if (hmm_frame(&rhmm->
hmm) == frame_idx)
570 renormalize_scores(
ngram_search_t *ngs,
int frame_idx, int32 norm)
578 if (hmm_frame(&rhmm->
hmm) == frame_idx) {
586 for (hmm = *(acl++); i > 0; --i, hmm = *(acl++)) {
593 for (w = *(awl++); i > 0; --i, w = *(awl++)) {
601 if (hmm_frame(&rhmm->
hmm) == frame_idx) {
606 ngs->renormalized = TRUE;
617 if (hmm_frame(&rhmm->
hmm) == frame_idx) {
618 int32 score = chan_v_eval(rhmm);
621 ++ngs->
st.n_root_chan_eval;
636 ngs->
st.n_nonroot_chan_eval += i;
638 for (hmm = *(acl++); i > 0; --i, hmm = *(acl++)) {
639 int32 score = chan_v_eval(hmm);
640 assert(hmm_frame(&hmm->hmm) == frame_idx);
653 int32 i, w, bestscore, *awl, j, k;
660 for (w = *(awl++); i > 0; --i, w = *(awl++)) {
668 assert(hmm_frame(&hmm->
hmm) == frame_idx);
669 score = chan_v_eval(hmm);
686 if (hmm_frame(&rhmm->
hmm) < frame_idx)
689 score = chan_v_eval(rhmm);
691 if (score
BETTER_THAN bestscore && w != ps_search_finish_wid(ngs))
697 ngs->
st.n_last_chan_eval += k + j;
698 ngs->
st.n_nonroot_chan_eval += k + j;
699 ngs->
st.n_word_lastchan_eval +=
706 evaluate_channels(
ngram_search_t *ngs, int16
const *senone_scores,
int frame_idx)
711 ngs->
best_score = eval_root_chan(ngs, frame_idx);
714 if ((bs = eval_word_chan(ngs, frame_idx)) BETTER_THAN ngs->
best_score)
732 int32 thresh, newphone_thresh, lastphn_thresh, newphone_score;
739 newphone_thresh = ngs->
best_score + ngs->pbeam;
740 lastphn_thresh = ngs->
best_score + ngs->lpbeam;
745 E_DEBUG(3,(
"Root channel %d frame %d score %d thresh %d\n",
746 i, hmm_frame(&rhmm->
hmm), hmm_bestscore(&rhmm->
hmm), thresh));
748 if (hmm_frame(&rhmm->
hmm) < frame_idx)
751 if (hmm_bestscore(&rhmm->
hmm) BETTER_THAN thresh) {
752 hmm_frame(&rhmm->
hmm) = nf;
753 E_DEBUG(3,(
"Preserving root channel %d score %d\n", i, hmm_bestscore(&rhmm->
hmm)));
756 newphone_score = hmm_out_score(&rhmm->
hmm) + ngs->pip;
757 if (pls != NULL || newphone_score BETTER_THAN newphone_thresh) {
758 for (hmm = rhmm->
next; hmm; hmm = hmm->
alt) {
759 int32 pl_newphone_score = newphone_score
761 if (pl_newphone_score BETTER_THAN newphone_thresh) {
762 if ((hmm_frame(&hmm->
hmm) < frame_idx)
763 || (newphone_score BETTER_THAN hmm_in_score(&hmm->
hmm))) {
765 hmm_out_history(&rhmm->
hmm), nf);
777 if (pls != NULL || newphone_score BETTER_THAN lastphn_thresh) {
778 for (w = rhmm->penult_phn_wid; w >= 0;
780 int32 pl_newphone_score = newphone_score
782 (pls, dict_last_phone(ps_search_dict(ngs),w));
783 E_DEBUG(3,(
"word %s newphone_score %d\n", dict_wordstr(ps_search_dict(ngs), w), newphone_score));
784 if (pl_newphone_score BETTER_THAN lastphn_thresh) {
785 candp = ngs->lastphn_cand + ngs->n_lastphn_cand;
786 ngs->n_lastphn_cand++;
789 newphone_score - ngs->nwpen;
790 candp->bp = hmm_out_history(&rhmm->
hmm);
808 int32 thresh, newphone_thresh, lastphn_thresh, newphone_score;
816 newphone_thresh = ngs->
best_score + ngs->pbeam;
817 lastphn_thresh = ngs->
best_score + ngs->lpbeam;
823 for (i = ngs->
n_active_chan[frame_idx & 0x1], hmm = *(acl++); i > 0;
824 --i, hmm = *(acl++)) {
825 assert(hmm_frame(&hmm->hmm) >= frame_idx);
827 if (hmm_bestscore(&hmm->hmm) BETTER_THAN thresh) {
829 if (hmm_frame(&hmm->hmm) != nf) {
830 hmm_frame(&hmm->hmm) = nf;
835 newphone_score = hmm_out_score(&hmm->hmm) + ngs->pip;
836 if (pls != NULL || newphone_score BETTER_THAN newphone_thresh) {
837 for (nexthmm = hmm->
next; nexthmm; nexthmm = nexthmm->
alt) {
838 int32 pl_newphone_score = newphone_score
840 if ((pl_newphone_score BETTER_THAN newphone_thresh)
841 && ((hmm_frame(&nexthmm->
hmm) < frame_idx)
843 BETTER_THAN hmm_in_score(&nexthmm->
hmm)))) {
844 if (hmm_frame(&nexthmm->
hmm) != nf) {
849 hmm_out_history(&hmm->hmm), nf);
859 if (pls != NULL || newphone_score BETTER_THAN lastphn_thresh) {
860 for (w = hmm->info.penult_phn_wid; w >= 0;
862 int32 pl_newphone_score = newphone_score
864 (pls, dict_last_phone(ps_search_dict(ngs),w));
865 if (pl_newphone_score BETTER_THAN lastphn_thresh) {
866 candp = ngs->lastphn_cand + ngs->n_lastphn_cand;
867 ngs->n_lastphn_cand++;
870 newphone_score - ngs->nwpen;
871 candp->bp = hmm_out_history(&hmm->hmm);
876 else if (hmm_frame(&hmm->hmm) != nf) {
891 int32 i, j, k, nf, bp, bpend, w;
895 int32 bestscore, dscr;
902 ngs->
st.n_lastphn_cand_utt += ngs->n_lastphn_cand;
906 for (i = 0, candp = ngs->lastphn_cand; i < ngs->n_lastphn_cand; i++, candp++) {
913 bpe = &(ngs->bp_table[candp->bp]);
917 (ngs, bpe, dict_first_phone(ps_search_dict(ngs), candp->wid));
919 candp->score -= start_score;
927 if (ngs->last_ltrans[candp->wid].sf != bpe->
frame + 1) {
930 for (j = 0; j < n_cand_sf; j++) {
931 if (ngs->cand_sf[j].bp_ef == bpe->
frame)
936 candp->next = ngs->cand_sf[j].cand;
939 if (n_cand_sf >= ngs->cand_sf_alloc) {
940 if (ngs->cand_sf_alloc == 0) {
942 ckd_calloc(CAND_SF_ALLOCSIZE,
943 sizeof(*ngs->cand_sf));
944 ngs->cand_sf_alloc = CAND_SF_ALLOCSIZE;
947 ngs->cand_sf_alloc += CAND_SF_ALLOCSIZE;
948 ngs->cand_sf = ckd_realloc(ngs->cand_sf,
950 *
sizeof(*ngs->cand_sf));
951 E_INFO(
"cand_sf[] increased to %d entries\n",
959 ngs->cand_sf[j].bp_ef = bpe->
frame;
962 ngs->cand_sf[j].cand = i;
965 ngs->last_ltrans[candp->wid].sf = bpe->
frame + 1;
970 for (i = 0; i < n_cand_sf; i++) {
972 bp = ngs->bp_table_idx[ngs->cand_sf[i].bp_ef];
973 bpend = ngs->bp_table_idx[ngs->cand_sf[i].bp_ef + 1];
974 for (bpe = &(ngs->bp_table[bp]); bp < bpend; bp++, bpe++) {
978 for (j = ngs->cand_sf[i].cand; j >= 0; j = candp->next) {
980 candp = &(ngs->lastphn_cand[j]);
983 (ngs, bpe, dict_first_phone(ps_search_dict(ngs), candp->wid));
986 dscr += ngram_tg_score(ngs->
lmset,
987 dict_basewid(ps_search_dict(ngs), candp->wid),
993 if (dscr BETTER_THAN ngs->last_ltrans[candp->wid].dscr) {
994 ngs->last_ltrans[candp->wid].dscr = dscr;
995 ngs->last_ltrans[candp->wid].bp = bp;
1003 for (i = 0, candp = ngs->lastphn_cand; i < ngs->n_lastphn_cand; i++, candp++) {
1004 candp->score += ngs->last_ltrans[candp->wid].dscr;
1005 candp->bp = ngs->last_ltrans[candp->wid].bp;
1007 if (candp->score BETTER_THAN bestscore)
1008 bestscore = candp->score;
1013 thresh = bestscore + ngs->lponlybeam;
1014 for (i = ngs->n_lastphn_cand, candp = ngs->lastphn_cand; i > 0; --i, candp++) {
1015 if (candp->score BETTER_THAN thresh) {
1022 if ((hmm_frame(&hmm->
hmm) < frame_idx)
1023 || (candp->score BETTER_THAN hmm_in_score(&hmm->
hmm))) {
1024 assert(hmm_frame(&hmm->
hmm) != nf);
1026 candp->score, candp->bp, nf);
1032 assert(!dict_is_single_phone(ps_search_dict(ngs), w));
1052 int32 newword_thresh, lastphn_thresh;
1063 for (i = ngs->
n_active_word[frame_idx & 0x1], w = *(awl++); i > 0;
1064 --i, w = *(awl++)) {
1067 for (hmm = ngs->
word_chan[w]; hmm; hmm = thmm) {
1068 assert(hmm_frame(&hmm->
hmm) >= frame_idx);
1071 if (hmm_bestscore(&hmm->
hmm) BETTER_THAN lastphn_thresh) {
1073 hmm_frame(&hmm->
hmm) = nf;
1075 phmmp = &(hmm->
next);
1078 if (hmm_out_score(&hmm->
hmm) BETTER_THAN newword_thresh) {
1081 hmm_out_score(&hmm->
hmm),
1082 hmm_out_history(&hmm->
hmm),
1086 else if (hmm_frame(&hmm->
hmm) == nf) {
1087 phmmp = &(hmm->
next);
1095 if ((k > 0) && (bitvec_is_clear(ngs->
word_active, w))) {
1096 assert(!dict_is_single_phone(ps_search_dict(ngs), w));
1110 E_DEBUG(3,(
"Single phone word %s frame %d score %d thresh %d outscore %d nwthresh %d\n",
1111 dict_wordstr(ps_search_dict(ngs),w),
1112 hmm_frame(&rhmm->
hmm), hmm_bestscore(&rhmm->
hmm),
1113 lastphn_thresh, hmm_out_score(&rhmm->
hmm), newword_thresh));
1114 if (hmm_frame(&rhmm->
hmm) < frame_idx)
1116 if (hmm_bestscore(&rhmm->
hmm) BETTER_THAN lastphn_thresh) {
1117 hmm_frame(&rhmm->
hmm) = nf;
1120 if (hmm_out_score(&rhmm->
hmm) BETTER_THAN newword_thresh) {
1121 E_DEBUG(4,(
"Exiting single phone word %s with %d > %d, %d\n",
1122 dict_wordstr(ps_search_dict(ngs),w),
1123 hmm_out_score(&rhmm->
hmm),
1124 lastphn_thresh, newword_thresh));
1126 hmm_out_score(&rhmm->
hmm),
1127 hmm_out_history(&rhmm->
hmm), 0);
1137 ngs->n_lastphn_cand = 0;
1139 ngs->dynamic_beam = ngs->beam;
1140 if (ngs->maxhmmpf != -1
1141 && ngs->
st.n_root_chan_eval + ngs->
st.n_nonroot_chan_eval > ngs->maxhmmpf) {
1143 int32 bins[256], bw, nhmms, i;
1148 bw = -ngs->beam / 256;
1149 memset(bins, 0,
sizeof(bins));
1162 for (i = ngs->
n_active_chan[frame_idx & 0x1], hmm = *(acl++);
1163 i > 0; --i, hmm = *(acl++)) {
1167 b = (ngs->
best_score - hmm_bestscore(&hmm->hmm)) / bw;
1173 for (i = nhmms = 0; i < 256; ++i) {
1175 if (nhmms > ngs->maxhmmpf)
1178 ngs->dynamic_beam = -(i * bw);
1181 prune_root_chan(ngs, frame_idx);
1182 prune_nonroot_chan(ngs, frame_idx);
1183 last_phone_transition(ngs, frame_idx);
1184 prune_word_chan(ngs, frame_idx);
1195 int32 bestscr, worstscr;
1196 bptbl_t *bpe, *bestbpe, *worstbpe;
1199 if (ngs->maxwpf == -1 || ngs->maxwpf == ps_search_n_words(ngs))
1203 bestscr = (int32) 0x80000000;
1206 for (bp = ngs->bp_table_idx[frame_idx]; bp < ngs->bpidx; bp++) {
1207 bpe = &(ngs->bp_table[bp]);
1209 if (bpe->
score BETTER_THAN bestscr) {
1210 bestscr = bpe->
score;
1218 if (bestbpe != NULL) {
1219 bestbpe->
valid = TRUE;
1225 - ngs->bp_table_idx[frame_idx]) - n;
1226 for (; n > ngs->maxwpf; --n) {
1228 worstscr = (int32) 0x7fffffff;
1230 for (bp = ngs->bp_table_idx[frame_idx]; (bp < ngs->bpidx); bp++) {
1231 bpe = &(ngs->bp_table[bp]);
1233 worstscr = bpe->
score;
1238 if (worstbpe == NULL)
1239 E_FATAL(
"PANIC: No worst BPtable entry remaining\n");
1240 worstbpe->
valid = FALSE;
1247 int32 i, k, bp, w, nf;
1249 int32 thresh, newscore, pl_newscore;
1254 dict_t *dict = ps_search_dict(ngs);
1262 for (i = bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef) - 1; i >= 0; --i)
1263 ngs->bestbp_rc[i].score = WORST_SCORE;
1268 for (bp = ngs->bp_table_idx[frame_idx]; bp < ngs->bpidx; bp++) {
1269 bpe = &(ngs->bp_table[bp]);
1270 ngs->word_lat_idx[bpe->
wid] = NO_BP;
1272 if (bpe->
wid == ps_search_finish_wid(ngs))
1282 for (rc = 0; rc < bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef); ++rc) {
1283 if (bpe->
score BETTER_THAN ngs->bestbp_rc[rc].score) {
1284 E_DEBUG(4,(
"bestbp_rc[0] = %d lc %d\n",
1286 ngs->bestbp_rc[rc].score = bpe->
score;
1287 ngs->bestbp_rc[rc].path = bp;
1294 int32 *rcss = &(ngs->bscore_stack[bpe->
s_idx]);
1295 for (rc = 0; rc < bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef); ++rc) {
1296 if (rcss[rssid->
cimap[rc]] BETTER_THAN ngs->bestbp_rc[rc].score) {
1297 E_DEBUG(4,(
"bestbp_rc[%d] = %d lc %d\n",
1299 ngs->bestbp_rc[rc].score = rcss[rssid->
cimap[rc]];
1300 ngs->bestbp_rc[rc].path = bp;
1310 thresh = ngs->
best_score + ngs->dynamic_beam;
1316 bestbp_rc_ptr = &(ngs->bestbp_rc[rhmm->
ciphone]);
1318 newscore = bestbp_rc_ptr->score + ngs->nwpen + ngs->pip;
1319 pl_newscore = newscore
1321 if (pl_newscore BETTER_THAN thresh) {
1322 if ((hmm_frame(&rhmm->
hmm) < frame_idx)
1323 || (newscore BETTER_THAN hmm_in_score(&rhmm->
hmm))) {
1325 bestbp_rc_ptr->path, nf);
1328 hmm_mpx_ssid(&rhmm->
hmm, 0) =
1329 dict2pid_ldiph_lc(d2p, rhmm->
ciphone, rhmm->
ci2phone, bestbp_rc_ptr->lc);
1341 ngs->last_ltrans[w].dscr = (int32) 0x80000000;
1343 for (bp = ngs->bp_table_idx[frame_idx]; bp < ngs->bpidx; bp++) {
1344 bpe = &(ngs->bp_table[bp]);
1352 (ngs, bpe, dict_first_phone(dict, w));
1353 E_DEBUG(4, (
"initial newscore for %s: %d\n",
1354 dict_wordstr(dict, w), newscore));
1355 if (newscore != WORST_SCORE)
1356 newscore += ngram_tg_score(ngs->
lmset,
1357 dict_basewid(dict, w),
1364 if (newscore BETTER_THAN ngs->last_ltrans[w].dscr) {
1365 ngs->last_ltrans[w].dscr = newscore;
1366 ngs->last_ltrans[w].bp = bp;
1376 if (w == dict_startwid(ps_search_dict(ngs)))
1379 newscore = ngs->last_ltrans[w].dscr + ngs->pip;
1381 if (pl_newscore BETTER_THAN thresh) {
1382 bpe = ngs->bp_table + ngs->last_ltrans[w].bp;
1383 if ((hmm_frame(&rhmm->
hmm) < frame_idx)
1384 || (newscore BETTER_THAN hmm_in_score(&rhmm->
hmm))) {
1386 newscore, ngs->last_ltrans[w].bp, nf);
1389 hmm_mpx_ssid(&rhmm->
hmm, 0) =
1391 dict_last_phone(dict, bpe->
wid));
1398 w = ps_search_silence_wid(ngs);
1400 bestbp_rc_ptr = &(ngs->bestbp_rc[ps_search_acmod(ngs)->mdef->sil]);
1401 newscore = bestbp_rc_ptr->score + ngs->silpen + ngs->pip;
1402 pl_newscore = newscore
1404 if (pl_newscore BETTER_THAN thresh) {
1405 if ((hmm_frame(&rhmm->
hmm) < frame_idx)
1406 || (newscore BETTER_THAN hmm_in_score(&rhmm->
hmm))) {
1408 newscore, bestbp_rc_ptr->path, nf);
1411 for (w = dict_filler_start(dict); w <= dict_filler_end(dict); w++) {
1412 if (w == ps_search_silence_wid(ngs))
1416 if (w == dict_startwid(ps_search_dict(ngs)))
1422 newscore = bestbp_rc_ptr->score + ngs->fillpen + ngs->pip;
1423 pl_newscore = newscore
1425 if (pl_newscore BETTER_THAN thresh) {
1426 if ((hmm_frame(&rhmm->
hmm) < frame_idx)
1427 || (newscore BETTER_THAN hmm_in_score(&rhmm->
hmm))) {
1429 newscore, bestbp_rc_ptr->path, nf);
1443 if (hmm_frame(&rhmm->
hmm) == frame_idx) {
1451 if (hmm_frame(&rhmm->
hmm) == frame_idx) {
1460 int16
const *senscr;
1463 if (!ps_search_acmod(ngs)->compallsen)
1464 compute_sen_active(ngs, frame_idx);
1467 if ((senscr =
acmod_score(ps_search_acmod(ngs), &frame_idx)) == NULL)
1469 ngs->
st.n_senone_active_utt += ps_search_acmod(ngs)->n_senone_active;
1480 E_INFO(
"Renormalizing Scores at frame %d, best score %d\n",
1482 renormalize_scores(ngs, frame_idx, ngs->
best_score);
1486 evaluate_channels(ngs, senscr, frame_idx);
1488 prune_channels(ngs, frame_idx);
1490 bptable_maxwpf(ngs, frame_idx);
1492 word_transition(ngs, frame_idx);
1494 deactivate_channels(ngs, frame_idx);
1504 int32 i, w, cf, *awl;
1509 cf = ps_search_acmod(ngs)->output_frame;
1522 for (hmm = *(acl++); i > 0; --i, hmm = *(acl++)) {
1529 for (w = *(awl++); i > 0; --i, w = *(awl++)) {
1531 if (dict_is_single_phone(ps_search_dict(ngs), w))
1547 ptmr_stop(&ngs->fwdtree_perf);
1550 double n_speech = (double)(cf + 1)
1551 / cmd_ln_int32_r(ps_search_config(ngs),
"-frate");
1552 E_INFO(
"%8d words recognized (%d/fr)\n",
1553 ngs->bpidx, (ngs->bpidx + (cf >> 1)) / (cf + 1));
1554 E_INFO(
"%8d senones evaluated (%d/fr)\n", ngs->
st.n_senone_active_utt,
1555 (ngs->
st.n_senone_active_utt + (cf >> 1)) / (cf + 1));
1556 E_INFO(
"%8d channels searched (%d/fr), %d 1st, %d last\n",
1557 ngs->
st.n_root_chan_eval + ngs->
st.n_nonroot_chan_eval,
1558 (ngs->
st.n_root_chan_eval + ngs->
st.n_nonroot_chan_eval) / (cf + 1),
1559 ngs->
st.n_root_chan_eval, ngs->
st.n_last_chan_eval);
1560 E_INFO(
"%8d words for which last channels evaluated (%d/fr)\n",
1561 ngs->
st.n_word_lastchan_eval,
1562 ngs->
st.n_word_lastchan_eval / (cf + 1));
1563 E_INFO(
"%8d candidate words for entering last phone (%d/fr)\n",
1564 ngs->
st.n_lastphn_cand_utt, ngs->
st.n_lastphn_cand_utt / (cf + 1));
1565 E_INFO(
"fwdtree %.2f CPU %.3f xRT\n",
1566 ngs->fwdtree_perf.t_cpu,
1567 ngs->fwdtree_perf.t_cpu / n_speech);
1568 E_INFO(
"fwdtree %.2f wall %.3f xRT\n",
1569 ngs->fwdtree_perf.t_elapsed,
1570 ngs->fwdtree_perf.t_elapsed / n_speech);
hmm_t hmm
Basic HMM structure.
void ngram_fwdtree_finish(ngram_search_t *ngs)
Finish fwdtree decoding for an utterance.
void ngram_fwdtree_deinit(ngram_search_t *ngs)
Release memory associated with fwdtree decoding.
Base structure for search module.
int32 n_nonroot_chan
Number of valid non-root channels.
void ngram_search_alloc_all_rc(ngram_search_t *ngs, int32 w)
Allocate last phone channels for all possible right contexts for word w.
void hmm_init(hmm_context_t *ctx, hmm_t *hmm, int mpx, int ssid, int tmatid)
Populate a previously-allocated HMM structure, allocating internal data.
chan_t * next
first descendant of this channel
listelem_alloc_t * chan_alloc
For chan_t.
void ngram_fwdtree_start(ngram_search_t *ngs)
Start fwdtree decoding for an utterance.
const char * bin_mdef_ciphone_str(bin_mdef_t *m, int32 ci)
In: ciphone id for which name wanted.
frame_idx_t frame
start or end frame
hmm_context_t * hmmctx
HMM context.
int32 n_active_chan[2]
Number of entries in active_chan_list.
void hmm_deinit(hmm_t *hmm)
Free an HMM structure, releasing internal data (but not the HMM structure itself).
int16 last2_phone
next-to-last phone of this word
void acmod_activate_hmm(acmod_t *acmod, hmm_t *hmm)
Activate senones associated with an HMM.
#define BAD_SSID
Invalid senone sequence ID (limited to 16 bits for PocketSphinx).
bitvec_t * word_active
array of active flags for all words.
int32 ngram_search_exit_score(ngram_search_t *ngs, bptbl_t *pbe, int rcphone)
Get the exit score for a backpointer entry with a given right context.
int16 ciphone
first ciphone of this node; all words rooted at this node begin with this ciphone ...
int32 ** active_word_list
Array of active multi-phone words for current and next frame.
struct chan_s * next
first descendant of this channel; or, in the case of the last phone of a word, the next alternative r...
void ngram_search_save_bp(ngram_search_t *ngs, int frame_idx, int32 w, int32 score, int32 path, int32 rc)
Enter a word in the backpointer table.
Lexicon tree based Viterbi search.
int32 * single_phone_wid
list of single-phone word ids
int ngram_search_mark_bptable(ngram_search_t *ngs, int frame_idx)
Record the current frame's index in the backpointer table.
int32 n_root_chan_alloc
Number of root_chan allocated.
int16 ci2phone
second ciphone of this node; one root HMM for each unique right context
int32 penult_phn_wid
list of words whose last phone follows this one; this field indicates the first of the list; the rest...
int32 n_active_word[2]
Number of entries in active_word_list.
int32 rc_id
right-context id for last phone of words
#define dict2pid_rssid(d, ci, lc)
Access macros; not designed for arbitrary use.
N-Gram search module structure.
int ngram_fwdtree_search(ngram_search_t *ngs, int frame_idx)
Search one frame forward in an utterance.
void hmm_normalize(hmm_t *h, int32 bestscr)
Renormalize the scores in this HMM based on the given best score.
int32 max_nonroot_chan
Maximum possible number of non-root channels.
int32 last_phone_best_score
Best Viterbi path score for last phone.
int32 real_wid
wid of this or latest predecessor real word
root_chan_t * rhmm_1ph
Root HMMs for single-phone words.
int32 prev_real_wid
wid of second-last real word
#define WORST_SCORE
Large "bad" score.
int dict_filler_word(dict_t *d, s3wid_t w)
Return 1 if w is a filler word, 0 if not.
void ngram_fwdtree_init(ngram_search_t *ngs)
Initialize N-Gram search for fwdtree decoding.
Lexical tree node data type for the first phone (root) of each dynamic HMM tree structure.
void hmm_enter(hmm_t *h, int32 score, int32 histid, int frame)
Enter an HMM with the given path score and history ID.
Lexical tree node data type.
hmm_t hmm
Basic HMM structure.
void acmod_clear_active(acmod_t *acmod)
Clear set of active senones.
#define hmm_context_set_senscore(ctx, senscr)
Change the senone score array for a context.
#define SENSCR_SHIFT
Shift count for senone scores.
chan_t *** active_chan_list
Array of active channels for current and next frame.
a structure for a dictionary.
POCKETSPHINX_EXPORT int dict_real_word(dict_t *d, s3wid_t w)
Test if w is a "real" word, i.e. neither a filler word nor START/FINISH.
struct chan_s * alt
sibling; i.e., next descendant of parent HMM
#define WORSE_THAN
Is one score worse than another?
s3ssid_t dict2pid_internal(dict2pid_t *d2p, int32 wid, int pos)
Return the senone sequence ID for the given word position.
void hmm_clear(hmm_t *h)
Reset the states of the HMM to the invalid condition.
int32 best_score
Best Viterbi path score.
Back pointer table (forward pass lattice; actually a tree)
int32 n_1ph_LMwords
Number of single-phone dict words also in LM; these come first in single_phone_wid.
cross word triphone model structure
int ngram_fwdtree_reinit(ngram_search_t *ngs)
Rebuild search structures for updated language models.
Fast and rough context-independent phoneme loop search.
void ngram_search_free_all_rc(ngram_search_t *ngs, int32 w)
Free last phone channels for all possible right contexts for word w.
root_chan_t * root_chan
Search structure of HMM instances.
char * hyp_str
Current hypothesis string.
#define BETTER_THAN
Is one score better than another?
int32 s_idx
Start of BScoreStack for various right contexts.
int32 n_frame
Number of frames actually present.
ngram_model_t * lmset
Set of language models.
uint8 valid
For absolute pruning.
int32 n_1ph_words
Number of single-phone words in dict (total)
int32 ciphone
ciphone for this node
ngram_search_stats_t st
Various statistics for profiling.
chan_t ** word_chan
Channels associated with a given word (only used for right contexts, single-phone words in fwdtree se...
int32 score
Score (best among all right contexts)
int32 n_root_chan
Number of valid root_chan.
s3cipid_t * cimap
Index into ssid[] above for each ci phone.
int32 * homophone_set
Each node in the HMM tree structure may point to a set of words whose last phone would follow that no...
#define dict_pron(d, w, p)
The CI phones of the word w at position p.
Building composite triphone (as well as word internal triphones) with the dictionary.
#define phone_loop_search_score(pls, ci)
Return lookahead heuristic score for a specific phone.
Phone loop search structure.
int16 const * acmod_score(acmod_t *acmod, int *inout_frame_idx)
Score one frame of data.
int16 last_phone
last phone of this word