42 #include <sphinxbase/err.h>
43 #include <sphinxbase/ckd_alloc.h>
44 #include <sphinxbase/strfuncs.h>
45 #include <sphinxbase/pio.h>
46 #include <sphinxbase/cmd_ln.h>
49 #include "allphone_search.h"
68 allphone_search_seg_free(
ps_seg_t * seg)
78 seg->
ascr = phseg->score;
79 seg->
lscr = phseg->tscore;
84 allphone_search_seg_next(
ps_seg_t * seg)
89 itor->seg = itor->seg->next;
91 if (itor->seg == NULL) {
92 allphone_search_seg_free(seg);
95 phseg = gnode_ptr(itor->seg);
96 allphone_search_fill_iter(seg, phseg);
102 allphone_search_seg_next,
103 allphone_search_seg_free
108 allphone_search_seg_iter(
ps_search_t * search, int32 * out_score)
113 allphone_backtrace(allphs, allphs->
frame - 1);
114 if (allphs->segments == NULL)
119 iter->base.
vt = &fsg_segfuncs;
120 iter->base.
search = search;
121 iter->seg = allphs->segments;
122 allphone_search_fill_iter((
ps_seg_t *)iter, gnode_ptr(iter->seg));
128 allphone_search_start,
129 allphone_search_step,
130 allphone_search_finish,
131 allphone_search_reinit,
132 allphone_search_free,
133 allphone_search_lattice,
135 allphone_search_prob,
136 allphone_search_seg_iter,
153 for (p = ci_phmm[bin_mdef_pid2ci(mdef, pid)]; p; p = p->
next) {
154 if (mdef_pid2tmatid(mdef, p->
pid) == mdef_pid2tmatid(mdef, pid))
155 if (mdef_pid2ssid(mdef, p->
pid) == mdef_pid2ssid(mdef, pid))
177 rclist = (int32 *) ckd_calloc(mdef->
n_ciphone + 1,
sizeof(int32));
181 for (ci = 0; ci < mdef->
n_ciphone; ci++) {
182 for (p = ci_phmm[ci]; p; p = p->
next) {
185 for (rc = 0; rc < mdef->
n_ciphone; rc++) {
186 if (bitvec_is_set(p->
rc, rc))
192 for (i = 0; IS_S3CIPID(rclist[i]); i++) {
193 for (p2 = ci_phmm[rclist[i]]; p2; p2 = p2->
next) {
194 if (bitvec_is_set(p2->
lc, ci)) {
196 l = (
plink_t *) ckd_calloc(1,
sizeof(*l));
231 (
phmm_t **) ckd_calloc(bin_mdef_n_ciphone(mdef),
sizeof(
phmm_t *));
233 (
phmm_t **) ckd_calloc(bin_mdef_n_phone(mdef),
sizeof(
phmm_t *));
237 nphone = allphs->
ci_only ? bin_mdef_n_ciphone(mdef) : bin_mdef_n_phone(mdef);
238 E_INFO(
"Building PHMM net of %d phones\n", nphone);
239 for (pid = 0; pid < nphone; pid++) {
240 if ((p = phmm_lookup(allphs, pid)) == NULL) {
242 p = (
phmm_t *) ckd_calloc(1,
sizeof(*p));
244 mdef_pid2ssid(mdef, pid), mdef->
phone[pid].
tmat);
246 p->
ci = bin_mdef_pid2ci(mdef, pid);
256 lrc_size = bitvec_size(bin_mdef_n_ciphone(mdef));
257 lc = ckd_calloc(n_phmm * 2 * lrc_size,
sizeof(bitvec_t));
258 rc = lc + (n_phmm * lrc_size);
259 for (ci = 0; ci < mdef->
n_ciphone; ci++) {
270 (
s3cipid_t *) ckd_calloc(bin_mdef_n_ciphone(mdef) + 1,
275 for (ci = 0; ci < bin_mdef_n_ciphone(mdef); ci++) {
277 bitvec_set_all(p->
lc, bin_mdef_n_ciphone(mdef));
278 bitvec_set_all(p->
rc, bin_mdef_n_ciphone(mdef));
279 if (mdef->
phone[ci].info.
ci.filler) {
287 for (pid = bin_mdef_n_ciphone(mdef); pid < nphone;
291 if (mdef->
phone[mdef->
phone[pid].info.cd.ctx[1]].info.
ci.filler) {
292 for (i = 0; IS_S3CIPID(filler[i]); i++)
293 bitvec_set(p->
lc, filler[i]);
296 bitvec_set(p->
lc, mdef->
phone[pid].info.cd.ctx[1]);
298 if (mdef->
phone[mdef->
phone[pid].info.cd.ctx[2]].info.
ci.filler) {
299 for (i = 0; IS_S3CIPID(filler[i]); i++)
300 bitvec_set(p->
rc, filler[i]);
303 bitvec_set(p->
rc, mdef->
phone[pid].info.cd.ctx[2]);
309 n_link = phmm_link(allphs);
311 E_INFO(
"%d nodes, %d links\n", n_phmm, n_link);
326 for (ci = 0; ci < mdef_n_ciphone(mdef); ++ci) {
329 for (p = allphs->
ci_phmm[ci]; p; p = next) {
333 for (l = p->
succlist; l; l = lnext) {
360 for (ci = 0; ci < mdef->
n_ciphone; ci++) {
361 for (p = ci_phmm[(
unsigned) ci]; p; p = p->
next) {
362 if (hmm_frame(&(p->
hmm)) == allphs->
frame) {
388 th = best + allphs->
pbeam;
391 mdef = ps_search_acmod(allphs)->mdef;
392 curfrm = allphs->
frame;
398 for (ci = 0; ci < mdef->
n_ciphone; ci++) {
399 for (p = ci_phmm[(
unsigned) ci]; p; p = p->
next) {
400 if (hmm_frame(&(p->
hmm)) == curfrm) {
402 if (hmm_bestscore(&(p->
hmm)) >= th) {
404 h = (
history_t *) ckd_calloc(1,
sizeof(*h));
407 h->
hist = hmm_out_history(&(p->
hmm));
408 h->
score = hmm_out_score(&(p->
hmm));
417 blkarray_list_get(history, h->
hist);
419 if (pred->
hist > 0) {
421 blkarray_list_get(history,
424 ngram_tg_score(allphs->
lm,
435 ngram_bg_score(allphs->
lm,
452 blkarray_list_append(history, h);
455 hmm_frame(&(p->
hmm)) = nf;
468 int32 frame_history_start)
473 int32 newscore, nf, curfrm;
477 curfrm = allphs->
frame;
482 for (hist_idx = frame_history_start;
483 hist_idx < blkarray_list_n_valid(allphs->
history); hist_idx++) {
484 h = blkarray_list_get(allphs->
history, hist_idx);
495 else if (ci2lmwid[to->
ci] == NGRAM_INVALID_WID)
503 ngram_tg_score(allphs->
lm,
510 tscore = ngram_bg_score(allphs->
lm,
517 newscore = h->
score + tscore;
518 if ((newscore > best + allphs->beam)
519 && (newscore > hmm_in_score(&(to->
hmm)))) {
527 allphone_search_init(
const char *name,
535 static char *lmname =
"default";
538 ps_search_init(ps_search_base(allphs), &allphone_funcs, PS_SEARCH_TYPE_ALLPHONE, name, config, acmod,
544 if (allphs->
hmmctx == NULL) {
545 ps_search_free(ps_search_base(allphs));
549 allphs->
ci_only = cmd_ln_boolean_r(config,
"-allphone_ci");
550 allphs->lw = cmd_ln_float32_r(config,
"-lw");
556 allphs->
lm = ngram_model_set_init(config, &lm, &lmname, NULL, 1);
559 (
"Failed to initialize ngram model set for phoneme decoding");
568 ngram_wid(allphs->
lm,
571 if (allphs->
ci2lmwid[i] == NGRAM_INVALID_WID
574 ngram_wid(allphs->
lm,
582 (
"Failed to load language model specified in -allphone, doing unconstrained phone-loop decoding\n");
585 (acmod->
lmath, cmd_ln_float32_r(config,
"-pip"))
591 allphs->segments = NULL;
596 (int32) logmath_log(acmod->
lmath,
597 cmd_ln_float64_r(config,
"-beam"))
601 (int32) logmath_log(acmod->
lmath,
602 cmd_ln_float64_r(config,
"-pbeam"))
606 allphs->
history = blkarray_list_init();
609 allphs->
ascale = 1.0 / cmd_ln_float32_r(config,
"-ascale");
611 E_INFO(
"Allphone(beam: %d, pbeam: %d)\n", allphs->beam, allphs->
pbeam);
613 ptmr_init(&allphs->
perf);
629 (
"-lm argument missing; doing unconstrained phone-loop decoding\n");
633 cmd_ln_float32_r(search->
config,
648 / cmd_ln_int32_r(ps_search_config(allphs),
"-frate");
650 E_INFO(
"TOTAL fwdflat %.2f CPU %.3f xRT\n",
651 allphs->
perf.t_tot_cpu,
652 allphs->
perf.t_tot_cpu / n_speech);
653 E_INFO(
"TOTAL fwdflat %.2f wall %.3f xRT\n",
654 allphs->
perf.t_tot_elapsed,
655 allphs->
perf.t_tot_elapsed / n_speech);
662 ngram_model_free(allphs->
lm);
666 blkarray_list_free(allphs->
history);
683 for (ci = 0; ci < bin_mdef_n_ciphone(mdef); ci++) {
684 for (p = allphs->
ci_phmm[(
unsigned) ci]; p; p = p->
next) {
693 blkarray_list_reset(allphs->
history);
697 ci = bin_mdef_silphone(mdef);
705 ptmr_reset(&allphs->
perf);
706 ptmr_start(&allphs->
perf);
719 acmod = ps_search_acmod(allphs);
723 for (ci = 0; ci < bin_mdef_n_ciphone(mdef); ci++)
725 if (hmm_frame(&(p->
hmm)) == allphs->
frame)
730 allphone_search_step(
ps_search_t * search,
int frame_idx)
732 int32 bestscr, frame_history_start;
738 allphone_search_sen_active(allphs);
741 bestscr = phmm_eval_all(allphs, senscr);
743 frame_history_start = blkarray_list_n_valid(allphs->
history);
744 phmm_exit(allphs, bestscr);
745 phmm_trans(allphs, bestscr, frame_history_start);
755 int32 score = h->
score;
759 score -= pred->
score;
769 for (gn = allphs->segments; gn; gn = gn->next) {
770 ckd_free(gnode_ptr(gn));
772 glist_free(allphs->segments);
773 allphs->segments = NULL;
779 int32 best, hist_idx, best_idx;
785 allphone_clear_segments(allphs);
789 hist_idx = blkarray_list_n_valid(allphs->
history) - 1;
790 while (hist_idx > 0) {
791 h = blkarray_list_get(allphs->
history, hist_idx);
793 frm = last_frm = h->
ef;
803 best = (int32) 0x80000000;
805 while (frm == last_frm && hist_idx > 0) {
806 h = blkarray_list_get(allphs->
history, hist_idx);
808 if (h->
score > best && frm == last_frm) {
819 while (best_idx > 0) {
820 h = blkarray_list_get(allphs->
history, best_idx);
826 h->
hist))->ef + 1 : 0;
828 s->score = ascore(allphs, h);
830 allphs->segments = glist_add_ptr(allphs->segments, s);
847 n_hist = blkarray_list_n_valid(allphs->
history);
849 (
"%d frames, %d HMMs (%d/fr), %d senones (%d/fr), %d history entries (%d/fr)\n",
854 n_hist, (allphs->
frame > 0) ? n_hist / allphs->
frame : 0);
857 allphone_backtrace(allphs, allphs->
frame - 1);
860 ptmr_stop(&allphs->
perf);
862 cf = ps_search_acmod(allphs)->output_frame;
864 double n_speech = (double) (cf + 1)
865 / cmd_ln_int32_r(ps_search_config(allphs),
"-frate");
866 E_INFO(
"allphone %.2f CPU %.3f xRT\n",
867 allphs->
perf.t_cpu, allphs->
perf.t_cpu / n_speech);
868 E_INFO(
"allphone %.2f wall %.3f xRT\n",
869 allphs->
perf.t_elapsed, allphs->
perf.t_elapsed / n_speech);
877 allphone_search_hyp(
ps_search_t * search, int32 * out_score,
878 int32 * out_is_final)
883 const char *phone_str;
885 int len, hyp_idx, phone_idx;
895 allphone_backtrace(allphs, allphs->
frame - 1);
896 if (allphs->segments == NULL) {
900 len = glist_count(allphs->segments) * 10;
902 search->
hyp_str = (
char *) ckd_calloc(len,
sizeof(*search->
hyp_str));
904 for (gn = allphs->segments; gn; gn = gn->next) {
908 while (phone_str[phone_idx] !=
'\0')
909 search->
hyp_str[hyp_idx++] = phone_str[phone_idx++];
910 search->
hyp_str[hyp_idx++] =
' ';
912 search->
hyp_str[--hyp_idx] =
'\0';
913 E_INFO(
"Hyp: %s\n", search->
hyp_str);
Internal implementation of PocketSphinx decoder.
History (paths) information at any point in allphone Viterbi search.
struct phmm_s * next
Next unique PHMM for same parent basephone.
Base structure for search module.
ptmr_t perf
Performance counter.
void hmm_init(hmm_context_t *ctx, hmm_t *hmm, int mpx, int ssid, int tmatid)
Populate a previously-allocated HMM structure, allocating internal data.
s3cipid_t ci
Parent basephone for this PHMM.
int32 n_sen_eval
Total senones evaluated this utt.
const char * bin_mdef_ciphone_str(bin_mdef_t *m, int32 ci)
In: ciphone id for which name wanted.
void ps_search_base_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p)
Re-initialize base structure with new dictionary.
int n_senone_active
Number of active GMMs.
acmod_t * acmod
Acoustic model.
An individual HMM among the HMM search space.
uint8 *** tp
The transition matrices; kept in the same scale as acoustic scores; tp[tmatid][from-state][to-state]...
ps_segfuncs_t * vt
V-table of seg methods.
logmath_t * lmath
Log-math computation.
uint16 ** sseq
Unique senone sequences (2D array built at load time)
void hmm_deinit(hmm_t *hmm)
Free an HMM structure, releasing internal data (but not the HMM structure itself).
int32 lscr
Language model score.
s3pid_t pid
Phone id (temporary).
int32 tmat
Transition matrix ID.
void acmod_activate_hmm(acmod_t *acmod, hmm_t *hmm)
Activate senones associated with an HMM.
int32 hist
Previous history entry.
bitvec_t * lc
Set (bit-vector) of left context phones seen for this PHMM.
int32 score
Path score for this path.
#define BAD_S3CIPID
Ci phone id.
char const * word
Word string (pointer into dictionary hash)
ps_search_t * search
Search object from whence this came.
void ps_search_init(ps_search_t *search, ps_searchfuncs_t *vt, const char *type, const char *name, cmd_ln_t *config, acmod_t *acmod, dict_t *dict, dict2pid_t *d2p)
Initialize base structure.
int32 * ci2lmwid
Mapping of CI phones to LM word IDs.
int32 inspen
Language weights.
int32 hmm_vit_eval(hmm_t *hmm)
Viterbi evaluation of given HMM.
struct mdef_entry_s::@0::@1 ci
CI phone information - attributes (just "filler" for now)
uint8 compallsen
Compute all senones?
bitvec_t * rc
Set (bit-vector) of right context phones seen for this PHMM.
hmm_context_t * hmm_context_init(int32 n_emit_state, uint8 **const *tp, int16 const *senscore, uint16 *const *sseq)
Create an HMM context.
void ps_search_base_free(ps_search_t *search)
Free search.
Implementation of allphone search structure.
cmd_ln_t * config
Configuration.
int16 s3cipid_t
Size definitions for more semantically meaningful units.
#define WORST_SCORE
Large "bad" score.
tmat_t * tmat
Transition matrices.
int32 ascr
Acoustic score.
int32 tscore
Transition score for this path.
hmm_context_t * hmmctx
HMM context.
void hmm_enter(hmm_t *h, int32 score, int32 histid, int frame)
Enter an HMM with the given path score and history ID.
Segment iterator over list of phseg.
void acmod_clear_active(acmod_t *acmod)
Clear set of active senones.
#define S3_SILENCE_CIPHONE
Hard-coded silence CI phone name.
struct plink_s * next
Next link for parent PHMM node.
#define hmm_context_set_senscore(ctx, senscr)
Change the senone score array for a context.
#define SENSCR_SHIFT
Shift count for senone scores.
A structure for a dictionary.
float32 ascale
Acoustic score scale for posterior probabilities.
int32 n_tot_frame
Total number of frames processed.
ngram_model_t * lm
Ngram model set.
Word graph structure used in bestpath/nbest search.
hmm_t hmm
Base HMM structure.
void hmm_clear(hmm_t *h)
Reset the states of the HMM to the invalid condition.
char * hyp_str
Current hypothesis string.
int32 ci_only
Use context-independent phones for decoding.
int32 n_hmm_eval
Total HMMs evaluated this utt.
struct plink_s * succlist
List of successor PHMM nodes.
Models a single unique pair.
phmm_t * phmm
PHMM ending this path.
phmm_t * phmm
Successor PHMM node.
void hmm_context_free(hmm_context_t *ctx)
Free an HMM context.
int32 pbeam
Effective beams after applying beam_factor.
bin_mdef_t * mdef
Model definition.
V-table for search algorithm.
mdef_entry_t * phone
All phone structures.
blkarray_list_t * history
List of history nodes allocated in each frame.
Base structure for hypothesis segmentation iterator.
int32 n_ciphone
Number of base (CI) phones.
cmd_ln_t * config
Configuration.
phmm_t ** ci_phmm
PHMM lists (for each CI phone)
Acoustic model structure.
Phone level segmentation information.
Building composite triphone (as well as word internal triphones) with the dictionary.
List of links from a PHMM node to its successors; one link per successor.
frame_idx_t sf
Start frame.
int16 const * acmod_score(acmod_t *acmod, int *inout_frame_idx)
Score one frame of data.
frame_idx_t frame
Current frame.