46 #include <sphinxbase/hash_table.h>
51 #include "pocketsphinx_export.h"
/* Size constant for the dictionary; presumably the number of word slots
 * added each time the word array is grown -- TODO confirm against dict.c. */
53 #define S3DICT_INC_SZ 4096
/*
 * Accessor macros for a dictionary (d) and its word entries (w = word id,
 * p = phone position).
 *
 * None of these macros range-check w or p, and several of them expand d
 * and/or w more than once, so pass only side-effect-free expressions.
 */
/* Value of the n_word field of d. */
151 #define dict_size(d) ((d)->n_word)
/* filler_end minus filler_start.
 * NOTE(review): if both ids are inclusive (first/last filler word id), this
 * difference is one less than the number of filler words -- confirm. */
152 #define dict_num_fillers(d) (dict_filler_end(d) - dict_filler_start(d))
/* dict_size minus the filler id span minus 2; the 2 presumably accounts for
 * the start and finish words -- confirm. */
158 #define dict_num_real_words(d) \
159 (dict_size(d) - (dict_filler_end(d) - dict_filler_start(d)) - 2)
/* Base pronunciation id stored for word w. */
160 #define dict_basewid(d,w) ((d)->word[w].basewid)
/* Word string for w, or NULL when w is negative. */
161 #define dict_wordstr(d,w) ((w) < 0 ? NULL : (d)->word[w].word)
/* Word string of the base pronunciation entry of w; unlike dict_wordstr,
 * a negative w is not guarded here. */
162 #define dict_basestr(d,w) ((d)->word[dict_basewid(d,w)].word)
/* Id of the next alternative pronunciation of w (the alt field). */
163 #define dict_nextalt(d,w) ((d)->word[w].alt)
/* Pronunciation length of w. */
164 #define dict_pronlen(d,w) ((d)->word[w].pronlen)
/* Phone at position p in the pronunciation of w. */
165 #define dict_pron(d,w,p) ((d)->word[w].ciphone[p])
/* Value of the filler_start field of d. */
166 #define dict_filler_start(d) ((d)->filler_start)
/* Value of the filler_end field of d. */
167 #define dict_filler_end(d) ((d)->filler_end)
/* Value of the startwid field of d. */
168 #define dict_startwid(d) ((d)->startwid)
/* Value of the finishwid field of d. */
169 #define dict_finishwid(d) ((d)->finishwid)
/* Value of the silwid field of d. */
170 #define dict_silwid(d) ((d)->silwid)
/* Non-zero when the pronunciation of w has exactly one phone. */
171 #define dict_is_single_phone(d,w) ((d)->word[w].pronlen == 1)
/* First phone of w (index 0). */
172 #define dict_first_phone(d,w) ((d)->word[w].ciphone[0])
/* Second phone of w (index 1); only meaningful when pronlen is at least 2. */
173 #define dict_second_phone(d,w) ((d)->word[w].ciphone[1])
/* Phone at index pronlen - 2; only meaningful when pronlen is at least 2. */
174 #define dict_second_last_phone(d,w) ((d)->word[w].ciphone[(d)->word[w].pronlen - 2])
/* Phone at index pronlen - 1; only meaningful when pronlen is at least 1. */
175 #define dict_last_phone(d,w) ((d)->word[w].ciphone[(d)->word[w].pronlen - 1])
/* Reserved word strings. Presumably these are the dictionary entries behind
 * the startwid, finishwid and silwid fields, plus a placeholder for unknown
 * words -- TODO confirm against the dictionary loader. */
178 #define S3_START_WORD "<s>"
179 #define S3_FINISH_WORD "</s>"
180 #define S3_SILENCE_WORD "<sil>"
181 #define S3_UNKNOWN_WORD "<UNK>"
dict_t * dict_init(cmd_ln_t *config, bin_mdef_t *mdef)
Initialize a new dictionary.
POCKETSPHINX_EXPORT s3wid_t dict_wordid(dict_t *d, const char *word)
Return word id for given word string if present.
char * word
ASCII word string.
int dict_free(dict_t *d)
Release a pointer to a dictionary.
Binary format model definition files, with support for heterogeneous topologies and variable-size N-p...
int32 n_word
Number of occupied entries in dict, i.e., excluding empty slots.
const char * dict_ciphone_str(dict_t *d, s3wid_t wid, int32 pos)
Return value: CI phone string for the given word, phone position.
dict_t * dict_retain(dict_t *d)
Retain a pointer to a dict_t.
int32 filler_end
Last filler word id (read from filler dict)
int dict_write(dict_t *dict, char const *filename, char const *format)
Write dictionary to a file.
s3wid_t startwid
FOR INTERNAL-USE ONLY.
s3wid_t silwid
FOR INTERNAL-USE ONLY.
s3wid_t alt
Next alternative pronunciation id, NOT_S3WID if none.
int16 s3cipid_t
Size definitions for more semantically meaningful units.
int dict_filler_word(dict_t *d, s3wid_t w)
Return 1 if w is a filler word, 0 if not.
Size definitions of semantically meaningful units.
a structure for one dictionary word.
s3wid_t basewid
Base pronunciation id.
s3wid_t finishwid
FOR INTERNAL-USE ONLY.
a structure for a dictionary.
POCKETSPHINX_EXPORT int dict_real_word(dict_t *d, s3wid_t w)
Test if w is a "real" word, i.e.
int32 filler_start
First filler word id (read from filler dict)
int32 max_words
Number of entries allocated in dict, including empty slots.
s3cipid_t * ciphone
Pronunciation.
dictword_t * word
Array of entries in dictionary.
s3wid_t dict_add_word(dict_t *d, char const *word, s3cipid_t const *p, int32 np)
Add a word with the given ciphone pronunciation list to the dictionary.
int32 pronlen
Pronunciation length.
bin_mdef_t * mdef
Model definition used for phone IDs; NULL if none used.
hash_table_t * ht
Hash table for mapping word strings to word ids.
void dict_report(dict_t *d)
Report a dictionary structure.
int32 dict_word2basestr(char *word)
If the given word contains a trailing "(....)" (i.e., a Sphinx-II style alternative pronunciation spe...