/*************************************************************************/ /* */ /* Language Technologies Institute */ /* Carnegie Mellon University */ /* Copyright (c) 2001 */ /* All Rights Reserved. */ /* */ /* Permission is hereby granted, free of charge, to use and distribute */ /* this software and its documentation without restriction, including */ /* without limitation the rights to use, copy, modify, merge, publish, */ /* distribute, sublicense, and/or sell copies of this work, and to */ /* permit persons to whom this work is furnished to do so, subject to */ /* the following conditions: */ /* 1. The code must retain the above copyright notice, this list of */ /* conditions and the following disclaimer. */ /* 2. Any modifications must be clearly marked as such. */ /* 3. Original authors' names are not deleted. */ /* 4. The authors' names are not used to endorse or promote products */ /* derived from this software without specific prior written */ /* permission. */ /* */ /* CARNEGIE MELLON UNIVERSITY AND THE CONTRIBUTORS TO THIS WORK */ /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */ /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */ /* SHALL CARNEGIE MELLON UNIVERSITY NOR THE CONTRIBUTORS BE LIABLE */ /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */ /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */ /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */ /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */ /* THIS SOFTWARE. */ /* */ /*************************************************************************/ /* Author: Alan W Black (awb@cs.cmu.edu) */ /* Date: January 2001 */ /*************************************************************************/ /* Feature functions used by various cart trees etc */ /* These have been create as needed, and as some of the trees are */ /* from University of Edinburgh's Festival system their names and */ /* semantics follow them */ /*************************************************************************/ /* ----------------------------------------------------------------- */ /* The English TTS System "Flite+hts_engine" */ /* developed by HTS Working Group */ /* http://hts-engine.sourceforge.net/ */ /* ----------------------------------------------------------------- */ /* */ /* Copyright (c) 2005-2013 Nagoya Institute of Technology */ /* Department of Computer Science */ /* */ /* 2005-2008 Tokyo Institute of Technology */ /* Interdisciplinary Graduate School of */ /* Science and Engineering */ /* */ /* All rights reserved. */ /* */ /* Redistribution and use in source and binary forms, with or */ /* without modification, are permitted provided that the following */ /* conditions are met: */ /* */ /* - Redistributions of source code must retain the above copyright */ /* notice, this list of conditions and the following disclaimer. */ /* - Redistributions in binary form must reproduce the above */ /* copyright notice, this list of conditions and the following */ /* disclaimer in the documentation and/or other materials provided */ /* with the distribution. */ /* - Neither the name of the HTS working group nor the names of its */ /* contributors may be used to endorse or promote products derived */ /* from this software without specific prior written permission. */ /* */ /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */ /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */ /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */ /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */ /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */ /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */ /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */ /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */ /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */ /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ /* POSSIBILITY OF SUCH DAMAGE. */ /* ----------------------------------------------------------------- */ #include "cst_hrg.h" #include "cst_phoneset.h" #include "cst_regex.h" #include "cst_ffeatures.h" #include "us_ffeatures.h" static const cst_val *gpos(const cst_item *word); DEF_STATIC_CONST_VAL_STRING(val_string_numeric,"numeric"); DEF_STATIC_CONST_VAL_STRING(val_string_number,"number"); DEF_STATIC_CONST_VAL_STRING(val_string_month,"month"); DEF_STATIC_CONST_VAL_STRING(val_string_day,"day"); DEF_STATIC_CONST_VAL_STRING(val_string_other,"_other_"); DEF_STATIC_CONST_VAL_STRING(val_string_a,"a"); DEF_STATIC_CONST_VAL_STRING(val_string_flight,"flight"); DEF_STATIC_CONST_VAL_STRING(val_string_to,"to"); DEF_STATIC_CONST_VAL_STRING(val_string_content,"content"); static const cst_val *gpos(const cst_item *word) { /* Guess at part of speech (function/content) */ const char *w; int s,t; w = item_feat_string(word,"name"); for (s=0; us_gpos[s]; s++) { for (t=1; us_gpos[s][t]; t++) if (cst_streq(w,val_string(us_gpos[s][t]))) return us_gpos[s][0]; } return (cst_val *)&val_string_content; } static const cst_val *num_digits(const cst_item *token) { const char *name = item_feat_string(token,"name"); return val_int_n(cst_strlen(name)); } static const cst_val *month_range(const cst_item *token) { int v = item_feat_int(token,"name"); if ((v > 0) && ( v < 32)) return VAL_STRING_1; else return VAL_STRING_0; } static const cst_val* token_pos_guess(const cst_item* token) { const char* name = item_feat_string(token, "name"); char* dc = cst_downcase(name); const cst_val* r; if (cst_regex_match(cst_rx_digits, dc)) r = (cst_val*)&val_string_numeric; else if ((cst_regex_match(cst_rx_double, dc)) || (cst_regex_match(cst_rx_double, dc))) r = (cst_val*)&val_string_number; else if (cst_streq(dc, "jan") || cst_streq(dc, "january") || cst_streq(dc, "feb") || cst_streq(dc, "february") || cst_streq(dc, "mar") || cst_streq(dc, "march") || cst_streq(dc, "apr") || cst_streq(dc, "april") || cst_streq(dc, "may") || cst_streq(dc, "jun") || cst_streq(dc, "june") || cst_streq(dc, "jul") || cst_streq(dc, "july") || cst_streq(dc, "aug") || cst_streq(dc, "august") || cst_streq(dc, "sep") || cst_streq(dc, "sept") || cst_streq(dc, "september") || cst_streq(dc, "oct") || cst_streq(dc, "october") || cst_streq(dc, "nov") || cst_streq(dc, "november") || cst_streq(dc, "dec") || cst_streq(dc, "december")) r = (cst_val*)&val_string_month; else if (cst_streq(dc, "sun") || cst_streq(dc, "sunday") || cst_streq(dc, "mon") || cst_streq(dc, "monday") || cst_streq(dc, "tue") || cst_streq(dc, "tues") || cst_streq(dc, "tuesday") || cst_streq(dc, "wed") || cst_streq(dc, "wednesday") || cst_streq(dc, "thu") || cst_streq(dc, "thurs") || cst_streq(dc, "thursday") || cst_streq(dc, "fri") || cst_streq(dc, "friday") || cst_streq(dc, "sat") || cst_streq(dc, "saturday")) r = (cst_val*)&val_string_day; /* ignoring the "token_most_common" condition, does get used */ else if (cst_streq(dc, "a")) r = (cst_val*)&val_string_a; else if (cst_streq(dc, "flight")) r = (cst_val*)&val_string_flight; else if (cst_streq(dc, "to")) r = (cst_val*)&val_string_to; else r = (cst_val*)&val_string_other; cst_free(dc); return r; } #ifdef FLITE_PLUS_HTS_ENGINE const cst_val *accented(const cst_item *p); /* defined in cst_ffeatures.c */ int item_after_length(const cst_item *n){ int i = 0; if(n == NULL) return 0; for(;n;n=n->n,i++); return i; } const cst_item *item_first(const cst_item *n){ if(n == 0) return 0; for(;n->p != 0;n=n->p); return n; } /* 11 12 13 53 54 */ static const cst_val *syl_numphones(const cst_item *syl){ return val_string_n(item_after_length(item_daughter(item_as(syl,"SylStructure")))); } /* 14 */ static const cst_val *pos_in_word(const cst_item *syl){ const cst_item *ss,*p; int c = 0; ss = item_as(syl,"SylStructure"); for(p = item_daughter(item_parent(ss));p;p = item_next(p),c++){ if(p == ss) return val_string_n(c); } return val_string_n(c); } /* 21 by Toda-san */ static const cst_val *lisp_distance_to_p_stress(const cst_item *syl){ const cst_item *s, *fs; int c; s=item_as(syl,"Syllable"); fs = path_to_item(syl,"R:SylStructure.parent.R:Phrase.parent.daughter.R:SylStructure.daughter"); if (item_equal(s,fs)) return val_string_n(0); s=item_prev(s); for (c=1; s && (!item_equal(s,fs)) && (c < CST_CONST_INT_MAX); s=item_prev(s),c++) if (strcmp("1", ffeature_string(s,"stress")) == 0) return val_string_n(c); if (strcmp("1", ffeature_string(s,"stress")) == 0) return val_string_n(c); else return val_string_n(0); } /* 22 by Toda-san */ static const cst_val *lisp_distance_to_n_stress(const cst_item *syl){ const cst_item *s, *fs; int c; s=item_as(syl,"Syllable"); fs = path_to_item(syl,"R:SylStructure.parent.R:Phrase.parent.daughtern.R:SylStructure.daughtern"); if (item_equal(s,fs)) return val_string_n(0); s=item_next(s); for (c=1; s && (!item_equal(s,fs)) && (c < CST_CONST_INT_MAX); s=item_next(s),c++) if (strcmp("1", ffeature_string(s,"stress")) == 0) return val_string_n(c); if (strcmp("1", ffeature_string(s,"stress")) == 0) return val_string_n(c); else return val_string_n(0); } /* 23 by Toda-san */ static const cst_val *lisp_distance_to_p_accent(const cst_item *syl){ const cst_item *s, *fs; int c; s=item_as(syl,"Syllable"); fs = path_to_item(syl,"R:SylStructure.parent.R:Phrase.parent.daughter.R:SylStructure.daughter"); if (item_equal(s,fs)) return val_string_n(0); s=item_prev(s); for (c=1; s && (!item_equal(s,fs)) && (c < CST_CONST_INT_MAX); s=item_prev(s),c++) if (val_int(accented(s))) return val_string_n(c); if (val_int(accented(s))) return val_string_n(c); else return val_string_n(0); } /* 24 by Toda-san */ static const cst_val *lisp_distance_to_n_accent(const cst_item *syl){ const cst_item *s, *fs; int c; s=item_as(syl,"Syllable"); fs = path_to_item(syl,"R:SylStructure.parent.R:Phrase.parent.daughtern.R:SylStructure.daughtern"); if (item_equal(s,fs)) return val_string_n(0); s=item_next(s); for (c=1; s && (!item_equal(s,fs)) && (c < CST_CONST_INT_MAX); s=item_next(s),c++) if (val_int(accented(s))) return val_string_n(c); if (val_int(accented(s))) return val_string_n(c); else return val_string_n(0); } /* 25 */ static char syl_vowel_str[16]; static cst_val syl_vowel_val; static const cst_val *syl_vowel(const cst_item *syl){ /* the vowel in the syllable */ const cst_item *ss,*p; int size; CST_VAL_TYPE(&syl_vowel_val) = CST_VAL_TYPE_STRING; ss = item_as(syl,"SylStructure"); for(p = item_daughter(ss),size = 0;p;p = item_next(p),size++){ if(strchr("aeiou",item_name(p)[0]) != NULL){ strcpy(syl_vowel_str,item_name(p)); CST_VAL_STRING_LVAL(&syl_vowel_val) = syl_vowel_str; return &syl_vowel_val; } } /* no vowel */ strcpy(syl_vowel_str,"novowel"); CST_VAL_STRING_LVAL(&syl_vowel_val) = syl_vowel_str; return &syl_vowel_val; } /* 32 */ static const cst_val *pos_in_phrase(const cst_item *syl){ const cst_item *ss,*p; int c = 0; ss = item_as(syl,"Phrase"); for (p = item_first(ss);p;p=item_next(p),c++) if(p == ss) return val_string_n(c); return val_string_n(c); } /* 33 */ static const cst_val *words_out(const cst_item *syl){ const cst_item *ss,*p; int c = 0; ss = item_as(syl,"Phrase"); for(p = ss;p;p=item_next(p),c++); return val_string_n(c); } /* 34 by Toda-san */ static const cst_val *content_words_in(const cst_item *word){ const cst_item *ss,*p,*fs; int c; ss = word; fs = path_to_item(word,"R:Phrase.parent.daughter"); for (c=0, p=ss; p && (!item_equal(p,fs)) && (c < CST_CONST_INT_MAX); p=item_prev(p)) if (cst_streq("content", ffeature_string(p,"gpos"))) c++; return val_string_n(c); /* its used randomly as int and float */ } /* 35 by Toda-san */ static const cst_val *content_words_out(const cst_item *word){ const cst_item *ss,*p,*fs; int c; ss = word; fs = path_to_item(word,"R:Phrase.parent.daughtern"); for (c=0, p=ss; p && (!item_equal(p,fs)) && (c < CST_CONST_INT_MAX); p=item_next(p)) if (cst_streq("content", ffeature_string(p,"gpos"))) c++; return val_string_n(c); /* its used randomly as int and float */ } /* 36 */ static const cst_val *lisp_distance_to_p_content(const cst_item *syl){ const cst_item *p; int c = 0; for(p=item_prev(item_as(syl,"Phrase"));p;p=item_prev(p)){ c++; if(gpos(p)==(cst_val*)&val_string_content) break; } return val_string_n(c); } /* 37 */ static const cst_val *lisp_distance_to_n_content(const cst_item *syl){ const cst_item *p; int c = 0; for(p=item_next(item_as(syl,"Phrase"));p;p=item_next(p)){ c++; if (gpos(p)==(cst_val*)&val_string_content) break; } return val_string_n(c); } /* 38 39 40 59 60 by Toda-san */ static const cst_val *lisp_num_syls_in_phrase(const cst_item *phrase){ const cst_item *sw,*fw; int c; sw = path_to_item(phrase,"daughter"); fw = path_to_item(phrase,"daughtern"); for (c=0; sw && (!item_equal(sw,fw)) && (c < CST_CONST_INT_MAX); sw=item_next(sw)) c += ffeature_int(sw, "word_numsyls"); c += ffeature_int(sw, "word_numsyls"); return val_string_n(c); } /* 41 42 43 61 62 by Toda-san */ static const cst_val *lisp_num_words_in_phrase(const cst_item *phrase){ const cst_item *sw,*fw; int c; sw = path_to_item(phrase,"daughter"); fw = path_to_item(phrase,"daughtern"); for (c=1; sw && (!item_equal(sw,fw)) && (c < CST_CONST_INT_MAX); sw=item_next(sw)) c++; return val_string_n(c); } /* 46 by Toda-san */ static const cst_val *lisp_total_syls(const cst_item *phrase){ const cst_item *sp, *fp; int c; sp = phrase; while (item_prev(sp) != NULL) sp = item_prev(sp); fp = phrase; while (item_next(fp) != NULL) fp = item_next(fp); for (c = 0; sp && (!item_equal(sp, fp)) && (c < CST_CONST_INT_MAX); sp = item_next(sp)) c += ffeature_int(sp, "lisp_num_syls_in_phrase"); c += ffeature_int(sp, "lisp_num_syls_in_phrase"); return val_string_n(c); } /* 47 by Toda-san */ static const cst_val *lisp_total_words(const cst_item *phrase){ const cst_item *sp, *fp; int c; sp = phrase; while (item_prev(sp) != NULL) sp = item_prev(sp); fp = phrase; while (item_next(fp) != NULL) fp = item_next(fp); for (c = 0; sp && (!item_equal(sp, fp)) && (c < CST_CONST_INT_MAX); sp = item_next(sp)) c += ffeature_int(sp, "lisp_num_words_in_phrase"); c += ffeature_int(sp, "lisp_num_words_in_phrase"); return val_string_n(c); } /* 48 by Toda-san */ static const cst_val *lisp_total_phrases(const cst_item *phrase){ const cst_item *sp, *fp; int c; sp = phrase; while (item_prev(sp) != NULL) sp = item_prev(sp); fp = phrase; while (item_next(fp) != NULL) fp = item_next(fp); for (c = 1; sp && (!item_equal(sp, fp)) && (c < CST_CONST_INT_MAX); sp = item_next(sp)) c++; return val_string_n(c); } #endif /* FLITE_PLUS_HTS_ENGINE */ void us_ff_register(cst_features *ffunctions) { /* The language independent ones */ basic_ff_register(ffunctions); ff_register(ffunctions, "gpos",gpos); ff_register(ffunctions, "num_digits",num_digits); ff_register(ffunctions, "month_range",month_range); ff_register(ffunctions, "token_pos_guess",token_pos_guess); #ifdef FLITE_PLUS_HTS_ENGINE ff_register(ffunctions, "syl_numphones",syl_numphones); /* 11 12 13 53 54 */ ff_register(ffunctions, "pos_in_word",pos_in_word); /* 14 */ ff_register(ffunctions, "lisp_distance_to_p_stress",lisp_distance_to_p_stress); /* 21 */ ff_register(ffunctions, "lisp_distance_to_n_stress",lisp_distance_to_n_stress); /* 22 */ ff_register(ffunctions, "lisp_distance_to_p_accent",lisp_distance_to_p_accent); /* 23 */ ff_register(ffunctions, "lisp_distance_to_n_accent",lisp_distance_to_n_accent); /* 24 */ ff_register(ffunctions, "syl_vowel",syl_vowel); /* 25 */ ff_register(ffunctions, "pos_in_phrase",pos_in_phrase); /* 32 */ ff_register(ffunctions, "words_out",words_out); /* 33 */ ff_register(ffunctions, "content_words_in",content_words_in); /* 34 */ ff_register(ffunctions, "content_words_out",content_words_out); /* 35 */ ff_register(ffunctions, "lisp_distance_to_p_content",lisp_distance_to_p_content); /* 36 */ ff_register(ffunctions, "lisp_distance_to_n_content",lisp_distance_to_n_content); /* 37 */ ff_register(ffunctions, "lisp_num_syls_in_phrase",lisp_num_syls_in_phrase); /* 38 39 40 59 60 */ ff_register(ffunctions, "lisp_num_words_in_phrase",lisp_num_words_in_phrase); /* 41 42 43 61 62 */ ff_register(ffunctions, "lisp_total_syls",lisp_total_syls); /* 46 */ ff_register(ffunctions, "lisp_total_words",lisp_total_words); /* 47 */ ff_register(ffunctions, "lisp_total_phrases",lisp_total_phrases); /* 48 */ #endif /* FLITE_PLUS_HTS_ENGINE */ }