flightgear/3rdparty/hts_engine_API/lib/HTS_vocoder.c
2022-10-20 20:29:11 +08:00

998 lines
28 KiB
C

/* ----------------------------------------------------------------- */
/* The HMM-Based Speech Synthesis Engine "hts_engine API" */
/* developed by HTS Working Group */
/* http://hts-engine.sourceforge.net/ */
/* ----------------------------------------------------------------- */
/* */
/* Copyright (c) 2001-2015 Nagoya Institute of Technology */
/* Department of Computer Science */
/* */
/* 2001-2008 Tokyo Institute of Technology */
/* Interdisciplinary Graduate School of */
/* Science and Engineering */
/* */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* - Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* - Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials provided */
/* with the distribution. */
/* - Neither the name of the HTS working group nor the names of its */
/* contributors may be used to endorse or promote products derived */
/* from this software without specific prior written permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */
/* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */
/* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */
/* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
/* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */
/* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */
/* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* ----------------------------------------------------------------- */
#ifndef HTS_VOCODER_C
#define HTS_VOCODER_C
#ifdef __cplusplus
#define HTS_VOCODER_C_START extern "C" {
#define HTS_VOCODER_C_END }
#else
#define HTS_VOCODER_C_START
#define HTS_VOCODER_C_END
#endif /* __CPLUSPLUS */
HTS_VOCODER_C_START;
#include <math.h> /* for sqrt(),log(),exp(),pow(),cos() */
#include <string.h> /* for memmove() */
/* hts_engine libraries */
#include "HTS_hidden.h"
/* HTS_pade: coefficient table read by HTS_mlsadf (presumably Pade
 * approximation coefficients, going by the name).
 * Rows are packed back to back: the row for order pd starts at index
 * pd * (pd + 1) / 2 and holds pd + 1 entries, for pd = 0..5
 * (1 + 2 + 3 + 4 + 5 + 6 = 21 values in total). */
static const double HTS_pade[21] = {
    /* pd = 0 */
    1.00000000000,
    /* pd = 1 */
    1.00000000000, 0.00000000000,
    /* pd = 2 */
    1.00000000000, 0.00000000000, 0.00000000000,
    /* pd = 3 */
    1.00000000000, 0.00000000000, 0.00000000000, 0.00000000000,
    /* pd = 4 */
    1.00000000000, 0.49992730000, 0.10670050000, 0.01170221000,
    0.00056562790,
    /* pd = 5 */
    1.00000000000, 0.49993910000, 0.11070980000, 0.01369984000,
    0.00095648530, 0.00003041721
};
/* HTS_movem: copy nitem doubles from a (source) to b (destination).
 * The two regions may overlap in either direction.
 * A zero or negative nitem copies nothing. */
static void HTS_movem(double *a, double *b, const int nitem)
{
    /* Reject non-positive counts before converting to size_t: a negative
     * value would otherwise turn into an enormous byte count. */
    if (nitem <= 0)
        return;

    /* memmove is specified to handle overlapping regions, so there is no
     * need to compare the two pointers to pick a copy direction (such a
     * comparison is only defined for pointers into the same array). */
    memmove(b, a, (size_t) nitem * sizeof(double));
}
/* HTS_mlsafir: sub function for the MLSA filter.
 * Feeds x into the delay line d (indices 0..m+1 are touched), updates the
 * taps in place using a and aa, and returns the sum of d[i] * b[i] for
 * i = 2..m. Afterwards d[1..m] is shifted up by one slot into d[2..m+1]. */
static double HTS_mlsafir(const double x, const double *b, const int m, const double a, const double aa, double *d)
{
    double acc = 0.0;
    int k;

    d[0] = x;
    d[1] = aa * d[0] + a * d[1];

    /* Each tap reads its already-updated lower neighbour and its
     * not-yet-updated upper neighbour, then contributes to the output. */
    for (k = 2; k <= m; k++) {
        d[k] += a * (d[k + 1] - d[k - 1]);
        acc += d[k] * b[k];
    }

    /* Shift from the top so no value is overwritten before it is moved. */
    k = m + 1;
    while (k > 1) {
        d[k] = d[k - 1];
        k--;
    }

    return acc;
}
/* HTS_mlsadf1: sub function for the MLSA filter (first stage, uses only b[1]).
 * d holds two consecutive groups of pd + 1 values: per-section state in
 * d[1..pd] and section inputs/outputs starting at d[pd + 1].
 * ppade[1..pd] weights the section outputs. m is not used here.
 * Returns the filtered sample. */
static double HTS_mlsadf1(double x, const double *b, const int m, const double a, const double aa, const int pd, double *d, const double *ppade)
{
    double *tap = &d[pd + 1];
    double sum = 0.0;
    int k;

    for (k = pd; k >= 1; k--) {
        double term;

        d[k] = aa * tap[k - 1] + a * d[k];
        tap[k] = d[k] * b[1];
        term = tap[k] * ppade[k];

        /* Odd sections are added to the feedback signal, even ones subtracted. */
        if (k & 1)
            x += term;
        else
            x -= term;
        sum += term;
    }

    tap[0] = x;
    sum += x;
    return sum;
}
/* HTS_mlsadf2: sub function for the MLSA filter (second stage).
 * d holds pd delay lines of m + 2 values each (one per HTS_mlsafir section),
 * followed by pd + 1 section inputs/outputs. ppade[1..pd] weights the
 * section outputs. Returns the filtered sample. */
static double HTS_mlsadf2(double x, const double *b, const int m, const double a, const double aa, const int pd, double *d, const double *ppade)
{
    const int stride = m + 2;
    double *tap = &d[pd * stride];
    double sum = 0.0;
    int k;

    for (k = pd; k >= 1; k--) {
        double term;

        tap[k] = HTS_mlsafir(tap[k - 1], b, m, a, aa, &d[(k - 1) * stride]);
        term = tap[k] * ppade[k];

        /* Odd sections are added to the feedback signal, even ones subtracted. */
        if (k & 1)
            x += term;
        else
            x -= term;
        sum += term;
    }

    tap[0] = x;
    sum += x;
    return sum;
}
/* HTS_mlsadf: functions for MLSA filter */
static double HTS_mlsadf(double x, const double *b, const int m, const double a, const int pd, double *d)
{
const double aa = 1 - a * a;
const double *ppade = &(HTS_pade[pd * (pd + 1) / 2]);
x = HTS_mlsadf1(x, b, m, a, aa, pd, d, ppade);
x = HTS_mlsadf2(x, b, m, a, aa, pd, &d[2 * (pd + 1)], ppade);
return (x);
}
/* HTS_rnd: random number generation.
 * Advances the generator state in *next with a multiply-and-add step,
 * takes (state / 65536) % 32768 and returns it divided by RANDMAX. */
static double HTS_rnd(unsigned long *next)
{
    unsigned long state = *next;
    double r;

    state = state * 1103515245L + 12345;
    *next = state;

    r = (state / 65536L) % 32768L;
    return r / RANDMAX;
}
/* HTS_nrandom: gaussian random noise generation.
 * Samples are produced in pairs: when v->sw is 0 a new pair is drawn and the
 * first value returned; on the next call (v->sw is 1) the second value of
 * the pair is returned from the saved v->r2 and v->s. */
static double HTS_nrandom(HTS_Vocoder * v)
{
    if (v->sw != 0) {
        /* second half of the previously generated pair */
        v->sw = 0;
        return (v->r2 * v->s);
    }

    v->sw = 1;
    /* Draw (r1, r2) until r1^2 + r2^2 is neither above 1 nor exactly 0. */
    do {
        v->r1 = 2 * HTS_rnd(&v->next) - 1;
        v->r2 = 2 * HTS_rnd(&v->next) - 1;
        v->s = v->r1 * v->r1 + v->r2 * v->r2;
    } while (v->s > 1 || v->s == 0);

    v->s = sqrt(-2 * log(v->s) / v->s);
    return (v->r1 * v->s);
}
/* HTS_mseq: M-sequence random noise generation.
 * Shifts the register v->x right by one, reads the bits selected by the
 * B0 and B28 masks as +1/-1, and feeds back into the B31 bit: cleared when
 * the two bits agree, set when they differ. Returns the B0 value (+1 or -1).
 * The masks are defined outside this file. */
static int HTS_mseq(HTS_Vocoder * v)
{
    int bit0, bit28;

    v->x >>= 1;

    bit0 = (v->x & B0) ? 1 : -1;
    bit28 = (v->x & B28) ? 1 : -1;

    if (bit0 == bit28)
        v->x &= B31_;
    else
        v->x |= B31;

    return bit0;
}
/* HTS_mc2b: transform mel-cepstrum to MLSA digital filter coefficients.
 * Computes b[m] = mc[m] and b[k] = mc[k] - a * b[k + 1] for k = m-1..0.
 * mc and b may be the same array (in-place). With a == 0.0 the result is a
 * plain copy of mc[0..m] (or nothing at all when working in place). */
static void HTS_mc2b(double *mc, double *b, int m, const double a)
{
    int k;

    if (a != 0.0) {
        /* The top coefficient is copied unchanged; skipped when in place. */
        if (mc != b)
            b[m] = mc[m];
        for (k = m - 1; k >= 0; k--)
            b[k] = mc[k] - a * b[k + 1];
        return;
    }

    if (mc != b)
        HTS_movem(mc, b, m + 1);
}
/* HTS_b2mc: transform MLSA digital filter coefficients to mel-cepstrum.
 * Computes mc[m] = b[m] and mc[k] = b[k] + a * b[k + 1] for k = m-1..0.
 * Each b value is read before the matching mc slot is written, so b and mc
 * may be the same array. */
static void HTS_b2mc(const double *b, double *mc, int m, const double a)
{
    int k = m;
    double upper = b[k];        /* b[k + 1] as seen by the next iteration */

    mc[k] = upper;
    while (--k >= 0) {
        const double cur = b[k];

        mc[k] = cur + a * upper;
        upper = cur;
    }
}
/* HTS_freqt: frequency transformation.
 * Transforms c1[0..m1] into c2[0..m2] with warping parameter a.
 * Scratch space lives in v->freqt_buff, which is (re)allocated with
 * 2 * m2 + 2 doubles whenever m2 exceeds v->freqt_size: the first half keeps
 * the previous recursion step, the half starting at freqt_size + 1 keeps the
 * current one. */
static void HTS_freqt(HTS_Vocoder * v, const double *c1, const int m1, double *c2, const int m2, const double a)
{
    const double b = 1 - a * a;
    double *prev, *g;
    int n, j;

    if (m2 > v->freqt_size) {
        if (v->freqt_buff != NULL)
            HTS_free(v->freqt_buff);
        v->freqt_buff = (double *) HTS_calloc(m2 + m2 + 2, sizeof(double));
        v->freqt_size = m2;
    }
    prev = v->freqt_buff;
    g = v->freqt_buff + v->freqt_size + 1;

    for (j = 0; j <= m2; j++)
        g[j] = 0.0;

    /* Consume the input from c1[m1] down to c1[0]. In every step the old
     * g[j] is saved into prev[j] before g[j] is overwritten. */
    for (n = m1; n >= 0; n--) {
        if (0 <= m2) {
            prev[0] = g[0];
            g[0] = c1[n] + a * prev[0];
        }
        if (1 <= m2) {
            prev[1] = g[1];
            g[1] = b * prev[0] + a * prev[1];
        }
        for (j = 2; j <= m2; j++) {
            prev[j] = g[j];
            g[j] = prev[j - 1] + a * (prev[j] - g[j - 1]);
        }
    }

    HTS_movem(g, c2, m2 + 1);
}
/* HTS_c2ir: The minimum phase impulse response is evaluated from the minimum phase cepstrum */
static void HTS_c2ir(const double *c, const int nc, double *h, const int leng)
{
int n, k, upl;
double d;
h[0] = exp(c[0]);
for (n = 1; n < leng; n++) {
d = 0;
upl = (n >= nc) ? nc - 1 : n;
for (k = 1; k <= upl; k++)
d += k * c[k] * h[n - k];
h[n] = d / n;
}
}
/* HTS_b2en: calculate frame energy.
 * Converts b[0..m] with HTS_b2mc, transforms the result with HTS_freqt
 * (using -a) to IRLENG coefficients, turns those into an IRLENG-sample
 * impulse response with HTS_c2ir and returns the sum of its squares.
 * Scratch layout in v->spectrum2en_buff: m + 1 values, then IRLENG
 * cepstrum values, then IRLENG impulse response samples. */
static double HTS_b2en(HTS_Vocoder * v, const double *b, const int m, const double a)
{
    double *mc, *cep, *ir;
    double energy = 0.0;
    int n;

    if (v->spectrum2en_size < m) {
        if (v->spectrum2en_buff != NULL)
            HTS_free(v->spectrum2en_buff);
        v->spectrum2en_buff = (double *) HTS_calloc((m + 1) + 2 * IRLENG, sizeof(double));
        v->spectrum2en_size = m;
    }

    mc = v->spectrum2en_buff;
    cep = mc + m + 1;
    ir = cep + IRLENG;

    HTS_b2mc(b, mc, m, a);
    HTS_freqt(v, mc, m, cep, IRLENG - 1, -a);
    HTS_c2ir(cep, IRLENG, ir, IRLENG);

    for (n = 0; n < IRLENG; n++)
        energy += ir[n] * ir[n];

    return energy;
}
/* HTS_ignorm: inverse gain normalization.
 * g != 0: with k = c1[0]^g, c2[i] = k * c1[i] for i = 1..m and
 *         c2[0] = (k - 1) / g.
 * g == 0: c2[1..m] is a copy of c1[1..m] and c2[0] = log(c1[0]).
 * c1 and c2 may be the same array. */
static void HTS_ignorm(double *c1, double *c2, int m, const double g)
{
    double k;
    int i;

    if (g == 0.0) {
        HTS_movem(&c1[1], &c2[1], m);
        c2[0] = log(c1[0]);
        return;
    }

    k = pow(c1[0], g);
    for (i = m; i >= 1; i--)
        c2[i] = k * c1[i];
    c2[0] = (k - 1.0) / g;
}
/* HTS_gnorm: gain normalization.
 * g != 0: with k = 1 + g * c1[0], c2[i] = c1[i] / k for i = 1..m and
 *         c2[0] = k^(1 / g).
 * g == 0: c2[1..m] is a copy of c1[1..m] and c2[0] = exp(c1[0]).
 * c1 and c2 may be the same array. */
static void HTS_gnorm(double *c1, double *c2, int m, const double g)
{
    double k;
    int i;

    if (g == 0.0) {
        HTS_movem(&c1[1], &c2[1], m);
        c2[0] = exp(c1[0]);
        return;
    }

    k = 1.0 + g * c1[0];
    for (i = m; i >= 1; i--)
        c2[i] = c1[i] / k;
    c2[0] = pow(k, 1.0 / g);
}
/* HTS_lsp2lpc: transform LSP to LPC.
 * Reads lsp[0..m-1] and writes a[0..m], with a[0] fixed to 1.0.
 * The input is copied into v->lsp2lpc_buff before a is written, so a may
 * overlap lsp. The scratch buffer (5 * m + 6 doubles) is laid out as:
 *   m LSP copies | mh1 p | mh2 q | a0, a1, a2 (mh1 + 1 each) |
 *   b0, b1, b2 (mh2 + 1 each)
 * where mh1 = ceil(m / 2) and mh2 = floor(m / 2). */
static void HTS_lsp2lpc(HTS_Vocoder * v, double *lsp, double *a, const int m)
{
    const int flag_odd = (m % 2 != 0);
    const int mh1 = flag_odd ? (m + 1) / 2 : m / 2;
    const int mh2 = flag_odd ? (m - 1) / 2 : m / 2;
    int i, k;
    double xx;
    double xf = 0.0, xff = 0.0;
    double *w, *p, *q;
    double *a0, *a1, *a2, *b0, *b1, *b2;

    if (m > v->lsp2lpc_size) {
        if (v->lsp2lpc_buff != NULL)
            HTS_free(v->lsp2lpc_buff);
        v->lsp2lpc_buff = (double *) HTS_calloc(5 * m + 6, sizeof(double));
        v->lsp2lpc_size = m;
    }

    /* carve the scratch buffer */
    w = v->lsp2lpc_buff;
    p = w + m;
    q = p + mh1;
    a0 = q + mh2;
    a1 = a0 + (mh1 + 1);
    a2 = a1 + (mh1 + 1);
    b0 = a2 + (mh1 + 1);
    b1 = b0 + (mh2 + 1);
    b2 = b1 + (mh2 + 1);

    HTS_movem(lsp, w, m);

    for (i = 0; i <= mh1; i++)
        a0[i] = a1[i] = a2[i] = 0.0;
    for (i = 0; i <= mh2; i++)
        b0[i] = b1[i] = b2[i] = 0.0;

    /* lsp filter parameters: even-indexed LSPs feed p, odd-indexed feed q */
    for (i = 0; i < mh1; i++)
        p[i] = -2.0 * cos(w[2 * i]);
    for (i = 0; i < mh2; i++)
        q[i] = -2.0 * cos(w[2 * i + 1]);

    /* impulse response of analysis filter: a unit impulse at k == 0 */
    for (k = 0; k <= m; k++) {
        xx = (k == 0) ? 1.0 : 0.0;

        if (flag_odd) {
            a0[0] = xx;
            b0[0] = xx - xff;
            xff = xf;
            xf = xx;
        } else {
            a0[0] = xx + xf;
            b0[0] = xx - xf;
            xf = xx;
        }

        for (i = 0; i < mh1; i++) {
            a0[i + 1] = a0[i] + p[i] * a1[i] + a2[i];
            a2[i] = a1[i];
            a1[i] = a0[i];
        }
        for (i = 0; i < mh2; i++) {
            b0[i + 1] = b0[i] + q[i] * b1[i] + b2[i];
            b2[i] = b1[i];
            b1[i] = b0[i];
        }

        if (k != 0)
            a[k - 1] = -0.5 * (a0[mh1] + b0[mh2]);
    }

    /* shift the result up by one slot, flipping the sign */
    for (i = m; i >= 1; i--)
        a[i] = -a[i - 1];
    a[0] = 1.0;
}
/* HTS_gc2gc: generalized cepstral transformation.
 * Converts c1[0..m1] (gamma g1) into c2[0..m2] (gamma g2).
 * c1 is first copied into v->gc2gc_buff (grown to m1 + 1 doubles when
 * needed), so c1 and c2 may be the same array. */
static void HTS_gc2gc(HTS_Vocoder * v, double *c1, const int m1, const double g1, double *c2, const int m2, const double g2)
{
    int n, k, last;
    double sum_k, sum_rest, prod, corr;
    double *src;

    if (m1 > v->gc2gc_size) {
        if (v->gc2gc_buff != NULL)
            HTS_free(v->gc2gc_buff);
        v->gc2gc_buff = (double *) HTS_calloc(m1 + 1, sizeof(double));
        v->gc2gc_size = m1;
    }
    HTS_movem(c1, v->gc2gc_buff, m1 + 1);
    src = v->gc2gc_buff;

    c2[0] = src[0];
    for (n = 1; n <= m2; n++) {
        sum_k = 0.0;
        sum_rest = 0.0;
        last = (m1 < n) ? m1 : n - 1;

        for (k = 1; k <= last; k++) {
            prod = src[k] * c2[n - k];
            sum_k += k * prod;          /* weighted by k */
            sum_rest += (n - k) * prod; /* weighted by n - k */
        }

        corr = (g2 * sum_k - g1 * sum_rest) / n;
        /* Beyond the input order there is no direct term, only the correction. */
        if (n <= m1)
            c2[n] = src[n] + corr;
        else
            c2[n] = corr;
    }
}
/* HTS_mgc2mgc: frequency and generalized cepstral transformation.
 * Converts c1 (order m1, alpha a1, gamma g1) into c2 (order m2, alpha a2,
 * gamma g2). When a1 == a2 no frequency warping is needed and c1 itself is
 * gain-normalized in place (so c1 is modified); otherwise c1 is first
 * warped into c2 with HTS_freqt and all further steps work on c2. */
static void HTS_mgc2mgc(HTS_Vocoder * v, double *c1, const int m1, const double a1, const double g1, double *c2, const int m2, const double a2, const double g2)
{
    double *src = c1;
    int src_order = m1;

    if (a1 != a2) {
        const double a = (a2 - a1) / (1 - a1 * a2);

        HTS_freqt(v, c1, m1, c2, m2, a);
        src = c2;
        src_order = m2;
    }

    HTS_gnorm(src, src, src_order, g1);
    HTS_gc2gc(v, src, src_order, g1, c2, m2, g2);
    HTS_ignorm(c2, c2, m2, g2);
}
/* HTS_lsp2mgc: transform LSP to MGC.
 * lsp[0] is the gain (log gain when v->use_log_gain is set) and lsp[1..m]
 * are the LSP coefficients; mgc[0..m] receives the result.
 * lsp and mgc may be the same array (HTS_Vocoder_synthesize calls it that
 * way on the first frame). */
static void HTS_lsp2mgc(HTS_Vocoder * v, double *lsp, double *mgc, const int m, const double alpha)
{
    int i;
    /* Read the gain BEFORE HTS_lsp2lpc runs: that call stores 1.0 in
     * mgc[0], which is the same slot as lsp[0] when working in place, and
     * the gain would otherwise be replaced by 1.0. */
    const double gain = lsp[0];

    /* lsp2lpc */
    HTS_lsp2lpc(v, lsp + 1, mgc, m);
    if (v->use_log_gain)
        mgc[0] = exp(gain);
    else
        mgc[0] = gain;

    /* mgc2mgc */
    if (NORMFLG1)
        HTS_ignorm(mgc, mgc, m, v->gamma);
    else if (MULGFLG1)
        mgc[0] = (1.0 - mgc[0]) * ((double) v->stage);
    if (MULGFLG1)
        for (i = m; i >= 1; i--)
            mgc[i] *= -((double) v->stage);

    /* Same alpha and gamma on both sides: only the generalized cepstral
     * recursion inside HTS_mgc2mgc is applied. */
    HTS_mgc2mgc(v, mgc, m, alpha, v->gamma, mgc, m, alpha, v->gamma);

    if (NORMFLG2)
        HTS_gnorm(mgc, mgc, m, v->gamma);
    else if (MULGFLG2)
        mgc[0] = mgc[0] * v->gamma + 1.0;
    if (MULGFLG2)
        for (i = m; i >= 1; i--)
            mgc[i] *= v->gamma;
}
/* HTS_mglsadff: sub function for the MGLSA filter (one stage).
 * d[0..m] is the stage's delay line, b[1..m] the filter coefficients.
 * Subtracts the feedback sum from x, shifts the delay line up by one and
 * stores a * d[0] + (1 - a * a) * x in d[0]. Returns the updated x. */
static double HTS_mglsadff(double x, const double *b, const int m, const double a, double *d)
{
    double feedback = d[0] * b[1];
    int k;

    for (k = 1; k < m; k++) {
        d[k] += a * (d[k + 1] - d[k - 1]);
        feedback += d[k] * b[k + 1];
    }
    x -= feedback;

    /* Shift from the top so no value is overwritten before it is moved. */
    k = m;
    while (k > 0) {
        d[k] = d[k - 1];
        k--;
    }
    d[0] = a * d[0] + (1 - a * a) * x;

    return x;
}
/* HTS_mglsadf: MGLSA filter.
 * Runs x through n cascaded HTS_mglsadff stages. Stage i owns the m + 1
 * doubles of d starting at d[i * (m + 1)]. Returns the filtered sample. */
static double HTS_mglsadf(double x, const double *b, const int m, const double a, const int n, double *d)
{
    const int stride = m + 1;
    int stage = 0;

    while (stage < n) {
        x = HTS_mglsadff(x, b, m, a, &d[stage * stride]);
        stage++;
    }
    return x;
}
/* HTS_check_lsp_stability: check LSP stability.
 * Works on lsp[1..m] (lsp[0] is not touched). Makes up to
 * CHECK_LSP_STABILITY_NUM passes; each pass pushes apart neighbours that
 * are closer than min = CHECK_LSP_STABILITY_MIN * PI / (m + 1) and clamps
 * lsp[1] to at least min and lsp[m] to at most PI - min. Stops early once a
 * pass changes nothing. */
static void HTS_check_lsp_stability(double *lsp, size_t m)
{
    const double min = (CHECK_LSP_STABILITY_MIN * PI) / (m + 1);
    size_t pass, j;

    for (pass = 0; pass < CHECK_LSP_STABILITY_NUM; pass++) {
        HTS_Boolean find = FALSE;

        for (j = 1; j < m; j++) {
            const double gap = lsp[j + 1] - lsp[j];

            if (gap < min) {
                /* move both neighbours by half of the missing distance */
                const double half = 0.5 * (min - gap);

                lsp[j] -= half;
                lsp[j + 1] += half;
                find = TRUE;
            }
        }

        if (lsp[1] < min) {
            lsp[1] = min;
            find = TRUE;
        }
        if (lsp[m] > PI - min) {
            lsp[m] = PI - min;
            find = TRUE;
        }

        if (find == FALSE)
            break;
    }
}
/* HTS_lsp2en: calculate frame energy.
 * lsp[0] is the gain (log gain when v->use_log_gain is set), lsp[1..m] the
 * LSP coefficients. The frame is converted with HTS_lsp2lpc, rescaled, and
 * transformed with HTS_mgc2mgc to IRLENG coefficients (alpha 0, gamma 1);
 * the sum of their squares is returned.
 * Scratch layout in v->spectrum2en_buff: m + 1 working values followed by
 * IRLENG output values. lsp itself is not modified. */
static double HTS_lsp2en(HTS_Vocoder * v, double *lsp, size_t m, double alpha)
{
    double *work, *out;
    double energy = 0.0;
    size_t n;

    if (v->spectrum2en_size < m) {
        if (v->spectrum2en_buff != NULL)
            HTS_free(v->spectrum2en_buff);
        v->spectrum2en_buff = (double *) HTS_calloc(m + 1 + IRLENG, sizeof(double));
        v->spectrum2en_size = m;
    }
    work = v->spectrum2en_buff;
    out = work + m + 1;

    /* lsp2lpc */
    HTS_lsp2lpc(v, lsp + 1, work, m);
    work[0] = v->use_log_gain ? exp(lsp[0]) : lsp[0];

    /* mgc2mgc */
    if (NORMFLG1)
        HTS_ignorm(work, work, m, v->gamma);
    else if (MULGFLG1)
        work[0] = (1.0 - work[0]) * ((double) v->stage);
    if (MULGFLG1)
        for (n = 1; n <= m; n++)
            work[n] *= -((double) v->stage);
    HTS_mgc2mgc(v, work, m, alpha, v->gamma, out, IRLENG - 1, 0.0, 1);

    for (n = 0; n < IRLENG; n++)
        energy += out[n] * out[n];

    return energy;
}
/* HTS_white_noise: return white noise.
 * Uses the gaussian generator when v->gauss is set, otherwise the
 * M-sequence generator (+1 / -1). */
static double HTS_white_noise(HTS_Vocoder * v)
{
    return v->gauss ? (double) HTS_nrandom(v) : (double) HTS_mseq(v);
}
/* HTS_Vocoder_initialize_excitation: initialize excitation.
 * Sets the current pitch and pitch counter to pitch with no per-sample
 * increment. When nlpf > 0 a zero-filled ring buffer of nlpf doubles is
 * allocated; otherwise the vocoder is left without a ring buffer. */
static void HTS_Vocoder_initialize_excitation(HTS_Vocoder * v, double pitch, size_t nlpf)
{
    size_t n;

    v->pitch_of_curr_point = pitch;
    v->pitch_counter = pitch;
    v->pitch_inc_per_point = 0.0;

    /* start from the "no ring buffer" state */
    v->excite_buff_size = 0;
    v->excite_ring_buff = NULL;
    v->excite_buff_index = 0;

    if (nlpf > 0) {
        v->excite_buff_size = nlpf;
        v->excite_ring_buff = (double *) HTS_calloc(v->excite_buff_size, sizeof(double));
        for (n = 0; n < v->excite_buff_size; n++)
            v->excite_ring_buff[n] = 0.0;
    }
}
/* HTS_Vocoder_start_excitation: start excitation of each frame.
 * If both the current pitch and the new pitch are non-zero, the per-sample
 * increment is set so the pitch reaches the new value after v->fprd
 * samples. Otherwise the pitch and the pitch counter jump straight to the
 * new value with no increment. */
static void HTS_Vocoder_start_excitation(HTS_Vocoder * v, double pitch)
{
    if (v->pitch_of_curr_point == 0.0 || pitch == 0.0) {
        v->pitch_inc_per_point = 0.0;
        v->pitch_of_curr_point = pitch;
        v->pitch_counter = pitch;
        return;
    }

    v->pitch_inc_per_point = (pitch - v->pitch_of_curr_point) / v->fprd;
}
/* HTS_Vocoder_excite_unvoiced_frame: add noise to the ring buffer.
 * The noise sample goes into the slot (excite_buff_size - 1) / 2 positions
 * ahead of the current read index, wrapping around the buffer. */
static void HTS_Vocoder_excite_unvoiced_frame(HTS_Vocoder * v, double noise)
{
    const size_t center = (v->excite_buff_size - 1) / 2;
    const size_t slot = (v->excite_buff_index + center) % v->excite_buff_size;

    v->excite_ring_buff[slot] += noise;
}
/* HTS_Vocoder_excite_voiced_frame: add noise and pulse to the ring buffer.
 * lpf holds excite_buff_size filter taps. For every tap i the slot i
 * positions ahead of the read index receives
 *   noise * (delta(i) - lpf[i])   (delta is 1 at the center tap, else 0)
 *   pulse * lpf[i]
 * in that order. A zero noise or zero pulse contributes nothing. */
static void HTS_Vocoder_excite_voiced_frame(HTS_Vocoder * v, double noise, double pulse, const double *lpf)
{
    const size_t center = (v->excite_buff_size - 1) / 2;
    size_t i;

    for (i = 0; i < v->excite_buff_size; i++) {
        const size_t slot = (v->excite_buff_index + i) % v->excite_buff_size;

        if (noise != 0.0) {
            if (i == center)
                v->excite_ring_buff[slot] += noise * (1.0 - lpf[i]);
            else
                v->excite_ring_buff[slot] += noise * (0.0 - lpf[i]);
        }
        if (pulse != 0.0)
            v->excite_ring_buff[slot] += pulse * lpf[i];
    }
}
/* HTS_Vocoder_get_excitation: get excitation of each sample.
 * A current pitch of 0.0 means unvoiced (noise only). Otherwise the pitch
 * counter advances by one per sample and, once it reaches the current
 * pitch, a pulse of sqrt(pitch) is emitted and the counter is reduced by
 * the pitch. The current pitch then moves by the per-sample increment.
 * With a ring buffer (excite_buff_size > 0) noise and pulse are spread over
 * the buffer through lpf and the sample at the read index is returned and
 * cleared; without one the noise or pulse is returned directly. */
static double HTS_Vocoder_get_excitation(HTS_Vocoder * v, const double *lpf)
{
    double out;
    double noise, pulse;

    if (!(v->excite_buff_size > 0)) {
        /* no ring buffer: noise or pulse goes out unfiltered */
        if (v->pitch_of_curr_point == 0.0)
            return HTS_white_noise(v);

        out = 0.0;
        v->pitch_counter += 1.0;
        if (v->pitch_counter >= v->pitch_of_curr_point) {
            out = sqrt(v->pitch_of_curr_point);
            v->pitch_counter -= v->pitch_of_curr_point;
        }
        v->pitch_of_curr_point += v->pitch_inc_per_point;
        return out;
    }

    /* A noise sample is drawn for every output sample, voiced or not. */
    noise = HTS_white_noise(v);
    if (v->pitch_of_curr_point == 0.0) {
        HTS_Vocoder_excite_unvoiced_frame(v, noise);
    } else {
        pulse = 0.0;
        v->pitch_counter += 1.0;
        if (v->pitch_counter >= v->pitch_of_curr_point) {
            pulse = sqrt(v->pitch_of_curr_point);
            v->pitch_counter -= v->pitch_of_curr_point;
        }
        HTS_Vocoder_excite_voiced_frame(v, noise, pulse, lpf);
        v->pitch_of_curr_point += v->pitch_inc_per_point;
    }

    /* pop the sample at the read index and advance with wrap-around */
    out = v->excite_ring_buff[v->excite_buff_index];
    v->excite_ring_buff[v->excite_buff_index] = 0.0;
    v->excite_buff_index++;
    if (v->excite_buff_index >= v->excite_buff_size)
        v->excite_buff_index = 0;

    return out;
}
/* HTS_Vocoder_end_excitation: end excitation of each frame.
 * Sets the current pitch to the frame's pitch, replacing whatever value the
 * per-sample increments in HTS_Vocoder_get_excitation accumulated. */
static void HTS_Vocoder_end_excitation(HTS_Vocoder * v, double pitch)
{
v->pitch_of_curr_point = pitch;
}
/* HTS_Vocoder_postfilter_mcp: postfilter for MCP.
 * Does nothing unless beta > 0 and m > 1. Otherwise mcp[0..m] is converted
 * to filter coefficients, the coefficients from index 2 upward are scaled
 * by (1 + beta), the 0th coefficient is adjusted by log(e1 / e2) / 2 where
 * e1 and e2 are the frame energies before and after scaling, and the result
 * is converted back into mcp. */
static void HTS_Vocoder_postfilter_mcp(HTS_Vocoder * v, double *mcp, const int m, double alpha, double beta)
{
    double en_before, en_after;
    int k;

    if (!(beta > 0.0) || m <= 1)
        return;

    if (v->postfilter_size < m) {
        if (v->postfilter_buff != NULL)
            HTS_free(v->postfilter_buff);
        v->postfilter_buff = (double *) HTS_calloc(m + 1, sizeof(double));
        v->postfilter_size = m;
    }

    HTS_mc2b(mcp, v->postfilter_buff, m, alpha);
    en_before = HTS_b2en(v, v->postfilter_buff, m, alpha);

    v->postfilter_buff[1] -= beta * alpha * v->postfilter_buff[2];
    for (k = 2; k <= m; k++)
        v->postfilter_buff[k] *= (1.0 + beta);

    en_after = HTS_b2en(v, v->postfilter_buff, m, alpha);
    v->postfilter_buff[0] += log(en_before / en_after) / 2;

    HTS_b2mc(v->postfilter_buff, mcp, m, alpha);
}
/* HTS_Vocoder_postfilter_lsp: postfilter for LSP.
 * Does nothing unless beta > 0 and m > 1. lsp[0] is the gain (log gain when
 * v->use_log_gain is set); lsp[1..m] are the LSP coefficients.
 * The interior coefficients lsp[2..m-1] are repositioned between their
 * neighbours according to beta; lsp[0], lsp[1] and lsp[m] keep their
 * values. The gain is then rescaled so the frame energy measured by
 * HTS_lsp2en stays the same as before filtering. */
static void HTS_Vocoder_postfilter_lsp(HTS_Vocoder * v, double *lsp, size_t m, double alpha, double beta)
{
    double e1, e2;
    size_t i;
    double d1, d2, denom;

    if (!(beta > 0.0) || m <= 1)
        return;

    if (v->postfilter_size < m) {
        if (v->postfilter_buff != NULL)
            HTS_free(v->postfilter_buff);
        v->postfilter_buff = (double *) HTS_calloc(m + 1, sizeof(double));
        v->postfilter_size = m;
    }

    e1 = HTS_lsp2en(v, lsp, m, alpha);

    /* postfiltering: results go to a scratch buffer so every coefficient is
     * computed from the unfiltered neighbours */
    for (i = 0; i <= m; i++) {
        v->postfilter_buff[i] = lsp[i];
        if (i > 1 && i < m) {
            d1 = beta * (lsp[i + 1] - lsp[i]);
            d2 = beta * (lsp[i] - lsp[i - 1]);
            denom = (d2 * d2) + (d1 * d1);
            /* When both neighbour distances are zero the formula is 0 / 0;
             * keep the coefficient unchanged instead of producing NaN. */
            if (denom != 0.0)
                v->postfilter_buff[i] = lsp[i - 1] + d2 + (d2 * d2 * ((lsp[i + 1] - lsp[i - 1]) - (d1 + d2))) / denom;
        }
    }
    HTS_movem(v->postfilter_buff, lsp, m + 1);

    e2 = HTS_lsp2en(v, lsp, m, alpha);
    if (e1 != e2) {
        if (v->use_log_gain)
            lsp[0] += 0.5 * log(e1 / e2);
        else
            lsp[0] *= sqrt(e1 / e2);
    }
}
/* HTS_Vocoder_initialize: initialize vocoder.
 * m is the spectral order, stage selects the mode (0 = MCP, non-zero = LSP
 * with gamma = -1 / stage), rate and fperiod are stored in v->rate and
 * v->fprd.
 * One block is allocated for v->c and sliced into four regions:
 * c, cc and cinc (m + 1 doubles each) followed by the filter state d1,
 * whose size depends on the mode. All scratch buffers start out empty. */
void HTS_Vocoder_initialize(HTS_Vocoder * v, size_t m, size_t stage, HTS_Boolean use_log_gain, size_t rate, size_t fperiod)
{
    size_t total;

    /* set parameter */
    v->is_first = TRUE;
    v->stage = stage;
    v->gamma = (stage != 0) ? -1.0 / v->stage : 0.0;
    v->use_log_gain = use_log_gain;
    v->fprd = fperiod;
    v->next = SEED;
    v->gauss = GAUSS;
    v->rate = rate;

    /* excitation state */
    v->pitch_of_curr_point = 0.0;
    v->pitch_counter = 0.0;
    v->pitch_inc_per_point = 0.0;
    v->excite_ring_buff = NULL;
    v->excite_buff_size = 0;
    v->excite_buff_index = 0;

    /* noise generator state */
    v->sw = 0;
    v->x = 0x55555555;

    /* init buffer */
    v->freqt_buff = NULL;
    v->freqt_size = 0;
    v->gc2gc_buff = NULL;
    v->gc2gc_size = 0;
    v->lsp2lpc_buff = NULL;
    v->lsp2lpc_size = 0;
    v->postfilter_buff = NULL;
    v->postfilter_size = 0;
    v->spectrum2en_buff = NULL;
    v->spectrum2en_size = 0;

    if (v->stage == 0)          /* for MCP */
        total = m * (3 + PADEORDER) + 5 * PADEORDER + 6;
    else                        /* for LSP */
        total = (m + 1) * (v->stage + 3);

    /* The slicing is the same in both modes; only the total size differs. */
    v->c = (double *) HTS_calloc(total, sizeof(double));
    v->cc = v->c + m + 1;
    v->cinc = v->cc + m + 1;
    v->d1 = v->cinc + m + 1;
}
/* HTS_Vocoder_synthesize: pulse/noise excitation and MLSA/MGLSA filter
 * based waveform synthesis.
 * Synthesizes v->fprd samples for one frame.
 *   m        spectral order
 *   lf0      log F0 of the frame; LZERO marks an unvoiced frame
 *   spectrum frame spectrum (MCP when v->stage == 0, LSP otherwise);
 *            modified in place by the postfilter / stability check
 *   nlpf/lpf low-pass filter length and taps (nlpf is only read on the
 *            first call)
 *   alpha    frequency warping parameter, beta postfilter coefficient
 *   volume   linear gain applied to every sample
 *   rawdata  if non-NULL, receives the v->fprd samples as doubles
 *   audio    if non-NULL, each sample is clamped to the 16-bit range and
 *            passed to HTS_Audio_write
 * Filter coefficients are interpolated linearly from the previous frame
 * (v->c) to the current frame (v->cc) across the frame. */
void HTS_Vocoder_synthesize(HTS_Vocoder * v, size_t m, double lf0, double *spectrum, size_t nlpf, double *lpf, double alpha, double beta, double volume, double *rawdata, HTS_Audio * audio)
{
    double x;
    double p;
    int i, j;
    short xs;

    /* lf0 -> pitch period in samples (0.0 = unvoiced); F0 is limited to
     * the MIN_F0..MAX_F0 range */
    if (lf0 == LZERO)
        p = 0.0;
    else if (lf0 <= MIN_LF0)
        p = v->rate / MIN_F0;
    else if (lf0 >= MAX_LF0)
        p = v->rate / MAX_F0;
    else
        p = v->rate / exp(lf0);

    /* first time: seed the "previous frame" coefficients from this frame */
    if (v->is_first == TRUE) {
        HTS_Vocoder_initialize_excitation(v, p, nlpf);
        if (v->stage == 0) {    /* for MCP */
            HTS_mc2b(spectrum, v->c, m, alpha);
        } else {                /* for LSP */
            HTS_movem(spectrum, v->c, m + 1);
            HTS_lsp2mgc(v, v->c, v->c, m, alpha);
            HTS_mc2b(v->c, v->c, m, alpha);
            HTS_gnorm(v->c, v->c, m, v->gamma);
            for (i = 1; i <= m; i++)
                v->c[i] *= v->gamma;
        }
        v->is_first = FALSE;
    }

    HTS_Vocoder_start_excitation(v, p);

    /* target coefficients for the end of this frame */
    if (v->stage == 0) {        /* for MCP */
        HTS_Vocoder_postfilter_mcp(v, spectrum, m, alpha, beta);
        HTS_mc2b(spectrum, v->cc, m, alpha);
    } else {                    /* for LSP */
        HTS_Vocoder_postfilter_lsp(v, spectrum, m, alpha, beta);
        HTS_check_lsp_stability(spectrum, m);
        HTS_lsp2mgc(v, spectrum, v->cc, m, alpha);
        HTS_mc2b(v->cc, v->cc, m, alpha);
        HTS_gnorm(v->cc, v->cc, m, v->gamma);
        for (i = 1; i <= m; i++)
            v->cc[i] *= v->gamma;
    }

    /* per-sample coefficient step, identical for both modes */
    for (i = 0; i <= m; i++)
        v->cinc[i] = (v->cc[i] - v->c[i]) / v->fprd;

    for (j = 0; j < v->fprd; j++) {
        x = HTS_Vocoder_get_excitation(v, lpf);
        if (v->stage == 0) {    /* for MCP */
            if (x != 0.0)
                x *= exp(v->c[0]);
            x = HTS_mlsadf(x, v->c, m, alpha, PADEORDER, v->d1);
        } else {                /* for LSP */
            if (!NGAIN)
                x *= v->c[0];
            x = HTS_mglsadf(x, v->c, m, alpha, v->stage, v->d1);
        }
        x *= volume;

        /* output: one sample per iteration, so the raw index equals j */
        if (rawdata)
            rawdata[j] = x;
        if (audio) {
            if (x > 32767.0)
                xs = 32767;
            else if (x < -32768.0)
                xs = -32768;
            else
                xs = (short) x;
            HTS_Audio_write(audio, xs);
        }

        for (i = 0; i <= m; i++)
            v->c[i] += v->cinc[i];
    }

    HTS_Vocoder_end_excitation(v, p);
    /* land exactly on the target so interpolation error does not build up */
    HTS_movem(v->cc, v->c, m + 1);
}
/* HTS_Vocoder_clear: clear vocoder.
 * Frees every buffer owned by the vocoder and resets the matching size
 * fields. Safe to call with v == NULL and safe to call more than once:
 * every freed pointer is set to NULL. */
void HTS_Vocoder_clear(HTS_Vocoder * v)
{
    if (v == NULL)
        return;

    /* free scratch buffers */
    if (v->freqt_buff != NULL) {
        HTS_free(v->freqt_buff);
        v->freqt_buff = NULL;
    }
    v->freqt_size = 0;

    if (v->gc2gc_buff != NULL) {
        HTS_free(v->gc2gc_buff);
        v->gc2gc_buff = NULL;
    }
    v->gc2gc_size = 0;

    if (v->lsp2lpc_buff != NULL) {
        HTS_free(v->lsp2lpc_buff);
        v->lsp2lpc_buff = NULL;
    }
    v->lsp2lpc_size = 0;

    if (v->postfilter_buff != NULL) {
        HTS_free(v->postfilter_buff);
        v->postfilter_buff = NULL;
    }
    v->postfilter_size = 0;

    if (v->spectrum2en_buff != NULL) {
        HTS_free(v->spectrum2en_buff);
        v->spectrum2en_buff = NULL;
    }
    v->spectrum2en_size = 0;

    /* free the coefficient block */
    if (v->c != NULL) {
        HTS_free(v->c);
        v->c = NULL;
    }
    /* cc, cinc and d1 are not separate allocations: HTS_Vocoder_initialize
     * points them into the block owned by v->c. Reset them as well so they
     * do not dangle into freed memory. */
    v->cc = NULL;
    v->cinc = NULL;
    v->d1 = NULL;

    /* free the excitation ring buffer */
    v->excite_buff_size = 0;
    v->excite_buff_index = 0;
    if (v->excite_ring_buff != NULL) {
        HTS_free(v->excite_ring_buff);
        v->excite_ring_buff = NULL;
    }
}
HTS_VOCODER_C_END;
#endif /* !HTS_VOCODER_C */