Edinburgh Speech Tools 2.4-release
spectrogram.cc
1/*************************************************************************/
2/* */
3/* Centre for Speech Technology Research */
4/* University of Edinburgh, UK */
5/* Copyright (c) 1994,1995,1996 */
6/* All Rights Reserved. */
7/* */
8/* Permission is hereby granted, free of charge, to use and distribute */
9/* this software and its documentation without restriction, including */
10/* without limitation the rights to use, copy, modify, merge, publish, */
11/* distribute, sublicense, and/or sell copies of this work, and to */
12/* permit persons to whom this work is furnished to do so, subject to */
13/* the following conditions: */
14/* 1. The code must retain the above copyright notice, this list of */
15/* conditions and the following disclaimer. */
16/* 2. Any modifications must be clearly marked as such. */
17/* 3. Original authors' names are not deleted. */
18/* 4. The authors' names are not used to endorse or promote products */
19/* derived from this software without specific prior written */
20/* permission. */
21/* */
22/* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23/* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24/* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25/* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26/* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27/* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28/* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29/* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30/* THIS SOFTWARE. */
31/* */
32/*************************************************************************/
33/* Author : Paul Taylor */
34/* Date : December 96 */
35/*-----------------------------------------------------------------------*/
36/* Spectrogram Generation */
37/* */
38/*=======================================================================*/
39#include <cmath>
40#include <climits>
41#include <cfloat> /* needed for FLT_MAX */
42#include "EST_error.h"
43#include "EST_Track.h"
44#include "EST_Wave.h"
45#include "sigpr/EST_Window.h"
46#include "EST_Option.h"
47#include "sigpr/EST_fft.h"
48#include "sigpr/EST_spectrogram.h"
49#include "sigpr/EST_misc_sigpr.h"
50
51
52void make_spectrogram(EST_Wave &sig, EST_Track &sp, EST_Features &op)
53{
54 EST_Wave psig;
55
56 EST_pre_emphasis(sig, psig, op.F("preemph"));
57
58 // calculate raw spectrogram
59 raw_spectrogram(sp, psig, op.F("frame_length"), op.F("frame_shift"),
60 op.I("frame_order"), op.present("slow_fft"));
61
62 if (op.present("raw"))
63 {
64 cout << "no scaling\n";
65 return;
66 }
67 // coerce the values so as to emphasis important features
68
69 if (op.present("sp_range") || op.present("sp_wcut") || op.present("sp_bcut"))
70 {
71 if (!op.present("sp_range"))
72 op.set("sp_range", 1.0);
73
74 if (!op.present("sp_wcut"))
75 op.set("sp_wcut", 1.0);
76
77 if (!op.present("sp_bcut"))
78 op.set("sp_bcut", 0.0);
79 scale_spectrogram(sp, op.F("sp_range"),op.F("sp_wcut"),op.F("sp_bcut"));
80 }
81}
82
83void scale_spectrogram(EST_Track &sp, float range, float wcut, float bcut)
84{
85 float max, min, scale, v;
86 int i, j;
87
88 max = -FLT_MIN;
89 min = FLT_MAX;
90
91 // find min and max values
92 for (i = 0; i < sp.num_frames(); ++i)
93 for (j = 0; j < sp.num_channels(); ++j)
94 {
95 float vv = sp.a_no_check(i, j);
96
97 if (vv > max)
98 max = vv;
99 if (vv < min)
100 min = vv;
101 }
102 scale = (max - min);
103
104 // for every value:
105 // 1. Effectively scale in range 0 to 1
106 // 2. Impose white and black cut offs
107 // 3. Rescale to 0 and 1
108 // 4. scale to fit in "range"
109 // this can obviously be done more efficiently
110
111 float mag = (float)range / (float)(bcut - wcut);
112 for (i = 0; i < sp.num_frames(); ++i)
113 for (j = 0; j < sp.num_channels(); ++j)
114 {
115 v = (((sp.a_no_check(i, j) - min) / scale) - wcut) * mag;
116 if (v > range) v = range;
117 if (v < 0.0) v = 0.0;
118 sp.a_no_check(i, j) = v;
119 }
120}
121
122void raw_spectrogram(EST_Track &sp, EST_Wave &sig,
123 float length,
124 float shift,
125 int order,
126 bool slow)
127{
128 int frame_length = (int) (length * (float) sig.sample_rate() +0.5);
129 int frame_shift = (int) (shift * (float) sig.sample_rate() +0.5);
130
131 EST_WindowFunc *make_window = EST_Window::creator("hamming");
132
133 // sanity check, we can't analyse more signal than order allows.
134 if (frame_length > order)
135 {
136 EST_warning("frame_length reduced to %f (%d samples) to fit order\n",
137 (float)order/(float) sig.sample_rate(), order);
138 frame_length=order;
139 }
140
141 // enough frames to cover the entire signal
142 int num_frames= (int)ceil(sig.num_samples()/(float)frame_shift);
143
144 // spectrogram gets order/2 powers, the moduli of order/2
145 // complex numbers
146 sp.resize(num_frames, order/2, FALSE);
147
148 EST_FVector real(order);
149 EST_FVector imag(order);
150
151 // create the window shape
152 EST_TBuffer<float> window_vals(frame_length);
153 make_window(frame_length, window_vals,-1);
154
155 for (int k = 0 ; k < num_frames ; k++)
156 {
157 int pos = frame_shift * k;
158 int window_start = pos - frame_length/2;
159
160 real.empty();
161
162 // imag not used in old FFT code
163 if (slow)
164 imag.empty();
165
167 window_vals,
168 window_start,
169 frame_length,
170 real, FALSE);
171
172 int state = slow?power_spectrum_slow(real, imag):power_spectrum(real, imag);
173 if (state != 0)
174 {
175 fprintf(stderr, "FFT Failed for frame %d\n", k);
176 for (int i = 0; i < order /2; ++i)
177 sp.a_no_check(k, i) = 0;
178 }
179 else
180 sp.copy_frame_in(k, real);
181 }
182 sp.fill_time(shift);
183}
184
void set(const EST_String &name, int ival)
Definition: EST_Features.h:185
const float F(const EST_String &path) const
Definition: EST_Features.h:135
int present(const EST_String &name) const
const int I(const EST_String &path) const
Definition: EST_Features.h:146
void copy_frame_in(int n, const float *buf, int offset=0, int num=EST_ALL)
Definition: EST_Track.h:345
int num_channels() const
return number of channels in track
Definition: EST_Track.h:656
int num_frames() const
return number of frames in track
Definition: EST_Track.h:650
float & a_no_check(int i, int c=0)
Definition: EST_Track.h:419
void resize(int num_frames, int num_channels, bool preserve=1)
Definition: EST_Track.cc:211
void fill_time(float t, int start=1)
Definition: EST_Track.cc:786
int sample_rate() const
return the sampling rate (frequency)
Definition: EST_Wave.h:147
int num_samples() const
return the number of samples in the waveform
Definition: EST_Wave.h:143
static void window_signal(const EST_Wave &sig, EST_WindowFunc *make_window, int start, int size, EST_TBuffer< float > &frame)
Definition: EST_Window.cc:275
static Func * creator(const char *name, bool report_error=false)
Return the creation function for the given window type.
Definition: EST_Window.cc:216