Рефераты. Система управления распознаванием речевой информации

using System;
using System.Drawing;
using System.Windows.Forms;


namespace SoundViewer
{
    /// <summary>
    /// Holds one frame of two-channel audio, computes its frequency-domain
    /// representation and renders both views into PictureBox controls.
    /// </summary>
    class AudioFrame
    {
        // Number of bytes occupied by one left/right pair of 16-bit samples.
        private const int BytesPerStereoFrame = 4;

        // A 16-bit sample has values from -32768 to 32767.
        private const double SampleFullScale = 32768.0;

        // Arbitrary divisor that maps a spectrum amplitude to a fraction of the channel height.
        private const double SpectrumScale = 100.0;

        private Bitmap _canvasTimeDomain;
        private Bitmap _canvasFrequencyDomain;
        private double[] _waveLeft;
        private double[] _waveRight;
        private double[] _fftLeft;
        private double[] _fftRight;
        private SignalGenerator _signalGenerator;
        private readonly bool _isTest;

        /// <summary>
        /// Creates a frame.
        /// </summary>
        /// <param name="isTest">
        /// When true, <see cref="Process"/> ignores its input and analyses a generated sine signal instead.
        /// </param>
        public AudioFrame(bool isTest)
        {
            _isTest = isTest;
        }

        /// <summary>
        /// Process 16 bit sample: splits the buffer into left and right channels
        /// and computes the frequency-domain data for each channel.
        /// </summary>
        /// <param name="wave">
        /// Buffer of 4-byte frames: a 16-bit left sample followed by a 16-bit right sample.
        /// Trailing bytes that do not form a complete frame are ignored.
        /// Not read in test mode.
        /// </param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="wave"/> is null and the frame is not in test mode.
        /// </exception>
        public void Process(ref byte[] wave)
        {
            if (_isTest)
            {
                // Generate artificial sample for testing
                _signalGenerator = new SignalGenerator();
                _signalGenerator.SetWaveform("Sine");
                _signalGenerator.SetSamplingRate(44100);
                _signalGenerator.SetSamples(16384);
                _signalGenerator.SetFrequency(5000);
                _waveLeft = _signalGenerator.GenerateSignal();
                _waveRight = _signalGenerator.GenerateSignal();
            }
            else
            {
                if (wave == null)
                {
                    throw new ArgumentNullException("wave");
                }

                // Iterate over whole frames only, so a buffer whose length is not a
                // multiple of four can neither overrun the channel arrays nor make
                // BitConverter read past the end of the buffer.
                int frames = wave.Length / BytesPerStereoFrame;
                _waveLeft = new double[frames];
                _waveRight = new double[frames];

                for (int frame = 0; frame < frames; frame++)
                {
                    int offset = frame * BytesPerStereoFrame;
                    _waveLeft[frame] = BitConverter.ToInt16(wave, offset);
                    _waveRight[frame] = BitConverter.ToInt16(wave, offset + 2);
                }
            }

            // Generate frequency domain data in decibels
            _fftLeft = FourierTransform.FFTDb(ref _waveLeft);
            _fftRight = FourierTransform.FFTDb(ref _waveRight);
        }

        /// <summary>
        /// Render time domain to PictureBox. The left channel is drawn in the upper
        /// half and the right channel in the lower half, separated by a centre line.
        /// </summary>
        /// <param name="pictureBox">Control that receives the rendered bitmap as its Image.</param>
        /// <exception cref="ArgumentNullException"><paramref name="pictureBox"/> is null.</exception>
        public void RenderTimeDomain(ref PictureBox pictureBox)
        {
            if (pictureBox == null)
            {
                throw new ArgumentNullException("pictureBox");
            }

            // A collapsed control has no drawable area and GDI+ rejects zero-sized bitmaps.
            if (pictureBox.Width <= 0 || pictureBox.Height <= 0)
            {
                return;
            }

            _canvasTimeDomain = new Bitmap(pictureBox.Width, pictureBox.Height);

            // Determine channel boundaries
            int width = _canvasTimeDomain.Width;
            int height = _canvasTimeDomain.Height;
            int center = height / 2;

            using (Graphics offScreenDC = Graphics.FromImage(_canvasTimeDomain))
            using (Pen axisPen = new Pen(Color.WhiteSmoke))
            using (Pen wavePen = new Pen(Color.LimeGreen))
            {
                offScreenDC.DrawLine(axisPen, 0, center, width, center);

                // Left channel occupies the area above the centre line, right channel the area below it.
                DrawWaveform(offScreenDC, wavePen, _waveLeft, 0, width, 0, center - 1);
                DrawWaveform(offScreenDC, wavePen, _waveRight, 0, width, center + 1, height);
            }

            pictureBox.Image = _canvasTimeDomain;
        }

        /// <summary>
        /// Render frequency domain to PictureBox. Left-channel bars hang down from the
        /// top edge, right-channel bars rise from the bottom edge.
        /// </summary>
        /// <param name="pictureBox">Control that receives the rendered bitmap as its Image.</param>
        /// <exception cref="ArgumentNullException"><paramref name="pictureBox"/> is null.</exception>
        public void RenderFrequencyDomain(ref PictureBox pictureBox)
        {
            if (pictureBox == null)
            {
                throw new ArgumentNullException("pictureBox");
            }

            // A collapsed control has no drawable area and GDI+ rejects zero-sized bitmaps.
            if (pictureBox.Width <= 0 || pictureBox.Height <= 0)
            {
                return;
            }

            _canvasFrequencyDomain = new Bitmap(pictureBox.Width, pictureBox.Height);

            // Determine channel boundaries
            int width = _canvasFrequencyDomain.Width;
            int height = _canvasFrequencyDomain.Height;
            int center = height / 2;

            using (Graphics offScreenDC = Graphics.FromImage(_canvasFrequencyDomain))
            using (Pen axisPen = new Pen(Color.WhiteSmoke))
            using (Pen barPen = new Pen(Color.Black))
            {
                offScreenDC.DrawLine(axisPen, 0, center, width, center);

                // Left: anchored at the top edge, growing downwards to just above the centre line.
                DrawSpectrum(offScreenDC, barPen, _fftLeft, 0, width, 0, center - 1);

                // Right: anchored at the bottom edge, growing upwards (negative extent)
                // to just below the centre line.
                DrawSpectrum(offScreenDC, barPen, _fftRight, 0, width, height, (center + 1) - height);
            }

            pictureBox.Image = _canvasFrequencyDomain;
        }

        /// <summary>
        /// Draws one channel as a polyline, one vertex per pixel column, inside the
        /// rectangle [left, right) x [top, bottom]. Does nothing when there are no samples.
        /// </summary>
        private static void DrawWaveform(Graphics graphics, Pen pen, double[] samples,
                                         int left, int right, int top, int bottom)
        {
            int columns = right - left;
            if (samples == null || samples.Length == 0 || columns <= 0)
            {
                return;
            }

            int yCenter = top + ((bottom - top) / 2);
            double yScale = 0.5 * (bottom - top) / SampleFullScale;

            // Fractional stride: an integer stride would truncate and leave the tail
            // of the buffer undrawn whenever the sample count is not a multiple of the width.
            double samplesPerColumn = (double)samples.Length / columns;

            int xPrev = left;
            int yPrev = 0;
            for (int x = left; x < right; x++)
            {
                int index = Math.Min((int)(samplesPerColumn * (x - left)), samples.Length - 1);
                int y = (int)(yCenter + (samples[index] * yScale));

                // The first column only establishes the starting point of the polyline.
                if (x > left)
                {
                    graphics.DrawLine(pen, xPrev, yPrev, x, y);
                }

                xPrev = x;
                yPrev = y;
            }
        }

        /// <summary>
        /// Draws one channel's spectrum as vertical bars, one per pixel column in [left, right).
        /// Each bar starts at <paramref name="anchorY"/> and extends by
        /// amplitude / 100 of <paramref name="signedExtent"/> (negative extent draws upwards).
        /// Does nothing when there is no spectrum data.
        /// </summary>
        private static void DrawSpectrum(Graphics graphics, Pen pen, double[] spectrum,
                                         int left, int right, int anchorY, int signedExtent)
        {
            int columns = right - left;
            if (spectrum == null || spectrum.Length == 0 || columns <= 0)
            {
                return;
            }

            double binsPerColumn = (double)spectrum.Length / columns;

            for (int x = left; x < right; x++)
            {
                int index = Math.Min((int)(binsPerColumn * (x - left)), spectrum.Length - 1);
                double value = spectrum[index];

                // Drop negative values; NaN fails the comparison and is treated as zero too.
                int amplitude = value > 0 ? (int)value : 0;

                int y = (int)(anchorY + (signedExtent * (double)amplitude) / SpectrumScale);
                pen.Color = Color.FromArgb(120, 120, amplitude % 255);
                graphics.DrawLine(pen, x, anchorY, x, y);
            }
        }

        /// <summary>
        /// Placeholder for handling a captured input buffer; currently does nothing.
        /// </summary>
        /// <remarks>
        /// Pointer parameters are only legal in an unsafe context, so the method is
        /// marked unsafe; the project must be compiled with unsafe code allowed.
        /// </remarks>
        private unsafe void WaveIn(short* buf, int len)
        {
            // TODO: recognition (original note: "raspoznavat") is not implemented.
        }
    }
}


2. Листинг программы – Speech Recognition (Matlab)


2.1) CMN.m


function NormMatrix = CMN(Matrix)
%CMN Subtract the column mean from every column of a matrix.
%   NormMatrix = CMN(Matrix) returns a matrix of the same size as Matrix in
%   which each column has had its own mean removed, so every column of the
%   result has zero mean.
%   NOTE(review): the name presumably stands for cepstral mean
%   normalization, since the caller applies it to MFCC features - confirm.

[r,c]=size(Matrix);
NormMatrix=zeros(r,c);

for i=1:c
    MatMean=mean(Matrix(:,i)); %Derives mean for each column i in utterance
    NormMatrix(:,i)=Matrix(:,i)-MatMean; %Subtracts mean from each element in column i
end


2.2) Recognition.m


% Recognition script: records an utterance, extracts its features, scores it
% against the stored templates with DTW and reports the word chosen by a
% k-nearest-neighbour vote.
clear all;
close all;

ncoeff = 13; %Required number of mfcc coefficients
N = 20; %Number of words in vocabulary
k = 3; %Number of nearest neighbors to choose
fs=16000; %Sampling rate
duration1 = 0.1; %Initial silence duration in seconds
duration2 = 2; %Recording duration in seconds
G=2; %vary this factor to compensate for amplitude variations
NSpeakers = 5; %Number of training speakers

fprintf('Press any key to start %g seconds of speech recording...', duration2);
pause;
silence = wavrecord(duration1*fs, fs);
fprintf('Recording speech...');
speechIn = wavrecord(duration2*fs, fs); % duration*fs is the total number of sample points
fprintf('Finished recording.\n');
fprintf('System is trying to recognize what you have spoken...\n');

speechIn1 = [silence;speechIn]; %prepends the initial silence (duration1 seconds)
speechIn2 = speechIn1.*G;
speechIn3 = speechIn2 - mean(speechIn2); %DC offset elimination
speechIn = nreduce(speechIn3,fs); %Applies spectral subtraction
rMatrix1 = mfccf(ncoeff,speechIn,fs); %Compute test feature vector
rMatrix = CMN(rMatrix1); %Removes convolutional noise

Sco = DTWScores(rMatrix,N); %computes all DTW scores
[SortedScores,EIndex] = sort(Sco); %Sort scores increasing
K_Vector = EIndex(1:k); %Gets k lowest scores
Neighbors = zeros(1,k); %will hold k-N neighbors

% Fold each template index back into the word range 1..N.
% NOTE(review): presumably DTWScores orders the scores speaker by speaker,
% N words each - confirm against DTWScores.
for t = 1:k
    u = K_Vector(t);
    Neighbors(t) = mod(u-1, N) + 1; %store the word index, not the vocabulary size
end

%Apply k-Nearest Neighbor rule
Nbr = Neighbors %no semicolon: echoes the neighbor list to the console
%sortk = sort(Nbr);
[Modal,Freq] = mode(Nbr); %most frequent value and how often it occurs

%Cell array (not a padded char matrix) so the printed word has no trailing blanks
Word = {'One','Two','Three','Four','Five','Six','Seven','Eight','Nine','Ten','Yes','No','Hello','Open','Close','Start','Stop','Dial','On','Off'};

if mean(abs(speechIn)) < 0.01
    fprintf('No microphone connected or you have not said anything.\n');
elseif ((k/Freq) > 2) %if no majority
    fprintf('The word you have said could not be properly recognised.\n');
else
    fprintf('You have just said %s.\n',Word{Modal}); %Prints recognized word
end



2.3) setTemplates.m


% setTemplates script: computes the MFCC template of every vocabulary word
% for every training speaker and saves one Vectors<n>.mat file per speaker,
% with one variable per word.
ncoeff=13; %Required number of mfcc coefficients

%Training speakers, in the order that determines the Vectors<n>.mat numbering
speakers = {'Amir','Ayo','Sameh','Jim','Tope'};

%Vocabulary; the i-th word is read from recording 5_<i>.wav and becomes the
%i-th variable name in the saved file
fields = {'One','Two','Three','Four','Five','Six','Seven','Eight','Nine','Ten','Yes','No','Hello','Open','Close','Start','Stop','Dial','On','Off'};
nWords = numel(fields);

for s = 1:numel(speakers)
    fMatrix = cell(1,nWords);

    for j = 1:nWords
        q = ['C:\SpeechData\' speakers{s} '\5_' num2str(j) '.wav'];
        [speechIn,FS] = wavread(q);
        speechIn = myVAD(speechIn); %Speech endpoint trimming
        fMatrix(1,j) = {mfccf(ncoeff,speechIn,FS)}; %MFCC coefficients are computed here
    end

    %Converts the cell containing all matrices to a structure and saves its
    %fields as variables in a matlab .mat file in the working directory.
    templates = cell2struct(fMatrix, fields, 2);
    save(sprintf('Vectors%d.mat', s), '-struct', 'templates');
end


Страницы: 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12



2012 © Все права защищены
При использовании материалов активная ссылка на источник обязательна.