Advertisement

Having problems with neural network training C#

Started by November 19, 2010 08:59 AM
22 comments, last by sjaakiejj 13 years, 11 months ago
Hi everyone!
I'm new and I've a problem with a neural network I'm working on.
I'm writing it in c#
I think the problem is in the "train" method.
Could anyone help me?
I'm going to paste here my whole code so you can read it.
Thanks in advice!
(Sorry for the bad english)

public class NeuralNetwork    {        /// <summary>        /// Create a new NeuralNetwork object        /// </summary>        /// <param name="inputs">Number of input nodes</param>        /// <param name="outputs">Number of output nodes</param>        /// <param name="hiddens">Number of hidden nodes</param>        /// <param name="Beta">Indicates the learning rate, from 0.0 to 1.0, best is 0.5</param>        public NeuralNetwork(int inputs,int outputs,int hiddens,float Beta)//Constructor/initialization        {            X = new float[inputs];            Y = new float[outputs,2];            W1 = new float[inputs * hiddens]; // first serie of synapses            H1 = new float[hiddens,2];            W2 = new float[hiddens * hiddens]; // second serie of synapses            H2 = new float[hiddens,2];            W3 = new float[hiddens * hiddens]; // third serie of synapses            H3 = new float[hiddens,2];            W4 = new float[hiddens * outputs]; // third serie of synapses            errors = new float[(outputs*hiddens)*2];            B = Beta;            Eta = (inputs / outputs) * 0.5f;// quickprop/rprop            reset(false);        }        //Variable Declaration        private float[] X; //Input vector        private float[,] Y; //Output vector        private float[,] H1; //Hidden vector        private float[,] H2; //Hidden vector        private float[,] H3; //Hidden vector        private float[] W1; //Synapse vector/s 1         private float[] W2; //Synapse vector/s 2  | =Four hidden levels        private float[] W3; //Synapse vector/s 3  /        private float[] W4; //Synapse vector/s 4 /        public float[] errors;        private float B;        private float Eta;        /// <summary>        /// Reset the neural network so it can be trained again        /// </summary>        /// <param name="kill">Reset to 0 all the threshold</param>        public void reset(bool kill)        {            int value = 1;            if (kill) value = 0;            //For the initialization we will use the "Equal Distribution" random method, from -0.5 to 0.5?            //first serie            for (int a = 0; a < W1.Length; a++)            {                W1[a] = Game1.RandomBetween(0.0f, 0.5f) * value;            }            //hiddens 1            for (int a = 0; a < H1.Length/2; a++)            {                H1[a, 1] = Game1.RandomBetween(0.0f, 0.5f) * value;            }            //second serie            for (int a = 0; a < W2.Length; a++)            {                W2[a] = Game1.RandomBetween(0.0f, 0.5f) * value;            }            //hiddens 2            for (int a = 0; a < H2.Length/2; a++)            {                H2[a,1] = Game1.RandomBetween(0.0f, 0.5f) * value;            }            //third serie            for (int a = 0; a < W3.Length; a++)            {                W3[a] = Game1.RandomBetween(0.0f, 0.5f) * value;            }            //hiddens 2             for (int a = 0; a < H3.Length / 2; a++)            {                H3[a,1] = Game1.RandomBetween(0.0f, 0.5f) * value;            }            //third serie            for (int a = 0; a < W4.Length; a++)            {                W4[a] = Game1.RandomBetween(0.0f, 0.5f) * value;            }            //outputs            for (int a = 0; a < Y.Length/2; a++)            {                Y[a, 1] = Game1.RandomBetween(0.0f, 0.5f) * value;            }        }        //Activation function        private float sigma(float val)        {            return (float)(Math.Tanh(B * val));        }        //Training        /// <summary>        /// Train the NeuralNetwork with the specified Training Set        /// </summary>        /// <param name="TS">used Training Set</param>        public int train(float[,,] TS)        {            /*             * The training set should be built so:             * float[proofs,test level,value]             * so for example             * we should execute a test aiming to get the NeuralNetwork pointing to the maximum number among four             * TS=new float[100,2,4]             * so 100 proofs             * level 0 contains input data             * level 1 contains optimal output             * with 4 input and output            */            int returner = 0;            //input initialization            float[] inputs = new float[Y.Length/2];            //training sets cycle            for (int a = 0; a < TS.GetLength(0); a++)            {                for (int b = 0; b < Y.Length/2; b++)                {                    inputs = TS[a, 0, b];                }                //update..                returner = update(inputs);                //########### BACK-PROPAGATION ###########                //calculate errors                for (int c = 0; c < Y.Length / 2; c++)                {                    errors[c] = TS[a, 1, c] - Y[c, 0];                }                //last level (Y)                for (int c = 0; c < Y.Length/2; c++)                {                    Y[c, 1] = (float)(errors[c] * B * (1 - Math.Pow(Y[c, 0], 2)));                }                //########### I THINK THAT ERRORS OCCURS HERE ###########                //level H3                for (int c = 0; c < H3.Length / 2; c++)                {                    float sum = 0;                    //first sum all the values                    for (int d = 0; d < Y.Length/2; d++)                    {                        sum += W4[(d * (H3.Length / 2)) + c] * Y[d,1];                    }                    //then change the values                    for (int d = 0; d < Y.Length/2; d++)                    {                        H3[c, 1] = (float)(B * (1 - Math.Pow(Y[d,0], 2)) * sum);                    }                }                //level H2                for (int c = 0; c < H2.Length / 2; c++)                {                    float sum = 0;                    //first sum all the values                    for (int d = 0; d < H3.Length / 2; d++)                    {                        sum += W3[(d * (H2.Length / 2)) + c] * H3[d, 1];                    }                    //then change the values                    for (int d = 0; d < H3.Length / 2; d++)                    {                        H2[c, 1] = (float)(B * (1 - Math.Pow(H3[d, 0], 2)) * sum);                    }                }                //level H1                for (int c = 0; c < H1.Length / 2; c++)                {                    float sum = 0;                    //first sum all the values                    for (int d = 0; d < H2.Length / 2; d++)                    {                        sum += W2[(d * (H1.Length / 2)) + c] * H2[d, 1];                    }                    //then change the values                    for (int d = 0; d < H2.Length / 2; d++)                    {                        H1[c, 1] = (float)(B * (1 - Math.Pow(H2[d, 0], 2)) * sum);                    }                }                float val = 0;                //level W4                for (int c = 0; c < H3.Length / 2; c++)                {                    for (int d = 0; d < Y.Length/2; d++)                    {                        val += H3[c, 1] * Y[d,1];                    }                }                for (int c = 0; c < H3.Length / 2; c++)                {                    for (int d = 0; d < Y.Length/2; d++)                    {                        W4[(c * (Y.Length/2)) + d] += -Eta * val;                    }                }                //level W3                val = 0;                for (int c = 0; c < H2.Length / 2; c++)                {                    for (int d = 0; d < H3.Length / 2; d++)                    {                        val += H2[c, 1] * H3[d, 1];                    }                }                for (int c = 0; c < H2.Length / 2; c++)                {                    for (int d = 0; d < H3.Length / 2; d++)                    {                        W3[(c * (H3.Length / 2)) + d] += -Eta * val;                    }                }                //level W2                val = 0;                for (int c = 0; c < H1.Length / 2; c++)                {                    for (int d = 0; d < H2.Length / 2; d++)                    {                        val += H1[c, 1] * H2[d, 1];                    }                }                for (int c = 0; c < H1.Length / 2; c++)                {                    for (int d = 0; d < H2.Length / 2; d++)                    {                        W2[(c * (H2.Length / 2)) + d] += -Eta * val;                    }                }                //level W1                val = 0;                for (int c = 0; c < X.Length; c++)                {                    for (int d = 0; d < H1.Length / 2; d++)                    {                        val += X[c] * H1[d, 1];                    }                }                for (int c = 0; c < X.Length; c++)                {                    for (int d = 0; d < H1.Length / 2; d++)                    {                        W1[(c * (H1.Length / 2)) + d] += -Eta * val;                    }                }            }            return returner;        }        //Give the input        /// <summary>        /// Update the NN by giving it the inputs.        /// </summary>        /// <param name="inputs">Values to pass.</param>        /// <param name="training">Is this a training session?.</param>        /// <param name="expected">Only if training is true: the array value you expect from the input.</param>        /// <returns>Returns the index of one of the outputs.</returns>        public int update(float[] inputs) // inputs->X        {            X = inputs;            //first transmission            for (int a = 0; a < H1.Length / 2; a++)            {                H1[a,0] = 0;                for (int b = 0; b < X.Length; b++)                {                    H1[a,0] += X * W1[(a * X.Length) + b];                }                H1[a, 0] = sigma(H1[a, 0] - H1[a, 1]);            }            //ok            //second transimssion            for (int a = 0; a < H2.Length / 2; a++)            {                H2[a,0] = 0;                for (int b = 0; b < H1.Length/2; b++)                {                    H2[a,0] += H1[b,0] * W2[(a * H1.Length / 2) + b];                }                H2[a, 0] = sigma(H2[a, 0] - H2[a, 1]);            }            //ok            //third transimssion            for (int a = 0; a < H3.Length / 2; a++)            {                H3[a,0] = 0;                for (int b = 0; b < H2.Length / 2; b++)                {                    H3[a,0] += H2[b,0] * W3[(a * H2.Length / 2) + b];                }                H3[a, 0] = sigma(H3[a, 0] - H3[a, 1]);            }            //last sigma, output results            float max = 0;            int highest = 0;            for (int a = 0; a < Y.Length/2; a++)            {                Y[a,0] = 0;                for (int b = 0; b < H3.Length/2; b++)                {                    Y[a,0] += H3[b,0] * W4[(a * H3.Length / 2) + b];                }                Y[a,0] = sigma(Y[a,0]-Y[a,1]);                //also find the highest                if (Y[a,0] > max)                { max = Y[a,0]; highest = a; }            }            //done            return highest;        }}


[edit: added source tags]

[Edited by - InnocuousFox on November 19, 2010 1:10:28 PM]
What exactly is the problem?
Advertisement
The back-propagation (train method) seems to not work correctly, I've made lot of attempts but it seems that the values of the synapsis (W arrays) and of the levels (input, hidden, output) don't update in a correct way.
Is the code understandable?
I've done many attempts but the neural network seems to change the values of all the level in a wrong way, I've worked on this with my computer science teacher but I can't find the errors..
So from what I can understand, your code is a 7 unit Neural Network. I implemented one of these in Matlab, not in C#, but what I can recommend you to do is to initialize and print off the set of random weights, and then print off the deltas, activation levels and final results from the first two features that you try to train it on.

Then based on the weights and input features, you can mathematically work out what the value should be at each step. Do that, find the step where it goes wrong, and you'll probably identify the issue. Also, make sure your features are normalized - a Neural Network doesn't perform very well on large values.
Ok I'll try moving as you suggested me!
Advertisement
I've changed the code because there was a terrible mistake, now the it is this:

public class NeuralNetwork
{
/// <summary>
/// Create a new NeuralNetwork object
/// </summary>
/// <param name="inputs">Number of input nodes</param>
/// <param name="outputs">Number of output nodes</param>
/// <param name="hiddens">Number of hidden nodes</param>
/// <param name="Beta">Indicates the learning rate, from 0.0 to 1.0, best is 0.5</param>
public NeuralNetwork(int inputs,int outputs,int hiddens,float Beta)//Constructor/initialization
{
X = new float[inputs];
Y = new float[outputs,2];
W1 = new float[inputs * hiddens]; // first serie of synapses
H1 = new float[hiddens,2];
W2 = new float[hiddens * hiddens]; // second serie of synapses
H2 = new float[hiddens,2];
W3 = new float[hiddens * hiddens]; // third serie of synapses
H3 = new float[hiddens,2];
W4 = new float[hiddens * outputs]; // third serie of synapses
errors = new float[(outputs*hiddens)*2];
B = Beta;
Eta = (inputs / outputs) * 0.5f;// quickprop/rprop
reset(false);
}
//Variable Declaration
private float[] X; //Input vector
private float[,] Y; //Output vector
private float[,] H1; //Hidden vector
private float[,] H2; //Hidden vector
private float[,] H3; //Hidden vector
private float[] W1; //Synapse vector/s 1 private float[] W2; //Synapse vector/s 2 | =Four hidden levels
private float[] W3; //Synapse vector/s 3 /
private float[] W4; //Synapse vector/s 4 /
public float[] errors;
private float B;
private float Eta;

/// <summary>
/// Reset the neural network so it can be trained again
/// </summary>
/// <param name="kill">Reset to 0 all the threshold</param>
public void reset(bool kill)
{
int value = 1;
if (kill) value = 0;
//For the initialization we will use the "Equal Distribution" random method, from -0.5 to 0.5?
//first serie
for (int a = 0; a < W1.Length; a++)
{
W1[a] = 0.5f * value;
}
//hiddens 1
for (int a = 0; a < H1.Length / 2; a++)
{
H1[a, 1] = 0.5f * value;
}
//second serie
for (int a = 0; a < W2.Length; a++)
{
W2[a] = 0.5f * value;
}
//hiddens 2
for (int a = 0; a < H2.Length / 2; a++)
{
H2[a, 1] = 0.5f * value;
}
//third serie
for (int a = 0; a < W3.Length; a++)
{
W3[a] = 0.5f * value;
}
//hiddens 2
for (int a = 0; a < H3.Length / 2; a++)
{
H3[a, 1] = 0.5f * value;
}
//third serie
for (int a = 0; a < W4.Length; a++)
{
W4[a] = 0.5f * value;
}
//outputs
for (int a = 0; a < Y.Length / 2; a++)
{
Y[a, 1] = 0.5f * value;
}
}

//Activation function
private float sigma(float val)
{
return (float)(Math.Tanh(B * val));
}

//Training
/// <summary>
/// Train the NeuralNetwork with the specified Training Set
/// </summary>
/// <param name="TS">used Training Set</param>
public void train(float[,,] TS,StreamWriter writer)
{
/*
* The training set should be built so:
* float[proofs,test level,value]
* so for example
* we should execute a test aiming to get the NeuralNetwork pointing to the maximum number among four
* TS=new float[100,2,4]
* so 100 proofs
* level 0 contains input data
* level 1 contains optimal output
* with 4 input and output
*/
int returner = 0;
//input initialization
float[] inputs = new float[Y.Length/2];
//training sets cycle
for (int a = 0; a < TS.GetLength(0); a++)
{
for (int b = 0; b < Y.Length/2; b++)
{
inputs = TS[a, 0, b];
}
//update..
returner = update(inputs);
writer.WriteLine(a + " test: " + TS[a, 0, 0] + " | " + TS[a, 0, 1] + " | " + TS[a, 0, 2] + " | " + TS[a, 0, 3] + " | " + " -> " + returner);
//########### BACK-PROPAGATION ###########
//calculate errors using optimal output
for (int c = 0; c < Y.Length / 2; c++)
{
errors[c] = TS[a, 1, c] - Y[c, 0];
}
//last level (Y)
for (int c = 0; c < Y.Length/2; c++)
{
//Y[c, 1] = sigma(Y[c, 1]) * errors[c];
Y[c, 1] = (float)(errors[c] * B * (1-Math.Pow(Y[c, 0], 2)));
}
//########### I THINK THAT ERRORS OCCUR HERE ###########
//level H3
for (int c = 0; c < H3.Length / 2; c++)
{
float sum = 0;
//first sum all the values
for (int d = 0; d < Y.Length/2; d++)
{
sum += W4[(d * (H3.Length / 2)) + c] * Y[d,1];

}
//then change the values
for (int d = 0; d < Y.Length/2; d++)
{
//error*Beta*(1-Y^2)*sum
H3[c, 1] = (float)(B * (1-Math.Pow(Y[d,0], 2)) * sum);
}
}
//level H2
for (int c = 0; c < H2.Length / 2; c++)
{
float sum = 0;
//first sum all the values
for (int d = 0; d < H3.Length / 2; d++)
{
sum += W3[(d * (H2.Length / 2)) + c] * H3[d, 1];

}
//then change the values
for (int d = 0; d < H3.Length / 2; d++)
{
H2[c, 1] = (float)(B * (1-Math.Pow(H3[d, 0], 2)) * sum);
}
}
//level H1
for (int c = 0; c < H1.Length / 2; c++)
{
float sum = 0;
//first sum all the values
for (int d = 0; d < H2.Length / 2; d++)
{
sum += W2[(d * (H1.Length / 2)) + c] * H2[d, 1];

}
//then change the values
for (int d = 0; d < H2.Length / 2; d++)
{
H1[c, 1] = (float)(B * (1-Math.Pow(H2[d, 0], 2)) * sum);
}
}
float val = 0;
//Now change the synapsis
//level W4
for (int c = 0; c < H3.Length / 2; c++)
{
for (int d = 0; d < Y.Length/2; d++)
{
val += H3[c, 1] * Y[d,1];
}
}
for (int c = 0; c < H3.Length / 2; c++)
{
for (int d = 0; d < Y.Length/2; d++)
{
W4[(c * (Y.Length/2)) + d] += -Eta * val;
}
}
//level W3
val = 0;
for (int c = 0; c < H2.Length / 2; c++)
{
for (int d = 0; d < H3.Length / 2; d++)
{
val += H2[c, 1] * H3[d, 1];
}
}
for (int c = 0; c < H2.Length / 2; c++)
{
for (int d = 0; d < H3.Length / 2; d++)
{
W3[(c * (H3.Length / 2)) + d] += -Eta * val;
}
}
//level W2
val = 0;
for (int c = 0; c < H1.Length / 2; c++)
{
for (int d = 0; d < H2.Length / 2; d++)
{
val += H1[c, 1] * H2[d, 1];
}
}
for (int c = 0; c < H1.Length / 2; c++)
{
for (int d = 0; d < H2.Length / 2; d++)
{
W2[(c * (H2.Length / 2)) + d] += -Eta * val;
}
}
//level W1
val = 0;
for (int c = 0; c < X.Length; c++)
{
for (int d = 0; d < H1.Length / 2; d++)
{
val += X[c] * H1[d, 1];
}
}
for (int c = 0; c < X.Length; c++)
{
for (int d = 0; d < H1.Length / 2; d++)
{
W1[(c * (H1.Length / 2)) + d] += -Eta * val;
}
}
}
return;
}
//Give the input
/// <summary>
/// Update the NN by giving it the inputs.
/// </summary>
/// <param name="inputs">Values to pass.</param>
/// <param name="training">Is this a training session?.</param>
/// <param name="expected">Only if training is true: the array value you expect from the input.</param>
/// <returns>Returns the index of one of the outputs.</returns>
public int update(float[] inputs) // inputs->X
{
X = inputs;
//first transmission
for (int a = 0; a < H1.Length / 2; a++)
{
H1[a,0] = 0;
for (int b = 0; b < X.Length; b++)
{
H1[a,0] += X * W1[(a * X.Length) + b];
}
H1[a, 0] = sigma(H1[a, 0] - H1[a, 1]);
}
//ok
//second transimssion
for (int a = 0; a < H2.Length / 2; a++)
{
H2[a,0] = 0;
for (int b = 0; b < H1.Length/2; b++)
{
H2[a,0] += H1[b,0] * W2[(a * H1.Length / 2) + b];
}
H2[a, 0] = sigma(H2[a, 0] - H2[a, 1]);
}
//ok
//third transimssion
for (int a = 0; a < H3.Length / 2; a++)
{
H3[a,0] = 0;
for (int b = 0; b < H2.Length / 2; b++)
{
H3[a,0] += H2[b,0] * W3[(a * H2.Length / 2) + b];
}
H3[a, 0] = sigma(H3[a, 0] - H3[a, 1]);
}
//last sigma, output results
float max = 0;
int highest = 0;
for (int a = 0; a < Y.Length/2; a++)
{
Y[a,0] = 0;
for (int b = 0; b < H3.Length/2; b++)
{
Y[a,0] += H3[b,0] * W4[(a * H3.Length / 2) + b];
}
Y[a,0] = sigma(Y[a,0]-Y[a,1]);
//also find the highest
if (Y[a,0] > max)
{ max = Y[a,0]; highest = a; }

}
//done
return highest;
}
}

I've tried passing to the train method a training set composed of 4 random values and of 4 optimal outputs, and they were set like this:
TS 1
input 2 3 6 1
optimal output 2 (because from the first position (0-starting) six is the 2)

And I've obtained an output like this:

REMOVED

[Edited by - Metalsoul on December 8, 2010 2:06:26 PM]
Can you write (without C# code) how you calculate error for last layer and hidden layers? And how weights are corrected?
When I look at your code I notice two things -

1. It's really quite big, and difficult to read/understand
2. The way you calculate the errors/deltas seems to be incorrect. For updating the weights in a neural network, you have to update the weights according to a delta. The delta is calculated using the activation levels and the current weights.. I might just be looking at your code the wrong way though, but compare your method with the one on the website below.
http://www-speech.sri.com/people/anand/771/html/node37.html

I will post down here the formulas I used:
(I've translated them from italian, maybe there could be some translation errors, sorry :( )






This topic is closed to new replies.

Advertisement