
(Still) struggling with the back-propagation part in my ANN

I'm now up to the fifth rewrite. Hehe, I truly rock, don't I? [smile] Anyway, I think I have a better structure for the net now, easier to understand. However, the training part doesn't seem to be working as intended. I've tried everything: stepping through with the debugger, doing couts after every line, taking breaks and coming back to the code, etc. Nothing seems to work. Anyone care to take a look? It's not a lot of code.

Here's how it works: x(l,n) returns whatever neuron n in layer l fired; n == 0 is the bias, 1. The function w(l,f,n) returns the weight assigned to input f for neuron n on layer l. If f == 0, it's the bias weight. I start every training run by calling Process(); the only thing it does is calculate the x (outputs) of every neuron in the net, so that when I train, I train on the new outputs calculated from the new weights.

Here's the code. Although the only relevant part is Train(), I left the other stuff in (except for some logging) for reference, in case my description above wasn't clear. :P

#include "NeuralNet.h"

NeuralNet::NeuralNet(int _Inputs, int _NeuronsInHidden, int _Outputs) {
	Inputs = _Inputs; 
	NeuronsInHidden = _NeuronsInHidden; 
	Outputs = _Outputs;

	srand( (unsigned)time( NULL ) );	// seed the randomizer

	W.resize(2);	// setup weights
	W[0].resize(((Inputs+1) * NeuronsInHidden), (float)((rand()%2000)-1000)/1000);	// hidden layer
	W[1].resize(((NeuronsInHidden+1) * Outputs), (float)((rand()%2000)-1000)/1000);	// output layer

	X.resize(3);
	X[0].resize(Inputs+1, 0);
	X[1].resize(NeuronsInHidden+1, 0);
	X[2].resize(Outputs, 0);
}

NeuralNet::~NeuralNet() {
}

float NeuralNet::w(int l, int f, int n) {	// f = 0 is bias weight
	if(l == 0)	// hidden layer
		return W[0][((Inputs+1) * n) + f];
	else if(l == 1)	// output layer
		return W[1][((NeuronsInHidden+1) * n) + f];
	else {
		cout << "Error: Bad layer number: " << l << endl;
		return 0;
	}
}

void NeuralNet::SetW(int l, int f, int n, float NewWeight) {
	if(l == 0)	// hidden layer
		W[0][((Inputs+1) * n) + f] = NewWeight;
	else if(l == 1)	// output layer
		W[1][((NeuronsInHidden+1) * n) + f] = NewWeight;
	else
		cout << "Error: Bad layer number: " << l << endl;
}

float NeuralNet::x(int l, int n) {	// n = 0 is bias (1)
	return X[l][n];
}

void NeuralNet::Train(float i1, float i2, float d) {
	// first, process so we have the correct values stored inside the neural net
	Process(i1, i2);

	vector<float> HiddenDelta;
	HiddenDelta.resize(NeuronsInHidden);
	vector<float> OutputDelta;
	OutputDelta.resize(Outputs);

	// output layer delta
	// d3(1) = x3(1)(1 - x3(1))(d - x3(1))
	for(int n = 0; n < Outputs; n++) {
		OutputDelta[n] = x(2,n) * (1 - x(2,n)) * (d - x(2,n));
		//cout << "OutputDelta["<<n<<"] = " << x(2,n) << " * " << (1 - x(2,n)) << " * " << (d - x(2,n)) << endl;
		//cout << OutputDelta[n] << endl;
	}

	// hidden layer delta
	// formula: d2(1) = x2(1)(1 - x2(1))w3(1,1)d3(1)
	for(int n = 0; n < NeuronsInHidden; n++) {
		HiddenDelta[n] = x(1,n+1) * (1 - x(1,n+1)) * w(1,n+1,0) * OutputDelta[0];
		//cout << "HiddenDelta["<<n<<"] = " << x(1,n+1) <<" * " << (1 - x(1,n+1)) <<" * " << w(1,n+1,0) << " * " << OutputDelta[0] << endl;
		//cout << HiddenDelta[n] << endl;
	}

	// deltas calculated, now alter the weights
	// formula: w2(0,1) += h*x1(0)*d2(1)
	for(int n = 0; n < Outputs; n++) {
		for(int i = 0; i < NeuronsInHidden+1; i++) {
			SetW(1,i,n, w(1,i,n)+(LEARN_RATE * x(1,i) * OutputDelta[n]));
			//cout << w(1,i,n)+(LEARN_RATE * x(1,i) * OutputDelta[n]) << endl;
		}
	}

	for(int n = 0; n < NeuronsInHidden; n++) {
		for(int i = 0; i < Inputs+1; i++) {
			SetW(0,i,n, w(0,i,n)+(LEARN_RATE * x(0,i) * HiddenDelta[n]));
			//cout << w(0,i,n)+(LEARN_RATE * x(0,i) * HiddenDelta[n]) << endl;
		}
	}

	//cout << "--------\n";
}

float NeuralNet::Process(float i1, float i2) {
	// store biases
	X[0][0] = 1;
	X[1][0] = 1;

	// temp inputs
	X[0][1] = 0;
	X[0][2] = 0;

	// sort out the hidden layers outputs
	for(int n = 1; n < NeuronsInHidden+1; n++) {	// loop through hiddens
		for(int i = 0; i < Inputs+1; i++) 		// loop through inputs
			X[1][n] += x(0, i) * w(0, i, n-1);

		X[1][n] = Sigmoid(X[1][n]);
		//cout << X[1][n] << endl;
	}

	// output neuron
	for(int i = 0; i < NeuronsInHidden+1; i++)
		X[2][0] += x(1, i) * w(1, i, 0);	// single output neuron, index 0

	X[2][0] = Sigmoid(X[2][0]);

	// --- Calculation done ---

	return X[2][0];	
}



And here's how I train it in main, on the logical operation AND:

#include <iostream>
#include <conio.h>

#include "NeuralNet.h"

int main() {
	NeuralNet X(2, 2, 1);

	// Train
	for(int a = 0; a < 5000; ++a) {
		X.Train(0, 0, 0);
		X.Train(1, 0, 0);
		X.Train(0, 1, 0);
		X.Train(1, 1, 1);
	}
	
	// Output what we've learned
	cout << endl << X.Process(0, 0);
	cout << endl << X.Process(1, 0);
	cout << endl << X.Process(0, 1);
	cout << endl << X.Process(1, 1);

	getch();
	return 0;
}



It always outputs the same value no matter what the inputs are. I can't find what I'm doing wrong, and I've got that bad feeling that I've tried absolutely everything. :P Any help cracking this one is greatly appreciated! [smile]
This is probably not your main problem... but you should change this:

W.resize(2);	// setup weights
W[0].resize(((Inputs+1) * NeuronsInHidden), (float)((rand()%2000)-1000)/1000);	// hidden layer
W[1].resize(((NeuronsInHidden+1) * Outputs), (float)((rand()%2000)-1000)/1000);	// output layer


to this:

W.resize(2);	// setup weights
W[0].resize(((Inputs+1) * NeuronsInHidden), (float)rand()/(float)RAND_MAX);	// hidden layer
W[1].resize(((NeuronsInHidden+1) * Outputs), (float)rand()/(float)RAND_MAX);	// output layer


That will just give you better random values =)
If you want the random number interval to be -1 ≤ X ≤ 1 instead of 0 ≤ X ≤ 1:

W.resize(2);	// setup weights
W[0].resize(((Inputs+1) * NeuronsInHidden), 2.0f*((float)rand()/(float)RAND_MAX)-1.0f);	// hidden layer
W[1].resize(((NeuronsInHidden+1) * Outputs), 2.0f*((float)rand()/(float)RAND_MAX)-1.0f);	// output layer
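
You could also pull that mapping into a little helper to keep the resize lines readable (just a sketch; RandWeight isn't part of the original code):

float RandWeight() {
	// uniform in [0, 1], remapped to [-1, 1]
	return 2.0f*((float)rand()/(float)RAND_MAX) - 1.0f;
}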
Thanks, I'll change that. [smile] I've always thought that was a crude way to do it; that one looks much nicer.
Also, when you set the weights like that, you are only generating one random value and setting all the values in the vector to that same value. If you want each weight set randomly, resize first and then iterate over the vector setting the values, as in the sketch below.
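
For example, something like this (a minimal sketch, assuming the same W layout as in the constructor above):

W.resize(2);	// setup weights
W[0].resize((Inputs+1) * NeuronsInHidden);	// hidden layer
W[1].resize((NeuronsInHidden+1) * Outputs);	// output layer

// now every weight gets its own random value in [-1, 1]
for(size_t l = 0; l < W.size(); ++l)
	for(size_t i = 0; i < W[l].size(); ++i)
		W[l][i] = 2.0f*((float)rand()/(float)RAND_MAX) - 1.0f;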

