说实话,具体的训练公式我没有亲自推导,姑且认为作者的代码是正确的。总体上看,训练采用标准的 BP(反向传播)方法;特殊之处在于,输入层和输出层是完完全全的“同一层”。
void dA::get_corrupted_input (int *x, // the original input 0-1 vector -- inputint *tilde_x, // the resulted 0-1 vector gotten noised -- outputdouble p // the p probability of noise, binomial test -- input)
{for(int i=0; i<n_visible; i++) {if(x[i] == 0) {// if the state is 0, do noghing tilde_x[i] = 0;} else {// if the state is 1, add the noise of p probability on ittilde_x[i] = binomial(1, p);}}
}// Encode
void dA::get_hidden_values (int *x, // the input from visible nodesdouble *y // the output of hidden nodes)
{for(int i=0; i<n_hidden; i++) {// calculated sum_j(vj * wij) + biy[i] = 0;for(int j=0; j<n_visible; j++) {y[i] += W[i][j] * x[j];}y[i] += hbias[i];// sigmod (y)y[i] = sigmoid(y[i]);}
}// Decode
void dA::get_reconstructed_input (double *y, // the input from hidden nodesdouble *z // the output reconstructed of visible nodes)
{for(int i=0; i<n_visible; i++) {// calculated sum_j(hj * wij) + ciz[i] = 0;for(int j=0; j<n_hidden; j++) {z[i] += W[j][i] * y[j];}z[i] += vbias[i];// sigmod (z)z[i] = sigmoid(z[i]);}
}void dA::train (int *x, // the input sample from visiable nodedouble lr, // the learning ratedouble corruption_level // corruption_level is the probability of noise)
{
    // The network is input(visible) -> hidden -> output(visible), where the
    // output layer is the very same layer as the input layer.  Training runs
    // standard backpropagation from the output back to the weights.
    int    *tilde_x = new int[n_visible];     // noise-corrupted copy of the input
    double *y       = new double[n_hidden];   // hidden-layer activations
    double *z       = new double[n_visible];  // reconstruction of the input
    double *L_vbias = new double[n_visible];  // error term at the visible layer
    double *L_hbias = new double[n_hidden];   // error term at the hidden layer

    // Probability that an input unit survives the corruption step.
    double p = 1 - corruption_level;

    // Forward pass: corrupt the sample, encode it, decode it.
    get_corrupted_input(x, tilde_x, p);
    get_hidden_values(tilde_x, y);
    get_reconstructed_input(y, z);

    // Visible-layer error is taken against the reconstruction PROBABILITY z
    // (unlike RBM training, which compares against a sampled 0-1 state);
    // the visible bias moves along that error, averaged over N samples.
    for (int i = 0; i < n_visible; i++) {
        L_vbias[i] = x[i] - z[i];
        vbias[i] += lr * L_vbias[i] / N;
    }

    // Propagate the visible error back through the (tied) weights, scale by
    // the sigmoid derivative y*(1-y), and update the hidden bias.
    for (int i = 0; i < n_hidden; i++) {
        L_hbias[i] = 0;
        for (int j = 0; j < n_visible; j++) {
            L_hbias[i] += W[i][j] * L_vbias[j];
        }
        L_hbias[i] *= y[i] * (1 - y[i]);
        hbias[i] += lr * L_hbias[i] / N;
    }

    // Weight update combines the encoder gradient (L_hbias * corrupted input)
    // and the decoder gradient (L_vbias * hidden activation).
    for (int i = 0; i < n_hidden; i++) {
        for (int j = 0; j < n_visible; j++) {
            W[i][j] += lr * (L_hbias[i] * tilde_x[j] + L_vbias[j] * y[i]) / N;
        }
    }

    // Release scratch buffers in reverse order of allocation.
    delete[] L_hbias;
    delete[] L_vbias;
    delete[] z;
    delete[] y;
    delete[] tilde_x;
}

// Reconstruct a sample: encode it, then decode it, with no added noise.
void dA::reconstruct (int *x,    // the input sample -- input
                      double *z  // the reconstructed values -- output
)
{
    // Scratch buffer for the hidden-layer activations.
    double *y = new double[n_hidden];

    // Encode the (uncorrupted) input, then decode it back into z.
    get_hidden_values(x, y);
    get_reconstructed_input(y, z);

    delete[] y;
}