This exercise corresponds to the backpropagation algorithm material.
Programming Exercise 4 - Neural Networks Learning

```python
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy.io import loadmat

pd.set_option('display.notebook_repr_html', False)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 150)
pd.set_option('display.max_seq_items', None)

%matplotlib inline

import seaborn as sns
sns.set_context('notebook')
sns.set_style('darkgrid')
```
Load MATLAB datafiles

```python
data = loadmat('data/ex4data1.mat')
data.keys()
```
dict_keys(['__header__', '__version__', '__globals__', 'X', 'y'])
```python
y = data['y']
X = np.c_[np.ones((data['X'].shape[0], 1)), data['X']]

print('X:', X.shape, '(with intercept)')
print('y:', y.shape)
```
X: (5000, 401) (with intercept)
y: (5000, 1)
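As a quick sanity check, we can render a few of the 20x20 digit images. This cell is not in the original excerpt; it is a small added sketch. Column 0 of X is the intercept, so it is dropped, and the MATLAB data is stored column-major, hence `order='F'` when reshaping.

```python
# Sketch (not in the original notebook): display a few random digits.
sample = np.random.choice(X.shape[0], 20, replace=False)
fig, axes = plt.subplots(2, 10, figsize=(10, 2))
for ax, i in zip(axes.ravel(), sample):
    ax.imshow(X[i, 1:].reshape(20, 20, order='F'), cmap='gray')
    ax.axis('off')
```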
```python
weights = loadmat('data/ex3weights.mat')
weights.keys()
```
dict_keys(['__header__', '__version__', '__globals__', 'Theta1', 'Theta2'])
```python
theta1, theta2 = weights['Theta1'], weights['Theta2']
print('theta1 :', theta1.shape)
print('theta2 :', theta2.shape)

params = np.r_[theta1.ravel(), theta2.ravel()]
print('params :', params.shape)
```
theta1 : (25, 401)
theta2 : (10, 26)
params : (10285,)
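The unrolled vector can be mapped back to the two matrices with the same slicing that `nnCostFunction` uses below; a small round-trip check, added here for illustration:

```python
# Sketch: recover the weight matrices from the flat vector, mirroring the
# reshape logic inside nnCostFunction. 25*401 + 10*26 = 10285 elements.
t1 = params[:25 * 401].reshape(25, 401)
t2 = params[25 * 401:].reshape(10, 26)
assert np.array_equal(t1, theta1) and np.array_equal(t2, theta2)
```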
Neural Network

- Input layer size = 400 (20x20 pixels)
- Hidden layer size = 25
- Number of labels = 10
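The same numbers as named constants, for readability (an added cell; the cells below pass the literal values 400, 25, and 10 directly):

```python
# Architecture constants (illustrative; later cells use the literals).
input_layer_size = 400   # 20x20 pixel images
hidden_layer_size = 25
num_labels = 10          # digits, with '0' stored as label 10 in the data
```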
Neural Networks - Feed Forward and Cost Function

```python
def sigmoid(z):
    return 1 / (1 + np.exp(-z))
```
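A quick check that the implementation behaves as expected: g(0) = 0.5, and the function saturates toward 0 and 1 for large |z|. This cell is an added illustration:

```python
# Added check: sigmoid(0) should be exactly 0.5, and values saturate.
print(sigmoid(0))                      # 0.5
print(sigmoid(np.array([-10., 10.])))  # approximately [0, 1]
```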
Sigmoid gradient

$$g'(z) = \frac{d}{dz}\,g(z) = g(z)\big(1 - g(z)\big)$$

where

$$g(z) = \frac{1}{1 + e^{-z}}$$
```python
def sigmoidGradient(z):
    return sigmoid(z) * (1 - sigmoid(z))
```
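The gradient formula follows directly from the quotient rule:

$$\frac{d}{dz}\,\frac{1}{1 + e^{-z}} = \frac{e^{-z}}{(1 + e^{-z})^2} = \frac{1}{1 + e^{-z}} \cdot \frac{e^{-z}}{1 + e^{-z}} = g(z)\big(1 - g(z)\big)$$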
Cost Function

$$J(\theta) = \frac{1}{m}\sum_{i=1}^{m}\sum_{k=1}^{K}\left[-y_k^{(i)}\log\big((h_\theta(x^{(i)}))_k\big) - (1 - y_k^{(i)})\log\big(1 - (h_\theta(x^{(i)}))_k\big)\right]$$

Regularized Cost Function

$$J(\theta) = \frac{1}{m}\sum_{i=1}^{m}\sum_{k=1}^{K}\left[-y_k^{(i)}\log\big((h_\theta(x^{(i)}))_k\big) - (1 - y_k^{(i)})\log\big(1 - (h_\theta(x^{(i)}))_k\big)\right] + \frac{\lambda}{2m}\left[\sum_{j=1}^{25}\sum_{k=1}^{400}\big(\Theta^{(1)}_{j,k}\big)^2 + \sum_{j=1}^{10}\sum_{k=1}^{25}\big(\Theta^{(2)}_{j,k}\big)^2\right]$$

```python
def nnCostFunction(nn_params, input_layer_size, hidden_layer_size,
                   num_labels, features, classes, reg):
    # Unroll the flat parameter vector back into the two weight matrices.
    theta1 = nn_params[0:(hidden_layer_size * (input_layer_size + 1))].reshape(
        hidden_layer_size, (input_layer_size + 1))                   # 25x401
    theta2 = nn_params[(hidden_layer_size * (input_layer_size + 1)):].reshape(
        num_labels, (hidden_layer_size + 1))                         # 10x26

    m = features.shape[0]
    # One-hot encode the labels. (.as_matrix() was removed from pandas;
    # .to_numpy() is the current equivalent.)
    y_matrix = pd.get_dummies(classes.ravel()).to_numpy()

    # Feed forward
    a1 = features                                                    # 5000x401
    z2 = theta1.dot(a1.T)                                            # 25x5000
    a2 = np.c_[np.ones((features.shape[0], 1)), sigmoid(z2.T)]       # 5000x26
    z3 = theta2.dot(a2.T)                                            # 10x5000
    a3 = sigmoid(z3)                                                 # 10x5000

    # Regularized cost (bias columns are excluded from the penalty)
    J = -1 * (1 / m) * np.sum((np.log(a3.T) * (y_matrix) +
                               np.log(1 - a3).T * (1 - y_matrix))) + \
        (reg / (2 * m)) * (np.sum(np.square(theta1[:, 1:])) +
                           np.sum(np.square(theta2[:, 1:])))

    # Backpropagation
    d3 = a3.T - y_matrix                                             # 5000x10
    d2 = theta2[:, 1:].T.dot(d3.T) * sigmoidGradient(z2)             # 25x5000
    delta1 = d2.dot(a1)                                              # 25x401
    delta2 = d3.T.dot(a2)                                            # 10x26

    # Zero out the bias column so the bias weights are not regularized.
    theta1_ = np.c_[np.zeros((theta1.shape[0], 1)), theta1[:, 1:]]
    theta2_ = np.c_[np.zeros((theta2.shape[0], 1)), theta2[:, 1:]]
    theta1_grad = delta1 / m + (theta1_ * reg) / m
    theta2_grad = delta2 / m + (theta2_ * reg) / m

    return (J, theta1_grad, theta2_grad)
```
```python
nnCostFunction(params, 400, 25, 10, X, y, 0)[0]
```
0.28762916516131892
```python
nnCostFunction(params, 400, 25, 10, X, y, 1)[0]
```
0.38376985909092365
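The analytic gradients from backpropagation can be verified against a finite-difference estimate on a few randomly chosen parameters. Gradient checking is part of the exercise but not of this excerpt; the sketch below (`check_gradients` is a made-up helper name) perturbs one parameter at a time:

```python
# Sketch: numerical gradient check. For a handful of parameter indices,
# compare (J(p+eps) - J(p-eps)) / (2*eps) with the backprop gradient.
def check_gradients(p, reg=1.0, eps=1e-4, n_checks=5):
    J, g1, g2 = nnCostFunction(p, 400, 25, 10, X, y, reg)
    analytic = np.r_[g1.ravel(), g2.ravel()]
    for i in np.random.choice(p.size, n_checks, replace=False):
        perturb = np.zeros_like(p)
        perturb[i] = eps
        J_plus = nnCostFunction(p + perturb, 400, 25, 10, X, y, reg)[0]
        J_minus = nnCostFunction(p - perturb, 400, 25, 10, X, y, reg)[0]
        numeric = (J_plus - J_minus) / (2 * eps)
        print(i, numeric, analytic[i])  # the two values should agree closely

check_gradients(params)
```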
```python
[sigmoidGradient(z) for z in [-1, -0.5, 0, 0.5, 1]]
```
[0.19661193324148185,
0.23500371220159449,
0.25,
0.23500371220159449,
0.19661193324148185]
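From here the exercise proceeds to random initialization and training, which are not in this excerpt. A minimal sketch, assuming `scipy.optimize.minimize` is fed the cost and unrolled gradients from `nnCostFunction` (`rand_init` and `cost_grad` are illustrative names):

```python
from scipy.optimize import minimize

# Symmetry-breaking initialization in [-eps_init, eps_init], with
# eps_init = sqrt(6)/sqrt(L_in + L_out) as suggested in the exercise text.
def rand_init(l_in, l_out):
    eps_init = np.sqrt(6) / np.sqrt(l_in + l_out)
    return np.random.uniform(-eps_init, eps_init, (l_out, l_in + 1))

init_params = np.r_[rand_init(400, 25).ravel(), rand_init(25, 10).ravel()]

# Adapter: with jac=True, minimize expects fun to return (cost, gradient).
def cost_grad(p, reg):
    J, g1, g2 = nnCostFunction(p, 400, 25, 10, X, y, reg)
    return J, np.r_[g1.ravel(), g2.ravel()]

res = minimize(cost_grad, init_params, args=(1.0,), jac=True,
               method='L-BFGS-B', options={'maxiter': 100})

# Feed forward with the learned weights to get training-set accuracy.
t1 = res.x[:25 * 401].reshape(25, 401)
t2 = res.x[25 * 401:].reshape(10, 26)
h2 = np.c_[np.ones((X.shape[0], 1)), sigmoid(t1.dot(X.T)).T]
pred = np.argmax(sigmoid(t2.dot(h2.T)), axis=0) + 1  # labels are 1..10
print('training accuracy:', np.mean(pred == y.ravel()))
```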