def lrcostFunctionReg(theta, reg, X, y):
    m = y.size
    h = sigmoid(X.dot(theta))

    J = -1*(1/m)*(np.log(h).T.dot(y) + np.log(1-h).T.dot(1-y)) + (reg/(2*m))*np.sum(np.square(theta[1:]))

    if np.isnan(J[0]):
        return(np.inf)
    return(J[0])
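For reference, this is the vectorized expression the function computes, with h = sigmoid(Xθ) and λ corresponding to the reg argument; note the bias term θ₀ is excluded from the penalty:

J(\theta) = -\frac{1}{m}\left[\, y^{T}\log(h) + (1-y)^{T}\log(1-h) \,\right] + \frac{\lambda}{2m}\sum_{j=1}^{n}\theta_j^{2}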
def lrgradientReg(theta, reg, X, y):
    m = y.size
    h = sigmoid(X.dot(theta.reshape(-1,1)))

    grad = (1/m)*X.T.dot(h-y) + (reg/m)*np.r_[[[0]], theta[1:].reshape(-1,1)]

    return(grad.flatten())
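The corresponding regularized gradient, again leaving the bias term unpenalized (the np.r_[[[0]], theta[1:]] construction above zeroes out the first entry of the penalty vector):

\nabla J(\theta) = \frac{1}{m} X^{T}(h - y) + \frac{\lambda}{m}\begin{bmatrix}0 \\ \theta_{1} \\ \vdots \\ \theta_{n}\end{bmatrix}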
def oneVsAll(features, classes, n_labels, reg):
    initial_theta = np.zeros((features.shape[1], 1))
    all_theta = np.zeros((n_labels, features.shape[1]))

    # Train one regularized binary classifier per class c
    for c in np.arange(1, n_labels+1):
        res = minimize(lrcostFunctionReg, initial_theta, args=(reg, features, (classes == c)*1), method=None,
                       jac=lrgradientReg, options={'maxiter':50})
        all_theta[c-1] = res.x
    return(all_theta)
theta = oneVsAll(X, y, 10, 0.1)
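As a quick sanity check (assuming the usual 20x20 pixel digit data, i.e. 400 features plus the intercept column already appended to X), the fitted parameter matrix should have one row per class:

print(theta.shape)   # expected: (10, 401) -- one weight vector per class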
One-vs-all Prediction
def predictOneVsAll(all_theta, features):
    probs = sigmoid(features.dot(all_theta.T))

    # Adding one because Python uses zero-based indexing for the 10 columns (0-9),
    # while the 10 classes are numbered from 1 to 10.
    return(np.argmax(probs, axis=1)+1)
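A tiny illustration of the argmax-plus-one mapping, using made-up probabilities for a single example:

row = np.array([[0.1, 0.05, 0.9, 0.2, 0.1, 0.0, 0.3, 0.1, 0.05, 0.2]])  # hypothetical class scores
np.argmax(row, axis=1) + 1   # array([3]): the highest score sits in column index 2, i.e. class 3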
pred = predictOneVsAll(theta, X)
print('Training set accuracy: {} %'.format(np.mean(pred == y.ravel())*100))
Training set accuracy: 93.24 %
Multiclass Logistic Regression with scikit-learn
clf = LogisticRegression(C=10, penalty='l2', solver='liblinear')
# Scikit-learn fits the intercept automatically, so we exclude the first column of 'ones' from X when fitting.
clf.fit(X[:,1:], y.ravel())
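To compare against the hand-rolled one-vs-all result above, one natural follow-up is scikit-learn's score method, which reports mean accuracy on the data passed in:

print('Training set accuracy: {} %'.format(clf.score(X[:,1:], y.ravel())*100))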