Convolutional layer impl in numpy import numpy as np num_records = 700 x = np.linspace(0.0, 5.0, num=num_records) # Generally examples of funtion we want to learn are # - Regression: Time to reach home (google maps), age prediction # - Classification: PCtr ads, email, pushes # In our case this will be the "ideal" fuction we want to learn fx = (x**3 + 4) # The collected data has noise. So we introduce noise to our collected data y = fx + (np.random.normal(0, 10, num_records)) import matplotlib.pyplot as plt def plotData(): plt.plot(x,y,'x',color='y',label="training data") plt.plot(x,fx,'-',color='b', label="f(x)") plotData() plt.legend(loc=2) plt.show() x = np.array(x).reshape(len(x), 1) y = np.array(y).reshape(len(y), 1) # add a column with value 1 in position 0 as bias print("x before: ", x[:5]) agumented_x = np.insert(x, 0, 1, axis=1) print("x after inserting column: ", agumented_x[:5]) ################################################################################## ################################################################################## # Linear approximation lr = 1e-6 # learning rate num_features = 1 # initialize weights w = np.random.rand(num_features + 1, 1) # add weight for bias print("Initial values:\n w = %s \n" % (w)) for i in range(10000): # forward ypred = agumented_x.dot(w) # linear function loss = np.sum(0.5*(ypred - y)**2) # squared loss since this is a regression problem # backward # d_err/d_w # = d{np.sum(0.5*(ypred - y)**2)}_{d_w} # = d_ypred = (ypred - y) d_w = agumented_x.T.dot(d_ypred) # THIS IS THE MAIN derivative needed. THAT's IT # update weights w -= (lr*d_w) if i % 1000 == 0 or i < 10: print("iteration=%s \n d_w = %s \n w = %s \n loss = %s\n" % (i, d_w, w, loss)) print("last iteration=%s \n d_w = %s \n w = %s \n loss = %s\n" % (i, d_w, w, loss)) plotData() plt.plot(agumented_x[:,1],agumented_x.dot(w),'-',color='r', label="linear fit for f(x)") plt.legend(loc=2) plt.show() ################################################################################## ################################################################################## # Non-linear approximation # x -> w1 -> relu -> w2 -> y lr = 1e-6 num_features = 1 num_hidden_features = 10 w1_size = num_features + 1 w1 = np.random.random((w1_size, num_hidden_features)) w2_size = num_hidden_features + 1 w2 = np.random.random(w2_size).reshape(w2_size, 1) def relu(x): # Keep only values of x > 0; set x = 0 to all others x[x < 0] = 0 return x for i in range(10000): # forward u1 = agumented_x.dot(w1) # linear function u2 = relu(u1) # non-linear activation agumented_u2 = np.insert(u2, 0, 1, axis=1) # add bias column like before ypred = agumented_u2.dot(w2) # linear function loss = np.mean(0.5*(ypred - y)**2) # backward d_ypred = ypred - y d_w2 = agumented_u2.T.dot(d_ypred) # gradient of weights of second layer d_agumented_u2 = d_ypred.dot(w2.T) # gradient of inputs to second layer d_u2 = np.delete(d_agumented_u2, 0, axis=1) # remove the added bias column # gradient of inputs to non-linear ReLU layer d_u1 = np.zeros(d_u2.shape) d_u1[u1 != 0] = d_u2[u1 != 0] d_w1 = agumented_x.T.dot(d_u1) # gradient of first layer w1 -= (lr*d_w1) w2 -= (lr*d_w2) if i % 1000 == 0: print("iteration=%s loss=%s" % (i, loss)) print("iteration=%s loss=%s" % (i, loss)) u1 = agumented_x.dot(w1) u2 = relu(u1) agumented_u2 = np.insert(u2, 0, 1, axis=1) ypred = agumented_u2.dot(w2) plotData() plt.plot(agumented_x[:,1],agumented_x.dot(w),'-',color='r', label="linear fit for f(x)") plt.plot(agumented_x[:,1],ypred,'-',color='c', label="non-linear fit for f(x)") plt.legend(loc=2) plt.show() |