## Bad example: naive numerical differentiation
def bad_numerical_diff(f, x):
    h = 10e-50  # an extremely small value -- far too small for floating point
    return (f(x + h) - f(x)) / h  # forward difference
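Two things make this version unreliable: `10e-50` is far below the resolution of floating-point arithmetic, so adding it to x changes nothing, and the forward difference measures the slope between x and x + h rather than at x. A minimal check of the underflow problem (assuming NumPy is available):

import numpy as np

print(np.float32(10e-50))   # 0.0 -- the step size vanishes entirely in float32
print(1.0 + 10e-50 == 1.0)  # True -- even in float64, x + h is indistinguishable from x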
## Improved example: central difference
def numerical_diff(f, x):
    print("improved numerical_diff")
    h = 1e-4  # 0.0001
    return (f(x + h) - f(x - h)) / (2 * h)  # central difference
$y = 0.01x^2 + 0.1x$
def function_1(x):
    return 0.01*x**2 + 0.1*x
import numpy as np
import matplotlib.pylab as plt

x = np.arange(0.0, 20.0, 0.1)  # array of x from 0 to 20 with step 0.1
y = function_1(x)
plt.xlabel("x")
plt.ylabel("f(x)")
plt.plot(x, y)
plt.show()
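To visualize what the numerical derivative means, the slope returned by numerical_diff can be used to draw the tangent line at a point. This is a small illustrative sketch; the helper tangent_line is not part of the original listing:

def tangent_line(f, x):
    d = numerical_diff(f, x)       # slope at x
    y_intercept = f(x) - d * x     # intercept of the tangent line
    return lambda t: d * t + y_intercept

tf = tangent_line(function_1, 5)
plt.plot(x, function_1(x), label="f(x)")
plt.plot(x, tf(x), label="tangent at x = 5")
plt.xlabel("x")
plt.ylabel("f(x)")
plt.legend()
plt.show()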
# derivative of this function at x = 5 and at x = 10
print('derivative at x = 5:', numerical_diff(function_1, 5))
print('derivative at x = 10:', numerical_diff(function_1, 10))
improved numerical_diff
derivative at x = 5: 0.1999999999990898
improved numerical_diff
derivative at x = 10: 0.2999999999986347
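The analytic derivative of $y = 0.01x^2 + 0.1x$ is $0.02x + 0.1$, which gives exactly 0.2 at x = 5 and 0.3 at x = 10, so the central-difference results above are accurate to about 10 decimal places. A quick check (analytic_diff_1 is defined here only for this comparison):

def analytic_diff_1(x):
    return 0.02*x + 0.1  # exact derivative of function_1

for x0 in (5, 10):
    print(abs(numerical_diff(function_1, x0) - analytic_diff_1(x0)))  # on the order of 1e-12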
$f(x_0, x_1) = x^{2}_{0} + x^{2}_{1}$
def function_2(x):
    return x[0]**2 + x[1]**2
# partial derivative with respect to x_0 (with x_1 fixed at 4.0)
def function_2_tmp1(x):
    return x*x + 4.0**2.0

# partial derivative with respect to x_1 (with x_0 fixed at 3.0)
def function_2_tmp2(x):
    return 3.0**2.0 + x*x
print(numerical_diff(function_2_tmp1, 3.0))
print(numerical_diff(function_2_tmp2, 4.0))
improved numerical_diff
6.00000000000378
improved numerical_diff
7.999999999999119
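These two helpers simply fix one variable at the evaluation point and differentiate with respect to the other, which is exactly the definition of a partial derivative: analytically, $\frac{\partial f}{\partial x_0} = 2x_0 = 6$ at $x_0 = 3$ and $\frac{\partial f}{\partial x_1} = 2x_1 = 8$ at $x_1 = 4$, matching the numerical results above.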
# gradient function
def numerical_gradient(f, x):
    h = 1e-4  # 0.0001
    grad = np.zeros_like(x)

    for idx in range(x.size):
        tmp_val = x[idx]

        # compute f(x + h)
        x[idx] = tmp_val + h
        fxh1 = f(x)

        # compute f(x - h)
        x[idx] = tmp_val - h
        fxh2 = f(x)

        grad[idx] = (fxh1 - fxh2) / (2*h)
        x[idx] = tmp_val  # restore the original value

    return grad
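As a sanity check, the gradient of function_2 should be $(2x_0, 2x_1)$ at any point; the values below are approximate up to floating-point error:

print(numerical_gradient(function_2, np.array([3.0, 4.0])))  # approximately [6. 8.]
print(numerical_gradient(function_2, np.array([0.0, 2.0])))  # approximately [0. 4.]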
# Gradient Descent
def gradient_descent(f, init_x, lr=0.01, step_num=100):
    # f: function to optimize / init_x: initial value / lr: learning rate / step_num: number of iterations
    x = init_x
    for i in range(step_num):
        grad = numerical_gradient(f, x)
        x -= lr * grad
    return x
init_x = np.array([-3.0, 4.0])
print("result : ", gradient_descent(function_2, init_x = init_x, lr = 0.1, step_num = 100))
result : [-6.11110793e-10 8.14814391e-10]
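The true minimum of $f(x_0, x_1) = x_0^2 + x_1^2$ is at $(0, 0)$, and the result above is on the order of $10^{-10}$, so gradient descent has essentially reached it.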
# learning rate too large
init_x = np.array([-3.0, 4.0])
print(gradient_descent(function_2, init_x = init_x, lr = 10.0, step_num = 100))

# learning rate too small
init_x = np.array([-3.0, 4.0])
print(gradient_descent(function_2, init_x = init_x, lr = 1e-10, step_num = 100))
[-2.58983747e+13 -1.29524862e+12]
[-2.99999994  3.99999992]
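Recording the intermediate positions makes the failure modes easier to see: with lr = 10.0 the updates overshoot and diverge, while with lr = 1e-10 the point barely moves from where it started. A sketch of such a variant (gradient_descent_history is not in the original code):

def gradient_descent_history(f, init_x, lr=0.01, step_num=100):
    x = init_x
    x_history = []
    for i in range(step_num):
        x_history.append(x.copy())  # record the current position
        grad = numerical_gradient(f, x)
        x -= lr * grad
    return x, np.array(x_history)

x_final, x_history = gradient_descent_history(function_2, np.array([-3.0, 4.0]), lr=0.1, step_num=20)
plt.plot(x_history[:, 0], x_history[:, 1], 'o')
plt.xlabel("x0")
plt.ylabel("x1")
plt.show()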
# computing the gradient of an actual network
import sys, os
sys.path.append(os.pardir)
import numpy as np
from common.functions import softmax, cross_entropy_error
from common.gradient import numerical_gradient
class simpleNet:
    def __init__(self):
        self.W = np.random.randn(2, 3)  # initialize with a standard normal distribution

    # run a prediction
    def predict(self, x):
        return np.dot(x, self.W)

    # compute the value of the loss function
    def loss(self, x, t):
        z = self.predict(x)
        y = softmax(z)
        loss = cross_entropy_error(y, t)
        return loss
# main
net = simpleNet()
print(net.W)
x = np.array([0.6, 0.9])
p = net.predict(x)
print(p)
np.argmax(p)  # index of the maximum value
t = np.array([0, 0, 1])  # correct label (one-hot)
net.loss(x, t)
[[-2.05285131  0.8881997   0.82120879]
 [ 0.69678673  0.45633899 -0.7004122 ]]
[-0.60460273  0.94362491 -0.1376457 ]
1.5206784806778708
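This relatively large loss is consistent with the scores above: the predicted class is np.argmax(p) = 1, while the correct class is 2, whose softmax probability is only about $e^{-0.138}/(e^{-0.605} + e^{0.944} + e^{-0.138}) \approx 0.22$, giving a cross-entropy of $-\log 0.22 \approx 1.52$.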
# main - computing the gradient
'''
def f(W):
    return net.loss(x, t)
'''
# lambda version
f = lambda w: net.loss(x, t)
dW = numerical_gradient(f, net.W)
print(dW)
[[ 0.08221985  0.38664402 -0.46886387]
 [ 0.12332978  0.57996603 -0.70329581]]
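dW has the same shape as W, and each entry says how the loss changes when the corresponding weight is increased. One gradient-descent step on the weights would therefore look like the sketch below (the learning rate 0.1 is an arbitrary choice for illustration):

lr = 0.1
net.W -= lr * dW        # move the weights against the gradient
print(net.loss(x, t))   # the loss should now be slightly lower than before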