$E = \frac{1}{2}\displaystyle\sum_{k}(y_k - t_k)^2$

where $y_k$ is the network's output and $t_k$ is the one-hot target label.
import numpy as np

def mean_squared_error(y, t):
    return 0.5 * np.sum((y - t) ** 2)
# try out the mean squared error function
# the correct answer is 2
t = [0, 0, 1, 0, 0, 0, 0, 0, 0, 0]
# ex1: estimate that '2' is most likely (0.6)
y = [0.1, 0.05, 0.6, 0.0, 0.05, 0.1, 0.0, 0.1, 0.0, 0.0]
print('ex1 result : ', mean_squared_error(np.array(y), np.array(t)))
# ex2: estimate that '7' is most likely (0.6)
y = [0.1, 0.05, 0.1, 0.0, 0.05, 0.1, 0.0, 0.6, 0.0, 0.0]
print('ex2 result : ', mean_squared_error(np.array(y), np.array(t)))
ex1 result : 0.09750000000000003
ex2 result : 0.5975
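As a hand check on these numbers: in ex1 only the true-class term is sizable, $E = \frac{1}{2}\left((0.6-1)^2 + 0.1^2 + 0.05^2 + 0.05^2 + 0.1^2 + 0.1^2\right) = \frac{1}{2}(0.195) = 0.0975$, while in ex2 the mismatch contributes $(0.1-1)^2 = 0.81$ plus $(0.6-0)^2 = 0.36$, giving $\frac{1}{2}(1.195) = 0.5975$. The loss is smaller when the estimate matches the answer, which is exactly what a loss function should do.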
$E = -\displaystyle\sum_{k} t_k \log y_k$

Since $t_k$ is one-hot, only the true-label term survives, so the error reduces to $-\log$ of the probability assigned to the correct class.
def cross_entropy_error(y, t):
    delta = 1e-7  # guards against np.log(0) = -inf
    return -np.sum(t * np.log(y + delta))
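A quick aside on delta (my own illustration, not from the original): np.log(0) evaluates to -inf, so a single zero probability at the true label would make the whole sum unusable; adding a tiny constant turns it into a large but finite penalty.

print(np.log(0.0))         # -inf (numpy emits a divide-by-zero RuntimeWarning)
print(np.log(0.0 + 1e-7))  # about -16.12: a large but finite penalty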
# try out the cross entropy error function
# the correct answer is 2
t = [0, 0, 1, 0, 0, 0, 0, 0, 0, 0]
# ex1: estimate that '2' is most likely (0.6)
y = [0.1, 0.05, 0.6, 0.0, 0.05, 0.1, 0.0, 0.1, 0.0, 0.0]
print('ex1 result : ', cross_entropy_error(np.array(y), np.array(t)))
# ex2: estimate that '7' is most likely (0.6)
y = [0.1, 0.05, 0.1, 0.0, 0.05, 0.1, 0.0, 0.6, 0.0, 0.0]
print('ex2 result : ', cross_entropy_error(np.array(y), np.array(t)))
ex1 result : 0.510825457099338
ex2 result : 2.302584092994546
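These match the one-hot simplification noted above: $-\log(0.6 + 10^{-7}) \approx 0.5108$ and $-\log(0.1 + 10^{-7}) \approx 2.3026$, so the cross entropy, like the MSE, comes out far smaller when the network's top estimate agrees with the answer.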
import sys, os
sys.path.append(os.pardir)  # make the parent directory (which holds dataset/) importable
import numpy as np
from dataset.mnist import load_mnist
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)
# the full MNIST dataset
print('x_train shape : ', x_train.shape)
print('t_train shape : ', t_train.shape)
# randomly pick 10 samples
train_size = x_train.shape[0]
batch_size = 10
batch_mask = np.random.choice(train_size, batch_size)  # draw batch_size random indices from range(train_size)
x_batch = x_train[batch_mask]
t_batch = t_train[batch_mask]
x_train shape :  (60000, 784)
t_train shape :  (60000, 10)
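For reference (illustrative only, since the draw is random), np.random.choice(train_size, batch_size) returns an array of batch_size indices from range(train_size); it samples with replacement by default, so replace=False could be passed if duplicate samples in a batch should be ruled out.

np.random.choice(60000, 10)
# e.g. array([ 8013, 14666, 58210,  2131, 43507,  9018, 55311, 30333, 17512,  5189])
# (values differ on every run)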
# one-hot-encoding version (t is given as one-hot vectors)
def cross_entropy_error1(y, t):
    if y.ndim == 1:  # promote a single sample to a batch of one
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)
    batch_size = y.shape[0]
    return -np.sum(t * np.log(y + 1e-7)) / batch_size
# integer-label version (t is given as class indices)
def cross_entropy_error2(y, t):
    if y.ndim == 1:
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)
    batch_size = y.shape[0]
    # pick out only the network outputs at the true-label positions
    return -np.sum(np.log(y[np.arange(batch_size), t] + 1e-7)) / batch_size
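A minimal sanity check (my own example, not from the original): stacking the two earlier estimates into a batch, the one-hot version and the integer-label version should return the same value once the labels are recovered with np.argmax.

y = np.array([[0.1, 0.05, 0.6, 0.0, 0.05, 0.1, 0.0, 0.1, 0.0, 0.0],
              [0.1, 0.05, 0.1, 0.0, 0.05, 0.1, 0.0, 0.6, 0.0, 0.0]])
t_onehot = np.array([[0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
                     [0, 0, 1, 0, 0, 0, 0, 0, 0, 0]])
t_label = np.argmax(t_onehot, axis=1)  # array([2, 2])
print(cross_entropy_error1(y, t_onehot))  # about (0.5108 + 2.3026) / 2 = 1.4067
print(cross_entropy_error2(y, t_label))   # same value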