By its nature, an RNN treats its input as a time series. So when working with images, we can feed the first few rows of an image into a trained model and have it predict the next row. Including that predicted row, the next few rows are fed back in to predict the row after that; repeating this step row by row lets the model generate a complete width-by-height image.
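Concretely, with 8x8 images and a window of n_time = 4 rows, each image yields four training pairs (rows 0-3 -> row 4, rows 1-4 -> row 5, rows 2-5 -> row 6, rows 3-6 -> row 7), and generation keeps the top four rows and fills in the rest one row at a time. A minimal sketch of this windowing, using a random stand-in image and a placeholder predict_row function (both purely illustrative, not part of the code below):

import numpy as np

window = 4                    # rows fed in as one sequence
img = np.random.rand(8, 8)    # hypothetical 8x8 image

# Training pairs: rows j..j+3 as input, row j+4 as the target
pairs = [(img[j:j+window], img[j+window]) for j in range(8 - window)]

# Generation: keep the top 4 rows and predict rows 4..7 one at a time
predict_row = lambda rows: rows.mean(axis=0)  # placeholder for the trained model
gen = img.copy()
for j in range(8 - window):
    gen[j+window] = predict_row(gen[j:j+window])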
# RNN
# Image generation with an RNN (GRU)
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
img_size = 8      # width and height of each image
n_time = 4        # number of time steps (rows fed in as one sequence)
n_in = img_size   # number of neurons in the input layer
n_mid = 128       # number of neurons in the hidden (GRU) layer
n_out = img_size  # number of neurons in the output layer
n_disp = 10       # number of images to display
eta = 0.01        # learning rate
epochs = 201
batch_size = 32
interval = 10     # interval (in epochs) between progress reports
def sigmoid(x):
    return 1 / (1 + np.exp(-x))
# Load the data
# -- Prepare the image data --
digits = datasets.load_digits()
digits = np.asarray(digits.data)
digits_imgs = digits.reshape(-1, img_size, img_size)  # 8x8 images
digits_imgs /= 15  # scale the 0-16 pixel values down to roughly 0-1
digits_imgs.shape # (1797, 8, 8)
digits_imgs[1]
# array([[0. , 0. , 0. , 0.8 , 0.86666667,
# 0.33333333, 0. , 0. ],
# [0. , 0. , 0. , 0.73333333, 1.06666667,
# 0.6 , 0. , 0. ],
# [0. , 0. , 0.2 , 1. , 1.06666667,
# 0.4 , 0. , 0. ],
# [0. , 0.46666667, 1. , 1.06666667, 1.06666667,
# 0.13333333, 0. , 0. ],
# [0. , 0. , 0.06666667, 1.06666667, 1.06666667,
# 0.2 , 0. , 0. ],
# [0. , 0. , 0.06666667, 1.06666667, 1.06666667,
# 0.4 , 0. , 0. ],
# [0. , 0. , 0.06666667, 1.06666667, 1.06666667,
# 0.4 , 0. , 0. ],
# [0. , 0. , 0. , 0.73333333, 1.06666667,
# 0.66666667, 0. , 0. ]])
disp_imgs = digits_imgs[:n_disp]   # images held out for display and generation
train_imgs = digits_imgs[n_disp:]  # images used for training
n_sample_in_img = img_size - n_time  # samples per image: 8 - 4 = 4
n_sample = len(train_imgs) * n_sample_in_img
input_data = np.zeros((n_sample,n_time,n_in))
correct_data = np.zeros((n_sample,n_out))
for i in range(len(train_imgs)):
    for j in range(n_sample_in_img):
        sample_id = i * n_sample_in_img + j
        input_data[sample_id] = train_imgs[i, j:j+n_time]  # n_time consecutive rows as input
        correct_data[sample_id] = train_imgs[i, j+n_time]  # the following row as the target
x_train, x_test, t_train, t_test = train_test_split(input_data,correct_data)
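With the defaults above this yields 1787 x 4 = 7148 (input, target) pairs, which train_test_split then divides (75% / 25% by default). A quick sanity check, shown here only for reference:

print(input_data.shape)    # (7148, 4, 8) -> (samples, time steps, row width)
print(correct_data.shape)  # (7148, 8)    -> (samples, next row)
print(x_train.shape, x_test.shape)  # roughly a 3:1 split of the 7148 samples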
# GRU model
class GRULayer:
    def __init__(self, n_upper, n):
        # Initial parameter values (Xavier-style initialization; no bias terms)
        self.w = np.random.randn(3, n_upper, n) / np.sqrt(n_upper)  # input weights for the three gates
        self.v = np.random.randn(3, n, n) / np.sqrt(n)              # recurrent weights for the three gates

    def forward(self, x, y_prev):
        a0 = sigmoid(np.dot(x, self.w[0]) + np.dot(y_prev, self.v[0]))       # update gate
        a1 = sigmoid(np.dot(x, self.w[1]) + np.dot(y_prev, self.v[1]))       # reset gate
        a2 = np.tanh(np.dot(x, self.w[2]) + np.dot(a1 * y_prev, self.v[2]))  # new memory (candidate state)
        self.gates = np.stack((a0, a1, a2))
        self.y = (1 - a0) * y_prev + a0 * a2  # output

    def backward(self, x, y, y_prev, gates, grad_y):
        a0, a1, a2 = gates

        # New memory
        delta_a2 = grad_y * a0 * (1 - a2 ** 2)
        self.grad_w[2] += np.dot(x.T, delta_a2)
        self.grad_v[2] += np.dot((a1 * y_prev).T, delta_a2)

        # Update gate
        delta_a0 = grad_y * (a2 - y_prev) * a0 * (1 - a0)
        self.grad_w[0] += np.dot(x.T, delta_a0)
        self.grad_v[0] += np.dot(y_prev.T, delta_a0)

        # Reset gate
        s = np.dot(delta_a2, self.v[2].T)
        delta_a1 = s * y_prev * a1 * (1 - a1)
        self.grad_w[1] += np.dot(x.T, delta_a1)
        self.grad_v[1] += np.dot(y_prev.T, delta_a1)

        # Gradient with respect to x
        self.grad_x = (np.dot(delta_a0, self.w[0].T)
                       + np.dot(delta_a1, self.w[1].T)
                       + np.dot(delta_a2, self.w[2].T))

        # Gradient with respect to y_prev
        self.grad_y_prev = (np.dot(delta_a0, self.v[0].T)
                            + np.dot(delta_a1, self.v[1].T)
                            + a1 * s + grad_y * (1 - a0))

    def reset_sum_grad(self):
        self.grad_w = np.zeros_like(self.w)
        self.grad_v = np.zeros_like(self.v)

    def update(self, eta):
        self.w -= eta * self.grad_w
        self.v -= eta * self.grad_v
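For reference, the forward pass above computes the following GRU equations, with $a_0$ the update gate, $a_1$ the reset gate, and $a_2$ the candidate state (this only restates what the code does):

$$a_0 = \sigma(x W_0 + y_{prev} V_0), \qquad a_1 = \sigma(x W_1 + y_{prev} V_1)$$
$$a_2 = \tanh\big(x W_2 + (a_1 \odot y_{prev}) V_2\big), \qquad y = (1 - a_0) \odot y_{prev} + a_0 \odot a_2$$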
# Output layer (fully connected, identity activation)
class OutputLayer:
    def __init__(self, n_upper, n):
        self.w = np.random.randn(n_upper, n) / np.sqrt(n_upper)  # Xavier-initialized weights
        self.b = np.zeros(n)

    def forward(self, x):
        self.x = x
        u = np.dot(x, self.w) + self.b
        self.y = u  # identity activation

    def backward(self, t):
        delta = self.y - t
        self.grad_w = np.dot(self.x.T, delta)
        self.grad_b = np.sum(delta, axis=0)
        self.grad_x = np.dot(delta, self.w.T)

    def update(self, eta):
        self.w -= eta * self.grad_w
        self.b -= eta * self.grad_b
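Since the output layer is linear and the network is trained with a squared-error loss, the error signal in backward() is simply the difference between prediction and target (again just restating the code):

$$E = \frac{1}{2}\sum_k (y_k - t_k)^2 \;\Rightarrow\; \frac{\partial E}{\partial u_k} = y_k - t_k$$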
def train(x_mb, t_mb):
    # Forward pass through the GRU layer, one time step at a time
    y_rnn = np.zeros((len(x_mb), n_time + 1, n_mid))
    gates_rnn = np.zeros((3, len(x_mb), n_time, n_mid))
    y_prev = y_rnn[:, 0, :]
    for i in range(n_time):
        x = x_mb[:, i, :]
        gru_layer.forward(x, y_prev)
        y = gru_layer.y
        y_rnn[:, i+1, :] = y
        y_prev = y
        gates = gru_layer.gates
        gates_rnn[:, :, i, :] = gates

    # Forward pass through the output layer
    output_layer.forward(y)

    # Backward pass through the output layer
    output_layer.backward(t_mb)
    grad_y = output_layer.grad_x

    # Backward pass through the GRU layer (backpropagation through time)
    gru_layer.reset_sum_grad()
    for i in reversed(range(n_time)):
        x = x_mb[:, i, :]
        y = y_rnn[:, i+1, :]
        y_prev = y_rnn[:, i, :]
        gates = gates_rnn[:, :, i, :]
        gru_layer.backward(x, y, y_prev, gates, grad_y)
        grad_y = gru_layer.grad_y_prev

    # Update the parameters
    gru_layer.update(eta)
    output_layer.update(eta)
# -- Prediction --
def predict(x_mb):
    # Forward pass through the GRU layer
    y_prev = np.zeros((len(x_mb), n_mid))
    for i in range(n_time):
        x = x_mb[:, i, :]
        gru_layer.forward(x, y_prev)
        y = gru_layer.y
        y_prev = y

    # Forward pass through the output layer
    output_layer.forward(y)
    return output_layer.y
def get_error(x, t):
    # Squared error averaged over the samples
    y = predict(x)
    return np.sum(np.square(y - t)) / len(x)
def generate_images():
    # Display the original held-out images
    plt.figure(figsize=(10, 1))
    for i in range(n_disp):
        ax = plt.subplot(1, n_disp, i+1)
        plt.imshow(disp_imgs[i], cmap="Greys_r")
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
    plt.show()

    # Keep the top n_time rows of each image and generate the remaining rows
    gen_imgs = disp_imgs.copy()
    plt.figure(figsize=(10, 1))
    for i in range(n_disp):
        for j in range(n_sample_in_img):
            x = gen_imgs[i, j:j+n_time].reshape(1, n_time, img_size)
            gen_imgs[i, j+n_time] = predict(x)[0]
        ax = plt.subplot(1, n_disp, i+1)
        plt.imshow(gen_imgs[i], cmap="Greys_r")
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
    plt.show()
# Instantiate each layer
gru_layer = GRULayer(n_in, n_mid)
output_layer = OutputLayer(n_mid, n_out)

n_batch = len(x_train) // batch_size  # number of mini-batches per epoch
for i in range(epochs):
    # Shuffle the training indices each epoch
    index_random = np.arange(len(x_train))
    np.random.shuffle(index_random)
    for j in range(n_batch):
        # Extract a mini-batch
        mb_index = index_random[j*batch_size:(j+1)*batch_size]
        x_mb = x_train[mb_index, :]
        t_mb = t_train[mb_index, :]
        train(x_mb, t_mb)

    if i % interval == 0:
        error_train = get_error(x_train, t_train)
        error_test = get_error(x_test, t_test)
        print("Epoch:" + str(i) + "/" + str(epochs - 1),
              "Error_train: " + str(error_train),
              "Error_test: " + str(error_test))
        generate_images()