
Because an RNN treats its input as a time series, an image can be processed row by row: feed the first few rows of an image into a trained model and it predicts the next row. By appending each predicted row to the input window and predicting again, the model generates the image one row at a time.
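As a minimal sketch of that loop (using the predict() helper defined later in this post, and assuming seed_img is an 8x8 array whose first n_time rows serve as the seed):

gen = seed_img.copy()                    # rows 0..3 are kept as the seed
for j in range(img_size - n_time):       # predict rows 4..7 one at a time
    window = gen[j:j+n_time].reshape(1, n_time, img_size)
    gen[j+n_time] = predict(window)[0]   # slide the window over the new row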

 

# RNN

# Image generation with an RNN
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split

img_size = 8     # image width and height
n_time = 4       # number of time steps (rows fed in as a sequence)
n_in = img_size  # number of input-layer neurons
n_mid = 128      # number of hidden-layer neurons
n_out = img_size # number of output-layer neurons
n_disp = 10      # number of images to display

eta = 0.01       # learning rate
epochs = 201
batch_size = 32
interval = 10    # epochs between progress reports

def sigmoid(x):
    return 1/(1+np.exp(-x))
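A side note of mine, not in the original code: np.exp(-x) can overflow for large negative x. The activations here stay small enough that the simple form works, but a numerically safer sketch would be:

def sigmoid_stable(x):
    # Split by sign so np.exp is only ever called with non-positive arguments.
    out = np.empty_like(x, dtype=float)
    pos = x >= 0
    out[pos] = 1.0 / (1.0 + np.exp(-x[pos]))
    ex = np.exp(x[~pos])
    out[~pos] = ex / (1.0 + ex)
    return out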

# ๋ฐ์ดํ„ฐ ๋กœ๋“œ

# -- ์ด๋ฏธ์ง€ ๋ฐ์ดํ„ฐ ์ค€๋น„ --
digits = datasets.load_digits()
digits = np.asarray(digits.data)
digits_imgs = digits.reshape(-1,img_size,img_size) # 8x8
digits_imgs /= 15 # scale to roughly 0~1 (raw pixel values are 0~16, so some entries slightly exceed 1)
digits_imgs.shape # (1797, 8, 8)
digits_imgs[1]
# array([[0.        , 0.        , 0.        , 0.8       , 0.86666667,
#         0.33333333, 0.        , 0.        ],
#        [0.        , 0.        , 0.        , 0.73333333, 1.06666667,
#         0.6       , 0.        , 0.        ],
#        [0.        , 0.        , 0.2       , 1.        , 1.06666667,
#         0.4       , 0.        , 0.        ],
#        [0.        , 0.46666667, 1.        , 1.06666667, 1.06666667,
#         0.13333333, 0.        , 0.        ],
#        [0.        , 0.        , 0.06666667, 1.06666667, 1.06666667,
#         0.2       , 0.        , 0.        ],
#        [0.        , 0.        , 0.06666667, 1.06666667, 1.06666667,
#         0.4       , 0.        , 0.        ],
#        [0.        , 0.        , 0.06666667, 1.06666667, 1.06666667,
#         0.4       , 0.        , 0.        ],
#        [0.        , 0.        , 0.        , 0.73333333, 1.06666667,
#         0.66666667, 0.        , 0.        ]])

disp_imgs = digits_imgs[:n_disp]    # images reserved for the generation demo
train_imgs = digits_imgs[n_disp:]   # training images
n_sample_in_img = img_size - n_time # samples per image: 8 - 4 = 4
n_sample = len(train_imgs) * n_sample_in_img

# Build (input window, next row) training pairs with a sliding window
input_data = np.zeros((n_sample, n_time, n_in))
correct_data = np.zeros((n_sample, n_out))
for i in range(len(train_imgs)):
    for j in range(n_sample_in_img):
        sample_id = i*n_sample_in_img + j
        input_data[sample_id] = train_imgs[i, j:j+n_time]  # rows j..j+3
        correct_data[sample_id] = train_imgs[i, j+n_time]  # row j+4

x_train, x_test, t_train, t_test = train_test_split(input_data,correct_data)
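A quick sanity check on the shapes (my addition; the counts assume the full 1797-image digits set):

print(input_data.shape)             # (7148, 4, 8): (1797 - 10) images x 4 windows each
print(correct_data.shape)           # (7148, 8)
print(x_train.shape, x_test.shape)  # train_test_split defaults to a 75/25 split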

# GRU model 

class GRULayer:
    def __init__(self, n_upper, n):
        # Initial parameter values (Xavier-style initialization)
        # w: input weights, v: recurrent weights
        # index 0/1/2 = update gate, reset gate, new memory
        self.w = np.random.randn(3, n_upper, n) / np.sqrt(n_upper)
        self.v = np.random.randn(3, n, n) / np.sqrt(n)

    def forward(self, x, y_prev):
        a0 = sigmoid(np.dot(x, self.w[0]) + np.dot(y_prev, self.v[0]))       # update gate
        a1 = sigmoid(np.dot(x, self.w[1]) + np.dot(y_prev, self.v[1]))       # reset gate
        a2 = np.tanh(np.dot(x, self.w[2]) + np.dot(a1 * y_prev, self.v[2]))  # new memory
        self.gates = np.stack((a0, a1, a2))

        self.y = (1 - a0) * y_prev + a0 * a2  # output
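        # In equation form (my annotation), with z = a0 (update gate),
        # r = a1 (reset gate), and h~ = a2 (new memory):
        #   z_t  = sigmoid(x_t W0 + y_{t-1} V0)
        #   r_t  = sigmoid(x_t W1 + y_{t-1} V1)
        #   h~_t = tanh(x_t W2 + (r_t * y_{t-1}) V2)
        #   y_t  = (1 - z_t) * y_{t-1} + z_t * h~_t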

    def backward(self, x, y, y_prev, gates, grad_y):
        a0, a1, a2 = gates

        # New memory
        delta_a2 = grad_y * a0 * (1 - a2 ** 2)
        self.grad_w[2] += np.dot(x.T, delta_a2)
        self.grad_v[2] += np.dot((a1 * y_prev).T, delta_a2)

        # ์—…๋ฐ์ดํŠธ ๊ฒŒ์ดํŠธ
        delta_a0 = grad_y * (a2 - y_prev) * a0 * (1 - a0)
        self.grad_w[0] += np.dot(x.T, delta_a0)
        self.grad_v[0] += np.dot(y_prev.T, delta_a0)

        # Reset gate
        s = np.dot(delta_a2, self.v[2].T)
        delta_a1 = s * y_prev * a1 * (1 - a1)
        self.grad_w[1] += np.dot(x.T, delta_a1)
        self.grad_v[1] += np.dot(y_prev.T, delta_a1)

        # x์˜ ๊ธฐ์šธ๊ธฐ
        self.grad_x = np.dot(delta_a0, self.w[0].T)
        + np.dot(delta_a1, self.w[1].T)
        + np.dot(delta_a2, self.w[2].T)

        # y_prev ๊ธฐ์šธ๊ธฐ
        self.grad_y_prev = np.dot(delta_a0, self.v[0].T)
        + np.dot(delta_a1, self.v[1].T)
        + a1 * s + grad_y * (1 - a0)

    def reset_sum_grad(self):
        self.grad_w = np.zeros_like(self.w)
        self.grad_v = np.zeros_like(self.v)

    def update(self, eta):
        self.w -= eta * self.grad_w
        self.v -= eta * self.grad_v
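
As a quick smoke test (my addition, not part of the original post), a single forward step on random data confirms the layer produces the expected hidden-state shape:

layer_check = GRULayer(n_in, n_mid)
x_check = np.random.randn(2, n_in)     # a mini-batch of 2 image rows
y_prev_check = np.zeros((2, n_mid))    # initial hidden state
layer_check.forward(x_check, y_prev_check)
print(layer_check.y.shape)             # (2, 128)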

# OutputLayer

class OutputLayer:
    def __init__(self, n_upper, n):
        self.w = np.random.randn(n_upper, n) / np.sqrt(n_upper)  # Xavier initialization
        self.b = np.zeros(n)

    def forward(self, x):
        self.x = x
        u = np.dot(x, self.w) + self.b
        self.y = u  # identity activation (regression output)

    def backward(self, t):
        delta = self.y - t

        self.grad_w = np.dot(self.x.T, delta)
        self.grad_b = np.sum(delta, axis=0)
        self.grad_x = np.dot(delta, self.w.T)

    def update(self, eta):
        self.w -= eta * self.grad_w
        self.b -= eta * self.grad_b

# -- Training on one mini-batch --
def train(x_mb, t_mb):
    # Forward propagation through the GRU layer
    y_rnn = np.zeros((len(x_mb), n_time+1, n_mid))
    gates_rnn = np.zeros((3, len(x_mb), n_time, n_mid))
    y_prev = y_rnn[:, 0, :]
    for i in range(n_time):
        x = x_mb[:, i, :]
        gru_layer.forward(x, y_prev)

        y = gru_layer.y
        y_rnn[:, i+1, :] = y
        y_prev = y

        gates = gru_layer.gates
        gates_rnn[:, :, i, :] = gates

    # Forward propagation through the output layer
    output_layer.forward(y)

    # ้€†ไผๆ’ญ ๅ‡บๅŠ›ๅฑค
    output_layer.backward(t_mb)
    grad_y = output_layer.grad_x

    # Backpropagation through time in the GRU layer
    gru_layer.reset_sum_grad()
    for i in reversed(range(n_time)):
        x = x_mb[:, i, :]
        y = y_rnn[:, i+1, :]
        y_prev = y_rnn[:, i, :]
        gates = gates_rnn[:, :, i, :]

        gru_layer.backward(x, y, y_prev, gates, grad_y)
        grad_y = gru_layer.grad_y_prev

   # ํŒŒ๋ผ๋ฏธํ„ฐ ๊ฐฑ์‹ 
    gru_layer.update(eta)
    output_layer.update(eta)

# -- Prediction --
def predict(x_mb):
    # Forward propagation through the GRU layer
    y_prev = np.zeros((len(x_mb), n_mid))
    for i in range(n_time):
        x = x_mb[:, i, :]
        gru_layer.forward(x, y_prev)
        y = gru_layer.y
        y_prev = y

    # Forward propagation through the output layer
    output_layer.forward(y)
    return output_layer.y

# -- Error (sum of squared errors averaged over the batch) --
def get_error(x, t):
    y = predict(x)
    return np.sum(np.square(y - t)) / len(x)

# -- Display the original images and the generated images --
def generate_images():
    # Top row: the original held-out images
    plt.figure(figsize=(10, 1))
    for i in range(n_disp):
        ax = plt.subplot(1, n_disp, i+1)
        plt.imshow(disp_imgs[i].tolist(), cmap="Greys_r")
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
    plt.show()

    # Bottom row: keep the first n_time rows of each image as the seed,
    # then generate the remaining rows one at a time
    gen_imgs = disp_imgs.copy()
    plt.figure(figsize=(10, 1))
    for i in range(n_disp):
        for j in range(n_sample_in_img):
            x = gen_imgs[i, j:j+n_time].reshape(1, n_time, img_size)
            gen_imgs[i, j+n_time] = predict(x)[0]
        ax = plt.subplot(1, n_disp, i+1)
        plt.imshow(gen_imgs[i].tolist(), cmap="Greys_r")
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
    plt.show()

# -- Training loop --
n_batch = len(x_train) // batch_size  # number of mini-batches per epoch

for i in range(epochs):
    index_random = np.arange(len(x_train))
    np.random.shuffle(index_random)
    for j in range(n_batch):
        mb_index = index_random[j*batch_size:(j+1)*batch_size]
        x_mb = x_train[mb_index, :]
        t_mb = t_train[mb_index, :]
        train(x_mb, t_mb)

    if i%interval==0:
        error_train = get_error(x_train,t_train)
        error_test = get_error(x_test,t_test)
        print("Epoch:"+str(i)+"/"+str(epochs-1),
              "Error_train: "+str(error_train),
              "Error_test: "+str(error_test))

        generate_images()

 

๋ฐ˜์‘ํ˜•

'๐Ÿ‘พ Deep Learning' ์นดํ…Œ๊ณ ๋ฆฌ์˜ ๋‹ค๋ฅธ ๊ธ€

VAE(Variational Autoencoder) (1)  (0) 2021.02.18
nvidia-smi ์˜ต์…˜  (0) 2021.02.16
[DL] GRU (gated recurrent unit)  (0) 2021.02.10
activation ์ข…๋ฅ˜  (0) 2021.02.10
XG ๋ถ€์ŠคํŠธ(eXtream Gradient Boosting)  (0) 2021.02.09
Done!