    DeepLearningZeroToAll Lab Practice

    Tags:   DL    ZeroToAll    Practice   

    Import Libraries

    In [1]:
    import tensorflow as tf
    tf.set_random_seed(1)
    tf.__version__
    
    Out[1]:
    '1.0.0'
    In [2]:
    import numpy as np
    import matplotlib.pyplot as plt
    import random
    

    Lab01. Operations and Placeholders

    1. Constant - Hello World!

    In [3]:
    words = tf.constant("Hello TF!")
    sess = tf.Session()
    
    print(sess.run(words))
    
    b'Hello TF!'
    

    2. Operation - Add

    In [4]:
    node1 = tf.constant(3.0, tf.float32)
    node2 = tf.constant(4.0)
    
    node_add = tf.add(node1, node2)
    
    sess = tf.Session()
    
    print(sess.run([node1, node2]))
    print(sess.run(node_add))
    
    [3.0, 4.0]
    7.0
    
    Addon: Tensor Manipulation
    • np.array.ndim : dimension (rank) of an array
    • np.array.shape / tf.shape(matrix) : shape of an array or tensor
    • tf.reduce_mean(matrix) : mean of all elements. With an axis argument (0, 1, or -1), the mean is taken along that axis.
    • tf.argmax(matrix, axis) : index of the maximum value along the given axis
    • tf.reshape(matrix, shape) : change the shape of a tensor to the given shape
    • tf.squeeze(matrix) : remove dimensions of size 1 (all of them, or only the listed axes)
    • tf.expand_dims(matrix, dim) : insert a dimension of size 1 at the given index
    • tf.one_hot(matrix, depth) : one-hot encode the values with the given depth; it adds one dimension to the tensor
    • tf.cast(matrix, dtype) : change the data type of the given tensor
    • tf.stack(matrices, axis=0) : stack a list of tensors along the given axis
    • tf.ones_like(matrix) : create a tensor with the same shape as the given one, filled with 1
    • tf.zeros_like(matrix) : create a tensor with the same shape as the given one, filled with 0
    • zip(*args) : iterate over several sequences in parallel (Python built-in); a few of these operations are exercised in the short sketch below
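
    A minimal sketch (my own addition, not from the lab) exercising a few of the operations above on a made-up tensor:

    t = tf.constant([[0., 1., 2.], [3., 4., 5.]])                  # shape [2, 3]
    with tf.Session() as demo_sess:
        print(demo_sess.run(tf.reduce_mean(t)))                    # 2.5
        print(demo_sess.run(tf.reduce_mean(t, axis=0)))            # [1.5 2.5 3.5]
        print(demo_sess.run(tf.argmax(t, axis=1)))                 # [2 2]
        print(demo_sess.run(tf.squeeze(tf.expand_dims(t, 0))))     # expand to [1, 2, 3], squeeze back to [2, 3]
        print(demo_sess.run(tf.one_hot([0, 2], depth=3)))          # [[1. 0. 0.] [0. 0. 1.]]
        print(demo_sess.run(tf.cast(t > 2., tf.float32)))          # [[0. 0. 0.] [1. 1. 1.]]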

    3. Placeholder

    In [5]:
    ph1 = tf.placeholder(tf.float32)
    ph2 = tf.placeholder(tf.float32)
    
    node_add = ph1 + ph2
    
    print(sess.run(node_add, feed_dict = {ph1 : 3, ph2 : 4.5}))
    print(sess.run(node_add, feed_dict = {ph1 : [[1,4,5], [2,4,6]], ph2 : [[4,4,4], [1,3,5]]}))
    
    7.5
    [[  5.   8.   9.]
     [  3.   7.  11.]]
    

    Lab02-04. Linear Regression

    1. Setting W, b, cost, optimizer, train

    In [6]:
    W = tf.Variable(tf.random_normal([1]), name = 'weight')
    b = tf.Variable(tf.random_normal([1]), name = 'bias')
    
    x_train, y_train = ([1,2,3,4,5], [3,5,7,9,11])
    
    hypothesis = x_train * W + b
    
    In [7]:
    cost = tf.reduce_mean(tf.square(hypothesis - y_train))
    
    In [8]:
    optimizer = tf.train.GradientDescentOptimizer(learning_rate = 0.03)
    train = optimizer.minimize(cost)
    

    2. Session run

    In [9]:
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    
    In [10]:
    for step in range(10001):
        sess.run(train)
        if step % 400 == 0 and step <= 2000:
            print(step, sess.run(cost), sess.run(W), sess.run(b))
    
    0 5.49416 [ 1.14564347] [ 1.55450952]
    400 2.82114e-05 [ 1.99655163] [ 1.0124495]
    800 8.12876e-09 [ 1.99994147] [ 1.00021136]
    1200 8.18545e-12 [ 1.99999809] [ 1.00000679]
    1600 8.18545e-12 [ 1.99999809] [ 1.00000679]
    2000 8.18545e-12 [ 1.99999809] [ 1.00000679]
    

    3. Linear regression with placeholder

    In [11]:
    X = tf.placeholder(tf.float32)
    Y = tf.placeholder(tf.float32)
    W = tf.Variable(tf.random_normal([1]), name = 'weight')
    b = tf.Variable(tf.random_normal([1]), name = 'bias')
    
    hypothesis = X * W + b
    cost = tf.reduce_mean(tf.square(hypothesis - Y))
    optimizer = tf.train.GradientDescentOptimizer(learning_rate = 0.03)
    train = optimizer.minimize(cost)
    
    In [12]:
    sess_using_ph = tf.Session()
    sess_using_ph.run(tf.global_variables_initializer())
    
    In [13]:
    for step in range(10001):
        cost_val , W_val, b_val, _ = sess_using_ph.run(
           [cost, W, b, train], feed_dict = {X: x_train, Y: y_train}
        )
        if step % 400 == 0 and step <= 2000:
            print(step, cost_val, W_val, b_val)
    
    0 16.6347 [ 1.87056863] [ 0.18222603]
    400 2.81237e-05 [ 2.00340819] [ 0.9876954]
    800 8.04252e-09 [ 2.0000577] [ 0.99979162]
    1200 5.50244e-12 [ 2.00000167] [ 0.99999458]
    1600 5.50244e-12 [ 2.00000167] [ 0.99999458]
    2000 5.50244e-12 [ 2.00000167] [ 0.99999458]
    

    3.1 Getting prediction values by feeding the placeholder

    In [14]:
    print(sess_using_ph.run(hypothesis, feed_dict = {X: [100, 200, 300]}))
    
    [ 201.00016785  401.00033569  601.00048828]
    

    4. Visualizing cost

    In [15]:
    X = tf.placeholder(tf.float32)
    Y = tf.placeholder(tf.float32)
    W = tf.placeholder(tf.float32)
    hypothesis = X * W
    
    cost = tf.reduce_mean(tf.square(hypothesis - Y))
    
    In [16]:
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    
    In [17]:
    W_val = list()
    cost_val = list()
    
    for i in range(-10, 110):
        feed_W = i * 0.05
        curr_cost, curr_W = sess.run([cost, W], feed_dict={X: [1,2,3], Y: [3,5,7], W: feed_W})
        W_val.append(curr_W)
        cost_val.append(curr_cost)
    
    In [18]:
    plt.plot(W_val, cost_val)
    plt.show()
    

    5. Gradient Descent by hand

    In [19]:
    X = tf.placeholder(tf.float32)
    Y = tf.placeholder(tf.float32)
    W = tf.Variable(tf.random_normal([1]), name = 'weight')
    hypothesis = X * W
    
    cost = tf.reduce_mean(tf.square(hypothesis - Y))
    
    In [20]:
    learning_rate = 0.03
    gradient = tf.reduce_mean((hypothesis - Y)*X)
    update = W.assign(W - learning_rate*gradient)
    
    # For the automatic way, use GradientDescentOptimizer:
    #     optimizer = tf.train.GradientDescentOptimizer(learning_rate = learning_rate)
    #     train = optimizer.minimize(cost)

    # We can also inspect or modify the gradients explicitly:
    #     gvs = optimizer.compute_gradients(cost)
    #         computes the gradients of cost and returns a list of (gradient, variable) pairs
    #     apply_gradients = optimizer.apply_gradients(gvs)
    #     sess.run(apply_gradients)
    #         applies the (possibly modified) gradient values to the variables
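
    A minimal sketch (my own addition, not part of the lab) of that compute_gradients / apply_gradients route, here clipping the gradient of W before applying it; the clipping range is arbitrary and only for illustration:

    manual_optimizer = tf.train.GradientDescentOptimizer(learning_rate = learning_rate)
    gvs = manual_optimizer.compute_gradients(cost, var_list=[W])          # [(gradient, variable)]
    clipped_gvs = [(tf.clip_by_value(grad, -1.0, 1.0), var) for grad, var in gvs]
    apply_clipped = manual_optimizer.apply_gradients(clipped_gvs)
    # One descent step with the clipped gradient would then be:
    # sess.run(apply_clipped, feed_dict = {X: [1,2,3], Y: [3,5,7]})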
    
    In [21]:
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    
    In [22]:
    step_val = list()
    W_val = list()
    cost_val = list()
    for step in range(41):
        curr_W, curr_cost, _ = sess.run([W, cost, update], feed_dict = {X: [1,2,3], Y: [3,5,7]})
        step_val.append(step)
        W_val.append(curr_W)
        cost_val.append(curr_cost)
    
    In [23]:
    fig, ax = plt.subplots(figsize = [12,12])
    
    ax.plot(W_val, cost_val)
    ax.scatter(W_val, cost_val, c='r', s=12)
    for i, step in enumerate(step_val):
        ax.annotate(step, (W_val[i], cost_val[i]))
    
    plt.show()
    

    6. Multivariate Linear Regression (using numpy)

    In [24]:
    xy = np.loadtxt('../data-01-test-score.csv', delimiter=',', dtype=np.float32)
    x_data = xy[:, 0:-1]
    y_data = xy[:, [-1]]
    
    In [25]:
    x_data.shape
    
    Out[25]:
    (25, 3)
    In [26]:
    y_data.shape
    
    Out[26]:
    (25, 1)
    In [27]:
    X = tf.placeholder(tf.float32, shape = [None, 3])
    Y = tf.placeholder(tf.float32, shape = [None, 1])
    W = tf.Variable(tf.random_normal([3, 1]), name = 'weight')
    b = tf.Variable(tf.random_normal([1]), name = 'bias')
    
    hypothesis = tf.matmul(X, W) + b
    # Do not use *; use tf.matmul instead.
    # If we use * between two matrices, TensorFlow performs element-wise
    # multiplication with broadcasting, not matrix multiplication.
    cost = tf.reduce_mean(tf.square(hypothesis - Y)) 
    optimizer = tf.train.GradientDescentOptimizer(learning_rate = 1e-6)
    train = optimizer.minimize(cost)
    
    In [28]:
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    
    In [29]:
    step_val = list()
    cost_val = list()
    W_val = np.empty((0,3), float)
    
    for step in range(201):
        curr_cost, curr_W, _ = sess.run(
            [cost, W, train], feed_dict = {X: x_data, Y: y_data}
        )
        if step % 5 == 0:
            step_val.append(step)
            cost_val.append(curr_cost)
            W_val = np.vstack((W_val, curr_W.T))
    #         Used np.vstack to collect the W values; growing an array row by row
    #         like this is inefficient (appending to a list and stacking once is better)
    
    In [30]:
    W0_val_list = W_val[:, 0].tolist()
    W1_val_list = W_val[:, 1].tolist()
    W2_val_list = W_val[:, 2].tolist()
    
    In [31]:
    # Show three subplots to check whether each W value converges
    fig = plt.figure(figsize = [12,18])
    
    ax0 = fig.add_subplot(3, 1, 1)
    
    ax0.plot(W0_val_list, cost_val)
    ax0.scatter(W0_val_list, cost_val, c='r', s=12)
    for i, step in enumerate(step_val):
        ax0.annotate(step, (W0_val_list[i], cost_val[i]))
    ax0.set_title('W[:, 0] attribute gradient descent')
    
    ax1 = fig.add_subplot(3, 1, 2)
    
    ax1.plot(W1_val_list, cost_val)
    ax1.scatter(W1_val_list, cost_val, c='r', s=12)
    for i, step in enumerate(step_val):
        ax1.annotate(step, (W1_val_list[i], cost_val[i]))
    ax1.set_title('W[:, 1] attribute gradient descent')
    
    ax2 = fig.add_subplot(3, 1, 3)
    
    ax2.plot(W2_val_list, cost_val)
    ax2.scatter(W2_val_list, cost_val, c='r', s=12)
    for i, step in enumerate(step_val):
        ax2.annotate(step, (W2_val_list[i], cost_val[i]))
    ax2.set_title('W[:, 2] attribute gradient descent')
    
    plt.show()
    

    6.1 Using AdamOptimizer instead

    In [32]:
    X = tf.placeholder(tf.float32, shape = [None, 3])
    Y = tf.placeholder(tf.float32, shape = [None, 1])
    W = tf.Variable(tf.random_normal([3, 1]), name = 'weight')
    b = tf.Variable(tf.random_normal([1]), name = 'bias')
    
    hypothesis = tf.matmul(X, W) + b
    cost = tf.reduce_mean(tf.square(hypothesis - Y))
    optimizer = tf.train.AdamOptimizer(learning_rate = 1e-2)
    # Used AdamOptimizer instead of GradientDescentOptimizer; Watch the difference!
    train = optimizer.minimize(cost)
    
    In [33]:
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    
    In [34]:
    step_val = list()
    cost_val = list()
    W_val = np.empty((0,3), float)
    
    for step in range(201):
        curr_cost, curr_W, _ = sess.run(
            [cost, W, train], feed_dict = {X: x_data, Y: y_data}
        )
        if step % 5 == 0:
            step_val.append(step)
            cost_val.append(curr_cost)
            W_val = np.vstack((W_val, curr_W.T))
    
    In [35]:
    W0_val_list = W_val[:, 0].tolist()
    W1_val_list = W_val[:, 1].tolist()
    W2_val_list = W_val[:, 2].tolist()
    
    In [36]:
    fig = plt.figure(figsize = [12,18])
    
    ax0 = fig.add_subplot(3, 1, 1)
    
    ax0.plot(W0_val_list, cost_val)
    ax0.scatter(W0_val_list, cost_val, c='r', s=12)
    for i, step in enumerate(step_val):
        ax0.annotate(step, (W0_val_list[i], cost_val[i]))
    ax0.set_title('W[:, 0] attribute convergence by AdamOptimizer')
    
    ax1 = fig.add_subplot(3, 1, 2)
    
    ax1.plot(W1_val_list, cost_val)
    ax1.scatter(W1_val_list, cost_val, c='r', s=12)
    for i, step in enumerate(step_val):
        ax1.annotate(step, (W1_val_list[i], cost_val[i]))
    ax1.set_title('W[:, 1] attribute convergence by AdamOptimizer')
    
    ax2 = fig.add_subplot(3, 1, 3)
    
    ax2.plot(W2_val_list, cost_val)
    ax2.scatter(W2_val_list, cost_val, c='r', s=12)
    for i, step in enumerate(step_val):
        ax2.annotate(step, (W2_val_list[i], cost_val[i]))
    ax2.set_title('W[:, 2] attribute convergence by AdamOptimizer')
    
    plt.show()
    

    Addon: Tensorflow Queue Runner

    [Figure: TensorFlow queue-runner input pipeline]
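
    A minimal sketch (my own addition, reusing the '../data-01-test-score.csv' file from section 6 above, with hypothetical names such as qr_sess) of how a queue-runner CSV input pipeline is built and drained:

    filename_queue = tf.train.string_input_producer(
        ['../data-01-test-score.csv'], shuffle=False, name='filename_queue')
    reader = tf.TextLineReader()
    key, value = reader.read(filename_queue)
    record_defaults = [[0.], [0.], [0.], [0.]]                     # 4 numeric columns in the csv
    xy = tf.decode_csv(value, record_defaults=record_defaults)
    train_x_batch, train_y_batch = tf.train.batch([xy[0:-1], xy[-1:]], batch_size=10)

    qr_sess = tf.Session()
    qr_coord = tf.train.Coordinator()
    qr_threads = tf.train.start_queue_runners(sess=qr_sess, coord=qr_coord)
    x_batch, y_batch = qr_sess.run([train_x_batch, train_y_batch])  # one mini-batch of 10 rows
    qr_coord.request_stop()
    qr_coord.join(qr_threads)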

    Lab05-06. Logistic Classification & Softmax Classification

    1. Simple Example

    In [37]:
    x_data = np.array([
        [1,2],
        [2,3],
        [3,1],
        [4,3],
        [5,3],
        [6,2],
    ])
    y_data = np.array([
        [0],[0],[0],[1],[1],[1],
    ])
    
    In [38]:
    for i in range(2):
        plt.scatter(x_data[(y_data == i).reshape(-1), 0], x_data[(y_data == i).reshape(-1), 1], s=12, label = i)
    plt.show()
    
    In [39]:
    X = tf.placeholder(tf.float32, shape = [None, 2])
    Y = tf.placeholder(tf.float32, shape = [None, 1])
    W = tf.Variable(tf.random_normal([2, 1]), name = 'weight')
    b = tf.Variable(tf.random_normal([1]), name = 'bias')
    
    hypothesis = tf.sigmoid(tf.matmul(X, W) + b)
    # Change hypothesis to sigmoid function
    cost = - tf.reduce_mean(Y*tf.log(hypothesis) + (1-Y)*tf.log(1-hypothesis))
    train = tf.train.GradientDescentOptimizer(learning_rate = 1e-2).minimize(cost)
    
    In [40]:
    predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)
    # hypothesis > 0.5 gives a boolean tensor; tf.cast converts True/False to 1.0/0.0 for dtype tf.float32.
    accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y), dtype=tf.float32))
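
    A tiny sketch (my own addition) of the cast-then-average accuracy trick on hand-picked booleans:

    matches = tf.constant([True, False, True])
    with tf.Session() as demo_sess:
        print(demo_sess.run(tf.cast(matches, tf.float32)))                  # [1. 0. 1.]
        print(demo_sess.run(tf.reduce_mean(tf.cast(matches, tf.float32))))  # 0.6666667, i.e. the accuracy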
    
    In [41]:
    b_val = list()
    W_val = np.empty((0, 2), float)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for step in range(10001):
            sess.run(train, feed_dict={X: x_data, Y: y_data})
            if step % 1500 == 0:
                curr_W, curr_b, curr_acc = sess.run([W, b, accuracy], feed_dict={X: x_data, Y: y_data})
                b_val.append(curr_b)
                W_val = np.vstack((W_val, curr_W.T))
                print('step {}'.format(step), curr_W.T.tolist()[0], curr_b, curr_acc)
    
    step 0 [-0.3905549943447113, -0.346926212310791] [ 1.2467742] 0.333333
    step 1500 [0.800638735294342, -0.7960239052772522] [-0.4085739] 0.833333
    step 3000 [0.9491398930549622, -0.5297212600708008] [-1.62931621] 0.833333
    step 4500 [1.0557641983032227, -0.2801178991794586] [-2.63536739] 0.833333
    step 6000 [1.1592700481414795, -0.08745627850294113] [-3.47840905] 1.0
    step 7500 [1.258608341217041, 0.062113694846630096] [-4.19532251] 1.0
    step 9000 [1.3519338369369507, 0.1818050891160965] [-4.81478262] 1.0
    
    In [42]:
    linx = np.linspace(0, 7, 70)
    liny = np.linspace(0, 3.2, 32)
    
    meshx, meshy = np.meshgrid(linx, liny)
    
    In [43]:
    fig = plt.figure(figsize = [12, 10])
    for i in range(2):
        plt.scatter(x_data[(y_data == i).reshape(-1), 0], x_data[(y_data == i).reshape(-1), 1], s=12, label = i)
    for j in range(W_val.shape[0]):
        CS = plt.contour(meshx, meshy, (meshx* W_val[j, 0] + meshy * W_val[j, 1] + b_val[j]), colors='black', alpha=(j*0.1 + 0.3), linewidth=.02, levels=[0.5])
        plt.clabel(CS)
        CS.collections[0].set_label('step {}'.format(1500*j))
    plt.legend()
    plt.show()
    
    In [44]:
    # Why does step 4500 still show accuracy 0.83?
    

    2. Diabetes Classification

    In [45]:
    xy = np.loadtxt('../data-03-diabetes.csv', delimiter=',', dtype=np.float32)
    x_data = xy[:, 0:-1]
    y_data = xy[:, [-1]]
    
    In [46]:
    x_data.shape
    
    Out[46]:
    (759, 8)
    In [47]:
    X = tf.placeholder(tf.float32, shape = [None, 8])
    Y = tf.placeholder(tf.float32, shape = [None, 1])
    W = tf.Variable(tf.random_normal([8, 1]), name = 'weight')
    b = tf.Variable(tf.random_normal([1]), name = 'bias')
    
    hypothesis = tf.sigmoid(tf.matmul(X, W) + b)
    cost = - tf.reduce_mean(Y*tf.log(hypothesis) + (1-Y)*tf.log(1-hypothesis))
    train = tf.train.GradientDescentOptimizer(learning_rate = 1e-2).minimize(cost)
    
    In [48]:
    predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)
    accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y), dtype=tf.float32))
    
    In [49]:
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for step in range(10001):
            sess.run(train, feed_dict={X: x_data, Y: y_data})
            if step % 1500 == 0:
                curr_cost, curr_acc = sess.run([cost, accuracy], feed_dict={X: x_data, Y: y_data})
                print('step {:8d} :\t\t cost {:.6f}\tacc {:.6f}'.format(step, curr_cost, curr_acc))
    
    step        0 :		 cost 0.873432	acc 0.631094
    step     1500 :		 cost 0.663700	acc 0.623188
    step     3000 :		 cost 0.589725	acc 0.667984
    step     4500 :		 cost 0.549802	acc 0.712780
    step     6000 :		 cost 0.526468	acc 0.723320
    step     7500 :		 cost 0.511868	acc 0.745718
    step     9000 :		 cost 0.502222	acc 0.753623
    

    3. Softmax Multi-class Classification

    In [50]:
    x_data = np.array([[1, 2, 1, 1],
              [2, 1, 3, 2],
              [3, 1, 3, 4],
              [4, 1, 5, 5],
              [1, 7, 5, 5],
              [1, 2, 5, 6],
              [1, 6, 6, 6],
              [1, 7, 7, 7]])
    y_data = np.array([[0, 0, 1],
              [0, 0, 1],
              [0, 0, 1],
              [0, 1, 0],
              [0, 1, 0],
              [0, 1, 0],
              [1, 0, 0],
              [1, 0, 0]])
    
    In [51]:
    att_len = x_data.shape[1]
    nb_classes = y_data.shape[1]
    
    X = tf.placeholder('float', [None, att_len])
    Y = tf.placeholder('float', [None, nb_classes])
    W = tf.Variable(tf.random_normal([att_len, nb_classes]), name='weight')
    b = tf.Variable(tf.random_normal([nb_classes]), name = 'bias')
    
    hypothesis = tf.nn.softmax(tf.matmul(X, W) + b)
    cost = tf.reduce_mean(-tf.reduce_sum(Y*tf.log(hypothesis), axis=1))
    train = tf.train.GradientDescentOptimizer(learning_rate=1e-1).minimize(cost)
    
    In [52]:
    step_val = list()
    cost_val = list()
    
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    for step in range(2002):
        sess.run(train, feed_dict={X: x_data, Y: y_data})
        if step % 10 == 1:
            step_val.append(step)
            cost_val.append(sess.run(cost, feed_dict={X: x_data, Y: y_data}))
    
    In [53]:
    fig = plt.figure(figsize =[12, 10])
    plt.plot(step_val, cost_val, label='cost')
    plt.legend()
    plt.show()
    

    3.1 Predicting sample values

    In [54]:
    prediction = sess.run(hypothesis, feed_dict =
                          {X: [[1, 11, 7, 9], [1, 3, 4, 3], [1, 1, 0, 1]]}
                         )
    print(prediction, sess.run(tf.arg_max(prediction, 1)))
    
    [[  1.35056544e-02   9.86486673e-01   7.64170909e-06]
     [  7.79539943e-01   2.07502574e-01   1.29574556e-02]
     [  1.23593384e-08   3.46300978e-04   9.99653697e-01]] [1 0 2]
    

    3.2 Softmax, the Fancy Way

    In [55]:
    xy = np.loadtxt('../data-04-zoo.csv', delimiter=',', dtype=np.float32)
    x_data = xy[:, 0:-1]
    y_data = xy[:, [-1]]
    
    In [56]:
    len(np.unique(y_data))
    
    Out[56]:
    7
    In [57]:
    att_len = x_data.shape[1]
    nb_classes = len(np.unique(y_data))
    # y_data stores the class index for each sample, so the number of classes is the number of unique values in y_data, not its column count
    
    X = tf.placeholder(tf.float32, [None, att_len])
    Y = tf.placeholder(tf.int32, [None, y_data.shape[1]])
    Y_onehot = tf.one_hot(Y, nb_classes)
    Y_onehot = tf.reshape(Y_onehot, [-1, nb_classes])
    # one-hot encoding the TensorFlow way: tf.one_hot adds a dimension ([None, 1] -> [None, 1, nb_classes]), so reshape back to [-1, nb_classes]
    
    W = tf.Variable(tf.random_normal([att_len, nb_classes]), name='weight')
    b = tf.Variable(tf.random_normal([nb_classes]), name = 'bias')
    
    logits = tf.matmul(X, W) + b
    hypothesis = tf.nn.softmax(logits)
    
    cost_i = tf.nn.softmax_cross_entropy_with_logits(logits = logits, labels=Y_onehot)
    # cross entropy calculation with tensorflow
    cost = tf.reduce_mean(cost_i)
    
    train = tf.train.GradientDescentOptimizer(learning_rate=1e-1).minimize(cost)
    prediction = tf.argmax(hypothesis, 1)
    correction = tf.equal(prediction, tf.argmax(Y_onehot, 1))
    # prediction / correctness check using tf.argmax and tf.equal
    accuracy = tf.reduce_mean(tf.cast(correction, tf.float32))
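
    A small sketch (my own addition) showing why the reshape after tf.one_hot is needed when the labels have shape [None, 1]:

    labels = tf.constant([[0], [2]])                                  # shape [2, 1], like y_data
    with tf.Session() as demo_sess:
        onehot = tf.one_hot(labels, 3)
        print(demo_sess.run(tf.shape(onehot)))                        # [2 1 3]: one extra dimension
        print(demo_sess.run(tf.shape(tf.reshape(onehot, [-1, 3]))))   # [2 3] after the reshape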
    
    In [58]:
    step_val = list()
    loss_val = list()
    acc_val = list()
    
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for step in range(2001):
            sess.run(train, feed_dict = {X: x_data, Y: y_data})
            if step % 20 == 0:
                loss, acc = sess.run([cost, accuracy], feed_dict = {X: x_data, Y: y_data})
                step_val.append(step)
                loss_val.append(loss)
                acc_val.append(acc)
    
    In [59]:
    fig = plt.figure(figsize = [12, 14])
    ax_loss = plt.subplot(2, 1, 1)
    ax_loss.plot(step_val, loss_val, label='cost')
    ax_loss.legend()
    
    ax_acc = plt.subplot(2, 1, 2)
    ax_acc.plot(step_val, acc_val, c='red', label='accuracy')
    ax_acc.legend()
    
    plt.show()
    

    Lab07. Applications and Tips

    For a simplified exercise, some steps have been omitted.

    1. Applications and Tips

    In [60]:
    from sklearn.preprocessing import MinMaxScaler
    
    In [61]:
    x_data = np.array([[1, 200, 1],
              [1, 300, 2],
              [1, 300, 4],
              [1, 500, 5],
              [1, 700, 5],
              [1, 200, 5],
              [1, 600, 6],
              [1, 700, 7]])
    y_data = np.array([[0, 0, 1],
              [0, 0, 1],
              [0, 0, 1],
              [0, 1, 0],
              [0, 1, 0],
              [0, 1, 0],
              [1, 0, 0],
              [1, 0, 0]])
    x_test = np.array([[2, 100, 1],
              [3, 100, 2],
              [3, 300, 4]])
    y_test = np.array([[0, 0, 1],
              [0, 0, 1],
              [0, 0, 1]])
    
    # Splitting the data into a train set and a test set is important
    
    In [62]:
    scaler = MinMaxScaler()
    scaler.fit(x_data)
    x_data_scaled = scaler.transform(x_data)
    x_test_scaled = scaler.transform(x_test)
    # Normalizing by MinMaxScaler
    
    /home/rpblic-ubuntu/anaconda3/lib/python3.6/site-packages/sklearn/utils/validation.py:429: DataConversionWarning: Data with input dtype int64 was converted to float64 by MinMaxScaler.
      warnings.warn(msg, _DataConversionWarning)
    
    In [63]:
    att_len = x_data_scaled.shape[1]
    nb_classes = y_data.shape[1]
    
    X = tf.placeholder(tf.float32, [None, att_len])
    Y = tf.placeholder(tf.float32, [None, nb_classes])
    W = tf.Variable(tf.random_normal([att_len, nb_classes]), name='weight')
    b = tf.Variable(tf.random_normal([nb_classes]), name = 'bias')
    
    hypothesis = tf.nn.softmax(tf.matmul(X, W) + b)
    cost = tf.reduce_mean(-tf.reduce_sum(Y * tf.log(hypothesis), axis=1))
    
    optimizer_small_rate = tf.train.GradientDescentOptimizer(learning_rate=1e-5).minimize(cost)
    optimizer_moderate_rate = tf.train.GradientDescentOptimizer(learning_rate=3e-1).minimize(cost)
    optimizer_huge_rate = tf.train.GradientDescentOptimizer(learning_rate=3e+1).minimize(cost)
    
    prediction = tf.arg_max(hypothesis, 1)
    correction = tf.equal(prediction, tf.arg_max(Y, 1))
    accuracy = tf.reduce_mean(tf.cast(correction, tf.float32))
    
    In [64]:
    fig = plt.figure(figsize = [12, 14])
    ax_loss = plt.subplot(2, 1, 1)
    ax_acc = plt.subplot(2, 1, 2)
    
    # Three Sessions with different learning rates: 1e-5, 3e-1, 3e+1
    
    step_val = list()
    cost_val = list()
    acc_val = list()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for step in range(2001):
            curr_cost, curr_acc, _ = sess.run(
                [cost, accuracy, optimizer_small_rate], feed_dict={X: x_data_scaled, Y: y_data})
            if step % 20 == 0:
                step_val.append(step)
                cost_val.append(curr_cost)
                acc_val.append(curr_acc)
        print('With Small Learning Rate:\t\t Prediction: {}\t\t Accuracy: {}'.format(
            sess.run(prediction, feed_dict={X:x_test_scaled}),
            sess.run(accuracy, feed_dict={X:x_test_scaled, Y:y_test})
        ))
    ax_loss.plot(step_val, cost_val, label='1e-5')
    ax_acc.plot(step_val, acc_val, label='1e-5')
    
    step_val = list()
    cost_val = list()
    acc_val = list()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for step in range(2001):
            curr_cost, curr_acc, _ = sess.run(
                [cost, accuracy, optimizer_moderate_rate], feed_dict={X: x_data_scaled, Y: y_data})
            if step % 20 == 0:
                step_val.append(step)
                cost_val.append(curr_cost)
                acc_val.append(curr_acc)
        print('With Moderate Learning Rate:\t Prediction: {}\t\t Accuracy: {}'.format(
            sess.run(prediction, feed_dict={X:x_test_scaled}),
            sess.run(accuracy, feed_dict={X:x_test_scaled, Y:y_test})
        ))
    ax_loss.plot(step_val, cost_val, label='3e-1')
    ax_acc.plot(step_val, acc_val, label='3e-1')
    
    step_val = list()
    cost_val = list()
    acc_val = list()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for step in range(2001):
            curr_cost, curr_acc, _ = sess.run(
                [cost, accuracy, optimizer_huge_rate], feed_dict={X: x_data_scaled, Y: y_data})
            if step % 20 == 0:
                step_val.append(step)
                cost_val.append(curr_cost)
                acc_val.append(curr_acc)
        print('With Huge Learning Rate:\t\t Prediction: {}\t\t Accuracy: {}'.format(
            sess.run(prediction, feed_dict={X:x_test_scaled}),
            sess.run(accuracy, feed_dict={X:x_test_scaled, Y:y_test})
        ))
    ax_loss.plot(step_val, cost_val, label='3e+1')
    ax_acc.plot(step_val, acc_val, label='3e+1')
    
    ax_loss.legend()
    ax_loss.set_title('Cost with different learning rate')
    ax_acc.legend()
    ax_acc.set_title('Accuracy with different learning rate')
    
    plt.show()
    
    With Small Learning Rate:		 Prediction: [0 0 2]		 Accuracy: 0.3333333432674408
    With Moderate Learning Rate:	 Prediction: [2 2 2]		 Accuracy: 1.0
    With Huge Learning Rate:		 Prediction: [0 0 0]		 Accuracy: 0.0
    

    2. MNIST

    In [65]:
    from tensorflow.examples.tutorials.mnist import input_data as mnist_data
    
    In [66]:
    mnist = mnist_data.read_data_sets('MNIST_data/', one_hot=True)
    
    Extracting MNIST_data/train-images-idx3-ubyte.gz
    Extracting MNIST_data/train-labels-idx1-ubyte.gz
    Extracting MNIST_data/t10k-images-idx3-ubyte.gz
    Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
    
    In [67]:
    att_len = 28 * 28
    nb_classes = 10
    
    X = tf.placeholder(tf.float32, [None, att_len])
    Y = tf.placeholder(tf.float32, [None, nb_classes])
    W = tf.Variable(tf.random_normal([att_len, nb_classes]), name='weight')
    b = tf.Variable(tf.random_normal([nb_classes]), name = 'bias')
    
    hypothesis = tf.nn.softmax(tf.matmul(X, W) + b)
    cost = tf.reduce_mean(-tf.reduce_sum(Y * tf.log(hypothesis), axis=1))
    
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-1).minimize(cost)
    
    prediction = tf.arg_max(hypothesis, 1)
    correction = tf.equal(prediction, tf.arg_max(Y, 1))
    accuracy = tf.reduce_mean(tf.cast(correction, tf.float32))
    
    In [68]:
    training_epoches = 30
    batch_size = 200
    
    epoch_val = list()
    cost_val = list()
    acc_val = list()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        
        for epoch in range(training_epoches):
            avg_cost = 0
            total_batch = int(mnist.train.num_examples / batch_size)
            
            for i in range(total_batch):
                batch_xs, batch_ys = mnist.train.next_batch(batch_size)
                curr_cost, _ = sess.run([cost, optimizer], feed_dict = {X: batch_xs, Y: batch_ys})
                avg_cost += curr_cost / total_batch
                
            epoch_val.append(epoch)
            cost_val.append(avg_cost)
            acc_val.append(accuracy.eval(
                session=sess, feed_dict = {X: mnist.test.images, Y:mnist.test.labels}
            ))
            
        # after training is done, check the prediction for one random test image
        rand_int = random.randint(0, mnist.test.num_examples - 1)
        plt.imshow(mnist.test.images[rand_int: rand_int + 1].reshape(28, 28),
                  cmap='Greys', interpolation='nearest'
                  )
        plt.show()
        print("Given Test Image:\t\tLabel: {}\t\tPrediction: {}".format(
        sess.run(tf.argmax(mnist.test.labels[rand_int: rand_int +1], 1))[0],
        sess.run(tf.argmax(hypothesis, 1), feed_dict = {X: mnist.test.images[rand_int: rand_int +1]})[0],
        ))
    
    Given Test Image:		Label: 9		Prediction: 9
    
    In [69]:
    fig = plt.figure(figsize = [12, 14])
    ax_cost = plt.subplot(2, 1, 1)
    ax_cost.plot(epoch_val, cost_val, label='cost')
    ax_cost.legend()
    
    ax_acc = plt.subplot(2, 1, 2)
    ax_acc.plot(epoch_val, acc_val, c='red', label='accuracy')
    ax_acc.legend()
    
    plt.show()
    

    Lab09. Neural Net

    1. XOR Problem

    logistic classification vs neural net
    In [70]:
    x_data = np.array([[0, 0],
              [0, 1],
              [1, 0],
              [1, 1]], dtype=np.float32)
    y_data = np.array([[0],
              [1],
              [1],
              [0]], dtype=np.float32)
    
    In [71]:
    for i in range(2):
        plt.scatter(x_data[(y_data == i).reshape(-1), 0], x_data[(y_data == i).reshape(-1), 1], s=12, label = i)
    plt.show()
    
    In [72]:
    att_len = x_data.shape[1]
    y_col_len = y_data.shape[1]
    
    X = tf.placeholder(tf.float32, [None, att_len])
    Y = tf.placeholder(tf.float32, [None, y_col_len])
    W = tf.Variable(tf.random_normal([att_len, y_col_len]), name='weight')
    b = tf.Variable(tf.random_normal([y_col_len]), name = 'bias')
    
    hypothesis = tf.sigmoid(tf.matmul(X, W) + b)
    cost = - tf.reduce_mean(Y * tf.log(hypothesis) + (1-Y) * tf.log(1-hypothesis))
    
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=3e-2).minimize(cost)
    
    prediction = tf.cast(hypothesis > 0.5, dtype=tf.float32)
    correction = tf.equal(prediction, Y)
    accuracy = tf.reduce_mean(tf.cast(correction, tf.float32))
    
    In [73]:
    b_val = list()
    W_val = np.empty((0, att_len), float)
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
        
    for step in range(1001):
        sess.run(optimizer, feed_dict={X: x_data, Y: y_data})
        if step % 150 == 0:
            curr_W, curr_b, curr_acc = sess.run([W, b, accuracy], feed_dict={X: x_data, Y: y_data})
            b_val.append(curr_b)
            W_val = np.vstack((W_val, curr_W.T))
            print('step {}'.format(step), curr_W.T.tolist()[0], curr_b, curr_acc)
    
    step 0 [-0.37594854831695557, -1.0930533409118652] [-0.50757593] 0.5
    step 150 [-0.03125479817390442, -0.5885366201400757] [ 0.10772913] 0.5
    step 300 [0.011368460953235626, -0.41002416610717773] [ 0.19267726] 0.5
    step 450 [0.002268127864226699, -0.31598976254463196] [ 0.17887698] 0.5
    step 600 [-0.009660548530519009, -0.24994920194149017] [ 0.15283661] 0.5
    step 750 [-0.01784520596265793, -0.19922980666160583] [ 0.12858529] 0.5
    step 900 [-0.022484343498945236, -0.15938805043697357] [ 0.10785353] 0.5
    
    In [74]:
    linx = np.linspace(-0.1, 1.1, 60)
    liny = np.linspace(-0.1, 1.1, 60)
    
    meshx, meshy = np.meshgrid(linx, liny)
    
    In [75]:
    fig = plt.figure(figsize = [12, 10])
    for i in range(2):
        plt.scatter(x_data[(y_data == i).reshape(-1), 0], x_data[(y_data == i).reshape(-1), 1], s=12, label = i)
    for j in range(W_val.shape[0]):
        CS = plt.contour(meshx, meshy, (meshx* W_val[j, 0] + meshy * W_val[j, 1] + b_val[j]), colors='black', alpha=(j*0.1 + 0.3), linewidth=.02, levels=[0.5])
        plt.clabel(CS)
        CS.collections[0].set_label('step {}'.format(150*j))
    plt.legend()
    plt.show()
    
    In [76]:
    print("XOR with logistic regression:\t\t Accuracy: {}".format(sess.run(accuracy, feed_dict = {X: x_data, Y: y_data})))
    
    XOR with logistic regression:		 Accuracy: 0.5
    
    In [77]:
    # Poor Result.
    
    In [78]:
    att_len = x_data.shape[1]
    y_col_len = y_data.shape[1]
    hidden_layer_col_len = 3
    
    X = tf.placeholder(tf.float32, [None, att_len])
    Y = tf.placeholder(tf.float32, [None, y_col_len])
    
    W1 = tf.Variable(tf.random_normal([att_len, hidden_layer_col_len]), name='weight1')
    b1 = tf.Variable(tf.random_normal([hidden_layer_col_len]), name = 'bias1')
    layer1 = tf.sigmoid(tf.matmul(X, W1) + b1)
    
    W2 = tf.Variable(tf.random_normal([hidden_layer_col_len, y_col_len]), name='weight2')
    b2 = tf.Variable(tf.random_normal([y_col_len]), name = 'bias2')
    layer2 = tf.sigmoid(tf.matmul(layer1, W2) + b2)
    
    cost = - tf.reduce_mean(Y * tf.log(layer2) + (1-Y) * tf.log(1-layer2))
    
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-1).minimize(cost)
    
    prediction = tf.cast(layer2 > 0.5, dtype=tf.float32)
    correction = tf.equal(prediction, Y)
    accuracy = tf.reduce_mean(tf.cast(correction, tf.float32))
    
    In [79]:
    W1_val = np.empty((0, att_len), float)
    b1_val = np.empty((0, hidden_layer_col_len))
    W2_val = np.empty((0, hidden_layer_col_len), float)
    b2_val = list()
    
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
        
    for step in range(10001):
        sess.run(optimizer, feed_dict={X: x_data, Y: y_data})
        if step % 1500 == 0:
            curr_W1, curr_W2, curr_b1, curr_b2, curr_acc = sess.run(
                [W1, W2, b1, b2, accuracy], feed_dict={X: x_data, Y: y_data}
            )
            b1_val = np.vstack((b1_val, curr_b1.T))
            W1_val = np.vstack((W1_val, curr_W1.T))
            b2_val.append(curr_b2[0])
            W2_val = np.vstack((W2_val, curr_W2.T))
            print('step {}'.format(step), curr_acc)
    
    step 0 0.5
    step 1500 0.75
    step 3000 1.0
    step 4500 1.0
    step 6000 1.0
    step 7500 1.0
    step 9000 1.0
    
    In [80]:
    W1_val = W1_val.reshape((-1, hidden_layer_col_len, att_len))
    
    In [81]:
    def sigmoid(arr):
        return 1/(1+np.exp(-arr))
    
    def f(j, meshx, meshy):
        return_value = np.empty((60, 60), float)
        for i in range(hidden_layer_col_len):
            return_value += sigmoid(meshx * W1_val[j, i, 0] + meshy * W1_val[j, i, 1] + b1_val[j, i]) * W2_val[j, i]
        return_value += b2_val[j]
        return sigmoid(return_value)
    
    In [82]:
    fig = plt.figure(figsize = [12, 10])
    for i in range(2):
        plt.scatter(x_data[(y_data == i).reshape(-1), 0], x_data[(y_data == i).reshape(-1), 1], s=12, label = i)
    for j in range(W1_val.shape[0]):
        CS = plt.contour(meshx, meshy, f(j, meshx, meshy), colors='black', alpha=(j*0.1 + 0.3), linewidth=.02, levels=[0.5])
        plt.clabel(CS)
        CS.collections[0].set_label('step {}'.format(1500*j))
    plt.legend()
    plt.show()
    
    /home/rpblic-ubuntu/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:2: RuntimeWarning: overflow encountered in exp
      
    
    In [83]:
    print("XOR with neural net:\t\t Accuracy: {}".format(sess.run(accuracy, feed_dict = {X: x_data, Y: y_data})))
    
    XOR with neural net:		 Accuracy: 1.0
    
    Addon: Tensorboard
    In [84]:
    tf.reset_default_graph()
    
    att_len = x_data.shape[1]
    y_col_len = y_data.shape[1]
    
    X = tf.placeholder(tf.float32, [None, att_len])
    Y = tf.placeholder(tf.float32, [None, y_col_len])
    
    with tf.name_scope('logistic_classification_layer'):
        W = tf.Variable(tf.random_normal([att_len, y_col_len]), name='weight')
        b = tf.Variable(tf.random_normal([y_col_len]), name = 'bias')
        hypothesis = tf.sigmoid(tf.matmul(X, W) + b)
        
        W_hist = tf.summary.histogram('weight', W)    
        b_hist = tf.summary.histogram('bias', b)    
        hypothesis_hist = tf.summary.histogram('hypothesis', hypothesis)    
    
    with tf.name_scope('cost'):
        cost = - tf.reduce_mean(Y * tf.log(hypothesis) + (1-Y) * tf.log(1-hypothesis))
        cost_scalar = tf.summary.scalar('cost', cost)
        
    with tf.name_scope('optimizer'):
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=3e-2).minimize(cost)
    
    prediction = tf.cast(hypothesis > 0.5, dtype=tf.float32)
    correction = tf.equal(prediction, Y)
    
    with tf.name_scope('accuracy'):
        accuracy = tf.reduce_mean(tf.cast(correction, tf.float32))
        accuracy_scalar = tf.summary.scalar('accuracy', accuracy)
    
    In [85]:
    with tf.Session() as sess:
        merged_summary = tf.summary.merge_all()
        writer = tf.summary.FileWriter('./logs/lab09_xor_logistic')
        writer.add_graph(sess.graph)
        
        sess.run(tf.global_variables_initializer())
        
        for step in range(10001):
            summ, _ = sess.run(
                [merged_summary, optimizer],
                feed_dict={X: x_data, Y: y_data}
            )
            writer.add_summary(summ, global_step=step)
        writer.close()
    

    [TensorBoard screenshot: XOR with logistic classification]

    In [86]:
    tf.reset_default_graph()
    
    att_len = x_data.shape[1]
    y_col_len = y_data.shape[1]
    hidden_layer_col_len = 3
    
    X = tf.placeholder(tf.float32, [None, att_len])
    Y = tf.placeholder(tf.float32, [None, y_col_len])
    
    with tf.name_scope('nn_layer1'):
        W1 = tf.Variable(tf.random_normal([att_len, hidden_layer_col_len]), name='weight1')
        b1 = tf.Variable(tf.random_normal([hidden_layer_col_len]), name = 'bias1')
        layer1 = tf.sigmoid(tf.matmul(X, W1) + b1)
        W1_hist = tf.summary.histogram('weight1', W1)    
        b1_hist = tf.summary.histogram('bias1', b1)    
        layer1_hist = tf.summary.histogram('layer1', layer1)  
        
    with tf.name_scope('nn_layer2'):
        W2 = tf.Variable(tf.random_normal([hidden_layer_col_len, y_col_len]), name='weight2')
        b2 = tf.Variable(tf.random_normal([y_col_len]), name = 'bias2')
        layer2 = tf.sigmoid(tf.matmul(layer1, W2) + b2)
        W2_hist = tf.summary.histogram('weight2', W2)    
        b2_hist = tf.summary.histogram('bias2', b2)    
        layer2_hist = tf.summary.histogram('layer2', layer2) 
    
    with tf.name_scope('cost'):
        cost = - tf.reduce_mean(Y * tf.log(layer2) + (1-Y) * tf.log(1-layer2))
        cost_scalar = tf.summary.scalar('cost', cost)
        
    with tf.name_scope('optimizer'):
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-1).minimize(cost)
    
    prediction = tf.cast(layer2 > 0.5, dtype=tf.float32)
    correction = tf.equal(prediction, Y)
    with tf.name_scope('accuracy'):
        accuracy = tf.reduce_mean(tf.cast(correction, tf.float32))
        accuracy_scalar = tf.summary.scalar('accuracy', accuracy)
    
    In [87]:
    with tf.Session() as sess:
        merged_summary = tf.summary.merge_all()
        writer = tf.summary.FileWriter('./logs/lab09_xor_neuralnet')
        writer.add_graph(sess.graph)
        
        sess.run(tf.global_variables_initializer())
        
        for step in range(10001):
            summ, _ = sess.run(
                [merged_summary, optimizer],
                feed_dict={X: x_data, Y: y_data}
            )
            writer.add_summary(summ, global_step=step)
        writer.close()
    

    [TensorBoard screenshot: XOR with a neural net]

    2. MNIST prediction with NN and tips

    In [88]:
    mnist = mnist_data.read_data_sets('MNIST_data/', one_hot=True)
    
    Extracting MNIST_data/train-images-idx3-ubyte.gz
    Extracting MNIST_data/train-labels-idx1-ubyte.gz
    Extracting MNIST_data/t10k-images-idx3-ubyte.gz
    Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
    
    In [89]:
    tf.reset_default_graph()
    
    att_len = 28 * 28
    hidden_layer1_col_len = 256
    hidden_layer2_col_len = 256
    nb_classes = 10
    
    learning_rate = 1e-3
    
    X = tf.placeholder(tf.float32, [None, att_len])
    Y = tf.placeholder(tf.float32, [None, nb_classes])
    dropout_keep_prob = tf.placeholder(tf.float32)
    
    W1 = tf.get_variable(name='weight1', shape=[att_len, hidden_layer1_col_len], initializer= tf.contrib.layers.xavier_initializer())
    # Used tf.get_variable instead of tf.Variable.
    # See https://stackoverflow.com/questions/37098546/difference-between-variable-and-get-variable-in-tensorflow
    # Used tf.contrib.layers.xavier_initializer
    b1 = tf.get_variable(name = 'bias1', shape=[hidden_layer1_col_len])
    L1 = tf.nn.dropout(tf.nn.relu(tf.matmul(X, W1) + b1), keep_prob=dropout_keep_prob)
    # Used dropout on each layer
    
    W2 = tf.get_variable(name='weight2', shape=[hidden_layer1_col_len, hidden_layer2_col_len], initializer= tf.contrib.layers.xavier_initializer())
    b2 = tf.get_variable(name = 'bias2', shape=[hidden_layer2_col_len])
    L2 = tf.nn.dropout(tf.nn.relu(tf.matmul(L1, W2) + b2), keep_prob=dropout_keep_prob)
    
    W3 = tf.get_variable(name='weight3', shape=[hidden_layer2_col_len, nb_classes], initializer= tf.contrib.layers.xavier_initializer())
    b3 = tf.get_variable(name = 'bias3', shape=[nb_classes])
    hypothesis = tf.matmul(L2, W3) + b3
    
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
        logits = hypothesis, labels = Y
    ))
    
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
    # Used tf.train.AdamOptimizer
    
    prediction = tf.arg_max(hypothesis, 1)
    correction = tf.equal(prediction, tf.arg_max(Y, 1))
    accuracy = tf.reduce_mean(tf.cast(correction, tf.float32))
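
    A minimal sketch (my own addition, built in a throwaway graph with hypothetical names) of the tf.Variable vs. tf.get_variable difference mentioned in the comments above: tf.get_variable shares variables by name within a variable_scope, while tf.Variable always creates a new one.

    with tf.Graph().as_default():
        with tf.variable_scope('demo'):
            v1 = tf.get_variable('w', shape=[1])
        with tf.variable_scope('demo', reuse=True):
            v2 = tf.get_variable('w')       # the same variable is returned (shared by name)
        print(v1 is v2)                     # True
        # tf.Variable(tf.random_normal([1]), name='w') would instead create a brand-new
        # variable every time, silently renaming on name collisions.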
    
    In [90]:
    training_epoches = 30
    batch_size = 200
    total_batch = int(mnist.train.num_examples / batch_size)
    
    epoch_val = list()
    cost_val = list()
    acc_val = list()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        
        for epoch in range(training_epoches):
            avg_cost = 0
            for i in range(total_batch):
                batch_xs, batch_ys = mnist.train.next_batch(batch_size)
                curr_cost, _ = sess.run(
                    [cost, optimizer],
                    feed_dict = {X: batch_xs, Y: batch_ys, dropout_keep_prob: 0.7}
                )
                avg_cost += curr_cost / total_batch
                
            epoch_val.append(epoch)
            cost_val.append(avg_cost)
            acc_val.append(accuracy.eval(
                session=sess,
                feed_dict = {X: mnist.test.images, Y:mnist.test.labels, dropout_keep_prob: 1.0}
            ))
            
        # after training is done, check the prediction for one random test image
        rand_int = random.randint(0, mnist.test.num_examples - 1)
        plt.imshow(mnist.test.images[rand_int: rand_int + 1].reshape(28, 28),
                  cmap='Greys', interpolation='nearest'
                  )
        plt.show()
        print("Given Test Image:\t\tLabel: {}\t\tPrediction: {}".format(
        sess.run(tf.argmax(mnist.test.labels[rand_int: rand_int +1], 1))[0],
        sess.run(
            tf.argmax(hypothesis, 1),
            feed_dict = {X: mnist.test.images[rand_int: rand_int +1], dropout_keep_prob: 1.0})[0],
        ))
    
    Given Test Image:		Label: 5		Prediction: 5
    
    In [91]:
    fig = plt.figure(figsize = [12, 14])
    ax_cost = plt.subplot(2, 1, 1)
    ax_cost.plot(epoch_val, cost_val, label='cost')
    ax_cost.legend()
    
    ax_acc = plt.subplot(2, 1, 2)
    ax_acc.plot(epoch_val, acc_val, c='red', label='accuracy')
    ax_acc.scatter(epoch_val, acc_val, c='b', s=12)
    for i, acc in enumerate(acc_val):
        ax_acc.annotate(acc, (epoch_val[i], acc))
    ax_acc.legend()
    
    plt.show()
    
    In [92]:
    # More Info:
    # http://localhost:8888/edit/lab-10-7-mnist_nn_higher_level_API.py
    # http://localhost:8888/edit/lab-10-8-mnist_nn_selu(wip).py
    # http://localhost:8888/edit/lab-10-X1-mnist_back_prop.py
    
    # Especially How to use Batch Normalization:
    # https://github.com/hunkim/DeepLearningZeroToAll/blob/master/lab-10-6-mnist_nn_batchnorm.ipynb
    # http://openresearch.ai/t/topic/80
    

    Lab11. CNN

    1. MNIST Prediction with acc 0.994

    In [93]:
    mnist = mnist_data.read_data_sets('MNIST_data/', one_hot=True)
    
    Extracting MNIST_data/train-images-idx3-ubyte.gz
    Extracting MNIST_data/train-labels-idx1-ubyte.gz
    Extracting MNIST_data/t10k-images-idx3-ubyte.gz
    Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
    
    In [94]:
    import math
    
    In [95]:
    tf.reset_default_graph()
    
    img_len = 28
    att_len = img_len**2
    conv_size = 3
    maxpool_size = 2
    filter1_col_len = 32
    filter2_col_len = 64
    filter3_col_len = 128
    hidden_layer4_col_len = 625
    nb_classes = 10
    
    learning_rate = 1e-3
    
    X = tf.placeholder(tf.float32, [None, att_len])
    X_img = tf.reshape(X, [-1, img_len, img_len, 1]) # [#data, height, width, #channels]
    Y = tf.placeholder(tf.float32, [None, nb_classes])
    dropout_keep_prob = tf.placeholder(tf.float32)
    
    W1 = tf.Variable(tf.random_normal([conv_size, conv_size, 1, filter1_col_len], stddev=0.01))
    L1 = tf.nn.conv2d(X_img, W1, strides=[1, 1, 1, 1], padding='SAME') # Or, VALID
    L1 = tf.nn.relu(L1)
    L1 = tf.nn.max_pool(L1,
                ksize = [1, maxpool_size, maxpool_size, 1],
                strides= [1, maxpool_size, maxpool_size, 1],
                padding='SAME'
            ) # [#data, 14, 14, 32]
    L1 = tf.nn.dropout(L1, keep_prob=dropout_keep_prob)
    
    W2 = tf.Variable(tf.random_normal([conv_size, conv_size, filter1_col_len, filter2_col_len], stddev=0.01))
    L2 = tf.nn.conv2d(L1, W2, strides=[1, 1, 1, 1], padding='SAME') # Or, VALID
    L2 = tf.nn.relu(L2)
    L2 = tf.nn.max_pool(L2,
                ksize = [1, maxpool_size, maxpool_size, 1],
                strides= [1, maxpool_size, maxpool_size, 1],
                padding='SAME'
            ) # [#data, 7, 7, 64]
    L2 = tf.nn.dropout(L2, keep_prob=dropout_keep_prob)
    
    W3 = tf.Variable(tf.random_normal([conv_size, conv_size, filter2_col_len, filter3_col_len], stddev=0.01))
    L3 = tf.nn.conv2d(L2, W3, strides=[1, 1, 1, 1], padding='SAME') # Or, VALID
    L3 = tf.nn.relu(L3)
    L3 = tf.nn.max_pool(L3,
                ksize = [1, maxpool_size, maxpool_size, 1],
                strides= [1, maxpool_size, maxpool_size, 1],
                padding='SAME'
            ) # [#data, 4, 4, 128]
    L3 = tf.nn.dropout(L3, keep_prob=dropout_keep_prob)
    
    L3 = tf.reshape(L3, [-1, math.ceil(img_len/8)*math.ceil(img_len/8)*filter3_col_len])
    
    
    W4 = tf.get_variable(name='weight4',
                shape=[math.ceil(img_len/8)*math.ceil(img_len/8)*filter3_col_len, hidden_layer4_col_len],
                initializer= tf.contrib.layers.xavier_initializer()
            )
    b4 = tf.get_variable(name = 'bias4', shape=[hidden_layer4_col_len])
    L4 = tf.nn.dropout(
        tf.nn.relu(tf.matmul(L3, W4) + b4),
        keep_prob=dropout_keep_prob
    )
    
    W5 = tf.get_variable(name='weight5',
                shape=[hidden_layer4_col_len, nb_classes],
                initializer= tf.contrib.layers.xavier_initializer()
            )
    b5 = tf.get_variable(name = 'bias5', shape=[nb_classes])
    hypothesis = tf.matmul(L4, W5) + b5
    
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
        logits = hypothesis, labels = Y
    ))
    
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
    
    prediction = tf.arg_max(hypothesis, 1)
    correction = tf.equal(prediction, tf.arg_max(Y, 1))
    accuracy = tf.reduce_mean(tf.cast(correction, tf.float32))
    
    In [96]:
    training_epoches = 30
    batch_size = 200
    total_batch = int(mnist.train.num_examples / batch_size)
    
    epoch_val = list()
    cost_val = list()
    acc_val = list()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        
        for epoch in range(training_epoches):
            avg_cost = 0
            for i in range(total_batch):
                batch_xs, batch_ys = mnist.train.next_batch(batch_size)
                curr_cost, _ = sess.run(
                    [cost, optimizer],
                    feed_dict = {X: batch_xs, Y: batch_ys, dropout_keep_prob: 0.7}
                )
                avg_cost += curr_cost / total_batch
                
            epoch_val.append(epoch)
            cost_val.append(avg_cost)
            acc_val.append(accuracy.eval(
                session=sess,
                feed_dict = {X: mnist.test.images, Y:mnist.test.labels, dropout_keep_prob: 1.0}
            ))
            
        # after training is done, check the prediction for one random test image
        rand_int = random.randint(0, mnist.test.num_examples - 1)
        plt.imshow(mnist.test.images[rand_int: rand_int + 1].reshape(28, 28),
                  cmap='Greys', interpolation='nearest'
                  )
        plt.show()
        print("Given Test Image:\t\tLabel: {}\t\tPrediction: {}".format(
        sess.run(tf.argmax(mnist.test.labels[rand_int: rand_int +1], 1))[0],
        sess.run(
            tf.argmax(hypothesis, 1),
            feed_dict = {X: mnist.test.images[rand_int: rand_int +1], dropout_keep_prob: 1.0})[0],
        ))
    
    Given Test Image:		Label: 7		Prediction: 7
    
    In [97]:
    fig = plt.figure(figsize = [12, 14])
    ax_cost = plt.subplot(2, 1, 1)
    ax_cost.plot(epoch_val, cost_val, label='cost')
    ax_cost.legend()
    
    ax_acc = plt.subplot(2, 1, 2)
    ax_acc.plot(epoch_val, acc_val, c='red', label='accuracy')
    ax_acc.scatter(epoch_val, acc_val, c='black', s=12)
    for i, acc in enumerate(acc_val):
        ax_acc.annotate(acc, (epoch_val[i], acc))
    ax_acc.legend()
    
    plt.show()
    
    Addon: TensorFlow with a Python class, tf.layers & Ensemble
    In [98]:
    class Model:
    
        def __init__(self, sess, name):
            self.sess = sess
            self.name = name
            self._build_net()
    
        def _build_net(self):
            with tf.variable_scope(self.name):
                self.training = tf.placeholder(tf.bool)
    
                self.X = tf.placeholder(tf.float32, [None, 784])
                X_img = tf.reshape(self.X, [-1, 28, 28, 1])
                self.Y = tf.placeholder(tf.float32, [None, 10])
    
                # Convolutional Layer #1
                conv1 = tf.layers.conv2d(inputs=X_img, filters=32, kernel_size=[3, 3],
                                         padding="SAME", activation=tf.nn.relu)
                # Pooling Layer #1
                pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2],
                                                padding="SAME", strides=2)
                dropout1 = tf.layers.dropout(inputs=pool1,
                                             rate=0.3, training=self.training)
                # Used tf.layers.conv2d, tf.layers.max_pooling2d, tf.layers.dropout
                # instead of tf.nn; it is much more convenient.
    
                # Convolutional Layer #2 and Pooling Layer #2
                conv2 = tf.layers.conv2d(inputs=dropout1, filters=64, kernel_size=[3, 3],
                                         padding="SAME", activation=tf.nn.relu)
                pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2],
                                                padding="SAME", strides=2)
                dropout2 = tf.layers.dropout(inputs=pool2,
                                             rate=0.3, training=self.training)
    
                # Convolutional Layer #3 and Pooling Layer #3
                conv3 = tf.layers.conv2d(inputs=dropout2, filters=128, kernel_size=[3, 3],
                                         padding="same", activation=tf.nn.relu)
                pool3 = tf.layers.max_pooling2d(inputs=conv3, pool_size=[2, 2],
                                                padding="same", strides=2)
                dropout3 = tf.layers.dropout(inputs=pool3,
                                             rate=0.3, training=self.training)
    
                # Dense Layer with Relu
                flat = tf.reshape(dropout3, [-1, 128 * 4 * 4])
                dense4 = tf.layers.dense(inputs=flat,
                                         units=625, activation=tf.nn.relu)
                dropout4 = tf.layers.dropout(inputs=dense4,
                                             rate=0.5, training=self.training)
    
                # Logits (no activation) Layer: L5 Final FC 625 inputs -> 10 outputs
                self.logits = tf.layers.dense(inputs=dropout4, units=10)
    
            # define cost/loss & optimizer
            self.cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
                logits=self.logits, labels=self.Y))
            self.optimizer = tf.train.AdamOptimizer(
                learning_rate=learning_rate).minimize(self.cost)
    
            correct_prediction = tf.equal(
                tf.argmax(self.logits, 1), tf.argmax(self.Y, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    
        def predict(self, x_test, training=False):
            return self.sess.run(self.logits,
                                 feed_dict={self.X: x_test, self.training: training})
    
        def get_accuracy(self, x_test, y_test, training=False):
            return self.sess.run(self.accuracy,
                                 feed_dict={self.X: x_test,
                                            self.Y: y_test, self.training: training})
    
        def train(self, x_data, y_data, training=True):
            return self.sess.run([self.cost, self.optimizer], feed_dict={
                self.X: x_data, self.Y: y_data, self.training: training})
    
    In [99]:
    sess = tf.Session()
    m1 = Model(sess, "m1")
    
    sess.run(tf.global_variables_initializer())
    
    In [100]:
    learning_rate = 0.001
    training_epochs = 15
    batch_size = 100
    
    for epoch in range(training_epochs):
        avg_cost = 0
        total_batch = int(mnist.train.num_examples / batch_size)
    
        for i in range(total_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            c, _ = m1.train(batch_xs, batch_ys)
            avg_cost += c / total_batch
    
        print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost))
    
    Epoch: 0001 cost = 0.277208865
    Epoch: 0002 cost = 0.087663636
    Epoch: 0003 cost = 0.063976280
    Epoch: 0004 cost = 0.056294738
    Epoch: 0005 cost = 0.047856958
    Epoch: 0006 cost = 0.043540149
    Epoch: 0007 cost = 0.039929367
    Epoch: 0008 cost = 0.038613909
    Epoch: 0009 cost = 0.034379170
    Epoch: 0010 cost = 0.032627232
    Epoch: 0011 cost = 0.031736523
    Epoch: 0012 cost = 0.029701989
    Epoch: 0013 cost = 0.028531433
    Epoch: 0014 cost = 0.027346463
    Epoch: 0015 cost = 0.027297274
    
    In [101]:
    print('Accuracy:', m1.get_accuracy(mnist.test.images, mnist.test.labels))
    
    Accuracy: 0.9943
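
    The addon title mentions ensembles, but only m1 is trained above. A minimal sketch (my own addition, with a hypothetical ensemble_accuracy helper) of how several already-trained Model instances could be combined by summing their logits:

    def ensemble_accuracy(models, images, labels):
        # sum each model's logits and take the per-row argmax as the ensemble prediction
        summed_logits = sum(m.predict(images) for m in models)
        ensemble_pred = np.argmax(summed_logits, axis=1)
        return np.mean(ensemble_pred == np.argmax(labels, axis=1))

    # with a single model this reduces to the accuracy printed above, e.g.
    # ensemble_accuracy([m1], mnist.test.images, mnist.test.labels)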
    

    Lab12. RNN

    Data we use:

    In [102]:
    sentence = ("if you want to build a ship, don't drum up people together to "
               "collect wood and don't assign them tasks and work, but rather "
               "teach them to long for the endless immensity of the sea.")
    
    In [103]:
    idx2char = list(set(sentence))
    char2idx = {char: idx for idx, char in enumerate(idx2char)}
    sentence_idx = [char2idx[char] for char in sentence]
    x_data = [sentence_idx[:-1]]
    y_data = [sentence_idx[1:]]
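
    A quick sanity check (my own addition) that x_data and y_data are the same sentence shifted by one character:

    print(sentence[0:10])                                        # 'if you wan'
    print(''.join(idx2char[idx] for idx in x_data[0][:10]))      # 'if you wan'
    print(''.join(idx2char[idx] for idx in y_data[0][:10]))      # 'f you want'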
    

    1. Prediction with RNN

    In [104]:
    tf.reset_default_graph()
    
    len_of_sentence = len(sentence) - 1
    num_of_chars = len(char2idx)
    batch_size = 1
    
    X = tf.placeholder(tf.int32, [None, len_of_sentence])
    Y = tf.placeholder(tf.int32, [None, len_of_sentence])
    X_one_hot = tf.one_hot(X, num_of_chars) # [#data(==1), len_of_sentence, num_of_chars]
    
    cell = tf.contrib.rnn.BasicRNNCell(num_units=num_of_chars)
    initial_state = cell.zero_state(batch_size, tf.float32)
    
    outputs, _states = tf.nn.dynamic_rnn(
        cell, X_one_hot, initial_state = initial_state, dtype = tf.float32
    )
    
    weights = tf.ones([batch_size, len_of_sentence])
    sequence_loss = tf.contrib.seq2seq.sequence_loss(logits = outputs, targets=Y, weights=weights)
    loss = tf.reduce_mean(sequence_loss)
    train = tf.train.AdamOptimizer(learning_rate=3e-3).minimize(loss)
    
    prediction = tf.argmax(outputs, axis = 2) # argmax over the character (one-hot) dimension -> shape [batch_size, len_of_sentence]
    
    In [105]:
    step_val = list()
    cost_val = list()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(5000):
            curr_loss, _ = sess.run([loss, train],
                                    feed_dict = {X: x_data, Y: y_data})
            step_val.append(i)
            cost_val.append(curr_loss)
            if i%250 == 0:
                curr_prediction = sess.run(prediction, feed_dict = {X: x_data})
                result_str = [idx2char[idx] for idx in np.squeeze(curr_prediction)]
                print("Step {}: {}".format(i, ''.join(result_str)))
    
    Step 0: twhwgesf tplmeasoag sh.wr.e.mwam,.hwefaphy.l.ckshdp ,emr.lllelfewe.ilhl ',.htwrwe',.hltrhefr.a.hlc .e.rf srm ,,'mfoph tlrllllhlfarlrlhlce'sam,.c mi.r.hlh,shmt.he.ict.aw,wt.ma.hbhl
    Step 250: , doo t nt to bt lh t soip  ton'  toum tp t o le tog ther to to lest too  tnd ton't eosetn themht l o and soo t brt tat er to l  them to le g sor t el nd ess inm ndit  oo t e sei.
    Step 500: , you tand to  u nd tisoim  ton't toum np t l le to  ther te le lest tan  tod ton t aod gn them tel o and woo t soimtnl er to ct them to le g sor teeltod ess inm ndity to t e sea.
    Step 750: t you tant to bu nd t soip  ton'  toum tp t o le tog ther to lo lest tan  tnd ton't todign them tel o and woo t bot tat er to ch them to leng sor the tod ess apm ndtty to t e sean
    Step 1000: t you tant to bu nd t soip  ton'  toum tp t o le tog ther to lo lect tan  tnd ton't todign them teldo and woo t but tat er to ch them to leng sor tee tnd ess amm nsity to t e sei.
    Step 1250: f you oandlac l nllac legl  ao l lao t so t l llltlll lelllllllll cllealllacglalgll lagwg,hloelllll scdnd loo t bhc lac ellllllhellel llllellesog tlllllglellggmmendcaldap lle lli 
    Step 1500: t you want to bu ld t soim  ton't toum tp t o le tog ther to lo lect ton  tnd ton't ansign them tel o and wog t but tnt er to ct them to leng sor the tod ess am  nsity to t e sei.
    Step 1750: t you want to bu ld t shim  ton't toum tp t o le tog ther to lo lect too  tnd ton't ansign them tes o and wor t but tnt er to ct them to leng sor the tod essiam  nsity to t e sei.
    Step 2000: t you want to bu ld t soim  ton't toum tp t o le tog ther to lo lest won  tnd ton't tnsign them tes o and wog t bht tat er to ch them to leng sor the tod essiam  nsity to the sei.
    Step 2250: f you want to bu ld t shim  ton't toum tp t o le tog ther to lo lest won  tnd ton't tndign them tes o and woo t but tat er to ch them to lend for the tod essiam  nsity to the sei.
    Step 2500: t yooddoodhdhllhdllldldllldldohlhldldd dodhlo loltlldlhelddodlhdldlhhllodldldldlldlldodlleldldl lhhllldhdldoodo dhhldl.hdlddolchellll lhdloldelooddodldldlllhldl  lhllldtootodlhllh
    Step 2750: t yooddoodhdhdlhdllldldllldldohlhldldc dodhlo .oltlldlhelddodlhdldlhhllodldodldlldlldodlleldldl lhdllldhdldoodo hhhldl.hdlddolchelldl lhdloldelooddodldldlelhldl   hllldtootodlhllh
    Step 3000: t yooddoodhdhdlhdloldldllldldohlhldldc dodhdo .oltlldlhelddodlh lolhhllodldodldlldlldodlleldldl lhdldldodldoodo hhdldl.hdlddolchelldl lhdloldelooddodldldlelhldl   hllldtootodlhllh
    Step 3250: t yooddoodhdhdlhdloldldllldldohlhldldc dodh.o .oltlldlhelddodlh lolhhloodldodldlddlldodlleldldl lhdldododldoodo hhhldl.hdlddolchelldl lhdloldedooododldldlelhldl   hhlldtootodlhllh
    Step 3500: t yooddoodhdhdlhdloldldldldldohlhldldc dodh.o .olt ldlhelddo lh lolhhloodldodldlddlltodlleldldl lhdlootod doodo  uhldl.hdlddolchelhdl lhdloldeloootodlhldlelhldl l hhlldtootodlhllh
    Step 3750: t yooddood dhdlhdloldldldldldohlhldldc dhdh.o .olt ldlhelddo lh lolhhloodldodldldllltodlleddldl lhdlootod doodo  uhldl.hdldlolchelhel lhdloldeloootodlhldlelhldl l hhlldtootodlhllh
    Step 4000: t yooddoot dhdlhdloldldldldldohltldldc dhdh.o .olt ldlhelddo lh lolhhloodldodldldllltodlleddldl lhdlootod doodo  uhldl.hdldlolchelhel lhdlo deloo todlhldlelhldl l hhlldtootodlhllh
    Step 4250: t yooddoot dhdlodloldldldldldlhlt dldc dhdh.o .olt ldlhelddo lh lolhhloolltodldldllltodlleddldl lhdlootod doodo  uhldl odldlolchelhel lhdlo deloo todlhldlelhldl l hhlldto todlhllh
    Step 4500: t yooddo t dhdlodloldldldldldlhlt dldc dhdh.o .olt l lhelddo lh lolhhloolltodldldltltodllelhlel lhdloopod doodo  mhltltodlhlolchelhel lhdlo deloo todlhldlelhldl l hllldto todlhllh
    Step 4750: t yooddo t thdlhdloltodldldldohlt dldc thdh.o .olt l lheldlo lo l  hhloolltod dldltltodlledhlel lhdloopod doodo  mdltltodlhlolch lhel lhdlo deloo thdldldlelhldl l hhlldtootodlhllh
    
    In [106]:
    fig = plt.figure(figsize = [12, 8])
    plt.plot(step_val, cost_val, label='cost')
    plt.legend()
    plt.show()
    

    2. Prediction with LSTM

    In [107]:
    x_data_for_lstm = list()
    y_data_for_lstm = list()
    len_of_seq = 12
    
    for i in range(len(sentence) - len_of_seq):
        x_str = sentence[i:i + len_of_seq]
        y_str = sentence[i+1: i + len_of_seq + 1]
        x_data_for_lstm.append([char2idx[char] for char in x_str])
        y_data_for_lstm.append([char2idx[char] for char in y_str])
    
    batch_size = len(x_data_for_lstm)
    len_of_sentence = len(x_data_for_lstm)  # number of sliding windows (not used below)
    num_of_chars = len(char2idx)
    
    In [108]:
    tf.reset_default_graph()
    
    X = tf.placeholder(tf.int32, [None, len_of_seq])
    Y = tf.placeholder(tf.int32, [None, len_of_seq])
    X_one_hot = tf.one_hot(X, num_of_chars) # [batch_size, len_of_seq, num_of_chars]
    
    cell = tf.contrib.rnn.BasicLSTMCell(num_units=num_of_chars, state_is_tuple=True)
    initial_state = cell.zero_state(batch_size, tf.float32)
    
    outputs, _states = tf.nn.dynamic_rnn(
        cell, X_one_hot, initial_state = initial_state, dtype = tf.float32
    )
    # dynamic_rnn also accepts a sequence_length argument ([len1, len2, ...]), which lets us
    # feed sequences of different lengths in a single (padded) batch: we only need to pass
    # the true length of each sequence (see the sketch after this cell).
    
    weights = tf.ones([batch_size, len_of_seq])
    sequence_loss = tf.contrib.seq2seq.sequence_loss(logits = outputs, targets=Y, weights=weights)
    loss = tf.reduce_mean(sequence_loss)
    train = tf.train.AdamOptimizer(learning_rate=1e-2).minimize(loss)
    
    prediction = tf.argmax(outputs, axis = 2) # argmax over the char (one-hot) dimension -> [batch_size, len_of_seq]
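
    As noted above, a minimal sketch of the sequence_length option (illustration only; the batch of two padded sequences and their lengths are made up, and the variable scope just avoids name clashes with the cell above):

    with tf.variable_scope("var_len_sketch"):
        seq_lengths = [3, 5]                                   # true length of each padded sequence
        padded = tf.placeholder(tf.float32, [2, 5, num_of_chars])
        var_cell = tf.contrib.rnn.BasicLSTMCell(num_units=num_of_chars)
        var_outputs, _ = tf.nn.dynamic_rnn(
            var_cell, padded, sequence_length=seq_lengths, dtype=tf.float32)
        # Past each sequence's true length the outputs are zero and the state
        # stops updating, so zero-padding does not leak into training.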
    
    In [109]:
    step_val = list()
    cost_val = list()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(5000):
            curr_loss, _ = sess.run([loss, train],
                                    feed_dict = {X: x_data_for_lstm, Y: y_data_for_lstm})
            step_val.append(i)
            cost_val.append(curr_loss)
            if i%250 == 0:
                curr_prediction = sess.run(prediction, feed_dict = {X: x_data_for_lstm})
                for j, prediction_value in enumerate(curr_prediction):
                    if j == 0:
                        result_str = [idx2char[idx] for idx in prediction_value]
                        print("Step {}: {}".format(i, ''.join(result_str)), end='')
                    else:
                        print(idx2char[prediction_value[-1]], end='')
                print('')
    
    Step 0: tttsooottggggoaaescs'''ggttttstpsaa''ssiieieittcccaottottaaaoagogcccnattotttgp'''paaagggs ggggagagggggggggggbpbttssiii'i'tiaactgtggtttaggaaaagaaaaaaaaaaasacssasssssssgsco aataaaaa
    Step 250: t too wont to build asssip  do  d do t tp pppp e to    e  t  to le t woo  dn  do  d ds  g tt e  tos s an  doo t tut aat e  t act t e  to lo   to  toe aae e s im     t  oo t e e   
    Step 500: t you want to build a ssip  d ndt dram tp pppple to  t e  to ct lect woo  and dondt d  ig  the  to  s and woo t tut rat e  thach the  to long to  the end e ssim  ns t  ao toe eea 
    Step 750: t you want to biild a ssip  d ndt dram tp pppple th  the  to ct lect woo  bnd wondt d sig  the  ta  s and wor t tut rat e  thach the  to long tor the end e s im  ns t  or the eea 
    Step 1000: t you want to build a ssip, dondt drum tp pppple to  the  to collect woo  and dondt d sig  the  ta  s and tor t but rathe  thach the  to long tor the end e s im  ns t  oo the eea 
    Step 1250: f you want to build a ssip, d ndt drum tp pepple to  the  to collect wood and dondt d sig  the  ta  s and wor t but rathe  th ch the  to long tor the end e s im  nsst  o  the eea 
    Step 1500: t you want to build a ssip, dondt drum tp pepple togethe  to collect wood and dondt d sig  the  ta ss and wor t but rathe  th ch the  to long tor the end e s im  nsst  o  the eea 
    Step 1750: t you want to butld a ssip, dondt drum tp pepple to ethe  to collect wood and dondt d sig  the  ta ss and wor t but rathe  ta ch the  to long tor the nnd e s im  nsst  o  the  e n
    Step 2000: t you want to butld a ssip, dondt drum tp pepple togethe  to collect wood and dondt d sig  the  ta ss and wor t but rathe  ta ch the  to long tor the nnd e s im  nsst  o  the eea 
    Step 2250: t you want to build a ssip, dondt drum tp pepple togethe  to collect wood and dondt d sig  the  ta ss and wor t but rathe  th ch the  to long tor the end e s im  nsst  o  the eea 
    Step 2500: t you want to build a ssip, dondt drum tp pepple to ethe  to collect wood and dondt d sig  the  ta ss and wor t but rathe  th ch the  to long tor the end e s im  nsst  o  the eea 
    Step 2750: t you want to build a ssip, dondt drum tp pepple togethe  to collect wood and dondt d sig  the  ta ss and wor t but rathe  th ch the  to long tor the end e s im  nsst  o  the eea 
    Step 3000: t you want to build a ssip, dondt drum tp pepple to ethe  to collect wood and dondt d sig  the  ta ss and wor t but rathe  th ch the  to long tor the end e s im  ssst  o  the eea 
    Step 3250: t you want to build a ssip, dondt drum tp pepple togethe  to collect wood and dondt d sig  the  ta ss and wor t but rathe  th ch the  to long tor the end e s im  nsst  o  the eea 
    Step 3500: t you want to build a ssip, dondt drum tp people togethe  to collect wood and dondt d sig  the  ta ss and wor t but rathe  th ch the  to long tor the end e s im  nsst  o  the eea 
    Step 3750: t you want to build a ssip, dondt drum tp people togethe  to collect wood and dondt d sig  the  ta ss and wor t but rathe  th ch the  to long tor the end e s im  nsst  o  the eea 
    Step 4000: t you want to build a ssip, dondt drum tp pepple to ethe  to collect wood and dondt d sig  the  ta ss and wor t but rathe  th ch the  to long tor the end e s im  nsst  o  the eea 
    Step 4250: t you want to build a ssip, dondt drum tp people togethe  to collect wood and dondt d sig  the  ta ss and wor t but rathe  th ch the  to long tor the end e s im  nsst  o  the tea 
    Step 4500: t you want to build a ssip, dondt drum tp pepple to ethe  to collect wood and dondt d sig  the  ta ss and wor t but rathe  th ch the  to long tor the end e s im  nsst  o  the tea 
    Step 4750: t you want to build a ssip, dondt drum tp people to ethe  to collect wood and dondt d sig  the  ta ss and wor t but rathe  th ch the  to long tor the end e s im  nsst  o  the eea 
    
    In [110]:
    fig = plt.figure(figsize = [12, 8])
    plt.plot(step_val, cost_val, label='cost')
    plt.legend()
    plt.show()
    

    3. Wider & Deeper: Stacked RNN (4 LSTM Layers) & FCNN (2 FC Layers)

    In [111]:
    tf.reset_default_graph()
    
    X = tf.placeholder(tf.int32, [None, len_of_seq])
    Y = tf.placeholder(tf.int32, [None, len_of_seq])
    X_one_hot = tf.one_hot(X, num_of_chars) # [#data(==1), len_of_sentence, num_of_chars]
    
    cell = tf.contrib.rnn.BasicLSTMCell(num_units=num_of_chars, state_is_tuple=True)
    multi_rnn_cells = tf.contrib.rnn.MultiRNNCell([cell]*4, state_is_tuple=True)
    # Note: TF releases after 1.0 require a separate cell instance per layer
    # (e.g. built with a list comprehension) instead of reusing [cell]*4.
    initial_state = cell.zero_state(batch_size, tf.float32)  # computed here but not passed to dynamic_rnn below
    
    multi_rnn_outputs, _states = tf.nn.dynamic_rnn(
        multi_rnn_cells, X_one_hot, dtype = tf.float32
    )
    
    X_for_fc = tf.reshape(multi_rnn_outputs, [-1, num_of_chars])
    fcnn1 = tf.contrib.layers.fully_connected(
        X_for_fc, num_of_chars, activation_fn = None)
    outputs = tf.contrib.layers.fully_connected(
        fcnn1, num_of_chars, activation_fn = None)
    
    outputs = tf.reshape(outputs, [batch_size, len_of_seq, num_of_chars])
    
    weights = tf.ones([batch_size, len_of_seq])
    sequence_loss = tf.contrib.seq2seq.sequence_loss(logits = outputs, targets=Y, weights=weights)
    loss = tf.reduce_mean(sequence_loss)
    train = tf.train.AdamOptimizer(learning_rate=1e-2).minimize(loss)
    
    prediction = tf.argmax(outputs, axis = 2) # argmax over the char (one-hot) dimension -> [batch_size, len_of_seq]
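
    The reshape around the two fully connected layers flattens the time dimension so the same FC weights are applied at every time step. A quick NumPy shape check (a sketch, with a hypothetical batch of 4):

    demo_out = np.zeros([4, len_of_seq, num_of_chars])            # stand-in for the RNN outputs
    demo_flat = demo_out.reshape(-1, num_of_chars)                # (4 * len_of_seq, num_of_chars): one row per time step
    demo_back = demo_flat.reshape(4, len_of_seq, num_of_chars)    # back to (4, len_of_seq, num_of_chars) for sequence_loss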
    
    In [112]:
    step_val = list()
    cost_val = list()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(1000):
            curr_loss, _ = sess.run([loss, train],
                                    feed_dict = {X: x_data_for_lstm, Y: y_data_for_lstm})
            step_val.append(i)
            cost_val.append(curr_loss)
            if i%50 == 0:
                curr_prediction = sess.run(prediction, feed_dict = {X: x_data_for_lstm})
                for j, prediction_value in enumerate(curr_prediction):
                    if j == 0:
                        result_str = [idx2char[idx] for idx in prediction_value]
                        print("Step {}: {}".format(i, ''.join(result_str)), end='')
                    else:
                        print(idx2char[prediction_value[-1]], end='')
                print('')
    
    Step 0:                                                                                                                                                                                    
    Step 50:                                     sssss                                      ssssssssss                                                                      sss                 
    Step 100:    nd oatt    bnidd a  mmiy don'  dnui  p  eolee ttoeteer t ocollecttwood and oon'  asssmn  hem t ssssind oorr,,,,  rathhrtt aceereem t ooond for   e taolessssmmiiiiiy of  hem tas
    Step 150:  ttdu want to build a ship, don't drum up people together to collect wood and don't assign them tasks and work, but rather teach them to long for the endless immensity of the sean
    Step 200: m you want to build a ship, don't drum up people together to collect wood and don't assign them tasks and work, but rather teach them to long for the endless immensity of the sea.
    Step 250: l you want to build a ship, don't drum up people together to collect wood and don't assign them tasks and work, but rather teach them to long for the endless immensity of the sea.
    Step 300: p you want to build a ship, don't drum up people together to collect wood and don't assign them tasks and work, but rather teach them to long for the endless immensity of the sea.
    Step 350: p you want to build a ship, don't drum up people together to collect wood and don't assign them tasks and work, but rather teach them to long for the endless immensity of the sea.
    Step 400: p you want to build a ship, don't drum up people together to collect wood and don't assign them tasks and work, but rather teach them to long for the endless immensity of the sea.
    Step 450: p you want to build a ship, don't drum up people together to collect wood and don't assign them tasks and work, but rather teach them to long for the endless immensity of the sea.
    Step 500: p you want to build a ship, don't drum up people together to collect wood and don't assign them tasks and work, but rather teach them to long for the endless immensity of the sea.
    Step 550: p you want to build a ship, don't drum up people together to collect wood and don't assign them tasks and work, but rather teach them to long for the endless immensity of the sea.
    Step 600: t you want to build a ship, don't drum up people together to collect wood and don't assign them tasks and work, but rather teach them to long for the endless immensity of the sea.
    Step 650: t you want to build a ship, don't drum up people together to collect wood and don't assign them tasks and work, but rather teach them to long for the endless immensity of the sea.
    Step 700: l you want to build a ship, don't drum up people together to collect wood and don't assign them tasks and work, but rather teach them to long for the endless immensity of the sea.
    Step 750: p you want to build a ship, don't drum up people together to collect wood and don't assign them tasks and work, but rather teach them to long for the endless immensity of the sea.
    Step 800: p you want to build a ship, don't drum up people together to collect wood and don't assign them tasks and work, but rather teach them to long for the endless immensity of the sea.
    Step 850: p you want to build a ship, don't drum up people together to collect wood and don't assign them tasks and work, but rather teach them to long for the endless immensity of the sea.
    Step 900: t you want to build a ship, don't drum up people together to collect wood and don't assign them tasks and work, but rather teach them to long for the endless immensity of the sea.
    Step 950: t you want to build a ship, don't drum up people together to collect wood and don't assign them tasks and work, but rather teach them to long for the endless immensity of the sea.
    
    In [113]:
    fig = plt.figure(figsize = [12, 8])
    plt.plot(step_val, cost_val, label='cost')
    plt.legend()
    plt.show()
    

    4. Predicting Time Series Stock Data (1 LSTM Cell)

    In [114]:
    xy = np.loadtxt('../data-02-stock_daily.csv', delimiter=',')
    
    In [115]:
    from sklearn.preprocessing import MinMaxScaler
    
    In [116]:
    xy = xy[::-1]  # reverse rows so the series runs in chronological order
    
    scaler = MinMaxScaler()
    xy = scaler.fit_transform(xy)
    # Normalize each column to [0, 1] with MinMaxScaler
    
    x = xy            # features: all 5 columns
    y = xy[:, [-1]]   # label: last column (closing price)
    
    In [117]:
    xy.shape
    
    Out[117]:
    (732, 5)
    In [118]:
    len_of_seq = 7
    dimension = xy.shape[1]
    
    x_data = list()
    y_data = list()
    
    for i in range(len(y) - len_of_seq):
        x_data.append(x[i:i + len_of_seq])
        y_data.append(y[i+len_of_seq])
        
    train_size = int(len(y_data) * 0.8)
    test_size = len(y_data) - train_size
    
    x_train, x_test = np.array(x_data[:train_size]), np.array(x_data[train_size:])
    y_train, y_test = np.array(y_data[:train_size]), np.array(y_data[train_size:])
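
    A quick sanity check on the resulting shapes (derived from xy.shape == (732, 5) and len_of_seq == 7; a sketch, not one of the original cells):

    # 732 - 7 == 725 windows; train_size == int(725 * 0.8) == 580, test_size == 145
    print(x_train.shape, y_train.shape)   # expected: (580, 7, 5) (580, 1)
    print(x_test.shape,  y_test.shape)    # expected: (145, 7, 5) (145, 1)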
    
    In [119]:
    tf.reset_default_graph()
    
    X = tf.placeholder(tf.float32, [None, len_of_seq, dimension])
    Y = tf.placeholder(tf.float32, [None, 1])
    
    cell = tf.contrib.rnn.BasicLSTMCell(
        num_units=dimension, state_is_tuple=True, activation=tf.tanh)
    outputs, _states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)
    Y_pred = tf.contrib.layers.fully_connected(outputs[:, -1], 1, activation_fn=tf.nn.relu)
    
    loss = tf.reduce_sum(tf.square(Y_pred - Y))  # sum of the squares
    train = tf.train.AdamOptimizer(1e-2).minimize(loss)
    
    targets = tf.placeholder(tf.float32, [None, 1])
    predictions = tf.placeholder(tf.float32, [None, 1])
    rmse = tf.sqrt(tf.reduce_mean(tf.square(targets - predictions)))
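
    The RMSE is evaluated by feeding NumPy arrays back through the targets/predictions placeholders; the same value could also be computed outside the graph, e.g. (a sketch):

    def rmse_np(y_true, y_hat):
        return np.sqrt(np.mean(np.square(y_true - y_hat)))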
    
    In [120]:
    step_val = list()
    step50_val = list()
    cost_val = list()
    rmse_val = list()
    
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(1000):
            curr_loss, _ = sess.run([loss, train],
                                    feed_dict = {X: x_train, Y: y_train})
            step_val.append(i)
            cost_val.append(curr_loss)
            if i%50 == 0:
                predict_value = sess.run(Y_pred, feed_dict={X: x_test})
                step50_val.append(i)
                rmse_val.append(sess.run(rmse,
                                             feed_dict = {targets: y_test, predictions: predict_value}
                                        ))
        last_predict_value = sess.run(Y_pred, feed_dict = {X: x_test})
    
    In [121]:
    fig = plt.figure(figsize = [12, 20])
    ax_cost = plt.subplot(3, 1, 1)
    ax_cost.plot(step_val, cost_val, label='cost')
    ax_cost.legend()
    
    ax_rmse = plt.subplot(3, 1, 2)
    ax_rmse.plot(step50_val, rmse_val, c='red', label='RMSE')
    ax_rmse.scatter(step50_val, rmse_val, c='black', s=12)
    for i, curr_rmse in enumerate(rmse_val):
        ax_rmse.annotate(round(curr_rmse, 3), (step50_val[i], rmse_val[i]))
    ax_rmse.legend()
    
    ax_val = plt.subplot(3, 1, 3)
    ax_val.plot(y_test, label='Test Value')
    ax_val.plot(last_predict_value, label='Prediction')
    ax_val.legend()
    
    plt.show()