DeepLearningZeroToAll Lab Practice
Dl.tf.practice | 01 Sep 2018
Import Libraries¶
In [1]:
import tensorflow as tf
# Set the graph-level random seed so randomly initialised variables are repeatable.
tf.set_random_seed(1)
# Show the installed TensorFlow version (the notebook uses the 1.x Session API).
tf.__version__
Out[1]:
In [2]:
import numpy as np
import matplotlib.pyplot as plt
import random
Lab01. Operations and Placeholders¶
1. Constant - Hello World!¶
In [3]:
# Build a constant string tensor; nothing is evaluated until a session runs it.
words = tf.constant("Hello TF!")
# Use the session as a context manager so it is closed after this demo;
# the next cell opens its own session.
with tf.Session() as sess:
    print(sess.run(words))
2. Operation - Add¶
In [4]:
# Two constant nodes; the first states its dtype explicitly.
node1 = tf.constant(3.0, tf.float32)
node2 = tf.constant(4.0)
# tf.add only adds a node to the graph; the sum is computed by sess.run below.
node_add = tf.add(node1, node2)
sess = tf.Session()
# Fetch both inputs in a single call, then fetch their sum.
print(sess.run([node1, node2]))
print(sess.run(node_add))
Addon: Tensor Manipulation¶
- np.array.ndim : dimension of array
- np.array.shape / tf.shape(matrix) : shape of array or matrix
- tf.reduce_mean(matrix) : mean of all elements of matrix. If an axis argument (0, 1, -1, ...) is given, the mean is taken along that axis.
- tf.argmax(matrix, axis) : position of maximum value of given axis
- tf.reshape(matrix, shape) : change the shape of matrix by given shape
- tf.squeeze(matrix) : remove the dimensions of size 1 from matrix (all of them, or only those listed in axis)
- tf.expand_dims(matrix, dim) : expand the dimension by given value
- tf.one_hot(matrix, depth) : one-hot encode the values of matrix with the given depth. This adds one dimension to matrix.
- tf.cast(matrix, dtype) : change the data type of given matrix.
- tf.stack(matrix, axis=0) : stack multiple matrices by given axis
- tf.ones_like(matrix) : create matrix with same shape of given matrix, with value 1
- tf.zeros_like(matrix) : create matrix with same shape of given matrix, with value 0
- zip(*args)
3. Placeholder¶
In [5]:
# Placeholders are graph inputs whose values are supplied through feed_dict.
ph1 = tf.placeholder(tf.float32)
ph2 = tf.placeholder(tf.float32)
node_add = ph1 + ph2
# Reuses `sess` from the previous cell. No shape is declared on the
# placeholders, so scalars and nested lists are both fed below.
print(sess.run(node_add, feed_dict = {ph1 : 3, ph2 : 4.5}))
print(sess.run(node_add, feed_dict = {ph1 : [[1,4,5], [2,4,6]], ph2 : [[4,4,4], [1,3,5]]}))
Lab02-04. Linear Regression¶
1. Setting W, b, cost, optimizer, train¶
In [6]:
# Trainable weight and bias, each a randomly initialised 1-element vector.
W = tf.Variable(tf.random_normal([1]), name = 'weight')
b = tf.Variable(tf.random_normal([1]), name = 'bias')
# Training data baked into the graph; the values satisfy y = 2x + 1.
x_train, y_train = ([1,2,3,4,5], [3,5,7,9,11])
# Linear model evaluated on the fixed inputs.
hypothesis = x_train * W + b
In [7]:
# Mean squared error between the predictions and the targets.
cost = tf.reduce_mean(tf.square(hypothesis - y_train))
In [8]:
# Plain gradient descent; each run of `train` applies one update that reduces `cost`.
optimizer = tf.train.GradientDescentOptimizer(learning_rate = 0.03)
train = optimizer.minimize(cost)
2. Session run¶
In [9]:
sess = tf.Session()
# Variables must be initialised before any op that reads them is run.
sess.run(tf.global_variables_initializer())
In [10]:
# Run 10001 update steps, but only print every 400th step up to step 2000
# to keep the output short.
for step in range(10001):
    sess.run(train)
    if step % 400 == 0 and step <= 2000:
        print(step, sess.run(cost), sess.run(W), sess.run(b))
3. Linear regression with placeholder¶
In [11]:
# Same linear model as before, but inputs and targets are now fed at run time.
X = tf.placeholder(tf.float32)
Y = tf.placeholder(tf.float32)
# Fresh variables: these rebind W and b, the earlier ones stay in the graph.
W = tf.Variable(tf.random_normal([1]), name = 'weight')
b = tf.Variable(tf.random_normal([1]), name = 'bias')
hypothesis = X * W + b
# Mean squared error, minimised with plain gradient descent.
cost = tf.reduce_mean(tf.square(hypothesis - Y))
optimizer = tf.train.GradientDescentOptimizer(learning_rate = 0.03)
train = optimizer.minimize(cost)
In [12]:
# Separate session for the placeholder-based model; kept open because the
# prediction cell further down reuses it.
sess_using_ph = tf.Session()
sess_using_ph.run(tf.global_variables_initializer())
In [13]:
# Train on x_train / y_train fed through the placeholders.
# NOTE(review): cost, W and b are fetched in the same run() call as `train`,
# so the printed values may be from before or after that step's update - confirm if it matters.
for step in range(10001):
    cost_val , W_val, b_val, _ = sess_using_ph.run(
       [cost, W, b, train], feed_dict = {X: x_train, Y: y_train}
    )
    # Print every 400th step, up to step 2000 only.
    if step % 400 == 0 and step <= 2000:
        print(step, cost_val, W_val, b_val)
3.1 Get Prediction value by feeding placeholder¶
In [14]:
# Predict for new inputs; only X is fed because `hypothesis` does not depend on Y.
print(sess_using_ph.run(hypothesis, feed_dict = {X: [100, 200, 300]}))
4. Visualizing cost¶
In [15]:
X = tf.placeholder(tf.float32)
Y = tf.placeholder(tf.float32)
# W is a placeholder here (not a Variable) so that the cost can be evaluated
# at hand-picked weight values.
W = tf.placeholder(tf.float32)
# Bias-free model and its mean squared error.
hypothesis = X * W
cost = tf.reduce_mean(tf.square(hypothesis - Y))
In [16]:
sess = tf.Session()
# NOTE(review): the model of this section has no Variables of its own (W is a
# placeholder); this call only initialises variables created by earlier cells.
sess.run(tf.global_variables_initializer())
In [17]:
# Sweep W over i * 0.05 for i in [-10, 110) and record the cost at each value.
W_val = list()
cost_val = list()
for i in range(-10, 110):
    feed_W = i * 0.05
    # W is fetched as well, so curr_W is simply the value that was fed.
    curr_cost, curr_W = sess.run([cost, W], feed_dict={X: [1,2,3], Y: [3,5,7], W: feed_W})
    W_val.append(curr_W)
    cost_val.append(curr_cost)
In [18]:
# Cost as a function of the weight value.
plt.plot(W_val, cost_val)
plt.show()
5. Gradient Descent by hand¶
In [19]:
X = tf.placeholder(tf.float32)
Y = tf.placeholder(tf.float32)
# A single trainable weight; this model has no bias term.
W = tf.Variable(tf.random_normal([1]), name = 'weight')
hypothesis = X * W
# Mean squared error.
cost = tf.reduce_mean(tf.square(hypothesis - Y))
In [20]:
learning_rate = 0.03
# Hand-written gradient of the cost with respect to W. The exact derivative of
# mean((X*W - Y)^2) is 2 * mean((X*W - Y) * X); the constant factor 2 is left out here.
gradient = tf.reduce_mean((hypothesis - Y)*X)
# One gradient-descent step: overwrite W with W - learning_rate * gradient.
update = W.assign(W - learning_rate*gradient)
# The automatic equivalent uses GradientDescentOptimizer:
#     optimizer = tf.train.GradientDescentOptimizer(learning_rate = learning_rate)
#     train = optimizer.minimize(cost)
    
# To inspect or modify the gradients before they are applied:
#     gvs = optimizer.compute_gradients(cost)
#         computes the gradients of `cost` and returns a list of (gradient, variable) pairs
#     apply_gradients = optimizer.apply_gradients(gvs)
#     sess.run(apply_gradients)
#         applies the (possibly modified) pairs in `gvs` to their variables
In [21]:
# New session for the hand-written gradient-descent example.
sess = tf.Session()
sess.run(tf.global_variables_initializer())
In [22]:
# Record the step index, weight and cost for each of 41 manual update steps.
step_val = list()
W_val = list()
cost_val = list()
for step in range(41):
    # NOTE(review): W and cost are fetched in the same run() call as `update`,
    # so they may be read before or after the assignment - confirm if it matters.
    curr_W, curr_cost, _ = sess.run([W, cost, update], feed_dict = {X: [1,2,3], Y: [3,5,7]})
    step_val.append(step)
    W_val.append(curr_W)
    cost_val.append(curr_cost)
In [23]:
# Trace of (W, cost) over the manual gradient-descent run; every recorded
# point is marked in red and annotated with its step number.
fig, ax = plt.subplots(figsize=[12, 12])
ax.plot(W_val, cost_val)
ax.scatter(W_val, cost_val, c='r', s=12)
for step_no, w_at_step, cost_at_step in zip(step_val, W_val, cost_val):
    ax.annotate(step_no, (w_at_step, cost_at_step))
plt.show()
6. Multivariate Linear Regression (using numpy)¶
In [24]:
# Load the CSV as a float32 matrix.
xy = np.loadtxt('../data-01-test-score.csv', delimiter=',', dtype=np.float32)
# Every column except the last is a feature.
x_data = xy[:, 0:-1]
# The last column is the target; indexing with a list keeps it two-dimensional.
y_data = xy[:, [-1]]
In [25]:
# Display (number of rows, number of feature columns).
x_data.shape
Out[25]:
In [26]:
# Display the target shape; it has a single column.
y_data.shape
Out[26]:
In [27]:
# Three input features per row, one regression target per row.
X = tf.placeholder(tf.float32, shape = [None, 3])
Y = tf.placeholder(tf.float32, shape = [None, 1])
W = tf.Variable(tf.random_normal([3, 1]), name = 'weight')
b = tf.Variable(tf.random_normal([1]), name = 'bias')
hypothesis = tf.matmul(X, W) + b
# Use tf.matmul here, not *: between two tensors, * is an element-wise
# product with broadcasting, not a matrix multiplication.
cost = tf.reduce_mean(tf.square(hypothesis - Y)) 
# Plain gradient descent with a very small learning rate.
optimizer = tf.train.GradientDescentOptimizer(learning_rate = 1e-6)
train = optimizer.minimize(cost)
In [28]:
# New session for the multivariate regression run.
sess = tf.Session()
sess.run(tf.global_variables_initializer())
In [29]:
step_val = list()
cost_val = list()
# Weight history: one row per recorded step, one column per weight.
W_val = np.empty((0,3), float)
for step in range(201):
    curr_cost, curr_W, _ = sess.run(
        [cost, W, train], feed_dict = {X: x_data, Y: y_data}
    )
    # Record every 5th step only.
    if step % 5 == 0:
        step_val.append(step)
        cost_val.append(curr_cost)
        # curr_W has shape (3, 1); its transpose is appended as one row.
        W_val = np.vstack((W_val, curr_W.T))
# np.vstack is not deprecated, but it copies the whole history on every call.
# That is fine for this handful of rows; a list would scale better for long runs.
In [30]:
# Split the recorded weight history into one plain Python list per weight.
W0_val_list, W1_val_list, W2_val_list = (
    W_val[:, col].tolist() for col in range(3)
)
In [31]:
# Show three stacked subplots, one per weight, to check that each weight
# converges. Every recorded point is marked in red and annotated with its step.
fig = plt.figure(figsize = [12,18])
# Kept under their original names in case later cells refer to them.
ax0, ax1, ax2 = (fig.add_subplot(3, 1, row) for row in (1, 2, 3))
weight_histories = (W0_val_list, W1_val_list, W2_val_list)
for col, (ax, weight_history) in enumerate(zip((ax0, ax1, ax2), weight_histories)):
    ax.plot(weight_history, cost_val)
    ax.scatter(weight_history, cost_val, c='r', s=12)
    for i, step in enumerate(step_val):
        ax.annotate(step, (weight_history[i], cost_val[i]))
    ax.set_title('W[:, {}] attribute gradient descent'.format(col))
plt.show()
6.1 Using AdamOptimizer instead¶
In [32]:
# Same multivariate regression graph as the previous section.
X = tf.placeholder(tf.float32, shape = [None, 3])
Y = tf.placeholder(tf.float32, shape = [None, 1])
W = tf.Variable(tf.random_normal([3, 1]), name = 'weight')
b = tf.Variable(tf.random_normal([1]), name = 'bias')
hypothesis = tf.matmul(X, W) + b
cost = tf.reduce_mean(tf.square(hypothesis - Y))
# The only change: AdamOptimizer (learning rate 1e-2) replaces
# GradientDescentOptimizer (learning rate 1e-6). Compare the plots.
optimizer = tf.train.AdamOptimizer(learning_rate = 1e-2)
train = optimizer.minimize(cost)
In [33]:
# New session for the Adam run.
sess = tf.Session()
sess.run(tf.global_variables_initializer())
In [34]:
step_val = list()
cost_val = list()
# Weight history: one row per recorded step, one column per weight.
W_val = np.empty((0,3), float)
for step in range(201):
    curr_cost, curr_W, _ = sess.run(
        [cost, W, train], feed_dict = {X: x_data, Y: y_data}
    )
    # Record every 5th step only.
    if step % 5 == 0:
        step_val.append(step)
        cost_val.append(curr_cost)
        # curr_W has shape (3, 1); its transpose is appended as one row.
        W_val = np.vstack((W_val, curr_W.T))
In [35]:
# Split the recorded weight history into one plain Python list per weight.
W0_val_list, W1_val_list, W2_val_list = (
    W_val[:, col].tolist() for col in range(3)
)
In [36]:
# Three stacked subplots, one per weight, showing cost against the weight
# value during the Adam run. Every recorded point is annotated with its step.
fig = plt.figure(figsize = [12,18])
# Kept under their original names in case later cells refer to them.
ax0, ax1, ax2 = (fig.add_subplot(3, 1, row) for row in (1, 2, 3))
weight_histories = (W0_val_list, W1_val_list, W2_val_list)
for col, (ax, weight_history) in enumerate(zip((ax0, ax1, ax2), weight_histories)):
    ax.plot(weight_history, cost_val)
    ax.scatter(weight_history, cost_val, c='r', s=12)
    for i, step in enumerate(step_val):
        ax.annotate(step, (weight_history[i], cost_val[i]))
    ax.set_title('W[:, {}] attribute convergence by AdamOptimizer'.format(col))
plt.show()
Addon: Tensorflow Queue Runner¶

Lab05-06. Logistic Classification & Softmax Classification¶
1. Simple Example¶
In [37]:
# Six samples with two features each; the first three belong to class 0,
# the last three to class 1.
x_data = np.array([[1, 2], [2, 3], [3, 1], [4, 3], [5, 3], [6, 2]])
# Labels as a column vector (one row per sample).
y_data = np.array([0, 0, 0, 1, 1, 1]).reshape(-1, 1)
In [38]:
# Scatter the samples of each class (0 and 1) as a separate series.
for i in range(2):
    in_class = (y_data == i).reshape(-1)
    plt.scatter(x_data[in_class, 0], x_data[in_class, 1], s=12, label=i)
plt.show()
In [39]:
# Two input features, one binary label per row.
X = tf.placeholder(tf.float32, shape = [None, 2])
Y = tf.placeholder(tf.float32, shape = [None, 1])
W = tf.Variable(tf.random_normal([2, 1]), name = 'weight')
b = tf.Variable(tf.random_normal([1]), name = 'bias')
# Logistic model: the sigmoid maps the linear score into (0, 1).
hypothesis = tf.sigmoid(tf.matmul(X, W) + b)
# Binary cross-entropy averaged over the batch.
cost = - tf.reduce_mean(Y*tf.log(hypothesis) + (1-Y)*tf.log(1-hypothesis))
train = tf.train.GradientDescentOptimizer(learning_rate = 1e-2).minimize(cost)
In [40]:
# `hypothesis > 0.5` produces a boolean tensor; tf.cast converts it to 1.0 / 0.0.
predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)
# Fraction of predictions that equal the labels.
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y), dtype=tf.float32))
In [41]:
# Snapshots of the bias and weights taken during training, used by the
# decision-boundary plot further down.
b_val = list()
W_val = np.empty((0, 2), float)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(10001):
        sess.run(train, feed_dict={X: x_data, Y: y_data})
        # Take a snapshot every 1500 steps (steps 0, 1500, ..., 9000).
        if step % 1500 == 0:
            curr_W, curr_b, curr_acc = sess.run([W, b, accuracy], feed_dict={X: x_data, Y: y_data})
            b_val.append(curr_b)
            # curr_W has shape (2, 1); its transpose is appended as one row.
            W_val = np.vstack((W_val, curr_W.T))
            print('step {}'.format(step), curr_W.T.tolist()[0], curr_b, curr_acc)
In [42]:
# Plot grid: 70 x-values over [0, 7] and 32 y-values over [0, 3.2].
linx = np.linspace(start=0, stop=7, num=70)
liny = np.linspace(start=0, stop=3.2, num=32)
meshx, meshy = np.meshgrid(linx, liny)
In [43]:
# Scatter the two classes and overlay the decision boundary of every recorded
# snapshot (one contour per snapshot, darker for later steps).
fig = plt.figure(figsize = [12, 10])
for i in range(2):
    plt.scatter(x_data[(y_data == i).reshape(-1), 0], x_data[(y_data == i).reshape(-1), 1], s=12, label = i)
for j in range(W_val.shape[0]):
    # The classifier predicts 1 when sigmoid(x.W + b) > 0.5, so the boundary is
    # the 0.5 level of the sigmoid output, not the 0.5 level of the raw linear score.
    logit = meshx * W_val[j, 0] + meshy * W_val[j, 1] + b_val[j]
    probability = 1 / (1 + np.exp(-logit))
    # The former `linewidth=.02` argument was dropped: contour's keyword is
    # `linewidths`, so the value was never applied.
    CS = plt.contour(meshx, meshy, probability, colors='black', alpha=(j*0.1 + 0.3), levels=[0.5])
    plt.clabel(CS)
    # Snapshots were taken every 1500 training steps.
    CS.collections[0].set_label('step {}'.format(1500*j))
plt.legend()
plt.show()
In [44]:
# Why does step 4500 have accuracy 0.83?
2. Diabetes Classification¶
In [45]:
# Load the CSV as a float32 matrix.
xy = np.loadtxt('../data-03-diabetes.csv', delimiter=',', dtype=np.float32)
# Every column except the last is a feature.
x_data = xy[:, 0:-1]
# The last column is the label; indexing with a list keeps it two-dimensional.
y_data = xy[:, [-1]]
In [46]:
# Display (number of rows, number of feature columns).
x_data.shape
Out[46]:
In [47]:
# Eight input features, one binary label per row.
X = tf.placeholder(tf.float32, shape = [None, 8])
Y = tf.placeholder(tf.float32, shape = [None, 1])
W = tf.Variable(tf.random_normal([8, 1]), name = 'weight')
b = tf.Variable(tf.random_normal([1]), name = 'bias')
# Logistic model with binary cross-entropy cost.
hypothesis = tf.sigmoid(tf.matmul(X, W) + b)
cost = - tf.reduce_mean(Y*tf.log(hypothesis) + (1-Y)*tf.log(1-hypothesis))
train = tf.train.GradientDescentOptimizer(learning_rate = 1e-2).minimize(cost)
In [48]:
# Threshold the probability at 0.5 and cast the boolean result to 1.0 / 0.0.
predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)
# Fraction of predictions that equal the labels.
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y), dtype=tf.float32))
In [49]:
# Train on the full data set; cost and accuracy are measured on that same data.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(10001):
        sess.run(train, feed_dict={X: x_data, Y: y_data})
        # Report every 1500 steps.
        if step % 1500 == 0:
            curr_cost, curr_acc = sess.run([cost, accuracy], feed_dict={X: x_data, Y: y_data})
            print('step {:8d} :\t\t cost {:.6f}\tacc {:.6f}'.format(step, curr_cost, curr_acc))
3. Softmax Multi-label Classification¶
In [50]:
# Eight samples with four features each.
x_data = np.array([
    [1, 2, 1, 1],
    [2, 1, 3, 2],
    [3, 1, 3, 4],
    [4, 1, 5, 5],
    [1, 7, 5, 5],
    [1, 2, 5, 6],
    [1, 6, 6, 6],
    [1, 7, 7, 7],
])
# One-hot labels over three classes, built by picking rows of the identity
# matrix: class 2 for the first three samples, class 1 for the next three,
# class 0 for the last two.
y_data = np.eye(3, dtype=int)[[2, 2, 2, 1, 1, 1, 0, 0]]
In [51]:
# Number of input features and number of classes (y_data is one-hot here).
att_len = x_data.shape[1]
nb_classes = y_data.shape[1]
X = tf.placeholder('float', [None, att_len])
Y = tf.placeholder('float', [None, nb_classes])
W = tf.Variable(tf.random_normal([att_len, nb_classes]), name='weight')
b = tf.Variable(tf.random_normal([nb_classes]), name = 'bias')
# Softmax turns the linear scores into class probabilities.
hypothesis = tf.nn.softmax(tf.matmul(X, W) + b)
# Cross entropy: summed over classes for each sample, then averaged over the batch.
cost = tf.reduce_mean(-tf.reduce_sum(Y*tf.log(hypothesis), axis=1))
train = tf.train.GradientDescentOptimizer(learning_rate=1e-1).minimize(cost)
In [52]:
step_val = list()
cost_val = list()
# The session is left open because the prediction cell below reuses it.
sess = tf.Session()
sess.run(tf.global_variables_initializer())
for step in range(2002):
    sess.run(train, feed_dict={X: x_data, Y: y_data})
    # Record the cost at steps 1, 11, 21, ..., 2001.
    if step % 10 == 1:
        step_val.append(step)
        cost_val.append(sess.run(cost, feed_dict={X: x_data, Y: y_data}))
In [53]:
# Training cost against step number for the softmax classifier.
fig = plt.figure(figsize=[12, 10])
cost_axes = fig.add_subplot(1, 1, 1)
cost_axes.plot(step_val, cost_val, label='cost')
cost_axes.legend()
plt.show()
3.1 Predict Sample value¶
In [54]:
# Class probabilities for three unseen samples (one row per sample).
prediction = sess.run(hypothesis, feed_dict =
                      {X: [[1, 11, 7, 9], [1, 3, 4, 3], [1, 1, 0, 1]]}
                     )
# `prediction` is already a NumPy array, so take the arg-max with NumPy rather
# than adding a new (deprecated) tf.arg_max op to the graph on every run.
print(prediction, np.argmax(prediction, axis=1))
3.2 Softmax with Fancy Way¶
In [55]:
# Load the CSV as a float32 matrix.
xy = np.loadtxt('../data-04-zoo.csv', delimiter=',', dtype=np.float32)
# Every column except the last is a feature.
x_data = xy[:, 0:-1]
# The last column is the class label; indexing with a list keeps it two-dimensional.
y_data = xy[:, [-1]]
In [56]:
# Display the number of distinct class labels.
len(np.unique(y_data))
Out[56]:
In [57]:
att_len = x_data.shape[1]
nb_classes = len(np.unique(y_data))
# y_data holds class ids in a single column, so the number of classes is the
# number of distinct values, not y_data.shape[1].
X = tf.placeholder(tf.float32, [None, att_len])
Y = tf.placeholder(tf.int32, [None, y_data.shape[1]])
# One-hot encoding the TensorFlow way: tf.one_hot appends a class axis
# ([None, 1] -> [None, 1, nb_classes]) and the reshape flattens the result
# back to [None, nb_classes].
Y_onehot = tf.one_hot(Y, nb_classes)
Y_onehot = tf.reshape(Y_onehot, [-1, nb_classes])
W = tf.Variable(tf.random_normal([att_len, nb_classes]), name='weight')
b = tf.Variable(tf.random_normal([nb_classes]), name = 'bias')
logits = tf.matmul(X, W) + b
hypothesis = tf.nn.softmax(logits)
# Per-sample cross entropy computed by TensorFlow directly from the raw logits.
cost_i = tf.nn.softmax_cross_entropy_with_logits(logits = logits, labels=Y_onehot)
cost = tf.reduce_mean(cost_i)
train = tf.train.GradientDescentOptimizer(learning_rate=1e-1).minimize(cost)
# Predicted class (argmax) compared with the true class (equal), and the
# fraction of samples on which they agree.
prediction = tf.argmax(hypothesis, 1)
correction = tf.equal(prediction, tf.argmax(Y_onehot, 1))
accuracy = tf.reduce_mean(tf.cast(correction, tf.float32))
In [58]:
# Histories used by the loss and accuracy plots below.
step_val = list()
loss_val = list()
acc_val = list()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(2001):
        sess.run(train, feed_dict = {X: x_data, Y: y_data})
        # Record loss and accuracy (on the training data) every 20 steps.
        if step % 20 == 0:
            loss, acc = sess.run([cost, accuracy], feed_dict = {X: x_data, Y: y_data})
            step_val.append(step)
            loss_val.append(loss)
            acc_val.append(acc)
In [59]:
# Two stacked panels: training cost on top, training accuracy below.
fig = plt.figure(figsize=[12, 14])
ax_loss = fig.add_subplot(2, 1, 1)
ax_acc = fig.add_subplot(2, 1, 2)
ax_loss.plot(step_val, loss_val, label='cost')
ax_loss.legend()
ax_acc.plot(step_val, acc_val, c='red', label='accuracy')
ax_acc.legend()
plt.show()
1. Applications and Tips¶
In [60]:
from sklearn.preprocessing import MinMaxScaler
In [61]:
# Training set: eight samples with three features each.
x_data = np.array([
    [1, 200, 1],
    [1, 300, 2],
    [1, 300, 4],
    [1, 500, 5],
    [1, 700, 5],
    [1, 200, 5],
    [1, 600, 6],
    [1, 700, 7],
])
# One-hot training labels, built by picking rows of the 3x3 identity matrix.
y_data = np.eye(3, dtype=int)[[2, 2, 2, 1, 1, 1, 0, 0]]
# Held-out test set: three samples, all of class 2.
x_test = np.array([
    [2, 100, 1],
    [3, 100, 2],
    [3, 300, 4],
])
y_test = np.eye(3, dtype=int)[[2, 2, 2]]
# Splitting the data into a train set and a test set is important.
In [62]:
# Normalise the features with MinMaxScaler. The scaler is fitted on the
# training data only and then applied to both the training and the test data.
scaler = MinMaxScaler()
scaler.fit(x_data)
x_data_scaled = scaler.transform(x_data)
x_test_scaled = scaler.transform(x_test)
In [63]:
# Number of input features and number of classes (y_data is one-hot here).
att_len = x_data_scaled.shape[1]
nb_classes = y_data.shape[1]
X = tf.placeholder(tf.float32, [None, att_len])
Y = tf.placeholder(tf.float32, [None, nb_classes])
W = tf.Variable(tf.random_normal([att_len, nb_classes]), name='weight')
b = tf.Variable(tf.random_normal([nb_classes]), name = 'bias')
# Softmax classifier with cross-entropy cost.
hypothesis = tf.nn.softmax(tf.matmul(X, W) + b)
cost = tf.reduce_mean(-tf.reduce_sum(Y * tf.log(hypothesis), axis=1))
# Three training ops on the same cost that differ only in learning rate.
optimizer_small_rate = tf.train.GradientDescentOptimizer(learning_rate=1e-5).minimize(cost)
optimizer_moderate_rate = tf.train.GradientDescentOptimizer(learning_rate=3e-1).minimize(cost)
optimizer_huge_rate = tf.train.GradientDescentOptimizer(learning_rate=3e+1).minimize(cost)
# tf.argmax replaces the deprecated alias tf.arg_max, matching the zoo example above.
prediction = tf.argmax(hypothesis, 1)
correction = tf.equal(prediction, tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correction, tf.float32))
In [64]:
def run_learning_rate_trial(train_op, num_steps=2001, record_every=20):
    """Train from freshly initialised variables with `train_op` and evaluate on the test set.

    Args:
        train_op: training op to run at every step.
        num_steps: number of training steps.
        record_every: cost and accuracy are recorded every this many steps.

    Returns:
        (steps, costs, accuracies, test_prediction, test_accuracy), where the
        first three are the recorded training histories and the last two are
        evaluated on x_test_scaled / y_test after training.
    """
    steps, costs, accuracies = list(), list(), list()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for step in range(num_steps):
            curr_cost, curr_acc, _ = sess.run(
                [cost, accuracy, train_op], feed_dict={X: x_data_scaled, Y: y_data})
            if step % record_every == 0:
                steps.append(step)
                costs.append(curr_cost)
                accuracies.append(curr_acc)
        test_prediction = sess.run(prediction, feed_dict={X: x_test_scaled})
        test_accuracy = sess.run(accuracy, feed_dict={X: x_test_scaled, Y: y_test})
    return steps, costs, accuracies, test_prediction, test_accuracy


fig = plt.figure(figsize = [12, 14])
ax_loss = plt.subplot(2, 1, 1)
ax_acc = plt.subplot(2, 1, 2)
# Three sessions with different learning rates: 1e-5, 3e-1, 3e+1.
# Each entry is (training op, legend label, report template).
trials = [
    (optimizer_small_rate, '1e-5',
     'With Small Learning Rate:\t\t Prediction: {}\t\t Accuracy: {}'),
    (optimizer_moderate_rate, '3e-1',
     'With Moderate Learning Rate:\t Prediction: {}\t\t Accuracy: {}'),
    (optimizer_huge_rate, '3e+1',
     'With Huge Learning Rate:\t\t Prediction: {}\t\t Accuracy: {}'),
]
for train_op, rate_label, report in trials:
    # step_val / cost_val / acc_val end up holding the histories of the last trial.
    step_val, cost_val, acc_val, test_prediction, test_accuracy = run_learning_rate_trial(train_op)
    print(report.format(test_prediction, test_accuracy))
    ax_loss.plot(step_val, cost_val, label=rate_label)
    ax_acc.plot(step_val, acc_val, label=rate_label)
ax_loss.legend()
ax_loss.set_title('Cost with different learning rate')
ax_acc.legend()
ax_acc.set_title('Accuracy with different learning rate')
plt.show()
2. MNIST¶
In [65]:
from tensorflow.examples.tutorials.mnist import input_data as mnist_data
In [66]:
# Read MNIST from the 'MNIST_data/' directory with one-hot encoded labels.
mnist = mnist_data.read_data_sets('MNIST_data/', one_hot=True)
In [67]:
# Each image is a flattened 28x28 vector; there are ten digit classes.
att_len = 28 * 28
nb_classes = 10
X = tf.placeholder(tf.float32, [None, att_len])
Y = tf.placeholder(tf.float32, [None, nb_classes])
W = tf.Variable(tf.random_normal([att_len, nb_classes]), name='weight')
b = tf.Variable(tf.random_normal([nb_classes]), name = 'bias')
# Single-layer softmax classifier with cross-entropy cost.
hypothesis = tf.nn.softmax(tf.matmul(X, W) + b)
cost = tf.reduce_mean(-tf.reduce_sum(Y * tf.log(hypothesis), axis=1))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-1).minimize(cost)
# tf.argmax replaces the deprecated alias tf.arg_max, matching the zoo example above.
prediction = tf.argmax(hypothesis, 1)
correction = tf.equal(prediction, tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correction, tf.float32))
In [68]:
training_epoches = 30
batch_size = 200
# Number of mini-batches per epoch; it does not change between epochs, so it
# is computed once here instead of inside the epoch loop.
total_batch = int(mnist.train.num_examples / batch_size)
# Per-epoch histories used by the plots below.
epoch_val = list()
cost_val = list()
acc_val = list()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    for epoch in range(training_epoches):
        avg_cost = 0

        for i in range(total_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            curr_cost, _ = sess.run([cost, optimizer], feed_dict = {X: batch_xs, Y: batch_ys})
            # Running mean of the batch costs over this epoch.
            avg_cost += curr_cost / total_batch

        epoch_val.append(epoch)
        cost_val.append(avg_cost)
        # Accuracy on the full test set after this epoch.
        acc_val.append(accuracy.eval(
            session=sess, feed_dict = {X: mnist.test.images, Y:mnist.test.labels}
        ))

    # After all epochs have finished, show one random test image together with
    # its true label and the model's prediction.
    rand_int = random.randint(0, mnist.test.num_examples - 1)
    plt.imshow(mnist.test.images[rand_int: rand_int + 1].reshape(28, 28),
              cmap='Greys', interpolation='nearest'
              )
    plt.show()
    # The labels are NumPy arrays, so take their arg-max with NumPy; the model
    # output reuses the existing `prediction` op instead of adding a new graph op.
    print("Given Test Image:\t\tLabel: {}\t\tPrediction: {}".format(
    np.argmax(mnist.test.labels[rand_int: rand_int +1], 1)[0],
    sess.run(prediction, feed_dict = {X: mnist.test.images[rand_int: rand_int +1]})[0],
    ))
In [69]:
# Two stacked panels: mean training cost per epoch on top, test accuracy below.
fig = plt.figure(figsize=[12, 14])
ax_cost = fig.add_subplot(2, 1, 1)
ax_acc = fig.add_subplot(2, 1, 2)
ax_cost.plot(epoch_val, cost_val, label='cost')
ax_cost.legend()
ax_acc.plot(epoch_val, acc_val, c='red', label='accuracy')
ax_acc.legend()
plt.show()
Lab09. Neural Net¶
In [70]:
# The four XOR input combinations, in the order (0,0), (0,1), (1,0), (1,1).
x_data = np.array(
    [[first, second] for first in (0, 1) for second in (0, 1)],
    dtype=np.float32,
)
# XOR of the two input columns as a float32 column vector: 0, 1, 1, 0.
y_data = np.logical_xor(x_data[:, [0]], x_data[:, [1]]).astype(np.float32)
In [71]:
# Scatter the XOR samples of each class (0 and 1) as a separate series.
for i in range(2):
    in_class = (y_data == i).reshape(-1)
    plt.scatter(x_data[in_class, 0], x_data[in_class, 1], s=12, label=i)
plt.show()
In [72]:
# Input width and label width taken from the XOR data.
att_len = x_data.shape[1]
y_col_len = y_data.shape[1]
X = tf.placeholder(tf.float32, [None, att_len])
Y = tf.placeholder(tf.float32, [None, y_col_len])
W = tf.Variable(tf.random_normal([att_len, y_col_len]), name='weight')
b = tf.Variable(tf.random_normal([y_col_len]), name = 'bias')
# Single logistic unit with binary cross-entropy cost.
hypothesis = tf.sigmoid(tf.matmul(X, W) + b)
cost = - tf.reduce_mean(Y * tf.log(hypothesis) + (1-Y) * tf.log(1-hypothesis))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=3e-2).minimize(cost)
# Threshold at 0.5, compare with the labels, and average the matches.
prediction = tf.cast(hypothesis > 0.5, dtype=tf.float32)
correction = tf.equal(prediction, Y)
accuracy = tf.reduce_mean(tf.cast(correction, tf.float32))
In [73]:
# Snapshots of the bias and weights, used by the decision-boundary plot below.
b_val = list()
W_val = np.empty((0, att_len), float)
# The session is left open because the accuracy cell further down reuses it.
sess = tf.Session()
sess.run(tf.global_variables_initializer())

for step in range(1001):
    sess.run(optimizer, feed_dict={X: x_data, Y: y_data})
    # Take a snapshot every 150 steps (steps 0, 150, ..., 900).
    if step % 150 == 0:
        curr_W, curr_b, curr_acc = sess.run([W, b, accuracy], feed_dict={X: x_data, Y: y_data})
        b_val.append(curr_b)
        # curr_W has shape (att_len, 1); its transpose is appended as one row.
        W_val = np.vstack((W_val, curr_W.T))
        print('step {}'.format(step), curr_W.T.tolist()[0], curr_b, curr_acc)
In [74]:
# Square plot grid: 60 points over [-0.1, 1.1] along each axis.
linx = np.linspace(start=-0.1, stop=1.1, num=60)
liny = np.linspace(start=-0.1, stop=1.1, num=60)
meshx, meshy = np.meshgrid(linx, liny)
In [75]:
# Scatter the XOR samples and overlay the decision boundary of every recorded
# snapshot (one contour per snapshot, darker for later steps).
fig = plt.figure(figsize = [12, 10])
for i in range(2):
    plt.scatter(x_data[(y_data == i).reshape(-1), 0], x_data[(y_data == i).reshape(-1), 1], s=12, label = i)
for j in range(W_val.shape[0]):
    # The classifier predicts 1 when sigmoid(x.W + b) > 0.5, so the boundary is
    # the 0.5 level of the sigmoid output, not the 0.5 level of the raw linear score.
    logit = meshx * W_val[j, 0] + meshy * W_val[j, 1] + b_val[j]
    probability = 1 / (1 + np.exp(-logit))
    # The former `linewidth=.02` argument was dropped: contour's keyword is
    # `linewidths`, so the value was never applied.
    CS = plt.contour(meshx, meshy, probability, colors='black', alpha=(j*0.1 + 0.3), levels=[0.5])
    plt.clabel(CS)
    # Snapshots in this section were taken every 150 training steps.
    CS.collections[0].set_label('step {}'.format(150*j))
plt.legend()
plt.show()
In [76]:
# Final accuracy of the single logistic unit on the four XOR samples.
print("XOR with logistic regression:\t\t Accuracy: {}".format(sess.run(accuracy, feed_dict = {X: x_data, Y: y_data})))
In [77]:
# Poor Result.
In [78]:
att_len = x_data.shape[1]
y_col_len = y_data.shape[1]
# Width of the single hidden layer.
hidden_layer_col_len = 3
X = tf.placeholder(tf.float32, [None, att_len])
Y = tf.placeholder(tf.float32, [None, y_col_len])
# Hidden layer: att_len inputs -> hidden_layer_col_len sigmoid units.
W1 = tf.Variable(tf.random_normal([att_len, hidden_layer_col_len]), name='weight1')
b1 = tf.Variable(tf.random_normal([hidden_layer_col_len]), name = 'bias1')
layer1 = tf.sigmoid(tf.matmul(X, W1) + b1)
# Output layer: hidden units -> y_col_len sigmoid outputs.
W2 = tf.Variable(tf.random_normal([hidden_layer_col_len, y_col_len]), name='weight2')
b2 = tf.Variable(tf.random_normal([y_col_len]), name = 'bias2')
layer2 = tf.sigmoid(tf.matmul(layer1, W2) + b2)
# Binary cross-entropy on the network output.
cost = - tf.reduce_mean(Y * tf.log(layer2) + (1-Y) * tf.log(1-layer2))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-1).minimize(cost)
# Threshold at 0.5, compare with the labels, and average the matches.
prediction = tf.cast(layer2 > 0.5, dtype=tf.float32)
correction = tf.equal(prediction, Y)
accuracy = tf.reduce_mean(tf.cast(correction, tf.float32))
In [79]:
# Parameter snapshots taken during training, used by the boundary plot below.
# Each snapshot adds hidden_layer_col_len rows to W1_val (the transpose of W1)
# and one row each to b1_val and W2_val.
W1_val = np.empty((0, att_len), float)
b1_val = np.empty((0, hidden_layer_col_len))
W2_val = np.empty((0, hidden_layer_col_len), float)
b2_val = list()
# The session is left open because the accuracy cell further down reuses it.
sess = tf.Session()
sess.run(tf.global_variables_initializer())

for step in range(10001):
    sess.run(optimizer, feed_dict={X: x_data, Y: y_data})
    # Take a snapshot every 1500 steps (steps 0, 1500, ..., 9000).
    if step % 1500 == 0:
        curr_W1, curr_W2, curr_b1, curr_b2, curr_acc = sess.run(
            [W1, W2, b1, b2, accuracy], feed_dict={X: x_data, Y: y_data}
        )
        b1_val = np.vstack((b1_val, curr_b1.T))
        W1_val = np.vstack((W1_val, curr_W1.T))
        # b2 has a single element for this data; store it as a scalar.
        b2_val.append(curr_b2[0])
        W2_val = np.vstack((W2_val, curr_W2.T))
        print('step {}'.format(step), curr_acc)
In [80]:
# Regroup the stacked rows so that W1_val[j, i, k] is, for snapshot j, the
# weight from input k to hidden unit i.
W1_val = np.reshape(W1_val, (-1, hidden_layer_col_len, att_len))
In [81]:
def sigmoid(arr):
    """Element-wise logistic function 1 / (1 + exp(-arr))."""
    return 1/(1+np.exp(-arr))
def f(j, meshx, meshy):
    """Evaluate the two-layer network of snapshot `j` on a coordinate grid.

    Args:
        j: index of the recorded parameter snapshot.
        meshx: grid of first-input coordinates.
        meshy: grid of second-input coordinates (broadcastable with meshx).

    Returns:
        Array of network outputs in (0, 1), with the broadcast shape of
        meshx and meshy.

    Reads the module-level snapshot histories W1_val (indexed as
    [snapshot, hidden unit, input]), b1_val, W2_val and b2_val, as well as
    hidden_layer_col_len.
    """
    # Start from zeros: the accumulator is built up with += below, so it must
    # not contain uninitialised memory. Its shape follows the grid rather than
    # being fixed at (60, 60).
    return_value = np.zeros(np.broadcast(meshx, meshy).shape, float)
    for i in range(hidden_layer_col_len):
        # Output of hidden unit i, weighted by its output-layer weight.
        hidden = sigmoid(meshx * W1_val[j, i, 0] + meshy * W1_val[j, i, 1] + b1_val[j, i])
        return_value += hidden * W2_val[j, i]
    return_value += b2_val[j]
    return sigmoid(return_value)
In [82]:
# Scatter the XOR samples and overlay the 0.5 output contour of the neural
# net for every recorded snapshot (darker for later steps).
fig = plt.figure(figsize = [12, 10])
for i in range(2):
    plt.scatter(x_data[(y_data == i).reshape(-1), 0], x_data[(y_data == i).reshape(-1), 1], s=12, label = i)
# Iterate over the neural-net snapshots (one b2 value per snapshot), not over
# W_val, which belongs to the earlier logistic-regression run.
for j in range(len(b2_val)):
    # The former `linewidth=.02` argument was dropped: contour's keyword is
    # `linewidths`, so the value was never applied.
    CS = plt.contour(meshx, meshy, f(j, meshx, meshy), colors='black', alpha=(j*0.1 + 0.3), levels=[0.5])
    plt.clabel(CS)
    # Neural-net snapshots were taken every 1500 training steps.
    CS.collections[0].set_label('step {}'.format(1500*j))
plt.legend()
plt.show()
In [83]:
# Final accuracy of the two-layer network on the four XOR samples.
print("XOR with neural net:\t\t Accuracy: {}".format(sess.run(accuracy, feed_dict = {X: x_data, Y: y_data})))
Addon: Tensorboard¶
In [84]:
# Start from an empty default graph so only this model is in the graph
# written out for TensorBoard.
tf.reset_default_graph()
att_len = x_data.shape[1]
y_col_len = y_data.shape[1]
X = tf.placeholder(tf.float32, [None, att_len])
Y = tf.placeholder(tf.float32, [None, y_col_len])
# Name scopes group the ops created inside them under a common name prefix.
with tf.name_scope('logistic_classification_layer'):
    W = tf.Variable(tf.random_normal([att_len, y_col_len]), name='weight')
    b = tf.Variable(tf.random_normal([y_col_len]), name = 'bias')
    hypothesis = tf.sigmoid(tf.matmul(X, W) + b)
    
    # Histogram summaries of the parameters and of the layer output.
    W_hist = tf.summary.histogram('weight', W)    
    b_hist = tf.summary.histogram('bias', b)    
    hypothesis_hist = tf.summary.histogram('hypothesis', hypothesis)    
with tf.name_scope('cost'):
    cost = - tf.reduce_mean(Y * tf.log(hypothesis) + (1-Y) * tf.log(1-hypothesis))
    # Scalar summary of the cost.
    cost_scalar = tf.summary.scalar('cost', cost)
    
with tf.name_scope('optimizer'):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=3e-2).minimize(cost)
prediction = tf.cast(hypothesis > 0.5, dtype=tf.float32)
correction = tf.equal(prediction, Y)
with tf.name_scope('accuracy'):
    accuracy = tf.reduce_mean(tf.cast(correction, tf.float32))
    # Scalar summary of the accuracy.
    accuracy_scalar = tf.summary.scalar('accuracy', accuracy)
In [85]:
with tf.Session() as sess:
    # One op that evaluates every summary defined in the graph.
    merged_summary = tf.summary.merge_all()
    # Event files are written to this log directory.
    writer = tf.summary.FileWriter('./logs/lab09_xor_logistic')
    writer.add_graph(sess.graph)
    
    sess.run(tf.global_variables_initializer())
    
    for step in range(10001):
        # Evaluate the summaries and run one training step in the same call.
        summ, _ = sess.run(
            [merged_summary, optimizer],
            feed_dict={X: x_data, Y: y_data}
        )
        # A summary is written for every single step.
        writer.add_summary(summ, global_step=step)
    writer.close()

In [86]:
# Start from an empty default graph so only this model is in the graph
# written out for TensorBoard.
tf.reset_default_graph()
att_len = x_data.shape[1]
y_col_len = y_data.shape[1]
# Width of the single hidden layer.
hidden_layer_col_len = 3
X = tf.placeholder(tf.float32, [None, att_len])
Y = tf.placeholder(tf.float32, [None, y_col_len])
# Hidden layer, with histogram summaries of its parameters and output.
with tf.name_scope('nn_layer1'):
    W1 = tf.Variable(tf.random_normal([att_len, hidden_layer_col_len]), name='weight1')
    b1 = tf.Variable(tf.random_normal([hidden_layer_col_len]), name = 'bias1')
    layer1 = tf.sigmoid(tf.matmul(X, W1) + b1)
    W1_hist = tf.summary.histogram('weight1', W1)
    b1_hist = tf.summary.histogram('bias1', b1)
    layer1_hist = tf.summary.histogram('layer1', layer1)

# Output layer. It gets its own scope name: the original reused 'nn_layer1'
# here, which grouped the second layer under a duplicate of the first layer's name.
with tf.name_scope('nn_layer2'):
    W2 = tf.Variable(tf.random_normal([hidden_layer_col_len, y_col_len]), name='weight2')
    b2 = tf.Variable(tf.random_normal([y_col_len]), name = 'bias2')
    layer2 = tf.sigmoid(tf.matmul(layer1, W2) + b2)
    W2_hist = tf.summary.histogram('weight2', W2)
    b2_hist = tf.summary.histogram('bias2', b2)
    layer2_hist = tf.summary.histogram('layer2', layer2)
with tf.name_scope('cost'):
    # Binary cross-entropy on the network output, with a scalar summary.
    cost = - tf.reduce_mean(Y * tf.log(layer2) + (1-Y) * tf.log(1-layer2))
    cost_scalar = tf.summary.scalar('cost', cost)

with tf.name_scope('optimizer'):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-1).minimize(cost)
prediction = tf.cast(layer2 > 0.5, dtype=tf.float32)
correction = tf.equal(prediction, Y)
with tf.name_scope('accuracy'):
    accuracy = tf.reduce_mean(tf.cast(correction, tf.float32))
    accuracy_scalar = tf.summary.scalar('accuracy', accuracy)
In [87]:
with tf.Session() as sess:
    # One op that evaluates every summary defined in the graph.
    merged_summary = tf.summary.merge_all()
    # Event files are written to this log directory.
    writer = tf.summary.FileWriter('./logs/lab09_xor_neuralnet')
    writer.add_graph(sess.graph)
    
    sess.run(tf.global_variables_initializer())
    
    for step in range(10001):
        # Evaluate the summaries and run one training step in the same call.
        summ, _ = sess.run(
            [merged_summary, optimizer],
            feed_dict={X: x_data, Y: y_data}
        )
        # A summary is written for every single step.
        writer.add_summary(summ, global_step=step)
    writer.close()

2. MNIST prediction with NN and tips¶
In [88]:
# Read MNIST again with one-hot labels (same call as in the softmax section).
mnist = mnist_data.read_data_sets('MNIST_data/', one_hot=True)
In [89]:
tf.reset_default_graph()
# Layer sizes: 784 inputs -> 256 -> 256 -> 10 classes.
att_len = 28 * 28
hidden_layer1_col_len = 256
hidden_layer2_col_len = 256
nb_classes = 10
learning_rate = 1e-3
X = tf.placeholder(tf.float32, [None, att_len])
Y = tf.placeholder(tf.float32, [None, nb_classes])
# Keep probability for dropout, fed at run time (0.7 for training and 1.0 for
# evaluation in the training cell).
dropout_keep_prob = tf.placeholder(tf.float32)
W1 = tf.get_variable(name='weight1', shape=[att_len, hidden_layer1_col_len], initializer= tf.contrib.layers.xavier_initializer())
# Used tf.get_variable instead of tf.Variable.
# See https://stackoverflow.com/questions/37098546/difference-between-variable-and-get-variable-in-tensorflow
# The weights use tf.contrib.layers.xavier_initializer.
b1 = tf.get_variable(name = 'bias1', shape=[hidden_layer1_col_len])
# Dropout is applied to the ReLU output of each hidden layer.
L1 = tf.nn.dropout(tf.nn.relu(tf.matmul(X, W1) + b1), keep_prob=dropout_keep_prob)
W2 = tf.get_variable(name='weight2', shape=[hidden_layer1_col_len, hidden_layer2_col_len], initializer= tf.contrib.layers.xavier_initializer())
b2 = tf.get_variable(name = 'bias2', shape=[hidden_layer2_col_len])
L2 = tf.nn.dropout(tf.nn.relu(tf.matmul(L1, W2) + b2), keep_prob=dropout_keep_prob)
W3 = tf.get_variable(name='weight3', shape=[hidden_layer2_col_len, nb_classes], initializer= tf.contrib.layers.xavier_initializer())
b3 = tf.get_variable(name = 'bias3', shape=[nb_classes])
# `hypothesis` holds raw logits here; the softmax is applied inside the loss.
hypothesis = tf.matmul(L2, W3) + b3
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
    logits = hypothesis, labels = Y
))
# Adam is used instead of plain gradient descent.
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# tf.argmax replaces the deprecated alias tf.arg_max, matching the zoo example above.
prediction = tf.argmax(hypothesis, 1)
correction = tf.equal(prediction, tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correction, tf.float32))
In [90]:
training_epoches = 30
batch_size = 200
# Number of mini-batches per epoch.
total_batch = int(mnist.train.num_examples / batch_size)
# Per-epoch histories used by the plots below.
epoch_val = list()
cost_val = list()
acc_val = list()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    for epoch in range(training_epoches):
        avg_cost = 0
        for i in range(total_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            # Training keeps 70% of the activations in the dropout layers.
            curr_cost, _ = sess.run(
                [cost, optimizer],
                feed_dict = {X: batch_xs, Y: batch_ys, dropout_keep_prob: 0.7}
            )
            # Running mean of the batch costs over this epoch.
            avg_cost += curr_cost / total_batch

        epoch_val.append(epoch)
        cost_val.append(avg_cost)
        # Accuracy on the full test set after this epoch, with dropout disabled.
        acc_val.append(accuracy.eval(
            session=sess,
            feed_dict = {X: mnist.test.images, Y:mnist.test.labels, dropout_keep_prob: 1.0}
        ))

    # After all epochs have finished, show one random test image together with
    # its true label and the model's prediction.
    rand_int = random.randint(0, mnist.test.num_examples - 1)
    plt.imshow(mnist.test.images[rand_int: rand_int + 1].reshape(28, 28),
              cmap='Greys', interpolation='nearest'
              )
    plt.show()
    # The labels are NumPy arrays, so take their arg-max with NumPy; the model
    # output reuses the existing `prediction` op instead of adding a new graph op.
    print("Given Test Image:\t\tLabel: {}\t\tPrediction: {}".format(
    np.argmax(mnist.test.labels[rand_int: rand_int +1], 1)[0],
    sess.run(
        prediction,
        feed_dict = {X: mnist.test.images[rand_int: rand_int +1], dropout_keep_prob: 1.0})[0],
    ))
In [91]:
# Two stacked panels: mean training cost per epoch on top, test accuracy
# below. Every accuracy point is marked in blue and annotated with its value.
fig = plt.figure(figsize=[12, 14])
ax_cost = fig.add_subplot(2, 1, 1)
ax_acc = fig.add_subplot(2, 1, 2)
ax_cost.plot(epoch_val, cost_val, label='cost')
ax_cost.legend()
ax_acc.plot(epoch_val, acc_val, c='red', label='accuracy')
ax_acc.scatter(epoch_val, acc_val, c='b', s=12)
for epoch_no, acc in zip(epoch_val, acc_val):
    ax_acc.annotate(acc, (epoch_no, acc))
ax_acc.legend()
plt.show()
In [92]:
# More Info:
# http://localhost:8888/edit/lab-10-7-mnist_nn_higher_level_API.py
# http://localhost:8888/edit/lab-10-8-mnist_nn_selu(wip).py
# http://localhost:8888/edit/lab-10-X1-mnist_back_prop.py
# Especially How to use Batch Normalization:
# https://github.com/hunkim/DeepLearningZeroToAll/blob/master/lab-10-6-mnist_nn_batchnorm.ipynb
# http://openresearch.ai/t/topic/80
Lab11. CNN¶
1. MNIST Prediction with acc 0.994¶
In [93]:
# Read MNIST again with one-hot labels (same call as in the earlier sections).
mnist = mnist_data.read_data_sets('MNIST_data/', one_hot=True)
In [94]:
import math
In [95]:
tf.reset_default_graph()

# ---- Hyper-parameters ----
img_len = 28                      # images are img_len x img_len pixels
att_len = img_len**2              # flattened pixel count fed through X
conv_size = 3                     # square convolution kernel size
maxpool_size = 2                  # pooling window and stride
filter1_col_len = 32
filter2_col_len = 64
filter3_col_len = 128
hidden_layer4_col_len = 625
nb_classes = 10
learning_rate = 1e-3
# Three SAME-padded poolings with stride maxpool_size shrink each side to
# ceil(img_len / maxpool_size**3): 28 -> 14 -> 7 -> 4.
pooled_len = math.ceil(img_len / maxpool_size**3)
flat_len = pooled_len * pooled_len * filter3_col_len

# ---- Inputs ----
X = tf.placeholder(tf.float32, [None, att_len])
X_img = tf.reshape(X, [-1, img_len, img_len, 1]) # [#data, height, width, #channels]
Y = tf.placeholder(tf.float32, [None, nb_classes])
dropout_keep_prob = tf.placeholder(tf.float32)   # fed as 0.7 for training, 1.0 for evaluation

# ---- Conv block 1: conv -> relu -> max-pool -> dropout ----
W1 = tf.Variable(tf.random_normal([conv_size, conv_size, 1, filter1_col_len], stddev=0.01))
L1 = tf.nn.conv2d(X_img, W1, strides=[1, 1, 1, 1], padding='SAME') # Or, VALID
L1 = tf.nn.relu(L1)
L1 = tf.nn.max_pool(L1,
            ksize = [1, maxpool_size, maxpool_size, 1],
            strides= [1, maxpool_size, maxpool_size, 1],
            padding='SAME'
        ) # [#data, 14, 14, 32]
L1 = tf.nn.dropout(L1, keep_prob=dropout_keep_prob)

# ---- Conv block 2 ----
W2 = tf.Variable(tf.random_normal([conv_size, conv_size, filter1_col_len, filter2_col_len], stddev=0.01))
L2 = tf.nn.conv2d(L1, W2, strides=[1, 1, 1, 1], padding='SAME') # Or, VALID
L2 = tf.nn.relu(L2)
L2 = tf.nn.max_pool(L2,
            ksize = [1, maxpool_size, maxpool_size, 1],
            strides= [1, maxpool_size, maxpool_size, 1],
            padding='SAME'
        ) # [#data, 7, 7, 64]
L2 = tf.nn.dropout(L2, keep_prob=dropout_keep_prob)

# ---- Conv block 3 ----
W3 = tf.Variable(tf.random_normal([conv_size, conv_size, filter2_col_len, filter3_col_len], stddev=0.01))
L3 = tf.nn.conv2d(L2, W3, strides=[1, 1, 1, 1], padding='SAME') # Or, VALID
L3 = tf.nn.relu(L3)
L3 = tf.nn.max_pool(L3,
            ksize = [1, maxpool_size, maxpool_size, 1],
            strides= [1, maxpool_size, maxpool_size, 1],
            padding='SAME'
        ) # [#data, 4, 4, 128]
L3 = tf.nn.dropout(L3, keep_prob=dropout_keep_prob)
# Flatten the feature maps so they can feed the fully connected layers.
L3 = tf.reshape(L3, [-1, flat_len])

# ---- Fully connected layer 4 (Xavier-initialised weights) ----
W4 = tf.get_variable(name='weight4',
            shape=[flat_len, hidden_layer4_col_len],
            initializer= tf.contrib.layers.xavier_initializer()
        )
b4 = tf.get_variable(name = 'bias4', shape=[hidden_layer4_col_len])
L4 = tf.nn.dropout(
    tf.nn.relu(tf.matmul(L3, W4) + b4),
    keep_prob=dropout_keep_prob
)

# ---- Output layer 5: raw logits, softmax is applied inside the loss ----
W5 = tf.get_variable(name='weight5',
            shape=[hidden_layer4_col_len, nb_classes],
            initializer= tf.contrib.layers.xavier_initializer()
        )
b5 = tf.get_variable(name = 'bias5', shape=[nb_classes])
hypothesis = tf.matmul(L4, W5) + b5

# ---- Loss, optimizer and metrics ----
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
    logits = hypothesis, labels = Y
))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# tf.argmax replaces the deprecated tf.arg_max alias and matches the rest of the notebook.
prediction = tf.argmax(hypothesis, 1)
correction = tf.equal(prediction, tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correction, tf.float32))
In [96]:
training_epoches = 30
batch_size = 200
total_batch = int(mnist.train.num_examples / batch_size)  # mini-batches per epoch
# Per-epoch history, consumed by the plotting cell below.
epoch_val = list()
cost_val = list()
acc_val = list()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    for epoch in range(training_epoches):
        avg_cost = 0
        for i in range(total_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            # Train with dropout active (keep probability 0.7).
            curr_cost, _ = sess.run(
                [cost, optimizer],
                feed_dict = {X: batch_xs, Y: batch_ys, dropout_keep_prob: 0.7}
            )
            avg_cost += curr_cost / total_batch

        epoch_val.append(epoch)
        cost_val.append(avg_cost)
        # Test accuracy after this epoch, with dropout disabled (keep probability 1.0).
        acc_val.append(accuracy.eval(
            session=sess,
            feed_dict = {X: mnist.test.images, Y:mnist.test.labels, dropout_keep_prob: 1.0}
        ))

    # Once training has finished (this runs a single time, not per epoch),
    # show one random test digit together with its label and the model's prediction.
    rand_int = random.randint(0, mnist.test.num_examples - 1)
    sample_image = mnist.test.images[rand_int: rand_int + 1]
    sample_label = mnist.test.labels[rand_int: rand_int + 1]
    plt.imshow(sample_image.reshape(28, 28),
              cmap='Greys', interpolation='nearest'
              )
    plt.show()
    # The label is decoded with NumPy and the existing `prediction` op is reused,
    # so no new nodes are added to the graph at evaluation time.
    print("Given Test Image:\t\tLabel: {}\t\tPrediction: {}".format(
        np.argmax(sample_label, 1)[0],
        sess.run(
            prediction,
            feed_dict = {X: sample_image, dropout_keep_prob: 1.0})[0],
    ))
In [97]:
# Two stacked panels: average training cost (top) and test accuracy (bottom) per epoch.
fig, (ax_cost, ax_acc) = plt.subplots(2, 1, figsize=[12, 14])

ax_cost.plot(epoch_val, cost_val, label='cost')
ax_cost.legend()

ax_acc.plot(epoch_val, acc_val, c='red', label='accuracy')
ax_acc.scatter(epoch_val, acc_val, c='black', s=12)
# Write each accuracy value next to its marker.
for epoch_idx, epoch_acc in zip(epoch_val, acc_val):
    ax_acc.annotate(epoch_acc, (epoch_idx, epoch_acc))
ax_acc.legend()

plt.show()
Addon: Tensorflow with python class and tf.layers & Ensemble¶
In [98]:
class Model:
    """CNN for flattened 28x28 inputs, built with tf.layers inside its own variable scope.

    Architecture: three (conv 3x3 -> max-pool 2x2 -> dropout) stages with 32, 64
    and 128 filters, a 625-unit ReLU dense layer with dropout, and a 10-unit
    logits layer.

    Args:
        sess: tf.Session used by predict / get_accuracy / train.
        name: variable-scope name for this model's layers.
        learning_rate: Adam step size. It is a constructor argument so the model
            no longer depends on a notebook-global `learning_rate` existing when
            the graph is built. The default 1e-3 equals the value the notebook
            assigns to that global.
    """

    def __init__(self, sess, name, learning_rate=1e-3):
        self.sess = sess
        self.name = name
        self.learning_rate = learning_rate
        self._build_net()

    def _build_net(self):
        """Create placeholders, layers, loss, optimizer and accuracy ops."""
        with tf.variable_scope(self.name):
            # Boolean switch for dropout: True while training, False for inference.
            self.training = tf.placeholder(tf.bool)
            self.X = tf.placeholder(tf.float32, [None, 784])
            X_img = tf.reshape(self.X, [-1, 28, 28, 1])
            self.Y = tf.placeholder(tf.float32, [None, 10])

            # tf.layers.conv2d / max_pooling2d / dropout are used instead of the
            # tf.nn equivalents because they create their own variables.

            # Convolutional Layer #1 and Pooling Layer #1
            conv1 = tf.layers.conv2d(inputs=X_img, filters=32, kernel_size=[3, 3],
                                     padding="SAME", activation=tf.nn.relu)
            pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2],
                                            padding="SAME", strides=2)
            dropout1 = tf.layers.dropout(inputs=pool1,
                                         rate=0.3, training=self.training)

            # Convolutional Layer #2 and Pooling Layer #2
            conv2 = tf.layers.conv2d(inputs=dropout1, filters=64, kernel_size=[3, 3],
                                     padding="SAME", activation=tf.nn.relu)
            pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2],
                                            padding="SAME", strides=2)
            dropout2 = tf.layers.dropout(inputs=pool2,
                                         rate=0.3, training=self.training)

            # Convolutional Layer #3 and Pooling Layer #3
            conv3 = tf.layers.conv2d(inputs=dropout2, filters=128, kernel_size=[3, 3],
                                     padding="SAME", activation=tf.nn.relu)
            pool3 = tf.layers.max_pooling2d(inputs=conv3, pool_size=[2, 2],
                                            padding="SAME", strides=2)
            dropout3 = tf.layers.dropout(inputs=pool3,
                                         rate=0.3, training=self.training)

            # Dense Layer with ReLU. Three SAME poolings take 28 -> 14 -> 7 -> 4,
            # so each example flattens to 128 * 4 * 4 features.
            flat = tf.reshape(dropout3, [-1, 128 * 4 * 4])
            dense4 = tf.layers.dense(inputs=flat,
                                     units=625, activation=tf.nn.relu)
            dropout4 = tf.layers.dropout(inputs=dense4,
                                         rate=0.5, training=self.training)

            # Logits (no activation) Layer: L5 Final FC 625 inputs -> 10 outputs
            self.logits = tf.layers.dense(inputs=dropout4, units=10)

        # define cost/loss & optimizer
        self.cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
            logits=self.logits, labels=self.Y))
        self.optimizer = tf.train.AdamOptimizer(
            learning_rate=self.learning_rate).minimize(self.cost)

        correct_prediction = tf.equal(
            tf.argmax(self.logits, 1), tf.argmax(self.Y, 1))
        self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    def predict(self, x_test, training=False):
        """Return the logits for x_test (dropout off unless training=True)."""
        return self.sess.run(self.logits,
                             feed_dict={self.X: x_test, self.training: training})

    def get_accuracy(self, x_test, y_test, training=False):
        """Return the fraction of x_test rows whose argmax logit matches y_test."""
        return self.sess.run(self.accuracy,
                             feed_dict={self.X: x_test,
                                        self.Y: y_test, self.training: training})

    def train(self, x_data, y_data, training=True):
        """Run one Adam step on the batch; returns [cost, optimizer-op result]."""
        return self.sess.run([self.cost, self.optimizer], feed_dict={
            self.X: x_data, self.Y: y_data, self.training: training})
In [99]:
# Start from an empty default graph so the model is not added on top of the
# previous lab's CNN, and re-running this cell does not pile up duplicate networks.
tf.reset_default_graph()
# This session is deliberately left open: the following cells train and evaluate m1 through it.
sess = tf.Session()
m1 = Model(sess, "m1")
sess.run(tf.global_variables_initializer())
In [100]:
# NOTE: m1's optimizer was already built when Model(...) was constructed in the
# previous cell, so assigning learning_rate here does not change m1's step size.
learning_rate = 0.001
training_epochs = 15
batch_size = 100
# Mini-batches per epoch; constant across epochs, so computed once.
total_batch = int(mnist.train.num_examples / batch_size)
for epoch in range(training_epochs):
    avg_cost = 0
    for i in range(total_batch):
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        c, _ = m1.train(batch_xs, batch_ys)
        avg_cost += c / total_batch
    print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost))
In [101]:
# Accuracy of m1 on the MNIST test set; get_accuracy defaults to training=False, so dropout is disabled.
print('Accuracy:', m1.get_accuracy(mnist.test.images, mnist.test.labels))
Lab12. RNN¶
Data we use:¶
In [102]:
# Training text for the character-level RNN labs, assembled from three consecutive fragments.
sentence = "".join([
    "if you want to build a ship, don't drum up people together to ",
    "collect wood and don't assign them tasks and work, but rather ",
    "teach them to long for the endless immensity of the sea.",
])
In [103]:
# Vocabulary: every distinct character of the sentence.
# sorted() fixes the order; iterating a bare set of strings can give a different
# order on every interpreter start, which would change the index of each character.
idx2char = sorted(set(sentence))
char2idx = {char: idx for idx, char in enumerate(idx2char)}
# The sentence as a list of character indices.
sentence_idx = [char2idx[char] for char in sentence]
# Next-character task: the input is the sentence without its last character,
# the target is the same sequence shifted left by one. Both are wrapped as a batch of one.
x_data = [sentence_idx[:-1]]
y_data = [sentence_idx[1:]]
1. Prediction with RNN¶
In [104]:
tf.reset_default_graph()

len_of_sentence = len(sentence) - 1   # x_data / y_data each hold len(sentence) - 1 indices
num_of_chars = len(char2idx)
batch_size = 1

X = tf.placeholder(tf.int32, [None, len_of_sentence])
Y = tf.placeholder(tf.int32, [None, len_of_sentence])
X_one_hot = tf.one_hot(X, num_of_chars) # [#data(==1), len_of_sentence, num_of_chars]

# One RNN cell whose output size equals the vocabulary size, so its outputs
# are used directly as per-character logits.
cell = tf.contrib.rnn.BasicRNNCell(num_units=num_of_chars)
# (A stray trailing backslash here used to join this statement with the next one.)
initial_state = cell.zero_state(batch_size, tf.float32)
outputs, _states = tf.nn.dynamic_rnn(
    cell, X_one_hot, initial_state = initial_state, dtype = tf.float32
)

# Every time step contributes equally to the sequence loss.
weights = tf.ones([batch_size, len_of_sentence])
sequence_loss = tf.contrib.seq2seq.sequence_loss(logits = outputs, targets=Y, weights=weights)
loss = tf.reduce_mean(sequence_loss)
train = tf.train.AdamOptimizer(learning_rate=3e-3).minimize(loss)
prediction = tf.argmax(outputs, axis = 2) # [#data, len_of_sentence]: predicted char index per step
In [105]:
# Loss history, consumed by the plotting cell below.
step_val, cost_val = [], []
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(5000):
        curr_loss, _ = sess.run([loss, train], feed_dict={X: x_data, Y: y_data})
        step_val.append(step)
        cost_val.append(curr_loss)
        if step % 250 != 0:
            continue
        # Every 250 steps: decode the predicted indices back into text and print them.
        curr_prediction = sess.run(prediction, feed_dict={X: x_data})
        decoded = ''.join(idx2char[idx] for idx in np.squeeze(curr_prediction))
        print("Step {}: {}".format(step, decoded))
In [106]:
# Training loss per step for the RNN above.
fig, ax = plt.subplots(figsize=[12, 8])
ax.plot(step_val, cost_val, label='cost')
ax.legend()
plt.show()
2. Prediction with LSTM¶
In [107]:
len_of_seq = 12
# Slide a len_of_seq-character window over the sentence one position at a time.
# Each target window is its input window shifted right by one character.
window_starts = range(len(sentence) - len_of_seq)
x_data_for_lstm = [
    [char2idx[char] for char in sentence[start:start + len_of_seq]]
    for start in window_starts
]
y_data_for_lstm = [
    [char2idx[char] for char in sentence[start + 1:start + len_of_seq + 1]]
    for start in window_starts
]
# All windows are fed as one batch, so both of these equal the window count.
batch_size = len(x_data_for_lstm)
len_of_sentence = len(x_data_for_lstm)
num_of_chars = len(char2idx)
In [108]:
# Build a single-layer LSTM that predicts the next character at every position of a window.
tf.reset_default_graph()
X = tf.placeholder(tf.int32, [None, len_of_seq])
Y = tf.placeholder(tf.int32, [None, len_of_seq])
X_one_hot = tf.one_hot(X, num_of_chars) # [#data, len_of_seq, num_of_chars]
# num_units equals the vocabulary size, so the LSTM outputs are used directly as logits.
cell = tf.contrib.rnn.BasicLSTMCell(num_units=num_of_chars, state_is_tuple=True)
# The zero state is created for exactly batch_size rows, so feeds must contain batch_size windows.
initial_state = cell.zero_state(batch_size, tf.float32)
outputs, _states = tf.nn.dynamic_rnn(
    cell, X_one_hot, initial_state = initial_state, dtype = tf.float32
)
# dynamic_rnn also accepts sequence_length = [len1, len2, ...], which would make this a
# truly dynamic RNN: x_data could then hold sequences of different lengths, as long as
# the length of each sequence is passed through that argument.
# Every time step contributes equally to the sequence loss.
weights = tf.ones([batch_size, len_of_seq])
sequence_loss = tf.contrib.seq2seq.sequence_loss(logits = outputs, targets=Y, weights=weights)
loss = tf.reduce_mean(sequence_loss)
train = tf.train.AdamOptimizer(learning_rate=1e-2).minimize(loss)
prediction = tf.argmax(outputs, axis = 2) # [#data, len_of_seq]: predicted char index per step
In [109]:
# Loss history, consumed by the plotting cell below.
step_val, cost_val = [], []
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(5000):
        curr_loss, _ = sess.run([loss, train],
                                feed_dict={X: x_data_for_lstm, Y: y_data_for_lstm})
        step_val.append(step)
        cost_val.append(curr_loss)
        if step % 250 != 0:
            continue
        curr_prediction = sess.run(prediction, feed_dict={X: x_data_for_lstm})
        # Rebuild one line of text from the overlapping windows: the whole first
        # window, then only the last character of every following window.
        pieces = []
        for row_no, prediction_value in enumerate(curr_prediction):
            if row_no == 0:
                first_window = ''.join(idx2char[idx] for idx in prediction_value)
                pieces.append("Step {}: {}".format(step, first_window))
            else:
                pieces.append(idx2char[prediction_value[-1]])
        print(''.join(pieces))
In [110]:
# Training loss per step for the LSTM above.
fig, ax = plt.subplots(figsize=[12, 8])
ax.plot(step_val, cost_val, label='cost')
ax.legend()
plt.show()
3. Wider & Deeper: Stacked RNN(#LSTM=4) & FCNN(#FCNN=2)¶
In [111]:
tf.reset_default_graph()

X = tf.placeholder(tf.int32, [None, len_of_seq])
Y = tf.placeholder(tf.int32, [None, len_of_seq])
X_one_hot = tf.one_hot(X, num_of_chars) # [#data, len_of_seq, num_of_chars]

# Four stacked LSTM layers. Each layer gets its OWN cell object: `[cell] * 4`
# would put the same object in every slot, so the layers would not be
# independent (depending on the TensorFlow 1.x release this either shares one
# set of weights across all layers or raises an error).
num_lstm_layers = 4
lstm_cells = [
    tf.contrib.rnn.BasicLSTMCell(num_units=num_of_chars, state_is_tuple=True)
    for _ in range(num_lstm_layers)
]
multi_rnn_cells = tf.contrib.rnn.MultiRNNCell(lstm_cells, state_is_tuple=True)
# No explicit initial state is passed; with dtype given, dynamic_rnn starts from zeros.
multi_rnn_outputs, _states = tf.nn.dynamic_rnn(
    multi_rnn_cells, X_one_hot, dtype = tf.float32
)

# Flatten (batch, time) so the two fully connected layers are applied to every time step.
X_for_fc = tf.reshape(multi_rnn_outputs, [-1, num_of_chars])
fcnn1 = tf.contrib.layers.fully_connected(
    X_for_fc, num_of_chars, activation_fn = None)
outputs = tf.contrib.layers.fully_connected(
    fcnn1, num_of_chars, activation_fn = None)
# Restore the [batch, time, class] layout expected by sequence_loss.
outputs = tf.reshape(outputs, [batch_size, len_of_seq, num_of_chars])

# Every time step contributes equally to the sequence loss.
weights = tf.ones([batch_size, len_of_seq])
sequence_loss = tf.contrib.seq2seq.sequence_loss(logits = outputs, targets=Y, weights=weights)
loss = tf.reduce_mean(sequence_loss)
train = tf.train.AdamOptimizer(learning_rate=1e-2).minimize(loss)
prediction = tf.argmax(outputs, axis = 2) # [#data, len_of_seq]: predicted char index per step
In [112]:
# Loss history, consumed by the plotting cell below.
step_val, cost_val = [], []
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(1000):
        curr_loss, _ = sess.run([loss, train],
                                feed_dict={X: x_data_for_lstm, Y: y_data_for_lstm})
        step_val.append(step)
        cost_val.append(curr_loss)
        if step % 50 != 0:
            continue
        curr_prediction = sess.run(prediction, feed_dict={X: x_data_for_lstm})
        # Rebuild one line of text from the overlapping windows: the whole first
        # window, then only the last character of every following window.
        pieces = []
        for row_no, prediction_value in enumerate(curr_prediction):
            if row_no == 0:
                first_window = ''.join(idx2char[idx] for idx in prediction_value)
                pieces.append("Step {}: {}".format(step, first_window))
            else:
                pieces.append(idx2char[prediction_value[-1]])
        print(''.join(pieces))
In [113]:
# Training loss per step for the stacked LSTM above.
fig, ax = plt.subplots(figsize=[12, 8])
ax.plot(step_val, cost_val, label='cost')
ax.legend()
plt.show()
4. Predicting Time Series Stock Data(#LSTM Cell=1)¶
In [114]:
# Read the comma-separated stock table into a NumPy array.
# The path is relative, so it resolves against the notebook's working directory.
xy = np.loadtxt('../data-02-stock_daily.csv', delimiter=',')
In [115]:
from sklearn.preprocessing import MinMaxScaler
In [116]:
# Flip the row order, then rescale every column with MinMaxScaler.
# NOTE(review): the flip presumably turns a newest-first file into chronological order; confirm against the CSV.
# Re-running this cell flips the rows again because xy is overwritten in place.
scaler = MinMaxScaler()
xy = scaler.fit_transform(xy[::-1])
# Features are all columns; the target is the last column, kept 2-D with shape (rows, 1).
x, y = xy, xy[:, [-1]]
In [117]:
# Display (rows, columns) of the scaled table; the column count is used as `dimension` below.
xy.shape
Out[117]:
In [118]:
len_of_seq = 7
dimension = xy.shape[1]

# Each sample is len_of_seq consecutive rows; its target is the y value of the row right after them.
n_windows = len(y) - len_of_seq
x_data = [x[start:start + len_of_seq] for start in range(n_windows)]
y_data = [y[start + len_of_seq] for start in range(n_windows)]

# Ordered 80 / 20 split (no shuffling): the first 80% of windows train, the rest test.
train_size = int(len(y_data) * 0.8)
test_size = len(y_data) - train_size
x_train = np.array(x_data[:train_size])
x_test = np.array(x_data[train_size:])
y_train = np.array(y_data[:train_size])
y_test = np.array(y_data[train_size:])
In [119]:
# Build an LSTM regressor: a window of len_of_seq rows goes in, one value comes out.
tf.reset_default_graph()
X = tf.placeholder(tf.float32, [None, len_of_seq, dimension])
Y = tf.placeholder(tf.float32, [None, 1])
cell = tf.contrib.rnn.BasicLSTMCell(
    num_units=dimension, state_is_tuple=True, activation=tf.tanh)
outputs, _states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)
# Only the LSTM output of the last time step feeds the single-unit output layer.
# NOTE(review): ReLU on a regression output clamps predictions at 0 and passes no
# gradient while the pre-activation is negative; activation_fn=None may be intended. TODO confirm.
Y_pred = tf.contrib.layers.fully_connected(outputs[:, -1], 1, activation_fn=tf.nn.relu)
loss = tf.reduce_sum(tf.square(Y_pred - Y))  # sum of the squares
train = tf.train.AdamOptimizer(1e-2).minimize(loss)
# RMSE is computed from separately fed arrays (targets / predictions), not from Y_pred directly,
# so it can be evaluated on predictions that were already fetched from the session.
targets = tf.placeholder(tf.float32, [None, 1])
predictions = tf.placeholder(tf.float32, [None, 1])
rmse = tf.sqrt(tf.reduce_mean(tf.square(targets - predictions)))
In [120]:
# Histories for plotting: loss at every step, test RMSE at every 50th step.
step_val, step50_val = [], []
cost_val, rmse_val = [], []
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(1000):
        curr_loss, _ = sess.run([loss, train], feed_dict={X: x_train, Y: y_train})
        step_val.append(step)
        cost_val.append(curr_loss)
        if step % 50 != 0:
            continue
        # Periodic check on the held-out windows.
        predict_value = sess.run(Y_pred, feed_dict={X: x_test})
        test_rmse = sess.run(rmse, feed_dict={targets: y_test, predictions: predict_value})
        step50_val.append(step)
        rmse_val.append(test_rmse)
    # Final test predictions, kept for the comparison plot.
    last_predict_value = sess.run(Y_pred, feed_dict={X: x_test})
In [121]:
# Three stacked panels: training loss, test RMSE every 50 steps, and test targets vs. final predictions.
fig, (ax_cost, ax_rmse, ax_val) = plt.subplots(3, 1, figsize=[12, 20])

ax_cost.plot(step_val, cost_val, label='cost')
ax_cost.legend()

ax_rmse.plot(step50_val, rmse_val, c='red', label='RMSE')
ax_rmse.scatter(step50_val, rmse_val, c='black', s=12)
# Label every RMSE marker with its value rounded to three decimals.
for rmse_step, curr_rmse in zip(step50_val, rmse_val):
    ax_rmse.annotate(round(curr_rmse, 3), (rmse_step, curr_rmse))
ax_rmse.legend()

ax_val.plot(y_test, label='Test Value')
ax_val.plot(last_predict_value, label='Prediction')
ax_val.legend()

plt.show()