I'm delighted that we've finally reached Week 4, which means our TensorFlow studies are drawing to a close. This week is about applying what we've learned so far to real life: real-world images are on a completely different scale, in both size and complexity, from the training sets we've been using, and that is exactly the problem we'll tackle this week.
First, here's a fun YouTube video, hahaha.
1. Starting with the first IPython notebook
Note! This version is meant to run in Jupyter or Google Colab; running it in a local IDE may throw errors. The differences should be minor, though, and you can work out the fixes yourself.
Step 1: Download the dataset
!wget --no-check-certificate \
https://storage.googleapis.com/laurencemoroney-blog.appspot.com/horse-or-human.zip \
-O /tmp/horse-or-human.zip
Step 2: Unzip it
import os
import zipfile
local_zip = '/tmp/horse-or-human.zip'
zip_ref = zipfile.ZipFile(local_zip, 'r')
zip_ref.extractall('/tmp/horse-or-human')
zip_ref.close()
The extracted contents:
Step 3: Set up a directory path for each class
# Directory with our training horse pictures
train_horse_dir = os.path.join('/tmp/horse-or-human/horses')
# Directory with our training human pictures
train_human_dir = os.path.join('/tmp/horse-or-human/humans')
Step 4: Inspect the training set
Here os.listdir(...) returns a list of the names of all the files in the given directory (names only, not full paths; see the small path-joining example after the code below).
train_horse_names = os.listdir(train_horse_dir)
print(train_horse_names[:10])
train_human_names = os.listdir(train_human_dir)
print(train_human_names[:10])
print('total training horse images:', len(os.listdir(train_horse_dir)))
print('total training human images:', len(os.listdir(train_human_dir)))
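Since os.listdir returns bare file names, join them onto their directory with os.path.join whenever you need a full path. A one-line check (the printed file name is just illustrative):
print(os.path.join(train_horse_dir, train_horse_names[0]))
# e.g. /tmp/horse-or-human/horses/horse01-0.png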
Step 5: Let's see what the images look like
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
# Parameters for our graph; we'll output images in a 4x4 configuration
nrows = 4
ncols = 4
# Index for iterating over images
pic_index = 0
# Set up matplotlib fig, and size it to fit 4x4 pics
fig = plt.gcf()
fig.set_size_inches(ncols * 4, nrows * 4)
pic_index += 8
next_horse_pix = [os.path.join(train_horse_dir, fname)
                  for fname in train_horse_names[pic_index-8:pic_index]]
next_human_pix = [os.path.join(train_human_dir, fname)
                  for fname in train_human_names[pic_index-8:pic_index]]

for i, img_path in enumerate(next_horse_pix + next_human_pix):
    # Set up subplot; subplot indices start at 1
    sp = plt.subplot(nrows, ncols, i + 1)
    sp.axis('Off')  # Don't show axes (or gridlines)

    img = mpimg.imread(img_path)
    plt.imshow(img)
plt.show()
The output should look roughly like this:
Now we can get down to writing the convolutional network itself.
Step 1: Define the model
import tensorflow as tf
model = tf.keras.models.Sequential([
    # Note the input shape is the desired size of the image 300x300 with 3 bytes color
    # This is the first convolution
    tf.keras.layers.Conv2D(16, (3,3), activation='relu', input_shape=(300, 300, 3)),
    tf.keras.layers.MaxPooling2D(2, 2),
    # The second convolution
    tf.keras.layers.Conv2D(32, (3,3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),
    # The third convolution
    tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),
    # The fourth convolution
    tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),
    # The fifth convolution
    tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),
    # Flatten the results to feed into a DNN
    tf.keras.layers.Flatten(),
    # 512 neuron hidden layer
    tf.keras.layers.Dense(512, activation='relu'),
    # Only 1 output neuron. It will contain a value from 0-1 where 0 for 1 class ('horses') and 1 for the other ('humans')
    tf.keras.layers.Dense(1, activation='sigmoid')
])
Step 2: Let's see what the model looks like
model.summary()
The output looks like this:
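A quick sanity check on the summary: each 3x3 convolution (no padding) trims 2 pixels from each spatial dimension, and each 2x2 max-pool halves it (rounding down), so the feature maps shrink as 300 → 298 → 149 → 147 → 73 → 71 → 35 → 33 → 16 → 14 → 7, and Flatten therefore outputs 7 * 7 * 64 = 3136 values.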
Step 3: Configure the network's loss (objective function), optimizer (lr means the learning rate), and metrics (evaluation criteria); 'acc' stands for accuracy, and in some versions it must be spelled 'accuracy'.
Some references on RMSprop: the documentation [1], an overview [2], and slides [3].
from tensorflow.keras.optimizers import RMSprop
model.compile(loss='binary_crossentropy',
              optimizer=RMSprop(lr=0.001),  # newer TF versions spell this learning_rate=0.001
              metrics=['acc'])
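For intuition about what RMSprop does, here is a minimal NumPy sketch of its update rule (my own illustration, not Keras's actual implementation, which also supports options such as momentum; rho and eps match the Keras defaults):
import numpy as np

def rmsprop_step(w, g, v, lr=0.001, rho=0.9, eps=1e-7):
    # Keep a moving average of the squared gradient per parameter
    v = rho * v + (1 - rho) * g ** 2
    # Scale the step by the root of that average, so parameters with
    # consistently large gradients take proportionally smaller steps
    w = w - lr * g / (np.sqrt(v) + eps)
    return w, v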
Step 4: Load the images from the file system
from tensorflow.keras.preprocessing.image import ImageDataGenerator
# All images will be rescaled by 1./255
train_datagen = ImageDataGenerator(rescale=1/255)
# Flow training images in batches of 128 using train_datagen generator
train_generator = train_datagen.flow_from_directory(
    '/tmp/horse-or-human/',  # This is the source directory for training images
    target_size=(300, 300),  # All images will be resized to 300x300
    batch_size=128,
    # Since we use binary_crossentropy loss, we need binary labels
    class_mode='binary')
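flow_from_directory infers one class per subdirectory (in alphabetical order). To see which label went to which class, and to peek at a batch, something like this works:
print(train_generator.class_indices)  # expected: {'horses': 0, 'humans': 1}
images, labels = next(train_generator)
print(images.shape)   # (128, 300, 300, 3)
print(labels[:5])     # 0.0 for horses, 1.0 for humans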
Step 5: Train the network
history = model.fit_generator(
    train_generator,
    steps_per_epoch=8,
    epochs=15,
    verbose=1)
Here are the results:
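A side note: fit_generator is deprecated in newer TensorFlow releases (2.1+), where model.fit accepts the generator directly with the same arguments:
history = model.fit(
    train_generator,
    steps_per_epoch=8,
    epochs=15,
    verbose=1)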
Step 6: Upload real images and test the model
import numpy as np
from google.colab import files
from tensorflow.keras.preprocessing import image

uploaded = files.upload()

for fn in uploaded.keys():
    # predicting images
    path = '/content/' + fn
    img = image.load_img(path, target_size=(300, 300))
    x = image.img_to_array(img)
    x = x / 255.0  # match the 1./255 rescaling used on the training images
    x = np.expand_dims(x, axis=0)
    images = np.vstack([x])

    classes = model.predict(images, batch_size=10)
    print(classes[0])
    if classes[0] > 0.5:
        print(fn + " is a human")
    else:
        print(fn + " is a horse")
Here I searched pixabay for a horse picture, downloaded it, and uploaded it here.
The result:
As you can see, the network got it right.
Step 7: Visualize the intermediate representations
import numpy as np
import random
from tensorflow.keras.preprocessing.image import img_to_array, load_img
# Let's define a new Model that will take an image as input, and will output
# intermediate representations for all layers in the previous model after
# the first.
successive_outputs = [layer.output for layer in model.layers[1:]]
#visualization_model = Model(img_input, successive_outputs)
visualization_model = tf.keras.models.Model(inputs = model.input, outputs = successive_outputs)
# Let's prepare a random input image from the training set.
horse_img_files = [os.path.join(train_horse_dir, f) for f in train_horse_names]
human_img_files = [os.path.join(train_human_dir, f) for f in train_human_names]
img_path = random.choice(horse_img_files + human_img_files)
img = load_img(img_path, target_size=(300, 300))  # this is a PIL image
x = img_to_array(img)  # Numpy array with shape (300, 300, 3)
x = x.reshape((1,) + x.shape)  # Numpy array with shape (1, 300, 300, 3)
# Rescale by 1/255
x /= 255
# Let's run our image through our network, thus obtaining all
# intermediate representations for this image.
successive_feature_maps = visualization_model.predict(x)
# These are the names of the layers, so we can have them as part of our plot
layer_names = [layer.name for layer in model.layers[1:]]  # skip the first layer so names align with successive_outputs
# Now let's display our representations
for layer_name, feature_map in zip(layer_names, successive_feature_maps):
    if len(feature_map.shape) == 4:
        # Just do this for the conv / maxpool layers, not the fully-connected layers
        n_features = feature_map.shape[-1]  # number of features in feature map
        # The feature map has shape (1, size, size, n_features)
        size = feature_map.shape[1]
        # We will tile our images in this matrix
        display_grid = np.zeros((size, size * n_features))
        for i in range(n_features):
            # Postprocess the feature to make it visually palatable
            x = feature_map[0, :, :, i]
            x -= x.mean()
            x /= x.std()
            x *= 64
            x += 128
            x = np.clip(x, 0, 255).astype('uint8')
            # We'll tile each filter into this big horizontal grid
            display_grid[:, i * size : (i + 1) * size] = x
        # Display the grid
        scale = 20. / n_features
        plt.figure(figsize=(scale * n_features, scale))
        plt.title(layer_name)
        plt.grid(False)
        plt.imshow(display_grid, aspect='auto', cmap='viridis')
Result: a randomly chosen image is passed through the network; from top to bottom you see the outputs of the successive convolution and pooling layers.
The image goes from raw pixels to increasingly abstract and compact representations. The downstream representations begin to highlight what the network pays attention to, and they show fewer and fewer features being 'activated'; most are set to zero. This is called 'sparsity', and representation sparsity is a key feature of deep learning.
These representations carry less and less information about the image's original pixels, but increasingly refined information about the image's class. You can think of a convnet (or a deep network in general) as an information distillation pipeline.
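To put a number on that sparsity, you can reuse the successive_feature_maps computed above and measure the fraction of exactly-zero activations per layer (a small check of my own; ReLU outputs exact zeros for all negative inputs):
for layer_name, feature_map in zip(layer_names, successive_feature_maps):
    if len(feature_map.shape) == 4:
        sparsity = np.mean(feature_map == 0)
        print('%s: %.1f%% of activations are zero' % (layer_name, 100 * sparsity))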
Finally: kill the process to free up resources
import os, signal
os.kill(os.getpid(), signal.SIGKILL)
2. Adding automatic accuracy monitoring
The idea is to download one more dataset, a validation set. It is not used to train or improve the model; rather, it acts as an indicator of when to stop training so as to prevent overfitting. The code below brings in the validation set.
validation_datagen = ImageDataGenerator(rescale=1/255)

validation_generator = validation_datagen.flow_from_directory(
    '/tmp/validation-horse-or-human/',  # This is the source directory for validation images
    target_size=(300, 300),  # Resized to 300x300 to match the model's input shape
    batch_size=32,
    # Since we use binary_crossentropy loss, we need binary labels
    class_mode='binary')

history = model.fit_generator(
    train_generator,
    steps_per_epoch=8,
    epochs=15,
    verbose=1,
    validation_data=validation_generator,
    validation_steps=8)
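With a validation set in place, you can plot the two accuracy curves against each other to spot overfitting; a minimal sketch (the history keys are 'acc'/'val_acc' here, or 'accuracy'/'val_accuracy' in newer Keras versions):
import matplotlib.pyplot as plt

plt.plot(history.history['acc'], label='training accuracy')
plt.plot(history.history['val_acc'], label='validation accuracy')
plt.xlabel('epoch')
plt.legend()
plt.show()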
3. Assignment code
The assignment is to train a facial-expression (happy-or-sad) classifier and support a callback that stops training early.
import tensorflow as tf
import os
import zipfile
from os import path, getcwd, chdir
# DO NOT CHANGE THE LINE BELOW. If you are developing in a local
# environment, then grab happy-or-sad.zip from the Coursera Jupyter Notebook
# and place it inside a local folder and edit the path to that location
path = f"{getcwd()}/../tmp2/happy-or-sad.zip"
zip_ref = zipfile.ZipFile(path, 'r')
zip_ref.extractall("/tmp/h-or-s")
zip_ref.close()
# GRADED FUNCTION: train_happy_sad_model
def train_happy_sad_model():
    # Please write your code only where you are indicated.
    # please do not remove # model fitting inline comments.

    DESIRED_ACCURACY = 0.999

    class myCallback(tf.keras.callbacks.Callback):
        # Your Code
        def on_epoch_end(self, epoch, logs={}):
            if logs.get('acc') > DESIRED_ACCURACY:
                print("\nReached 99.9% accuracy so cancelling training!")
                self.model.stop_training = True

    callbacks = myCallback()

    # Directory with our training happy pictures
    train_happy_dir = os.path.join('/tmp/h-or-s/happy')
    # Directory with our training sad pictures
    train_sad_dir = os.path.join('/tmp/h-or-s/sad')

    # This Code Block should Define and Compile the Model. Please assume the images are 150 X 150 in your implementation.
    model = tf.keras.models.Sequential([
        # Your Code Here
        tf.keras.layers.Conv2D(16, (3,3), activation='relu', input_shape=(150, 150, 3)),
        tf.keras.layers.MaxPooling2D(2, 2),
        # The second convolution
        tf.keras.layers.Conv2D(32, (3,3), activation='relu'),
        tf.keras.layers.MaxPooling2D(2,2),
        # The third convolution
        tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
        tf.keras.layers.MaxPooling2D(2,2),
        # The fourth and fifth convolutions from the horse-or-human model are
        # left out; three are enough for this smaller 150x150 dataset.
        #tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
        #tf.keras.layers.MaxPooling2D(2,2),
        #tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
        #tf.keras.layers.MaxPooling2D(2,2),
        # Flatten the results to feed into a DNN
        tf.keras.layers.Flatten(),
        # 512 neuron hidden layer
        tf.keras.layers.Dense(512, activation='relu'),
        # Only 1 output neuron. It will contain a value from 0-1 where 0 for one class ('happy') and 1 for the other ('sad')
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])

    from tensorflow.keras.optimizers import RMSprop

    model.compile(# Your Code Here #
        loss='binary_crossentropy',
        optimizer=RMSprop(lr=0.001),
        metrics=['acc']
    )

    # This code block should create an instance of an ImageDataGenerator called train_datagen
    # And a train_generator by calling train_datagen.flow_from_directory
    from tensorflow.keras.preprocessing.image import ImageDataGenerator

    train_datagen = ImageDataGenerator(rescale=1/255)  # Your Code Here
    # Please use a target_size of 150 X 150.
    train_generator = train_datagen.flow_from_directory(
        '/tmp/h-or-s/',  # This is the source directory for training images
        target_size=(150, 150),  # All images will be resized to 150x150
        batch_size=128,
        # Since we use binary_crossentropy loss, we need binary labels
        class_mode='binary'
    )
    # Expected output: 'Found 80 images belonging to 2 classes'

    # This code block should call model.fit_generator and train for
    # a number of epochs.
    # model fitting
    history = model.fit_generator(
        # Your Code Here
        train_generator,
        steps_per_epoch=8,
        epochs=15,
        verbose=1,  # Note: verbose controls how much logging is printed per epoch
        callbacks=[callbacks]
    )
    # model fitting
    return history.history['acc'][-1]
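To run the graded function (the callback should stop training as soon as accuracy exceeds DESIRED_ACCURACY, so the returned final accuracy should be above 0.999):
train_happy_sad_model()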
Model overview:
4. Wrapping up
!!!Hahahaha, I've finally finished the course!!!