
Cat and Dog Classification

Preface

These are resources I collected from around the web to make learning easier; I honestly no longer remember where I first saw them!

TensorFlow

input_data.py:

import tensorflow as tf
import os
import numpy as np

# Read all training images and build the matching label lists
def get_files(file_dir):
    cats = []
    label_cats = []
    dogs = []
    label_dogs = []
    for file in os.listdir(file_dir):
        name = file.split(sep='.')
        if 'cat' in name[0]:
            cats.append(file_dir + file)
            label_cats.append(0)
        elif 'dog' in name[0]:
            dogs.append(file_dir + file)
            label_dogs.append(1)
    image_list = np.hstack((cats, dogs))
    label_list = np.hstack((label_cats, label_dogs))
    # print('There are %d cats\nThere are %d dogs' % (len(cats), len(dogs)))

    # Stack into a 2 x N array, transpose to N x 2, then shuffle the rows
    temp = np.array([image_list, label_list])
    temp = temp.transpose()
    np.random.shuffle(temp)

    # Column 0 holds the image paths, column 1 holds the labels
    image_list = list(temp[:, 0])
    label_list = list(temp[:, 1])
    label_list = [int(i) for i in label_list]
    return image_list, label_list


# Build training batches.
# image_W, image_H: target image size; batch_size: images per batch;
# capacity: maximum number of elements held in the input queue
def get_batch(image, label, image_W, image_H, batch_size, capacity):
    # Convert the Python lists to tensors
    image = tf.cast(image, tf.string)
    label = tf.cast(label, tf.int32)

    # Put the image paths and labels into an input queue
    input_queue = tf.train.slice_input_producer([image, label])
    label = input_queue[1]
    # Read the raw file contents
    image_contents = tf.read_file(input_queue[0])
    # Decode the JPEG; channels=3 for color images
    image = tf.image.decode_jpeg(image_contents, channels=3)
    # Crop or pad around the center to the requested image_W x image_H
    image = tf.image.resize_image_with_crop_or_pad(image, image_W, image_H)
    # Standardize: subtract the mean and divide by the standard deviation
    image = tf.image.per_image_standardization(image)

    # Assemble batches
    image_batch, label_batch = tf.train.batch([image, label], batch_size=batch_size, num_threads=64, capacity=capacity)

    # Reshape the labels and cast the images to float32
    label_batch = tf.reshape(label_batch, [batch_size])
    image_batch = tf.cast(image_batch, tf.float32)
    return image_batch, label_batch
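
Before wiring these two functions into the training script further below, it can help to sanity-check one batch. The following is only a minimal sketch, reusing the same training directory path as the training script; run it as its own script next to input_data.py.

import tensorflow as tf
import input_data

# Same training directory as used later in training.py
images, labels = input_data.get_files('F:/dataset_kaggledogvscat/train/')
image_batch, label_batch = input_data.get_batch(images, labels, 208, 208, 32, 256)

with tf.Session() as sess:
    # The queue-based pipeline needs queue runners before anything can be fetched
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    img, lab = sess.run([image_batch, label_batch])
    print(img.shape, lab)  # expect (32, 208, 208, 3) and 32 labels of 0 (cat) / 1 (dog)
    coord.request_stop()
    coord.join(threads)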

model.py:

import tensorflow as tf

# Network structure:
# conv1           convolution layer 1
# pooling1_lrn    pooling layer 1
# conv2           convolution layer 2
# pooling2_lrn    pooling layer 2
# local3          fully connected layer 1
# local4          fully connected layer 2
# softmax_linear  fully connected layer 3 (raw logits)
def inference(images, batch_size, n_classes):
    with tf.variable_scope('conv1') as scope:
        # 3x3 kernels, input depth 3 (RGB), 16 feature maps:
        # the output keeps the spatial size but gets deeper
        weights = tf.get_variable('weights',
                                  shape=[3, 3, 3, 16],
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.1, dtype=tf.float32))
        biases = tf.get_variable('biases',
                                 shape=[16],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.1))
        conv = tf.nn.conv2d(images, weights, strides=[1, 1, 1, 1], padding='SAME')
        pre_activation = tf.nn.bias_add(conv, biases)
        conv1 = tf.nn.relu(pre_activation, name=scope.name)

    with tf.variable_scope('pooling1_lrn') as scope:
        pool1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME', name='pooling1')
        norm1 = tf.nn.lrn(pool1, depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm1')

    with tf.variable_scope('conv2') as scope:
        weights = tf.get_variable('weights',
                                  shape=[3, 3, 16, 16],
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.1, dtype=tf.float32))
        biases = tf.get_variable('biases',
                                 shape=[16],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.1))
        conv = tf.nn.conv2d(norm1, weights, strides=[1, 1, 1, 1], padding='SAME')
        pre_activation = tf.nn.bias_add(conv, biases)
        conv2 = tf.nn.relu(pre_activation, name='conv2')

    # pool2 and norm2
    with tf.variable_scope('pooling2_lrn') as scope:
        norm2 = tf.nn.lrn(conv2, depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm2')
        pool2 = tf.nn.max_pool(norm2, ksize=[1, 3, 3, 1], strides=[1, 1, 1, 1], padding='SAME', name='pooling2')

    with tf.variable_scope('local3') as scope:
        # Flatten the feature maps into one vector per example
        reshape = tf.reshape(pool2, shape=[batch_size, -1])
        dim = reshape.get_shape()[1].value
        weights = tf.get_variable('weights',
                                  shape=[dim, 128],
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.005, dtype=tf.float32))
        biases = tf.get_variable('biases',
                                 shape=[128],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.1))
        local3 = tf.nn.relu(tf.matmul(reshape, weights) + biases, name=scope.name)

    # local4
    with tf.variable_scope('local4') as scope:
        weights = tf.get_variable('weights',
                                  shape=[128, 128],
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.005, dtype=tf.float32))
        biases = tf.get_variable('biases',
                                 shape=[128],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.1))
        local4 = tf.nn.relu(tf.matmul(local3, weights) + biases, name='local4')

    # softmax_linear: raw logits, deliberately without an activation
    with tf.variable_scope('softmax_linear') as scope:
        weights = tf.get_variable('softmax_linear',
                                  shape=[128, n_classes],
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.005, dtype=tf.float32))
        biases = tf.get_variable('biases',
                                 shape=[n_classes],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.1))
        softmax_linear = tf.add(tf.matmul(local4, weights), biases, name='softmax_linear')

    return softmax_linear


def losses(logits, labels):
    with tf.variable_scope('loss') as scope:
        # Expects raw logits and integer labels; softmax is applied internally
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=labels, name='xentropy_per_example')
        loss = tf.reduce_mean(cross_entropy, name='loss')
        tf.summary.scalar(scope.name + '/loss', loss)
    return loss


def trainning(loss, learning_rate):
    with tf.name_scope('optimizer'):
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        global_step = tf.Variable(0, name='global_step', trainable=False)
        train_op = optimizer.minimize(loss, global_step=global_step)
    return train_op


def evaluation(logits, labels):
    with tf.variable_scope('accuracy') as scope:
        correct = tf.nn.in_top_k(logits, labels, 1)
        correct = tf.cast(correct, tf.float16)
        accuracy = tf.reduce_mean(correct)
        tf.summary.scalar(scope.name + '/accuracy', accuracy)
    return accuracy
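
A quick way to check that the graph builds and that the output has the expected shape is to push a random batch through inference. This is only a hypothetical smoke test, not part of the original scripts:

import tensorflow as tf
import model

# Random stand-in batch with the same shape the input pipeline produces
images = tf.random_normal([32, 208, 208, 3])
logits = model.inference(images, batch_size=32, n_classes=2)
print(logits.shape)  # expect (32, 2)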

training.py:

import os
import numpy as np
import tensorflow as tf
import input_data
import model

N_CLASSES = 2           # two output classes: cat and dog
IMG_W = 208             # input image width
IMG_H = 208             # input image height
BATCH_SIZE = 32         # images per batch
CAPACITY = 256          # maximum number of elements in the input queue
MAX_STEP = 1000         # number of training steps
learning_rate = 0.0001  # learning rate; keeping it <= 0.0001 at the start is recommended


def run_training():
    # Training data directory
    train_dir = 'F:/dataset_kaggledogvscat/train/'
    # Directory where checkpoints and summaries are saved
    logs_train_dir = 'F:/dataset_kaggledogvscat/save/'

    # Get the image paths and labels
    train, train_label = input_data.get_files(train_dir)
    # Build batches
    train_batch, train_label_batch = input_data.get_batch(train,
                                                          train_label,
                                                          IMG_W,
                                                          IMG_H,
                                                          BATCH_SIZE,
                                                          CAPACITY)
    # Forward pass
    train_logits = model.inference(train_batch, BATCH_SIZE, N_CLASSES)
    # Loss
    train_loss = model.losses(train_logits, train_label_batch)
    # Training op
    train_op = model.trainning(train_loss, learning_rate)
    # Accuracy
    train__acc = model.evaluation(train_logits, train_label_batch)
    # Merge all summaries
    summary_op = tf.summary.merge_all()
    sess = tf.Session()
    # Writer that saves summaries for TensorBoard
    train_writer = tf.summary.FileWriter(logs_train_dir, sess.graph)
    saver = tf.train.Saver()

    sess.run(tf.global_variables_initializer())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    try:
        for step in np.arange(MAX_STEP):
            if coord.should_stop():
                break
            _, tra_loss, tra_acc = sess.run([train_op, train_loss, train__acc])

            if step % 50 == 0:
                print('Step %d, train loss = %.2f, train accuracy = %.2f%%' % (step, tra_loss, tra_acc * 100.0))
                summary_str = sess.run(summary_op)
                train_writer.add_summary(summary_str, step)

            if step % 50 == 0 or (step + 1) == MAX_STEP:
                # Save a checkpoint every 50 steps and at the final step
                checkpoint_path = os.path.join(logs_train_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)

    except tf.errors.OutOfRangeError:
        print('Done training -- epoch limit reached')
    finally:
        coord.request_stop()

    coord.join(threads)
    sess.close()


# train
run_training()
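
Because the script writes the merged summaries to logs_train_dir through tf.summary.FileWriter, the loss and accuracy curves can be watched during or after training by pointing TensorBoard at that same directory:

tensorboard --logdir=F:/dataset_kaggledogvscat/save/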

test.py:

# coding=utf-8
import tensorflow as tf
from PIL import Image
import matplotlib.pyplot as plt
import input_data
import numpy as np
import model
import os


# Pick one image at random from a directory
def get_one_image(train):
    files = os.listdir(train)
    n = len(files)
    ind = np.random.randint(0, n)
    img_dir = os.path.join(train, files[ind])
    image = Image.open(img_dir)
    plt.imshow(image)
    plt.show()
    image = image.resize([208, 208])
    image = np.array(image)
    return image


def evaluate_one_image():
    train = 'F:/dataset_kaggledogvscat/my/'
    image_array = get_one_image(train)

    with tf.Graph().as_default():
        BATCH_SIZE = 1
        N_CLASSES = 2
        # Convert the image to float32
        image = tf.cast(image_array, tf.float32)
        # Standardize the image
        image = tf.image.per_image_standardization(image)
        # Reshape to a batch containing a single image
        image = tf.reshape(image, [1, 208, 208, 3])
        logit = model.inference(image, BATCH_SIZE, N_CLASSES)
        # inference returns raw logits (no activation), so apply softmax here
        logit = tf.nn.softmax(logit)

        # Directory where the trained model was saved
        logs_train_dir = 'F:/dataset_kaggledogvscat/save/'

        saver = tf.train.Saver()

        with tf.Session() as sess:

            print('Loading the model from the checkpoint directory...')
            ckpt = tf.train.get_checkpoint_state(logs_train_dir)
            if ckpt and ckpt.model_checkpoint_path:
                global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
                saver.restore(sess, ckpt.model_checkpoint_path)
                print('Model restored successfully, trained for %s steps' % global_step)
            else:
                print('Failed to load the model: no checkpoint file found')
            # Run the image through the model; the image is already a constant
            # node in the graph above, so no feed_dict is needed
            prediction = sess.run(logit)
            # Index of the class with the highest probability
            max_index = np.argmax(prediction)
            if max_index == 0:
                print('probability of cat: %.6f' % prediction[0, 0])
            else:
                print('probability of dog: %.6f' % prediction[0, 1])


# test
evaluate_one_image()
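
A note on why softmax appears only here: during training, model.losses uses tf.nn.sparse_softmax_cross_entropy_with_logits, which expects the raw, unactivated logits and applies softmax internally, so inference itself must not add an activation; at prediction time we apply tf.nn.softmax explicitly to turn the logits into probabilities.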

Results

(Screenshots of the training output.)
Aside:
A plain-language look at tf.name_scope() and tf.variable_scope()
Test code below:

import tensorflow as tf
'''
tf.get_variable takes eleven parameters; the commonly used ones are the name, the
shape, the dtype and the initializer. It creates (or reuses) a TensorFlow variable.
Common initializers are the constant initializer tf.constant_initializer, the normal
initializer tf.random_normal_initializer, the truncated-normal initializer
tf.truncated_normal_initializer and the uniform initializer tf.random_uniform_initializer.
'''

with tf.variable_scope('V1', reuse=None):
    a1 = tf.get_variable(name='a1', shape=[1], initializer=tf.constant_initializer(1))
    a2 = tf.Variable(tf.random_normal(shape=[2, 3], mean=0, stddev=1), name='a2')
with tf.variable_scope('V1', reuse=True):
    a3 = tf.get_variable(name='a1', shape=[1], initializer=tf.constant_initializer(1))
    a4 = tf.Variable(tf.random_normal(shape=[2, 3], mean=0, stddev=1), name='a2')

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(a1.name)
    print(a2.name)
    print(a3.name)
    print(a4.name)

with tf.name_scope("my_name_scope"):
    v1 = tf.get_variable("var1", [1], dtype=tf.float32)
    v2 = tf.Variable(1, name="var2", dtype=tf.float32)
    a = tf.add(v1, v2)
    print(v1.name)
    print(v2.name)
    print(a.name)

with tf.variable_scope("my_variable_scope"):
    v1 = tf.get_variable("var1", [1], dtype=tf.float32)
    v2 = tf.Variable(1, name="var2", dtype=tf.float32)
    a = tf.add(v1, v2)
    print(v1.name)
    print(v2.name)
    print(a.name)
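
What the prints show (from TF1 behavior, so treat the exact strings as a guide rather than gospel): in the two 'V1' variable scopes, a1 and a3 are the same variable V1/a1:0, because reuse=True makes tf.get_variable return the existing variable, while tf.Variable always creates a new one, so a2 is V1/a2:0 and a4 ends up as V1_1/a2:0. In the name_scope block, tf.get_variable ignores the name scope (var1:0) while tf.Variable and the add op are prefixed (my_name_scope/var2:0, my_name_scope/Add:0). In the variable_scope block, all three names are prefixed with my_variable_scope/.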

Dataset

Dataset download link

Keras

The Keras documentation is very thorough.

from keras import layers
from keras import models
import os

# Directory layout
base_dir = 'F:/kaggle-cats-and-dogs/cats_and_dogs_small/'
trainDir = os.path.join(base_dir, 'train/')
valDir = os.path.join(base_dir, 'validation/')
testDir = os.path.join(base_dir, 'test/')
# Training set
trainCatDir = os.path.join(trainDir, 'cats')
trainDogDir = os.path.join(trainDir, 'dogs')
# Validation set
valCatDir = os.path.join(valDir, 'cats')
valDogDir = os.path.join(valDir, 'dogs')
# Test set
testCatDir = os.path.join(testDir, 'cats')
testDogDir = os.path.join(testDir, 'dogs')

# Build the model
model = models.Sequential()
# Convolution layer: 32 feature maps, 3x3 kernels, ReLU activation, input size (150, 150, 3)
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)))
# Pooling layer: a (2, 2) window that halves the feature map size
model.add(layers.MaxPooling2D((2, 2)))

model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))

model.add(layers.Conv2D(128, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))

model.add(layers.Conv2D(128, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))

# Flatten layer: turn the multi-dimensional feature maps into a 1-D vector
model.add(layers.Flatten())
# Dropout: randomly drop half of the activations to reduce overfitting
model.add(layers.Dropout(rate=0.5))
# Fully connected layer: combine the extracted features
model.add(layers.Dense(512, activation='relu'))
# Output layer: a single sigmoid unit for the binary cat/dog decision
model.add(layers.Dense(1, activation='sigmoid'))

# Set the loss function, the optimizer and the metrics reported during training and testing
from keras import optimizers

model.compile(loss='binary_crossentropy',
              optimizer=optimizers.RMSprop(lr=1e-4),
              metrics=['acc'])

# Configure the image generators
from keras.preprocessing.image import ImageDataGenerator
# Rescale pixel values to floats in [0, 1] and apply data augmentation
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,       # maximum random rotation angle
    width_shift_range=0.2,   # maximum horizontal shift as a fraction of the width
    height_shift_range=0.2,  # maximum vertical shift as a fraction of the height
    shear_range=0.2,         # angle for random shearing
    zoom_range=0.2,          # range for random zoom
    horizontal_flip=True     # randomly flip half of the images horizontally
)
# Validation data must not be augmented, only rescaled
test_datagen = ImageDataGenerator(rescale=1./255)

# Create the data generators
train_generator = train_datagen.flow_from_directory(
    trainDir,                # image directory
    target_size=(150, 150),  # resize every image to (150, 150)
    batch_size=32,           # batch size
    class_mode='binary'      # return binary labels
)

val_generator = test_datagen.flow_from_directory(
    valDir,
    target_size=(150, 150),
    batch_size=32,
    class_mode='binary'
)

# Fit the model
history = model.fit_generator(
    train_generator,
    steps_per_epoch=100,     # batches drawn from the generator per epoch
    epochs=100,              # number of epochs
    validation_data=val_generator,
    validation_steps=50      # validation batches used per evaluation
)

# Save the model
model.save('cats_and_dogs_small_1.h5')

# Plot the results
import matplotlib.pyplot as plt

# history.history contains exactly these four series:
# training accuracy, validation accuracy, training loss and validation loss
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(acc) + 1)

# Accuracy curves
plt.figure(1)
plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.legend()
plt.show()
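
The script already collects loss and val_loss but only plots accuracy; a second figure for the loss curves follows the same pattern (a small continuation of the script above, using the variables it already defines):

# Loss curves
plt.figure(2)
plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()
plt.show()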

Loading the model:

import cv2
import numpy as np
from keras.models import load_model

model = load_model('cats_and_dogs_small_1.h5')  # use the name of your own .h5 model
# path = r'cats_and_dogs_small\test\dogs\dog.1501.jpg'
path = r'F:\dataset_kaggledogvscat\my\1 (7).jpg'
img = cv2.imread(path)


def format_picture(file_path, shape):
    # Load the image at the target size, scale it to [0, 1]
    # and add a leading batch dimension
    from keras.preprocessing import image
    img = image.load_img(file_path, target_size=shape)
    array = image.img_to_array(img)
    array = array.reshape((1,) + shape) / 255
    return array


picture = format_picture(path, (150, 150, 3))
predict = model.predict(picture)
res = predict[0][0]
print(res)
if res < 0.5:
    print("this is a cat")
else:
    print("this is a dog")

cv2.imshow("Image1", img)
cv2.waitKey(0)
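
A note on the 0.5 threshold: flow_from_directory assigns class indices in alphabetical order of the subdirectory names, so 'cats' maps to 0 and 'dogs' to 1. With a single sigmoid output trained on those labels, a prediction below 0.5 therefore means cat and a prediction above 0.5 means dog.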

Results

(Screenshot of the prediction output.)

Dataset

Dataset download link

------------- Thank you for reading -------------
