0%

智能计算系统 lab3.1 深度学习

实验目的

  • 使用Python语言numpy模块基于VGG19网络模型实现ImageNet数据集分类

  • 优化:使用im2col改善卷积的计算性能

这篇文章重点记录了原始前向传播和优化的过程与理解

参考文章:智能计算系统实验3.3非实时图像风格迁移


实验内容

ConvolutionalLayer

简单前向传播:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
def forward_slow(self, _input):
    """Reference convolution: compute every output element with explicit loops.

    Args:
        _input: array indexed as [batch, channel, height, width].

    Returns:
        self.output, indexed as [batch, channel_out, height_out, width_out].

    Side effects:
        Stores self.input, self.input_pad and self.output on the layer.
    """
    self.input = _input
    shape = self.input.shape
    batch, channels, rows, cols = shape[0], shape[1], shape[2], shape[3]
    pad, ksize, step = self.padding, self.kernel_size, self.stride

    # Zero-padded copy of the input; the original data sits in the centre.
    padded_rows = rows + pad * 2
    padded_cols = cols + pad * 2
    self.input_pad = np.zeros([batch, channels, padded_rows, padded_cols])
    self.input_pad[:, :, pad:pad + rows, pad:pad + cols] = self.input

    # Number of window positions along each spatial axis.
    height_out = int((padded_rows - ksize) / step + 1)
    width_out = int((padded_cols - ksize) / step + 1)
    self.output = np.zeros([batch, self.channel_out, height_out, width_out])

    # One multiply-and-sum per output element; this per-element loop is the
    # main optimisation target.
    for n, c, i, j in np.ndindex(batch, self.channel_out, height_out, width_out):
        top, left = i * step, j * step
        window = self.input_pad[n, :, top:top + ksize, left:left + ksize]
        self.output[n, c, i, j] = np.sum(self.weight[:, :, :, c] * window) + self.bias[c]
    return self.output

优化思路:

  • im2col:将每个卷积窗口展开为一行,把整个卷积转化为一次矩阵乘法,代价是内存开销较大(示意图:L8z0zT.jpg)

  • Winograd:TODO

优化结果(大部分来自参考文章):

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
def im2col(image, ksize, stride):
    """Unroll every ksize x ksize window of a 4-D tensor into one row.

    Args:
        image: array indexed as [batch, channel, height, width].
        ksize: side length of the square window.
        stride: step between consecutive window positions.

    Returns:
        2-D array with one row per (batch, row, column) window position, in
        that nesting order; each row is the window flattened across channel,
        then window row, then window column.
    """
    row_starts = range(0, image.shape[2] - ksize + 1, stride)
    col_starts = range(0, image.shape[3] - ksize + 1, stride)
    patches = [
        image[sample, :, top:top + ksize, left:left + ksize].reshape([-1])
        for sample in range(image.shape[0])
        for top in row_starts
        for left in col_starts
    ]
    return np.array(patches)

def forward_fast(self, _input):
    """Convolution forward pass computed as one im2col matrix product.

    Args:
        _input: array indexed as [batch, channel, height, width].

    Returns:
        self.output, indexed as [batch, channel_out, height_out, width_out].

    Side effects:
        Stores self.input, self.input_pad, self.col_image and self.output.
    """
    self.input = _input
    batch, channels, rows, cols = self.input.shape
    pad = self.padding

    # Zero-padded copy of the input; the original data sits in the centre.
    padded_rows = rows + pad * 2
    padded_cols = cols + pad * 2
    self.input_pad = np.zeros([batch, channels, padded_rows, padded_cols])
    self.input_pad[:, :, pad:pad + rows, pad:pad + cols] = self.input

    # Number of window positions along each spatial axis.
    height_out = (padded_rows - self.kernel_size) // self.stride + 1
    width_out = (padded_cols - self.kernel_size) // self.stride + 1

    # Weight is stored as (channel_in, k, k, channel_out); flattening the
    # first three axes matches the per-window flattening done by im2col.
    cout = self.weight.shape[3]
    col_weight = np.reshape(self.weight, [-1, cout])
    self.col_image = im2col(self.input_pad, self.kernel_size, self.stride)

    # Rows of the product are ordered (batch, out_row, out_col); no output
    # buffer is pre-allocated because the product creates it.
    flat = np.dot(self.col_image, col_weight) + self.bias
    nhwc = np.reshape(flat, [batch, height_out, width_out, cout])
    self.output = np.transpose(nhwc, [0, 3, 1, 2])
    return self.output

MaxPoolingLayer

简单前向传播:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
def forward_slow(self, _input):
    """Reference max pooling: take the maximum of each window with explicit loops.

    Args:
        _input: array indexed as [batch, channel, height, width].

    Returns:
        self.output, indexed as [batch, channel, height_out, width_out].

    Side effects:
        Stores self.input and self.output, and resets self.max_index to zeros
        of the input's shape (it is not filled in by this method).
    """
    self.input = _input
    self.max_index = np.zeros(self.input.shape)
    ksize, step = self.kernel_size, self.stride
    batch, channels = self.input.shape[0], self.input.shape[1]

    # Number of window positions along each spatial axis.
    height_out = int((self.input.shape[2] - ksize) / step + 1)
    width_out = int((self.input.shape[3] - ksize) / step + 1)
    self.output = np.zeros([batch, channels, height_out, width_out])

    for n, c, i, j in np.ndindex(batch, channels, height_out, width_out):
        top, left = i * step, j * step
        self.output[n, c, i, j] = np.max(self.input[n, c, top:top + ksize, left:left + ksize])
    return self.output

这部分的优化较为简单:先把求MaxPool所需的input子矩阵全部提取出来并展开,再利用numpy的max函数的向量化实现一次性求出所有窗口的最大值。缺点同样是内存开销较大。

快速前向传播:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
def forward_fast(self, _input):
    """Max pooling that gathers all windows first and reduces them in one call.

    Args:
        _input: array indexed as [batch, channel, height, width].

    Returns:
        self.output, indexed as [batch, channel, height_out, width_out].

    Side effects:
        Stores self.input, self.input_vectorized and self.output, and resets
        self.max_index to zeros of the input's shape.
    """
    self.input = _input
    self.max_index = np.zeros(self.input.shape)
    ksize, step = self.kernel_size, self.stride
    batch, channels = self.input.shape[0], self.input.shape[1]

    # Number of window positions along each spatial axis.
    height_out = int((self.input.shape[2] - ksize) / step + 1)
    width_out = int((self.input.shape[3] - ksize) / step + 1)

    # One flattened window per output position, for all samples and channels.
    self.input_vectorized = np.zeros([batch, channels, height_out * width_out, ksize * ksize])
    for i, j in np.ndindex(height_out, width_out):
        top, left = i * step, j * step
        window = self.input[:, :, top:top + ksize, left:left + ksize]
        self.input_vectorized[:, :, i * width_out + j] = window.reshape([batch, channels, -1])

    # A single reduction over the window axis replaces the per-element loop.
    pooled = np.max(self.input_vectorized, axis=-1)
    self.output = pooled.reshape([batch, channels, height_out, width_out])
    return self.output

整体代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
# coding=utf-8
# wyl 2022.4.15
import time
import numpy as np
import scipy.io as sio
from PIL import Image


def im2col(image, ksize, stride):
    """Unroll every ksize x ksize window of a 4-D tensor into one row.

    Args:
        image: array indexed as [batch, channel, height, width].
        ksize: side length of the square window.
        stride: step between consecutive window positions.

    Returns:
        2-D array with one row per (batch, row, column) window position, in
        that nesting order; each row is the window flattened across channel,
        then window row, then window column.
    """
    row_starts = range(0, image.shape[2] - ksize + 1, stride)
    col_starts = range(0, image.shape[3] - ksize + 1, stride)
    patches = [
        image[sample, :, top:top + ksize, left:left + ksize].reshape([-1])
        for sample in range(image.shape[0])
        for top in row_starts
        for left in col_starts
    ]
    return np.array(patches)


def im2col_pool(image, ksize, stride):
    """Unroll every ksize x ksize window, keeping the channel axis separate.

    Args:
        image: array indexed as [batch, channel, height, width].
        ksize: side length of the square window.
        stride: step between consecutive window positions.

    Returns:
        3-D array indexed as [window, channel, ksize * ksize], with windows
        ordered by (batch, row, column) position.
    """
    channels = image.shape[1]
    row_starts = range(0, image.shape[2] - ksize + 1, stride)
    col_starts = range(0, image.shape[3] - ksize + 1, stride)
    patches = [
        image[sample, :, top:top + ksize, left:left + ksize].reshape([channels, -1])
        for sample in range(image.shape[0])
        for top in row_starts
        for left in col_starts
    ]
    return np.array(patches)


class ConvolutionalLayer(object):
    """2-D convolution over [batch, channel, height, width] inputs.

    The weight is stored as (channel_in, kernel_size, kernel_size,
    channel_out) and the bias as (channel_out,). Two forward implementations
    are provided: a loop-based reference and an im2col matrix-product version.
    """

    def __init__(self, kernel_size, channel_in, channel_out, padding, stride):
        self.kernel_size = kernel_size
        self.channel_in = channel_in
        self.channel_out = channel_out
        self.padding = padding
        self.stride = stride

    def init_param(self, std=0.01):
        """Draw the weight from N(0, std) and set the bias to zeros."""
        self.weight = np.random.normal(
            loc=0.0, scale=std,
            size=(self.channel_in, self.kernel_size, self.kernel_size, self.channel_out))
        self.bias = np.zeros([self.channel_out])

    def forward(self, _input):
        """Run the default (im2col) forward pass."""
        return self.forward_fast(_input)

    def _pad_input(self, _input):
        """Store the input and its zero-padded copy; return (height_out, width_out)."""
        self.input = _input
        batch, channels, rows, cols = self.input.shape
        pad = self.padding
        padded_rows = rows + pad * 2
        padded_cols = cols + pad * 2
        self.input_pad = np.zeros([batch, channels, padded_rows, padded_cols])
        self.input_pad[:, :, pad:pad + rows, pad:pad + cols] = self.input
        # Number of window positions along each spatial axis.
        height_out = (padded_rows - self.kernel_size) // self.stride + 1
        width_out = (padded_cols - self.kernel_size) // self.stride + 1
        return height_out, width_out

    def forward_fast(self, _input):
        """Convolution computed as a single im2col matrix product.

        Args:
            _input: array indexed as [batch, channel, height, width].

        Returns:
            self.output, indexed as [batch, channel_out, height_out, width_out].
        """
        height_out, width_out = self._pad_input(_input)
        batch = self.input.shape[0]
        cout = self.weight.shape[3]

        # Flattening (channel_in, k, k) matches the per-window flattening
        # done by im2col, so each output value is one row-by-column product.
        col_weight = np.reshape(self.weight, [-1, cout])
        self.col_image = im2col(self.input_pad, self.kernel_size, self.stride)

        # Rows of the product are ordered (batch, out_row, out_col); no output
        # buffer is pre-allocated because the product creates it.
        flat = np.dot(self.col_image, col_weight) + self.bias
        nhwc = np.reshape(flat, [batch, height_out, width_out, cout])
        self.output = np.transpose(nhwc, [0, 3, 1, 2])
        return self.output

    def forward_slow(self, _input):
        """Reference convolution computing each output element in a loop.

        Args:
            _input: array indexed as [batch, channel, height, width].

        Returns:
            self.output, indexed as [batch, channel_out, height_out, width_out].
        """
        height_out, width_out = self._pad_input(_input)
        batch = self.input.shape[0]
        ksize = self.kernel_size
        self.output = np.zeros([batch, self.channel_out, height_out, width_out])
        for idxn in range(batch):
            for idxc in range(self.channel_out):
                kernel = self.weight[:, :, :, idxc]
                for idxh in range(height_out):
                    for idxw in range(width_out):
                        h = idxh * self.stride
                        w = idxw * self.stride
                        window = self.input_pad[idxn, :, h:h + ksize, w:w + ksize]
                        self.output[idxn, idxc, idxh, idxw] = np.sum(kernel * window) + self.bias[idxc]
        return self.output

    def load_param(self, weight, bias):
        """Replace the weight and bias with the given arrays (no shape check)."""
        self.weight = weight
        self.bias = bias


class MaxPoolingLayer(object):
    """Max pooling over square windows of [batch, channel, height, width] inputs."""

    def __init__(self, kernel_size, stride):
        self.kernel_size, self.stride = kernel_size, stride

    def forward(self, _input):
        """Run the default (vectorised) forward pass."""
        return self.forward_fast(_input)

    def forward_fast(self, _input):
        """Gather all windows first, then reduce them with one np.max call.

        Returns self.output indexed as [batch, channel, height_out, width_out];
        also stores self.input, self.input_vectorized and a zeroed self.max_index.
        """
        self.input = _input
        self.max_index = np.zeros(self.input.shape)
        ksize, step = self.kernel_size, self.stride
        batch, channels = self.input.shape[0], self.input.shape[1]

        # Number of window positions along each spatial axis.
        height_out = int((self.input.shape[2] - ksize) / step + 1)
        width_out = int((self.input.shape[3] - ksize) / step + 1)

        # One flattened window per output position, for all samples and channels.
        self.input_vectorized = np.zeros([batch, channels, height_out * width_out, ksize * ksize])
        for i, j in np.ndindex(height_out, width_out):
            top, left = i * step, j * step
            window = self.input[:, :, top:top + ksize, left:left + ksize]
            self.input_vectorized[:, :, i * width_out + j] = window.reshape([batch, channels, -1])

        pooled = np.max(self.input_vectorized, axis=-1)
        self.output = pooled.reshape([batch, channels, height_out, width_out])
        return self.output

    def forward_slow(self, _input):
        """Reference implementation taking the maximum of each window in a loop.

        Returns self.output indexed as [batch, channel, height_out, width_out];
        also stores self.input and a zeroed self.max_index.
        """
        self.input = _input
        self.max_index = np.zeros(self.input.shape)
        ksize, step = self.kernel_size, self.stride
        batch, channels = self.input.shape[0], self.input.shape[1]

        height_out = int((self.input.shape[2] - ksize) / step + 1)
        width_out = int((self.input.shape[3] - ksize) / step + 1)
        self.output = np.zeros([batch, channels, height_out, width_out])

        for n, c, i, j in np.ndindex(batch, channels, height_out, width_out):
            top, left = i * step, j * step
            self.output[n, c, i, j] = np.max(self.input[n, c, top:top + ksize, left:left + ksize])
        return self.output


class FlattenLayer(object):
    """Reorder a [batch, channel, height, width] tensor to channel-last and flatten it."""

    def __init__(self, input_shape, output_shape):
        self.input_shape, self.output_shape = input_shape, output_shape

    def forward(self, _input):
        """Return the input reshaped to [batch] + output_shape.

        The axes are first permuted to [batch, height, width, channel], so the
        flattened features vary fastest along the channel axis. The permuted
        tensor (not the raw argument) is what gets stored in self.input.
        """
        channel_last = np.transpose(_input, [0, 2, 3, 1])
        self.input = channel_last
        target_shape = [channel_last.shape[0]]
        target_shape.extend(self.output_shape)
        self.output = channel_last.reshape(target_shape)
        return self.output


class FullyConnectedLayer(object):
    """Dense layer computing ``input . weight + bias`` with a manual backward pass."""

    def __init__(self, num_input, num_output):
        self.num_input, self.num_output = num_input, num_output

    def init_param(self, std=0.01):
        """Draw the weight from N(0, std) and set the bias to a zero row vector."""
        weight_shape = (self.num_input, self.num_output)
        self.weight = np.random.normal(loc=0.0, scale=std, size=weight_shape)
        self.bias = np.zeros([1, self.num_output])

    def forward(self, _input):
        """Remember the input for backward() and return the affine transform."""
        self.input = _input
        return np.dot(_input, self.weight) + self.bias

    def backward(self, top_diff):
        """Store the parameter gradients and return the gradient w.r.t. the input."""
        self.d_weight = np.dot(self.input.T, top_diff)
        self.d_bias = np.sum(top_diff, axis=0)
        return np.dot(top_diff, self.weight.T)

    def update_param(self, lr):
        """Take one gradient-descent step of size lr using the stored gradients."""
        weight_step = lr * self.d_weight
        bias_step = lr * self.d_bias
        self.weight = self.weight - weight_step
        self.bias = self.bias - bias_step

    def load_param(self, weight, bias):
        """Replace the parameters; shapes must match the current ones.

        The checks read self.weight and self.bias, so parameters have to exist
        already (for example via init_param) before this is called.
        """
        assert self.weight.shape == weight.shape
        assert self.bias.shape == bias.shape
        self.weight, self.bias = weight, bias

    def save_param(self):
        """Return the current (weight, bias) pair."""
        return self.weight, self.bias


class ReLULayer(object):
    """Element-wise rectified linear unit with a manual backward pass."""

    def forward(self, _input):
        """Remember the input for backward() and return max(input, 0)."""
        self.input = _input
        output = np.maximum(self.input, 0)
        return output

    def backward(self, top_diff):
        """Return top_diff with entries zeroed where the forward input was negative.

        Works on a copy so the caller's top_diff array is left untouched.
        """
        bottom_diff = np.array(top_diff, copy=True)
        bottom_diff[self.input < 0] = 0
        return bottom_diff


class SoftmaxLossLayer(object):
    """Softmax over axis 1 with a cross-entropy loss and its gradient."""

    def forward(self, _input):
        """Return row-wise softmax probabilities and store them in self.prob."""
        # Subtracting the row maximum keeps exp() from overflowing.
        input_max = np.max(_input, axis=1, keepdims=True)
        input_exp = np.exp(_input - input_max)
        self.prob = input_exp / np.sum(input_exp, axis=1, keepdims=True)
        return self.prob

    def get_loss(self, label):
        """Return the mean cross-entropy of self.prob against integer labels.

        Args:
            label: one class index per row of self.prob.

        Also stores self.batch_size and self.label_onehot for backward().
        """
        self.batch_size = self.prob.shape[0]
        rows = np.arange(self.batch_size)
        self.label_onehot = np.zeros_like(self.prob)
        self.label_onehot[rows, label] = 1.0
        # Take the log of the labelled probabilities only. Multiplying
        # log(prob) by the one-hot mask would turn any underflowed
        # probability of an unrelated class into -inf * 0 = nan.
        label_prob = self.prob[rows, label]
        loss = -np.sum(np.log(label_prob)) / self.batch_size
        return loss

    def backward(self):
        """Return the gradient of the mean loss with respect to the softmax input."""
        bottom_diff = (self.prob - self.label_onehot) / self.batch_size
        return bottom_diff


class VGG19(object):
    """VGG19 classifier assembled from the numpy layers defined in this file."""

    def __init__(self, param_path='imagenet-vgg-verydeep-19.mat'):
        self.param_path = param_path
        # Execution order of the layers; also used to index the .mat file.
        self.param_layer_name = (
            'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',
            'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',
            'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3', 'relu3_3', 'conv3_4', 'relu3_4', 'pool3',
            'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3', 'relu4_3', 'conv4_4', 'relu4_4', 'pool4',
            'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3', 'relu5_3', 'conv5_4', 'relu5_4', 'pool5',
            'flatten', 'fc6', 'relu6', 'fc7', 'relu7', 'fc8', 'softmax')

    def build_model(self):
        """Create every layer object and record which ones carry parameters."""
        print('Building model')
        self.layers = {}
        self.layers['conv1_1'] = ConvolutionalLayer(3, 3, 64, 1, 1)
        self.layers['relu1_1'] = ReLULayer()
        self.layers['conv1_2'] = ConvolutionalLayer(3, 64, 64, 1, 1)
        self.layers['relu1_2'] = ReLULayer()
        self.layers['pool1'] = MaxPoolingLayer(2, 2)

        self.layers['conv2_1'] = ConvolutionalLayer(3, 64, 128, 1, 1)
        self.layers['relu2_1'] = ReLULayer()
        self.layers['conv2_2'] = ConvolutionalLayer(3, 128, 128, 1, 1)
        self.layers['relu2_2'] = ReLULayer()
        self.layers['pool2'] = MaxPoolingLayer(2, 2)

        self.layers['conv3_1'] = ConvolutionalLayer(3, 128, 256, 1, 1)
        self.layers['relu3_1'] = ReLULayer()
        self.layers['conv3_2'] = ConvolutionalLayer(3, 256, 256, 1, 1)
        self.layers['relu3_2'] = ReLULayer()
        self.layers['conv3_3'] = ConvolutionalLayer(3, 256, 256, 1, 1)
        self.layers['relu3_3'] = ReLULayer()
        self.layers['conv3_4'] = ConvolutionalLayer(3, 256, 256, 1, 1)
        self.layers['relu3_4'] = ReLULayer()
        self.layers['pool3'] = MaxPoolingLayer(2, 2)

        self.layers['conv4_1'] = ConvolutionalLayer(3, 256, 512, 1, 1)
        self.layers['relu4_1'] = ReLULayer()
        self.layers['conv4_2'] = ConvolutionalLayer(3, 512, 512, 1, 1)
        self.layers['relu4_2'] = ReLULayer()
        self.layers['conv4_3'] = ConvolutionalLayer(3, 512, 512, 1, 1)
        self.layers['relu4_3'] = ReLULayer()
        self.layers['conv4_4'] = ConvolutionalLayer(3, 512, 512, 1, 1)
        self.layers['relu4_4'] = ReLULayer()
        self.layers['pool4'] = MaxPoolingLayer(2, 2)

        self.layers['conv5_1'] = ConvolutionalLayer(3, 512, 512, 1, 1)
        self.layers['relu5_1'] = ReLULayer()
        self.layers['conv5_2'] = ConvolutionalLayer(3, 512, 512, 1, 1)
        self.layers['relu5_2'] = ReLULayer()
        self.layers['conv5_3'] = ConvolutionalLayer(3, 512, 512, 1, 1)
        self.layers['relu5_3'] = ReLULayer()
        self.layers['conv5_4'] = ConvolutionalLayer(3, 512, 512, 1, 1)
        self.layers['relu5_4'] = ReLULayer()
        self.layers['pool5'] = MaxPoolingLayer(2, 2)

        self.layers['flatten'] = FlattenLayer([512, 7, 7], [512*7*7])
        self.layers['fc6'] = FullyConnectedLayer(512*7*7, 4096)
        self.layers['relu6'] = ReLULayer()
        self.layers['fc7'] = FullyConnectedLayer(4096, 4096)
        self.layers['relu7'] = ReLULayer()
        self.layers['fc8'] = FullyConnectedLayer(4096, 1000)
        self.layers['softmax'] = SoftmaxLossLayer()

        # Only convolutional and fully connected layers have parameters.
        self.update_layer_list = [
            layer_name for layer_name in self.layers
            if 'conv' in layer_name or 'fc' in layer_name
        ]

    def init_model(self):
        """Randomly initialise the parameters of every conv / fc layer."""
        print('Initing model')
        for layer_name in self.update_layer_list:
            self.layers[layer_name].init_param()

    def load_model(self):
        """Load the image mean and all conv / fc parameters from self.param_path."""
        print('Loading model')
        params = sio.loadmat(self.param_path)
        # Averaged over the first two axes; presumably these are height and
        # width, leaving one mean per colour channel (confirm against the file).
        self.image_mean = params['normalization'][0][0][0]
        self.image_mean = np.mean(self.image_mean, axis=(0, 1))

        flatten_idx = self.param_layer_name.index('flatten')
        for idx, layer_name in enumerate(self.param_layer_name):
            if 'conv' in layer_name:
                weight, bias = params['layers'][0][idx][0][0][0][0]
                # Move axis 2 to the front to match the layer's
                # (channel_in, k, k, channel_out) weight layout.
                weight = np.transpose(weight, [2, 0, 1, 3])
                bias = bias.reshape(-1)
                self.layers[layer_name].load_param(weight, bias)
            elif 'fc' in layer_name and idx > flatten_idx:
                # Layers after 'flatten' are read one position earlier,
                # presumably because the file has no entry for 'flatten'.
                weight, bias = params['layers'][0][idx - 1][0][0][0][0]
                weight = weight.reshape([weight.shape[0]*weight.shape[1]*weight.shape[2], weight.shape[3]])
                self.layers[layer_name].load_param(weight, bias)

    def forward(self):
        """Run self.input_image through every layer in order, timing each one.

        Returns:
            The output of the last layer in self.param_layer_name.
        """
        current = self.input_image
        for layer_name in self.param_layer_name:
            print('Forwarding in layer: %s' % layer_name, end="")
            start_time = time.time()
            current = self.layers[layer_name].forward(current)
            end_time = time.time()
            print(' time:%.2fs' % (end_time-start_time))

        return current

    def evaluate(self):
        """Run a forward pass and print the top-1 class id and its probability."""
        prob = self.forward()
        top1 = np.argmax(prob[0])
        print('Classification result: id=%d, prob=%f' % (top1, prob[0, top1]))

    def load_image(self, image_dir):
        """Read an image file into self.input_image as a [1, 3, 224, 224] array.

        Requires self.image_mean, which load_model() sets.
        """
        print('Loading and preprocessing image from ' + image_dir)
        # Force three channels so grayscale, palette or RGBA files still match
        # the per-channel mean and the axis permutation below; the context
        # manager closes the file once the pixels have been read.
        with Image.open(image_dir) as source:
            resized = source.convert('RGB').resize((224, 224))
        self.input_image = np.array(resized).astype(np.float32)
        self.input_image -= self.image_mean
        # Add a batch axis, then go from [1, H, W, C] to [1, C, H, W].
        self.input_image = np.reshape(self.input_image, [1]+list(self.input_image.shape))
        self.input_image = np.transpose(self.input_image, [0, 3, 1, 2])


if __name__ == '__main__':
    classifier = VGG19()
    # Build the layers and initialise their parameters before loading the
    # pretrained ones: FullyConnectedLayer.load_param checks the incoming
    # shapes against parameters that must already exist.
    for prepare in (classifier.build_model, classifier.init_model, classifier.load_model):
        prepare()
    # load_image relies on the image mean that load_model has just read.
    classifier.load_image('c67ab7b456b55a4cba9f6e2956b7c2ef.jpeg')
    classifier.evaluate()