Using Python's deep-learning libraries, we train the computer to imitate the style of a world-famous painting and then apply that style to another image.
1. Tensor representation of the images
We start from one base (content) image and one style image, and transfer the style onto the base image:
The images are first converted to a common data format before being fed into the neural network. Keras's variable function (with the TensorFlow backend, equivalent to tf.Variable) wraps the base image, converted to an array, as a tensor; we do the same for the style image. We then create a combination image that will later hold the final result, initialized as a placeholder with the given width and height.
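In the full listing below, this step comes down to the following lines (shapes shown assume the default 'channels_last' data format; preprocess_image, img_nrows and img_ncols are defined in the listing):

from keras import backend as K

# the two fixed inputs become backend variables of shape (1, img_nrows, img_ncols, 3)
base_image = K.variable(preprocess_image(base_image_path))
style_reference_image = K.variable(preprocess_image(style_reference_image_path))

# the generated image is a placeholder of the same shape that we will optimize over
combination_image = K.placeholder((1, img_nrows, img_ncols, 3))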
2. Combine the three images into a single Keras tensor as input
This is done with the concatenate function.
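The corresponding line in the listing concatenates the three (1, img_nrows, img_ncols, 3) tensors along the batch axis, yielding one tensor of shape (3, img_nrows, img_ncols, 3):

input_tensor = K.concatenate([base_image,
                              style_reference_image,
                              combination_image], axis=0)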
3. Build a VGG16 network with the three images as input
We set the input to the newly created tensor, the weights to 'imagenet', and include_top=False.
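In the listing this is a single constructor call; include_top=False drops VGG16's fully connected classification layers, keeping only the convolutional feature extractor whose layer outputs the losses are built on:

model = vgg16.VGG16(input_tensor=input_tensor,
                    weights='imagenet', include_top=False)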
4. Combine the loss functions into a single scalar
The content, style, and total-variation losses are each weighted and summed into one scalar, computed from the model's layer outputs and the output (combination) image.
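In the listing, index 0 along the batch axis of every layer output belongs to the base image, index 1 to the style image, and index 2 to the combination image. The content term, for instance, compares base and combination features on one deep layer (slightly condensed from the listing below):

layer_features = outputs_dict['block4_conv2']
loss += content_weight * content_loss(layer_features[0, :, :, :],
                                      layer_features[2, :, :, :])

The style terms are accumulated the same way over five conv layers, followed by the total-variation term.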
5. Get the gradients of the loss with respect to the output image
We use Keras's gradients function, which with the TensorFlow backend translates to tf.gradients. It gives the symbolic gradients of a tensor with respect to one or more other tensors.
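The listing bundles the loss and its gradients into a single backend function so both can be evaluated in one pass (slightly simplified here, assuming K.gradients returns a list, which it does in Keras 2):

grads = K.gradients(loss, combination_image)
f_outputs = K.function([combination_image], [loss] + grads)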
6. Run an optimization algorithm (L-BFGS) over the pixels of the output image to minimize the loss
L-BFGS is similar in spirit to stochastic gradient descent, but converges faster. Feeding the computed loss and gradients into the minimizer iteratively produces the result image.
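Each outer iteration in the listing makes one call to SciPy's fmin_l_bfgs_b, passing the loss function, the current flattened pixels, and the gradient function; maxfun=20 caps the number of function evaluations per call:

x, min_val, info = fmin_l_bfgs_b(evaluator.loss, x.flatten(),
                                 fprime=evaluator.grads, maxfun=20)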
The full code is as follows:
from __future__ import print_function
from scipy.misc import imsave
import numpy as np
from scipy.optimize import fmin_l_bfgs_b
import time
import argparse

from keras.applications import vgg16
from keras import backend as K
from keras.preprocessing.image import load_img, img_to_array
parser = argparse.ArgumentParser(description='Neural style transfer with Keras.')
parser.add_argument('base_image_path', metavar='base', type=str,
                    help='Path to the image to transform.')
parser.add_argument('style_reference_image_path', metavar='ref', type=str,
                    help='Path to the style reference image.')
parser.add_argument('result_prefix', metavar='res_prefix', type=str,
                    help='Prefix for the saved results.')
parser.add_argument('--iter', type=int, default=10, required=False,
                    help='Number of iterations to run.')
parser.add_argument('--content_weight', type=float, default=0.025, required=False,
                    help='Content weight.')
parser.add_argument('--style_weight', type=float, default=1.0, required=False,
                    help='Style weight.')
parser.add_argument('--tv_weight', type=float, default=1.0, required=False,
                    help='Total Variation weight.')
args = parser.parse_args()
base_image_path = args.base_image_path
style_reference_image_path = args.style_reference_image_path
result_prefix = args.result_prefix
iterations = args.iter
# these are the weights of the different loss components
total_variation_weight = args.tv_weight
style_weight = args.style_weight
content_weight = args.content_weight
# dimensions of the generated picture.
width, height = load_img(base_image_path).size
img_nrows = 400
img_ncols = int(width * img_nrows / height)

# util function to open, resize and format pictures into appropriate tensors
def preprocess_image(image_path):
    img = load_img(image_path, target_size=(img_nrows, img_ncols))
    img = img_to_array(img)
    img = np.expand_dims(img, axis=0)
    img = vgg16.preprocess_input(img)
    return img
# util function to convert a tensor into a valid image
def deprocess_image(x):
    if K.image_data_format() == 'channels_first':
        x = x.reshape((3, img_nrows, img_ncols))
        x = x.transpose((1, 2, 0))
    else:
        x = x.reshape((img_nrows, img_ncols, 3))
    # Remove zero-center by mean pixel
    x[:, :, 0] += 103.939
    x[:, :, 1] += 116.779
    x[:, :, 2] += 123.68
    # 'BGR'->'RGB'
    x = x[:, :, ::-1]
    x = np.clip(x, 0, 255).astype('uint8')
    return x
# get tensor representations of our images
base_image = K.variable(preprocess_image(base_image_path))
style_reference_image = K.variable(preprocess_image(style_reference_image_path))

# this will contain our generated image
if K.image_data_format() == 'channels_first':
    combination_image = K.placeholder((1, 3, img_nrows, img_ncols))
else:
    combination_image = K.placeholder((1, img_nrows, img_ncols, 3))

# combine the 3 images into a single Keras tensor
input_tensor = K.concatenate([base_image,
                              style_reference_image,
                              combination_image], axis=0)
# build the VGG16 network with our 3 images as input
# the model will be loaded with pre-trained ImageNet weights
model = vgg16.VGG16(input_tensor=input_tensor,
                    weights='imagenet', include_top=False)
print('Model loaded.')

# get the symbolic outputs of each "key" layer (we gave them unique names).
outputs_dict = dict([(layer.name, layer.output) for layer in model.layers])
# compute the neural style loss
# first we need to define 4 util functions

# the gram matrix of an image tensor (feature-wise outer product)
def gram_matrix(x):
    assert K.ndim(x) == 3
    if K.image_data_format() == 'channels_first':
        features = K.batch_flatten(x)
    else:
        features = K.batch_flatten(K.permute_dimensions(x, (2, 0, 1)))
    gram = K.dot(features, K.transpose(features))
    return gram
# the "style loss" is designed to maintain
# the style of the reference image in the generated image.
# It is based on the gram matrices (which capture style) of
# feature maps from the style reference image
# and from the generated image
defstyle_loss(style,combination):
assertK.ndim(style) ==3
assertK.ndim(combination) ==3
S = gram_matrix(style)
C = gram_matrix(combination)
channels =3
size = img_nrows * img_ncols
returnK.sum(K.square(S - C)) / (4.* (channels **2) * (size **2))
# an auxiliary loss function
# designed to maintain the "content" of the
# base image in the generated image
defcontent_loss(base,combination):
returnK.sum(K.square(combination - base))
# the 3rd loss function, total variation loss,
# designed to keep the generated image locally coherent
def total_variation_loss(x):
    assert K.ndim(x) == 4
    if K.image_data_format() == 'channels_first':
        a = K.square(x[:, :, :img_nrows - 1, :img_ncols - 1] - x[:, :, 1:, :img_ncols - 1])
        b = K.square(x[:, :, :img_nrows - 1, :img_ncols - 1] - x[:, :, :img_nrows - 1, 1:])
    else:
        a = K.square(x[:, :img_nrows - 1, :img_ncols - 1, :] - x[:, 1:, :img_ncols - 1, :])
        b = K.square(x[:, :img_nrows - 1, :img_ncols - 1, :] - x[:, :img_nrows - 1, 1:, :])
    return K.sum(K.pow(a + b, 1.25))
# combine these loss functions into a single scalar
loss = K.variable(0.)
layer_features = outputs_dict['block4_conv2']
base_image_features = layer_features[0, :, :, :]
combination_features = layer_features[2, :, :, :]
loss += content_weight * content_loss(base_image_features,
                                      combination_features)

feature_layers = ['block1_conv1', 'block2_conv1',
                  'block3_conv1', 'block4_conv1',
                  'block5_conv1']
for layer_name in feature_layers:
    layer_features = outputs_dict[layer_name]
    style_reference_features = layer_features[1, :, :, :]
    combination_features = layer_features[2, :, :, :]
    sl = style_loss(style_reference_features, combination_features)
    loss += (style_weight / len(feature_layers)) * sl
loss += total_variation_weight * total_variation_loss(combination_image)
# get the gradients of the generated image wrt the loss
grads = K.gradients(loss, combination_image)

outputs = [loss]
if isinstance(grads, (list, tuple)):
    outputs += grads
else:
    outputs.append(grads)

f_outputs = K.function([combination_image], outputs)
def eval_loss_and_grads(x):
    if K.image_data_format() == 'channels_first':
        x = x.reshape((1, 3, img_nrows, img_ncols))
    else:
        x = x.reshape((1, img_nrows, img_ncols, 3))
    outs = f_outputs([x])
    loss_value = outs[0]
    if len(outs[1:]) == 1:
        grad_values = outs[1].flatten().astype('float64')
    else:
        grad_values = np.array(outs[1:]).flatten().astype('float64')
    return loss_value, grad_values
# this Evaluator class computes loss and gradients in one pass
# while retrieving them via two separate functions, "loss" and "grads",
# because scipy.optimize requires separate functions for loss and gradients,
# but computing them separately would be inefficient
class Evaluator(object):
    def __init__(self):
        self.loss_value = None
        self.grad_values = None

    def loss(self, x):
        assert self.loss_value is None
        loss_value, grad_values = eval_loss_and_grads(x)
        self.loss_value = loss_value
        self.grad_values = grad_values
        return self.loss_value

    def grads(self, x):
        assert self.loss_value is not None
        grad_values = np.copy(self.grad_values)
        self.loss_value = None
        self.grad_values = None
        return grad_values

evaluator = Evaluator()
# run scipy-based optimization (L-BFGS) over the pixels of the generated image
# so as to minimize the neural style loss
# the generated image is initialized with random noise
if K.image_data_format() == 'channels_first':
    x = np.random.uniform(0, 255, (1, 3, img_nrows, img_ncols)) - 128.
else:
    x = np.random.uniform(0, 255, (1, img_nrows, img_ncols, 3)) - 128.

for i in range(iterations):
    print('Start of iteration', i)
    start_time = time.time()
    x, min_val, info = fmin_l_bfgs_b(evaluator.loss, x.flatten(),
                                     fprime=evaluator.grads, maxfun=20)
    print('Current loss value:', min_val)
    # save current generated image
    img = deprocess_image(x.copy())
    fname = result_prefix + '_at_iteration_%d.png' % i
    imsave(fname, img)
    end_time = time.time()
    print('Image saved as', fname)
    print('Iteration %d completed in %ds' % (i, end_time - start_time))
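Assuming the script is saved as neural_style_transfer.py (the filename is hypothetical), it can be run from the command line with the three positional arguments defined by the argument parser, for example:

python neural_style_transfer.py content.jpg style.jpg result --iter 10

Each iteration then writes an intermediate image such as result_at_iteration_0.png, using the result prefix given on the command line.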