Paper: CVAE-GAN: Fine-Grained Image Generation through Asymmetric Training.
Code source: GitHub
There are already plenty of detailed analyses of the paper itself on CSDN, so this post will not repeat them.
The model can be applied to areas such as image inpainting, super-resolution, and data augmentation, e.g., generating samples to train better face recognition models.
By experimental purpose, the program is split into three parts: training the network, testing the network, and the classification network. These rest on three supporting modules: the base model, the VAE network, and the discriminator network. Each has a clear responsibility, and they interlock closely.
First, the base model:
model_utils.py
import tensorflow as tf
import tensorlayer as tl
import numpy as np


def _channel_shuffle(x, n_group):
    # Split the channels into n_group groups, swap the group axis with the
    # per-group channel axis, then flatten back, so that channels from
    # different groups end up interleaved.
    n, h, w, c = x.shape.as_list()
    x_reshaped = tf.reshape(x, [-1, h, w, n_group, c // n_group])
    x_transposed = tf.transpose(x_reshaped, [0, 1, 2, 4, 3])
    output = tf.reshape(x_transposed, [-1, h, w, c])
    return output
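Channel shuffle comes from ShuffleNet: after a grouped convolution, interleaving the channels across groups lets information flow between groups. The permutation is easiest to see on a toy tensor; here is a minimal NumPy sketch of the same reshape-transpose-reshape trick (shapes and values are made up for illustration):

import numpy as np

# 1 sample, 1x1 spatial size, 6 channels labeled 0..5, shuffled with 2 groups
x = np.arange(6).reshape(1, 1, 1, 6)
n_group = 2
n, h, w, c = x.shape
shuffled = (x.reshape(n, h, w, n_group, c // n_group)
             .transpose(0, 1, 2, 4, 3)
             .reshape(n, h, w, c))
print(shuffled.ravel())  # [0 3 1 4 2 5]: the two groups' channels are interleaved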
def _group_norm_and_channel_shuffle(x, is_train, G=32, epsilon=1e-12, use_shuffle=False, name='_group_norm'):
    with tf.variable_scope(name):
        N, H, W, C = x.get_shape().as_list()
        if N is None:
            N = -1
        G = min(G, C)
        x = tf.reshape(x, [N, G, H, W, C // G])
        mean, var = tf.nn.moments(x, [2, 3, 4], keep_dims=True)
        x = (x - mean) / tf.sqrt(var + epsilon)
        # shuffle channels by swapping the group axis with the per-group channel axis
        if use_shuffle:
            x = tf.transpose(x, [0, 4, 2, 3, 1])
        # per-channel gamma and beta
        gamma = tf.get_variable('gamma', [C], initializer=tf.constant_initializer(1.0), trainable=is_train)
        beta = tf.get_variable('beta', [C], initializer=tf.constant_initializer(0.0), trainable=is_train)
        gamma = tf.reshape(gamma, [1, 1, 1, C])
        beta = tf.reshape(beta, [1, 1, 1, C])
        output = tf.reshape(x, [N, H, W, C]) * gamma + beta
        return output
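Group normalization normalizes over the spatial dimensions and the channels inside each group, so its behavior does not depend on batch size, which suits the small batches typical of GAN training. A minimal usage sketch under TensorFlow 1.x semantics (the placeholder shape and scope name are illustrative):

x = tf.placeholder(tf.float32, [None, 32, 32, 64])
y = _group_norm_and_channel_shuffle(x, is_train=True, G=8, use_shuffle=True, name='gn_demo')
# y keeps the shape of x; each of the 8 groups of 8 channels is normalized separately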
def _switch_norm(x, name='_switch_norm'):
    with tf.variable_scope(name):
        ch = x.shape[-1]
        eps = 1e-5
        # statistics for batch norm, instance norm and layer norm, respectively
        batch_mean, batch_var = tf.nn.moments(x, [0, 1, 2], keep_dims=True)
        ins_mean, ins_var = tf.nn.moments(x, [1, 2], keep_dims=True)
        layer_mean, layer_var = tf.nn.moments(x, [1, 2, 3], keep_dims=True)
        gamma = tf.get_variable("gamma", [ch], initializer=tf.constant_initializer(1.0))
        beta = tf.get_variable("beta", [ch], initializer=tf.constant_initializer(0.0))
        # learned softmax weights decide how the three statistics are mixed
        mean_weight = tf.nn.softmax(tf.get_variable("mean_weight", [3], initializer=tf.constant_initializer(1.0)))
        var_weight = tf.nn.softmax(tf.get_variable("var_weight", [3], initializer=tf.constant_initializer(1.0)))
        mean = mean_weight[0] * batch_mean + mean_weight[1] * ins_mean + mean_weight[2] * layer_mean
        var = var_weight[0] * batch_var + var_weight[1] * ins_var + var_weight[2] * layer_var
        x = (x - mean) / tf.sqrt(var + eps)
        x = x * gamma + beta
        return x
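Switchable normalization computes batch-norm, instance-norm, and layer-norm statistics side by side (note the different axes passed to tf.nn.moments) and blends them with softmax weights that are learned end to end, so each layer effectively picks its own normalizer. A quick usage sketch (the shape is illustrative):

x = tf.placeholder(tf.float32, [None, 16, 16, 32])
y = _switch_norm(x, name='sn_demo')  # same shape as x, normalized by the learned BN/IN/LN mixture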
def _add_coord(x):
    batch_size = tf.shape(x)[0]
    height, width = x.shape.as_list()[1:3]
    # Dividing by (height - 1) / (width - 1) scales the coordinates to [0, 1];
    # dividing by height / width instead would give [0, 1).
    y_coord = tf.range(0, height, dtype=tf.float32)
    y_coord = tf.reshape(y_coord, [1, -1, 1, 1])  # b,h,w,c
    y_coord = tf.tile(y_coord, [batch_size, 1, width, 1]) / (height - 1)
    x_coord = tf.range(0, width, dtype=tf.float32)
    x_coord = tf.reshape(x_coord, [1, 1, -1, 1])  # b,h,w,c
    x_coord = tf.tile(x_coord, [batch_size, height, 1, 1]) / (width - 1)
    o = tf.concat([x, y_coord, x_coord], 3)
    return o
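This is essentially the CoordConv trick: concatenating normalized y/x coordinate maps as extra channels gives subsequent convolutions access to absolute position, which plain convolutions lack. A quick shape check as a sketch (the input shape is illustrative):

x = tf.placeholder(tf.float32, [None, 8, 8, 3])
o = _add_coord(x)
print(o.shape.as_list())  # [None, 8, 8, 5]: 3 input channels plus y and x coordinate maps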
def coord_layer(net):
    return tl.layers.LambdaLayer(net, _add_coord, name='coord_layer')


def switchnorm_layer(net, act, name):
    net = tl.layers.LambdaLayer(net, _switch_norm, name=name)
    if act is not None:
        # use a distinct name for the activation layer; reusing `name` would
        # clash with the normalization layer registered above
        net = tl.layers.LambdaLayer(net, act, name=name + '_act')
    return net


def groupnorm_layer(net, is_train, G, use_shuffle, act, name):
    net = tl.layers.LambdaLayer(net, _group_norm_and_channel_shuffle, {
        'is_train': is_train, 'G': G, 'use_shuffle': use_shuffle, 'name': name}, name=name)
    if act is not None:
        net = tl.layers.LambdaLayer(net, act, name=name + '_act')
    return net
def upsampling_layer(net, shortpoint):
    # Resize `net` to the spatial size of the skip connection, then
    # concatenate the two along the channel axis.
    hw = shortpoint.outputs.shape.as_list()[1:3]
    net_upsamping = tl.layers.UpSampling2dLayer(net, hw, is_scale=False)
    net = tl.layers.ConcatLayer([net_upsamping, shortpoint], -1)
    return net
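upsampling_layer is the standard U-Net style decoder step: resize the decoder features to the skip connection's spatial size, then concatenate along channels. A usage sketch with illustrative shapes:

t = tl.layers.InputLayer(tf.placeholder(tf.float32, [None, 8, 8, 128]), name='dec_in')
s = tl.layers.InputLayer(tf.placeholder(tf.float32, [None, 16, 16, 64]), name='skip_in')
merged = upsampling_layer(t, s)
print(merged.outputs.shape.as_list())  # [None, 16, 16, 192]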
def upsampling_layer2(net, shortpoint, name):
    # Variant that first halves both channel counts with 1x1 convolutions
    # before upsampling; `conv2d` is a helper defined later in model_utils.py.
    with tf.variable_scope(name):
        hw = shortpoint.outputs.shape.as_list()[1:3]
        dim1 = net.outputs.shape.as_list()[3]
        dim2 = shortpoint.outputs.shape.as_list()[3]
        net = conv2d(net, dim1 // 2, 1, 1, None, 'SAME', True, True, False, 'up1')
        shortpoint = conv2d(shortpoint, dim2 // 2, 1, 1, None, 'SAME', True, True, False, 'up2')
        net = tl.layers.UpSampling2dLayer(net, hw, is_scale=False)
        # the source is truncated here; the final upsample-and-concat mirrors upsampling_layer above
        net = tl.layers.ConcatLayer([net, shortpoint], -1)
        return net