Importing Packages and Setting Up the TPU
Importing packages
import os, cv2, re, logging, warnings, functools, shutil
import PIL.Image  # imported explicitly so PIL.Image.fromarray works in the submission step
from kaggle_datasets import KaggleDatasets
import random
import seaborn as sns
from sklearn.metrics.pairwise import cosine_similarity
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.losses import BinaryCrossentropy
import numpy as np
import pandas as pd
import matplotlib.colors as colors
import matplotlib.pyplot as plt
import matplotlib
Setting up the TPU
Oddly enough, when I was in Guangzhou I always had to queue for a TPU, but from home there is no queue at all; it connects right away.
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    print(f'Running on TPU {tpu.master()}')
except ValueError:
    tpu = None

if tpu:
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.TPUStrategy(tpu)
else:
    strategy = tf.distribute.get_strategy()

REPLICAS = strategy.num_replicas_in_sync
print(f'REPLICAS: {REPLICAS}')
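As a quick sanity check (my addition, not part of the original flow), the logical TPU devices can be listed; on a Kaggle TPU this typically shows eight cores, matching REPLICAS:

print(tf.config.list_logical_devices('TPU'))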
Loading the Data and Building the CycleGAN
Loading the data
AUTO = tf.data.experimental.AUTOTUNE
MONET_FILENAMES = tf.io.gfile.glob('/kaggle/input/gan-getting-started/monet_tfrec/*.tfrec')
PHOTO_FILENAMES = tf.io.gfile.glob('/kaggle/input/gan-getting-started/photo_tfrec/*.tfrec')
def count_data_items(filenames):
    # Kaggle TFRecord shards encode their record count at the end of the filename.
    n = [int(re.compile(r"-([0-9]*)\.").search(filename).group(1)) for filename in filenames]
    for filename in filenames[:20]:
        print("Sample filename:", filename)
    return np.sum(n)
n_monet_samples = count_data_items(MONET_FILENAMES)
n_photo_samples = count_data_items(PHOTO_FILENAMES)
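The counting trick relies on Kaggle's TFRecord naming convention, where each shard name ends with the number of records it contains. A minimal illustration with a made-up filename:

# Hypothetical filename following the "<name>-<count>.tfrec" convention
fname = 'monet00-301.tfrec'
print(int(re.compile(r"-([0-9]*)\.").search(fname).group(1)))  # 301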
Defining helper functions and classes
Decoding and rescaling images
def decode_and_rescale(image):
    image = tf.image.decode_jpeg(image, channels=3)
    # Map pixel values from [0, 255] to [-1, 1] to match the generator's tanh output.
    image = (tf.cast(image, tf.float32) / 127.5) - 1
    image = tf.reshape(image, [256, 256, 3])
    return image
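Dividing by 127.5 and subtracting 1 maps [0, 255] to [-1, 1]; the submission code at the end inverts this with img * 127.5 + 127.5. A tiny check (my addition):

px = tf.constant([0., 127.5, 255.])
print(((px / 127.5) - 1).numpy())  # [-1.  0.  1.]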
Parsing the TFRecords
def read_tfrecord(example):
    tfrecord_format = {
        'image_name': tf.io.FixedLenFeature([], tf.string),
        'image': tf.io.FixedLenFeature([], tf.string),
        'target': tf.io.FixedLenFeature([], tf.string)
    }
    example = tf.io.parse_single_example(example, tfrecord_format)
    image = decode_and_rescale(example['image'])
    return image
Loading a dataset
def load_dataset(filenames, labeled=True, ordered=False):
    # `labeled` and `ordered` are kept for API compatibility but are not used here.
    dataset = tf.data.TFRecordDataset(filenames)
    dataset = dataset.map(read_tfrecord, num_parallel_calls=AUTO)
    return dataset
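A quick way to smoke-test the pipeline (a sketch, assuming the TFRecord files above are available) is to pull one image and check its shape and value range:

sample = next(iter(load_dataset(MONET_FILENAMES)))
print(sample.shape, float(tf.reduce_min(sample)), float(tf.reduce_max(sample)))
# Expected: (256, 256, 3) with values in [-1, 1]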
Defining an instance normalization layer
# Custom implementation
class InstanceNormalization(tf.keras.layers.Layer):
    def __init__(self, epsilon=1e-5):
        super(InstanceNormalization, self).__init__()
        self.epsilon = epsilon

    def build(self, input_shape):
        self.scale = self.add_weight(
            name='scale',
            shape=input_shape[-1:],
            initializer=tf.random_normal_initializer(1., 0.02),
            trainable=True)
        self.offset = self.add_weight(
            name='offset',
            shape=input_shape[-1:],
            initializer='zeros',
            trainable=True)

    def call(self, x):
        # Normalize each sample's feature maps over the spatial axes only.
        mean, variance = tf.nn.moments(x, axes=[1, 2], keepdims=True)
        inv = tf.math.rsqrt(variance + self.epsilon)
        normalized = (x - mean) * inv
        return self.scale * normalized + self.offset
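Unlike batch normalization, this normalizes each sample independently over its spatial axes. A small sanity check (my addition): since offset starts at zero, the per-sample, per-channel mean of the output should be numerically zero at initialization:

layer = InstanceNormalization()
out = layer(tf.random.uniform([2, 8, 8, 4]))
m, v = tf.nn.moments(out, axes=[1, 2])
print(m.numpy().round(3))  # all entries near 0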
This is because everything has been upgraded to Keras 3 now and some older code is no longer compatible; the notebooks I referenced are all about four years old. There used to be a module abbreviated as tfa (tensorflow_addons) that provided this layer out of the box, but it kept throwing errors after I installed it, so I gave up on it and wrote the custom layer above.
Differentiable data augmentation (DiffAugment)
# Differentiable data augmentation
with strategy.scope():
    def DiffAugment(x, policy='', channels_first=False):
        if policy:
            if channels_first:
                x = tf.transpose(x, [0, 2, 3, 1])
            for p in policy.split(','):
                for f in AUGMENT_FNS[p]:
                    x = f(x)
            if channels_first:
                x = tf.transpose(x, [0, 3, 1, 2])
        return x

    def rand_brightness(x):
        # Shift brightness by a random offset in [-0.5, 0.5), drawn per sample.
        magnitude = tf.random.uniform([tf.shape(x)[0], 1, 1, 1]) - 0.5
        x = x + magnitude
        return x

    def rand_saturation(x):
        # Scale the distance from the per-pixel channel mean by a factor in [0, 2).
        magnitude = tf.random.uniform([tf.shape(x)[0], 1, 1, 1]) * 2
        x_mean = tf.reduce_sum(x, axis=3, keepdims=True) / 3.0
        x = (x - x_mean) * magnitude + x_mean
        return x

    def rand_contrast(x):
        # Scale the distance from the per-image mean by a factor in [0.5, 1.5).
        # 5.086e-6 ~= 1 / (256 * 256 * 3), i.e. the sum becomes a mean.
        magnitude = tf.random.uniform([tf.shape(x)[0], 1, 1, 1]) + 0.5
        x_mean = tf.reduce_sum(x, axis=[1, 2, 3], keepdims=True) * 5.086e-6
        x = (x - x_mean) * magnitude + x_mean
        return x

    def rand_translation(x, ratio=0.125):
        # Shift each image by up to `ratio` of its size, padding with zeros.
        batch_size = tf.shape(x)[0]
        image_size = tf.shape(x)[1:3]
        shift = tf.cast(tf.cast(image_size, tf.float32) * ratio + 0.5, tf.int32)
        translation_x = tf.random.uniform([batch_size, 1], -shift[0], shift[0] + 1, dtype=tf.int32)
        translation_y = tf.random.uniform([batch_size, 1], -shift[1], shift[1] + 1, dtype=tf.int32)
        grid_x = tf.clip_by_value(tf.expand_dims(tf.range(image_size[0], dtype=tf.int32), 0) + translation_x + 1, 0, image_size[0] + 1)
        grid_y = tf.clip_by_value(tf.expand_dims(tf.range(image_size[1], dtype=tf.int32), 0) + translation_y + 1, 0, image_size[1] + 1)
        x = tf.gather_nd(tf.pad(x, [[0, 0], [1, 1], [0, 0], [0, 0]]), tf.expand_dims(grid_x, -1), batch_dims=1)
        x = tf.transpose(tf.gather_nd(tf.pad(tf.transpose(x, [0, 2, 1, 3]), [[0, 0], [1, 1], [0, 0], [0, 0]]), tf.expand_dims(grid_y, -1), batch_dims=1), [0, 2, 1, 3])
        return x

    def rand_cutout(x, ratio=0.5):
        # Zero out a random square covering `ratio` of each image's side length.
        batch_size = tf.shape(x)[0]
        image_size = tf.shape(x)[1:3]
        cutout_size = tf.cast(tf.cast(image_size, tf.float32) * ratio + 0.5, tf.int32)
        offset_x = tf.random.uniform([tf.shape(x)[0], 1, 1], maxval=image_size[0] + (1 - cutout_size[0] % 2), dtype=tf.int32)
        offset_y = tf.random.uniform([tf.shape(x)[0], 1, 1], maxval=image_size[1] + (1 - cutout_size[1] % 2), dtype=tf.int32)
        grid_batch, grid_x, grid_y = tf.meshgrid(tf.range(batch_size, dtype=tf.int32), tf.range(cutout_size[0], dtype=tf.int32), tf.range(cutout_size[1], dtype=tf.int32), indexing='ij')
        cutout_grid = tf.stack([grid_batch, grid_x + offset_x - cutout_size[0] // 2, grid_y + offset_y - cutout_size[1] // 2], axis=-1)
        mask_shape = tf.stack([batch_size, image_size[0], image_size[1]])
        cutout_grid = tf.maximum(cutout_grid, 0)
        cutout_grid = tf.minimum(cutout_grid, tf.reshape(mask_shape - 1, [1, 1, 1, 3]))
        mask = tf.maximum(1 - tf.scatter_nd(cutout_grid, tf.ones([batch_size, cutout_size[0], cutout_size[1]], dtype=tf.float32), mask_shape), 0)
        x = x * tf.expand_dims(mask, axis=3)
        return x

    def data_augment_flip(image):
        image = tf.image.random_flip_left_right(image)
        return image

    AUGMENT_FNS = {
        'color': [rand_brightness, rand_saturation, rand_contrast],
        'translation': [rand_translation],
        'cutout': [rand_cutout],
    }

    def aug_fn(image):
        return DiffAugment(image, "color,translation,cutout")

    def data_augment_color(image):
        image = tf.image.random_flip_left_right(image)
        image = DiffAugment(image, "color")
        return image
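Every operation above is built from differentiable tensor ops, which is the whole point: gradients can flow from the discriminator back through the augmentation into the generator. A minimal check (my addition):

imgs = tf.Variable(tf.random.uniform([2, 256, 256, 3]))
with tf.GradientTape() as tape:
    out = tf.reduce_sum(DiffAugment(imgs, "color,translation,cutout"))
print(tape.gradient(out, imgs) is not None)  # True: gradients flow through the augmentation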
DiffAugment applies the same differentiable augmentations to both real and generated images before the discriminator sees them, which keeps the discriminator from overfitting the small Monet set. With ordinary data augmentation my score only reached about 50, and most of the top-ranked notebooks appear to use this technique.
Building the full training dataset
def get_gan_dataset(monet_files, photo_files, augment=None, repeat=True, shuffle=True, batch_size=1):
    monet_ds = load_dataset(monet_files)
    photo_ds = load_dataset(photo_files)
    if repeat:
        monet_ds = monet_ds.repeat()
        photo_ds = photo_ds.repeat()
    if shuffle:
        monet_ds = monet_ds.shuffle(2048)
        photo_ds = photo_ds.shuffle(2048)
    monet_ds = monet_ds.batch(batch_size, drop_remainder=True)
    photo_ds = photo_ds.batch(batch_size, drop_remainder=True)
    if augment:
        monet_ds = monet_ds.map(augment, num_parallel_calls=AUTO)
        photo_ds = photo_ds.map(augment, num_parallel_calls=AUTO)
    monet_ds = monet_ds.prefetch(AUTO)
    photo_ds = photo_ds.prefetch(AUTO)
    gan_ds = tf.data.Dataset.zip((monet_ds, photo_ds))
    return gan_ds
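A quick peek at one zipped batch (my addition, using the default arguments):

peek = get_gan_dataset(MONET_FILENAMES, PHOTO_FILENAMES, batch_size=4)
monet_batch, photo_batch = next(iter(peek))
print(monet_batch.shape, photo_batch.shape)  # (4, 256, 256, 3) each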
Downsampling and upsampling blocks
def downsample(filters, size, apply_instancenorm=True):
    # Strided convolution halves the spatial resolution.
    initializer = tf.random_normal_initializer(0., 0.02)
    result = keras.Sequential()
    result.add(layers.Conv2D(filters, size, strides=2, padding='same',
                             kernel_initializer=initializer, use_bias=False))
    if apply_instancenorm:
        result.add(InstanceNormalization())
    result.add(layers.LeakyReLU())
    return result

def upsample(filters, size, apply_dropout=False):
    # Transposed convolution doubles the spatial resolution.
    initializer = tf.random_normal_initializer(0., 0.02)
    result = keras.Sequential()
    result.add(layers.Conv2DTranspose(filters, size, strides=2,
                                      padding='same',
                                      kernel_initializer=initializer,
                                      use_bias=False))
    result.add(InstanceNormalization())
    if apply_dropout:
        result.add(layers.Dropout(0.5))
    result.add(layers.ReLU())
    return result
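A shape check (my addition) makes the halving and doubling concrete:

x = tf.zeros([1, 256, 256, 3])
print(downsample(64, 4)(x).shape)                           # (1, 128, 128, 64)
print(upsample(64, 4)(tf.zeros([1, 128, 128, 64])).shape)   # (1, 256, 256, 64)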
Generator and discriminator
def Generator():
    inputs = layers.Input(shape=[256, 256, 3])
    down_stack = [
        downsample(64, 4, apply_instancenorm=False),  # (bs, 128, 128, 64)
        downsample(128, 4),  # (bs, 64, 64, 128)
        downsample(256, 4),  # (bs, 32, 32, 256)
        downsample(512, 4),  # (bs, 16, 16, 512)
        downsample(512, 4),  # (bs, 8, 8, 512)
        downsample(512, 4),  # (bs, 4, 4, 512)
        downsample(512, 4),  # (bs, 2, 2, 512)
        downsample(512, 4),  # (bs, 1, 1, 512)
    ]
    up_stack = [
        upsample(512, 4, apply_dropout=True),  # (bs, 2, 2, 1024)
        upsample(512, 4, apply_dropout=True),  # (bs, 4, 4, 1024)
        upsample(512, 4, apply_dropout=True),  # (bs, 8, 8, 1024)
        upsample(512, 4),  # (bs, 16, 16, 1024)
        upsample(256, 4),  # (bs, 32, 32, 512)
        upsample(128, 4),  # (bs, 64, 64, 256)
        upsample(64, 4),  # (bs, 128, 128, 128)
    ]
    initializer = tf.random_normal_initializer(0., 0.02)
    last = layers.Conv2DTranspose(OUTPUT_CHANNELS, 4,
                                  strides=2,
                                  padding='same',
                                  kernel_initializer=initializer,
                                  activation='tanh')  # (bs, 256, 256, 3)
    x = inputs
    # U-Net: downsample while collecting skip connections...
    skips = []
    for down in down_stack:
        x = down(x)
        skips.append(x)
    skips = reversed(skips[:-1])
    # ...then upsample, concatenating the matching skip at each resolution.
    for up, skip in zip(up_stack, skips):
        x = up(x)
        x = layers.Concatenate()([x, skip])
    x = last(x)
    return keras.Model(inputs=inputs, outputs=x)
def Discriminator():
    initializer = tf.random_normal_initializer(0., 0.02)
    inp = layers.Input(shape=[256, 256, 3], name='input_image')
    x = inp
    down1 = downsample(64, 4, False)(x)  # (bs, 128, 128, 64)
    down2 = downsample(128, 4)(down1)  # (bs, 64, 64, 128)
    down3 = downsample(256, 4)(down2)  # (bs, 32, 32, 256)
    zero_pad1 = layers.ZeroPadding2D()(down3)  # (bs, 34, 34, 256)
    conv = layers.Conv2D(512, 4, strides=1,
                         kernel_initializer=initializer,
                         use_bias=False)(zero_pad1)  # (bs, 31, 31, 512)
    norm1 = InstanceNormalization()(conv)
    leaky_relu = layers.LeakyReLU()(norm1)
    zero_pad2 = layers.ZeroPadding2D()(leaky_relu)  # (bs, 33, 33, 512)
    # PatchGAN head: one logit per overlapping input patch.
    last = layers.Conv2D(1, 4, strides=1,
                         kernel_initializer=initializer)(zero_pad2)  # (bs, 30, 30, 1)
    return tf.keras.Model(inputs=inp, outputs=last)
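The generator is a U-Net mapping a 256x256x3 image to another 256x256x3 image, while the discriminator is a PatchGAN emitting a 30x30 grid of logits, each judging one patch of the input. A quick shape check (my addition; OUTPUT_CHANNELS is only set in the next section, so the sketch sets it first):

OUTPUT_CHANNELS = 3  # also set below; needed here before calling Generator()
g, d = Generator(), Discriminator()
z = tf.zeros([1, 256, 256, 3])
print(g(z).shape, d(z).shape)  # (1, 256, 256, 3) (1, 30, 30, 1)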
Loading Data and Setting Parameters
Loading the data
monet_ds = load_dataset(MONET_FILENAMES, labeled=True).batch(1)
photo_ds = load_dataset(PHOTO_FILENAMES, labeled=True).batch(1)
Defining the generators and discriminators
BATCH_SIZE = 128
EPOCHS_NUM = 28  # defined but not used below; fit() is called with epochs=4
full_dataset = get_gan_dataset(MONET_FILENAMES, PHOTO_FILENAMES, augment=data_augment_flip, repeat=True, shuffle=False, batch_size=BATCH_SIZE)
OUTPUT_CHANNELS = 3

with strategy.scope():
    monet_generator = Generator()  # transforms photos into Monet-esque paintings
    photo_generator = Generator()  # transforms Monet paintings into photo-like images
    monet_discriminator = Discriminator()  # differentiates real and generated Monet paintings
    photo_discriminator = Discriminator()  # differentiates real and generated photos
A CycleGAN needs two generators and two discriminators, and it is trained with four losses: the discriminator loss, the generator (adversarial) loss, the cycle-consistency loss, and the identity loss. The connection between the two domains comes entirely from the cycle-consistency loss. Only random flipping is applied when building the dataset; DiffAugment is applied later inside the training step. The resulting objective is sketched below.
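To make the bookkeeping in train_step concrete: writing G for the Monet generator, F for the photo generator, x for a photo, y for a Monet, and lambda = lambda_cycle = 10, the total loss minimized for G below works out to (a sketch of the code's arithmetic; the loss for F is symmetric, and each norm is implemented as a mean absolute error):

\mathcal{L}_G = \mathcal{L}_{\mathrm{GAN}}(G) + \lambda\left(\lVert F(G(x)) - x \rVert_1 + \lVert G(F(y)) - y \rVert_1\right) + \tfrac{\lambda}{2}\,\lVert G(y) - y \rVert_1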
Defining the CycleGan model
class CycleGan(keras.Model):
    def __init__(
        self,
        monet_generator,
        photo_generator,
        monet_discriminator,
        photo_discriminator,
        lambda_cycle=10,
    ):
        super(CycleGan, self).__init__()
        self.m_gen = monet_generator
        self.p_gen = photo_generator
        self.m_disc = monet_discriminator
        self.p_disc = photo_discriminator
        self.lambda_cycle = lambda_cycle

    def compile(
        self,
        m_gen_optimizer,
        p_gen_optimizer,
        m_disc_optimizer,
        p_disc_optimizer,
        gen_loss_fn,
        disc_loss_fn,
        cycle_loss_fn,
        identity_loss_fn
    ):
        super(CycleGan, self).compile()
        self.m_gen_optimizer = m_gen_optimizer
        self.p_gen_optimizer = p_gen_optimizer
        self.m_disc_optimizer = m_disc_optimizer
        self.p_disc_optimizer = p_disc_optimizer
        self.gen_loss_fn = gen_loss_fn
        self.disc_loss_fn = disc_loss_fn
        self.cycle_loss_fn = cycle_loss_fn
        self.identity_loss_fn = identity_loss_fn

    def train_step(self, batch_data):
        real_monet, real_photo = batch_data
        batch_size = tf.shape(real_monet)[0]
        with tf.GradientTape(persistent=True) as tape:
            # Forward cycle: photo -> Monet -> photo; backward cycle: Monet -> photo -> Monet.
            fake_monet = self.m_gen(real_photo, training=True)
            cycled_photo = self.p_gen(fake_monet, training=True)
            fake_photo = self.p_gen(real_monet, training=True)
            cycled_monet = self.m_gen(fake_photo, training=True)
            # Identity mapping: each generator should leave its own domain unchanged.
            same_monet = self.m_gen(real_monet, training=True)
            same_photo = self.p_gen(real_photo, training=True)
            # DiffAugment real and fake Monets together before the discriminator sees them.
            both_monet = tf.concat([real_monet, fake_monet], axis=0)
            aug_monet = aug_fn(both_monet)
            aug_real_monet = aug_monet[:batch_size]
            aug_fake_monet = aug_monet[batch_size:]
            disc_real_monet = self.m_disc(aug_real_monet, training=True)
            disc_real_photo = self.p_disc(real_photo, training=True)
            disc_fake_monet = self.m_disc(aug_fake_monet, training=True)
            disc_fake_photo = self.p_disc(fake_photo, training=True)
            monet_gen_loss = self.gen_loss_fn(disc_fake_monet)
            photo_gen_loss = self.gen_loss_fn(disc_fake_photo)
            total_cycle_loss = self.cycle_loss_fn(real_monet, cycled_monet, self.lambda_cycle) + self.cycle_loss_fn(real_photo, cycled_photo, self.lambda_cycle)
            total_monet_gen_loss = monet_gen_loss + total_cycle_loss + self.identity_loss_fn(real_monet, same_monet, self.lambda_cycle)
            total_photo_gen_loss = photo_gen_loss + total_cycle_loss + self.identity_loss_fn(real_photo, same_photo, self.lambda_cycle)
            monet_disc_loss = self.disc_loss_fn(disc_real_monet, disc_fake_monet)
            photo_disc_loss = self.disc_loss_fn(disc_real_photo, disc_fake_photo)
        # The tape is persistent because four separate gradients are taken from it.
        monet_generator_gradients = tape.gradient(total_monet_gen_loss,
                                                  self.m_gen.trainable_variables)
        photo_generator_gradients = tape.gradient(total_photo_gen_loss,
                                                  self.p_gen.trainable_variables)
        monet_discriminator_gradients = tape.gradient(monet_disc_loss,
                                                      self.m_disc.trainable_variables)
        photo_discriminator_gradients = tape.gradient(photo_disc_loss,
                                                      self.p_disc.trainable_variables)
        self.m_gen_optimizer.apply_gradients(zip(monet_generator_gradients,
                                                 self.m_gen.trainable_variables))
        self.p_gen_optimizer.apply_gradients(zip(photo_generator_gradients,
                                                 self.p_gen.trainable_variables))
        self.m_disc_optimizer.apply_gradients(zip(monet_discriminator_gradients,
                                                  self.m_disc.trainable_variables))
        self.p_disc_optimizer.apply_gradients(zip(photo_discriminator_gradients,
                                                  self.p_disc.trainable_variables))
        return {
            "monet_gen_loss": total_monet_gen_loss,
            "photo_gen_loss": total_photo_gen_loss,
            "monet_disc_loss": monet_disc_loss,
            "photo_disc_loss": photo_disc_loss
        }
Defining the Four Losses and Training
with strategy.scope():
    def discriminator_loss(real, generated):
        real_loss = tf.keras.losses.BinaryCrossentropy(from_logits=True, reduction=tf.keras.losses.Reduction.NONE)(tf.ones_like(real), real)
        generated_loss = tf.keras.losses.BinaryCrossentropy(from_logits=True, reduction=tf.keras.losses.Reduction.NONE)(tf.zeros_like(generated), generated)
        total_disc_loss = real_loss + generated_loss
        return total_disc_loss * 0.5

    def generator_loss(generated):
        return tf.keras.losses.BinaryCrossentropy(from_logits=True,
                                                  reduction=tf.keras.losses.Reduction.NONE)(tf.ones_like(generated), generated)

    def calc_cycle_loss(real_image, cycled_image, LAMBDA):
        loss1 = tf.reduce_mean(tf.abs(real_image - cycled_image))
        return LAMBDA * loss1

    def identity_loss(real_image, same_image, LAMBDA):
        loss = tf.reduce_mean(tf.abs(real_image - same_image))
        return LAMBDA * 0.5 * loss

    monet_generator_optimizer = tf.keras.optimizers.Adam(1e-4, beta_1=0.5)
    photo_generator_optimizer = tf.keras.optimizers.Adam(1e-4, beta_1=0.5)
    monet_discriminator_optimizer = tf.keras.optimizers.Adam(1e-4, beta_1=0.5)
    photo_discriminator_optimizer = tf.keras.optimizers.Adam(1e-4, beta_1=0.5)

    cycle_gan_model = CycleGan(
        monet_generator, photo_generator, monet_discriminator, photo_discriminator)

    cycle_gan_model.compile(
        m_gen_optimizer=monet_generator_optimizer,
        p_gen_optimizer=photo_generator_optimizer,
        m_disc_optimizer=monet_discriminator_optimizer,
        p_disc_optimizer=photo_discriminator_optimizer,
        gen_loss_fn=generator_loss,
        disc_loss_fn=discriminator_loss,
        cycle_loss_fn=calc_cycle_loss,
        identity_loss_fn=identity_loss
    )

cycle_gan_model.fit(full_dataset, epochs=4, steps_per_epoch=max(n_monet_samples, n_photo_samples) // 4)
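To make the from_logits convention concrete, here is a small worked example (my addition): a confident discriminator produces large positive logits on real images and large negative logits on fakes, driving its loss toward zero.

real = tf.constant([[5.0]])   # confident logit for a real image
fake = tf.constant([[-5.0]])  # confident logit for a fake image
print(float(tf.reduce_mean(discriminator_loss(real, fake))))  # ~0.0067, i.e. near zero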
Submitting the Notebook
! mkdir ../images
i = 1
for photo in photo_ds:
    with strategy.scope():
        photo_to_monet = monet_generator(photo, training=False)[0].numpy()
    # Map back from [-1, 1] to [0, 255] pixel values.
    photo_to_monet = (photo_to_monet * 127.5 + 127.5).astype(np.uint8)
    im = PIL.Image.fromarray(photo_to_monet)
    im.save("../images/" + str(i) + ".jpg")
    i += 1
shutil.make_archive("/kaggle/working/images", 'zip', "/kaggle/images")
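As a final check (my addition; since the working directory on Kaggle is /kaggle/working, ../images resolves to /kaggle/images, the same directory the archive call reads from), the number of generated images should match the photo count:

print(len(os.listdir("/kaggle/images")), n_photo_samples)  # these should be equal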
With plain data augmentation the score only reached about 50.
Switching to differentiable augmentation afterwards did not improve it much.
A full run takes roughly an hour.
The best-written notebook on this approach is probably the one below, but running that author's code also scored only around 50:
https://www.kaggle.com/code/unfriendlyai/diffaugment-is-all-you-need
There are still plenty of ways to keep improving this, but I did not feel like reading more papers to push further.