您的位置:

探究DeepDream:AI绘画技术的奥秘

DeepDream 是 Google 于 2015 年推出的一种基于卷积神经网络的图像可视化算法。它利用一个已经训练好的神经网络模型,放大网络在输入图像中识别出的特征,从而将图像转化为带有梦幻色彩的“梦境”图像。这项技术的诞生给艺术创作带来了极大的启发,也为人工智能的探索开辟了新的方向。本文将从 DeepDream 软件、热气球 DeepDream、DeepDream 的含义、Deep Dream Generator、AI 生成画网站等多个方面对这项技术进行详细阐述。

一、DeepDream软件

在谈论DeepDream时,不得不提到这款由Google开源的DeepDream软件。它可以让使用者上传自己的图片,然后通过该算法分析图片,将其转化为“梦境”图像。用户只需选择“magnify layer”或“level of zoom”,就能够让计算机开始工作,并将处理后的图片输出为一幅意想不到的艺术作品。


import urllib.request
import zipfile

import numpy as np
import PIL.Image
import tensorflow as tf

model_url = 'https://storage.googleapis.com/download.tensorflow.org/models/inception5h.zip'
model_fn = 'inception5h.zip'
layers = ['model/pool_3:0']
# Fetch the pretrained model archive and unpack it into the working directory
download_model = urllib.request.urlretrieve(model_url, model_fn)
# The context manager closes the archive even if extraction raises
with zipfile.ZipFile(model_fn, 'r') as zip_ref:
    zip_ref.extractall()
graph = tf.Graph()
sess = tf.InteractiveSession(graph=graph)
# Read the whole serialized graph definition into memory
with tf.gfile.FastGFile('tensorflow_inception_graph.pb', 'rb') as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())
# Import the in-memory graph definition into the computation graph
t_input = tf.placeholder(np.float32, name='input')  # placeholder for the input image
imagenet_mean = 117.0  # mean subtracted during preprocessing (ImageNet dataset mean)
# Add a leading batch dimension after mean subtraction, then feed it as the graph's 'input'
t_preprocessed = tf.expand_dims(t_input-imagenet_mean, 0)
tf.import_graph_def(graph_def, {'input':t_preprocessed})

# Fetch one layer of the imported image model
def T(layer):
    """Return the tensor named ``"import/" + layer`` from the module-level ``graph``."""
    tensor_name = "import/" + layer
    return graph.get_tensor_by_name(tensor_name)

# Evaluate a gradient tensor over an image, one tile at a time
def calc_grad_tiled(img, t_grad, tile_size=512):
    """Evaluate ``t_grad`` on ``img`` tile by tile and return the assembled gradient.

    The image is rolled by a random offset along both axes before tiling and
    the resulting gradient is rolled back by the same offset afterwards.

    Args:
        img: image array; its first two axes are treated as rows and columns.
        t_grad: tensor evaluated with the module-level ``sess``, feeding each
            tile through the module-level ``t_input`` placeholder.
        tile_size: edge length of the square tiles.

    Returns:
        An array shaped like ``img`` holding the per-tile results.
    """
    height, width = img.shape[:2]
    shift_x, shift_y = np.random.randint(tile_size, size=2)
    # Roll columns first, then rows, by the random offsets
    shifted = np.roll(img, shift_x, 1)
    shifted = np.roll(shifted, shift_y, 0)
    grad = np.zeros_like(img)
    row_stop = max(height - tile_size // 2, tile_size)
    col_stop = max(width - tile_size // 2, tile_size)
    for top in range(0, row_stop, tile_size):
        bottom = top + tile_size
        for left in range(0, col_stop, tile_size):
            right = left + tile_size
            tile = shifted[top:bottom, left:right]
            grad[top:bottom, left:right] = sess.run(t_grad, {t_input: tile})
    # Undo the random roll so the gradient lines up with the original image
    unrolled = np.roll(grad, -shift_x, 1)
    return np.roll(unrolled, -shift_y, 0)
# Main body of the algorithm
def render_deepdream(t_obj, img0=img_noise, iter_n=10, step=1.5, octave_n=4, octave_scale=1.4):
    """Run multi-octave gradient ascent on an image to maximize the mean of ``t_obj``.

    Args:
        t_obj: tensor whose mean is used as the optimization objective.
        img0: starting image array. It is copied and never modified.
        iter_n: number of gradient steps per octave.
        step: step size applied to the mean-normalized gradient.
        octave_n: number of octaves processed.
        octave_scale: size ratio between successive octaves.

    Returns:
        The resulting image array divided by 255.0.
    """
    t_score = tf.reduce_mean(t_obj)
    t_grad = tf.gradients(t_score, t_input)[0]
    # Work on a copy: with octave_n == 1 the in-place `+=` below would
    # otherwise write into the caller's array (or the shared default).
    img = img0.copy()
    # Split the image into octaves, keeping the detail lost at each downscale.
    # NOTE(review): `resize` is called here with a target (height, width);
    # confirm the helper in scope takes a size rather than a scale factor.
    octaves = []
    for _ in range(octave_n-1):
        hw = img.shape[:2]
        lo = resize(img, np.int32(np.float32(hw)/octave_scale))
        hi = img-resize(lo, hw)
        img = lo
        octaves.append(hi)
    # Generate details octave by octave, from the smallest image up
    for octave in range(octave_n):
        if octave > 0:
            # Upscale and add back the detail stored for this octave
            hi = octaves[-octave]
            img = resize(img, hi.shape[:2])+hi
        for _ in range(iter_n):
            g = calc_grad_tiled(img, t_grad)
            # Normalize by the mean absolute gradient; 1e-7 avoids division by zero
            img += g*(step / (np.abs(g).mean()+1e-7))
    return img / 255.0

二、热气球 DeepDream

DeepDream 的诞生,归功于 Google 的工程师 Alexander Mordvintsev。一天晚上,Mordvintsev 在梦中对人脑活动的模拟漫游产生了灵感,随后他就开始尝试用神经网络在计算机上模拟人类的视觉系统。据 Mordvintsev 自己介绍,他最初的试验素材是一张鲜红色的热气球图片,这便是 DeepDream 的第一个图案。对于这张看上去蔚为壮观的图案,Mordvintsev 自己都很难解释清楚何为 Deep Dream。

三、DeepDream的意义

DeepDream算法出现之后,对科研领域和艺术领域都产生了很大的影响。在计算机视觉和图像识别领域,它能够帮助开发者探究深度神经网络的内部机制,并进一步拓展其识别能力。在艺术领域,使用这一技术可以让艺术家或设计师在短时间内实现自己理想的艺术创意,帮助他们发掘自身的创作潜力。而在人工智能领域,深度学习技术可以为我们提供更大的编程灵活性,提升机器自学习的能力,开创机器智能的新纪元。

四、Deep Dream Generator

Deep Dream Generator是一个网站,它用人工智能(AI)的方式来生成艺术作品。用户可以上传自己选择的图片,并借助 Deep Style 等功能对其进行程序化的艺术加工。通过在不同的图片之间选择和调整不同的参数,这个训练好的神经网络模型可以深入探究图像的每个角落,并生成具有艺术感的梦幻般的新图像。


# A Python Script for generating Deep Dreams:  
import os, numpy as np, PIL.Image, time, argparse, functools
import tensorflow as tf
import matplotlib.pyplot as plt
from io import BytesIO

# Location of the pretrained Inception archive and its local file name
model_url = "https://storage.googleapis.com/download.tensorflow.org/models/inception5h.zip"
model_fn = 'inception5h.zip'

# Layer names of interest.
# NOTE(review): 'mixed4a_3x3_bottleneck_pre_relu' is listed twice; confirm
# whether the duplicate is intentional.
layers = [
    'mixed3a_1x1_pre_relu',
    'mixed5b_3x3_pre_relu',
    'mixed4a_3x3_bottleneck_pre_relu',
    'mixed4e_3x3_bottleneck_pre_relu',
    'mixed4e_5x5_bottleneck_pre_relu',
    'mixed4a_5x5_pre_relu',
    'mixed4a_3x3_bottleneck_pre_relu',
    'mixed4e_pool_reduce_pre_relu',
    'mixed3a_3x3_bottleneck_pre_relu',
    'mixed4b_1x1_pre_relu',
    'mixed4b_5x5_pre_relu',
    'mixed5b_5x5_pre_relu',
    'mixed5a_3x3_pre_relu',
    'mixed4a_1x1_pre_relu',
    'mixed3a_5x5_pre_relu',
    'mixed4e_1x1_pre_relu',
]

# Tuning parameters
num_iterations = 10
step_size = 1.5
rescale_factor = 0.7

# 224x224 RGB noise image with values in [100.0, 101.0)
img_noise = 100.0 + np.random.uniform(size=(224, 224, 3))

# Lazily populated by get_model()
session = graph = model = None

def get_model():
    """Load the Inception graph on first use and return a scoring callable.

    On the first call the archive ``model_fn`` is downloaded from ``model_url``
    if it is not already on disk, unpacked, and imported into a fresh graph;
    the module-level ``session``, ``graph`` and ``model`` are then set. Later
    calls reuse those globals.

    Returns:
        A callable ``inception(img)`` that feeds ``img`` to the
        ``import/input:0`` tensor and returns
        ``session.run(logits, ...)[:, np.argmax(model_labels)]``.

    Raises:
        requests.HTTPError: if the download responds with an error status.
    """
    global session, graph, model

    # make sure we have a model
    if model is None:
        print(f"Loading model from {model_fn} ...")
        start = time.monotonic()

        # downloader
        if not os.path.exists(model_fn):
            import requests
            print(f"Downloading: {model_url} ...")
            response = requests.get(model_url)
            # Fail loudly instead of saving an HTTP error page as the archive
            response.raise_for_status()
            with open(model_fn, 'wb') as archive:
                archive.write(response.content)

        # unpack
        import zipfile
        model_dir = os.path.splitext(model_fn)[0]
        # Extract into model_dir so the .pb path read below actually exists.
        # NOTE(review): assumes the archive stores its members at the root
        # (no enclosing folder) - confirm against the downloaded zip.
        with zipfile.ZipFile(model_fn) as zip_ref:
            zip_ref.extractall(model_dir)

        with open(os.path.join(model_dir, 'tensorflow_inception_graph.pb'), 'rb') as f:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())

        # start session
        # Build the graph first and bind the session to it; a session created
        # on the default graph cannot run tensors that live in `graph`.
        graph = tf.Graph()
        with graph.as_default():
            tf.import_graph_def(graph_def)
            init_op = tf.global_variables_initializer()
        session = tf.InteractiveSession(graph=graph)
        session.run(init_op)
        model = graph.get_tensor_by_name("import/input:0")
        print(f"Took {time.monotonic()-start:.3f} seconds")
    else:
        print("Using cached model")

    # Built on every call (cheap) so the cached path also has a callable to
    # return; previously `inception` was unbound when the model was cached.
    logits = graph.get_tensor_by_name("import/softmax2:0")

    def inception(img):
        # NOTE(review): `model_labels` is not defined in this function; it
        # must exist at module scope by the time this callable is invoked.
        return session.run(logits, feed_dict={model: img})[:, np.argmax(model_labels)]

    return inception

def resize(img, scale):
    """Return ``img`` rescaled by ``scale`` using nearest-neighbour sampling.

    The previous implementation used ``ndarray.resize``, which only truncates
    or zero-pads the flat buffer and therefore scrambled the picture. This
    version picks, for every output pixel, the source pixel at the
    corresponding position.

    Args:
        img: array whose first two axes are rows and columns; any trailing
            axes (e.g. channels) are carried through unchanged.
        scale: multiplicative factor applied to both rows and columns.

    Returns:
        ``img`` itself when ``scale == 1``; otherwise a new array whose first
        two dimensions are ``int(rows * scale)`` and ``int(cols * scale)``.
    """
    if scale == 1:
        return img

    factor = float(scale)
    src_h, src_w = img.shape[:2]
    new_h, new_w = (np.array([src_h, src_w]) * factor).astype(int)
    # Map each output index back to its source index; clamp to stay in bounds
    rows = np.minimum((np.arange(new_h) / factor).astype(int), src_h - 1)
    cols = np.minimum((np.arange(new_w) / factor).astype(int), src_w - 1)
    return img[rows[:, None], cols]

def calc_grad_tiled(img, model, tile_size=512, fmt='png'):
    if isinstance(img, str):
        # load image from file
        try:
            img = PIL.Image.open(img)
        except Exception as e:
            print(e)
            return None

    # rescale image if required
    initial_shape = img.size[::-1]
    scale = float(tile_size) / np.max(initial_shape)
    img = resize(img, scale)

    # important variables
    x = img
    a = img_noise.copy()

    start_x, end_x = 0, 0
    start_y, end_y = 0, 0

    # select random tiles to migrate
    shift = np.array([tile_size/2.0, tile_size/2.0])
    ox = np.random.randint(-tile_size, tile_size)
    oy = np.random.randint(-tile_size, tile_size)
    ox = (ox/scale) + shift[0]
    oy = (oy/scale) + shift[1]

    # begin migration
    x1 = np.roll(np.roll(x, int(ox-shift[0]), axis=1), int(oy-shift[1]), axis=0)
    a1 = np.roll(np.roll(a, int(ox-shift[0]), axis=1), int(oy-shift[1]), axis=0)

    def get_tile():
        # detemine the tiles for this iteration
        nonlocal x, x1, a, a1, start_x, end_x, start_y, end_y
        sz = tile_size
        
        start_x += np.random.randint(sz/2, size=1)[0]
        start_y += np.random.randint(sz/2, size=1)[0]
        end_x = min(start_x+sz, x.shape[1])
        end_y = min(start_y+sz, x.shape[0])
        
        x_slice = x[start_y:end_y, start_x:end_x, :]
        a_slice = a1[start_y:end_y, start_x:end_x, :]

        return x_slice, a_slice

    def update_tile(g):
        nonlocal x, x1, a, a1, start_x, end_x, start_y, end_y
        x_slice = x[start_y:end_y, start_x:end_x, :]
        a_slice = a1[start_y:end_y, start_x:end_x, :]
            
        x_grad = x_slice * (rescale_factor**2) * g
        tile_image = PIL.Image.fromarray(np.uint8(np.clip(x_slice + x_grad, 0, 255)))

        # save tile gradient
        a_slice += x_grad
        a1[start_y:end_y, start_x:end_x, :] = a_slice
        a = np.roll(np.roll(a1, int(-(ox-shift[0])), axis=1), int(-(oy-shift[1])), axis=0)

        tile_buffer = BytesIO()
        tile_image.save(tile_buffer, format=fmt)
        tile_buffer.seek(0)

        img_tile = PIL.Image.open(tile_buffer)
        x[start_y:end_y, start_x:end_x, :] = np.array(img_tile)
        x1 = np.roll(np.roll(x, int(-(ox-shift[0])), axis=1), int(-(oy-shift[1])), axis=0)

    model_labels = np.loadtxt("synset.txt