1. Install and Configure MMagic
1.1 Install PyTorch
# Install PyTorch
!pip3 install torch==1.10.1+cu113 torchvision==0.11.2+cu113 torchaudio==0.10.1+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html
1.2 Install the MMCV and MMEngine Environment
!pip3 install openmim
!mim install 'mmcv>=2.0.0'
!mim install 'mmengine'
1.3 Install MMagic
Option 1: install with MIM
!mim install 'mmagic'
Option 2: install from source
!rm -rf mmagic  # remove any existing mmagic folder
!git clone https://github.com/open-mmlab/mmagic.git  # clone the mmagic source code
import os
os.chdir('mmagic')
!pip3 install -e .
1.4 Verify the Installation
# Check PyTorch
import torch, torchvision
print('PyTorch version:', torch.__version__)
print('CUDA available:', torch.cuda.is_available())
# Check MMCV
import mmcv
from mmcv.ops import get_compiling_cuda_version, get_compiler_version
print('MMCV version:', mmcv.__version__)
print('CUDA version:', get_compiling_cuda_version())
print('Compiler version:', get_compiler_version())
# Check MMagic
import mmagic
print('MMagic version:', mmagic.__version__)
1.5 Install Other Dependencies
!pip install opencv-python pillow matplotlib seaborn tqdm -i https://pypi.tuna.tsinghua.edu.cn/simple
!pip install clip transformers gradio 'httpx[socks]' diffusers==0.14.0 -i https://pypi.tuna.tsinghua.edu.cn/simple
!mim install 'mmdet>=3.0.0'
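Mirroring the checks in 1.4, a quick sanity check that MMDetection installed correctly:
# Check mmdet
import mmdet
print('MMDetection version:', mmdet.__version__)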
2. Application: Colorizing Black-and-White Photos
2.1 Enter the MMagic Root Directory
import os
os.chdir('mmagic')
2.2 Download a Sample Image
!wget https://zihao-openmmlab.obs.cn-east-3.myhuaweicloud.com/20230613-MMagic/data/test_colorization.jpg -O test_colorization.jpg
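To confirm the download, a quick preview with Pillow (in a notebook, the bare expression on the last line renders the image inline):
from PIL import Image
Image.open('test_colorization.jpg')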
2.3 Run Inference
!python demo/mmagic_inference_demo.py \
--model-name inst_colorization \
--img test_colorization.jpg \
--result-out-dir out_colorization.png
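A minimal sketch to inspect the colorized result, assuming it is written to the path given via --result-out-dir:
from PIL import Image
Image.open('out_colorization.png')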
3. Application: Text-to-Image with Stable Diffusion
3.1 Import the Toolkit
from mmagic.apis import MMagicInferencer
3.2 Load the Model
sd_inferencer = MMagicInferencer(model_name='stable_diffusion')
3.3 Specify the Prompt Text
text_prompts = 'A panda is having dinner at KFC'
# the second assignment overrides the first; keep whichever prompt you want
text_prompts = 'A Persian cat walking in the streets of New York'
3.4 Run Inference
sd_inferencer.infer(text=text_prompts, result_out_dir='output/sd_res.png')
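To generate one image per prompt rather than overwriting a single output file, a sketch that loops over several prompts (the file-naming scheme is illustrative):
prompts = [
    'A panda is having dinner at KFC',
    'A Persian cat walking in the streets of New York',
]
for i, p in enumerate(prompts):
    # each call writes its result to a distinct file
    sd_inferencer.infer(text=p, result_out_dir=f'output/sd_res_{i}.png')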
4. Application: Text-to-Image with DreamBooth
4.1 Enter the MMagic Root Directory
import os
os.chdir('mmagic')
4.2 Train DreamBooth on the Dataset
!bash tools/dist_train.sh configs/dreambooth/dreambooth-lora.py 1
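After training, checkpoints are written under work_dirs/ (the inference step below loads work_dirs/dreambooth-lora/iter_1000.pth); a quick sketch to list what was saved:
import os
ckpt_dir = 'work_dirs/dreambooth-lora'
print(sorted(f for f in os.listdir(ckpt_dir) if f.endswith('.pth')))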
4.3 Run Inference with the Trained Model
import torch
from mmengine import Config
from mmagic.registry import MODELS
from mmagic.utils import register_all_modules
register_all_modules()
cfg = Config.fromfile('configs/dreambooth/dreambooth-lora.py')  # path relative to the mmagic root entered in 4.1
dreambooth_lora = MODELS.build(cfg.model)
state = torch.load('work_dirs/dreambooth-lora/iter_1000.pth')['state_dict']
def convert_state_dict(state):
    # Rename checkpoint keys to match the model built from the config:
    # strip the '.module' prefix added by model wrappers, and map the
    # VAE attention projection names (to_q/to_k/to_v/to_out) to the
    # query/key/value/proj_attn naming expected here.
    state_dict_new = {}
    for k, v in state.items():
        if '.module' in k:
            k_new = k.replace('.module', '')
        else:
            k_new = k
        if 'vae' in k:
            if 'to_q' in k:
                k_new = k.replace('to_q', 'query')
            elif 'to_k' in k:
                k_new = k.replace('to_k', 'key')
            elif 'to_v' in k:
                k_new = k.replace('to_v', 'value')
            elif 'to_out' in k:
                k_new = k.replace('to_out.0', 'proj_attn')
        state_dict_new[k_new] = v
    return state_dict_new
dreambooth_lora.load_state_dict(convert_state_dict(state))
dreambooth_lora = dreambooth_lora.cuda()
samples = dreambooth_lora.infer('side view of sks dog', guidance_scale=5)
samples['samples'][0]
samples = dreambooth_lora.infer('ear close-up of sks dog', guidance_scale=5)
samples['samples'][0]
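In a notebook the bare expressions above render the images inline; to persist them instead, a sketch assuming the returned samples are PIL images (they are saved with .save() the same way in the ControlNet sections below):
for i, img in enumerate(samples['samples']):
    img.save(f'dreambooth_sample_{i}.png')  # illustrative file names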
5. Application: Image-to-Image with ControlNet-Canny
5.1 Enter the MMagic Root Directory
import os
os.chdir('mmagic')
5.2 Import the Toolkits
import cv2
import numpy as np
import mmcv
from mmengine import Config
from PIL import Image
from mmagic.registry import MODELS
from mmagic.utils import register_all_modules
register_all_modules()
5.3 Load the ControlNet Model
cfg = Config.fromfile('configs/controlnet/controlnet-canny.py')
controlnet = MODELS.build(cfg.model).cuda()
5.4 Input a Canny Edge Map
control_url = 'https://user-images.githubusercontent.com/28132635/230288866-99603172-04cb-47b3-8adb-d1aa532d1d2c.jpg'
control_img = mmcv.imread(control_url)
control = cv2.Canny(control_img, 100, 200)
control = control[:, :, None]
control = np.concatenate([control] * 3, axis=2)
control = Image.fromarray(control)
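To sanity-check the edge map before generation, save it to disk (the file name is illustrative); evaluating control in a notebook also displays it inline:
control.save('canny_edges.png')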
5.5 Prompt
prompt = 'Room with blue walls and a yellow ceiling.'
5.6 Run Inference
output_dict = controlnet.infer(prompt, control=control)
samples = output_dict['samples']
for idx, sample in enumerate(samples):
    sample.save(f'sample_{idx}.png')
controls = output_dict['controls']
for idx, control in enumerate(controls):
    control.save(f'control_{idx}.png')
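To compare the generated samples side by side in a notebook, a sketch using matplotlib (installed in 1.5); squeeze=False keeps indexing uniform even with a single sample:
import matplotlib.pyplot as plt
fig, axes = plt.subplots(1, len(samples), figsize=(4 * len(samples), 4), squeeze=False)
for ax, img in zip(axes[0], samples):
    ax.imshow(img)  # imshow accepts PIL images directly
    ax.axis('off')
plt.show()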
6. Application: Image-to-Image with ControlNet-Pose
6.1 Enter the MMagic Root Directory
import os
os.chdir('mmagic')
6.2 Import the Toolkits
import mmcv
from mmengine import Config
from PIL import Image
from mmagic.registry import MODELS
from mmagic.utils import register_all_modules
register_all_modules()
6.3 Load the ControlNet Model
cfg = Config.fromfile('configs/controlnet/controlnet-pose.py')
# convert ControlNet's weight from SD-v1.5 to Counterfeit-v2.5
cfg.model.unet.from_pretrained = 'gsdf/Counterfeit-V2.5'
cfg.model.vae.from_pretrained = 'gsdf/Counterfeit-V2.5'
cfg.model.init_cfg['type'] = 'convert_from_unet'
controlnet = MODELS.build(cfg.model).cuda()
# call init_weights manually to convert weight
controlnet.init_weights()
6.4 Prompt
prompt = 'masterpiece, best quality, sky, black hair, skirt, sailor collar, looking at viewer, short hair, building, bangs, neckerchief, long sleeves, cloudy sky, power lines, shirt, cityscape, pleated skirt, scenery, blunt bangs, city, night, black sailor collar, closed mouth'
6.5 Input the Pose Image
control_url = 'https://user-images.githubusercontent.com/28132635/230380893-2eae68af-d610-4f7f-aa68-c2f22c2abf7e.png'
control_img = mmcv.imread(control_url)
control = Image.fromarray(control_img)
control.save('control.png')
6.6 Run Inference
output_dict = controlnet.infer(prompt, control=control, width=512, height=512, guidance_scale=7.5)
samples = output_dict['samples']
for idx, sample in enumerate(samples):
    sample.save(f'sample_{idx}.png')
controls = output_dict['controls']
for idx, control in enumerate(controls):
    control.save(f'control_{idx}.png')
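To eyeball the pose conditioning against the result in a notebook, a small matplotlib sketch (panel titles are illustrative):
import matplotlib.pyplot as plt
fig, axes = plt.subplots(1, 2, figsize=(8, 4))
axes[0].imshow(controls[0])
axes[0].set_title('pose control')
axes[0].axis('off')
axes[1].imshow(samples[0])
axes[1].set_title('generated sample')
axes[1].axis('off')
plt.show()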
7. Image-to-Image with ControlNet Animation
7.1 Option 1: Gradio Command Line
!python demo/gradio_controlnet_animation.py
7.2 Option 2: MMagic API
# Import the toolkit
from mmagic.apis import MMagicInferencer
# Create an MMagicInferencer instance and infer
editor = MMagicInferencer(model_name='controlnet_animation')
# Specify the prompt
prompt = 'a girl, black hair, T-shirt, smoking, best quality, extremely detailed'
negative_prompt = 'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
# Input video
# https://user-images.githubusercontent.com/12782558/227418400-80ad9123-7f8e-4c1a-8e19-0892ebad2a4f.mp4
video = '../run_forrest_frames_rename_resized.mp4'
save_path = '../output_video.mp4'
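# If the local sample video is missing, it can be fetched first from the
# URL above (a sketch; assumes wget is available in the environment):
!wget https://user-images.githubusercontent.com/12782558/227418400-80ad9123-7f8e-4c1a-8e19-0892ebad2a4f.mp4 -O ../run_forrest_frames_rename_resized.mp4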
# Run inference
editor.infer(video=video, prompt=prompt, image_width=512, image_height=512, negative_prompt=negative_prompt, save_path=save_path)
This concludes the Day 11 notes of the OpenMMLab AI Hands-on Camp (Cohort 2): a code walkthrough of the AIGC toolkit MMagic.