add clip vision and remove shits
This commit is contained in:
@@ -1,133 +0,0 @@
|
||||
import os
|
||||
import cv2
|
||||
import torch
|
||||
|
||||
from modules import devices
|
||||
from modules.modelloader import load_file_from_url
|
||||
from annotator.annotator_path import models_path
|
||||
from transformers import CLIPVisionModelWithProjection, CLIPVisionConfig, CLIPImageProcessor
|
||||
|
||||
|
||||
# Hyper-parameter tables for the three supported CLIP vision towers.
# Each table is expanded as keyword arguments into ``CLIPVisionConfig``.

# Selected by the 'clip_g' key.
config_clip_g = dict(
    attention_dropout=0.0,
    dropout=0.0,
    hidden_act="gelu",
    hidden_size=1664,
    image_size=224,
    initializer_factor=1.0,
    initializer_range=0.02,
    intermediate_size=8192,
    layer_norm_eps=1e-05,
    model_type="clip_vision_model",
    num_attention_heads=16,
    num_channels=3,
    num_hidden_layers=48,
    patch_size=14,
    projection_dim=1280,
    torch_dtype="float32",
)

# Selected by the 'clip_h' key.
config_clip_h = dict(
    attention_dropout=0.0,
    dropout=0.0,
    hidden_act="gelu",
    hidden_size=1280,
    image_size=224,
    initializer_factor=1.0,
    initializer_range=0.02,
    intermediate_size=5120,
    layer_norm_eps=1e-05,
    model_type="clip_vision_model",
    num_attention_heads=16,
    num_channels=3,
    num_hidden_layers=32,
    patch_size=14,
    projection_dim=1024,
    torch_dtype="float32",
)

# Selected by the 'clip_vitl' key; the only table using the "quick_gelu" activation.
config_clip_vitl = dict(
    attention_dropout=0.0,
    dropout=0.0,
    hidden_act="quick_gelu",
    hidden_size=1024,
    image_size=224,
    initializer_factor=1.0,
    initializer_range=0.02,
    intermediate_size=4096,
    layer_norm_eps=1e-05,
    model_type="clip_vision_model",
    num_attention_heads=16,
    num_channels=3,
    num_hidden_layers=24,
    patch_size=14,
    projection_dim=768,
    torch_dtype="float32",
)

# Lookup from the short model key to its hyper-parameter table.
configs = dict(
    clip_g=config_clip_g,
    clip_h=config_clip_h,
    clip_vitl=config_clip_vitl,
)

# Lookup from the short model key to the URL its weights are downloaded from.
downloads = dict(
    clip_vitl='https://huggingface.co/openai/clip-vit-large-patch14/resolve/main/pytorch_model.bin',
    clip_g='https://huggingface.co/lllyasviel/Annotators/resolve/main/clip_g.pth',
    clip_h='https://huggingface.co/h94/IP-Adapter/resolve/main/models/image_encoder/pytorch_model.bin',
)
|
||||
|
||||
|
||||
# Tensors stored under the 'uc' key of two data files that sit next to this module.
# Both are read at import time and mapped to CUDA when available, otherwise to the CPU.
clip_vision_h_uc = torch.load(
    os.path.join(os.path.dirname(os.path.abspath(__file__)), 'clip_vision_h_uc.data'),
    map_location=torch.device('cuda' if torch.cuda.is_available() else 'cpu'),
)['uc']

clip_vision_vith_uc = torch.load(
    os.path.join(os.path.dirname(os.path.abspath(__file__)), 'clip_vision_vith_uc.data'),
    map_location=torch.device('cuda' if torch.cuda.is_available() else 'cpu'),
)['uc']
|
||||
|
||||
|
||||
class ClipVisionDetector:
    """Download, build and run one of the CLIP vision towers listed in ``downloads``.

    The weights are fetched on first use into ``<models_path>/clip_vision/<config>.pth``
    and loaded into a ``CLIPVisionModelWithProjection`` built from ``configs[config]``.
    """

    def __init__(self, config, low_vram: bool):
        """Prepare the model identified by ``config``.

        Args:
            config: Key into ``downloads`` / ``configs``.
            low_vram: When true the model is kept on the CPU; otherwise it is placed
                on the device returned by ``devices.get_device_for("controlnet")``.

        Raises:
            ValueError: If ``config`` is not a known model key.
        """
        # Explicit check instead of ``assert`` so it still runs under ``python -O``.
        if config not in downloads:
            raise ValueError(
                f'Unknown CLIP vision config {config!r}; expected one of {sorted(downloads)}'
            )

        self.download_link = downloads[config]
        self.model_path = os.path.join(models_path, 'clip_vision')
        self.file_name = config + '.pth'
        self.config = configs[config]
        self.device = (
            torch.device("cpu") if low_vram else
            devices.get_device_for("controlnet")
        )

        os.makedirs(self.model_path, exist_ok=True)
        file_path = os.path.join(self.model_path, self.file_name)
        if not os.path.exists(file_path):
            load_file_from_url(url=self.download_link, model_dir=self.model_path, file_name=self.file_name)

        # Separate name so the ``config`` key passed by the caller is not shadowed.
        vision_config = CLIPVisionConfig(**self.config)

        self.model = CLIPVisionModelWithProjection(vision_config)
        self.processor = CLIPImageProcessor(crop_size=224,
                                            do_center_crop=True,
                                            do_convert_rgb=True,
                                            do_normalize=True,
                                            do_resize=True,
                                            image_mean=[0.48145466, 0.4578275, 0.40821073],
                                            image_std=[0.26862954, 0.26130258, 0.27577711],
                                            resample=3,
                                            size=224)

        # NOTE(review): torch.load unpickles the downloaded checkpoint; only trusted
        # download URLs should ever be added to ``downloads``.
        sd = torch.load(file_path, map_location=self.device)
        # strict=False: keys that do not match the model are ignored rather than raising.
        self.model.load_state_dict(sd, strict=False)
        del sd

        self.model.to(self.device)
        self.model.eval()

    def unload_model(self):
        """Move the model to the 'meta' device."""
        if self.model is not None:
            self.model.to('meta')

    def __call__(self, input_image):
        """Encode ``input_image`` and return the model outputs as a plain dict.

        The image is resized to 224x224 with OpenCV, run through the image processor,
        and passed to the model with ``output_hidden_states=True``. Every tensor in the
        result, including each entry of ``hidden_states``, is placed on ``self.device``.
        """
        with torch.no_grad():
            input_image = cv2.resize(input_image, (224, 224), interpolation=cv2.INTER_AREA)
            feat = self.processor(images=input_image, return_tensors="pt")
            feat['pixel_values'] = feat['pixel_values'].to(self.device)
            result = self.model(**feat, output_hidden_states=True)
            result['hidden_states'] = [v.to(self.device) for v in result['hidden_states']]
            result = {k: v.to(self.device) if isinstance(v, torch.Tensor) else v for k, v in result.items()}
        return result
|
||||
BIN
Binary file not shown.
BIN
Binary file not shown.
+96
-96
@@ -393,86 +393,86 @@ legacy_preprocessors = {
|
||||
"Instant_ID"
|
||||
]
|
||||
},
|
||||
"ip-adapter_clip_sd15": {
|
||||
"label": "ip-adapter_clip_sd15",
|
||||
"call_function": functools.partial(clip, config='clip_h'),
|
||||
"unload_function": functools.partial(unload_clip, config='clip_h'),
|
||||
"managed_model": "unknown",
|
||||
"model_free": False,
|
||||
"no_control_mode": True,
|
||||
"resolution": None,
|
||||
"slider_1": None,
|
||||
"slider_2": None,
|
||||
"slider_3": None,
|
||||
"priority": 100,
|
||||
"tags": [
|
||||
"IP-Adapter"
|
||||
]
|
||||
},
|
||||
"ip-adapter_clip_sdxl": {
|
||||
"label": "ip-adapter_clip_sdxl",
|
||||
"call_function": functools.partial(clip, config='clip_g'),
|
||||
"unload_function": functools.partial(unload_clip, config='clip_g'),
|
||||
"managed_model": "unknown",
|
||||
"model_free": False,
|
||||
"no_control_mode": True,
|
||||
"resolution": None,
|
||||
"slider_1": None,
|
||||
"slider_2": None,
|
||||
"slider_3": None,
|
||||
"priority": 0,
|
||||
"tags": [
|
||||
"IP-Adapter"
|
||||
]
|
||||
},
|
||||
"ip-adapter_clip_sdxl_plus_vith": {
|
||||
"label": "ip-adapter_clip_sdxl_plus_vith",
|
||||
"call_function": functools.partial(clip, config='clip_h'),
|
||||
"unload_function": functools.partial(unload_clip, config='clip_h'),
|
||||
"managed_model": "unknown",
|
||||
"model_free": False,
|
||||
"no_control_mode": True,
|
||||
"resolution": None,
|
||||
"slider_1": None,
|
||||
"slider_2": None,
|
||||
"slider_3": None,
|
||||
"priority": 0,
|
||||
"tags": [
|
||||
"IP-Adapter"
|
||||
]
|
||||
},
|
||||
"ip-adapter_face_id": {
|
||||
"label": "ip-adapter_face_id",
|
||||
"call_function": g_insight_face_model.run_model,
|
||||
"unload_function": None,
|
||||
"managed_model": "g_insight_face_model",
|
||||
"model_free": False,
|
||||
"no_control_mode": True,
|
||||
"resolution": None,
|
||||
"slider_1": None,
|
||||
"slider_2": None,
|
||||
"slider_3": None,
|
||||
"priority": 0,
|
||||
"tags": [
|
||||
"IP-Adapter"
|
||||
]
|
||||
},
|
||||
"ip-adapter_face_id_plus": {
|
||||
"label": "ip-adapter_face_id_plus",
|
||||
"call_function": face_id_plus,
|
||||
"unload_function": functools.partial(unload_clip, config='clip_h'),
|
||||
"managed_model": "unknown",
|
||||
"model_free": False,
|
||||
"no_control_mode": True,
|
||||
"resolution": None,
|
||||
"slider_1": None,
|
||||
"slider_2": None,
|
||||
"slider_3": None,
|
||||
"priority": 0,
|
||||
"tags": [
|
||||
"IP-Adapter"
|
||||
]
|
||||
},
|
||||
# "ip-adapter_clip_sd15": {
|
||||
# "label": "ip-adapter_clip_sd15",
|
||||
# "call_function": functools.partial(clip, config='clip_h'),
|
||||
# "unload_function": functools.partial(unload_clip, config='clip_h'),
|
||||
# "managed_model": "unknown",
|
||||
# "model_free": False,
|
||||
# "no_control_mode": True,
|
||||
# "resolution": None,
|
||||
# "slider_1": None,
|
||||
# "slider_2": None,
|
||||
# "slider_3": None,
|
||||
# "priority": 100,
|
||||
# "tags": [
|
||||
# "IP-Adapter"
|
||||
# ]
|
||||
# },
|
||||
# "ip-adapter_clip_sdxl": {
|
||||
# "label": "ip-adapter_clip_sdxl",
|
||||
# "call_function": functools.partial(clip, config='clip_g'),
|
||||
# "unload_function": functools.partial(unload_clip, config='clip_g'),
|
||||
# "managed_model": "unknown",
|
||||
# "model_free": False,
|
||||
# "no_control_mode": True,
|
||||
# "resolution": None,
|
||||
# "slider_1": None,
|
||||
# "slider_2": None,
|
||||
# "slider_3": None,
|
||||
# "priority": 0,
|
||||
# "tags": [
|
||||
# "IP-Adapter"
|
||||
# ]
|
||||
# },
|
||||
# "ip-adapter_clip_sdxl_plus_vith": {
|
||||
# "label": "ip-adapter_clip_sdxl_plus_vith",
|
||||
# "call_function": functools.partial(clip, config='clip_h'),
|
||||
# "unload_function": functools.partial(unload_clip, config='clip_h'),
|
||||
# "managed_model": "unknown",
|
||||
# "model_free": False,
|
||||
# "no_control_mode": True,
|
||||
# "resolution": None,
|
||||
# "slider_1": None,
|
||||
# "slider_2": None,
|
||||
# "slider_3": None,
|
||||
# "priority": 0,
|
||||
# "tags": [
|
||||
# "IP-Adapter"
|
||||
# ]
|
||||
# },
|
||||
# "ip-adapter_face_id": {
|
||||
# "label": "ip-adapter_face_id",
|
||||
# "call_function": g_insight_face_model.run_model,
|
||||
# "unload_function": None,
|
||||
# "managed_model": "g_insight_face_model",
|
||||
# "model_free": False,
|
||||
# "no_control_mode": True,
|
||||
# "resolution": None,
|
||||
# "slider_1": None,
|
||||
# "slider_2": None,
|
||||
# "slider_3": None,
|
||||
# "priority": 0,
|
||||
# "tags": [
|
||||
# "IP-Adapter"
|
||||
# ]
|
||||
# },
|
||||
# "ip-adapter_face_id_plus": {
|
||||
# "label": "ip-adapter_face_id_plus",
|
||||
# "call_function": face_id_plus,
|
||||
# "unload_function": functools.partial(unload_clip, config='clip_h'),
|
||||
# "managed_model": "unknown",
|
||||
# "model_free": False,
|
||||
# "no_control_mode": True,
|
||||
# "resolution": None,
|
||||
# "slider_1": None,
|
||||
# "slider_2": None,
|
||||
# "slider_3": None,
|
||||
# "priority": 0,
|
||||
# "tags": [
|
||||
# "IP-Adapter"
|
||||
# ]
|
||||
# },
|
||||
"lineart_anime": {
|
||||
"label": "lineart_anime",
|
||||
"call_function": lineart_anime,
|
||||
@@ -1193,22 +1193,22 @@ legacy_preprocessors = {
|
||||
"T2I-Adapter"
|
||||
]
|
||||
},
|
||||
"t2ia_style_clipvision": {
|
||||
"label": "t2ia_style_clipvision",
|
||||
"call_function": functools.partial(clip, config='clip_vitl'),
|
||||
"unload_function": functools.partial(unload_clip, config='clip_vitl'),
|
||||
"managed_model": "unknown",
|
||||
"model_free": False,
|
||||
"no_control_mode": True,
|
||||
"resolution": None,
|
||||
"slider_1": None,
|
||||
"slider_2": None,
|
||||
"slider_3": None,
|
||||
"priority": 0,
|
||||
"tags": [
|
||||
"T2I-Adapter"
|
||||
]
|
||||
},
|
||||
# "t2ia_style_clipvision": {
|
||||
# "label": "t2ia_style_clipvision",
|
||||
# "call_function": functools.partial(clip, config='clip_vitl'),
|
||||
# "unload_function": functools.partial(unload_clip, config='clip_vitl'),
|
||||
# "managed_model": "unknown",
|
||||
# "model_free": False,
|
||||
# "no_control_mode": True,
|
||||
# "resolution": None,
|
||||
# "slider_1": None,
|
||||
# "slider_2": None,
|
||||
# "slider_3": None,
|
||||
# "priority": 0,
|
||||
# "tags": [
|
||||
# "T2I-Adapter"
|
||||
# ]
|
||||
# },
|
||||
"threshold": {
|
||||
"label": "threshold",
|
||||
"call_function": threshold,
|
||||
|
||||
+43
-3
@@ -1,11 +1,51 @@
|
||||
from modules_forge.supported_preprocessor import Preprocessor, PreprocessorParameter
|
||||
from modules_forge.supported_preprocessor import Preprocessor
|
||||
from modules_forge.shared import preprocessor_dir, add_supported_preprocessor
|
||||
from modules.modelloader import load_file_from_url
|
||||
from modules_forge.forge_util import numpy_to_pytorch
|
||||
|
||||
import ldm_patched.modules.clip_vision
|
||||
|
||||
|
||||
class PreprocessorClipVision(Preprocessor):
    """Preprocessor that encodes an image with a lazily loaded CLIP vision checkpoint."""

    def __init__(self, name, url, filename):
        """Record where the checkpoint comes from; nothing is downloaded here.

        Args:
            name: Value stored in ``self.name``.
            url: Download location of the checkpoint.
            filename: File name the checkpoint is saved under in ``preprocessor_dir``.
        """
        super().__init__()
        self.name = name
        self.url = url
        self.filename = filename
        self.tags = ['IP-Adapter']
        self.corp_image_with_a1111_mask_when_in_img2img_inpaint_tab = False
        self.show_control_mode = False
        self.sorting_priority = 1
        # Populated on the first __call__ so the download only happens when used.
        self.clipvision = None

    def __call__(self, input_image, resolution, slider_1=None, slider_2=None, slider_3=None, **kwargs):
        """Return ``encode_image`` of the loaded CLIP vision model for ``input_image``.

        ``resolution``, the sliders and ``kwargs`` are accepted but not used here.
        """
        if self.clipvision is None:
            ckpt_path = load_file_from_url(
                url=self.url,
                model_dir=preprocessor_dir,
                file_name=self.filename
            )
            self.clipvision = ldm_patched.modules.clip_vision.load(ckpt_path)

        # Put the input on the device the model patcher currently reports.
        input_image = numpy_to_pytorch(input_image).to(self.clipvision.patcher.current_device)

        return self.clipvision.encode_image(input_image)
|
||||
|
||||
|
||||
# Register one preprocessor per CLIP vision checkpoint. Each entry needs its own
# ``filename``: the download is saved under that name in ``preprocessor_dir``, so two
# entries sharing a name would share one file on disk.

add_supported_preprocessor(PreprocessorClipVision(
    name='CLIP-ViT-H',
    url='https://huggingface.co/h94/IP-Adapter/resolve/main/models/image_encoder/model.safetensors',
    filename='CLIP-ViT-H-14.safetensors'
))

# The bigG encoder is published under ``sdxl_models/`` in the IP-Adapter repository;
# ``models/image_encoder`` is the ViT-H checkpoint registered above.
add_supported_preprocessor(PreprocessorClipVision(
    name='CLIP-ViT-bigG',
    url='https://huggingface.co/h94/IP-Adapter/resolve/main/sdxl_models/image_encoder/model.safetensors',
    filename='CLIP-ViT-bigG.safetensors'
))

# The source file is a ``.bin`` checkpoint, so it keeps that extension and gets its
# own name instead of reusing the bigG file name.
# NOTE(review): confirm the checkpoint loader picks the right format from ``.bin``.
add_supported_preprocessor(PreprocessorClipVision(
    name='CLIP-ViT-L',
    url='https://huggingface.co/openai/clip-vit-large-patch14/resolve/main/pytorch_model.bin',
    filename='CLIP-ViT-L-14.bin'
))
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
import torch
|
||||
import os
|
||||
import time
|
||||
import safetensors
|
||||
import ldm_patched.modules.samplers
|
||||
|
||||
from ldm_patched.modules.controlnet import ControlBase
|
||||
@@ -193,6 +195,40 @@ def patched_load_models_gpu(*args, **kwargs):
|
||||
return y
|
||||
|
||||
|
||||
def build_loaded(module, loader_name):
    """Wrap ``module.<loader_name>`` so a failed load quarantines the offending file.

    The unwrapped loader is stored once on ``module`` as ``<loader_name>_origin``, so
    calling this function again re-wraps the original instead of stacking wrappers.

    When the wrapped loader raises, every positional or keyword argument that is a
    string naming an existing regular file is renamed to ``<path>.corrupted``
    (replacing any previous backup), and a ``ValueError`` describing what happened is
    raised, chained to the original exception.

    Args:
        module: Object (normally a module) that owns the loader attribute.
        loader_name: Name of the callable attribute on ``module`` to wrap.
    """
    original_loader_name = loader_name + '_origin'

    if not hasattr(module, original_loader_name):
        setattr(module, original_loader_name, getattr(module, loader_name))

    original_loader = getattr(module, original_loader_name)

    def loader(*args, **kwargs):
        try:
            return original_loader(*args, **kwargs)
        except Exception as e:
            exp = str(e) + '\n'
            for path in list(args) + list(kwargs.values()):
                # Only regular files are quarantined; a string argument that happens to
                # name a directory must never be renamed.
                if not (isinstance(path, str) and os.path.isfile(path)):
                    continue
                exp += f'File corrupted: {path} \n'
                corrupted_backup_file = path + '.corrupted'
                try:
                    if os.path.exists(corrupted_backup_file):
                        os.remove(corrupted_backup_file)
                    os.replace(path, corrupted_backup_file)
                except OSError as move_error:
                    # Keep reporting the load failure even if the file cannot be moved.
                    exp += f'Forge failed to move the corrupted file: {move_error} \n'
                    continue
                exp += f'Forge has tried to move the corrupted file to {corrupted_backup_file} \n'
                exp += 'You may try again now and Forge will download models again. \n'
            raise ValueError(exp) from e

    setattr(module, loader_name, loader)
    return
|
||||
|
||||
|
||||
def patch_all_basics():
|
||||
if not hasattr(model_management, 'load_models_gpu_origin'):
|
||||
model_management.load_models_gpu_origin = model_management.load_models_gpu
|
||||
@@ -201,4 +237,7 @@ def patch_all_basics():
|
||||
|
||||
ControlBase.control_merge = patched_control_merge
|
||||
ldm_patched.modules.samplers.calc_cond_uncond_batch = patched_calc_cond_uncond_batch
|
||||
|
||||
build_loaded(safetensors.torch, 'load_file')
|
||||
build_loaded(torch, 'load')
|
||||
return
|
||||
|
||||
@@ -26,7 +26,7 @@ class Preprocessor:
|
||||
self.model_patcher: ModelPatcher = None
|
||||
self.show_control_mode = True
|
||||
self.do_not_need_model = False
|
||||
self.sorting_priority = 0.0 # higher goes to top in the list
|
||||
self.sorting_priority = 0 # higher goes to top in the list
|
||||
self.corp_image_with_a1111_mask_when_in_img2img_inpaint_tab = True
|
||||
|
||||
def setup_model_patcher(self, model, load_device=None, offload_device=None, dtype=torch.float32, **kwargs):
|
||||
|
||||
Reference in New Issue
Block a user