junnyu committed on
Commit
83feac1
·
1 Parent(s): 9dc30c0

Update pipeline.py

Browse files
Files changed (1) hide show
  1. pipeline.py +47 -34
pipeline.py CHANGED
@@ -17,6 +17,7 @@
17
  # Here is the AGPL-3.0 license https://github.com/AUTOMATIC1111/stable-diffusion-webui/blob/master/LICENSE.txt
18
 
19
  import inspect
 
20
  from typing import Any, Callable, Dict, List, Optional, Union
21
 
22
  import paddle
@@ -30,18 +31,25 @@ from ppdiffusers.pipelines.stable_diffusion.safety_checker import (
30
  StableDiffusionSafetyChecker,
31
  )
32
  from ppdiffusers.schedulers import KarrasDiffusionSchedulers
33
- from ppdiffusers.utils import logging, randn_tensor, safetensors_load, torch_load, smart_load
34
-
35
- from pathlib import Path
 
 
 
 
36
 
37
  logger = logging.get_logger(__name__) # pylint: disable=invalid-name
38
 
 
39
  @paddle.no_grad()
40
- def load_lora(pipeline,
41
- state_dict: dict,
42
- LORA_PREFIX_UNET: str = "lora_unet",
43
- LORA_PREFIX_TEXT_ENCODER: str = "lora_te",
44
- ratio: float = 1.0):
 
 
45
  ratio = float(ratio)
46
  visited = []
47
  for key in state_dict:
@@ -49,8 +57,7 @@ def load_lora(pipeline,
49
  continue
50
 
51
  if "text" in key:
52
- tmp_layer_infos = key.split(".")[0].split(
53
- LORA_PREFIX_TEXT_ENCODER + "_")[-1].split("_")
54
  hf_to_ppnlp = {
55
  "encoder": "transformer",
56
  "fc1": "linear1",
@@ -58,12 +65,12 @@ def load_lora(pipeline,
58
  }
59
  layer_infos = []
60
  for layer_info in tmp_layer_infos:
61
- if layer_info == "mlp": continue
 
62
  layer_infos.append(hf_to_ppnlp.get(layer_info, layer_info))
63
  curr_layer: paddle.nn.Linear = pipeline.text_encoder
64
  else:
65
- layer_infos = key.split(".")[0].split(LORA_PREFIX_UNET +
66
- "_")[-1].split("_")
67
  curr_layer: paddle.nn.Linear = pipeline.unet
68
 
69
  temp_name = layer_infos.pop(0)
@@ -82,14 +89,9 @@ def load_lora(pipeline,
82
  else:
83
  temp_name = layer_infos.pop(0)
84
 
85
- triplet_keys = [
86
- key,
87
- key.replace("lora_down", "lora_up"),
88
- key.replace("lora_down.weight", "alpha")
89
- ]
90
  dtype: paddle.dtype = curr_layer.weight.dtype
91
- weight_down: paddle.Tensor = state_dict[triplet_keys[0]].cast(
92
- dtype)
93
  weight_up: paddle.Tensor = state_dict[triplet_keys[1]].cast(dtype)
94
  rank: float = float(weight_down.shape[0])
95
  if triplet_keys[2] in state_dict:
@@ -100,31 +102,37 @@ def load_lora(pipeline,
100
 
101
  if not hasattr(curr_layer, "backup_weights"):
102
  curr_layer.backup_weights = curr_layer.weight.clone()
103
-
104
  if len(weight_down.shape) == 4:
105
  if weight_down.shape[2:4] == [1, 1]:
106
  # conv2d 1x1
107
  curr_layer.weight.copy_(
108
- curr_layer.weight +
109
- ratio * paddle.matmul(weight_up.squeeze(
110
- [-1, -2]), weight_down.squeeze([-1, -2])).unsqueeze(
111
- [-1, -2]) * scale, True)
 
 
112
  else:
113
  # conv2d 3x3
114
  curr_layer.weight.copy_(
115
- curr_layer.weight + ratio * paddle.nn.functional.conv2d(
116
- weight_down.transpose([1, 0, 2, 3]),
117
- weight_up).transpose([1, 0, 2, 3]) * scale, True)
 
 
 
 
 
118
  else:
119
  # linear
120
- curr_layer.weight.copy_(
121
- curr_layer.weight +
122
- ratio * paddle.matmul(weight_up, weight_down).T * scale, True)
123
 
124
  # update visited list
125
  visited.extend(triplet_keys)
126
  return pipeline
127
 
 
128
  class WebUIStableDiffusionPipeline(DiffusionPipeline):
129
  r"""
130
  Pipeline for text-to-image generation using Stable Diffusion.
@@ -399,7 +407,7 @@ class WebUIStableDiffusionPipeline(DiffusionPipeline):
399
  callback: Optional[Callable[[int, int, paddle.Tensor], None]] = None,
400
  callback_steps: Optional[int] = 1,
401
  cross_attention_kwargs: Optional[Dict[str, Any]] = None,
402
- clip_skip: int = 0,
403
  lora_dir: str = "./loras",
404
  ):
405
  r"""
@@ -452,7 +460,9 @@ class WebUIStableDiffusionPipeline(DiffusionPipeline):
452
  `self.processor` in
453
  [diffusers.cross_attention](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
454
  clip_skip (`int`, *optional*, defaults to 0):
455
- CLIP_stop_at_last_layers, if clip_skip < 1, we will use the last_hidden_state from text_encoder.
 
 
456
  Examples:
457
 
458
  Returns:
@@ -554,7 +564,9 @@ class WebUIStableDiffusionPipeline(DiffusionPipeline):
554
  cross_attention_kwargs=cross_attention_kwargs,
555
  ).sample
556
  noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
557
- noise_pred = noise_pred_uncond + weight * guidance_scale * (noise_pred_text - noise_pred_uncond)
 
 
558
  else:
559
  noise_pred = self.unet(
560
  latent_model_input,
@@ -616,6 +628,7 @@ class WebUIStableDiffusionPipeline(DiffusionPipeline):
616
  sub_layer.weight.copy_(sub_layer.backup_weights, True)
617
  self.weights_has_changed = False
618
 
 
619
  # clip.py
620
  import math
621
  from collections import namedtuple
 
17
  # Here is the AGPL-3.0 license https://github.com/AUTOMATIC1111/stable-diffusion-webui/blob/master/LICENSE.txt
18
 
19
  import inspect
20
+ from pathlib import Path
21
  from typing import Any, Callable, Dict, List, Optional, Union
22
 
23
  import paddle
 
31
  StableDiffusionSafetyChecker,
32
  )
33
  from ppdiffusers.schedulers import KarrasDiffusionSchedulers
34
+ from ppdiffusers.utils import (
35
+ logging,
36
+ randn_tensor,
37
+ safetensors_load,
38
+ smart_load,
39
+ torch_load,
40
+ )
41
 
42
  logger = logging.get_logger(__name__) # pylint: disable=invalid-name
43
 
44
+
45
  @paddle.no_grad()
46
+ def load_lora(
47
+ pipeline,
48
+ state_dict: dict,
49
+ LORA_PREFIX_UNET: str = "lora_unet",
50
+ LORA_PREFIX_TEXT_ENCODER: str = "lora_te",
51
+ ratio: float = 1.0,
52
+ ):
53
  ratio = float(ratio)
54
  visited = []
55
  for key in state_dict:
 
57
  continue
58
 
59
  if "text" in key:
60
+ tmp_layer_infos = key.split(".")[0].split(LORA_PREFIX_TEXT_ENCODER + "_")[-1].split("_")
 
61
  hf_to_ppnlp = {
62
  "encoder": "transformer",
63
  "fc1": "linear1",
 
65
  }
66
  layer_infos = []
67
  for layer_info in tmp_layer_infos:
68
+ if layer_info == "mlp":
69
+ continue
70
  layer_infos.append(hf_to_ppnlp.get(layer_info, layer_info))
71
  curr_layer: paddle.nn.Linear = pipeline.text_encoder
72
  else:
73
+ layer_infos = key.split(".")[0].split(LORA_PREFIX_UNET + "_")[-1].split("_")
 
74
  curr_layer: paddle.nn.Linear = pipeline.unet
75
 
76
  temp_name = layer_infos.pop(0)
 
89
  else:
90
  temp_name = layer_infos.pop(0)
91
 
92
+ triplet_keys = [key, key.replace("lora_down", "lora_up"), key.replace("lora_down.weight", "alpha")]
 
 
 
 
93
  dtype: paddle.dtype = curr_layer.weight.dtype
94
+ weight_down: paddle.Tensor = state_dict[triplet_keys[0]].cast(dtype)
 
95
  weight_up: paddle.Tensor = state_dict[triplet_keys[1]].cast(dtype)
96
  rank: float = float(weight_down.shape[0])
97
  if triplet_keys[2] in state_dict:
 
102
 
103
  if not hasattr(curr_layer, "backup_weights"):
104
  curr_layer.backup_weights = curr_layer.weight.clone()
105
+
106
  if len(weight_down.shape) == 4:
107
  if weight_down.shape[2:4] == [1, 1]:
108
  # conv2d 1x1
109
  curr_layer.weight.copy_(
110
+ curr_layer.weight
111
+ + ratio
112
+ * paddle.matmul(weight_up.squeeze([-1, -2]), weight_down.squeeze([-1, -2])).unsqueeze([-1, -2])
113
+ * scale,
114
+ True,
115
+ )
116
  else:
117
  # conv2d 3x3
118
  curr_layer.weight.copy_(
119
+ curr_layer.weight
120
+ + ratio
121
+ * paddle.nn.functional.conv2d(weight_down.transpose([1, 0, 2, 3]), weight_up).transpose(
122
+ [1, 0, 2, 3]
123
+ )
124
+ * scale,
125
+ True,
126
+ )
127
  else:
128
  # linear
129
+ curr_layer.weight.copy_(curr_layer.weight + ratio * paddle.matmul(weight_up, weight_down).T * scale, True)
 
 
130
 
131
  # update visited list
132
  visited.extend(triplet_keys)
133
  return pipeline
134
 
135
+
136
  class WebUIStableDiffusionPipeline(DiffusionPipeline):
137
  r"""
138
  Pipeline for text-to-image generation using Stable Diffusion.
 
407
  callback: Optional[Callable[[int, int, paddle.Tensor], None]] = None,
408
  callback_steps: Optional[int] = 1,
409
  cross_attention_kwargs: Optional[Dict[str, Any]] = None,
410
+ clip_skip: int = 1,
411
  lora_dir: str = "./loras",
412
  ):
413
  r"""
 
460
  `self.processor` in
461
  [diffusers.cross_attention](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
462
  clip_skip (`int`, *optional*, defaults to 1):
463
+ CLIP_stop_at_last_layers. If clip_skip <= 1, the last_hidden_state from text_encoder is used.
464
+ lora_dir (`str`, *optional*):
465
+ Path to the directory containing the LoRA weights to load (defaults to `"./loras"`).
466
  Examples:
467
 
468
  Returns:
 
564
  cross_attention_kwargs=cross_attention_kwargs,
565
  ).sample
566
  noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
567
+ noise_pred = noise_pred_uncond + weight * guidance_scale * (
568
+ noise_pred_text - noise_pred_uncond
569
+ )
570
  else:
571
  noise_pred = self.unet(
572
  latent_model_input,
 
628
  sub_layer.weight.copy_(sub_layer.backup_weights, True)
629
  self.weights_has_changed = False
630
 
631
+
632
  # clip.py
633
  import math
634
  from collections import namedtuple