RomanBeliy committed on
Commit
e36e800
·
verified ·
1 Parent(s): edd0238

Upload folder using huggingface_hub

Browse files
data/external_models/MindEyeV2/sd_xl_base.yaml ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ target: sgm.models.diffusion.DiffusionEngine
3
+ params:
4
+ scale_factor: 0.13025
5
+ disable_first_stage_autocast: True
6
+
7
+ denoiser_config:
8
+ target: sgm.modules.diffusionmodules.denoiser.DiscreteDenoiser
9
+ params:
10
+ num_idx: 1000
11
+
12
+ scaling_config:
13
+ target: sgm.modules.diffusionmodules.denoiser_scaling.EpsScaling
14
+ discretization_config:
15
+ target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization
16
+
17
+ network_config:
18
+ target: sgm.modules.diffusionmodules.openaimodel.UNetModel
19
+ params:
20
+ adm_in_channels: 2816
21
+ num_classes: sequential
22
+ use_checkpoint: True
23
+ in_channels: 4
24
+ out_channels: 4
25
+ model_channels: 320
26
+ attention_resolutions: [4, 2]
27
+ num_res_blocks: 2
28
+ channel_mult: [1, 2, 4]
29
+ num_head_channels: 64
30
+ use_linear_in_transformer: True
31
+ transformer_depth: [1, 2, 10]
32
+ context_dim: 2048
33
+ spatial_transformer_attn_type: softmax-xformers
34
+
35
+ conditioner_config:
36
+ target: sgm.modules.GeneralConditioner
37
+ params:
38
+ emb_models:
39
+ - is_trainable: False
40
+ input_key: txt
41
+ target: sgm.modules.encoders.modules.FrozenCLIPEmbedder
42
+ params:
43
+ layer: hidden
44
+ layer_idx: 11
45
+
46
+ - is_trainable: False
47
+ input_key: txt
48
+ target: sgm.modules.encoders.modules.FrozenOpenCLIPEmbedder2
49
+ params:
50
+ arch: ViT-bigG-14
51
+ version: laion2b_s39b_b160k
52
+ freeze: True
53
+ layer: penultimate
54
+ always_return_pooled: True
55
+ legacy: False
56
+
57
+ - is_trainable: False
58
+ input_key: original_size_as_tuple
59
+ target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
60
+ params:
61
+ outdim: 256
62
+
63
+ - is_trainable: False
64
+ input_key: crop_coords_top_left
65
+ target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
66
+ params:
67
+ outdim: 256
68
+
69
+ - is_trainable: False
70
+ input_key: target_size_as_tuple
71
+ target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
72
+ params:
73
+ outdim: 256
74
+
75
+ first_stage_config:
76
+ target: sgm.models.autoencoder.AutoencoderKL
77
+ params:
78
+ embed_dim: 4
79
+ monitor: val/rec_loss
80
+ ddconfig:
81
+ attn_type: vanilla-xformers
82
+ double_z: true
83
+ z_channels: 4
84
+ resolution: 256
85
+ in_channels: 3
86
+ out_ch: 3
87
+ ch: 128
88
+ ch_mult: [1, 2, 4, 4]
89
+ num_res_blocks: 2
90
+ attn_resolutions: []
91
+ dropout: 0.0
92
+ lossconfig:
93
+ target: torch.nn.Identity
data/external_models/MindEyeV2/unclip6.yaml ADDED
@@ -0,0 +1,220 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ base_learning_rate: 1.0e-5
3
+ target: sgm.models.diffusion.DiffusionEngine
4
+ params:
5
+ scale_factor: 0.13025
6
+ disable_first_stage_autocast: True
7
+ no_cond_log: True
8
+
9
+ ckpt_config:
10
+ target: sgm.modules.checkpoint.CheckpointEngine
11
+ params:
12
+ ckpt_path: checkpoints/sd_xl_base_1.0.safetensors
13
+ pre_adapters:
14
+ - target: sgm.modules.checkpoint.Finetuner
15
+ params:
16
+ keys:
17
+ - model\.diffusion_model\.(input_blocks|middle_block|output_blocks)(\.[0-9])?\.[0-9]\.transformer_blocks\.[0-9]\.attn2\.(to_k|to_v)\.weight
18
+ - target: sgm.modules.checkpoint.Pruner
19
+ params:
20
+ keys:
21
+ - model\.diffusion_model\.label_emb\.0\.0\.weight
22
+ slices:
23
+ - ":, :1024"
24
+ print_sd_keys: False
25
+ print_model: False
26
+
27
+ scheduler_config:
28
+ target: sgm.lr_scheduler.LambdaLinearScheduler
29
+ params:
30
+ warm_up_steps: [ 1000 ]
31
+ cycle_lengths: [ 10000000000000 ]
32
+ f_start: [ 1.e-6 ]
33
+ f_max: [ 1. ]
34
+ f_min: [ 1. ]
35
+
36
+ denoiser_config:
37
+ target: sgm.modules.diffusionmodules.denoiser.DiscreteDenoiser
38
+ params:
39
+ num_idx: 1000
40
+
41
+ scaling_config:
42
+ target: sgm.modules.diffusionmodules.denoiser_scaling.EpsScaling
43
+
44
+ discretization_config:
45
+ target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization
46
+
47
+ network_config:
48
+ target: sgm.modules.diffusionmodules.openaimodel.UNetModel
49
+ params:
50
+ adm_in_channels: 1024 #2816
51
+ num_classes: sequential
52
+ use_checkpoint: True
53
+ in_channels: 4
54
+ out_channels: 4
55
+ model_channels: 320
56
+ attention_resolutions: [ 4, 2 ]
57
+ num_res_blocks: 2
58
+ channel_mult: [ 1, 2, 4 ]
59
+ num_head_channels: 64
60
+ use_linear_in_transformer: True
61
+ transformer_depth: [ 1, 2, 10 ] # note: the first is unused (due to attn_res starting at 2) 32, 16, 8 --> 64, 32, 16
62
+ context_dim: 1664 #1280
63
+ spatial_transformer_attn_type: softmax-xformers
64
+
65
+ conditioner_config:
66
+ target: sgm.modules.GeneralConditioner
67
+ params:
68
+ emb_models:
69
+ # cross attn
70
+ - is_trainable: False
71
+ input_key: jpg
72
+ target: sgm.modules.encoders.modules.FrozenOpenCLIPImageEmbedder
73
+ params:
74
+ arch: ViT-bigG-14
75
+ version: laion2b_s39b_b160k
76
+ freeze: True
77
+ repeat_to_max_len: False
78
+ output_tokens: True
79
+ only_tokens: True
80
+ # vector cond
81
+ - is_trainable: False
82
+ input_key: original_size_as_tuple
83
+ target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
84
+ params:
85
+ outdim: 256 # multiplied by two
86
+ # vector cond
87
+ - is_trainable: False
88
+ input_key: crop_coords_top_left
89
+ target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
90
+ params:
91
+ outdim: 256 # multiplied by two
92
+ # # vector cond
93
+ # - is_trainable: False
94
+ # input_key: target_size_as_tuple
95
+ # target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
96
+ # params:
97
+ # outdim: 256 # multiplied by two
98
+
99
+ first_stage_config:
100
+ target: sgm.models.autoencoder.AutoencoderKLInferenceWrapper
101
+ params:
102
+ embed_dim: 4
103
+ monitor: val/rec_loss
104
+ ddconfig:
105
+ attn_type: vanilla-xformers
106
+ double_z: true
107
+ z_channels: 4
108
+ resolution: 256
109
+ in_channels: 3
110
+ out_ch: 3
111
+ ch: 128
112
+ ch_mult: [ 1, 2, 4, 4 ]
113
+ num_res_blocks: 2
114
+ attn_resolutions: [ ]
115
+ dropout: 0.0
116
+ lossconfig:
117
+ target: torch.nn.Identity
118
+
119
+ loss_fn_config:
120
+ target: sgm.modules.diffusionmodules.loss.StandardDiffusionLoss
121
+ params:
122
+ offset_noise_level: 0.04
123
+ sigma_sampler_config:
124
+ target: sgm.modules.diffusionmodules.sigma_sampling.DiscreteSampling
125
+ params:
126
+ num_idx: 1000
127
+
128
+ discretization_config:
129
+ target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization
130
+ loss_weighting_config:
131
+ target: sgm.modules.diffusionmodules.loss_weighting.EpsWeighting
132
+
133
+ sampler_config:
134
+ target: sgm.modules.diffusionmodules.sampling.EulerEDMSampler
135
+ params:
136
+ num_steps: 50
137
+
138
+ discretization_config:
139
+ target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization
140
+
141
+ guider_config:
142
+ target: sgm.modules.diffusionmodules.guiders.VanillaCFG
143
+ params:
144
+ scale: 5.0
145
+
146
+ data:
147
+ target: sgm.data.dataset.StableDataModuleFromConfig
148
+ params:
149
+ train:
150
+ datapipeline:
151
+ urls:
152
+ - s3://stability-west/sddatasets/laiocosplitv1c/
153
+ pipeline_config:
154
+ shardshuffle: 10000
155
+ sample_shuffle: 10000
156
+
157
+ preprocessors:
158
+ - target: sdata.filters.SimpleKeyFilter
159
+ params:
160
+ keys: [txt, jpg]
161
+ - target: sdata.filters.AttributeFilter
162
+ params:
163
+ filter_dict:
164
+ SSCD_65: False
165
+ is_spawning: True
166
+ is_getty: True
167
+
168
+ decoders:
169
+ - pil
170
+
171
+ loader:
172
+ batch_size: 1
173
+ num_workers: 4
174
+ batched_transforms:
175
+ - target: sdata.mappers.MultiAspectCacher
176
+ params:
177
+ batch_size: 16
178
+ debug: False
179
+ crop_coords_key: crop_coords_top_left
180
+ target_size_key: target_size_as_tuple
181
+ original_size_key: original_size_as_tuple
182
+ max_pixels: 262144
183
+
184
+
185
+ lightning:
186
+ strategy:
187
+ target: pytorch_lightning.strategies.DDPStrategy
188
+
189
+ modelcheckpoint:
190
+ params:
191
+ every_n_train_steps: 100000
192
+
193
+ callbacks:
194
+ metrics_over_trainsteps_checkpoint:
195
+ params:
196
+ every_n_train_steps: 5000
197
+
198
+ image_logger:
199
+ target: sgm.modules.loggers.train_logging.SampleLogger
200
+ params:
201
+ disabled: False
202
+ enable_autocast: True
203
+ batch_frequency: 2000
204
+ max_images: 4
205
+ increase_log_steps: True
206
+ log_first_step: False
207
+ log_before_first_step: True
208
+ log_images_kwargs:
209
+ N: 4
210
+ num_steps:
211
+ - 50
212
+ ucg_keys: [ ]
213
+
214
+ trainer:
215
+ devices: 0,
216
+ benchmark: False
217
+ num_sanity_val_steps: 0
218
+ accumulate_grad_batches: 1
219
+ max_epochs: 1000
220
+ precision: 16
data/external_models/MindEyeV2/unclip6_epoch0_step110000.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6bd7033dee484bba449d243408f36f080ace62b774d0da7894fd898fdf155652
3
+ size 17980447939
data/external_models/SDXL/sd_xl_base.yaml ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ target: sgm.models.diffusion.DiffusionEngine
3
+ params:
4
+ scale_factor: 0.13025
5
+ disable_first_stage_autocast: True
6
+
7
+ denoiser_config:
8
+ target: sgm.modules.diffusionmodules.denoiser.DiscreteDenoiser
9
+ params:
10
+ num_idx: 1000
11
+
12
+ scaling_config:
13
+ target: sgm.modules.diffusionmodules.denoiser_scaling.EpsScaling
14
+ discretization_config:
15
+ target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization
16
+
17
+ network_config:
18
+ target: sgm.modules.diffusionmodules.openaimodel.UNetModel
19
+ params:
20
+ adm_in_channels: 2816
21
+ num_classes: sequential
22
+ use_checkpoint: True
23
+ in_channels: 4
24
+ out_channels: 4
25
+ model_channels: 320
26
+ attention_resolutions: [4, 2]
27
+ num_res_blocks: 2
28
+ channel_mult: [1, 2, 4]
29
+ num_head_channels: 64
30
+ use_linear_in_transformer: True
31
+ transformer_depth: [1, 2, 10]
32
+ context_dim: 2048
33
+ spatial_transformer_attn_type: softmax-xformers
34
+
35
+ conditioner_config:
36
+ target: sgm.modules.GeneralConditioner
37
+ params:
38
+ emb_models:
39
+ - is_trainable: False
40
+ input_key: txt
41
+ target: sgm.modules.encoders.modules.FrozenCLIPEmbedder
42
+ params:
43
+ layer: hidden
44
+ layer_idx: 11
45
+
46
+ - is_trainable: False
47
+ input_key: txt
48
+ target: sgm.modules.encoders.modules.FrozenOpenCLIPEmbedder2
49
+ params:
50
+ arch: ViT-bigG-14
51
+ version: laion2b_s39b_b160k
52
+ freeze: True
53
+ layer: penultimate
54
+ always_return_pooled: True
55
+ legacy: False
56
+
57
+ - is_trainable: False
58
+ input_key: original_size_as_tuple
59
+ target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
60
+ params:
61
+ outdim: 256
62
+
63
+ - is_trainable: False
64
+ input_key: crop_coords_top_left
65
+ target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
66
+ params:
67
+ outdim: 256
68
+
69
+ - is_trainable: False
70
+ input_key: target_size_as_tuple
71
+ target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
72
+ params:
73
+ outdim: 256
74
+
75
+ first_stage_config:
76
+ target: sgm.models.autoencoder.AutoencoderKL
77
+ params:
78
+ embed_dim: 4
79
+ monitor: val/rec_loss
80
+ ddconfig:
81
+ attn_type: vanilla-xformers
82
+ double_z: true
83
+ z_channels: 4
84
+ resolution: 256
85
+ in_channels: 3
86
+ out_ch: 3
87
+ ch: 128
88
+ ch_mult: [1, 2, 4, 4]
89
+ num_res_blocks: 2
90
+ attn_resolutions: []
91
+ dropout: 0.0
92
+ lossconfig:
93
+ target: torch.nn.Identity
data/external_models/SDXL/zavychromaxl_v30.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1270c1f025398346c59dab6d9ee234ec50b8f650b01f19376f9b647e6411b75
3
+ size 6938040682