---
# cldm_v21.yaml
#
# Configuration for the `cldm.cldm.ControlLDM` model and its sub-components.
# Every component is described by a `target` (dotted class path) plus a
# `params` mapping holding that component's settings.
#
# NOTE(review): this file was recovered from a copy whose indentation had been
# flattened and whose lines carried viewer line-number prefixes. The nesting
# below was reconstructed from the repeated `*_config` -> `target`/`params`
# pairing (a flat layout would contain duplicate keys) -- confirm against the
# code that consumes this file before relying on it.
model:
  target: cldm.cldm.ControlLDM
  params:
    linear_start: 0.00085
    linear_end: 0.0120
    num_timesteps_cond: 1
    log_every_t: 200
    timesteps: 1000
    first_stage_key: "jpg"
    cond_stage_key: "txt"
    control_key: "hint"
    image_size: 64
    channels: 4
    cond_stage_trainable: false
    conditioning_key: crossattn
    monitor: val/loss_simple_ema
    scale_factor: 0.18215
    use_ema: false
    only_mid_control: false

    # Control branch. Its settings mirror `unet_config` below, except that it
    # takes `hint_channels` and has no `out_channels`.
    control_stage_config:
      target: cldm.cldm.ControlNet
      params:
        use_checkpoint: true
        image_size: 32  # unused
        in_channels: 4
        hint_channels: 3
        model_channels: 320
        attention_resolutions: [4, 2, 1]
        num_res_blocks: 2
        channel_mult: [1, 2, 4, 4]
        num_head_channels: 64  # need to fix for flash-attn
        use_spatial_transformer: true
        use_linear_in_transformer: true
        transformer_depth: 1
        context_dim: 1024
        legacy: false

    # UNet settings; keep the shared values in step with
    # `control_stage_config` above.
    unet_config:
      target: cldm.cldm.ControlledUnetModel
      params:
        use_checkpoint: true
        image_size: 32  # unused
        in_channels: 4
        out_channels: 4
        model_channels: 320
        attention_resolutions: [4, 2, 1]
        num_res_blocks: 2
        channel_mult: [1, 2, 4, 4]
        num_head_channels: 64  # need to fix for flash-attn
        use_spatial_transformer: true
        use_linear_in_transformer: true
        transformer_depth: 1
        context_dim: 1024
        legacy: false

    # First-stage autoencoder settings.
    first_stage_config:
      target: ldm.models.autoencoder.AutoencoderKL
      params:
        embed_dim: 4
        monitor: val/rec_loss
        ddconfig:
          # attn_type: "vanilla-xformers"
          double_z: true
          z_channels: 4
          resolution: 256
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult: [1, 2, 4, 4]
          num_res_blocks: 2
          attn_resolutions: []
          dropout: 0.0
        # The loss is configured as an identity module.
        lossconfig:
          target: torch.nn.Identity

    # Conditioning encoder for the input selected by `cond_stage_key`.
    cond_stage_config:
      target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
      params:
        freeze: true
        layer: "penultimate"