# imagenet_vqgan.yaml
#
# Two top-level sections:
#   model - learning rate, a `target` path and the `params` that go with it
#   data  - a data-module `target`, loader settings, and train/validation sets
#
# NOTE(review): each `target` looks like a dotted import path that the loader
# presumably instantiates with the sibling `params` mapping - confirm against
# the code that consumes this file.
# NOTE(review): the nesting below was rebuilt from the `target`/`params`
# pattern after the original indentation was lost; in particular, confirm that
# `lossconfig` is a sibling of `ddconfig` under `model.params`.
---
model:
  base_learning_rate: 4.5e-6
  target: taming.models.vqgan.VQModel
  params:
    embed_dim: 256
    n_embed: 1024

    ddconfig:
      double_z: false
      z_channels: 256
      resolution: 256
      in_channels: 3
      out_ch: 3
      ch: 128
      ch_mult: [1, 1, 2, 2, 4]  # num_down = len(ch_mult)-1
      num_res_blocks: 2
      attn_resolutions: [16]
      dropout: 0.0

    lossconfig:
      target: taming.modules.losses.vqperceptual.VQLPIPSWithDiscriminator
      params:
        disc_conditional: false
        disc_in_channels: 3
        disc_start: 250001
        disc_weight: 0.8
        codebook_weight: 1.0

data:
  target: main.DataModuleFromConfig
  params:
    batch_size: 12
    num_workers: 24

    train:
      target: taming.data.imagenet.ImageNetTrain
      params:
        config:
          size: 256

    validation:
      target: taming.data.imagenet.ImageNetValidation
      params:
        config:
          size: 256