drin_transformer.yaml 1.8 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677
  # Model section: each `target` is a dotted class path and the sibling
  # `params` mapping holds the settings for it.
  # NOTE(review): the extracted text carried "N. " line-number prefixes and had
  # lost all indentation; the nesting below was reconstructed from the
  # target/params pairing -- verify against a known-good copy before use.
  model:
    base_learning_rate: 4.5e-06
    target: taming.models.cond_transformer.Net2NetTransformer
    params:
      # Name of the conditioning input; presumably the batch field that
      # carries the depth map -- confirm against the data classes.
      cond_stage_key: depth

      # Autoregressive transformer.
      transformer_config:
        target: taming.modules.transformer.mingpt.GPT
        params:
          # Same value as n_embed in both VQModel configs below.
          vocab_size: 1024
          block_size: 512
          n_layer: 24
          n_head: 16
          n_embd: 1024

      # First stage: VQModel with 3 input / 3 output channels, loaded from
      # ckpt_path.
      first_stage_config:
        target: taming.models.vqgan.VQModel
        params:
          ckpt_path: logs/2020-09-23T17-56-33_imagenet_vqgan/checkpoints/last.ckpt
          embed_dim: 256
          n_embed: 1024
          ddconfig:
            double_z: false
            z_channels: 256
            resolution: 256
            in_channels: 3
            out_ch: 3
            ch: 128
            ch_mult:
              - 1
              - 1
              - 2
              - 2
              - 4
            num_res_blocks: 2
            attn_resolutions:
              - 16
            dropout: 0.0
          # NOTE(review): DummyLoss suggests this stage is not trained by this
          # config -- confirm.
          lossconfig:
            target: taming.modules.losses.DummyLoss

      # Conditioning stage: same settings as first_stage_config except for
      # ckpt_path and the channel counts (1 instead of 3).
      cond_stage_config:
        target: taming.models.vqgan.VQModel
        params:
          ckpt_path: logs/2020-11-03T15-34-24_imagenetdepth_vqgan/checkpoints/last.ckpt
          embed_dim: 256
          n_embed: 1024
          ddconfig:
            double_z: false
            z_channels: 256
            resolution: 256
            in_channels: 1
            out_ch: 1
            ch: 128
            ch_mult:
              - 1
              - 1
              - 2
              - 2
              - 4
            num_res_blocks: 2
            attn_resolutions:
              - 16
            dropout: 0.0
          lossconfig:
            target: taming.modules.losses.DummyLoss
  # Data section: `target` is a dotted class path and `params` its settings.
  # NOTE(review): the extracted text carried "N. " line-number prefixes and had
  # lost all indentation; the nesting below was reconstructed from the
  # target/params pairing -- verify against a known-good copy before use.
  data:
    target: main.DataModuleFromConfig
    params:
      batch_size: 2
      num_workers: 8
      # Training split.
      train:
        target: taming.data.imagenet.RINTrainWithDepth
        params:
          # Same value as `resolution` in the model's ddconfig blocks.
          size: 256
      # Validation split; same size as the training split.
      validation:
        target: taming.data.imagenet.RINValidationWithDepth
        params:
          size: 256