# coco_scene_images_transformer.yaml
  # Model definition. Every `target` is a dotted import path and the sibling
  # `params` mapping holds the settings passed to it.
  # NOTE(review): nesting was rebuilt from the target/params pattern because
  # the captured text had lost its indentation - confirm against the upstream
  # file before training.
  model:
    base_learning_rate: 4.5e-06
    target: taming.models.cond_transformer.Net2NetTransformer
    params:
      # Same name as cond_stage_config.params.conditional_key below.
      cond_stage_key: objects_bbox

      # Transformer settings.
      transformer_config:
        target: taming.modules.transformer.mingpt.GPT
        params:
          # Same value as first_stage_config.params.n_embed - presumably the
          # two must stay in sync; confirm before changing either.
          vocab_size: 8192
          # = 256 + 92
          # = dim(vqgan_latent_space,16x16)
          #   + dim(conditional_builder.embedding_dim)
          block_size: 348
          n_layer: 40
          n_head: 16
          n_embd: 1408
          embd_pdrop: 0.1
          resid_pdrop: 0.1
          attn_pdrop: 0.1

      # First stage (VQModel) settings.
      first_stage_config:
        target: taming.models.vqgan.VQModel
        params:
          # Placeholder path - replace with the local checkpoint location.
          # https://heibox.uni-heidelberg.de/f/78dea9589974474c97c1/
          ckpt_path: /path/to/coco_epoch117.ckpt
          embed_dim: 256
          n_embed: 8192
          ddconfig:
            double_z: false
            z_channels: 256
            resolution: 256
            in_channels: 3
            out_ch: 3
            ch: 128
            ch_mult:
              - 1
              - 1
              - 2
              - 2
              - 4
            num_res_blocks: 2
            attn_resolutions:
              - 16
            dropout: 0.0
          lossconfig:
            target: taming.modules.losses.DummyLoss

      # Conditioning stage settings.
      cond_stage_config:
        target: taming.models.dummy_cond_stage.DummyCondStage
        params:
          conditional_key: objects_bbox
  # Data module definition with one dataset stanza per split.
  # NOTE(review): nesting was rebuilt from the target/params pattern because
  # the captured text had lost its indentation - confirm against the upstream
  # file before training.
  data:
    target: main.DataModuleFromConfig
    params:
      batch_size: 6

      # Training split: random-1d cropping with random flipping enabled.
      train:
        target: taming.data.annotated_objects_coco.AnnotatedObjectsCoco
        params:
          # substitute with path to full dataset
          data_path: data/coco_annotations_100
          split: train
          keys: [image, objects_bbox, file_name, annotations]
          no_tokens: 8192
          target_image_size: 256
          min_object_area: 0.00001
          min_objects_per_image: 2
          max_objects_per_image: 30
          crop_method: random-1d
          random_flip: true
          use_group_parameter: true
          encode_crop: true

      # Validation split: same settings as train except for center cropping
      # and no random flipping.
      validation:
        target: taming.data.annotated_objects_coco.AnnotatedObjectsCoco
        params:
          # substitute with path to full dataset
          data_path: data/coco_annotations_100
          split: validation
          keys: [image, objects_bbox, file_name, annotations]
          no_tokens: 8192
          target_image_size: 256
          min_object_area: 0.00001
          min_objects_per_image: 2
          max_objects_per_image: 30
          crop_method: center
          random_flip: false
          use_group_parameter: true
          encode_crop: true