CodeFormer_colorization.yml

# general settings
name: CodeFormer_colorization
model_type: CodeFormerIdxModel
num_gpu: 8
manual_seed: 0

# dataset and data loader settings
datasets:
  train:
    name: FFHQ
    type: FFHQBlindDataset
    dataroot_gt: datasets/ffhq/ffhq_512
    filename_tmpl: '{}'
    io_backend:
      type: disk

    in_size: 512
    gt_size: 512
    mean: [0.5, 0.5, 0.5]
    std: [0.5, 0.5, 0.5]
    use_hflip: true
    use_corrupt: true

    # large degradation in stage II
    blur_kernel_size: 41
    use_motion_kernel: false
    motion_kernel_prob: 0.001
    kernel_list: ['iso', 'aniso']
    kernel_prob: [0.5, 0.5]
    blur_sigma: [1, 15]
    downsample_range: [4, 30]
    noise_range: [0, 20]
    jpeg_range: [30, 80]

    # color jitter and gray
    color_jitter_prob: 0.3
    color_jitter_shift: 20
    color_jitter_pt_prob: 0.3
    gray_prob: 0.01

    latent_gt_path: ~ # without pre-calculated latent code
    # latent_gt_path: './experiments/pretrained_models/VQGAN/latent_gt_code1024.pth'

    # data loader
    num_worker_per_gpu: 2
    batch_size_per_gpu: 4
    dataset_enlarge_ratio: 100
    prefetch_mode: ~

  # val:
  #   name: CelebA-HQ-512
  #   type: PairedImageDataset
  #   dataroot_lq: datasets/faces/validation/lq
  #   dataroot_gt: datasets/faces/validation/gt
  #   io_backend:
  #     type: disk
  #   mean: [0.5, 0.5, 0.5]
  #   std: [0.5, 0.5, 0.5]
  #   scale: 1

# network structures
network_g:
  type: CodeFormer
  dim_embd: 512
  n_head: 8
  n_layers: 9
  codebook_size: 1024
  connect_list: ['32', '64', '128', '256']
  fix_modules: ['quantize','generator']
  vqgan_path: './experiments/pretrained_models/vqgan/vqgan_code1024.pth' # pretrained VQGAN

network_vqgan: # this config is needed when no pre-calculated latent codes are provided
  type: VQAutoEncoder
  img_size: 512
  nf: 64
  ch_mult: [1, 2, 2, 4, 4, 8]
  quantizer: 'nearest'
  codebook_size: 1024

# path
path:
  pretrain_network_g: ~
  param_key_g: params_ema
  strict_load_g: false
  pretrain_network_d: ~
  strict_load_d: true
  resume_state: ~

# training settings
# base_lr(4.5e-6) * batch_size(4)
train:
  use_hq_feat_loss: true
  feat_loss_weight: 1.0
  cross_entropy_loss: true
  entropy_loss_weight: 0.5
  fidelity_weight: 0

  optim_g:
    type: Adam
    lr: !!float 1e-4
    weight_decay: 0
    betas: [0.9, 0.99]

  scheduler:
    type: MultiStepLR
    milestones: [400000, 450000]
    gamma: 0.5

  total_iter: 500000

  warmup_iter: -1 # no warm up
  ema_decay: 0.995

  use_adaptive_weight: true

  net_g_start_iter: 0
  net_d_iters: 1
  net_d_start_iter: 0
  manual_seed: 0

# validation settings
val:
  val_freq: !!float 5e10 # no validation
  save_img: true

  metrics:
    psnr: # metric name, can be arbitrary
      type: calculate_psnr
      crop_border: 4
      test_y_channel: false

# logging settings
logger:
  print_freq: 100
  save_checkpoint_freq: !!float 1e4
  use_tb_logger: true
  wandb:
    project: ~
    resume_id: ~

# dist training settings
dist_params:
  backend: nccl
  port: 29419

find_unused_parameters: true
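
The settings above can be sanity-checked outside the training framework. The snippet below is a minimal sketch, assuming only PyYAML and that the file is saved as CodeFormer_colorization.yml in the working directory; it is not part of the CodeFormer codebase, which loads option files through its own BasicSR-style parser.

# sanity-check sketch (illustrative, not the repo's loader)
import yaml

with open('CodeFormer_colorization.yml', 'r') as f:
    opt = yaml.safe_load(f)

# Explicit standard tags such as `!!float 1e-4` resolve to Python floats,
# and `~` resolves to None.
lr = opt['train']['optim_g']['lr']        # 0.0001
total_iter = opt['train']['total_iter']   # 500000

# Effective batch size across all GPUs: 8 GPUs * 4 images per GPU = 32.
effective_batch = opt['num_gpu'] * opt['datasets']['train']['batch_size_per_gpu']

print(lr, total_iter, effective_batch)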