# test_fp16_optimizer.py
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
  5. import copy
  6. import logging
  7. import unittest
  8. import torch
  9. from fairseq.optim.fp16_optimizer import FP16Optimizer, MemoryEfficientFP16Optimizer
  10. from omegaconf import OmegaConf
  11. @unittest.skipIf(not torch.cuda.is_available(), "test requires a GPU")
  12. class TestGradientScaling(unittest.TestCase):
  13. def setUp(self):
  14. self.x = torch.tensor([2.0]).cuda().half()
  15. weight = 3.0
  16. bias = 5.0
  17. self.error = 1.0
  18. self.target = torch.tensor([self.x * weight + bias + self.error]).cuda().half()
  19. self.loss_fn = torch.nn.L1Loss()
  20. self.model = torch.nn.Linear(1, 1)
  21. self.model.weight.data = torch.tensor([[weight]])
  22. self.model.bias.data = torch.tensor([bias])
  23. self.model.cuda().half()
  24. self.params = list(self.model.parameters())
  25. self.cfg_dls = OmegaConf.create(
  26. {
  27. "optimization": {
  28. "lr": [0.1],
  29. },
  30. "optimizer": {
  31. "_name": "adam",
  32. "lr": [0.1],
  33. "adam_betas": "(0.9, 0.999)",
  34. "adam_eps": 1e-8,
  35. "weight_decay": 0.0,
  36. },
  37. "common": {
  38. "fp16_init_scale": 1,
  39. "fp16_scale_window": 1,
  40. "fp16_scale_tolerance": 1,
  41. "threshold_loss_scale": 1,
  42. "min_loss_scale": 1e-4,
  43. "tpu": False,
  44. },
  45. }
  46. )
  47. logging.disable(logging.CRITICAL)
  48. def tearDown(self):
  49. logging.disable(logging.NOTSET)
  50. def run_iter(self, model, params, optimizer):
  51. optimizer.zero_grad()
  52. y = model(self.x)
  53. loss = self.loss_fn(y, self.target)
  54. optimizer.backward(loss)
  55. self.assertEqual(loss, torch.tensor(1.0, device="cuda:0", dtype=torch.float16))
  56. grad_norm = optimizer.clip_grad_norm(0)
  57. self.assertAlmostEqual(grad_norm.item(), 2.2361, 4)
  58. optimizer.step()
  59. self.assertEqual(
  60. model.weight,
  61. torch.tensor(
  62. [[3.0996]], device="cuda:0", dtype=torch.float16, requires_grad=True
  63. ),
  64. )
  65. self.assertEqual(
  66. model.bias,
  67. torch.tensor(
  68. [5.1016], device="cuda:0", dtype=torch.float16, requires_grad=True
  69. ),
  70. )
  71. self.assertEqual(optimizer.scaler.loss_scale, 2.0)
  72. def test_mixed_precision(self):
  73. model = copy.deepcopy(self.model)
  74. params = list(model.parameters())
  75. optimizer = FP16Optimizer.build_optimizer(self.cfg_dls, params)
  76. self.run_iter(model, params, optimizer)
  77. self.assertTrue(
  78. all(
  79. torch.all(
  80. fp32_params.eq(
  81. torch.tensor(
  82. [3.1000, 5.1000], device="cuda:0", requires_grad=True
  83. )
  84. )
  85. )
  86. for fp32_params in optimizer.fp32_params.values()
  87. )
  88. )
  89. def test_memory_efficient(self):
  90. model = copy.deepcopy(self.model)
  91. params = list(model.parameters())
  92. optimizer = MemoryEfficientFP16Optimizer.build_optimizer(self.cfg_dls, params)
  93. self.run_iter(model, params, optimizer)
# Allow running this test module directly (e.g. `python test_fp16_optimizer.py`).
if __name__ == "__main__":
    unittest.main()