test_label_smoothing.py

# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import argparse
import copy
import unittest

import tests.utils as test_utils
import torch
from fairseq.criterions.cross_entropy import CrossEntropyCriterion
from fairseq.criterions.label_smoothed_cross_entropy import (
    LabelSmoothedCrossEntropyCriterion,
)
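
# A brief note on what these tests exercise (a sketch, not taken from this
# file): label smoothing interpolates between the usual negative log-likelihood
# and a uniform penalty over the vocabulary. In recent fairseq versions the
# per-token loss is approximately
#
#     loss = (1 - eps - eps_i) * nll_loss + eps_i * sum_v(-log p(v)),
#     with eps_i = eps / (V - 1)
#
# so the smoothed criterion also reports a plain nll_loss (compared against
# CrossEntropyCriterion below) and collapses to it when eps = 0. The exact
# interpolation constants have varied across fairseq versions, so treat this
# as an approximation rather than the definitive formula.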


class TestLabelSmoothing(unittest.TestCase):
    def setUp(self):
        # build dictionary
        self.d = test_utils.dummy_dictionary(3)
        vocab = len(self.d)
        self.assertEqual(vocab, 4 + 3)  # 4 special + 3 tokens
        self.assertEqual(self.d.pad(), 1)
        self.assertEqual(self.d.eos(), 2)
        self.assertEqual(self.d.unk(), 3)
        pad, eos, unk, w1, w2, w3 = 1, 2, 3, 4, 5, 6  # noqa: F841

        # build dataset
        self.data = [
            # the first batch item has padding
            {
                "source": torch.LongTensor([w1, eos]),
                "target": torch.LongTensor([w1, eos]),
            },
            {
                "source": torch.LongTensor([w1, eos]),
                "target": torch.LongTensor([w1, w1, eos]),
            },
        ]
        self.sample = next(test_utils.dummy_dataloader(self.data))

        # build model
        self.args = argparse.Namespace()
        self.args.sentence_avg = False
        self.args.report_accuracy = False
        self.args.probs = (
            torch.FloatTensor(
                [
                    # bos   pad   eos  unk   w1   w2   w3
                    [0.05, 0.05, 0.1, 0.05, 0.3, 0.4, 0.05],
                    [0.05, 0.10, 0.2, 0.05, 0.2, 0.3, 0.10],
                    [0.05, 0.15, 0.3, 0.05, 0.1, 0.2, 0.15],
                ]
            )
            .unsqueeze(0)
            .expand(2, 3, 7)
        )  # add batch dimension
        self.task = test_utils.TestTranslationTask.setup_task(self.args, self.d, self.d)
        self.model = self.task.build_model(self.args)
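
    # Note on the fixture (an assumption about test_utils, not asserted by this
    # file): the dummy model built by TestTranslationTask is expected to output
    # the fixed distributions in self.args.probs at every target position, so
    # the losses checked below can be verified by hand from the three rows
    # defined above.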

    def test_nll_loss(self):
        self.args.label_smoothing = 0.1
        nll_crit = CrossEntropyCriterion.build_criterion(self.args, self.task)
        smooth_crit = LabelSmoothedCrossEntropyCriterion.build_criterion(
            self.args, self.task
        )
        nll_loss, nll_sample_size, nll_logging_output = nll_crit(
            self.model, self.sample
        )
        smooth_loss, smooth_sample_size, smooth_logging_output = smooth_crit(
            self.model, self.sample
        )
        # the cross-entropy criterion should log its own loss consistently, and
        # the smoothed criterion should report the same nll_loss
        self.assertLess(abs(nll_loss - nll_logging_output["loss"]), 1e-6)
        self.assertLess(abs(nll_loss - smooth_logging_output["nll_loss"]), 1e-6)

    def test_padding(self):
        self.args.label_smoothing = 0.1
        crit = LabelSmoothedCrossEntropyCriterion.build_criterion(self.args, self.task)
        loss, _, logging_output = crit(self.model, self.sample)

        def get_one_no_padding(idx):
            # create a new sample with just a single batch item so that there's
            # no padding
            sample1 = next(test_utils.dummy_dataloader([self.data[idx]]))
            args1 = copy.copy(self.args)
            args1.probs = args1.probs[idx, :, :].unsqueeze(0)
            model1 = self.task.build_model(args1)
            loss1, _, _ = crit(model1, sample1)
            return loss1

        loss1 = get_one_no_padding(0)
        loss2 = get_one_no_padding(1)
        # padded positions contribute nothing, so the batched loss should equal
        # the sum of the two per-item losses computed without padding
        self.assertAlmostEqual(loss, loss1 + loss2)

    def test_reduction(self):
        self.args.label_smoothing = 0.1
        crit = LabelSmoothedCrossEntropyCriterion.build_criterion(self.args, self.task)
        loss, _, logging_output = crit(self.model, self.sample, reduce=True)
        unreduced_loss, _, _ = crit(self.model, self.sample, reduce=False)
        # the reduced loss should equal the sum of the per-token losses
        self.assertAlmostEqual(loss, unreduced_loss.sum())

    def test_zero_eps(self):
        self.args.label_smoothing = 0.0
        nll_crit = CrossEntropyCriterion.build_criterion(self.args, self.task)
        smooth_crit = LabelSmoothedCrossEntropyCriterion.build_criterion(
            self.args, self.task
        )
        nll_loss, nll_sample_size, nll_logging_output = nll_crit(
            self.model, self.sample
        )
        smooth_loss, smooth_sample_size, smooth_logging_output = smooth_crit(
            self.model, self.sample
        )
        # with label_smoothing = 0 the smoothed criterion reduces to plain
        # cross-entropy, so both losses should match
        self.assertAlmostEqual(nll_loss, smooth_loss)

    def assertAlmostEqual(self, t1, t2):
        self.assertEqual(t1.size(), t2.size(), "size mismatch")
        self.assertLess((t1 - t2).abs().max(), 1e-6)
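

# Typical ways to run just this file (paths assumed relative to a fairseq
# checkout):
#     python -m pytest tests/test_label_smoothing.py
#     python tests/test_label_smoothing.py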

if __name__ == "__main__":
    unittest.main()