123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123 |
- # Copyright (c) Facebook, Inc. and its affiliates.
- #
- # This source code is licensed under the MIT license found in the
- # LICENSE file in the root directory of this source tree.
- import argparse
- import copy
- import unittest
- import tests.utils as test_utils
- import torch
- from fairseq.criterions.cross_entropy import CrossEntropyCriterion
- from fairseq.criterions.label_smoothed_cross_entropy import (
- LabelSmoothedCrossEntropyCriterion,
- )
- class TestLabelSmoothing(unittest.TestCase):
- def setUp(self):
- # build dictionary
- self.d = test_utils.dummy_dictionary(3)
- vocab = len(self.d)
- self.assertEqual(vocab, 4 + 3) # 4 special + 3 tokens
- self.assertEqual(self.d.pad(), 1)
- self.assertEqual(self.d.eos(), 2)
- self.assertEqual(self.d.unk(), 3)
- pad, eos, unk, w1, w2, w3 = 1, 2, 3, 4, 5, 6 # noqa: F841
- # build dataset
- self.data = [
- # the first batch item has padding
- {
- "source": torch.LongTensor([w1, eos]),
- "target": torch.LongTensor([w1, eos]),
- },
- {
- "source": torch.LongTensor([w1, eos]),
- "target": torch.LongTensor([w1, w1, eos]),
- },
- ]
- self.sample = next(test_utils.dummy_dataloader(self.data))
- # build model
- self.args = argparse.Namespace()
- self.args.sentence_avg = False
- self.args.report_accuracy = False
- self.args.probs = (
- torch.FloatTensor(
- [
- # pad eos unk w1 w2 w3
- [0.05, 0.05, 0.1, 0.05, 0.3, 0.4, 0.05],
- [0.05, 0.10, 0.2, 0.05, 0.2, 0.3, 0.10],
- [0.05, 0.15, 0.3, 0.05, 0.1, 0.2, 0.15],
- ]
- )
- .unsqueeze(0)
- .expand(2, 3, 7)
- ) # add batch dimension
- self.task = test_utils.TestTranslationTask.setup_task(self.args, self.d, self.d)
- self.model = self.task.build_model(self.args)
- def test_nll_loss(self):
- self.args.label_smoothing = 0.1
- nll_crit = CrossEntropyCriterion.build_criterion(self.args, self.task)
- smooth_crit = LabelSmoothedCrossEntropyCriterion.build_criterion(
- self.args, self.task
- )
- nll_loss, nll_sample_size, nll_logging_output = nll_crit(
- self.model, self.sample
- )
- smooth_loss, smooth_sample_size, smooth_logging_output = smooth_crit(
- self.model, self.sample
- )
- self.assertLess(abs(nll_loss - nll_logging_output["loss"]), 1e-6)
- self.assertLess(abs(nll_loss - smooth_logging_output["nll_loss"]), 1e-6)
- def test_padding(self):
- self.args.label_smoothing = 0.1
- crit = LabelSmoothedCrossEntropyCriterion.build_criterion(self.args, self.task)
- loss, _, logging_output = crit(self.model, self.sample)
- def get_one_no_padding(idx):
- # create a new sample with just a single batch item so that there's
- # no padding
- sample1 = next(test_utils.dummy_dataloader([self.data[idx]]))
- args1 = copy.copy(self.args)
- args1.probs = args1.probs[idx, :, :].unsqueeze(0)
- model1 = self.task.build_model(args1)
- loss1, _, _ = crit(model1, sample1)
- return loss1
- loss1 = get_one_no_padding(0)
- loss2 = get_one_no_padding(1)
- self.assertAlmostEqual(loss, loss1 + loss2)
- def test_reduction(self):
- self.args.label_smoothing = 0.1
- crit = LabelSmoothedCrossEntropyCriterion.build_criterion(self.args, self.task)
- loss, _, logging_output = crit(self.model, self.sample, reduce=True)
- unreduced_loss, _, _ = crit(self.model, self.sample, reduce=False)
- self.assertAlmostEqual(loss, unreduced_loss.sum())
- def test_zero_eps(self):
- self.args.label_smoothing = 0.0
- nll_crit = CrossEntropyCriterion.build_criterion(self.args, self.task)
- smooth_crit = LabelSmoothedCrossEntropyCriterion.build_criterion(
- self.args, self.task
- )
- nll_loss, nll_sample_size, nll_logging_output = nll_crit(
- self.model, self.sample
- )
- smooth_loss, smooth_sample_size, smooth_logging_output = smooth_crit(
- self.model, self.sample
- )
- self.assertAlmostEqual(nll_loss, smooth_loss)
- def assertAlmostEqual(self, t1, t2):
- self.assertEqual(t1.size(), t2.size(), "size mismatch")
- self.assertLess((t1 - t2).abs().max(), 1e-6)
- if __name__ == "__main__":
- unittest.main()
|