transformer_quantization_config.yaml

# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

# This file defines example configuration arguments for quantizing
# a transformer model with product quantization

n_centroids:
    Linear:
        key: in_features
        value: {"*": 8}
    Embedding:
        key: embedding_dim
        value: {"*": 8}

block_sizes:
    Linear:
        key: fuzzy_name
        value: {fc: 8, attn: 4, emb: 4}
    Embedding:
        key: fuzzy_name
        value: {emb: 8}

layers_to_quantize:
    - decoder\\.layers\\.\d+\\.fc[12]
    - decoder\\.embed_tokens\\.embeddings\\.[012]\\.[01]
    - decoder\\.layers\\.\d+\\.self_attn\\.(k_proj|v_proj|q_proj|out_proj)
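
Below is a minimal sketch, not fairseq's own loader, of how the layers_to_quantize patterns select module parameter names. It assumes the file above is on disk under this name, that the doubled backslashes in the YAML collapse to single regex escapes before compilation, and that the candidate layer names (taken from model.named_parameters() with the trailing ".weight" stripped) are purely illustrative.

# Sketch: load the config with PyYAML and check which parameter names
# the layers_to_quantize regexes would select for product quantization.
import re
import yaml

with open("transformer_quantization_config.yaml") as f:
    config = yaml.safe_load(f)

# The parsed strings contain two literal backslashes before each dot;
# collapse them to a single escape ("\\." -> "\.") before compiling.
patterns = [p.replace("\\\\", "\\") for p in config["layers_to_quantize"]]

# Hypothetical parameter names for illustration only.
layer_names = [
    "decoder.layers.0.fc1",
    "decoder.layers.3.self_attn.k_proj",
    "decoder.layers.3.self_attn.out_proj",
    "encoder.layers.0.fc1",  # encoder layers are not listed, so skipped
]

for name in layer_names:
    selected = any(re.fullmatch(p, name) for p in patterns)
    print(f"{name}: {'quantize' if selected else 'skip'}")

Running this prints "quantize" for the decoder feed-forward and self-attention projections matched by the patterns and "skip" for the encoder layer, which illustrates why only decoder sub-modules are quantized under this example configuration.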