diff --git a/metrics/mauve/README.md b/metrics/mauve/README.md index 3ba39d8fa..06a9c53f9 100644 --- a/metrics/mauve/README.md +++ b/metrics/mauve/README.md @@ -25,7 +25,7 @@ MAUVE is a measure of the gap between neural text and human text. It is computed This metric is a wrapper around the [official implementation](https://github.com/krishnap25/mauve) of MAUVE. -For more details, consult the [MAUVE paper](https://arxiv.org/abs/2102.01454). +For more details, consult the MAUVE papers [[NeurIPS '21](https://arxiv.org/abs/2102.01454), [JMLR '23](https://arxiv.org/pdf/2212.14578)]. ## How to use @@ -69,12 +69,16 @@ It also has several optional arguments: ## Output values -This metric outputs a dictionary with 5 key-value pairs: +This metric outputs a dictionary with the following key-value pairs: `mauve`: MAUVE score, which ranges between 0 and 1. **Larger** values indicate that P and Q are closer. `frontier_integral`: Frontier Integral, which ranges between 0 and 1. **Smaller** values indicate that P and Q are closer. +`mauve_star`: MAUVE score computed with Krichevsky-Trofimov smoothing, as advocated in [this paper](https://arxiv.org/pdf/2212.14578). + +`frontier_integral_star`: Frontier Integral score computed with Krichevsky-Trofimov smoothing, as advocated in [this paper](https://arxiv.org/pdf/2212.14578). + `divergence_curve`: a numpy.ndarray of shape (m, 2); plot it with `matplotlib` to view the divergence curve. `p_hist`: a discrete distribution, which is a quantized version of the text distribution `p_text`. 
@@ -131,11 +135,25 @@ See the [official implementation](https://github.com/krishnap25/mauve#best-pract ## Citation ```bibtex +@article{pillutla-etal:mauve:jmlr2023, + title={{MAUVE Scores for Generative Models: Theory and Practice}}, + author={Pillutla, Krishna and Liu, Lang and Thickstun, John and Welleck, Sean and Swayamdipta, Swabha and Zellers, Rowan and Oh, Sewoong and Choi, Yejin and Harchaoui, Zaid}, + journal={JMLR}, + year={2023} +} + @inproceedings{pillutla-etal:mauve:neurips2021, - title={MAUVE: Measuring the Gap Between Neural Text and Human Text using Divergence Frontiers}, + title={{MAUVE: Measuring the Gap Between Neural Text and Human Text using Divergence Frontiers}}, author={Pillutla, Krishna and Swayamdipta, Swabha and Zellers, Rowan and Thickstun, John and Welleck, Sean and Choi, Yejin and Harchaoui, Zaid}, booktitle = {NeurIPS}, - year = {2021} + year = {2021} +} + +@inproceedings{liu-etal:mauve-theory:neurips2021, + title={{Divergence Frontiers for Generative Models: Sample Complexity, Quantization Effects, and Frontier Integrals}}, + author={Liu, Lang and Pillutla, Krishna and Welleck, Sean and Oh, Sewoong and Choi, Yejin and Harchaoui, Zaid}, + booktitle={NeurIPS}, + year={2021} } ``` diff --git a/metrics/mauve/mauve.py b/metrics/mauve/mauve.py index fdacaa477..4f7b17a94 100644 --- a/metrics/mauve/mauve.py +++ b/metrics/mauve/mauve.py @@ -26,6 +26,13 @@ _CITATION = """\ + @article{pillutla-etal:mauve:jmlr2023, + title={{MAUVE Scores for Generative Models: Theory and Practice}}, + author={Pillutla, Krishna and Liu, Lang and Thickstun, John and Welleck, Sean and Swayamdipta, Swabha and Zellers, Rowan and Oh, Sewoong and Choi, Yejin and Harchaoui, Zaid}, + journal={JMLR}, + year={2023} +} + @inproceedings{pillutla-etal:mauve:neurips2021, title={{MAUVE: Measuring the Gap Between Neural Text and Human Text using Divergence Frontiers}}, author={Pillutla, Krishna and Swayamdipta, Swabha and Zellers, Rowan and Thickstun, John and Welleck, Sean and 
Choi, Yejin and Harchaoui, Zaid}, @@ -33,11 +40,11 @@ year = {2021} } -@article{pillutla-etal:mauve:arxiv2022, - title={{MAUVE Scores for Generative Models: Theory and Practice}}, - author={Pillutla, Krishna and Liu, Lang and Thickstun, John and Welleck, Sean and Swayamdipta, Swabha and Zellers, Rowan and Oh, Sewoong and Choi, Yejin and Harchaoui, Zaid}, - journal={arXiv Preprint}, - year={2022} +@inproceedings{liu-etal:mauve-theory:neurips2021, + title={{Divergence Frontiers for Generative Models: Sample Complexity, Quantization Effects, and Frontier Integrals}}, + author={Liu, Lang and Pillutla, Krishna and Welleck, Sean and Oh, Sewoong and Choi, Yejin and Harchaoui, Zaid}, + booktitle={NeurIPS}, + year={2021} } """ @@ -75,6 +82,8 @@ Returns: mauve: MAUVE score, a number between 0 and 1. Larger values indicate that P and Q are closer, frontier_integral: Frontier Integral, a number between 0 and 1. Smaller values indicate that P and Q are closer, + mauve_star: MAUVE score computed with Krichevsky-Trofimov smoothing, as advocated in [this paper](https://arxiv.org/pdf/2212.14578), + frontier_integral_star: Frontier Integral score computed with Krichevsky-Trofimov smoothing, as advocated in [this paper](https://arxiv.org/pdf/2212.14578), divergence_curve: a numpy.ndarray of shape (m, 2); plot it with matplotlib to view the divergence curve, p_hist: a discrete distribution, which is a quantized version of the text distribution p_text, q_hist: same as above, but with q_text.