Merge pull request #75 from zhampel/master
Adds initial JOSS paper structure. Updates diagram with our dataset fo…
MaAleBarr authored Feb 20, 2019
2 parents a0d02aa + 4f4f166 commit 4029ae8
Showing 5 changed files with 460 additions and 55 deletions.
2 changes: 1 addition & 1 deletion cyphercat/datadefs/cyphercat_dataset.py
@@ -73,5 +73,5 @@ def get_preload_split_fn(name=''):
         fn = PRELOAD_SPLIT_FN_DICT[name]
         return fn
     else:
-        raise ValueError('Invalid test statistic, {}, entered. Must be '
+        raise ValueError('Invalid dataset, {}, entered. Must be '
                          'in {}'.format(name, PRELOAD_SPLIT_FN_DICT.keys()))
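
For context, here is a minimal, self-contained sketch of the registry pattern this function implements. The split_voices placeholder and the dictionary contents are illustrative assumptions, not the package's actual loaders:

    # Hypothetical registry mapping dataset names to preload/split functions.
    def split_voices(data_dir):
        """Placeholder split function standing in for a real loader."""
        return {'train': [], 'test': []}

    PRELOAD_SPLIT_FN_DICT = {'voices': split_voices}

    def get_preload_split_fn(name=''):
        """Return the preload/split function registered under `name`."""
        if name in PRELOAD_SPLIT_FN_DICT:
            return PRELOAD_SPLIT_FN_DICT[name]
        raise ValueError('Invalid dataset, {}, entered. Must be '
                         'in {}'.format(name, PRELOAD_SPLIT_FN_DICT.keys()))

    fn = get_preload_split_fn('voices')   # returns split_voices
    # get_preload_split_fn('cifar')       # raises ValueError with the corrected message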
165 changes: 165 additions & 0 deletions docs/joss_paper/paper.bib
@@ -0,0 +1,165 @@

@article{mlleaks,
author = {Ahmed Salem and
Yang Zhang and
Mathias Humbert and
Mario Fritz and
Michael Backes},
title = {ML-Leaks: Model and Data Independent Membership Inference Attacks
and Defenses on Machine Learning Models},
journal = {CoRR},
volume = {abs/1806.01246},
year = {2018},
url = {http://arxiv.org/abs/1806.01246},
archivePrefix = {arXiv},
eprint = {1806.01246},
timestamp = {Mon, 13 Aug 2018 16:47:26 +0200},
biburl = {https://dblp.org/rec/bib/journals/corr/abs-1806-01246},
bibsource = {dblp computer science bibliography, https://dblp.org}
}


@inproceedings{fredrikson2015model,
title={Model inversion attacks that exploit confidence information and basic countermeasures},
author={Fredrikson, Matt and Jha, Somesh and Ristenpart, Thomas},
booktitle={Proceedings of the 22nd ACM SIGSAC Conference on Computer and Communications Security},
pages={1322--1333},
year={2015},
organization={ACM}
}


@inproceedings{pytorch,
title={Automatic differentiation in PyTorch},
author={Paszke, Adam and Gross, Sam and Chintala, Soumith and Chanan, Gregory and Yang, Edward and DeVito, Zachary and Lin, Zeming and Desmaison, Alban and Antiga, Luca and Lerer, Adam},
booktitle={NIPS-W},
year={2017}
}



@online{att_faces,
  author = {{AT\&T Laboratories Cambridge}},
  title = {The {AT\&T} Database of Faces},
  year = {2002},
  url = {https://www.cl.cam.ac.uk/research/dtg/attarchive/facedatabase.html}
}

@article{goodfellow,
  title={Explaining and Harnessing Adversarial Examples},
  author={Goodfellow, Ian J and Shlens, Jonathon and Szegedy, Christian},
  journal={CoRR},
  volume={abs/1412.6572},
  year={2015}
}

@inproceedings{carlini,
title={Adversarial examples are not easily detected: Bypassing ten detection methods},
author={Carlini, Nicholas and Wagner, David},
booktitle={Proceedings of the 10th ACM Workshop on Artificial Intelligence and Security},
pages={3--14},
year={2017},
organization={ACM}
}

@inproceedings{tramer,
title={Stealing Machine Learning Models via Prediction APIs.},
author={Tram{\`e}r, Florian and Zhang, Fan and Juels, Ari and Reiter, Michael K and Ristenpart, Thomas},
booktitle={USENIX Security Symposium},
pages={601--618},
year={2016}
}


@article{VOiCES,
author = {Colleen Richey and
Mar{\'{\i}}a A. Barrios and
Zeb Armstrong and
Chris Bartels and
Horacio Franco and
Martin Graciarena and
Aaron Lawson and
Mahesh Kumar Nandwana and
Allen R. Stauffer and
Julien van Hout and
Paul Gamble and
Jeff Hetherly and
Cory Stephenson and
Karl Ni},
title = {Voices Obscured in Complex Environmental Settings {(VOiCES)} corpus},
journal = {CoRR},
volume = {abs/1804.05053},
year = {2018},
url = {http://arxiv.org/abs/1804.05053},
archivePrefix = {arXiv},
eprint = {1804.05053},
timestamp = {Mon, 13 Aug 2018 16:49:06 +0200},
biburl = {https://dblp.org/rec/bib/journals/corr/abs-1804-05053},
bibsource = {dblp computer science bibliography, https://dblp.org}
}


@inproceedings{LibriSpeech,
title={{Librispeech: An ASR corpus based on public domain audio books}},
author={Panayotov, Vassil and Chen, Guoguo and Povey, Daniel and Khudanpur, Sanjeev},
booktitle={2015 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
pages={5206--5210},
year={2015},
organization={IEEE}
}


@mastersthesis{cifar,
    author = {Krizhevsky, Alex},
    title = {{Learning Multiple Layers of Features from Tiny Images}},
    school = {University of Toronto},
    year = {2009},
    url = {http://www.cs.toronto.edu/~kriz/learning-features-2009-TR.pdf}
}


@inproceedings{imagenet,
title={{ImageNet}: A large-scale hierarchical image database},
author={Jia Deng and Wei Dong and Richard Socher and Li-Jia Li and Kai Li and Li Fei-Fei},
booktitle={2009 IEEE Conference on Computer Vision and Pattern Recognition},
year={2009},
pages={248--255}
}

@TechReport{lfw,
author = {Gary B. Huang and Manu Ramesh and Tamara Berg and
Erik Learned-Miller},
title = {Labeled Faces in the Wild: A Database for Studying
Face Recognition in Unconstrained Environments},
institution = {University of Massachusetts, Amherst},
year = 2007,
number = {07-49},
month = {October}
}


@incollection{sst,
title = {{Recursive Deep Models for Semantic Compositionality Over a Sentiment Treebank}},
author = {Richard Socher and Alex Perelygin and Jean Wu and Jason Chuang and Christopher Manning and Andrew Ng and Christopher Potts},
booktitle = {{EMNLP}},
year = {2013}
}


@ARTICLE{distill_defense,
author = {{Papernot}, Nicolas and {McDaniel}, Patrick and {Wu}, Xi and {Jha},
Somesh and {Swami}, Ananthram},
title = "{Distillation as a Defense to Adversarial Perturbations against Deep Neural Networks}",
journal = {arXiv e-prints},
keywords = {Computer Science - Cryptography and Security, Computer Science - Machine Learning, Computer Science - Neural and Evolutionary Computing, Statistics - Machine Learning},
year = 2015,
month = nov,
eid = {arXiv:1511.04508},
pages = {arXiv:1511.04508},
archivePrefix = {arXiv},
eprint = {1511.04508},
primaryClass = {cs.CR},
adsurl = {https://ui.adsabs.harvard.edu/\#abs/2015arXiv151104508P},
adsnote = {Provided by the SAO/NASA Astrophysics Data System}
}
83 changes: 83 additions & 0 deletions docs/joss_paper/paper.md
@@ -0,0 +1,83 @@
---
title: 'Cyphercat: A Python Package for Reproducibly Evaluating Robustness Against Privacy Attacks'
tags:
- Python
- machine learning
- adversarial attacks
- model inversion
- model privacy
- model robustness
authors:
- name: Maria A. Barrios
orcid:
affiliation: "1"
email: [email protected]
- name: Paul Gamble
orcid:
affiliation: "1"
email: [email protected]
- name: Zigfried Hampel-Arias
orcid: 0000-0003-0253-9117
affiliation: "1"
email: [email protected]
- name: Nina Lopatina
orcid: 0000-0001-6844-4941
affiliation: "1"
email: [email protected]
- name: Michael Lomnitz
orcid: 0000-0001-5659-3501
affiliation: "1"
email: [email protected]
- name: Felipe A. Mejia
orcid: 0000-0001-6393-8408
affiliation: "1"
email: [email protected]
- name: Lucas Tindall
orcid: 0000-0003-1395-4818
affiliation: "1"
email: [email protected]
affiliations:
- name: Lab41 -- an In-Q-Tel Lab, Menlo Park, CA, USA
index: 1
date: DD MM 2019
bibliography: paper.bib
---

# Summary

With the proliferation of machine learning in everyday applications,
research efforts have increasingly focused on understanding security
vulnerabilities throughout the machine learning pipeline.
A model's output can be manipulated either at training time, by
accessing the training pipeline and poisoning the training data,
or at inference time, by perturbing inputs to fool the model
[@goodfellow; @carlini].
Other attacks target machine-learning-as-a-service platforms,
extracting the defining parameters of a target model [@tramer].
Less work has focused on privacy attacks, where nefarious agents
can infer details of the training data from a targeted model [@mlleaks].
This has significant implications for user privacy and model sharing.
Fundamentally assessing model vulnerabilities to privacy attacks
remains an open-ended challenge, as current attack and defense
tactics are studied on a case-by-case basis.
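
As a minimal illustration of this class of attacks, the sketch below
implements the simplest membership inference heuristic discussed in
[@mlleaks]: a model is typically more confident on examples it was
trained on, so an attacker thresholds the top softmax score. The
model, inputs, and threshold value here are illustrative assumptions,
not code from the package:

```python
import torch
import torch.nn.functional as F

def confidence_threshold_attack(target_model, x, threshold=0.9):
    """Guess membership from the target model's top softmax score.

    Models tend to be more confident on their own training examples;
    `threshold` is a free parameter an attacker would tune, e.g. with
    shadow models as in the membership inference literature.
    """
    target_model.eval()
    with torch.no_grad():
        probs = F.softmax(target_model(x), dim=1)
    top_confidence, _ = probs.max(dim=1)
    return top_confidence > threshold  # True -> guessed training-set member
```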

Cyphercat is an extensible Python package for benchmarking the efficacy
of privacy attacks and defenses in a reproducible environment.
The Cyphercat application programming interface (API) allows users to test the
robustness of a specified target model against several well-documented privacy
attacks [@mlleaks; @fredrikson2015model], which aim to extract details of the training data from the model.
Also included is the option to assess the efficacy of several implemented defense methods.
The API is built on the PyTorch [@pytorch] machine learning library and
provides access to well-known image, audio, and text benchmark datasets used in machine learning applications.
The Cyphercat API includes the option to train commonly used architectures,
with subsequent assessment of attack and defense performance.
The package also enables users to introduce custom datasets and model architectures.

To use the API, a user defines a dataset, including its data transformations,
and the desired architectures for the target model (the model being assessed for vulnerabilities)
and the attack model (the model used to mount an attack on the target model).
These are then passed to dedicated functions that initiate training, attacking, and defending.
The source code for Cyphercat is available [on GitHub](https://github.com/Lab41/cyphercat/).
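
The hypothetical outline below sketches the shape of that workflow in
PyTorch. The dataset, the architectures, and especially the commented
entry points are illustrative assumptions, not the package's documented API:

```python
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

# 1. Define the dataset and its transformations.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])
data = torchvision.datasets.CIFAR10(root='./data', train=True,
                                    download=True, transform=transform)

# 2. Define architectures for the target model and the attack model.
target_model = torchvision.models.resnet18(num_classes=10)
attack_model = nn.Sequential(nn.Linear(10, 64), nn.ReLU(), nn.Linear(64, 2))

# 3. Hand both to the training / attacking / defending entry points.
#    These calls are hypothetical stand-ins, not Cyphercat's actual API:
# cyphercat.train(target_model, data, epochs=10)
# cyphercat.attack(attack_model, target_model, data)
# cyphercat.defend(target_model, method='distillation')
```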


# References