
Updates JOSS paper documents based on comments
zhampel committed Feb 5, 2019
1 parent 7e88ca4 commit 52808dd
Showing 2 changed files with 159 additions and 32 deletions.
128 changes: 128 additions & 0 deletions docs/joss_paper/paper.bib
@@ -35,3 +35,131 @@ @inproceedings{pytorch
booktitle={NIPS-W},
year={2017}
}



@online{att_faces,
  author = {{AT\&T Laboratories Cambridge}},
  title = {The AT\&T Database of Faces},
  year = {2002},
  url = {https://www.cl.cam.ac.uk/research/dtg/attarchive/facedatabase.html}
}

@article{goodfellow,
  title = {Explaining and harnessing adversarial examples},
  author = {Goodfellow, Ian J and Shlens, Jonathon and Szegedy, Christian},
  journal = {CoRR},
  year = {2015}
}

@inproceedings{carlini,
title={Adversarial examples are not easily detected: Bypassing ten detection methods},
author={Carlini, Nicholas and Wagner, David},
booktitle={Proceedings of the 10th ACM Workshop on Artificial Intelligence and Security},
pages={3--14},
year={2017},
organization={ACM}
}

@inproceedings{tramer,
title={Stealing Machine Learning Models via Prediction APIs},
author={Tram{\`e}r, Florian and Zhang, Fan and Juels, Ari and Reiter, Michael K and Ristenpart, Thomas},
booktitle={USENIX Security Symposium},
pages={601--618},
year={2016}
}


@article{VOiCES,
author = {Colleen Richey and
Mar{\'{\i}}a A. Barrios and
Zeb Armstrong and
Chris Bartels and
Horacio Franco and
Martin Graciarena and
Aaron Lawson and
Mahesh Kumar Nandwana and
Allen R. Stauffer and
Julien van Hout and
Paul Gamble and
Jeff Hetherly and
Cory Stephenson and
Karl Ni},
title        = {Voices Obscured in Complex Environmental Settings {(VOiCES)} corpus},
journal = {CoRR},
volume = {abs/1804.05053},
year = {2018},
url = {http://arxiv.org/abs/1804.05053},
archivePrefix = {arXiv},
eprint = {1804.05053},
}


@inproceedings{LibriSpeech,
title={{Free English and Czech telephone speech corpus shared under the CC-BY-SA 3.0 license}},
author={Korvas, Mat\v{e}j and Pl\'{a}tek, Ond\v{r}ej and Du\v{s}ek, Ond\v{r}ej and \v{Z}ilka, Luk\'{a}\v{s} and Jur\v{c}\'{i}\v{c}ek, Filip},
booktitle={Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC 2014)},
pages={To Appear},
year={2014},
}


@mastersthesis{cifar,
  author = {Krizhevsky, Alex},
  title = {{Learning Multiple Layers of Features from Tiny Images}},
  school = {University of Toronto},
  year = {2009},
  url = {http://www.cs.toronto.edu/\~{}kriz/learning-features-2009-TR.pdf}
}


@inproceedings{imagenet,
  title = {ImageNet: A large-scale hierarchical image database},
  author = {Jia Deng and Wei Dong and Richard Socher and Li-Jia Li and Kai Li and Li Fei-Fei},
  booktitle = {2009 IEEE Conference on Computer Vision and Pattern Recognition},
  year = {2009},
  pages = {248--255}
}

@TechReport{lfw,
author = {Gary B. Huang and Manu Ramesh and Tamara Berg and
Erik Learned-Miller},
title = {Labeled Faces in the Wild: A Database for Studying
Face Recognition in Unconstrained Environments},
institution = {University of Massachusetts, Amherst},
year = 2007,
number = {07-49},
month = {October}
}


@incollection{sst,
  title = {{Recursive Deep Models for Semantic Compositionality Over a Sentiment Treebank}},
  author = {Richard Socher and Alex Perelygin and Jean Wu and Jason Chuang and Christopher Manning and Andrew Ng and Christopher Potts},
  booktitle = {{EMNLP}},
  year = {2013}
}


@article{distill_defense,
  author = {{Papernot}, Nicolas and {McDaniel}, Patrick and {Wu}, Xi and {Jha}, Somesh and {Swami}, Ananthram},
  title = "{Distillation as a Defense to Adversarial Perturbations against Deep Neural Networks}",
  journal = {arXiv e-prints},
  year = {2015},
  month = nov,
  archivePrefix = {arXiv},
  eprint = {1511.04508},
  primaryClass = {cs.CR}
}
63 changes: 31 additions & 32 deletions docs/joss_paper/paper.md
@@ -1,5 +1,5 @@
---
title: 'Cyphercat: A Python Package for Reproducibly Evaluating Adversarial Robustness
title: 'Cyphercat: A Python Package for Reproducibly Evaluating Adversarial Robustness'
tags:
- Python
- machine learning
@@ -46,39 +46,38 @@ bibliography: paper.bib
# Summary

With the proliferation of machine learning in everyday applications,
research efforts have increasingly focused on understanding the vulnerabilities of
machine learning models to privacy attacks.
For example, this can involve extracting information regarding the defining parameters of a _target_ model
or inferring details of data samples used to train the model.
These types of attacks pave the way for nefarious agents to infer potentially private information
from the training data or to manipulate the intended use of a trained model,
for example by forcing the model to produce a desired output.
Fundamentally assessing model vulnerabilities to privacy attacks remains an open-ended challenge,
as current attack and defense tactics are studied on a case by case basis.
research efforts have increasingly focused on understanding security
vulnerabilities throughout the machine learning pipeline.
Attacks at training time can manipulate a model's output by accessing
the training pipeline and poisoning the training data, while attacks
at inference time craft perturbed inputs that fool the trained
model [@goodfellow][@carlini].
Other attacks target machine-learning-as-a-service platforms,
extracting the defining parameters of a target model [@tramer].
Less work has focused on privacy attacks, where nefarious agents
can infer details of the training data from a targeted model [@mlleaks].
This has significant implications for user privacy and model sharing.
Fundamentally assessing model vulnerabilities to privacy attacks
remains an open-ended challenge, as current attack and defense
tactics are studied on a case-by-case basis.
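
To make the inference-time attack concrete, below is a minimal sketch of the fast gradient sign method of [@goodfellow], assuming a differentiable PyTorch classifier trained with cross-entropy loss; it is an illustration only, not part of the Cyphercat API.

```python
import torch
import torch.nn.functional as F

def fgsm(model, x, y, eps=0.03):
    # Fast gradient sign method [@goodfellow]: take one step of size eps
    # in the direction of the sign of the input gradient, which increases
    # the classification loss and can flip the model's prediction.
    x = x.clone().detach().requires_grad_(True)
    loss = F.cross_entropy(model(x), y)
    loss.backward()
    x_adv = x + eps * x.grad.sign()
    return x_adv.clamp(0.0, 1.0).detach()  # keep pixels in a valid range
```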

Cyphercat is an extensible Python package for benchmarking privacy
attack and defense efficacy in a reproducible environment.
The Cyphercat application programming interface (API) allows users to test the
robustness of a specified target model against several well-documented privacy
attacks [@mlleaks][@fredrikson2015model], which extract details of the training data from the model,
and to assess candidate defenses against those attacks.
The API is built on the PyTorch [@pytorch] machine learning library and
provides access to well-known image, audio, and text benchmark datasets used for machine learning applications.
The Cyphercat API includes the option to train on commonly used architectures,
with subsequent assessment of attack and defense performance.
The package also enables users to introduce custom datasets and model architectures.
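
As an illustration of the attack class being benchmarked, a minimal confidence-based membership-inference heuristic in the spirit of [@mlleaks] might look as follows; the function name and scoring rule are assumptions for exposition, not Cyphercat's implementation.

```python
import torch

@torch.no_grad()
def membership_scores(target_model, x):
    # Heuristic from the membership-inference literature [@mlleaks]:
    # models tend to be more confident on samples seen during training,
    # so a high maximum softmax probability suggests "member".
    probs = torch.softmax(target_model(x), dim=1)
    return probs.max(dim=1).values  # threshold these to predict membership
```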

``Cyphercat`` is an extensible Python package for benchmarking privacy attack and defense efficacy
in a reproduceable manner.
The ``Cyphercat`` application programming interface (API) allows users to test the robustness a specified
target model against several well-documented privacy attacks (such as those presented in [@mlleaks], [@fredrikson2015model])
that extract details of the training data from the model, with the option to assess defenses.
The API is based on the PyTorch [@pytorch] machine learning library, provides access to datasets
traditionally used for benchmarking machine learning models, and the option to train commonly used
architectures via the API, with subsequent assessment of attack and defense performance.
The package also permits users to introduce custom datasets in the image, audio, and text data type domains,
as well as custom architectures for target, attack, and defense models.
To use the API, a user must define a dataset, including data transformations,
and the desired architectures for the target model (the model being assessed for vulnerabilities)
and the attack model (the model used for generating an attack on the target model).
These are then fed into specified functions to initiate training, attacking, and defending, as sketched below.
The source code for Cyphercat is available [here](https://github.com/Lab41/cyphercat/).
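
A hypothetical end-to-end sketch of this workflow is given below; the commented `cyphercat` calls are assumed names for illustration only and should be checked against the API documentation.

```python
import torch.nn as nn
from torchvision import datasets, transforms

# 1. Define the dataset and its transformations.
transform = transforms.Compose([transforms.ToTensor()])
train_data = datasets.CIFAR10("data/", train=True, download=True,
                              transform=transform)

# 2. Define architectures for the target model (assessed for
#    vulnerabilities) and the attack model (used to attack the target).
target_model = nn.Sequential(
    nn.Flatten(),
    nn.Linear(3 * 32 * 32, 128), nn.ReLU(),
    nn.Linear(128, 10),
)
attack_model = nn.Sequential(
    nn.Linear(10, 64), nn.ReLU(),
    nn.Linear(64, 2),  # e.g., member vs. non-member of the training set
)

# 3. Feed these into training / attacking / defending routines
#    (function names below are assumptions, not the documented API).
# import cyphercat as cc
# cc.train(target_model, train_data)
# cc.attack(attack_model, target_model, train_data)
# cc.defend(target_model, strategy="distillation")  # cf. [@distill_defense]
```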


``Cyphercat`` is a flexible framework designed for machine learning practitioners to test model vulnerabilities
via various methods of attack and defense covering several data types.
Details regarding the ``Cyphercat`` API and its implementation within the Python ecosystem,
including further information on implemented datasets, attack and defense methods,
and performance metrics, are found in the online documentation.
# Acknowledgements
The authors acknowledge support from
We also acknowledge the financial support provided by the
# References
