
Updates JOSS paper documents based on comments
zhampel committed Feb 5, 2019
1 parent 7e88ca4 commit 52808dd
Showing 2 changed files with 159 additions and 32 deletions.
128 changes: 128 additions & 0 deletions docs/joss_paper/paper.bib
@@ -35,3 +35,131 @@ @inproceedings{pytorch
booktitle={NIPS-W},
year={2017}
}



@online{att_faces,
  author = {{AT\&T Laboratories Cambridge}},
  title = {The AT\&T Database of Faces},
  year = {2002},
  url = {https://www.cl.cam.ac.uk/research/dtg/attarchive/facedatabase.html}
}

@article{goodfellow,
  title = {Explaining and harnessing adversarial examples},
  author = {Goodfellow, Ian J and Shlens, Jonathon and Szegedy, Christian},
  journal = {CoRR},
  year = {2015}
}

@inproceedings{carlini,
title={Adversarial examples are not easily detected: Bypassing ten detection methods},
author={Carlini, Nicholas and Wagner, David},
booktitle={Proceedings of the 10th ACM Workshop on Artificial Intelligence and Security},
pages={3--14},
year={2017},
organization={ACM}
}

@inproceedings{tramer,
title={Stealing Machine Learning Models via Prediction APIs},
author={Tram{\`e}r, Florian and Zhang, Fan and Juels, Ari and Reiter, Michael K and Ristenpart, Thomas},
booktitle={USENIX Security Symposium},
pages={601--618},
year={2016}
}


@article{VOiCES,
author = {Colleen Richey and
Mar{\'{\i}}a A. Barrios and
Zeb Armstrong and
Chris Bartels and
Horacio Franco and
Martin Graciarena and
Aaron Lawson and
Mahesh Kumar Nandwana and
Allen R. Stauffer and
Julien van Hout and
Paul Gamble and
Jeff Hetherly and
Cory Stephenson and
Karl Ni},
title        = {Voices Obscured in Complex Environmental Settings {(VOiCES)} corpus},
journal = {CoRR},
volume = {abs/1804.05053},
year = {2018},
url = {http://arxiv.org/abs/1804.05053},
archivePrefix = {arXiv},
eprint = {1804.05053},
}


@inproceedings{LibriSpeech,
title={{Free English and Czech telephone speech corpus shared under the CC-BY-SA 3.0 license}},
author={Korvas, Mat\v{e}j and Pl\'{a}tek, Ond\v{r}ej and Du\v{s}ek, Ond\v{r}ej and \v{Z}ilka, Luk\'{a}\v{s} and Jur\v{c}\'{i}\v{c}ek, Filip},
booktitle={Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC 2014)},
pages={To Appear},
year={2014},
}


@mastersthesis{cifar,
  author = {Krizhevsky, Alex},
  title = {{Learning Multiple Layers of Features from Tiny Images}},
  school = {University of Toronto},
  year = {2009},
  url = {http://www.cs.toronto.edu/\~{}kriz/learning-features-2009-TR.pdf}
}


@inproceedings{imagenet,
  title = {ImageNet: A large-scale hierarchical image database},
  author = {Jia Deng and Wei Dong and Richard Socher and Li-Jia Li and Kai Li and Li Fei-Fei},
  booktitle = {2009 IEEE Conference on Computer Vision and Pattern Recognition},
  year = {2009},
  pages = {248--255}
}

@TechReport{lfw,
author = {Gary B. Huang and Manu Ramesh and Tamara Berg and
Erik Learned-Miller},
title = {Labeled Faces in the Wild: A Database for Studying
Face Recognition in Unconstrained Environments},
institution = {University of Massachusetts, Amherst},
year = 2007,
number = {07-49},
month = {October}
}


@incollection{sst,
  title = {{Recursive Deep Models for Semantic Compositionality Over a Sentiment Treebank}},
  author = {Richard Socher and Alex Perelygin and Jean Wu and Jason Chuang and Christopher Manning and Andrew Ng and Christopher Potts},
  booktitle = {{EMNLP}},
  year = {2013}
}


@article{distill_defense,
  author = {{Papernot}, Nicolas and {McDaniel}, Patrick and {Wu}, Xi and {Jha}, Somesh and {Swami}, Ananthram},
  title = "{Distillation as a Defense to Adversarial Perturbations against Deep Neural Networks}",
  journal = {arXiv e-prints},
  year = {2015},
  month = nov,
  archivePrefix = {arXiv},
  eprint = {1511.04508},
  primaryClass = {cs.CR}
}
63 changes: 31 additions & 32 deletions docs/joss_paper/paper.md
@@ -1,5 +1,5 @@
---
title: 'Cyphercat: A Python Package for Reproducibly Evaluating Adversarial Robustness
title: 'Cyphercat: A Python Package for Reproducibly Evaluating Adversarial Robustness'
tags:
- Python
- machine learning
@@ -46,39 +46,38 @@ bibliography: paper.bib
# Summary

With the proliferation of machine learning in everyday applications,
research efforts have increasingly focused on understanding the vulnerabilities of
machine learning models to privacy attacks.
For example, this can involve extracting information regarding the defining parameters of a _target_ model
or inferring details of data samples used to train the model.
These types of attacks pave the way for nefarious agents to infer potentially private information
from the training data or to manipulate the intended use of a trained model,
for example by forcing the model to produce a desired output.
Fundamentally assessing model vulnerabilities to privacy attacks remains an open-ended challenge,
as current attack and defense tactics are studied on a case by case basis.
research efforts have increasingly focused on understanding security
vulnerabilities throughout the machine learning pipeline.
Attacks at training time can manipulate a model's output by accessing
the training pipeline and poisoning the training data, while attacks
at inference time craft perturbed inputs that fool the trained
model [@goodfellow][@carlini].
Other attacks target machine-learning-as-a-service platforms,
extracting the defining parameters of a target model [@tramer].
Less work has focused on privacy attacks, where nefarious agents
can infer details of the training data from a targeted model [@mlleaks].
This has significant implications for user privacy and model sharing.
Fundamentally assessing model vulnerabilities to privacy attacks
remains an open-ended challenge, as current attack and defense
tactics are studied on a case-by-case basis.
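
To make the inference-time attack concrete, below is a minimal sketch of the fast gradient sign method of [@goodfellow], assuming a differentiable PyTorch classifier trained with cross-entropy loss; it is an illustration only, not part of the Cyphercat API.

```python
import torch
import torch.nn.functional as F

def fgsm(model, x, y, eps=0.03):
    # Fast gradient sign method [@goodfellow]: take one step of size eps
    # in the direction of the sign of the input gradient, which increases
    # the classification loss and can flip the model's prediction.
    x = x.clone().detach().requires_grad_(True)
    loss = F.cross_entropy(model(x), y)
    loss.backward()
    x_adv = x + eps * x.grad.sign()
    return x_adv.clamp(0.0, 1.0).detach()  # keep pixels in a valid range
```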

Cyphercat is an extensible Python package for benchmarking privacy
attack and defense efficacy in a reproducible environment.
The Cyphercat application programming interface (API) allows users to test the
robustness of a specified target model against several well-documented privacy
attacks [@mlleaks][@fredrikson2015model], which extract details of the training data from the model,
and to assess candidate defenses against those attacks.
The API is built on the PyTorch [@pytorch] machine learning library and
provides access to well-known image, audio, and text benchmark datasets used for machine learning applications.
The Cyphercat API includes the option to train on commonly used architectures,
with subsequent assessment of attack and defense performance.
The package also enables users to introduce custom datasets and model architectures.
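
As an illustration of the attack class being benchmarked, a minimal confidence-based membership-inference heuristic in the spirit of [@mlleaks] might look as follows; the function name and scoring rule are assumptions for exposition, not Cyphercat's implementation.

```python
import torch

@torch.no_grad()
def membership_scores(target_model, x):
    # Heuristic from the membership-inference literature [@mlleaks]:
    # models tend to be more confident on samples seen during training,
    # so a high maximum softmax probability suggests "member".
    probs = torch.softmax(target_model(x), dim=1)
    return probs.max(dim=1).values  # threshold these to predict membership
```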

``Cyphercat`` is an extensible Python package for benchmarking privacy attack and defense efficacy
in a reproduceable manner.
The ``Cyphercat`` application programming interface (API) allows users to test the robustness a specified
target model against several well-documented privacy attacks (such as those presented in [@mlleaks], [@fredrikson2015model])
that extract details of the training data from the model, with the option to assess defenses.
The API is based on the PyTorch [@pytorch] machine learning library, provides access to datasets
traditionally used for benchmarking machine learning models, and the option to train commonly used
architectures via the API, with subsequent assessment of attack and defense performance.
The package also permits users to introduce custom datasets in the image, audio, and text data type domains,
as well as custom architectures for target, attack, and defense models.
To use the API, a user must define a dataset, including data transformations,
and the desired architectures for the target model (the model being assessed for vulnerabilities)
and the attack model (the model used for generating an attack on the target model).
These are then fed into specified functions to initiate training, attacking, and defending, as sketched below.
The source code for Cyphercat is available [here](https://github.com/Lab41/cyphercat/).
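
A hypothetical end-to-end sketch of this workflow is given below; the commented `cyphercat` calls are assumed names for illustration only and should be checked against the API documentation.

```python
import torch.nn as nn
from torchvision import datasets, transforms

# 1. Define the dataset and its transformations.
transform = transforms.Compose([transforms.ToTensor()])
train_data = datasets.CIFAR10("data/", train=True, download=True,
                              transform=transform)

# 2. Define architectures for the target model (assessed for
#    vulnerabilities) and the attack model (used to attack the target).
target_model = nn.Sequential(
    nn.Flatten(),
    nn.Linear(3 * 32 * 32, 128), nn.ReLU(),
    nn.Linear(128, 10),
)
attack_model = nn.Sequential(
    nn.Linear(10, 64), nn.ReLU(),
    nn.Linear(64, 2),  # e.g., member vs. non-member of the training set
)

# 3. Feed these into training / attacking / defending routines
#    (function names below are assumptions, not the documented API).
# import cyphercat as cc
# cc.train(target_model, train_data)
# cc.attack(attack_model, target_model, train_data)
# cc.defend(target_model, strategy="distillation")  # cf. [@distill_defense]
```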


``Cyphercat`` is a flexible framework designed for machine learning practitioners to test model vulnerabilities
via various methods of attack and defense covering several data types.
Details regarding the ``Cyphercat`` API and its implementation within the Python ecosystem,
including further information on implemented datasets, attack and defense methods,
and performance metrics, are found in the online documentation.
# Acknowledgements
The authors acknowledge support from
We also acknowledge the financial support provided by the
# References
