Merge pull request #75 from zhampel/master
Adds initial JOSS paper structure. Updates diagram with our dataset fo…
MaAleBarr authored Feb 20, 2019
2 parents a0d02aa + 4f4f166 commit 4029ae8
Showing 5 changed files with 460 additions and 55 deletions.
2 changes: 1 addition & 1 deletion cyphercat/datadefs/cyphercat_dataset.py
@@ -73,5 +73,5 @@ def get_preload_split_fn(name=''):
         fn = PRELOAD_SPLIT_FN_DICT[name]
         return fn
     else:
-        raise ValueError('Invalid test statistic, {}, entered. Must be '
+        raise ValueError('Invalid dataset, {}, entered. Must be '
                          'in {}'.format(name, PRELOAD_SPLIT_FN_DICT.keys()))
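
For context, here is a minimal, self-contained sketch of the registry pattern this function implements. The split_voices placeholder and the dictionary contents are illustrative assumptions, not the package's actual loaders:

    # Hypothetical registry mapping dataset names to preload/split functions.
    def split_voices(data_dir):
        """Placeholder split function standing in for a real loader."""
        return {'train': [], 'test': []}

    PRELOAD_SPLIT_FN_DICT = {'voices': split_voices}

    def get_preload_split_fn(name=''):
        """Return the preload/split function registered under `name`."""
        if name in PRELOAD_SPLIT_FN_DICT:
            return PRELOAD_SPLIT_FN_DICT[name]
        raise ValueError('Invalid dataset, {}, entered. Must be '
                         'in {}'.format(name, PRELOAD_SPLIT_FN_DICT.keys()))

    fn = get_preload_split_fn('voices')   # returns split_voices
    # get_preload_split_fn('cifar')       # raises ValueError with the corrected message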
165 changes: 165 additions & 0 deletions docs/joss_paper/paper.bib
@@ -0,0 +1,165 @@

@article{mlleaks,
author = {Ahmed Salem and
Yang Zhang and
Mathias Humbert and
Mario Fritz and
Michael Backes},
title = {ML-Leaks: Model and Data Independent Membership Inference Attacks
and Defenses on Machine Learning Models},
journal = {CoRR},
volume = {abs/1806.01246},
year = {2018},
url = {http://arxiv.org/abs/1806.01246},
archivePrefix = {arXiv},
eprint = {1806.01246},
timestamp = {Mon, 13 Aug 2018 16:47:26 +0200},
biburl = {https://dblp.org/rec/bib/journals/corr/abs-1806-01246},
bibsource = {dblp computer science bibliography, https://dblp.org}
}


@inproceedings{fredrikson2015model,
title={Model inversion attacks that exploit confidence information and basic countermeasures},
author={Fredrikson, Matt and Jha, Somesh and Ristenpart, Thomas},
booktitle={Proceedings of the 22nd ACM SIGSAC Conference on Computer and Communications Security},
pages={1322--1333},
year={2015},
organization={ACM}
}


@inproceedings{pytorch,
title={Automatic differentiation in PyTorch},
author={Paszke, Adam and Gross, Sam and Chintala, Soumith and Chanan, Gregory and Yang, Edward and DeVito, Zachary and Lin, Zeming and Desmaison, Alban and Antiga, Luca and Lerer, Adam},
booktitle={NIPS-W},
year={2017}
}



@online{att_faces,
  author = {{AT\&T Laboratories Cambridge}},
  title = {The {AT\&T} Database of Faces},
  year = {2002},
  url = {https://www.cl.cam.ac.uk/research/dtg/attarchive/facedatabase.html}
}

@article{goodfellow,
  title={Explaining and Harnessing Adversarial Examples},
  author={Goodfellow, Ian J and Shlens, Jonathon and Szegedy, Christian},
  journal={CoRR},
  volume={abs/1412.6572},
  year={2015}
}

@inproceedings{carlini,
title={Adversarial examples are not easily detected: Bypassing ten detection methods},
author={Carlini, Nicholas and Wagner, David},
booktitle={Proceedings of the 10th ACM Workshop on Artificial Intelligence and Security},
pages={3--14},
year={2017},
organization={ACM}
}

@inproceedings{tramer,
title={Stealing Machine Learning Models via Prediction APIs.},
author={Tram{\`e}r, Florian and Zhang, Fan and Juels, Ari and Reiter, Michael K and Ristenpart, Thomas},
booktitle={USENIX Security Symposium},
pages={601--618},
year={2016}
}


@article{VOiCES,
author = {Colleen Richey and
Mar{\'{\i}}a A. Barrios and
Zeb Armstrong and
Chris Bartels and
Horacio Franco and
Martin Graciarena and
Aaron Lawson and
Mahesh Kumar Nandwana and
Allen R. Stauffer and
Julien van Hout and
Paul Gamble and
Jeff Hetherly and
Cory Stephenson and
Karl Ni},
title = {Voices Obscured in Complex Environmental Settings {(VOiCES)} corpus},
journal = {CoRR},
volume = {abs/1804.05053},
year = {2018},
url = {http://arxiv.org/abs/1804.05053},
archivePrefix = {arXiv},
eprint = {1804.05053},
timestamp = {Mon, 13 Aug 2018 16:49:06 +0200},
biburl = {https://dblp.org/rec/bib/journals/corr/abs-1804-05053},
bibsource = {dblp computer science bibliography, https://dblp.org}
}


@inproceedings{LibriSpeech,
title={{Librispeech: An ASR corpus based on public domain audio books}},
author={Panayotov, Vassil and Chen, Guoguo and Povey, Daniel and Khudanpur, Sanjeev},
booktitle={2015 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
pages={5206--5210},
year={2015},
organization={IEEE}
}


@mastersthesis{cifar,
    author = {Krizhevsky, Alex},
    title = {{Learning Multiple Layers of Features from Tiny Images}},
    school = {University of Toronto},
    year = {2009},
    url = {http://www.cs.toronto.edu/~kriz/learning-features-2009-TR.pdf}
}


@inproceedings{imagenet,
title={{ImageNet}: A large-scale hierarchical image database},
author={Jia Deng and Wei Dong and Richard Socher and Li-Jia Li and Kai Li and Li Fei-Fei},
booktitle={2009 IEEE Conference on Computer Vision and Pattern Recognition},
year={2009},
pages={248--255}
}

@TechReport{lfw,
author = {Gary B. Huang and Manu Ramesh and Tamara Berg and
Erik Learned-Miller},
title = {Labeled Faces in the Wild: A Database for Studying
Face Recognition in Unconstrained Environments},
institution = {University of Massachusetts, Amherst},
year = 2007,
number = {07-49},
month = {October}
}


@incollection{sst,
title = {{Recursive Deep Models for Semantic Compositionality Over a Sentiment Treebank}},
author = {Richard Socher and Alex Perelygin and Jean Wu and Jason Chuang and Christopher Manning and Andrew Ng and Christopher Potts},
booktitle = {{EMNLP}},
year = {2013}
}


@ARTICLE{distill_defense,
author = {{Papernot}, Nicolas and {McDaniel}, Patrick and {Wu}, Xi and {Jha},
Somesh and {Swami}, Ananthram},
title = "{Distillation as a Defense to Adversarial Perturbations against Deep Neural Networks}",
journal = {arXiv e-prints},
keywords = {Computer Science - Cryptography and Security, Computer Science - Machine Learning, Computer Science - Neural and Evolutionary Computing, Statistics - Machine Learning},
year = 2015,
month = nov,
eid = {arXiv:1511.04508},
pages = {arXiv:1511.04508},
archivePrefix = {arXiv},
eprint = {1511.04508},
primaryClass = {cs.CR},
adsurl = {https://ui.adsabs.harvard.edu/\#abs/2015arXiv151104508P},
adsnote = {Provided by the SAO/NASA Astrophysics Data System}
}
83 changes: 83 additions & 0 deletions docs/joss_paper/paper.md
@@ -0,0 +1,83 @@
---
title: 'Cyphercat: A Python Package for Reproducibly Evaluating Robustness Against Privacy Attacks'
tags:
- Python
- machine learning
- adversarial attacks
- model inversion
- model privacy
- model robustness
authors:
- name: Maria A. Barrios
orcid:
affiliation: "1"
email: [email protected]
- name: Paul Gamble
orcid:
affiliation: "1"
email: [email protected]
- name: Zigfried Hampel-Arias
orcid: 0000-0003-0253-9117
affiliation: "1"
email: [email protected]
- name: Nina Lopatina
orcid: 0000-0001-6844-4941
affiliation: "1"
email: [email protected]
- name: Michael Lomnitz
orcid: 0000-0001-5659-3501
affiliation: "1"
email: [email protected]
- name: Felipe A. Mejia
orcid: 0000-0001-6393-8408
affiliation: "1"
email: [email protected]
- name: Lucas Tindall
orcid: 0000-0003-1395-4818
affiliation: "1"
email: [email protected]
affiliations:
- name: Lab41 -- an In-Q-Tel Lab, Menlo Park, CA, USA
index: 1
date: DD MM 2019
bibliography: paper.bib
---

# Summary

With the proliferation of machine learning in everyday applications,
research efforts have increasingly focused on understanding security
vulnerabilities throughout the machine learning pipeline.
A model's output can be manipulated either at training time, by
accessing the training pipeline and poisoning the training data,
or at inference time, by perturbing inputs to fool the model
[@goodfellow; @carlini].
Other attacks target machine-learning-as-a-service platforms,
extracting the defining parameters of a target model [@tramer].
Less work has focused on privacy attacks, where nefarious agents
can infer details of the training data from a targeted model [@mlleaks].
This has significant implications for user privacy and model sharing.
Fundamentally assessing model vulnerabilities to privacy attacks
remains an open-ended challenge, as current attack and defense
tactics are studied on a case-by-case basis.
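
As a minimal illustration of this class of attacks, the sketch below
implements the simplest membership inference heuristic discussed in
[@mlleaks]: a model is typically more confident on examples it was
trained on, so an attacker thresholds the top softmax score. The
model, inputs, and threshold value here are illustrative assumptions,
not code from the package:

```python
import torch
import torch.nn.functional as F

def confidence_threshold_attack(target_model, x, threshold=0.9):
    """Guess membership from the target model's top softmax score.

    Models tend to be more confident on their own training examples;
    `threshold` is a free parameter an attacker would tune, e.g. with
    shadow models as in the membership inference literature.
    """
    target_model.eval()
    with torch.no_grad():
        probs = F.softmax(target_model(x), dim=1)
    top_confidence, _ = probs.max(dim=1)
    return top_confidence > threshold  # True -> guessed training-set member
```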

Cyphercat is an extensible Python package for benchmarking the efficacy
of privacy attacks and defenses in a reproducible environment.
The Cyphercat application programming interface (API) allows users to test the
robustness of a specified target model against several well-documented privacy
attacks [@mlleaks; @fredrikson2015model], which aim to extract details of the training data from the model.
Also included is the option to assess the efficacy of several implemented defense methods.
The API is built on the PyTorch [@pytorch] machine learning library and
provides access to well-known image, audio, and text benchmark datasets used in machine learning applications.
The Cyphercat API includes the option to train commonly used architectures,
with subsequent assessment of attack and defense performance.
The package also enables users to introduce custom datasets and model architectures.

To use the API, a user defines a dataset, including its data transformations,
and the desired architectures for the target model (the model being assessed for vulnerabilities)
and the attack model (the model used to mount an attack on the target model).
These are then passed to dedicated functions that initiate training, attacking, and defending.
The source code for Cyphercat is available [on GitHub](https://github.com/Lab41/cyphercat/).
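
The hypothetical outline below sketches the shape of that workflow in
PyTorch. The dataset, the architectures, and especially the commented
entry points are illustrative assumptions, not the package's documented API:

```python
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

# 1. Define the dataset and its transformations.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])
data = torchvision.datasets.CIFAR10(root='./data', train=True,
                                    download=True, transform=transform)

# 2. Define architectures for the target model and the attack model.
target_model = torchvision.models.resnet18(num_classes=10)
attack_model = nn.Sequential(nn.Linear(10, 64), nn.ReLU(), nn.Linear(64, 2))

# 3. Hand both to the training / attacking / defending entry points.
#    These calls are hypothetical stand-ins, not Cyphercat's actual API:
# cyphercat.train(target_model, data, epochs=10)
# cyphercat.attack(attack_model, target_model, data)
# cyphercat.defend(target_model, method='distillation')
```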


# References