dataset API
ebursztein committed Oct 28, 2021
1 parent f896e85 commit 3523a55
Showing 10 changed files with 1,011 additions and 2 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -19,6 +19,8 @@ __pycache__/
 *.prof
 # C extensions
 *.so
+tmp/
+algo_arch_implem_v1_train/
 
 # Distribution / packaging
 .Python
279 changes: 279 additions & 0 deletions notebooks/dataset_api.ipynb
```python
from scaaml.io import Dataset
import numpy as np
```
# Specify dummy data
```python
root_path = './'
architecture = 'arch'
implementation = 'implem'
algorithm = 'algo'
version = 1

# Measurement info: each example carries one 1024-point power trace.
minfo = {
    "trace1": {
        "type": "power",
        "len": 1024,
    }
}

# Attack-point info: a 16-byte key whose byte values lie in [0, 256).
apinfo = {
    "key": {
        "len": 16,
        "max_val": 256
    }
}

chip_id = 1  # which chip this was captured on
comment = "this is a test"
purpose = "train"
examples_per_shard = 1
```
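
These two dicts are the contract every example must satisfy: `minfo` fixes the trace length, `apinfo` the key length and its value range. A quick sanity check before writing might look like the following sketch (`check_example` is our own illustrative helper, not part of the scaaml API):

```python
# Illustrative helper (not scaaml API): verify one example against the
# declared attack-point and measurement shapes before it is written.
def check_example(attack_points, measurements):
    for name, info in apinfo.items():
        values = attack_points[name]
        assert len(values) == info["len"]
        assert all(0 <= v < info["max_val"] for v in values)
    for name, info in minfo.items():
        assert len(measurements[name]) == info["len"]
```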
## Generate fake data
```python
# randint's upper bound is exclusive, so use 256 to cover the full
# byte range [0, 255] declared by apinfo's max_val.
key = np.random.randint(0, 256, 16)
key2 = np.random.randint(0, 256, 16)
trace1 = np.random.rand(1024)
```
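
For a bigger smoke test, the same fake captures can be generated in bulk with plain NumPy; the `fake_keys`/`fake_traces` names below are ours, not anything scaaml expects:

```python
# Hypothetical bulk variant of the cell above: n random captures at once.
n_examples = 4
fake_keys = np.random.randint(0, 256, (n_examples, 16))
fake_traces = np.random.rand(n_examples, 1024)
```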
# Creating a dataset
## Init
```python
ds = Dataset(root_path=root_path,
             architecture=architecture,
             implementation=implementation,
             algorithm=algorithm,
             version=version,
             purpose=purpose,
             comment=comment,
             chip_id=chip_id,
             examples_per_shard=examples_per_shard,
             measurements_info=minfo,
             attack_points_info=apinfo)
```

Output:

```
[Warning] Path exist, some files might be over-written
Dataset path: algo_arch_implem_v1_train
```
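
Judging from the printed path, the dataset directory name is assembled from the constructor arguments. The pattern below reproduces this run's output; it is an observation, not documented scaaml behaviour:

```python
# Observed naming pattern (assumption inferred from the output above).
expected_dir = f"{algorithm}_{architecture}_{implementation}_v{version}_{purpose}"
assert expected_dir == "algo_arch_implem_v1_train"
```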
## Writing a shard
```python
ds.new_shard(key, 1, split='train')
ds.write_example({"key": key,
                  # "sub_byte_in": key
                  }, {"trace1": trace1})
ds.close_shard()
```

Output:

```
train
defaultdict(<class 'int'>, {})
```
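
This is where the so-far-unused `key2` would come in: presumably the same `new_shard` / `write_example` / `close_shard` sequence yields a second shard. A sketch reusing only the calls shown above; treating the second argument as a shard index (mirroring the `1` before) is our assumption:

```python
# Hypothetical second shard for key2, one fresh random trace as its example.
ds.new_shard(key2, 2, split='train')
ds.write_example({"key": key2}, {"trace1": np.random.rand(1024)})
ds.close_shard()
```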
# Using a dataset
```python
dataset_path = './algo_arch_implem_v1_train'
```
## Display dataset info
```python
Dataset.summary(dataset_path)
```

Output:

```
[Dataset Summary]
Info
--------------  --------------
architecture    arch
implementation  implem
algorithm       algo
version         1
chip_id         1
comment         this is a test
purpose         train
compression     GZIP
--------------  --------------

Attack Points
ap      len    max_val
----  -----  ---------
key      16        256

Measurements
name    type      len
------  ------  -----
trace1  power    1024

Content
split      num_keys    num_examples
-------  ----------  --------------
train             1               1
```
```python
trace_len = 1024

train_ds, inputs, outputs = Dataset.as_tfdataset(dataset_path,
                                                 split='train',
                                                 attack_points='key',
                                                 traces='trace1',
                                                 traces_max_len=trace_len,
                                                 bytes=1,
                                                 shards=1)
```

Output:

```
reloading algo_arch_implem_v1_train\info.json
Dataset path: algo_arch_implem_v1_train\algo_arch_implem_v1_train
```
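
Since `train_ds` is an ordinary `tf.data.Dataset` yielding `({'trace1': ...}, {'key_1': ...})` batches, it can be fed straight to Keras by naming the input and output layers after those dict keys. A minimal sketch, assuming TensorFlow is installed; the input shape is read off the dataset's element spec rather than hard-coded:

```python
import tensorflow as tf

# Derive the per-example trace shape from the dataset itself (drop batch dim).
x_spec, y_spec = train_ds.element_spec
trace_shape = x_spec['trace1'].shape[1:]

# Toy classifier: layer names must match the dataset's dict keys.
inp = tf.keras.Input(shape=trace_shape, name='trace1')
hidden = tf.keras.layers.Dense(32, activation='relu')(tf.keras.layers.Flatten()(inp))
out = tf.keras.layers.Dense(256, activation='softmax', name='key_1')(hidden)

model = tf.keras.Model(inp, out)
model.compile(optimizer='adam', loss='categorical_crossentropy')
model.fit(train_ds.take(1), epochs=1)
```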
```python
for batch in train_ds.take(1):
    print('x ->', batch[0].keys(), batch[0]['trace1'].shape)
    print('y ->', batch[1].keys(), batch[1]['key_1'].shape)
```

Output:

```
x -> dict_keys(['trace1']) (32, 32, 32)
y -> dict_keys(['key_1']) (32, 256)
```
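
To inspect raw values outside the training loop, one batch can be pulled and converted to NumPy with standard TensorFlow calls:

```python
# Grab a single batch and move it to NumPy for ad-hoc inspection.
x, y = next(iter(train_ds))
traces = x['trace1'].numpy()
labels = y['key_1'].numpy()
print(traces.shape, labels.argmax(axis=-1)[:5])  # one-hot labels -> byte values
```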
Empty file added scaaml/io/README.md
1 change: 1 addition & 0 deletions scaaml/io/__init__.py
@@ -0,0 +1 @@
+from .dataset import Dataset # noqa