Skip to content

Commit

Permalink
Convert games to h5
Browse files Browse the repository at this point in the history
  • Loading branch information
tasuki committed Mar 7, 2018
1 parent d69bb90 commit 9bc89ed
Show file tree
Hide file tree
Showing 5 changed files with 57 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
.ipynb_checkpoints
*.pyc
data/records*.txt
data/records*.h5
env/
Empty file added munch/__init__.py
Empty file.
53 changes: 53 additions & 0 deletions munch/convert_records.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
from mutable import Converter

import numpy as np
import h5py
import sys

# Path prefix for the record files, taken from the first command-line
# argument. It is concatenated directly with the file names below, so it
# presumably needs a trailing path separator -- TODO confirm with the caller.
data_dir = sys.argv[1]

# Single converter instance shared by every convert_set() call.
c = Converter()

def add_to_set(dset, data, inx_prev):
    """Store a batch of samples in ``dset`` starting at row ``inx_prev``.

    Args:
        dset: Target supporting slice assignment along its first axis
            (in this script, the dataset created by ``convert_set``).
        data: Sequence of samples; ``len(data)`` rows are written.
        inx_prev: Index of the first row to write.

    Returns:
        The index one past the last row written, i.e. the value to pass
        as ``inx_prev`` for the next batch.
    """
    count = len(data)
    inx_cur = inx_prev + count
    # Progress log: the half-open row range covered by this batch.
    print(inx_prev, inx_cur)

    # An empty batch has nothing to store. Skipping the assignment avoids
    # depending on the target accepting an empty value for a zero-length
    # slice (array-like targets with more than one axis reject it).
    if count:
        dset[inx_prev:inx_cur] = data

    return inx_cur

def convert_set(path, hf):
    """Convert one text file of game records into a "samples" dataset in ``hf``.

    Every line of the file is treated as one record. Each record is
    stripped and passed to the module-level converter ``c``; the samples it
    returns are written to the dataset in batches of 1000 records.

    Args:
        path: Path of the text file to read, one record per line.
        hf: Open, writable HDF5 file (or group) in which the "samples"
            dataset is created.
    """
    print()
    print(path)

    # Read everything up front and close the file immediately; the lines
    # are traversed twice (once to size the dataset, once to convert).
    with open(path) as records:
        games = records.readlines()

    # The dataset is sized as (number of ';' + 1) rows per record.
    # NOTE(review): this assumes c.convert() yields exactly that many
    # samples per record -- confirm against Converter.
    moves = sum(record.count(";") + 1 for record in games)
    print(len(games))
    print(moves)

    dset = hf.create_dataset("samples", shape=(moves, 9, 9, 9), dtype=np.uint8)

    inx_prev = 0
    data = []
    for cur, record in enumerate(games, start=1):
        data.extend(c.convert(record.strip()))
        # Flush every 1000 records so the pending batch stays bounded.
        if cur % 1000 == 0:
            inx_prev = add_to_set(dset, data, inx_prev)
            data = []

    # Flush whatever is left after the last full batch.
    add_to_set(dset, data, inx_prev)

def convert_all():
    """Convert the train, valid and test record files to HDF5.

    For each data set name, reads ``<data_dir><name>.txt`` and writes
    ``<data_dir><name>.h5``, overwriting any existing output file.

    NOTE(review): ``data_dir`` is prepended without a path separator, so it
    presumably has to end with one -- confirm with the calling script.
    """
    names = (
        "records-1-train",
        "records-2-valid",
        "records-3-test",
    )

    for name in names:
        target = "%s%s.h5" % (data_dir, name)
        source = "%s%s.txt" % (data_dir, name)
        # Mode "w" creates the output file, truncating it if it exists.
        with h5py.File(target, "w") as hf:
            convert_set(source, hf)

# Runs at module level: the conversion starts whenever this file is
# executed (or imported).
convert_all()
2 changes: 2 additions & 0 deletions munch/process_games.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,5 @@ cat "$DATADIR"/records-all.txt | sort | uniq -u > "$DATADIR"/records-clean.txt

# create train/valid/test data sets
python split_records.py "$DATADIR"

# convert the train/valid/test record files to h5
python convert_records.py "$DATADIR"
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
jupyter==1.0.0
numpy==1.14.1
h5py==2.7.1
Keras==2.1.5

0 comments on commit 9bc89ed

Please sign in to comment.