From 671c9708534e9d0595fb01ca058f6253216e8bbb Mon Sep 17 00:00:00 2001
From: Vit Brunner
Date: Tue, 6 Mar 2018 11:51:51 +0100
Subject: [PATCH] Add munching

---
 munch/README.md        |  3 +++
 munch/get_records.py   | 14 ++++++++++++++
 munch/process_games.sh | 16 ++++++++++++++++
 munch/split_records.py | 25 +++++++++++++++++++++++++
 4 files changed, 58 insertions(+)
 create mode 100644 munch/README.md
 create mode 100644 munch/get_records.py
 create mode 100755 munch/process_games.sh
 create mode 100644 munch/split_records.py

diff --git a/munch/README.md b/munch/README.md
new file mode 100644
index 0000000..a470c59
--- /dev/null
+++ b/munch/README.md
@@ -0,0 +1,3 @@
+Reads the game records from json, chooses which ones to use, splits them into training, validation, and test sets.
+
+Just cd here and run `bash process_games.sh`
diff --git a/munch/get_records.py b/munch/get_records.py
new file mode 100644
--- /dev/null
+++ b/munch/get_records.py
@@ -0,0 +1,14 @@
+"""Print the "record" field of each game in a JSON file to stdout."""
+
+import json
+import sys
+
+# Path to the JSON file, taken from the first command-line argument.
+fname = sys.argv[1]
+# Use a context manager so the input file is closed once parsing is done.
+with open(fname) as f:
+    data = json.load(f)
+
+# One print call per game; the caller redirects stdout into a file.
+for game in data:
+    print(game["record"])
diff --git a/munch/process_games.sh b/munch/process_games.sh
new file mode 100755
--- /dev/null
+++ b/munch/process_games.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+
+# Abort on the first failing step so later steps never run on partial output.
+set -euo pipefail
+
+DATADIR="../data/"
+
+# get game records
+python get_records.py "$DATADIR"/bga-games-info.json > "$DATADIR"/records-all.txt
+
+# drop every record that appears more than once (uniq -u keeps only lines
+# that occur exactly once, so all copies of a duplicate are removed)
+sort "$DATADIR"/records-all.txt | uniq -u > "$DATADIR"/records-clean.txt
+
+# create train/valid/test data sets
+python split_records.py "$DATADIR"
diff --git a/munch/split_records.py b/munch/split_records.py
new file mode 100644
--- /dev/null
+++ b/munch/split_records.py
@@ -0,0 +1,25 @@
+"""Split records-clean.txt into train, validation, and test files."""
+
+import sys
+
+# Directory that holds records-clean.txt; the three outputs are written here.
+data_dir = sys.argv[1]
+
+with open(data_dir + "/records-clean.txt") as f:
+    records = f.readlines()
+
+# Open the outputs in one `with` so all three are flushed and closed on exit,
+# even if a write fails part-way through.
+with open(data_dir + "/records-1-train.txt", "w") as train, \
+        open(data_dir + "/records-2-valid.txt", "w") as valid, \
+        open(data_dir + "/records-3-test.txt", "w") as test:
+    for counter, record in enumerate(records, start=1):
+        # counter % 5 + 1 cycles through 2, 3, 4, 5, 1: out of every five
+        # records, three go to train, one to valid, and one to test.
+        fifth = counter % 5 + 1
+        if fifth == 5:
+            test.write(record)
+        elif fifth == 4:
+            valid.write(record)
+        else:
+            train.write(record)