-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathgenerate.py
92 lines (67 loc) · 2.44 KB
/
generate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import os
import numpy as np
# Number of states in each generated MDP
S = 50

# Number of actions available in every state
A = 2

# Number of MDP files to generate
outputFileNumber = 100
def _write_mdp_table(mdpfile, table, S, A):
    """Write one S x A x S table, one line per (state, action) pair."""
    for s in range(S):
        for a in range(A):
            # Every entry is followed by a tab (including the last one), so
            # each row ends with a trailing tab before the newline.
            row = "".join("{0:.6f}\t".format(table[s][a][sPrime])
                          for sPrime in range(S))
            mdpfile.write(row + "\n")


def write_mdp(filename, S, A, R, T, gamma):
    """Write an MDP description to a text file.

    The file layout is, in order:
      * one line with S, one line with A;
      * S * A lines holding the reward function, each with the S values
        R[s][a][0..S-1] printed with six decimals and tab-terminated;
      * S * A lines holding the transition function T in the same layout;
      * one line with gamma printed with two decimals.

    Parameters:
        filename: path of the file to create (overwritten if it exists).
        S: number of states.
        A: number of actions.
        R: rewards, indexable as R[s][a][sPrime].
        T: transition probabilities, indexable as T[s][a][sPrime].
        gamma: discount factor.

    Returns:
        None.
    """
    # The context manager guarantees the file is closed even if formatting
    # or writing raises part-way through.
    with open(filename, 'w') as mdpfile:
        # Write S and A
        mdpfile.write(str(S) + '\n')
        mdpfile.write(str(A) + '\n')

        # Write Reward function
        _write_mdp_table(mdpfile, R, S, A)

        # Write Transition function
        _write_mdp_table(mdpfile, T, S, A)

        # Write gamma
        mdpfile.write("{0:.2f}".format(gamma))
        mdpfile.write("\n")
# Make the output directory if it does not exist
if not os.path.exists('generated'):
    os.makedirs('generated')

# Seeds are drawn from range(10000) and must be unique per file, so asking
# for more files than that would make the seed search below loop forever.
if outputFileNumber > 10000:
    raise ValueError("outputFileNumber must not exceed 10000 unique seeds")

# Seeds already used, so that every generated file gets a distinct one
seeds = set()

# For each file to be generated
for i in range(outputFileNumber):

    # Find a unique random seed and reseed the generator with it
    while True:
        seed = np.random.randint(10000)
        if seed not in seeds:
            seeds.add(seed)
            np.random.seed(seed)
            break

    # Construct the filename
    fn = './generated/newMDP' + "{0:02d}".format(i) + '.txt'

    # Initialize transition and reward arrays, indexed [s][a][sPrime]
    T = np.zeros((S, A, S))
    R = np.zeros((S, A, S))

    # For each initial state and each action
    for s in range(S):
        for a in range(A):

            # Generate a random vector of 0s and 1s, one entry per sPrime,
            # marking which transitions exist. It must stay a vector (not
            # be summed to a scalar) so it can mask T and R element-wise.
            while True:
                k = np.array([np.random.choice([0, 1]) for _ in range(S)])
                # Make sure that at least one transition exists
                if np.sum(k) != 0:
                    break

            # Find transition probabilities s.t. their sum is 1
            T[s][a][:] = k * np.random.random(S)
            T[s][a][:] = T[s][a][:] / np.sum(T[s][a][:])

            # Find rewards between -1 and 1 (zero where no transition exists)
            R[s][a] = k * (2 * np.random.random(S) - np.ones(S))

    # Generate a random gamma
    g = np.random.uniform(0, 1)

    # Write S, A, R, T and gamma to file
    write_mdp(fn, S, A, R, T, g)