-
Notifications
You must be signed in to change notification settings - Fork 66
/
Copy pathbuffer.py
46 lines (38 loc) · 1.01 KB
/
buffer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import numpy as np
import random
from collections import deque
class MemoryBuffer:
    """
    fixed-capacity replay memory holding (s, a, r, s1) transitions

    Transitions live in a deque bounded by ``size``: once the buffer is
    full, adding a new transition silently discards the oldest one.
    """

    def __init__(self, size):
        """
        creates an empty replay memory buffer
        :param size: maximum number of transitions kept at any time
        """
        self.buffer = deque(maxlen=size)
        self.maxSize = size

    def __len__(self):
        """
        :return: number of transitions currently stored
        """
        return len(self.buffer)

    @property
    def len(self):
        """
        number of transitions currently stored (read-only)

        Kept so that existing ``buf.len`` attribute reads keep working. It
        is derived from the deque instead of a separately maintained counter,
        so it can never drift from the real content. Prefer ``len(buf)`` in
        new code.
        :return: current number of stored transitions
        """
        return len(self.buffer)

    def sample(self, count):
        """
        samples a random batch from the replay memory buffer
        :param count: batch size; clamped to the number of stored transitions
        :return: tuple (s_arr, a_arr, r_arr, s1_arr) of float32 numpy arrays,
                 one entry per sampled transition (empty arrays if the
                 buffer is empty)
        """
        # Never ask for more transitions than are available; random.sample
        # would raise ValueError otherwise.
        count = min(count, len(self.buffer))
        batch = random.sample(self.buffer, count)

        s_arr = np.array([t[0] for t in batch], dtype=np.float32)
        a_arr = np.array([t[1] for t in batch], dtype=np.float32)
        r_arr = np.array([t[2] for t in batch], dtype=np.float32)
        s1_arr = np.array([t[3] for t in batch], dtype=np.float32)

        return s_arr, a_arr, r_arr, s1_arr

    def add(self, s, a, r, s1):
        """
        adds a particular transition in the memory buffer
        :param s: current state
        :param a: action taken
        :param r: reward received
        :param s1: next state
        :return:
        """
        # The deque's maxlen evicts the oldest transition when full, so no
        # manual size bookkeeping is needed here.
        self.buffer.append((s, a, r, s1))