Skip to content

Commit

Permalink
DataBatch support gpu (apache#2797)
Browse files Browse the repository at this point in the history
* DataBatch support gpu

* DataBatch support gpu and storage data use NDArray

* DataBatch support gpu and storage data use NDArray

* DataBatch support gpu and storage data use NDArray

* fix DataBatch init error and support shuffle
  • Loading branch information
cjfan authored and piiswrong committed Jul 23, 2016
1 parent fde63e8 commit a1b404c
Showing 1 changed file with 10 additions and 10 deletions.
20 changes: 10 additions & 10 deletions python/mxnet/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from .base import check_call, ctypes2docstring
from .ndarray import NDArray
from .ndarray import array
from .ndarray import concatenate


class DataBatch(object):
Expand Down Expand Up @@ -307,11 +308,11 @@ def _init_data(data, allow_empty, default_name):
raise TypeError("Input must be NDArray, numpy.ndarray, " + \
"a list of them or dict with them as values")
for k, v in data.items():
if isinstance(v, NDArray):
data[k] = v.asnumpy()
for k, v in data.items():
if not isinstance(v, np.ndarray):
raise TypeError(("Invalid type '%s' for %s, " % (type(v), k)) + \
if not isinstance(v, NDArray):
try:
data[k] = array(v)
except:
raise TypeError(("Invalid type '%s' for %s, " % (type(v), k)) + \
"should be NDArray or numpy.ndarray")

return list(data.items())
Expand Down Expand Up @@ -348,8 +349,8 @@ def __init__(self, data, label=None, batch_size=1, shuffle=False, last_batch_han
if shuffle:
idx = np.arange(self.data[0][1].shape[0])
np.random.shuffle(idx)
self.data = [(k, v[idx]) for k, v in self.data]
self.label = [(k, v[idx]) for k, v in self.label]
self.data = [(k, array(v.asnumpy()[idx], v.context)) for k, v in self.data]
self.label = [(k, array(v.asnumpy()[idx], v.context)) for k, v in self.label]

self.data_list = [x[1] for x in self.data] + [x[1] for x in self.label]
self.num_source = len(self.data_list)
Expand Down Expand Up @@ -411,11 +412,10 @@ def _getdata(self, data_source):
"""Load data from underlying arrays, internal use only"""
assert(self.cursor < self.num_data), "DataIter needs reset."
if self.cursor + self.batch_size <= self.num_data:
return [array(x[1][self.cursor:self.cursor+self.batch_size]) for x in data_source]
return [x[1][self.cursor:self.cursor+self.batch_size] for x in data_source]
else:
pad = self.batch_size - self.num_data + self.cursor
return [array(np.concatenate((x[1][self.cursor:], x[1][:pad]),
axis=0)) for x in data_source]
return [concatenate([x[1][self.cursor:], x[1][:pad]]) for x in data_source]

def getdata(self):
return self._getdata(self.data)
Expand Down

0 comments on commit a1b404c

Please sign in to comment.