Skip to content

Commit

Permalink
fix numpy convert
Browse files Browse the repository at this point in the history
  • Loading branch information
tqchen committed May 16, 2014
1 parent a7f3d7e commit 2be3f6e
Show file tree
Hide file tree
Showing 5 changed files with 64 additions and 6 deletions.
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Copyright (c) 2014 Tianqi Chen
Copyright (c) 2014 by Tianqi Chen and Contributors

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
Expand Down
2 changes: 2 additions & 0 deletions python/example/demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,9 @@
i += 1

csr = scipy.sparse.csr_matrix( (dat, (row,col)) )
print 'haha'
dtrain = xgb.DMatrix( csr )
print 'set label'
dtrain.set_label(labels)
evallist = [(dtest,'eval'), (dtrain,'train')]
bst = xgb.train( param, dtrain, num_round, evallist )
Expand Down
15 changes: 11 additions & 4 deletions python/xgboost.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,16 @@ def ctypes2numpy( cptr, length ):
# data matrix used in xgboost
class DMatrix:
# constructor
def __init__(self, data=None, label=None):
def __init__(self, data=None, label=None, missing=0.0):
self.handle = xglib.XGDMatrixCreate()
if data == None:
return
if isinstance(data,str):
xglib.XGDMatrixLoad(self.handle, ctypes.c_char_p(data), 1)

xglib.XGDMatrixLoad(self.handle, ctypes.c_char_p(data), 1)
elif isinstance(data,scp.csr_matrix):
self.__init_from_csr(data)
elif isinstance(data, numpy.ndarray) and len(data.shape) == 2:
self.__init_from_npy2d(data, missing)
else:
try:
csr = scp.csr_matrix(data)
Expand All @@ -59,6 +60,12 @@ def __init_from_csr(self,csr):
( ctypes.c_uint * len(csr.indices) )(*csr.indices),
( ctypes.c_float * len(csr.data) )(*csr.data),
len(csr.indptr), len(csr.data) )
# convert data from numpy matrix
def __init_from_npy2d(self,mat,missing):
    """Load a dense 2-D numpy array into this matrix.

    mat is flattened into a float32 buffer and handed to
    XGDMatrixParseMat together with its two dimensions; missing is
    forwarded as the value that marks an absent entry.
    """
    data = numpy.array( mat.reshape(mat.size), dtype='float32' )
    # nrow/ncol are declared size_t on the C side; a bare Python int is
    # marshalled by ctypes as a C int, so wrap both dimensions explicitly
    xglib.XGDMatrixParseMat( self.handle,
                             data.ctypes.data_as(ctypes.POINTER(ctypes.c_float)),
                             ctypes.c_size_t(mat.shape[0]),
                             ctypes.c_size_t(mat.shape[1]),
                             ctypes.c_float(missing) )
# destructor
def __del__(self):
    """Hand this object's handle to XGDMatrixFree when it is collected."""
    handle = self.handle
    xglib.XGDMatrixFree(handle)
Expand Down Expand Up @@ -103,7 +110,7 @@ def __getitem__(self, ridx):

class Booster:
"""learner class """
def __init__(self, params, cache=[]):
def __init__(self, params={}, cache=[]):
""" constructor, param: """
for d in cache:
assert isinstance(d,DMatrix)
Expand Down
29 changes: 29 additions & 0 deletions python/xgboost_python.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,28 @@ namespace xgboost{
for( size_t i = 0; i < nelem; ++ i ){
mat.row_data_[i] = XGEntry(indices[i], data[i]);
}
this->data.InitData();
this->init_col_ = true;
}

/*!
 * \brief fill the feature matrix from a dense float array
 * \param data pointer to nrow * ncol values, read one row of ncol values at a time
 * \param nrow number of rows
 * \param ncol number of columns
 * \param missing value that marks an absent entry; NaN is accepted
 */
inline void ParseMat( const float *data,
                      size_t nrow,
                      size_t ncol,
                      float missing ){
    xgboost::booster::FMatrixS &mat = this->data;
    mat.Clear();
    // NaN never compares equal to anything (itself included), so a NaN
    // marker cannot be matched with == / != and needs its own test
    const bool nan_missing = ( missing != missing );
    for( size_t i = 0; i < nrow; ++i, data += ncol ){
        size_t nelem = 0;
        for( size_t j = 0; j < ncol; ++j ){
            const float value = data[j];
            const bool is_missing = nan_missing ? ( value != value )
                                                : ( value == missing );
            if( is_missing ) continue;
            mat.row_data_.push_back( XGEntry(j, value) );
            ++ nelem;
        }
        // NOTE(review): relies on Clear() leaving a leading entry in row_ptr_ -- confirm
        mat.row_ptr_.push_back( mat.row_ptr_.back() + nelem );
    }
    this->data.InitData();
    this->init_col_ = true;
}
inline void SetLabel( const float *label, size_t len ){
this->info.labels.resize( len );
Expand Down Expand Up @@ -163,6 +185,13 @@ extern "C"{
size_t nelem ){
static_cast<DMatrix*>(handle)->ParseCSR(indptr, indices, data, nindptr, nelem);
}
void XGDMatrixParseMat( void *handle,
const float *data,
size_t nrow,
size_t ncol,
float missing ){
static_cast<DMatrix*>(handle)->ParseMat(data, nrow, ncol, missing);
}
void XGDMatrixSetLabel( void *handle, const float *label, size_t len ){
static_cast<DMatrix*>(handle)->SetLabel(label,len);
}
Expand Down
22 changes: 21 additions & 1 deletion python/xgboost_python.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,19 @@ extern "C"{
const float *data,
size_t nindptr,
size_t nelem );
/*!
* \brief set matrix content from a dense data array
* \param handle an instance of data matrix
* \param data pointer to the data space
* \param nrow number of rows
* \param ncol number of columns
* \param missing which value to represent missing value
*/
void XGDMatrixParseMat( void *handle,
const float *data,
size_t nrow,
size_t ncol,
float missing );
/*!
* \brief set label of the training matrix
* \param handle a instance of data matrix
Expand All @@ -74,9 +87,16 @@ extern "C"{
* \brief get label set from matrix
* \param handle a instance of data matrix
* \param len used to set result length
* \return pointer to the row
* \return pointer to the label
*/
const float* XGDMatrixGetLabel( const void *handle, size_t* len );
/*!
* \brief get weight set from matrix
* \param handle an instance of data matrix
* \param len used to set result length
* \return pointer to the weight
*/
const float* XGDMatrixGetWeight( const void *handle, size_t* len );
/*!
* \brief clear all the records, including feature matrix and label
* \param handle a instance of data matrix
Expand Down

0 comments on commit 2be3f6e

Please sign in to comment.