Skip to content

Commit

Permalink
Implement a DMatrix Proxy. (dmlc#5803)
Browse files Browse the repository at this point in the history
  • Loading branch information
trivialfis authored and wbo4958 committed Jun 30, 2020
1 parent 4cfd79c commit 8b4f137
Show file tree
Hide file tree
Showing 4 changed files with 181 additions and 0 deletions.
7 changes: 7 additions & 0 deletions include/xgboost/data.h
Original file line number Diff line number Diff line change
Expand Up @@ -441,6 +441,13 @@ class DMatrix {
DMatrix() = default;
/*! \brief meta information of the dataset */
virtual MetaInfo& Info() = 0;
virtual void SetInfo(const char *key, const void *dptr, DataType dtype,
size_t num) {
this->Info().SetInfo(key, dptr, dtype, num);
}
virtual void SetInfo(const char* key, std::string const& interface_str) {
this->Info().SetInfo(key, interface_str);
}
/*! \brief meta information of the dataset */
virtual const MetaInfo& Info() const = 0;
/**
Expand Down
24 changes: 24 additions & 0 deletions src/data/proxy_dmatrix.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
/*!
* Copyright 2020 XGBoost contributors
*/
#include "proxy_dmatrix.h"
#include "device_adapter.cuh"

namespace xgboost {
namespace data {

void DMatrixProxy::FromCudaColumnar(std::string interface_str) {
std::shared_ptr<data::CudfAdapter> adapter {new data::CudfAdapter(interface_str)};
auto const& value = adapter->Value();
this->batch_ = adapter;
device_ = adapter->DeviceIdx();
}

void DMatrixProxy::FromCudaArray(std::string interface_str) {
std::shared_ptr<CupyAdapter> adapter(new CupyAdapter(interface_str));
this->batch_ = adapter;
device_ = adapter->DeviceIdx();
}

} // namespace data
} // namespace xgboost
104 changes: 104 additions & 0 deletions src/data/proxy_dmatrix.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
/*!
* Copyright 2020 XGBoost contributors
*/
#ifndef XGBOOST_DATA_PROXY_DMATRIX_H_
#define XGBOOST_DATA_PROXY_DMATRIX_H_

#include <dmlc/any.h>

#include <memory>
#include <string>
#include <utility>

#include "xgboost/data.h"
#include "xgboost/generic_parameters.h"
#include "xgboost/c_api.h"
#include "adapter.h"

namespace xgboost {
namespace data {
/*!
 * \brief A proxy to external iterator.
 *
 * Stores an iterator handle together with pointers to the two callbacks that
 * operate on it, and exposes them as member functions.
 *
 * \tparam ResetFn callable type invoked as reset(handle)
 * \tparam NextFn  callable type invoked as next(handle); its result is
 *                 converted to bool
 */
template <typename ResetFn, typename NextFn>
class DataIterProxy {
 private:
  DataIterHandle iter_;
  ResetFn* reset_;
  NextFn* next_;

 public:
  /*!
   * \param iter  handle passed to both callbacks on every call
   * \param reset callback used by Reset()
   * \param next  callback used by Next()
   */
  DataIterProxy(DataIterHandle iter, ResetFn* reset, NextFn* next)
      : iter_{iter}, reset_{reset}, next_{next} {}

  /*! \brief Call the reset callback with the stored handle. */
  void Reset() { reset_(iter_); }

  /*! \brief Call the next callback with the stored handle and return its result as bool. */
  bool Next() {
    bool const result = next_(iter_);
    return result;
  }
};

/*!
 * \brief A proxy of DMatrix used by external iterator.
 *
 * Keeps the adapter created by the most recent SetData() call (type-erased in a
 * dmlc::any) alongside a MetaInfo.  Slicing and all batch accessors are not
 * implemented and abort through LOG(FATAL).
 */
class DMatrixProxy : public DMatrix {
  /*! \brief Meta info returned by Info(). */
  MetaInfo info_;
  /*! \brief Adapter stored by FromCudaColumnar() / FromCudaArray(). */
  dmlc::any batch_;
  /*! \brief Device index reported by the stored adapter; starts as kCpuId. */
  int32_t device_ { xgboost::GenericParameter::kCpuId };

#if defined(XGBOOST_USE_CUDA)
  void FromCudaColumnar(std::string interface_str);
  void FromCudaArray(std::string interface_str);
#endif  // defined(XGBOOST_USE_CUDA)

 public:
  /*! \brief Device index recorded by the last SetData() call. */
  int DeviceIdx() const { return device_; }

  /*!
   * \brief Replace the stored adapter using a JSON interface string.
   *
   * The string is parsed as JSON: when the top-level value is an array it is
   * handed to FromCudaColumnar(), otherwise to FromCudaArray().  Without CUDA
   * support only common::AssertGPUSupport() is called.
   *
   * \param c_interface null-terminated JSON interface string; must not be null
   */
  void SetData(char const* c_interface) {
    common::AssertGPUSupport();
#if defined(XGBOOST_USE_CUDA)
    // Building a std::string from a null pointer is undefined behaviour, so
    // reject it explicitly before touching the argument.
    if (c_interface == nullptr) {
      LOG(FATAL) << "Invalid interface string: null pointer.";
    }
    std::string interface_str = c_interface;
    Json json_array_interface =
        Json::Load({interface_str.c_str(), interface_str.size()});
    if (IsA<Array>(json_array_interface)) {
      this->FromCudaColumnar(interface_str);
    } else {
      this->FromCudaArray(interface_str);
    }
#endif  // defined(XGBOOST_USE_CUDA)
  }

  MetaInfo& Info() override { return info_; }
  MetaInfo const& Info() const override { return info_; }
  bool SingleColBlock() const override { return true; }
  bool EllpackExists() const override { return true; }
  bool SparsePageExists() const override { return false; }

  /*! \brief Not supported; aborts through LOG(FATAL). */
  DMatrix *Slice(common::Span<int32_t const> /*ridxs*/) override {
    LOG(FATAL) << "Slicing DMatrix is not supported for Proxy DMatrix.";
    return nullptr;
  }
  /*! \brief Not implemented; aborts through LOG(FATAL). */
  BatchSet<SparsePage> GetRowBatches() override {
    LOG(FATAL) << "Not implemented.";
    return BatchSet<SparsePage>(BatchIterator<SparsePage>(nullptr));
  }
  /*! \brief Not implemented; aborts through LOG(FATAL). */
  BatchSet<CSCPage> GetColumnBatches() override {
    LOG(FATAL) << "Not implemented.";
    return BatchSet<CSCPage>(BatchIterator<CSCPage>(nullptr));
  }
  /*! \brief Not implemented; aborts through LOG(FATAL). */
  BatchSet<SortedCSCPage> GetSortedColumnBatches() override {
    LOG(FATAL) << "Not implemented.";
    return BatchSet<SortedCSCPage>(BatchIterator<SortedCSCPage>(nullptr));
  }
  /*! \brief Not implemented; aborts through LOG(FATAL). */
  BatchSet<EllpackPage> GetEllpackBatches(const BatchParam& /*param*/) override {
    LOG(FATAL) << "Not implemented.";
    return BatchSet<EllpackPage>(BatchIterator<EllpackPage>(nullptr));
  }

  /*! \brief Return a copy of the type-erased adapter stored by SetData(). */
  dmlc::any Adapter() const {
    return batch_;
  }
};
} // namespace data
} // namespace xgboost
#endif // XGBOOST_DATA_PROXY_DMATRIX_H_
46 changes: 46 additions & 0 deletions tests/cpp/data/test_proxy_dmatrix.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#include <gtest/gtest.h>
#include <xgboost/host_device_vector.h>
#include <memory>
#include "../helpers.h"
#include "../../../src/data/device_adapter.cuh"
#include "../../../src/data/proxy_dmatrix.h"

namespace xgboost {
namespace data {
// Feeds DMatrixProxy first an array interface and then a columnar interface,
// checking after each SetData() that the stored adapter has the expected type
// and reports the generated number of rows and columns.  Also checks that
// labels set through SetInfo() have one entry per row.
TEST(ProxyDMatrix, Basic) {
  constexpr size_t kRows{100}, kCols{100};

  HostDeviceVector<float> array_storage;
  auto const array_interface = RandomDataGenerator(kRows, kCols, 0.5)
                                   .Device(0)
                                   .GenerateArrayInterface(&array_storage);
  std::vector<HostDeviceVector<float>> label_storage(1);
  auto const label_interface = RandomDataGenerator(kRows, 1, 0)
                                   .Device(0)
                                   .GenerateColumnarArrayInterface(&label_storage);

  DMatrixProxy proxy;
  proxy.SetData(array_interface.c_str());
  proxy.SetInfo("label", label_interface.c_str());

  ASSERT_EQ(proxy.Adapter().type(), typeid(std::shared_ptr<CupyAdapter>));
  ASSERT_EQ(proxy.Info().labels_.Size(), kRows);
  {
    using CupyPtr = std::shared_ptr<CupyAdapter>;
    // Copy the pointer out: Adapter() returns a temporary dmlc::any, so a
    // reference into it would dangle after this statement.
    CupyPtr const cupy = dmlc::get<CupyPtr>(proxy.Adapter());
    ASSERT_EQ(cupy->NumRows(), kRows);
    ASSERT_EQ(cupy->NumColumns(), kCols);
  }

  std::vector<HostDeviceVector<float>> columnar_storage(kCols);
  auto const columnar_interface = RandomDataGenerator(kRows, kCols, 0)
                                      .Device(0)
                                      .GenerateColumnarArrayInterface(&columnar_storage);
  proxy.SetData(columnar_interface.c_str());

  ASSERT_EQ(proxy.Adapter().type(), typeid(std::shared_ptr<CudfAdapter>));
  {
    using CudfPtr = std::shared_ptr<CudfAdapter>;
    // Same as above: take a copy rather than a reference into the temporary.
    CudfPtr const cudf = dmlc::get<CudfPtr>(proxy.Adapter());
    ASSERT_EQ(cudf->NumRows(), kRows);
    ASSERT_EQ(cudf->NumColumns(), kCols);
  }
}
} // namespace data
} // namespace xgboost

0 comments on commit 8b4f137

Please sign in to comment.