Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,10 @@ def RDataFrameAsNumpy(df, columns=None, exclude=None, lazy=False):
result_ptrs = {}
for column in columns:
column_type = df.GetColumnType(column)
# bool columns should be taken as unsigned chars, because NumPy stores
# bools in bytes - different from the std::vector<bool> returned by the
# action, which might do some space optimization
column_type = "unsigned char" if column_type == "bool" else column_type
result_ptrs[column] = df.Take[column_type](column)

result = AsNumpyResult(result_ptrs, columns)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,14 +71,15 @@


_array_interface_dtype_map = {
"float": "f",
"Long64_t": "i",
"ULong64_t": "u",
"double": "f",
"float": "f",
"int": "i",
"long": "i",
"Long64_t": "i",
"unsigned char": "b",
"unsigned int": "u",
"unsigned long": "u",
"ULong64_t": "u",
}


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,13 @@
import cppyy


def SaveXGBoost(self, xgb_model, key_name, output_path, num_inputs=None, tmp_path="/tmp", threshold_dtype="float"):
def SaveXGBoost(self, xgb_model, key_name, output_path, num_inputs, tmp_path="/tmp", threshold_dtype="float"):
# Extract objective
objective_map = {
"multi:softprob": "softmax", # Naming the objective softmax is more common today
"binary:logistic": "logistic",
"reg:linear": "identity",
"reg:squarederror": "identity",
}
model_objective = xgb_model.objective
if not model_objective in objective_map:
Expand Down Expand Up @@ -48,7 +49,8 @@ def SaveXGBoost(self, xgb_model, key_name, output_path, num_inputs=None, tmp_pat

import json

forest = json.load(open(tmp_path, "r"))
with open(tmp_path, "r") as json_file:
forest = json.load(json_file)

# Determine whether the model has a bias parameter and write bias trees
if hasattr(xgb_model, "base_score") and "reg:" in model_objective:
Expand Down Expand Up @@ -96,16 +98,6 @@ def fill_arrays(node, index, inputs_base, thresholds_base):
for i in range(num_trees):
outputs[i] = int(i % num_outputs)

# Determine number of input variables
if not num_inputs is None:
pass
elif hasattr(xgb_model, "_features_count"):
num_inputs = xgb_model._features_count
else:
raise Exception(
"Failed to get number of input variables from XGBoost model. Please provide the additional keyword argument 'num_inputs' to this function."
)

# Store arrays in a ROOT file in a folder with the given key name
# TODO: Write single values as simple integers and not vectors.
f = cppyy.gbl.TFile(output_path, "RECREATE")
Expand Down
12 changes: 12 additions & 0 deletions bindings/pyroot/pythonizations/test/rdataframe_asnumpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,18 @@ def test_cloning(self):
self.assertSequenceEqual(
asnumpyres.GetValue()["x"].tolist(), np.arange(begin, end).tolist())

def test_bool_column(self):
    """
    Check that a bool column converted with AsNumpy yields the expected
    values and the same dtype as a boolean array built directly in NumPy.
    """
    cut = 50
    total_entries = 100
    column = "bool_branch"

    # Define a column that is true for entries whose number exceeds the cut
    expression = f"(int)rdfentry_ > {cut}"
    frame = ROOT.RDataFrame(total_entries).Define(column, expression)
    converted = frame.AsNumpy([column])[column]

    # Reference array built in NumPy from the same condition
    expected = np.arange(0, total_entries) > cut

    self.assertTrue(all(converted == expected))  # test values
    self.assertEqual(converted.dtype, expected.dtype)  # test type

if __name__ == '__main__':
unittest.main()
4 changes: 2 additions & 2 deletions cmake/modules/RootBuildOptions.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -155,8 +155,8 @@ ROOT_BUILD_OPTION(qt6web OFF "Enable support for Qt6 web-based display (requires
ROOT_BUILD_OPTION(r OFF "Enable support for R bindings (requires R, Rcpp, and RInside)")
ROOT_BUILD_OPTION(roofit ON "Build the advanced fitting package RooFit, and RooStats for statistical tests. If xml is available, also build HistFactory.")
ROOT_BUILD_OPTION(roofit_multiprocess OFF "Build RooFit::MultiProcess and multi-process RooFit::TestStatistics classes (requires ZeroMQ >= 3.4.5 built with -DENABLE_DRAFTS and cppzmq).")
ROOT_BUILD_OPTION(webgui ON "Build Web-based UI components of ROOT (requires C++17 standard or higher)")
ROOT_BUILD_OPTION(root7 ON "Build ROOT 7 components of ROOT (requires C++17 standard or higher)")
ROOT_BUILD_OPTION(webgui ON "Build Web-based UI components of ROOT")
ROOT_BUILD_OPTION(root7 ON "Build ROOT 7 components of ROOT")
ROOT_BUILD_OPTION(rpath ON "Link libraries with built-in RPATH (run-time search path)")
ROOT_BUILD_OPTION(runtime_cxxmodules ON "Enable runtime support for C++ modules")
ROOT_BUILD_OPTION(shadowpw OFF "Enable support for shadow passwords")
Expand Down
5 changes: 3 additions & 2 deletions hist/hist/inc/TPrincipal.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,17 +51,18 @@ class TPrincipal : public TNamed {
public:
TPrincipal();
~TPrincipal() override;
TPrincipal(Int_t nVariables, Option_t *opt="ND");
TPrincipal(Long64_t nVariables, Option_t *opt="ND");

virtual void AddRow(const Double_t *x);
void Browse(TBrowser *b) override;
void Clear(Option_t *option="") override;
/// Return the covariance matrix. \note Only the lower diagonal of the covariance matrix is computed by the class
const TMatrixD *GetCovarianceMatrix() const {return &fCovarianceMatrix;}
const TVectorD *GetEigenValues() const {return &fEigenValues;}
const TMatrixD *GetEigenVectors() const {return &fEigenVectors;}
TList *GetHistograms() const {return fHistograms;}
const TVectorD *GetMeanValues() const {return &fMeanValues;}
const Double_t *GetRow(Int_t row);
const Double_t *GetRow(Long64_t row);
const TVectorD *GetSigmas() const {return &fSigmas;}
const TVectorD *GetUserData() const {return &fUserData;}
Bool_t IsFolder() const override { return kTRUE;}
Expand Down
20 changes: 16 additions & 4 deletions hist/hist/src/TPrincipal.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,7 @@ TPrincipal::TPrincipal()
///
/// The created object is named "principal" by default.

TPrincipal::TPrincipal(Int_t nVariables, Option_t *opt)
TPrincipal::TPrincipal(Long64_t nVariables, Option_t *opt)
: fMeanValues(nVariables),
fSigmas(nVariables),
fCovarianceMatrix(nVariables,nVariables),
Expand All @@ -260,7 +260,11 @@ TPrincipal::TPrincipal(Int_t nVariables, Option_t *opt)
fStoreData(kFALSE)
{
if (nVariables <= 1) {
Error("TPrincipal", "You can't be serious - nVariables == 1!!!");
Error("TPrincipal", "You can't be serious - nVariables <= 1!!!");
return;
}
if (nVariables > std::numeric_limits<Int_t>::max()) {
Error("TPrincipal", "`nVariables` input parameter %lld is larger than the allowed maximum %d", nVariables, std::numeric_limits<Int_t>::max());
return;
}

Expand Down Expand Up @@ -411,6 +415,10 @@ void TPrincipal::AddRow(const Double_t *p)
{
if (!p)
return;
if (fNumberOfDataPoints == std::numeric_limits<Int_t>::max()) {
Error("AddRow", "`fNumberOfDataPoints` has reached its allowed maximum %d, cannot add new row.", fNumberOfDataPoints);
return;
}

// Increment the data point counter
Int_t i,j;
Expand Down Expand Up @@ -510,15 +518,19 @@ void TPrincipal::Clear(Option_t *opt)
/// It's up to the user to delete the returned array.
/// Row 0 is the first row.

const Double_t *TPrincipal::GetRow(Long64_t row)
{
   // Returns the address of the first value of the requested row inside
   // fUserData, or nullptr when the row cannot be served.
   // NOTE(review): the pointer refers to an element of the fUserData member,
   // so it looks like callers must not free it - confirm.

   // Reject rows outside [0, fNumberOfDataPoints). A negative row would
   // otherwise turn into a negative index into fUserData below.
   if (row < 0 || row >= fNumberOfDataPoints)
      return nullptr;

   // Rows can only be handed back when the input data is being stored.
   if (!fStoreData)
      return nullptr;

   // Each row occupies fNumberOfVariables consecutive values. The product is
   // computed in 64 bits so that an index too large for Int_t is detected
   // here instead of wrapping around.
   const Long64_t index = row * fNumberOfVariables;
   if (index > std::numeric_limits<Int_t>::max()) {
      Error("GetRow", "Input parameter `row` %lld x fNumberOfVariables %d goes into overflow (%lld>%d), returning nullptr.", row, fNumberOfVariables, index, std::numeric_limits<Int_t>::max());
      return nullptr;
   }
   return &fUserData(index);
}

Expand Down
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
# PyROOT: Interoperability with numpy arrays
# TMVA: PyMVA interfaces
numpy>=1.4.1
scikit-learn
xgboost

# PyROOT: ROOT.Numba.Declare decorator
numba>=0.47.0 ; python_version < "3.11" # See https://github.com/numba/numba/issues/8304
Expand Down
Loading