Skip to content

Commit 992f505

Browse files
authored
factor out uses of omp_get_num_threads() and omp_get_max_threads() outside of OpenMP wrapper (#6133)
1 parent ad02551 commit 992f505

File tree

3 files changed: 5 additions and 10 deletions

src/objective/rank_objective.hpp

Lines changed: 3 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -289,17 +289,12 @@ class LambdarankNDCG : public RankingObjective {
289289

290290
void UpdatePositionBiasFactors(const score_t* lambdas, const score_t* hessians) const override {
291291
/// get number of threads
292-
int num_threads = 1;
293-
#pragma omp parallel
294-
#pragma omp master
295-
{
296-
num_threads = omp_get_num_threads();
297-
}
292+
int num_threads = OMP_NUM_THREADS();
298293
// create per-thread buffers for first and second derivatives of utility w.r.t. position bias factors
299294
std::vector<double> bias_first_derivatives(num_position_ids_ * num_threads, 0.0);
300295
std::vector<double> bias_second_derivatives(num_position_ids_ * num_threads, 0.0);
301296
std::vector<int> instance_counts(num_position_ids_ * num_threads, 0);
302-
#pragma omp parallel for schedule(guided)
297+
#pragma omp parallel for schedule(guided) num_threads(num_threads)
303298
for (data_size_t i = 0; i < num_data_; i++) {
304299
// get thread ID
305300
const int tid = omp_get_thread_num();
@@ -310,7 +305,7 @@ class LambdarankNDCG : public RankingObjective {
310305
bias_second_derivatives[offset] -= hessians[i];
311306
instance_counts[offset]++;
312307
}
313-
#pragma omp parallel for schedule(guided)
308+
#pragma omp parallel for schedule(guided) num_threads(num_threads)
314309
for (data_size_t i = 0; i < num_position_ids_; i++) {
315310
double bias_first_derivative = 0.0;
316311
double bias_second_derivative = 0.0;

src/treelearner/gpu_tree_learner.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -344,7 +344,7 @@ void GPUTreeLearner::AllocateGPUMemory() {
344344
// for data transfer time
345345
auto start_time = std::chrono::steady_clock::now();
346346
// Now generate new data structure feature4, and copy data to the device
347-
int nthreads = std::min(omp_get_max_threads(), static_cast<int>(dense_feature_group_map_.size()) / dword_features_);
347+
int nthreads = std::min(OMP_NUM_THREADS(), static_cast<int>(dense_feature_group_map_.size()) / dword_features_);
348348
nthreads = std::max(nthreads, 1);
349349
std::vector<Feature4*> host4_vecs(nthreads);
350350
std::vector<boost::compute::buffer> host4_bufs(nthreads);

src/treelearner/linear_tree_learner.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -52,7 +52,7 @@ void LinearTreeLearner::InitLinear(const Dataset* train_data, const int max_leav
5252
}
5353
XTHX_by_thread_.clear();
5454
XTg_by_thread_.clear();
55-
int max_threads = omp_get_max_threads();
55+
int max_threads = OMP_NUM_THREADS();
5656
for (int i = 0; i < max_threads; ++i) {
5757
XTHX_by_thread_.push_back(XTHX_);
5858
XTg_by_thread_.push_back(XTg_);

Comments (0): this commit has no comments.