Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions include/prismspf/core/pde_problem.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include <deal.II/fe/fe_system.h>
#include <deal.II/fe/mapping_q1.h>

#include <prismspf/core/timer.h>
#include <prismspf/core/types.h>
#include <prismspf/core/vector_mapping.h>

Expand Down Expand Up @@ -113,6 +114,11 @@ class PDEProblem
void
reinit_system();

/**
* @brief Timer instance for the class.
*/
Timer::Scope timer_scope;

/**
* @brief User-inputs.
*/
Expand Down
67 changes: 66 additions & 1 deletion include/prismspf/core/timer.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,79 @@ PRISMS_PF_BEGIN_NAMESPACE

/**
* @brief Timer class for PRISMS-PF
*
* This class keeps track of nested timing sections so we can print a structured summary
* at the end of the simulation.
*
* Fortunately, `Caliper` handles this nicely (and more!) or us. `deal.II` doesn't do this
* so we have to keep track of a few additional objects. The logical way to represent this
* is with a tree node structure.
*/
class Timer
{
public:
/**
* @brief Constructor.
*/
Timer();
Timer() = default;

/**
* @brief Destructor.
*
* Calls `print_summary` upon destruction.
*/
~Timer();

Timer(const Timer &) = delete;
Timer &
operator=(const Timer &) = delete;
Timer(Timer &&) = delete;
Timer &
operator=(Timer &&) = delete;

/**
* @brief Timer scope guard.
*
* This allows you to use the timer like the following
* @code
* void f()
* {
* Timer::Scope outer("outer");
*
* {
* Timer::Scope inner("inner");
* // Work goes here
* } // Inner scope ends here
*
* // Work goes here
* } // Outer scope ends here
* @endcode
*
*/
struct Scope
{
public:
explicit Scope(const char *name)
: name(name)
{
Timer::start_section(name);
}

~Scope()
{
Timer::end_section(name);
}

Scope(const Scope &) = delete;
Scope &
operator=(const Scope &) = delete;
Scope(Scope &&) = delete;
Scope &
operator=(Scope &&) = delete;

private:
const char *name;
};

/**
* @brief Start a new timer section.
Expand Down
5 changes: 2 additions & 3 deletions src/core/pde_problem.cc
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@ PDEProblem<dim, degree, number>::PDEProblem(
PhaseFieldTools<dim> &_pf_tools,
const std::shared_ptr<const PDEOperator<dim, degree, number>> &_pde_operator,
const std::shared_ptr<const PDEOperator<dim, degree, float>> &_pde_operator_float)
: user_inputs(&_user_inputs)
: timer_scope("Problem")
, user_inputs(&_user_inputs)
, pf_tools(&_pf_tools)
, mg_info(_user_inputs)
, triangulation_handler(_user_inputs, mg_info)
Expand Down Expand Up @@ -488,8 +489,6 @@ PDEProblem<dim, degree, number>::run()
<< std::flush;

solve();

Timer::print_summary();
}

#include "core/pde_problem.inst"
Expand Down
199 changes: 193 additions & 6 deletions src/core/timer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -10,41 +10,144 @@

#include <prismspf/config.h>

#include <iomanip>
#include <iostream>
#include <map>
#include <mpi.h>
#include <ostream>
#include <stack>
#include <string>
#include <vector>

#ifdef PRISMS_PF_WITH_CALIPER
# include <caliper/cali.h>
#endif

PRISMS_PF_BEGIN_NAMESPACE

namespace
{
struct TimerStack
{
/**
* @brief Stack of active section names.
*/
std::stack<std::string> active;

/**
* @brief List of sections, ordered by insertion order, for the summary.
*/
std::vector<std::string> insertion_order;

/**
* @brief Section depth and parent key.
*
* Parent key is empty if at the top of hierarchy.
*/
struct Meta
{
unsigned int depth = 0;
std::string parent;
};

/**
* @brief Parent and depth of each section.
*/
std::map<std::string, Meta> meta;

/**
* @brief Get the current active section.
*/
[[nodiscard]] std::string
current_section() const
{
return active.empty() ? "" : active.top();
}

/**
* @brief Add a new section.
*
* Returns the key string that should be passed to the deal.II timer class.
*/
std::string
push(const std::string &name)
{
const std::string parent = current_section();
const std::string key = parent.empty() ? name : parent + " > " + name;

if (!meta.contains(key))
{
Meta data;
data.depth = static_cast<unsigned int>(active.size());
data.parent = parent;
meta[key] = data;
insertion_order.push_back(key);
}

active.push(key);
return key;
}

/**
* @brief Remove the current section.
*/
void
pop()
{
AssertThrow(!active.empty(), dealii::ExcMessage("Timer stack underflow"));
active.pop();
}
};

TimerStack &
timer_stack()
{
static TimerStack instance;
return instance;
}

} // namespace

Timer::~Timer()
{
print_summary();
}

void
Timer::start_section(const char *name)
{
#ifdef PRISMS_PF_WITH_CALIPER
CALI_MARK_BEGIN(name);
#else
serial_timer().enter_subsection(name);
const std::string key = timer_stack().push(name);
serial_timer().enter_subsection(key);
#endif
}

void
Timer::end_section([[maybe_unused]] const char *name)
Timer::end_section(const char *name)
{
#ifdef PRISMS_PF_WITH_CALIPER
CALI_MARK_END(name);
#else
// Check that the name matches the active section
const std::string &active_section = timer_stack().current_section();
AssertThrow(
!active_section.empty() &&
(active_section == name ||
active_section.size() > std::string(name).size() &&
active_section.substr(active_section.size() - std::string(name).size()) == name),
dealii::ExcMessage(std::string("Timer::end_section mismatch: expected segment '") +
name + "' but top of stack is '" + active_section + "'."));
serial_timer().leave_subsection();
timer_stack().pop();
#endif
}

dealii::TimerOutput &
Timer::serial_timer()
{
static dealii::TimerOutput instance(ConditionalOStreams::pout_base(),
dealii::TimerOutput::summary,
dealii::TimerOutput::never,
dealii::TimerOutput::wall_times);

return instance;
Expand All @@ -69,8 +172,92 @@ Timer::print_summary()
return;
#endif

// TODO: Revisit the deal.II TimerOutput class and reorganize sections so they are
// tiered accordingly
const auto &stack = timer_stack();
const auto serial_n_calls_data =
serial_timer().get_summary_data(dealii::TimerOutput::OutputData::n_calls);
const auto serial_wall_time_data =
serial_timer().get_summary_data(dealii::TimerOutput::OutputData::total_wall_time);
const auto serial_cpu_time_data =
serial_timer().get_summary_data(dealii::TimerOutput::OutputData::total_cpu_time);

auto lookup = [](const std::map<std::string, double> &data,
const std::string &key) -> double
{
const auto iterator = data.find(key);
return iterator != data.end() ? iterator->second : 0.0;
};

// Compute the max depth to adjust column widths
unsigned int max_depth = 0;
for (const auto &key : stack.insertion_order)
{
max_depth = std::max(max_depth, stack.meta.at(key).depth);
}

// Each depth level adds 2 spaces + "|- " (3 chars) for non-root
const int depth_padding = (static_cast<int>(max_depth) * 2) + (max_depth > 0 ? 3 : 0);

// Column names
std::string calls = "N Calls";
std::string wall = "Wall Time (s)";
std::string cpu = "CPU Time (s)";
std::string parent = "% Parent";

// Column widths
const int w_label = 44 + depth_padding;
const int w_calls = static_cast<int>(calls.size()) + 2;
const int w_wall = static_cast<int>(wall.size()) + 2;
const int w_cpu = static_cast<int>(cpu.size()) + 2;
const int w_pct = static_cast<int>(parent.size()) + 2;
const int total_w = w_label + w_calls + w_wall + w_cpu + w_pct;

auto &out = ConditionalOStreams::pout_base();

out << "\n"
<< std::string(total_w, '=') << "\n"
<< " PRISMS-PF Timing Summary\n"
<< std::string(total_w, '=') << "\n"
<< std::left << std::setw(w_label) << "Section" << std::right << std::setw(w_calls)
<< calls << std::setw(w_wall) << wall << std::setw(w_cpu) << cpu << std::setw(w_pct)
<< parent << "\n"
<< std::string(total_w, '-') << "\n";

for (const auto &key : stack.insertion_order)
{
const auto &meta = stack.meta.at(key);
const unsigned int depth = meta.depth;

const double wall_time = lookup(serial_wall_time_data, key);
const double cpu_time = lookup(serial_cpu_time_data, key);
const auto n_calls = static_cast<unsigned int>(lookup(serial_n_calls_data, key));

// Percentage relative to parent (or 100% for roots)
double pct = 100.0;
if (!meta.parent.empty())
{
const double parent_wall = lookup(serial_wall_time_data, meta.parent);
if (parent_wall > 0.0)
{
pct = wall_time / parent_wall * 100.0;
}
}

// Bare section name (last segment after " > ")
const std::size_t separator = key.rfind(" > ");
const std::string bare =
separator == std::string::npos ? key : key.substr(separator + 3);

const std::string indent(static_cast<std::size_t>(depth) * 2, ' ');
const std::string label = indent + (depth > 0 ? "|- " : "") + bare;

out << std::left << std::setw(w_label) << label << std::right << std::fixed
<< std::setprecision(3) << std::setw(w_calls) << n_calls << std::setw(w_wall)
<< wall_time << std::setw(w_cpu) << cpu_time << std::setw(w_pct - 1) << pct
<< "%"
<< "\n";
}

out << std::string(total_w, '=') << "\n\n";
}

PRISMS_PF_END_NAMESPACE