Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 106 additions & 11 deletions D3D12CacheListener.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,28 +34,97 @@ struct CacheStats {
};

const size_t NUM_EVENT_TYPES = 3;
static const char* event_names[NUM_EVENT_TYPES] = { "PSOs", "state objects", "state object additions" };

// Update ProcessStats to track stats per event type
struct ProcessStats {
// Index 0: 161 (PSOs), 1: 162 (state objects), 2: 163 (state object additions)
size_t total_events[NUM_EVENT_TYPES] = {0, 0, 0};
size_t hit_events[NUM_EVENT_TYPES] = {0, 0, 0};
size_t last_printed_total_events[NUM_EVENT_TYPES] = {0, 0, 0};

// Per-thread begin timestamps for timing begin/end event pairs
std::unordered_map<DWORD, LONGLONG> begin_qpc[NUM_EVENT_TYPES];
// Accumulated total time in QPC ticks
LONGLONG total_time_qpc[NUM_EVENT_TYPES] = {};

bool has_psdb = false;
std::string exe_name;
};

// Per-executable stored times: [event_type] -> total ms
struct ExeTimes {
double total_time_ms[NUM_EVENT_TYPES] = {};
bool has_data = false;
};

// Global dictionary: exe name -> { asd_on times, asd_off times }
std::unordered_map<std::string, std::pair<ExeTimes, ExeTimes>> g_exe_times; // pair<asd_on, asd_off>

// QPC frequency for converting ETW timestamps to milliseconds
LARGE_INTEGER g_qpcFrequency;

std::unordered_map<DWORD, ProcessStats> process_stats;
std::unordered_set<DWORD> asdinit_pids;
std::mutex stats_mutex;
std::atomic<bool> running{ true };

std::string GetExeNameFromPID(DWORD pid) {
HANDLE hProcess = OpenProcess(PROCESS_QUERY_LIMITED_INFORMATION, FALSE, pid);
if (!hProcess) return "unknown";
char path[MAX_PATH] = {};
DWORD size = MAX_PATH;
if (QueryFullProcessImageNameA(hProcess, 0, path, &size)) {
CloseHandle(hProcess);
std::string full(path);
auto pos = full.find_last_of("\\/");
return pos != std::string::npos ? full.substr(pos + 1) : full;
}
CloseHandle(hProcess);
return "unknown";
}

void LogProcessTimes(DWORD pid, const ProcessStats& ps) {
const std::string& exe = ps.exe_name;
const char* asdMode = ps.has_psdb ? "ASD ON" : "ASD OFF";

auto& entry = g_exe_times[exe];
auto& times = ps.has_psdb ? entry.first : entry.second;

std::cout << "\n=== PID " << pid << " (" << exe << ", " << asdMode << ") exited ===\n";
for (int i = 0; i < NUM_EVENT_TYPES; ++i) {
double ms = g_qpcFrequency.QuadPart > 0 ? (double)ps.total_time_qpc[i] * 1000.0 / g_qpcFrequency.QuadPart : 0.0;
times.total_time_ms[i] = ms;
std::cout << " [" << event_names[i] << "] Total time: " << std::fixed << std::setprecision(2) << ms << " ms\n";
}
times.has_data = true;

// If both ASD ON and ASD OFF data exist, show comparison
if (entry.first.has_data && entry.second.has_data) {
std::cout << "\n--- Comparison for " << exe << " (ASD ON vs ASD OFF) ---\n";
for (int i = 0; i < NUM_EVENT_TYPES; ++i) {
double asd_on_ms = entry.first.total_time_ms[i];
double asd_off_ms = entry.second.total_time_ms[i];
if (asd_off_ms > 0.0) {
double pct_faster = (asd_off_ms - asd_on_ms) / asd_off_ms * 100.0;
std::cout << " [" << event_names[i] << "] ASD ON: " << std::fixed << std::setprecision(2) << asd_on_ms
<< " ms, ASD OFF: " << asd_off_ms << " ms, "
<< (pct_faster >= 0 ? "faster" : "slower") << " by " << std::abs(pct_faster) << "%\n";
} else {
std::cout << " [" << event_names[i] << "] ASD OFF time is 0, cannot compare\n";
}
}
std::cout << std::flush;
}
}

// Add global handles for cleanup
TRACEHANDLE g_sessionHandle = 0;
EVENT_TRACE_PROPERTIES* g_props = nullptr;
std::atomic<bool> g_stopRequested{ false };

// Print stats only if total_events for any event type changed since last print
void PrintStats() {
static const char* event_names[NUM_EVENT_TYPES] = { "PSOs", "state objects", "state object additions" };
std::lock_guard<std::mutex> lock(stats_mutex);
for (auto& kv : process_stats) {
DWORD pid = kv.first;
Expand All @@ -64,10 +133,13 @@ void PrintStats() {
for (int i = 0; i < NUM_EVENT_TYPES; ++i) {
if (stats.total_events[i] != stats.last_printed_total_events[i]) {
double hit_rate = stats.total_events[i] == 0 ? 0.0 : (double)stats.hit_events[i] / stats.total_events[i] * 100.0;
double total_time_ms = g_qpcFrequency.QuadPart > 0 ? (double)stats.total_time_qpc[i] * 1000.0 / g_qpcFrequency.QuadPart : 0.0;
std::cout << "PID " << pid << " [" << event_names[i] << "]: "
<< "Total events: " << stats.total_events[i] << ", "
<< "Hits: " << stats.hit_events[i] << ", "
<< "Hit rate: " << std::fixed << std::setprecision(2) << hit_rate << "%\n";
<< "Hit rate: " << std::fixed << std::setprecision(2) << hit_rate << "%, "
<< "Total time: " << total_time_ms << " ms\n";

stats.last_printed_total_events[i] = stats.total_events[i];
printed = true;
}
Expand Down Expand Up @@ -180,19 +252,20 @@ void WINAPI EventRecordCallback(PEVENT_RECORD pEvent) {
std::wstring eventName = GetTraceLoggingEventName(pEvent);
if (eventName == L"ASDInit") {
DWORD pid = pEvent->EventHeader.ProcessId;
AsdInitStep step = AsdInitStep::None;
auto status = GetAsdInitStep(pEvent, step);
bool hasPsdb = (status == ERROR_SUCCESS) && step == AsdInitStep::Success;
{
std::lock_guard<std::mutex> lock(stats_mutex);
process_stats.emplace(pid, ProcessStats{});
auto res = process_stats.emplace(pid, ProcessStats{});
res.first->second.has_psdb = hasPsdb;
if (res.first->second.exe_name.empty())
res.first->second.exe_name = GetExeNameFromPID(pid);
asdinit_pids.insert(pid);
}

AsdInitStep step = AsdInitStep::None;
auto status = GetAsdInitStep(pEvent, step);

auto hasDefaultPsdb = (status == ERROR_SUCCESS) && step == AsdInitStep::Success ? "true" : "false";
auto hasDefaultPsdb = hasPsdb ? "true" : "false";
auto stepString = (status == ERROR_SUCCESS) ? AsdInitStepToString(step) : "Error Parsing Event";


std::cout << "ASDInit event seen for PID " << pid << ", HasDefaultPsdb: " << hasDefaultPsdb << ", Step: " << stepString << "\n";
}
} else if (IsEqualGUID(pEvent->EventHeader.ProviderId, D3D12_MANIFEST_PROVIDER)) {
Expand All @@ -205,6 +278,25 @@ void WINAPI EventRecordCallback(PEVENT_RECORD pEvent) {
if (eid == 161 || eid == 162 || eid == 163) {
ParseManifestPayload(pEvent);
}
// Handle begin/end timing events (155/156=CreatePSO, 157/158=CreateStateObject, 159/160=AddStateObjects)
if (eid >= 155 && eid <= 160) {
int type_idx = (eid - 155) / 2;
bool is_begin = (eid % 2 == 1); // 155, 157, 159 are odd (begin)
DWORD tid = pEvent->EventHeader.ThreadId;
LONGLONG qpc = pEvent->EventHeader.TimeStamp.QuadPart;

std::lock_guard<std::mutex> lock(stats_mutex);
auto& ps = process_stats[pid];
if (is_begin) {
ps.begin_qpc[type_idx][tid] = qpc;
} else {
auto it = ps.begin_qpc[type_idx].find(tid);
if (it != ps.begin_qpc[type_idx].end()) {
ps.total_time_qpc[type_idx] += qpc - it->second;
ps.begin_qpc[type_idx].erase(it);
}
}
}
}
}

Expand Down Expand Up @@ -245,10 +337,10 @@ void StatsThreadFunc() {
std::this_thread::sleep_for(std::chrono::seconds(5));
{
std::lock_guard<std::mutex> lock(stats_mutex);
// Remove stats for dead processes
// Remove stats for dead processes, log their times
for (auto it = process_stats.begin(); it != process_stats.end(); ) {
if (!IsProcessAlive(it->first)) {
std::cout << "Process " << it->first << " exited, removing stats.\n";
LogProcessTimes(it->first, it->second);
asdinit_pids.erase(it->first);
it = process_stats.erase(it);
} else {
Expand Down Expand Up @@ -302,6 +394,9 @@ int main() {
const wchar_t* sessionName = L"D3D12CacheListenerSession";
ULONG bufferSize = sizeof(EVENT_TRACE_PROPERTIES) + 2 * 1024;

// Initialize QPC frequency for timestamp-to-ms conversion
QueryPerformanceFrequency(&g_qpcFrequency);

// Stop any existing session with the same name before starting
StopExistingSession(sessionName);

Expand Down