Skip to content

Commit f55c4de

Browse files
perf_reader: Change perf_reader_event_read to only read a maximum of
one ring buffer worth of data. Signed-off-by: James Bartlett <[email protected]>
1 parent 6496709 commit f55c4de

File tree

3 files changed

+166
-4
lines changed

3 files changed

+166
-4
lines changed

src/cc/perf_reader.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -172,8 +172,8 @@ void perf_reader_event_read(struct perf_reader *reader) {
172172
// Consume all the events on this ring, calling the cb function for each one.
173173
// The message may fall on the ring boundary, in which case copy the message
174174
// into a malloced buffer.
175-
for (data_head = read_data_head(perf_header); perf_header->data_tail != data_head;
176-
data_head = read_data_head(perf_header)) {
175+
data_head = read_data_head(perf_header);
176+
while (perf_header->data_tail != data_head) {
177177
uint64_t data_tail = perf_header->data_tail;
178178
uint8_t *ptr;
179179

tests/cc/CMakeLists.txt

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ set(TEST_LIBBCC_SOURCES
3636
test_sock_table.cc
3737
test_usdt_args.cc
3838
test_usdt_probes.cc
39+
test_perf_buffer_poll_full_ring_buf.cc
3940
utils.cc
4041
test_parse_tracepoint.cc)
4142

@@ -46,7 +47,7 @@ if(NOT CMAKE_USE_LIBBPF_PACKAGE)
4647
add_executable(test_libbcc ${TEST_LIBBCC_SOURCES})
4748
add_dependencies(test_libbcc bcc-shared)
4849

49-
target_link_libraries(test_libbcc ${PROJECT_BINARY_DIR}/src/cc/libbcc.so dl usdt_test_lib)
50+
target_link_libraries(test_libbcc ${PROJECT_BINARY_DIR}/src/cc/libbcc.so dl usdt_test_lib pthread)
5051
set_target_properties(test_libbcc PROPERTIES INSTALL_RPATH ${PROJECT_BINARY_DIR}/src/cc)
5152
target_compile_definitions(test_libbcc PRIVATE -DLIBBCC_NAME=\"libbcc.so\")
5253

@@ -57,7 +58,7 @@ if(LIBBPF_FOUND)
5758
add_executable(test_libbcc_no_libbpf ${TEST_LIBBCC_SOURCES})
5859
add_dependencies(test_libbcc_no_libbpf bcc-shared)
5960

60-
target_link_libraries(test_libbcc_no_libbpf ${PROJECT_BINARY_DIR}/src/cc/libbcc.so dl usdt_test_lib ${LIBBPF_LIBRARIES})
61+
target_link_libraries(test_libbcc_no_libbpf ${PROJECT_BINARY_DIR}/src/cc/libbcc.so dl usdt_test_lib ${LIBBPF_LIBRARIES} pthread)
6162
set_target_properties(test_libbcc_no_libbpf PROPERTIES INSTALL_RPATH ${PROJECT_BINARY_DIR}/src/cc)
6263
target_compile_definitions(test_libbcc_no_libbpf PRIVATE -DLIBBCC_NAME=\"libbcc.so\")
6364

Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
#include <linux/perf_event.h>
2+
#include <linux/version.h>
3+
#include <unistd.h>
4+
5+
#include <atomic>
6+
#include <chrono>
7+
#include <fstream>
8+
#include <iostream>
9+
#include <string>
10+
#include <thread>
11+
12+
#include "BPF.h"
13+
#include "catch.hpp"
14+
15+
// Cookie shared with the perf-buffer callbacks: tracks the total number of
// bytes delivered to the data callback, plus a tunable per-event delay that
// lets the test make the consumer artificially slower than the producer.
struct cb_data {
  uint64_t data_received;
  std::atomic<size_t> sleep_ms;
};

// Perf-buffer data callback: accumulate the event size into the cookie, then
// stall for the configured number of milliseconds so that the ring-buffer
// consumer falls behind the kernel producer.
void handle_data_fn(void* cb_cookie, void*, int data_size) {
  auto* cookie = static_cast<struct cb_data*>(cb_cookie);
  cookie->data_received += data_size;
  const auto delay = std::chrono::milliseconds(cookie->sleep_ms.load());
  std::this_thread::sleep_for(delay);
}

// Perf-buffer loss callback: lost events are expected here and ignored.
void handle_data_loss_fn(void*, unsigned long) {}
29+
30+
// Scope guard that stops and joins a worker thread on destruction: it sets
// the shared `done` flag so the worker's loop exits, then joins the thread.
// Non-owning: both pointed-to objects must outlive this guard.
class RAIIThreadCloser {
 public:
  RAIIThreadCloser(std::atomic<bool>* done, std::thread* thread)
      : done_(done), thread_(thread) {}

  // The user-declared destructor would otherwise leave the implicit copy
  // operations enabled, and a copy would double-join the thread (UB).
  // Delete them per the Rule of Five.
  RAIIThreadCloser(const RAIIThreadCloser&) = delete;
  RAIIThreadCloser& operator=(const RAIIThreadCloser&) = delete;

  ~RAIIThreadCloser() {
    // Signal the worker loop to exit before joining, otherwise join() would
    // deadlock waiting on a loop that never terminates.
    done_->store(true);
    // join() on a non-joinable thread calls std::terminate(); guard it.
    if (thread_->joinable()) thread_->join();
  }

 private:
  std::atomic<bool>* done_;  // Stop flag shared with the worker thread.
  std::thread* thread_;      // Thread to join; not owned.
};
43+
44+
/**
45+
* This test demonstrates a bug in perf_reader where perf_reader_event_read can
46+
* loop over the ring buffer more than once in a single call, if the consumer of
47+
* the event data (i.e. raw_cb) is slower than the producer (the kernel pushing
48+
events from ebpf). To demonstrate this bug we have a thread that continually
49+
* writes to /dev/null, then we deploy a BPF program that looks for writes from
50+
* this PID and for each write it will submit 30kB to the perf buffer. Then we
51+
artificially slow down the perf buffer data callback so that it's slower than
52+
* the kernel producing data. If we removed the timeout in the test the
53+
* perf_reader->poll() call could potentially run indefinitely (depending on the
54+
* num_pages and sleep_ms it might not run forever but if sleep_ms is large
55+
* enough it will). Instead we set a timeout and check that the amount of data
56+
* read from a single `poll()` call is less than the size of the kernel ring
57+
* buffer.
58+
*/
59+
TEST_CASE("test perf buffer poll full ring buf", "[bpf_perf_event]") {
60+
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 13, 0)
61+
std::atomic<bool> write_done(false);
62+
// This thread writes to /dev/null continuously so it should trigger many
63+
// write syscalls which will fill up the perf ring buf.
64+
std::thread write_thread([&]() {
65+
std::ofstream out("/dev/null");
66+
while (!write_done.load()) {
67+
out << "test";
68+
out.flush();
69+
}
70+
});
71+
RAIIThreadCloser closer(&write_done, &write_thread);
72+
73+
// Size of each submitted event; large so a handful of writes fill the ring.
#define MSG_SIZE (32 * 1024)
74+
const std::string BPF_PROGRAM_FORMAT_STR = R"(
75+
#include <uapi/linux/ptrace.h>
76+
#define MSG_SIZE %d
77+
struct event_t {
78+
  char msg[MSG_SIZE];
79+
};
80+
BPF_PERF_OUTPUT(events);
81+
BPF_PERCPU_ARRAY(events_heap, struct event_t, 1);
82+
83+
84+
// Probe that submits a 32kB event every time write is called.
85+
int syscall__probe_entry_write(struct pt_regs* ctx, int fd, char* buf, size_t count) {
86+
  uint32_t kZero = 0;
87+
  struct event_t* event = events_heap.lookup(&kZero);
88+
  if (event == NULL) {
89+
    return 0;
90+
  }
91+
  uint32_t tgid = bpf_get_current_pid_tgid() >> 32;
92+
  if (tgid != %lu) {
93+
    return 0;
94+
  }
95+
  events.perf_submit(ctx, event, sizeof(struct event_t));
96+
  return 0;
97+
}
98+
)";
99+
unsigned long pid = getpid();
100+
101+
// Use the BPF program as a format string to insert the current test
102+
// process's PID so that we only submit events generated by the current
103+
// process.
104+
auto extra_length = snprintf(nullptr, 0, "%d %lu", MSG_SIZE, pid);
105+
char bpf_program[BPF_PROGRAM_FORMAT_STR.size() + extra_length];
106+
auto n = snprintf(bpf_program, sizeof(bpf_program),
107+
                  BPF_PROGRAM_FORMAT_STR.c_str(), MSG_SIZE, pid);
108+
const std::string BPF_PROGRAM(bpf_program, n);
109+
110+
auto num_cpus = sysconf(_SC_NPROCESSORS_ONLN);
111+
auto page_size = sysconf(_SC_PAGE_SIZE);
112+
113+
ebpf::BPF bpf;
114+
ebpf::StatusTuple res(0);
115+
res = bpf.init(BPF_PROGRAM, {"-DNUM_CPUS=" + std::to_string(num_cpus)}, {});
116+
REQUIRE(res.code() == 0);
117+
118+
std::string write_fnname = bpf.get_syscall_fnname("write");
119+
res = bpf.attach_kprobe(write_fnname, "syscall__probe_entry_write");
120+
REQUIRE(res.code() == 0);
121+
122+
struct cb_data cb_cookie = {};
123+
// Start with a 200ms per-event delay so the consumer is slower than the
// kernel producer; dropped to 0 later so the poll can finish.
cb_cookie.sleep_ms = 200;
124+
cb_cookie.data_received = 0;
125+
126+
int num_pages = 64;
127+
std::string perf_buffer_name("events");
128+
res = bpf.open_perf_buffer(perf_buffer_name, handle_data_fn,
129+
                           handle_data_loss_fn, &cb_cookie, num_pages);
130+
REQUIRE(res.code() == 0);
131+
132+
std::atomic<bool> poll_done(false);
133+
int cnt = 0;
134+
std::thread poll_thread([&]() {
135+
auto perf_buffer = bpf.get_perf_buffer(perf_buffer_name);
136+
REQUIRE(perf_buffer != nullptr);
137+
cnt = perf_buffer->poll(0);
138+
poll_done = true;
139+
});
140+
141+
auto start = std::chrono::steady_clock::now();
142+
std::chrono::seconds timeout(20);
143+
while (!poll_done.load() &&
144+
       (std::chrono::steady_clock::now() - start) < timeout) {
145+
std::this_thread::sleep_for(std::chrono::milliseconds{10});
146+
}
147+
// After the timeout we set the sleep time to 0, so the reader should catch up
148+
// and terminate.
149+
cb_cookie.sleep_ms = 0;
150+
poll_thread.join();
151+
152+
res = bpf.close_perf_buffer(perf_buffer_name);
153+
REQUIRE(res.code() == 0);
154+
res = bpf.detach_kprobe(write_fnname);
155+
REQUIRE(res.code() == 0);
156+
157+
// cnt is the number of perf_readers the poll() call read from. So we should
158+
// not have received more data than 1 full ring buffer per perf_reader.
159+
REQUIRE(cb_cookie.data_received <= (cnt * num_pages * page_size));
160+
#endif
161+
}

0 commit comments

Comments
 (0)