Skip to content

Commit d29f920

Browse files
committed
trace_processor: Parse adreno_cmdbatch_retired/submitted events
Add custom parsing for kgsl adreno_cmdbatch_retired and adreno_cmdbatch_submitted ftrace events to produce GPU timeline slices in the trace processor.
1 parent 82980e3 commit d29f920

5 files changed

Lines changed: 105 additions & 3 deletions

File tree

src/trace_processor/importers/ftrace/ftrace_parser.cc

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -541,6 +541,7 @@ FtraceParser::FtraceParser(TraceProcessorContext* context,
541541
gpu_power_state_off_id_(context->storage->InternString("OFF")),
542542
gpu_power_state_pg_id_(context->storage->InternString("PG")),
543543
gpu_power_state_on_id_(context->storage->InternString("ON")),
544+
gpu_cmdbatch_slice_name_id_(context->storage->InternString("GPU")),
544545
ddic_underrun_id_(context_->storage->InternString("ddic_underrun")),
545546
memcg_reclaim_order_id_(
546547
context->storage->InternString("memcg_reclaim_order")),
@@ -925,6 +926,10 @@ base::Status FtraceParser::ParseFtraceEvent(uint32_t cpu,
925926
ParseKgslGpuFreq(ts, fld_bytes);
926927
break;
927928
}
929+
case FtraceEvent::kKgslAdrenoCmdbatchRetiredFieldNumber: {
930+
ParseKgslAdrenoCmdbatchRetired(ts, fld_bytes);
931+
break;
932+
}
928933
case FtraceEvent::kCpuIdleFieldNumber: {
929934
ParseCpuIdle(ts, fld_bytes);
930935
break;
@@ -1920,6 +1925,37 @@ void FtraceParser::ParseKgslGpuFreq(int64_t timestamp, ConstBytes blob) {
19201925
context_->event_tracker->PushCounter(timestamp, new_freq, track);
19211926
}
19221927

1928+
void FtraceParser::ParseKgslAdrenoCmdbatchRetired(int64_t timestamp,
1929+
protozero::ConstBytes data) {
1930+
protos::pbzero::KgslAdrenoCmdbatchRetiredFtraceEvent::Decoder evt(data);
1931+
1932+
static constexpr auto kBlueprint = TrackCompressor::SliceBlueprint(
1933+
"adreno_gpu_cmdbatch",
1934+
tracks::DimensionBlueprints(tracks::UintDimensionBlueprint("context_id"),
1935+
tracks::UintDimensionBlueprint("prio")),
1936+
tracks::FnNameBlueprint([](uint32_t context_id, uint32_t prio) {
1937+
return base::StackString<64>("Adreno GPU Cmdbatch (Ctx=%u, Prio=%u)",
1938+
context_id, prio);
1939+
}));
1940+
1941+
if (evt.retire() < evt.start()) {
1942+
return;
1943+
}
1944+
// Adreno GPU ticks run at 19.2 MHz, fixed across all Qualcomm mobile SoCs
1945+
// (see KGSL_XO_CLK_FREQ in kgsl_pwrctrl.h).
1946+
constexpr uint64_t kAdrenoGpuTicksPerUs = 19200;
1947+
const int64_t duration = static_cast<int64_t>((evt.retire() - evt.start()) *
1948+
1000000 / kAdrenoGpuTicksPerUs);
1949+
1950+
const uint32_t context_id = evt.id();
1951+
TrackId track_id = context_->track_compressor->InternScoped(
1952+
kBlueprint,
1953+
tracks::Dimensions(context_id, static_cast<uint32_t>(evt.prio())),
1954+
timestamp, duration);
1955+
context_->slice_tracker->Scoped(timestamp, track_id, kNullStringId,
1956+
gpu_cmdbatch_slice_name_id_, duration);
1957+
}
1958+
19231959
void FtraceParser::ParseCpuIdle(int64_t timestamp, ConstBytes blob) {
19241960
protos::pbzero::CpuIdleFtraceEvent::Decoder idle(blob);
19251961
TrackId track = context_->track_tracker->InternTrack(

src/trace_processor/importers/ftrace/ftrace_parser.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ class FtraceParser {
9595
void ParseCpuFreqThrottle(int64_t timestamp, protozero::ConstBytes);
9696
void ParseGpuFreq(int64_t timestamp, protozero::ConstBytes);
9797
void ParseKgslGpuFreq(int64_t timestamp, protozero::ConstBytes);
98+
void ParseKgslAdrenoCmdbatchRetired(int64_t timestamp, protozero::ConstBytes);
9899
void ParseCpuIdle(int64_t timestamp, protozero::ConstBytes);
99100
void ParsePrint(int64_t timestamp, uint32_t pid, protozero::ConstBytes);
100101
void ParseZero(int64_t timestamp, uint32_t pid, protozero::ConstBytes);
@@ -463,6 +464,7 @@ class FtraceParser {
463464
const StringId gpu_power_state_off_id_;
464465
const StringId gpu_power_state_pg_id_;
465466
const StringId gpu_power_state_on_id_;
467+
const StringId gpu_cmdbatch_slice_name_id_;
466468
const StringId ddic_underrun_id_;
467469
std::array<StringId, 8> f2fs_checkpoint_reason_ids_;
468470

src/trace_processor/importers/ftrace/ftrace_tokenizer.cc

Lines changed: 59 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
#include "protos/perfetto/trace/ftrace/ftrace_event.pbzero.h"
5252
#include "protos/perfetto/trace/ftrace/ftrace_event_bundle.pbzero.h"
5353
#include "protos/perfetto/trace/ftrace/fwtp_ftrace.pbzero.h"
54+
#include "protos/perfetto/trace/ftrace/kgsl.pbzero.h"
5455
#include "protos/perfetto/trace/ftrace/power.pbzero.h"
5556
#include "protos/perfetto/trace/ftrace/thermal_exynos.pbzero.h"
5657
#include "src/trace_processor/util/clock_synchronizer.h"
@@ -68,6 +69,7 @@ using protos::pbzero::FtraceEventBundle;
6869
namespace {
6970

7071
constexpr uint32_t kSequenceScopedClockId = 64;
72+
constexpr uint32_t kAdrenoGpuClockId = 65;
7173

7274
// Fast path for parsing the event id of an ftrace event.
7375
// Speculate on the fact that, if the timestamp was found, the common pid
@@ -154,7 +156,7 @@ base::Status FtraceTokenizer::TokenizeFtraceBundle(
154156

155157
for (auto it = decoder.event(); it; ++it) {
156158
TokenizeFtraceEvent(cpu, clock_id, bundle.slice(it->data(), it->size()),
157-
state);
159+
state, packet_sequence_id);
158160
}
159161

160162
// v50+: optional proto descriptors for generic (i.e. not known at
@@ -214,7 +216,8 @@ void FtraceTokenizer::TokenizeFtraceEvent(
214216
uint32_t cpu,
215217
ClockTracker::ClockId clock_id,
216218
TraceBlobView event,
217-
RefPtr<PacketSequenceStateGeneration> state) {
219+
RefPtr<PacketSequenceStateGeneration> state,
220+
uint32_t packet_sequence_id) {
218221
constexpr auto kTimestampFieldNumber =
219222
protos::pbzero::FtraceEvent::kTimestampFieldNumber;
220223
constexpr auto kTimestampFieldTag = MakeTagVarInt(kTimestampFieldNumber);
@@ -301,6 +304,60 @@ void FtraceTokenizer::TokenizeFtraceEvent(
301304
return;
302305
}
303306

307+
// The submitted event provides a CPU <-> GPU clock sync point; we register
308+
// it as a sequence-scoped clock so the clock tracker can convert GPU tick
309+
// timestamps to trace time, skipping duplicates since the GPU clock runs at
310+
// a fixed frequency and a single snapshot is sufficient.
311+
// Adreno GPU ticks run at 19.2 MHz (KGSL_XO_CLK_FREQ in kgsl_pwrctrl.h).
312+
constexpr int64_t kAdrenoGpuTicksPerUs = 19200;
313+
if (event_id ==
314+
protos::pbzero::FtraceEvent::kKgslAdrenoCmdbatchSubmittedFieldNumber) {
315+
auto submitted_field = GetFtraceEventField(
316+
protos::pbzero::FtraceEvent::kKgslAdrenoCmdbatchSubmittedFieldNumber,
317+
event);
318+
if (submitted_field.has_value()) {
319+
protos::pbzero::KgslAdrenoCmdbatchSubmittedFtraceEvent::Decoder evt(
320+
submitted_field->as_bytes());
321+
if (!adreno_gpu_clock_registered_) {
322+
const int64_t sync_time_ns =
323+
static_cast<int64_t>(evt.secs()) * 1000000000LL +
324+
static_cast<int64_t>(evt.usecs()) * 1000LL;
325+
if (sync_time_ns > 0) {
326+
const int64_t gpu_ticks_ns = static_cast<int64_t>(evt.ticks()) *
327+
1000000 / kAdrenoGpuTicksPerUs;
328+
auto gpu_clock =
329+
ClockId::Sequence(context_->trace_id().value, packet_sequence_id,
330+
kAdrenoGpuClockId);
331+
context_->clock_tracker->AddSnapshot({
332+
ClockTracker::ClockTimestamp(gpu_clock, gpu_ticks_ns),
333+
ClockTracker::ClockTimestamp(clock_id, sync_time_ns),
334+
});
335+
adreno_gpu_clock_registered_ = true;
336+
}
337+
}
338+
}
339+
} else if (event_id == protos::pbzero::FtraceEvent::
340+
kKgslAdrenoCmdbatchRetiredFieldNumber) {
341+
auto retired_field = GetFtraceEventField(
342+
protos::pbzero::FtraceEvent::kKgslAdrenoCmdbatchRetiredFieldNumber,
343+
event);
344+
if (retired_field.has_value()) {
345+
protos::pbzero::KgslAdrenoCmdbatchRetiredFtraceEvent::Decoder evt(
346+
retired_field->as_bytes());
347+
auto gpu_clock = ClockId::Sequence(context_->trace_id().value,
348+
packet_sequence_id, kAdrenoGpuClockId);
349+
const int64_t gpu_start_ns =
350+
static_cast<int64_t>(evt.start()) * 1000000 / kAdrenoGpuTicksPerUs;
351+
auto ts = context_->clock_tracker->ToTraceTime(
352+
gpu_clock, gpu_start_ns, std::nullopt, /*suppress_errors=*/true);
353+
if (ts.has_value()) {
354+
module_context_->PushFtraceEvent(
355+
cpu, *ts, TracePacketData{std::move(event), std::move(state)});
356+
return;
357+
}
358+
}
359+
}
360+
304361
std::optional<int64_t> timestamp = context_->clock_tracker->ToTraceTime(
305362
clock_id, static_cast<int64_t>(raw_timestamp));
306363
// ClockTracker will increment some error stats if it failed to convert the

src/trace_processor/importers/ftrace/ftrace_tokenizer.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,8 @@ class FtraceTokenizer {
5858
void TokenizeFtraceEvent(uint32_t cpu,
5959
ClockTracker::ClockId,
6060
TraceBlobView event,
61-
RefPtr<PacketSequenceStateGeneration> state);
61+
RefPtr<PacketSequenceStateGeneration> state,
62+
uint32_t packet_sequence_id);
6263
void TokenizeFtraceCompactSched(uint32_t cpu,
6364
ClockTracker::ClockId,
6465
protozero::ConstBytes);
@@ -109,6 +110,7 @@ class FtraceTokenizer {
109110
GenericFtraceTracker* generic_tracker_;
110111

111112
int64_t latest_ftrace_clock_snapshot_ts_ = 0;
113+
bool adreno_gpu_clock_registered_ = false;
112114
std::vector<bool> per_cpu_seen_first_bundle_;
113115
};
114116

ui/src/plugins/dev.perfetto.TraceProcessorTrack/slice_tracks.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,11 @@ export const SLICE_TRACK_SCHEMAS: ReadonlyArray<SliceTrackTypeSchema> = [
248248
topLevelGroup: 'HARDWARE',
249249
group: undefined,
250250
},
251+
{
252+
type: 'adreno_gpu_cmdbatch',
253+
topLevelGroup: 'HARDWARE',
254+
group: 'GPU Adreno',
255+
},
251256
{
252257
type: 'triggers',
253258
topLevelGroup: 'SYSTEM',

0 commit comments

Comments
 (0)