Skip to content

Commit 609564a

Browse files
rossning92 and claude committed
trace_processor: Parse adreno_cmdbatch_retired/submitted events
Add custom parsing for the kgsl adreno_cmdbatch_retired and adreno_cmdbatch_submitted ftrace events to produce GPU timeline slices in the trace processor.
- Tokenizer: record the GPU tick-based sync info carried by submitted events and use it to adjust the timestamp of the matching retired event, so that retired events are ordered correctly before sorting.
- Parser: create scoped GPU slices per (context ID, priority) pair using TrackCompressor, visible as "GPU (Ctx=N, Prio=P)" tracks.
- UI: register the gpu_adreno_cmdbatch track type so the tracks appear in the timeline under HARDWARE / GPU Adreno.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 82980e3 commit 609564a

5 files changed

Lines changed: 96 additions & 0 deletions

File tree

src/trace_processor/importers/ftrace/ftrace_parser.cc

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,13 @@ using protos::pbzero::perfetto_pbzero_enum_KprobeEvent::KprobeType;
136136
using protozero::ConstBytes;
137137
using protozero::ProtoDecoder;
138138

139+
// Adreno GPU ticks run at 19.2 MHz, i.e. 19,200,000 ticks per second
// (19.2 ticks per microsecond, one tick ~= 52.08 ns).
constexpr uint64_t kGpuTickFrequencyHz = 19200000;
constexpr uint64_t kNsPerSecond = 1000000000;

// Converts a GPU tick count into nanoseconds, rounding down.
//
// The conversion is split into whole seconds plus a sub-second remainder so
// that the intermediate product cannot overflow: the remainder is always
// smaller than kGpuTickFrequencyHz, so `remainder * kNsPerSecond` stays well
// below 2^64. The result is exactly floor(ticks * 1e9 / 19.2e6).
uint64_t GpuTicksToNs(uint64_t ticks) {
  const uint64_t whole_seconds = ticks / kGpuTickFrequencyHz;
  const uint64_t remainder_ticks = ticks % kGpuTickFrequencyHz;
  return whole_seconds * kNsPerSecond +
         remainder_ticks * kNsPerSecond / kGpuTickFrequencyHz;
}
145+
139146
struct FtraceEventAndFieldId {
140147
uint32_t event_id;
141148
uint32_t field_id;
@@ -541,6 +548,7 @@ FtraceParser::FtraceParser(TraceProcessorContext* context,
541548
gpu_power_state_off_id_(context->storage->InternString("OFF")),
542549
gpu_power_state_pg_id_(context->storage->InternString("PG")),
543550
gpu_power_state_on_id_(context->storage->InternString("ON")),
551+
gpu_cmdbatch_slice_name_id_(context->storage->InternString("GPU")),
544552
ddic_underrun_id_(context_->storage->InternString("ddic_underrun")),
545553
memcg_reclaim_order_id_(
546554
context->storage->InternString("memcg_reclaim_order")),
@@ -925,6 +933,10 @@ base::Status FtraceParser::ParseFtraceEvent(uint32_t cpu,
925933
ParseKgslGpuFreq(ts, fld_bytes);
926934
break;
927935
}
936+
case FtraceEvent::kKgslAdrenoCmdbatchRetiredFieldNumber: {
937+
ParseKgslAdrenoCmdbatchRetired(ts, fld_bytes);
938+
break;
939+
}
928940
case FtraceEvent::kCpuIdleFieldNumber: {
929941
ParseCpuIdle(ts, fld_bytes);
930942
break;
@@ -1920,6 +1932,33 @@ void FtraceParser::ParseKgslGpuFreq(int64_t timestamp, ConstBytes blob) {
19201932
context_->event_tracker->PushCounter(timestamp, new_freq, track);
19211933
}
19221934

1935+
void FtraceParser::ParseKgslAdrenoCmdbatchRetired(int64_t timestamp,
1936+
protozero::ConstBytes data) {
1937+
protos::pbzero::KgslAdrenoCmdbatchRetiredFtraceEvent::Decoder evt(data);
1938+
1939+
static constexpr auto kBlueprint = TrackCompressor::SliceBlueprint(
1940+
"gpu_adreno_cmdbatch",
1941+
tracks::DimensionBlueprints(tracks::UintDimensionBlueprint("context_id"),
1942+
tracks::UintDimensionBlueprint("prio")),
1943+
tracks::FnNameBlueprint([](uint32_t context_id, uint32_t prio) {
1944+
return base::StackString<64>("GPU (Ctx=%u, Prio=%u)", context_id, prio);
1945+
}));
1946+
1947+
if (evt.retire() < evt.start()) {
1948+
return;
1949+
}
1950+
const int64_t duration =
1951+
static_cast<int64_t>(GpuTicksToNs(evt.retire() - evt.start()));
1952+
1953+
const uint32_t context_id = evt.id();
1954+
TrackId track_id = context_->track_compressor->InternScoped(
1955+
kBlueprint,
1956+
tracks::Dimensions(context_id, static_cast<uint32_t>(evt.prio())),
1957+
timestamp, duration);
1958+
context_->slice_tracker->Scoped(timestamp, track_id, kNullStringId,
1959+
gpu_cmdbatch_slice_name_id_, duration);
1960+
}
1961+
19231962
void FtraceParser::ParseCpuIdle(int64_t timestamp, ConstBytes blob) {
19241963
protos::pbzero::CpuIdleFtraceEvent::Decoder idle(blob);
19251964
TrackId track = context_->track_tracker->InternTrack(

src/trace_processor/importers/ftrace/ftrace_parser.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ class FtraceParser {
9595
void ParseCpuFreqThrottle(int64_t timestamp, protozero::ConstBytes);
9696
void ParseGpuFreq(int64_t timestamp, protozero::ConstBytes);
9797
void ParseKgslGpuFreq(int64_t timestamp, protozero::ConstBytes);
98+
void ParseKgslAdrenoCmdbatchRetired(int64_t timestamp, protozero::ConstBytes);
9899
void ParseCpuIdle(int64_t timestamp, protozero::ConstBytes);
99100
void ParsePrint(int64_t timestamp, uint32_t pid, protozero::ConstBytes);
100101
void ParseZero(int64_t timestamp, uint32_t pid, protozero::ConstBytes);
@@ -463,6 +464,7 @@ class FtraceParser {
463464
const StringId gpu_power_state_off_id_;
464465
const StringId gpu_power_state_pg_id_;
465466
const StringId gpu_power_state_on_id_;
467+
const StringId gpu_cmdbatch_slice_name_id_;
466468
const StringId ddic_underrun_id_;
467469
std::array<StringId, 8> f2fs_checkpoint_reason_ids_;
468470

src/trace_processor/importers/ftrace/ftrace_tokenizer.cc

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
#include "protos/perfetto/trace/ftrace/ftrace_event.pbzero.h"
5252
#include "protos/perfetto/trace/ftrace/ftrace_event_bundle.pbzero.h"
5353
#include "protos/perfetto/trace/ftrace/fwtp_ftrace.pbzero.h"
54+
#include "protos/perfetto/trace/ftrace/kgsl.pbzero.h"
5455
#include "protos/perfetto/trace/ftrace/power.pbzero.h"
5556
#include "protos/perfetto/trace/ftrace/thermal_exynos.pbzero.h"
5657
#include "src/trace_processor/util/clock_synchronizer.h"
@@ -69,6 +70,13 @@ namespace {
6970

7071
constexpr uint32_t kSequenceScopedClockId = 64;
7172

73+
// Adreno GPU ticks run at 19.2 MHz, i.e. 19,200,000 ticks per second
// (19.2 ticks per microsecond, one tick ~= 52.08 ns).
constexpr uint64_t kGpuTickFrequencyHz = 19200000;
constexpr uint64_t kNsPerSecond = 1000000000;

// Converts a GPU tick count into nanoseconds, rounding down.
//
// Whole seconds and the sub-second remainder are converted separately so the
// intermediate product cannot overflow: the remainder is always smaller than
// kGpuTickFrequencyHz, which keeps `remainder * kNsPerSecond` well below 2^64.
// The result is exactly floor(ticks * 1e9 / 19.2e6).
uint64_t GpuTicksToNs(uint64_t ticks) {
  const uint64_t whole_seconds = ticks / kGpuTickFrequencyHz;
  const uint64_t remainder_ticks = ticks % kGpuTickFrequencyHz;
  return whole_seconds * kNsPerSecond +
         remainder_ticks * kNsPerSecond / kGpuTickFrequencyHz;
}
79+
7280
// Fast path for parsing the event id of an ftrace event.
7381
// Speculate on the fact that, if the timestamp was found, the common pid
7482
// will appear immediately after and the event id immediately after that.
@@ -301,6 +309,41 @@ void FtraceTokenizer::TokenizeFtraceEvent(
301309
return;
302310
}
303311

312+
if (event_id ==
313+
protos::pbzero::FtraceEvent::kKgslAdrenoCmdbatchRetiredFieldNumber) {
314+
auto retired_field = GetFtraceEventField(
315+
protos::pbzero::FtraceEvent::kKgslAdrenoCmdbatchRetiredFieldNumber,
316+
event);
317+
if (retired_field.has_value()) {
318+
protos::pbzero::KgslAdrenoCmdbatchRetiredFtraceEvent::Decoder evt(
319+
retired_field->as_bytes());
320+
321+
const uint32_t ts = evt.timestamp();
322+
auto* info = cmdbatch_sync_map_.Find(ts);
323+
if (info && evt.start() >= info->ticks) {
324+
raw_timestamp =
325+
info->sync_time + GpuTicksToNs(evt.start() - info->ticks);
326+
cmdbatch_sync_map_.Erase(ts);
327+
} else if (evt.retire() >= evt.start()) {
328+
raw_timestamp -= GpuTicksToNs(evt.retire() - evt.start());
329+
}
330+
}
331+
} else if (event_id == protos::pbzero::FtraceEvent::
332+
kKgslAdrenoCmdbatchSubmittedFieldNumber) {
333+
auto submitted_field = GetFtraceEventField(
334+
protos::pbzero::FtraceEvent::kKgslAdrenoCmdbatchSubmittedFieldNumber,
335+
event);
336+
if (submitted_field.has_value()) {
337+
protos::pbzero::KgslAdrenoCmdbatchSubmittedFtraceEvent::Decoder evt(
338+
submitted_field->as_bytes());
339+
const uint64_t sync_time_ns =
340+
static_cast<uint64_t>(evt.secs()) * 1000000000ULL +
341+
static_cast<uint64_t>(evt.usecs()) * 1000ULL;
342+
const uint32_t ts = evt.timestamp();
343+
cmdbatch_sync_map_.Insert(ts, {sync_time_ns, evt.ticks()});
344+
}
345+
}
346+
304347
std::optional<int64_t> timestamp = context_->clock_tracker->ToTraceTime(
305348
clock_id, static_cast<int64_t>(raw_timestamp));
306349
// ClockTracker will increment some error stats if it failed to convert the

src/trace_processor/importers/ftrace/ftrace_tokenizer.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include "perfetto/base/compiler.h"
2626
#include "perfetto/base/logging.h"
2727
#include "perfetto/base/status.h"
28+
#include "perfetto/ext/base/flat_hash_map.h"
2829
#include "perfetto/ext/base/status_or.h"
2930
#include "perfetto/protozero/field.h"
3031
#include "perfetto/trace_processor/ref_counted.h"
@@ -110,6 +111,12 @@ class FtraceTokenizer {
110111

111112
int64_t latest_ftrace_clock_snapshot_ts_ = 0;
112113
std::vector<bool> per_cpu_seen_first_bundle_;
114+
115+
// Sync point captured from an adreno_cmdbatch_submitted event. It is stored
// under the event's timestamp field and looked up again when a retired event
// carrying the same timestamp field is tokenized.
//
// Members are zero-initialized so a default-constructed value never holds
// indeterminate data; brace initialization `{sync_time, ticks}` still works.
struct CmdbatchSyncInfo {
  // Time in nanoseconds, built from the submitted event's secs/usecs fields.
  uint64_t sync_time = 0;
  // Value of the submitted event's ticks field (GPU tick counter).
  uint64_t ticks = 0;
};
119+
base::FlatHashMap<uint32_t, CmdbatchSyncInfo> cmdbatch_sync_map_;
113120
};
114121

115122
} // namespace perfetto::trace_processor

ui/src/plugins/dev.perfetto.TraceProcessorTrack/slice_tracks.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,11 @@ export const SLICE_TRACK_SCHEMAS: ReadonlyArray<SliceTrackTypeSchema> = [
248248
topLevelGroup: 'HARDWARE',
249249
group: undefined,
250250
},
251+
{
252+
type: 'gpu_adreno_cmdbatch',
253+
topLevelGroup: 'HARDWARE',
254+
group: 'GPU Adreno',
255+
},
251256
{
252257
type: 'triggers',
253258
topLevelGroup: 'SYSTEM',

0 commit comments

Comments
 (0)