A small code snippet implementing a flame graph for DearImgui.
The whole data model + logic (without convenience procedures and debug checks) fits into ~60 LOC.
The UI code itself is another ~50 LOC.
The code snippet is written in Odin and can also be found on GitHub:
package FlameGraph
import "core:slice"
import "core:hash"
import "core:strings"
import "core:mem"
import "core:fmt"
import "core:log"
import win32 "core:sys/windows"
import "../../Libraries/imgui" //adjust accordingly
/* --- Platform timing procedures --- */
GetTicks :: proc() -> i64 {
ticks : win32.LARGE_INTEGER
win32.QueryPerformanceCounter(&ticks)
return i64(ticks)
}
GetTicksPerSecond :: proc() -> i64 {
freq : win32.LARGE_INTEGER
win32.QueryPerformanceFrequency(&freq)
return i64(freq)
}
/* --- Flame graph data structures --- */
Slot :: struct {
Name: string,
Depth: i64,
Start: i64,
End : i64,
}
//The main data structure is just a regular dynamic array. It only ever gets appended to.
//Every slot is followed by its child slots. Iterating the array acts as a pre-order tree traversal.
Graph :: struct {
Slots: [dynamic]Slot,
CurrDepth: i64,
}
SlotHandle :: distinct u64 //Index into the Graph.Slots dynamic array
/* --- Flame graph procedures --- */
SlotIsActive :: proc(slot: Slot) -> bool { return slot.End == 0 }
SlotGetMicroseconds :: proc(slot: Slot) -> f64 { return f64((slot.End - slot.Start) * 1000000) / f64(GetTicksPerSecond()) }
SlotGetMilliseconds :: proc(slot: Slot) -> f64 { return f64((slot.End - slot.Start) * 1000 ) / f64(GetTicksPerSecond()) }
SlotStart :: proc(graph: ^Graph, name: string) -> SlotHandle {
when ODIN_DEBUG {
for slot in graph.Slots {
if SlotIsActive(slot) && slot.Name == name {
log.warn("Slot with name %v already exists at the current depth", name)
return 0
}
}
}
index := i64(len(graph.Slots))
append(&graph.Slots, Slot{ Name = name, Depth = graph.CurrDepth, Start = GetTicks() })
graph.CurrDepth += 1
return SlotHandle(index)
}
//Accessing a slot by handle should in theory be faster (not tested) than accessing
//by name. Using the name is however often more convenient.
//An 'unsafe' version of this method could also be considered with no bounds-checking.
SlotEndByHandle :: proc(graph: ^Graph, handle: SlotHandle) {
end := GetTicks() //Get the end time as soon as possible for more accurate times
if i64(handle) < i64(len(graph.Slots)) {
slot := &graph.Slots[handle]
assert(slot.Depth + 1 == graph.CurrDepth)
slot.End = end
graph.CurrDepth -= 1
}
else {
log.warnf("Invalid handle %v", handle)
}
}
SlotEndByName :: proc(graph: ^Graph, name: string) {
end := GetTicks()
#reverse for &slot in graph.Slots {
if SlotIsActive(slot) && slot.Name == name {
assert(slot.Depth + 1 == graph.CurrDepth)
slot.End = end
graph.CurrDepth -= 1
return
}
}
log.warnf("Slot %v does not exist or is not active", name)
}
SlotEnd :: proc { SlotEndByHandle, SlotEndByName }
/* --- Drawing the graph with DearImgui --- */
Draw :: proc(graph: Graph) {
assert(graph.CurrDepth == 0)
//You'd probably want to add some more colors
colors := [?][3]f32 {
{ 120, 85 , 150 },
{ 216, 63 , 71 },
{ 207, 164, 57 },
};
scale : f32 = 1 //Could be used to implement zooming
totalMs: f32
for slot in graph.Slots {
if slot.Depth == 0 {
totalMs += f32(SlotGetMilliseconds(slot))
}
}
SLOT_HEIGHT : f32 : 20
dl := imgui.GetWindowDrawList()
topLeft := imgui.GetCursorScreenPos()
for slot in graph.Slots {
minMs := f32((slot.Start - graph.Slots[0].Start) * 1000) / f32(GetTicksPerSecond());
maxMs := f32((slot.End - graph.Slots[0].Start) * 1000) / f32(GetTicksPerSecond());
min := [2]f32{ (minMs / totalMs * scale) * imgui.GetContentRegionAvail().x, SLOT_HEIGHT * f32(slot.Depth + 0) };
max := [2]f32{ (maxMs / totalMs * scale) * imgui.GetContentRegionAvail().x, SLOT_HEIGHT * f32(slot.Depth + 1) };
nameAsSlice := slice.from_ptr((^u8)(raw_data(slot.Name)), len(slot.Name))
color := colors[hash.crc32(nameAsSlice) % len(colors)]
imgui.DrawList_AddRectFilled(dl, topLeft + min, topLeft + max, imgui.ColorConvertFloat4ToU32({ color.x, color.y, color.z, 255} / 255))
imgui.DrawList_AddRect (dl, topLeft + min, topLeft + max, 0xFF000000)
nameStart := strings.unsafe_string_to_cstring(slot.Name)
nameEnd := cstring(mem.ptr_offset(raw_data(slot.Name), len(slot.Name)))
textSize := imgui.CalcTextSize(nameStart, nameEnd)
if textSize.x <= max.x - min.x {
pos := topLeft + min
imgui.DrawList_AddText(dl, topLeft + (max - min) * 0.5 + min - textSize * 0.5, 0xFFFFFFFF, nameStart, nameEnd)
}
rect := imgui.Rect{ topLeft + min, max + topLeft }
if imgui.Rect_Contains(&rect, imgui.GetMousePos()) {
tooltip := fmt.tprintf("%v\n%.4vms", slot.Name, SlotGetMilliseconds(slot))
imgui.SetTooltip(strings.clone_to_cstring(tooltip, context.temp_allocator))
}
}
}
//Usage example
ImGuiExample :: proc() {
graph: Graph
defer delete(graph.Slots)
SlotStart(&graph, "Total")
{
SlotStart(&graph, "Update")
{
//Some update code. You can nest Slots as deep as you like
}
SlotEnd(&graph, "Update")
//...
//Some code that is not profiled. It will show up as a gap in the flame graph
}
{
SlotStart(&graph, "Render")
SlotEnd(&graph, "Render")
}
SlotEnd(&graph, "Total")
if imgui.Begin("MyFlameGraph") {
Draw(graph)
}
imgui.End()
}