From 68f6eb8c53c3363643faf5993fe19290298c7462 Mon Sep 17 00:00:00 2001 From: Lukas von Briel <91612110+lvb2000@users.noreply.github.com> Date: Mon, 4 Nov 2024 21:23:37 +0100 Subject: [PATCH] [ext] Add instruction tracing to Orbetto See #10. --- .github/workflows/build.yml | 4 +- .gitignore | 4 + ext/orbetto/meson.build | 15 +- ext/orbetto/metrics/METRICS.md | 68 ++ ext/orbetto/metrics/display.py | 475 ++++++++ ext/orbetto/metrics/elfFileDecode.py | 213 ++++ ext/orbetto/metrics/metrics.py | 236 ++++ ext/orbetto/metrics/metrics.yml | 55 + ext/orbetto/metrics/queries.py | 175 +++ ext/orbetto/src/LRUCache.hpp | 71 ++ ext/orbetto/src/TRACE.md | 169 +++ ext/orbetto/src/device.cpp | 10 + ext/orbetto/src/device.hpp | 15 + ext/orbetto/src/mortrall.hpp | 1220 +++++++++++++++++++++ ext/orbetto/src/orbetto.cpp | 151 ++- ext/orbetto/subprojects/croaring.wrap | 4 + ext/orbetto/subprojects/orbuculum.wrap | 2 +- src/emdbg/bench/data/fmu_v5x_orbtrace.cfg | 2 +- src/emdbg/bench/data/fmu_v6x_orbtrace.cfg | 2 +- src/emdbg/debug/data/cortex_m.gdb | 34 +- src/emdbg/debug/data/orbuculum.gdb | 79 +- 21 files changed, 2962 insertions(+), 42 deletions(-) create mode 100644 ext/orbetto/metrics/METRICS.md create mode 100644 ext/orbetto/metrics/display.py create mode 100644 ext/orbetto/metrics/elfFileDecode.py create mode 100644 ext/orbetto/metrics/metrics.py create mode 100644 ext/orbetto/metrics/metrics.yml create mode 100644 ext/orbetto/metrics/queries.py create mode 100644 ext/orbetto/src/LRUCache.hpp create mode 100644 ext/orbetto/src/TRACE.md create mode 100644 ext/orbetto/src/mortrall.hpp create mode 100644 ext/orbetto/subprojects/croaring.wrap diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 421e1bd..725dca1 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -8,7 +8,7 @@ jobs: runs-on: ubuntu-latest steps: - run: sudo apt-get update - - run: sudo apt-get install -y libusb-1.0-0-dev libzmq3-dev meson libsdl2-dev libdwarf-dev libdw-dev libelf-dev libcapstone-dev python3-pip ninja-build protobuf-compiler + - run: sudo apt-get install -y libusb-1.0-0-dev libzmq3-dev meson libsdl2-dev libdwarf-dev libdw-dev libelf-dev libcapstone-dev python3-pip ninja-build protobuf-compiler cmake - run: sudo pip3 install meson==1.2.0 - uses: actions/checkout@v3 - run: meson setup ./build @@ -27,7 +27,7 @@ jobs: name: Build and Test Orbetto on macOS runs-on: macos-13 steps: - - run: brew install zmq sdl2 libelf protobuf meson ninja capstone dwarfutils + - run: brew install zmq sdl2 libelf protobuf meson ninja capstone dwarfutils cmake - uses: actions/checkout@v3 - run: meson setup ./build working-directory: ext/orbetto diff --git a/.gitignore b/.gitignore index 7066030..e46c628 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,10 @@ dist/ /ext/orbetto/subprojects/orbuculum /ext/orbetto/subprojects/libdwarf* /ext/orbetto/subprojects/packagecache +/ext/orbetto/subprojects/CRoaring /ext/orbetto/*.perf /ext/orbetto/*.gz /ext/orbetto/*.debug +/ext/orbetto/*.zip +/ext/orbetto/*.roar +/ext/orbetto/trace.swo \ No newline at end of file diff --git a/ext/orbetto/meson.build b/ext/orbetto/meson.build index 822c021..19693a9 100644 --- a/ext/orbetto/meson.build +++ b/ext/orbetto/meson.build @@ -15,6 +15,17 @@ perfbuf_sources = cpp_gen.process(proto_sources, preserve_path_from: meson.curre # Compile and link the protobuf source files perfbuf_lib = static_library('perfbuf', sources: perfbuf_sources, dependencies: [dependency('protobuf')]) +# Specify the include directory for Roaring Bitmap +cmake = import('cmake') +rm = cmake.subproject('croaring') +# Fetch the dependency object +message('CMake targets:\n - ' + '\n - '.join(rm.target_list())) +rm_dep1 = rm.dependency('roaring-headers-cpp') +rm_dep2 = rm.dependency('roaring-headers') +rm_dep = rm.dependency('roaring') +rm_inc = rm.get_variable('roaring_headers_cpp_inc') + rm.get_variable('roaring_headers_inc') + + # Clone the orbuculum git repository orbuculum = subproject('orbuculum') liborb = orbuculum.get_variable('liborb') @@ -24,8 +35,8 @@ git_version_info_h = orbuculum.get_variable('git_version_info_h') # Compile and link everything together executable('orbetto', sources: ['src/orbetto.cpp', 'src/device.cpp', git_version_info_h], - include_directories: ['src'] + perfbuf_lib.private_dir_include() + orbinc, - dependencies: [dependency('protobuf')] + orbuculum.get_variable('dependencies'), + include_directories: ['src'] + perfbuf_lib.private_dir_include() + orbinc + rm_inc, + dependencies: [dependency('protobuf')] + orbuculum.get_variable('dependencies') + [rm_dep] + [rm_dep1] + [rm_dep2], link_with: [liborb, perfbuf_lib], install: true, ) diff --git a/ext/orbetto/metrics/METRICS.md b/ext/orbetto/metrics/METRICS.md new file mode 100644 index 0000000..42a7006 --- /dev/null +++ b/ext/orbetto/metrics/METRICS.md @@ -0,0 +1,68 @@ +# Trace Metrics with Plotly in Dash + +As explained in [TRACE.md](../src/TRACE.md), Perfetto is a very useful tool for +displaying trace data, but is limited in data size and display variability. +Therefore, as a proof of concept, some metrics are calculated in Python. We use +the Protobuf file generated by Orbetto, the SQL trace processor from Perfetto +and a program counter bitmap generated by CRoaring in Orbetto. + + +## Visualising metrics + +To start the local dash server you need to pass a Protobuf file when analyzing +ITM data or a bitmap for code coverage data. Depending on which argument you +pass, different metrics can be displayed. Use `-h` to see all options. If you +are using the `-diff` flag, you will need to provide a second Protobuf file to +display the metrics based on the difference between the between the two files. +This is very useful for comparing different versions or settings between two +runs. + +```sh +# cd orbetto/metrics +# two metrics +python3 metrics.py -f /path/to/orbetto.perf -wq -dwq +# difference between two traces on one metric +python3 metrics.py -f /path/to/orbetto_1.perf -f2 /path/to/orbetto_1.perf -hp -diff +# Code coverage for pc bitmap (beta not completely accurate) +python3 metrics.py -bm /path/to/bitmap.roar -elf /path/to/elf_file.elf -cc +``` + +Once executed, copy the URL into your browser and use the plotly interface to +display the desired plot. + +> [!NOTE] +> Some metrics take a while to calculate (mainly code coverage). + + +## Advanced metrics and goal + +There are already some simple statistical techniques included that can detect +outliers, but the bigger picture of this work is to do regression testing and +fuzzing with statistics derived from the instruction trace. For example: + +- [x] Regularity of sensor readings. +- [ ] Comm link throughputs. +- [x] Scheduling latency vs timeouts. +- [ ] Thread progress vs semaphores. +- [x] Callstack changes (code coverage). + +All these metrics need to be combined into a large latent space that can be +diffed (a kind of distance measure) between different versions of PX4 to detect +bugs in the code before it flies. + + +## GCOV + +There has been a quick investigation to use a more advanced and also tested +tool. The most common is gcov, which is a test coverage program from GCC. Two +files are required for gcov to display code coverage: + +- `.gcno`, which is an advanced source file you get when compiling with the + GCC `-ftest-coverage` option. +- `.gcda`, which is created when the GCC `-fprofile-arcs` option is run. + +It should be simple to get the .gcno file as the flag is already a compile +option in PX4. Theoretically it should be possible to reverse engineer +the `.gcda` file just from the PC values, ([documentation can be found here] +(https://github.com/gcc-mirror/gcc/blob/master/gcc/gcov-io.h)) However, it is +unclear how long this will take and how robust it will be. diff --git a/ext/orbetto/metrics/display.py b/ext/orbetto/metrics/display.py new file mode 100644 index 0000000..a4bd1aa --- /dev/null +++ b/ext/orbetto/metrics/display.py @@ -0,0 +1,475 @@ +import plotly.graph_objects as go +import plotly.express as px +import numpy as np +import math as m +from dash import Dash, dcc, html, callback, Output, Input +import plotly.figure_factory as ff +from plotly.subplots import make_subplots +from queries import * +from elfFileDecode import * + +app = Dash(__name__) + +widgets = [] +active_thread = None +df_functions = None +df_heap = {} +heap_title = None +hist_data = {} +df_detailed_thread_state = None +df_code_coverage = None +code_coverage_file = None + +def _find_outliers(thread_intervals,thread_ids): + outlier_id = [] + for function_intervals,ids in zip(thread_intervals,thread_ids): + # get mean + mean = np.mean(function_intervals) + # get std + std = np.std(function_intervals) + # get outlier indexes + idx = np.where(np.abs(function_intervals - mean) > 3 * std)[0] + # get ids of outliers + outlier_id.extend(ids[idx].tolist()) + if len(outlier_id) > 0: + return 'Outliers with Slice IDs: ' + ', '.join(map(lambda x:str(int(x)), outlier_id)) + else: + return "No outliers found." + +def _add_button_for_norm_switch(fig): + # Create and add button + fig.update_layout( + updatemenus=[ + dict( + buttons=list([ + dict(label="Percentage", + method="update", + args=[{'histnorm': 'percent'}] + ), + dict(label="Count", + method="update", + args=[{'histnorm': 'count'}] + ), + ]), + direction="down", + pad={"r": 10, "t": 10}, + showactive=True, + x=0, + xanchor="left", + y=1.2, + yanchor="top" + ), + ] + ) + +def _add_slider_for_bin_size(fig,arr,title,unit): + # Create and add slider + num_steps = 100 + min_value = min(arr) + max_value = max(arr) + diff = max_value - min_value + steps = [] + for i in range(num_steps): + bin_size = diff * ((i+1)/100)**2 + step = dict( + method="update", + args=[{'xbins.size': bin_size},{'xbins.start': 0}], # layout attribute + label = f"{bin_size:.2f} {unit}" + ) + # step["args"][0]["visible"][i] = True # Toggle i'th trace to "visible" + steps.append(step) + + sliders = [dict( + active=10, + currentvalue={"prefix": f"{title} "}, + steps=steps + )] + + fig.update_layout( + sliders=sliders + ) + +def _add_dropdown_menu_bar_chart(fig): + # Add dropdown + fig.update_layout( + updatemenus=[ + dict( + buttons=list([ + dict(label="Percentage", + method="update", + args=[{"visible": [True,False,True,False]},{'yaxis': {'title': 'Total CPU Time (%)'}}] + ), + dict(label="Time Values", + method="update", + args=[{"visible": [False,True,False,True]},{'yaxis': {'title': 'Total CPU Time (ns)'}}] + ), + ]), + direction="down", + pad={"r": 10, "t": 10}, + showactive=True, + x=0, + xanchor="left", + y=1.2, + yanchor="top" + ), + ] + ) + +def bar_chart(df1,df2,x,y,z): + widgets.append(html.H1("Thread CPU Time Overview",style={'textAlign': 'center'})) + fig = go.Figure() + fig.add_trace(go.Bar(x=df1[x], y=df1[y],visible=True,name='Running')) + fig.add_trace(go.Bar(x=df1[x], y=df1[z],visible=False,name='Running')) + fig.add_trace(go.Bar(x=df2[x], y=df2[y],visible=True,name='Runnable')) + fig.add_trace(go.Bar(x=df2[x], y=df2[z],visible=False,name='Runnable')) + fig.update_layout( + xaxis_title='Thread Name', + yaxis_title='Total CPU Time (%)', + # change angle of xAxis labels + xaxis_tickangle=45, + legend_title_text='Thread State' + ) + _add_dropdown_menu_bar_chart(fig) + widgets.append(dcc.Graph(figure=fig)) + +def _add_dropdown_menu_threads(thread_list, type,title, multi,initial_value=[]): + widgets.append(html.H4("Select Threads:",style={'margin-bottom': '10px'})) + dropdown = dcc.Dropdown(id='thread-select'+ '-' + type + '-' + title, + options=thread_list, + multi= multi, + clearable=True, + value=initial_value, + style={'width': '900px'}) + widgets.append(dropdown) + +def _add_dropdown_menu_functions(id,multi): + widgets.append(html.H4("Select Function:",style={'margin-bottom': '10px','margin-left': '20px'})) + dropdown = dcc.Dropdown(id=id, + options=[], + multi= multi, + clearable=True, + value=[], + style={'margin-left': '10px','width': '500px'}) + widgets.append(dropdown) + + +def custom_data_bar_chart(df): + global df_functions + df_functions = df + widgets.append(html.H1("Detailed Function Overview", style={'textAlign': 'center'})) + _add_dropdown_menu_threads(df['thread_name'].unique(),'function',False) + _add_dropdown_menu_functions('function-select',multi=True) + widgets.append(dcc.Graph(id='thread-graph',figure={})) + +def _add_dropdown_menu_reg_function(function_list): + widgets.append(html.H4("Select Function:",style={'margin-bottom': '10px'})) + dropdown = dcc.Dropdown(id='reg-function-select', + options=function_list, + multi= False, + clearable=True, + value=[]) + widgets.append(dropdown) + # include button for reciprocal with increase size + widgets.append(html.Button('Toggle Reciprocal', id='toggle-reciprocal', n_clicks=0,style={'font-size': '15px','margin-top': '10px'})) + +def histogram(dict): + global hist_data + hist_data = dict + widgets.append(html.H1("Regularity Function Overview",style={'textAlign': 'center'})) + _add_dropdown_menu_reg_function(list(dict.keys())) + widgets.append(dcc.Graph(id='regularity-graph',figure={})) + widgets.append(html.H3("PDF",style={'textAlign': 'center','margin-bottom': '10px'})) + widgets.append(dcc.Graph(id='normal-dist-graph',figure={})) + widgets.append(html.H5(f"No outliers found.",id='outliers')) + +def _add_dropdown_menu_threads_runtime(thread_list): + widgets.append(html.H4("Select Thread:",style={'margin-bottom': '10px'})) + dropdown = dcc.Dropdown(id='thread-overview-select', + options=thread_list, + multi= False, + clearable=True, + value=[], + style={'margin-left': '10px','width': '500px'}) + widgets.append(dropdown) + +def cpu_waiting_time_pie_chart(df): + global df_detailed_thread_state + df_detailed_thread_state = df + widgets.append(html.H1("Thread CPU Time Detailed",style={'textAlign': 'center'})) + _add_dropdown_menu_threads_runtime(df['thread_name'].unique()) + widgets.append(html.Button('Toggle Normalization', id='toggle-normalization', n_clicks=0,style={'font-size': '15px','margin-top': '10px'})) + widgets.append(dcc.Graph(id='aggr-thread-overview-graph',figure={})) + widgets.append(dcc.Graph(id='thread-overview-graph-runnable',figure={})) + widgets.append(dcc.Graph(id='thread-overview-graph-running',figure={})) + widgets.append(dcc.Graph(id='thread-overview-graph-sleeping',figure={})) + +def compute_heap_sum(df): + return df.groupby('thread_name').agg({'allocation_size': 'sum'}).reset_index() + +def heap_pie_chart(df,title): + widgets.append(html.H1("Heap Profile"+ " - (" + title + ')',style ={'textAlign': 'center'})) + df_aggr = compute_heap_sum(df) + df_aggr['allocation_size'] = df_aggr['allocation_size'].abs() + if df_aggr['allocation_size'].sum() == 0: + widgets.append(html.H3("Traces do not show memory differences when matched.",style={'textAlign': 'center'})) + return + else: + widgets.append(dcc.Graph(figure=pie_chart(df_aggr,'thread_name','allocation_size','Total Heap Usage per Thread'))) + + +def heap_counter(df,title): + global df_heap + df_heap[title] = df + _add_dropdown_menu_threads(df['thread_name'].unique(),type='heap',title=title,multi=True) + widgets.append(dcc.Graph(id='heap-graph' + '-' + title,figure={})) + +def display_code_coverage(df): + global df_code_coverage + df_code_coverage = df + widgets.append(html.H1('Code Coverage',style ={'textAlign': 'center'})) + _add_dropdown_menu_threads(df['File'].unique(),type='code-coverage',title='file-select',multi=False) + _add_dropdown_menu_functions('function-select-code-coverage',multi=False) + #widgets.append(html.Pre(id='highlighted-code', style={'overflow': 'auto', 'height': '500px'})) + widgets.append(html.Div(id='highlighted-code', style={ + 'overflow': 'auto', + 'height': '500px', + 'border': '1px solid black', + 'font-family': 'monospace', + 'white-space': 'pre-wrap', # Preserve whitespace + 'background-color': '#f0f0f0', # Optional: light gray background for better visibility + })) + +def show(debug): + app.layout = html.Div(widgets,style={ + 'display': 'flex', + 'flexDirection': 'column', + 'alignItems': 'center', + 'justifyContent': 'center', + }) + app.run_server(debug=debug) + +#---------------------- Dash CallBacks ----------------------# +@callback( + Output('function-select', 'options'), + Input('thread-select-function', 'value'), +) +def update_function_callback(thread): + if thread is None: + return [] + global active_thread + active_thread = str(thread) + return df_functions[df_functions['thread_name'] == active_thread]['slice_name'].unique().tolist() + +@callback( + Output('thread-graph', 'figure'), + Input('function-select', 'value'), +) +def update_counter_function(selected_functions): + fig = go.Figure() + if active_thread is None: + return fig + df = df_functions[df_functions['thread_name'] == active_thread] + for function in selected_functions: + x_axis = np.sort(np.array(df[df['slice_name'] == function]['tss'].iloc[0].split(',')).astype(int)) / 1e6 + count = df[df['slice_name'] == function]['count'].iloc[0] + y_axis = [i for i in range(1,count + 1)] + fig.add_trace(go.Scatter(x=x_axis, + y=y_axis, + mode='lines+markers', + name=function)) + fig.update_layout( + xaxis_title='Time [ms]', + yaxis_title='Appearence', + ) + return fig + +@callback( + Output('regularity-graph', 'figure'), + Output('normal-dist-graph', 'figure'), + Output('outliers', 'children'), + Input('reg-function-select', 'value'), + Input('toggle-reciprocal', 'n_clicks'), +) +def update_reg_graph(selected_function,reciprocal): + fig = go.Figure() + if selected_function is None or len(selected_function) == 0: + return fig,fig,"No outliers found." + title = 'Interval [ms]' + unit = 'ms' + if reciprocal % 2 == 1: + title = 'Frequency [kHz]' + unit = 'kHz' + np_hist = np.array(hist_data[selected_function]) + + threads = np.unique(np_hist[:,2]) + thread_names = [] + thread_ids = [] + thread_intervals = [] + for thread in threads: + function_intervals = np_hist[np_hist[:,2] == thread][:,0].tolist() + # remove None values + function_intervals = [x for x in function_intervals if x is not None] + function_intervals = np.divide(function_intervals,1e6) + if reciprocal % 2 == 1: + function_intervals = np.reciprocal(function_intervals) + if len(function_intervals) >= 2: + thread_intervals.append(function_intervals) + thread_names.append(thread) + thread_ids.append(np_hist[np_hist[:,2] == thread][:,1]) + fig.add_trace( + go.Histogram( + x=function_intervals, + histnorm='percent', + name=thread, + ) + ) + fig.update_layout( + xaxis_title=title, + ) + _add_slider_for_bin_size(fig,function_intervals,title,unit) + _add_button_for_norm_switch(fig) + normal_dist_fig = ff.create_distplot(thread_intervals,group_labels=thread_names,bin_size=.1) + normal_dist_fig.update_layout( + xaxis_title=title, + yaxis_title='Probability Density', + ) + outliers = _find_outliers(thread_intervals,thread_ids) + return fig,normal_dist_fig,outliers + +@callback( + Output('aggr-thread-overview-graph', 'figure'), + Output('thread-overview-graph-running', 'figure'), + Output('thread-overview-graph-runnable', 'figure'), + Output('thread-overview-graph-sleeping', 'figure'), + Input('thread-overview-select', 'value'), + Input('toggle-normalization', 'n_clicks') +) +def update_thread_overview(selected_thread,n_clicks): + fig = make_subplots(rows=1, cols=3, specs=[[{'type':'domain'}, {'type':'domain'}, {'type':'domain'}]]) + fig2 = go.Figure() + fig3 = go.Figure() + fig4 = go.Figure() + if selected_thread is None: + return fig,fig2,fig3,fig4 + df = df_detailed_thread_state[df_detailed_thread_state['thread_name'] == str(selected_thread)] + if len(df) == 0: + return fig,fig2,fig3,fig4 + df['runnable_interval'] = df['runnable_interval']/1e6 + df['running_interval'] = df['running_interval']/1e6 + df['sleeping_interval'] = df['sleeping_interval']/1e6 + df_aggr = df.groupby('slice_name').agg({'runnable_interval': 'sum', 'running_interval': 'sum', 'sleeping_interval': 'sum'}).reset_index() + title1 = 'Runnable' + title2 = 'Running' + title3 = 'Sleeping' + if n_clicks % 2 == 1: + df_aggr = df.groupby('slice_name').agg({'runnable_interval': 'mean', 'running_interval': 'mean', 'sleeping_interval': 'mean'}).reset_index() + title1 = 'Average Runnable' + title2 = 'Average Running' + title3 = 'Average Sleeping' + fig.add_trace(go.Pie(labels=df_aggr['slice_name'], + values=df_aggr['runnable_interval'], + title=title1), + row = 1, col = 1) + fig.add_trace(go.Pie(labels=df_aggr['slice_name'], + values=df_aggr['running_interval'], + title=title2), + row = 1, col = 2 ) + fig.add_trace(go.Pie(labels=df_aggr['slice_name'], + values=df_aggr['sleeping_interval'], + title=title3), + row = 1, col = 3 ) + fig2 = px.histogram(df, x='runnable_interval',color = 'slice_name', marginal="rug", hover_data=['slice_id','slice_name'], title='Runnable') + fig2.update_layout( + xaxis_title='Duration [ms]' + ) + fig3 = px.histogram(df, x='running_interval',color = 'slice_name', marginal="rug", hover_data=['slice_id','slice_name'], title='Running') + fig3.update_layout( + xaxis_title='Duration [ms]' + ) + fig4 = px.histogram(df, x='sleeping_interval',color = 'slice_name', marginal="rug", hover_data=['slice_id','slice_name'], title='Sleeping') + fig4.update_layout( + xaxis_title='Duration [ms]' + ) + return fig,fig2,fig3,fig4 + +@callback( + Output('heap-graph-counter', 'figure'), + Input('thread-select-heap-counter', 'value'), +) +def update_counter_heap(selected_threads): + return counter_bar_chart(df_heap['counter'],selected_threads,'thread_name','allocation_size','ts','Heap Counter [Bytes]') + +@callback( + Output('heap-graph-counter_matched', 'figure'), + Input('thread-select-heap-counter_matched', 'value'), +) +def update_counter_heap(selected_threads): + return counter_bar_chart(df_heap['counter_matched'],selected_threads,'thread_name','allocation_size','ts','Heap Counter [Bytes]') + +@callback( + Output('function-select-code-coverage', 'options'), + Input('thread-select-code-coverage-file-select', 'value'), +) +def update_code_coverage_function_select(selected_file): + global code_coverage_file + if selected_file is None or len(selected_file) == 0: + print(selected_file) + return ['empty'] + code_coverage_file = selected_file + return df_code_coverage[df_code_coverage['File'] == selected_file]['Function Name'].unique().tolist() + +@callback( + Output('highlighted-code', 'children'), + Input('function-select-code-coverage', 'value'), +) +def update_code_coverage_highlighted_code(selected_function): + if selected_function is None or len(selected_function) == 0: + return "" + source_code = get_function_info(bytes(code_coverage_file, 'utf-8'), bytes(selected_function, 'utf-8')) + if len(source_code) == 0: + return f"Function {selected_function} not found in {code_coverage_file}." + return html.Div(children=[html.Div(children=source_code, style={'white-space': 'pre-wrap'})]) + +#---------------------- General Plots ----------------------# + +def counter_bar_chart(df,selection,category,value,ts,title): + fig = go.Figure() + if selection is None: + return fig + max_ts = df[ts].max() + for s in selection: + df_s = df[df[category] == s] + df_s = df_s.sort_values(by=[ts]) + x_axis = np.array(df_s[ts]) / 1e6 + y_axis = df_s[value].cumsum().to_numpy() + if(x_axis != [] and x_axis[-1] < max_ts): + x_axis = np.append(x_axis,max_ts / 1e6) + y_axis = np.append(y_axis,y_axis[-1]) + fig.add_trace(go.Scatter(x=x_axis, + y=y_axis, + mode='lines+markers', + name=s)) + fig.update_layout( + xaxis_title='Time [ms]', + yaxis_title=title, + height=700, + width=1400 + ) + return fig + +def pie_chart(df,category,value,title): + fig = go.Figure() + fig.add_trace(go.Pie(labels = df[category], + values = df[value], + title=title + ) + ) + fig.update_layout( + height=800, + width=800, + ) + fig.update_traces( + textposition='inside', + automargin=True + ) + return fig diff --git a/ext/orbetto/metrics/elfFileDecode.py b/ext/orbetto/metrics/elfFileDecode.py new file mode 100644 index 0000000..6d43015 --- /dev/null +++ b/ext/orbetto/metrics/elfFileDecode.py @@ -0,0 +1,213 @@ +from elftools.common.utils import bytes2str +from elftools.dwarf.descriptions import describe_form_class +from elftools.elf.elffile import ELFFile +import pandas as pd +from pyroaring import BitMap +import numpy as np +from dash import html + +GREEN = "\033[92m" # Green color code +RESET = "\033[0m" # Reset color code + +elf_path = None +roar_path = None +bitmap = None + +# Documentation for dwarf classes in pyelftools can be found here: +# https://github.com/eliben/pyelftools/tree/main/elftools/dwarf + +def set_elffile_and_bitmap(elf_path_input, roar_path_input): + global elf_path + global roar_path + elf_path = elf_path_input + roar_path = roar_path_input + +def get_all_function_names(): + if elf_path is None: + print("Please set the elf file") + return + + print('Processing file:', elf_path) + with open(elf_path, 'rb') as f: + elffile = ELFFile(f) + + if not elffile.has_dwarf_info(): + print(' file has no DWARF info') + return + + # get_dwarf_info returns a DWARFInfo context object, which is the + # starting point for all DWARF-based processing in pyelftools. + dwarfinfo = elffile.get_dwarf_info() + + # Return Pandas DataFrame with all function information + df = pd.DataFrame(columns=['File','Function Name']) + + #count = 0 + for CU in dwarfinfo.iter_CUs(): + #if count > 100: + # break + #count += 1 + TOP_DIE = CU.get_top_DIE() + if ".cpp" in bytes2str(TOP_DIE.attributes['DW_AT_name'].value): + path = TOP_DIE.attributes['DW_AT_name'].value + else: + path = TOP_DIE.attributes['DW_AT_comp_dir'].value + bytes('/', 'utf-8') + TOP_DIE.attributes['DW_AT_name'].value + for DIE in CU.iter_DIEs(): + # check for function tag + if DIE.tag == 'DW_TAG_subprogram': + try: + if DIE.attributes['DW_AT_inline'].value == 3: + continue + except KeyError: + pass + try: + DIE.attributes['DW_AT_low_pc'].value + DIE.attributes['DW_AT_high_pc'].value + except KeyError: + continue + # check for function name + try : + if DIE.attributes['DW_AT_name'] is not None: + df = pd.concat([df, pd.DataFrame({'File': [bytes2str(path)], 'Function Name': [bytes2str(DIE.attributes['DW_AT_name'].value)]})]) + except KeyError: + continue + return df + + +def get_function_info(function_path, function_name): + + if elf_path is None: + print("Please set the elf file") + return + + with open(elf_path, 'rb') as f: + elffile = ELFFile(f) + + if not elffile.has_dwarf_info(): + print(' file has no DWARF info') + return + + # get_dwarf_info returns a DWARFInfo context object, which is the + # starting point for all DWARF-based processing in pyelftools. + dwarfinfo = elffile.get_dwarf_info() + + # Return Pandas DataFrame with all function information + source_code = decode_funcname( dwarfinfo, function_path, function_name) + + return source_code + + +def decode_funcname(dwarfinfo, function_path, function_name): + source_code = [] + for CU in dwarfinfo.iter_CUs(): + try: + TOP_DIE = CU.get_top_DIE() + if ".cpp" in bytes2str(TOP_DIE.attributes['DW_AT_name'].value): + path = TOP_DIE.attributes['DW_AT_name'].value + else: + path = TOP_DIE.attributes['DW_AT_comp_dir'].value + bytes('/', 'utf-8') + TOP_DIE.attributes['DW_AT_name'].value + if(path == function_path): + for DIE in CU.iter_DIEs(): + # check for function tag + if DIE.tag == 'DW_TAG_subprogram': + # check for function name + try : + DIE.attributes['DW_AT_name'].value + except KeyError: + continue + if DIE.attributes['DW_AT_name'].value == function_name: + low_line, high_line, l, a, inline = get_max_min_line(CU, dwarfinfo) + if low_line is not None and high_line is not None: + source_code = get_code_lines(function_path, low_line, high_line) + lines = np.arange(low_line, high_line) + for i,line in enumerate(lines): + idx = np.where(np.isin(l, line)) + if len(idx[0]) > 0 and len(source_code) > i: + # check if address is in bitmap + if a[idx[0][0]] in bitmap: + print(f"Line {line} with Address {hex(a[idx[0][0]])} is covered") + source_code[i] = html.Mark(source_code[i], style={'background-color': "rgba(0, 255, 0, 0.1)"}) + else: + source_code[i] = html.Mark(source_code[i], style={'background-color': "rgba(255, 0, 0, 0.1)"}) + except KeyError: + continue + return source_code + +def get_max_min_line(CU, dwarfinfo): + lineprog = dwarfinfo.line_program_for_CU(CU) + max = 0 + min = 100000 + prev_addr = 0 + lines = [] + addresses = [] + inline = [] + for entry in lineprog.get_entries(): + if entry.state is None: + continue + if prev_addr != entry.state.address: + prev_addr = entry.state.address + if entry.state.line not in lines: + lines.append(entry.state.line) + addresses.append(entry.state.address) + if entry.state.file != 1: + inline.append(True) + else: + inline.append(False) + if entry.state.line > max: + max = entry.state.line + if entry.state.line < min: + min = entry.state.line + if max == 0 or min == 100000: + return None, None, None, None, None + + return min, max, lines, addresses, inline + +def get_code_lines(file_name, low_line, high_line): + try: + with open(file_name, 'r') as file: + lines = file.readlines() + return lines[low_line - 1:high_line] + except FileNotFoundError: + return f"File {file_name} not found." + except IndexError: + return f"Lines {low_line}:{high_line} out of range in {file_name}." + +def read_roar_file(): + """ + Read a Roaring bitmap from a .roar file, assuming the file contains + serialized 8-bit unsigned integers. + """ + values = [] + with open(roar_path, 'rb') as f: + # Read the entire file content as bytes + data = f.read() + + # Ensure that the length of the data is not empty + if len(data) == 0: + raise ValueError("File is empty. No data to read.") + + # Each byte is an 8-bit unsigned integer + for byte in data: + values.append(byte) # Append each byte (0-255) to the list + + return values + +def init_bitmap(): + global bitmap + #Load values from the .roar file + values_from_file = read_roar_file() + + # Create a BitMap instance and update it with the values + bm = BitMap() + bm = bm.deserialize(bytes(values_from_file)) + + bitmap = bm.to_array() + +def format_code_coverage(df): + # Function to apply styling without loop + def highlight_green(column): + # Create a Series where True values get 'background-color: green' and False gets '' + return ['background-color: green' if v else '' for v in df['Highlight']] + + # Apply the style + return df.style.apply(highlight_green, subset=['Code']) diff --git a/ext/orbetto/metrics/metrics.py b/ext/orbetto/metrics/metrics.py new file mode 100644 index 0000000..7d4e3e8 --- /dev/null +++ b/ext/orbetto/metrics/metrics.py @@ -0,0 +1,236 @@ +import pandas as pd +from queries import * +from display import * +from elfFileDecode import * +import argparse + +def init_argparse(): + parser = argparse.ArgumentParser(description="Visualize trace metrics") + parser.add_argument('-f','--file', + help='Path to protobuf trace file', + type = str, + required = False) + parser.add_argument('-f2','--file2', + help='Path to second protobuf trace file for diffs', + type = str, + required = False) + parser.add_argument('-wq','--work_queue_overview', + help='Display work queue overview. (Runnable/Running)', + action='store_true', + default = False) + parser.add_argument('-dwq','--detailed_work_queue', + help='Display detailed work queue overview. (Runnable/Running)', + action='store_true', + default = False) + parser.add_argument('-fp','--function_pairs', + help='Display function pairs', + action='store_true', + default = False) + parser.add_argument('-fr','--function_regularity', + help='Display function regularity', + action='store_true', + default = False) + parser.add_argument('-dma','--dma_throughputs', + help='Display DMA throughput', + action='store_true', + default = False) + parser.add_argument('-sd','--semaphore_deadlocks', + help='Display semaphore deadlocks', + action='store_true', + default = False) + parser.add_argument('-db','--debug', + help='Display debug information', + action='store_true', + default = False) + parser.add_argument('-hp', "--heap_profile", + help='Display heap profile', + action='store_true', + default = False) + parser.add_argument('-cc',"--code_coverage", + help='Display code coverage', + action='store_true', + default = False) + parser.add_argument('-bm',"--bitmap", + help='Path to bitmap file which is needed for code coverage', + type = str, + required = False) + parser.add_argument('-elf',"--elf_file", + help='Path to elf file which is needed for code coverage', + type = str, + required = False) + + parser.add_argument('-diff', "--difference", + help='Switch all other options to display differences between two trace files', + action='store_true', + default = False) + + return parser.parse_args() + + + +def cpu_time(): + cpu_running = get_all_cpu_time('Running') + cpu_runnable = get_all_cpu_time('R') + total_cpu_running_time = cpu_running['cpu_time'].sum() + cpu_running['percentage'] = cpu_running['cpu_time']/total_cpu_running_time * 100 + total_cpu_runnable_time = cpu_runnable['cpu_time'].sum() + cpu_runnable['percentage'] = cpu_runnable['cpu_time']/total_cpu_runnable_time * 100 + bar_chart(cpu_running,cpu_runnable,'thread_name','percentage','cpu_time') + +def cpu_waiting_time(): + df = get_detailed_thread_state_perfetto() + cpu_waiting_time_pie_chart(df) + +def function_distribution(): + functions = get_function_distribution() + custom_data_bar_chart(functions) + +def regularity(): + df = get_function_intervals() + df['merged'] = df.apply(lambda row: (row['interval'], row['next_slice_id'],row['thread_name']), axis=1) + dict = df.groupby('slice_name')['merged'].apply(list).to_dict() + histogram(dict) + +def dma_throughputs(): + pass + +def semaphore_deadlocks(): + pass + +def extract_malloc(df): + df_malloc = df[df['allocation_type']=='malloc'] + df_malloc['allocation_info'] = df_malloc['slice_name'].apply(lambda x: x.split('[')[1][:-1]) + df_malloc['allocation_pointer'] = df_malloc['allocation_info'].apply(lambda x: x.split(',')[0]) + df_malloc['allocation_size'] = df_malloc['allocation_info'].apply(lambda x: int(x.split(',')[1])) + return df_malloc + +def extract_free(df): + df_free = df[df['allocation_type']=='free'] + df_free['allocation_pointer'] = df_free['slice_name'].str.extract(r'\((.*?)\)') + df_free['allocation_size'] = df_free['slice_name'].apply(lambda x: -int(x.split('(')[2][:-1])) + return df_free + + +def match_heap_pointers(df1,df2): + # copy + df_malloc = df1.copy() + df_free = df2.copy() + # convert to int + df_malloc['ts'] = df_malloc['ts'].astype(int) + df_free['ts'] = df_free['ts'].astype(int) + # reset index + df_malloc.reset_index(drop=True, inplace=True) + df_free.reset_index(drop=True, inplace=True) + for index, row in df_malloc.iterrows(): + # Filter rows that match the conditions + future_frees = df_free[(df_free['allocation_pointer'] == row['allocation_pointer']) & + (df_free['ts'] >= row['ts'])] + if not future_frees.empty: + df_malloc.drop(index, inplace=True) + df_free.drop(future_frees['ts'].idxmin(), inplace=True) + assert(df_free.empty) + return df_malloc + + +def extract_heap_profile(df): + if df.empty: + print("No heap profile data found") + return None + df['allocation_type'] = df['slice_name'].apply(lambda x: x.split('(')[0]) + df_malloc = extract_malloc(df) + df_free = extract_free(df) + return pd.concat([df_malloc,df_free]), match_heap_pointers(df_malloc,df_free) + +def heap_profile(): + df = get_heap_profile() + df, df_matched = extract_heap_profile(df) + if(df is not None): + heap_pie_chart(df, title="absolute count") + heap_counter(df, title="counter") + heap_pie_chart(df_matched , title="absolute count matched") + heap_counter(df_matched, title="counter_matched") + + +def diff_heap_profile(df1,df2): + max_ts = min(df1['ts'].max(),df2['ts'].max()) + df1 = df1[df1['ts'] <= max_ts] + df2 = df2[df2['ts'] <= max_ts] + df2['allocation_size'] = df2['allocation_size'] * -1 + return pd.concat([df1,df2]) + +def heap_profile_diff(): + df1 = get_heap_profile() + df1, df1_matched = extract_heap_profile(df1) + df2 = get_heap_profile2() + df2, df2_matched = extract_heap_profile(df2) + if(df1 is not None and df2 is not None): + df = diff_heap_profile(df1,df2) + df_matched = diff_heap_profile(df1_matched,df2_matched) + heap_pie_chart(df, title="absolute count") + heap_counter(df, title="counter") + heap_pie_chart(df_matched, title="absolute count matched") + heap_counter(df_matched, title="counter_matched") + + +def code_coverage(elf, path): + set_elffile_and_bitmap(elf, path) + init_bitmap() + df = get_all_function_names() + display_code_coverage(df) + + +def code_coverage_diff(): + pass + +if __name__ == "__main__": + args = init_argparse() + init_trace_processor(args.file) + if args.difference: + assert(args.file2) + init_trace_processor2(args.file2) + if args.work_queue_overview: + if args.difference: + pass + else: + cpu_time() + if args.detailed_work_queue: + if args.difference: + pass + else: + cpu_waiting_time() + if args.function_pairs: + if args.difference: + pass + else: + function_distribution() + if args.function_regularity: + if args.difference: + pass + else: + regularity() + if args.dma_throughputs: + if args.difference: + pass + else: + dma_throughputs() + if args.semaphore_deadlocks: + if args.difference: + pass + else: + semaphore_deadlocks() + if args.heap_profile: + if args.difference: + heap_profile_diff() + else: + heap_profile() + if args.code_coverage: + if args.bitmap and args.elf_file: + if args.difference: + pass + else: + code_coverage(args.elf_file, args.bitmap) + else: + print("Please provide a bitmap file") + show(args.debug) + + diff --git a/ext/orbetto/metrics/metrics.yml b/ext/orbetto/metrics/metrics.yml new file mode 100644 index 0000000..d04dd20 --- /dev/null +++ b/ext/orbetto/metrics/metrics.yml @@ -0,0 +1,55 @@ +name: metrics +channels: + - defaults + - conda-forge +dependencies: + - bzip2=1.0.8=h80987f9_6 + - ca-certificates=2024.9.24=hca03da5_0 + - libffi=3.4.4=hca03da5_1 + - ncurses=6.4=h313beb8_0 + - openssl=3.0.15=h80987f9_0 + - pip=24.2=py311hca03da5_0 + - python=3.11.10=hb885b13_0 + - readline=8.2=h1a28f6b_0 + - setuptools=75.1.0=py311hca03da5_0 + - sqlite=3.45.3=h80987f9_0 + - tk=8.6.14=h6ba3021_0 + - wheel=0.44.0=py311hca03da5_0 + - xz=5.4.6=h80987f9_1 + - zlib=1.2.13=h18a0788_1 + - pip: + - blinker==1.8.2 + - certifi==2024.8.30 + - charset-normalizer==3.4.0 + - click==8.1.7 + - dash==2.18.1 + - dash-core-components==2.0.0 + - dash-html-components==2.0.0 + - dash-table==5.0.0 + - flask==3.0.3 + - idna==3.10 + - importlib-metadata==8.5.0 + - itsdangerous==2.2.0 + - jinja2==3.1.4 + - markupsafe==3.0.2 + - nest-asyncio==1.6.0 + - numpy==2.1.2 + - packaging==24.1 + - pandas==2.2.3 + - perfetto==0.11.0 + - plotly==5.24.1 + - protobuf==5.28.3 + - pyelftools==0.31 + - pyroaring==1.0.0 + - python-dateutil==2.9.0.post0 + - pytz==2024.2 + - requests==2.32.3 + - retrying==1.3.4 + - six==1.16.0 + - tenacity==9.0.0 + - typing-extensions==4.12.2 + - tzdata==2024.2 + - urllib3==2.2.3 + - werkzeug==3.0.6 + - zipp==3.20.2 +prefix: /Users/lukasvonbriel/anaconda3/envs/metrics diff --git a/ext/orbetto/metrics/queries.py b/ext/orbetto/metrics/queries.py new file mode 100644 index 0000000..775b21f --- /dev/null +++ b/ext/orbetto/metrics/queries.py @@ -0,0 +1,175 @@ +from perfetto.trace_processor import TraceProcessor + +tp = None +tp2 = None + +def init_trace_processor(trace): + global tp + tp = TraceProcessor(trace=trace) + print(tp) + +def init_trace_processor2(trace): + global tp2 + tp2 = TraceProcessor(trace=trace) + +def display_all_available_tables(): + # Display all the available columns in the trace + return tp.query("SELECT name FROM sqlite_master WHERE type='table';").as_pandas_dataframe() + +def get_all_cpu_time(state): + query= f""" + DROP VIEW IF EXISTS cpu_time; + CREATE VIEW cpu_time AS + SELECT + dur, + thread.name as thread_name + FROM thread_state + JOIN thread USING (utid) + WHERE state = '{state}'; + + SELECT * + FROM ( + SELECT + SUM(dur) AS cpu_time, + thread_name + FROM cpu_time + GROUP BY thread_name + ) AS subquery + ORDER BY cpu_time DESC; + """ + return tp.query(query).as_pandas_dataframe() + + +def get_function_distribution(): + query = """ + DROP VIEW IF EXISTS slice_with_thread_names; + CREATE VIEW slice_with_thread_names AS + SELECT + ts, + dur, + tid, + slice.name as slice_name, + slice.id as slice_id,utid, + thread.name as thread_name + FROM slice + JOIN thread_track ON thread_track.id = slice.track_id + JOIN thread USING (utid); + + SELECT * + FROM ( + SELECT + SUM(dur) AS cpu_time, + GROUP_CONCAT(DISTINCT ts) AS tss, + tid, + slice_name, + thread_name, + COUNT(*) AS count + FROM slice_with_thread_names + GROUP BY slice_name, tid + ) AS subquery + WHERE tid > 100000 + ORDER BY cpu_time DESC; + """ + return tp.query(query).as_pandas_dataframe() + +def get_function_intervals(): + query = """ + DROP VIEW IF EXISTS slice_with_thread_names; + CREATE VIEW slice_with_thread_names AS + SELECT + ts, + dur, + tid, + slice.name as slice_name, + slice.id as slice_id,utid, + thread.name as thread_name + FROM slice + JOIN thread_track ON thread_track.id = slice.track_id + JOIN thread USING (utid); + + SELECT * + FROM( + SELECT + ts, + dur, + LEAD(ts) OVER (PARTITION BY slice_name ORDER BY ts) AS next_ts, + LEAD(ts) OVER (PARTITION BY slice_name ORDER BY ts) - (ts + dur) AS interval, + slice_name, + LEAD(slice_id) OVER (PARTITION BY slice_name ORDER BY ts) AS next_slice_id, + thread_name + FROM ( + SELECT + ts - 35696370 AS ts, + dur, + tid, + slice_name, + slice_id, + thread_name + FROM slice_with_thread_names + ORDER BY ts + ) AS subquery1 + WHERE tid > 100000 + ORDER BY slice_name, ts + )AS subquery2; + """ + return tp.query(query).as_pandas_dataframe() + +heap_query =""" + DROP VIEW IF EXISTS mallocs; + CREATE VIEW mallocs AS + SELECT + slice.ts, + slice.id as slice_id, + slice.name as slice_name, + thread.name as thread_name + FROM slice + JOIN thread_track ON thread_track.id = slice.track_id + JOIN thread USING (utid) + WHERE (slice.name LIKE '%malloc%' AND slice.name LIKE '%[%') OR (slice.name LIKE '%free%' AND slice.name LIKE '%<-%'); + + SELECT * + FROM mallocs + ORDER BY ts; + """ + +def get_heap_profile(): + return tp.query(heap_query).as_pandas_dataframe() + +def get_heap_profile2(): + return tp2.query(heap_query).as_pandas_dataframe() + +def get_detailed_thread_state_perfetto(): + + query = f""" + DROP VIEW IF EXISTS workqueue_slices; + CREATE VIEW workqueue_slices AS + SELECT + slice.ts as ts, + slice.dur as dur, + slice.id as slice_id, + slice.name as slice_name, + thread.name as thread_name, + thread.utid as utid + FROM slice + JOIN thread_track ON thread_track.id = slice.track_id + JOIN thread USING (utid) + WHERE thread.tid < 10000; + + DROP TABLE IF EXISTS slice_thread_state_breakdown; + CREATE VIRTUAL TABLE slice_thread_state_breakdown + USING SPAN_LEFT_JOIN( + workqueue_slices PARTITIONED utid, + thread_state PARTITIONED utid + ); + + SELECT + thread_name, + slice_name, + slice_id, + SUM(CASE WHEN state = 'Running' THEN dur ELSE 0 END) as running_interval, + SUM(CASE WHEN state = 'R' THEN dur ELSE 0 END) as runnable_interval, + SUM(CASE WHEN state = 'S' THEN dur ELSE 0 END) as sleeping_interval + FROM slice_thread_state_breakdown + GROUP BY slice_id; + """ + return tp.query(query).as_pandas_dataframe() \ No newline at end of file diff --git a/ext/orbetto/src/LRUCache.hpp b/ext/orbetto/src/LRUCache.hpp new file mode 100644 index 0000000..bc7ab14 --- /dev/null +++ b/ext/orbetto/src/LRUCache.hpp @@ -0,0 +1,71 @@ +/* + * File: lrucache.hpp + * Author: Alexander Ponomarev + * + * Created on June 20, 2013, 5:09 PM + */ + +#ifndef _LRUCACHE_HPP_INCLUDED_ +#define _LRUCACHE_HPP_INCLUDED_ + +#include +#include +#include +#include + +namespace cache { + +template +class lru_cache { +public: + typedef typename std::pair key_value_pair_t; + typedef typename std::list::iterator list_iterator_t; + + lru_cache(size_t max_size) : + _max_size(max_size) { + } + + void put(const key_t& key, const value_t& value) { + auto it = _cache_items_map.find(key); + _cache_items_list.push_front(key_value_pair_t(key, value)); + if (it != _cache_items_map.end()) { + _cache_items_list.erase(it->second); + _cache_items_map.erase(it); + } + _cache_items_map[key] = _cache_items_list.begin(); + + if (_cache_items_map.size() > _max_size) { + auto last = _cache_items_list.end(); + last--; + _cache_items_map.erase(last->first); + _cache_items_list.pop_back(); + } + } + + const value_t& get(const key_t& key) { + auto it = _cache_items_map.find(key); + if (it == _cache_items_map.end()) { + throw std::range_error("There is no such key in cache"); + } else { + _cache_items_list.splice(_cache_items_list.begin(), _cache_items_list, it->second); + return it->second->second; + } + } + + bool exists(const key_t& key) const { + return _cache_items_map.find(key) != _cache_items_map.end(); + } + + size_t size() const { + return _cache_items_map.size(); + } + +private: + std::list _cache_items_list; + std::unordered_map _cache_items_map; + size_t _max_size; +}; + +} // namespace cache + +#endif /* _LRUCACHE_HPP_INCLUDED_ */ \ No newline at end of file diff --git a/ext/orbetto/src/TRACE.md b/ext/orbetto/src/TRACE.md new file mode 100644 index 0000000..61b6fd9 --- /dev/null +++ b/ext/orbetto/src/TRACE.md @@ -0,0 +1,169 @@ +# Mortrall: NuttX instrumentation with Perfetto and Instruction Trace + +We can trace the NuttX scheduler at half CPU speed by streaming the ETM debug +trace over the parallel TRACE pins and capturing it to a file. + +The Mortrall class decodes the ETM packets and generates a CallStack that can be +visualized with Perfetto. This includes thread switching and exception +detection in order to be able to follow the code execution on the corresponding +PX4 FMUv5x or FMUv6x. + +![](https://github.com/niklaut/orbetto-support-files/blob/main/perfetto_callstack.png) + +The structure of the code is based on the mortem module from orbuculum, but has +been heavily modified to suit our needs. There have also been many fixes in +loadelf and traceDecoder_etm4. + + +## Capture the trace + +Use OrbTrace to enable the TRACE pin output on the FMU. + +> [!TIP] +> Data accumulates very fast. If you want to see the whole CallStack in +> Perfetto Trace, trace less than a second. + + +### FMUv5x + +Start GDB in your PX4 Autopilot source directory: + +```sh +# cd PX4-Autopilot +python3 -m emdbg.bench.fmu --target px4_fmu-v5x --orbtrace +``` + +Reset the target and start the capture: + +``` +(gdb) px4_etm_trace_tpiu_swo_stm32f7 +(gdb) Continue +``` + + +### FMUv6x + +Start GDB in your PX4 Autopilot source directory: + +```sh +# cd PX4-Autopilot +python3 -m emdbg.bench.fmu --target px4_fmu-v6x --orbtrace +``` + +Reset the target and start the capture. You might need to power cycle the target: + +``` +(gdb) px4_etm_trace_tpiu_swo_stm32h7 +(gdb) Continue +``` + + +### Visualization + +To convert the `trace.swo` binary to Perfetto format call Orbetto: + +```sh +# cd embedded-debug-tools/ext/orbetto +build/orbetto -f path/to/trace.swo -e path/to/fmu.elf -b path/to/fmu_bootloader.elf -t 1 +# Use -v 3 to show advanced debugging data that also prints instructions +# Use -a to set a cycle count threshold at which verbose printing starts +``` + +If the `trace.swo` files contain instruction data, Mortrall is automatically +initialized and and creates a CallStack in the Protobuf file. You should now +have an `orbetto.perf` file that you can drag into the +[Perfetto user interface](https://ui.perfetto.dev). + + +## Limitations + +There are a few things you need to be aware of when using this tool. + + +### Hardware + +It is important to know that there are two main configurations when tracing +instructions. There is implicit and explicit tracing, which can be set in the +`cortex_m.gdb` file (implicit = `startETM`; explicit = `startETM 1`). With +implicit tracing, the minimum amount of trace data is sent. This mainly refers +to the number of address packets, which basically transmit the current PC +value (there are different formats). With implicit tracing, address packets are +only sent if there is no other way to follow a branch. With explicit tracing, +the address is sent after each branch, resulting in a more robust trace that +can be theoretically be analyzed from any point. + + +#### FMUv5x + +Unfortunately, the trace pinouts of the chip are multipurpose. It is very +important to remove the 3 LEDs from the data lines. Their non-linear +(I-V) characteristics makes it impossible to trace high frequency instruction +data. However, if the LEDs are removed, the resulting stubs can cause unwanted +reflections, which compromises signal integrity. In addition, the trace lines +are not matched in length. In particular, the `nARMED` pin is also connected to +one of the trace lines, making it three times the length of all the other +traces. We have roughly estimated that these problems are not too bad as long +as you stay under 100MHz when tracing. + +The following signals have been observed with an oscilloscope and a clock +frequency of 50Mhz after removing the LEDs: + +![](https://github.com/niklaut/orbetto-support-files/blob/main/SCR17.BMP) + +Since the STM32F7 has only a very small trace buffer (no ETF), explicit tracing +only works if you increase the CPU prescaler in `board.h` to 8 and also disable +the instruction and data cache to further slow down the CPU. Implicit tracing +instead only needs the increased prescaler, but still has a few (~50) overflows +when tracing. This probably only happens when there are a lot of thread +switches during PX4 initialization, which causes a burst in address and branch +packets. + + +#### FMUv6x + +With FMUv6x (STM32H7), the Skynode hardware issues remain much the same. + +However, we now have a 4kByte ETF (Embedded Trace FIFO) inside the STM, which +buffers the trace data during bursts. In addition, the CPU and trace clock +sources have different prescalers so that they can be set separately. The trace +clock must have a prescaler of 16 to stay within the hardware signal integrity +limits. For the CPU clock we need at least a prescaler of 4 (=half max CPU +speed) to get a correct call stack with a small amount of overflow. With a +prescaler of 8 there will be no overflows. + +> [!NOTE] +> FMUv6x has only been tested with implicit traces. + + +### Perfetto + +Due to memory limitations in the Perfetto UI (both local and web interface), +only a very short trace can be visualized (~25MB of trace data results in +~300MB of Protobuf). + +If you already know which thread you want to monitor, this problem can be solved +by displaying only the corresponding thread as it is very unlikely that you +need to see the full CallStack. This is not implemented yet, but can easily be +added. + +As mentioned above, the decoding process in Mortrall makes it possible to trace +every executed instruction and jump precisely. However, both the Python trace +processor and the UI have a memory limitation that makes it impossible to use +this information further (regression testing, fuzzing, etc.) within the +Protobuf format. To work around this problem for now, an additional library +called CRoaring has been added. This library encodes all executed program +counter values into a bitmap. It also has a Python interface for further +analysis of this data. A description of how to use this can be found in +[METRICS.md](../metrics/METRICS.md). + +> [!NOTE] +> Unfortunately, the Perfetto batch processor does not help in this case, as +> it only allows parallel decoding of multiple Protobuf files (ie. many +> different traces) and not with a very large one. + + +### NuttX + +The computation of the CallStack is tightly coupled to the way NuttX handles +thread switching. This means that it is very likely that this will not work for +any other RTOS without modifications. diff --git a/ext/orbetto/src/device.cpp b/ext/orbetto/src/device.cpp index 2e9ad7a..3559ebd 100644 --- a/ext/orbetto/src/device.cpp +++ b/ext/orbetto/src/device.cpp @@ -308,6 +308,7 @@ Device::Device(std::string_view hint) // _register_table = ®isters_stm32f765; _max_irq = 16+109; _clock = 216'000'000; + _id = DeviceId::SYKNODE_V5X; } else if (hint.find("stm32h753") != std::string_view::npos or hint.find("v6x") != std::string_view::npos) @@ -316,5 +317,14 @@ Device::Device(std::string_view hint) _register_table = ®isters_stm32h753; _max_irq = 16+149; _clock = 480'000'000; + _id = DeviceId::SYKNODE_V6X; + } + // this was used in a pure NuttX build as ETM test + else if (hint.find("nuttx") != std::string_view::npos) + { + _irq_table = &irq_names_stm32f765; + _max_irq = 16+109; + _clock = 48'000'000; + _id = DeviceId::SYKNODE_V5X; } } diff --git a/ext/orbetto/src/device.hpp b/ext/orbetto/src/device.hpp index a592c46..fbcd2d3 100644 --- a/ext/orbetto/src/device.hpp +++ b/ext/orbetto/src/device.hpp @@ -7,6 +7,14 @@ #include #include +// enum for skynode device id +enum DeviceId +{ + SYKNODE_V5X = 0, + SYKNODE_V6X = 1, + SKYNODE_S = 2, +}; + class Device { public: @@ -49,9 +57,16 @@ class Device clock() const { return _clock; } + /// Provides the device id. + int + id() const + { return (int)_id; } + + private: const IrqTable *_irq_table{nullptr}; const RegisterTable *_register_table{nullptr}; uint32_t _clock{0}; int16_t _max_irq{0}; + DeviceId _id; // device id: 0 }; diff --git a/ext/orbetto/src/mortrall.hpp b/ext/orbetto/src/mortrall.hpp new file mode 100644 index 0000000..5f2b9ce --- /dev/null +++ b/ext/orbetto/src/mortrall.hpp @@ -0,0 +1,1220 @@ +#pragma once + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "git_version_info.h" +#include "generics.h" +#include "nw.h" +#include "traceDecoder.h" +#include "tpiuDecoder.h" +#include "loadelf.h" +#include "sio.h" +#include "stream.h" + +#include "LRUCache.hpp" +#include "roaring.hh" + +using namespace roaring; + +//--------------------------------------------------------------------------------------// +//--------------------------------- BEGIN REGION Defines -------------------------------// +//--------------------------------------------------------------------------------------// + +#define INTERVAL_TIME_MS (1000) /* Intervaltime between acculumator resets */ +#define HANG_TIME_MS (200) /* Time without a packet after which we dump the buffer */ +#define TICK_TIME_MS (100) /* Time intervals for screen updates and keypress check */ +#define STACK_BUFFER_SIZE (4096) /* Size of the stack buffer */ +#define SCRATCH_STRING_LEN (65535) /* Max length for a string under construction */ + +#define MAX_CALL_STACK (30) +#define DEFAULT_PM_BUFLEN_K (32) +#define MAX_BUFFER_SIZE (100) + +#define CACHE_SIZE (10000) + +// struct for caching of capstone dissasembly +struct capstoneCache +{ + char *assembly; + enum instructionClass ic; + uint32_t newaddr; +}; + +/* Materials required to be maintained across callbacks for output construction */ +struct opConstruct +{ + uint32_t currentFileindex; /* The filename we're currently in */ + struct symbolFunctionStore *currentFunctionptr; /* The function we're currently in */ + uint32_t currentLine; /* The line we're currently in */ + uint32_t workingAddr; /* The address we're currently in */ +}; + +struct CallStack +{ + symbolMemaddr stack[MAX_CALL_STACK]; /* Stack of calls */ + int stackDepth{-1}; /* Current stack depth */ + int perfettoStackDepth{-1}; /* Stack depth at which perfetto is currently */ +}; + +struct RunTime +{ + enum TRACEprotocol protocol; /* Encoding protocol to use */ + struct TRACEDecoder i; + + struct symbol *s; /* Currently used elf */ + struct symbol *_s; /* Symbols read from elf */ + struct symbol *sb; /* Symbols read from bootloader elf*/ + + bool bootloader; /* Set if we are still using bootloader elf */ + + struct opConstruct op; /* Materials required to be maintained across callbacks for output construction */ + + bool traceRunning; /* Set if we are currently receiving trace */ + uint32_t context; /* Context we are currently working under */ + + bool committed{true}; /* Set if we have committed to the current jump */ + bool resentStackDel; /* Possibility to remove an entry from the stack, if address not given */ + + bool resentStackSwitch{false}; /* Call stack has been switched in last iteration */ + bool exceptionEntry{false}; /* Set if we are currently in an exception entry */ + int exceptionId{0}; /* Exception ID */ + bool exceptionActive{false}; /* Set if we are currently in an exception */ + uint32_t returnAddress{0}; /* Return address for exception */ + + uint16_t instruction_count{0}; /* Instruction count for precise timing between cycle count packets (Assume all instructions take equally long) */ + + CallStack *callStack; /* Pointer to current active call stack */ + CallStack exceptionCallStack; /* Separate call stack for exceptions (all exceptions need to convert to depth 0; nested exceptions not implemented)*/ + CallStack bootloaderCallStack; /* Call stack for bootloader */ +}; + +struct CallStackBuffer +{ + unsigned int lastCycleCount{0}; /* Last cycle count */ + perfetto::protos::FtraceEvent *proto_buffer[MAX_BUFFER_SIZE]; /* Buffer for protobuf */ + uint16_t instruction_counts[MAX_BUFFER_SIZE]; /* Instruction count */ + uint64_t global_interpolations[MAX_BUFFER_SIZE]; /* Global timestamps */ + int proto_buffer_index{0}; /* Index for the buffer */ +}csb; + +/* Cache to store capstone disassembly for fast access */ +cache::lru_cache cache_lru(CACHE_SIZE); + +//--------------------------------------------------------------------------------------// +//---------------------------------- END REGION Defines --------------------------------// +//--------------------------------------------------------------------------------------// + +class Mortrall +{ + public: + /* Perfetto trace elements */ + static inline perfetto::protos::Trace *perfetto_trace; + static inline perfetto::protos::FtraceEventBundle *ftrace; + static constexpr uint32_t PID_CALLSTACK = 400000; + static constexpr uint32_t PID_BOOTLOADER = 401000; + static constexpr uint32_t PID_EXCEPTION = 500000; + static constexpr uint32_t PID_PC = 600000; + static inline uint32_t activeCallStackThread; + static inline std::unordered_map exception_names; // Store interrupt names to give each a unique perfetto thread + static inline Roaring r1; + + /* Data Struct to store information about the current decoding process (used in orbmortem) */ + static inline RunTime *r; + + /* Callstack elements */ + static inline uint16_t tid; // current running thread id + static inline uint16_t pending_tid; // next thread id (gets set after a thread switch pattern has been detected) + static inline std::deque thread_switches; // thread switches from software Pre-Pump + static inline int thread_switches_size = 0; // size of thread switches + static inline struct symbolFunctionStore *top_thread_func; // current running function of the current thread + static inline std::map callstacks; // CallStack map to store the callStacks of the different threads + + /* Callback function to update timestamp in ITM trace */ + static inline std::function update_itm_timestamp; // callback function to update timestamp in ITM trace + static inline std::function switch_itm_symbols; // callback function to switch symbols in ITM trace (elf file) + + /* Mortrall flags and Parameters */ + static inline bool initialized = false; // Initialization flag + static inline bool pending_thread_switch; // Pending thread switch flag + static inline enum verbLevel verbose; // Verbosity level + static inline uint64_t cps; // Clocks per second of used cpu + static inline uint64_t cycleCountThreshold; // cycle count threshold from which debugging information is output + + /* Default Constructor */ + constexpr Mortrall() + { + ; + } + + /* Initialization */ + void inline init(perfetto::protos::Trace *perfetto_trace,perfetto::protos::FtraceEventBundle *ftrace,uint64_t cps,enum verbLevel v, struct symbol *s,struct symbol *sb, std::function update_itm_timestamp_input,std::function switch_itm_symbols, uint64_t ccth) + { + /* Set all parameters and functions */ + Mortrall::perfetto_trace = perfetto_trace; + Mortrall::ftrace = ftrace; + Mortrall::cps = cps; + Mortrall::r = new RunTime(); + Mortrall::r->_s = s; + Mortrall::r->sb = sb; + Mortrall::callstacks[0] = CallStack(); + Mortrall::r->exceptionCallStack = CallStack(); + Mortrall::r->bootloaderCallStack = CallStack(); + /* Check whether Bootloader is included or not */ + if(sb) + { + r->bootloader = true; + r->s = sb; + Mortrall::r->callStack = &r->bootloaderCallStack; + Mortrall::activeCallStackThread = PID_BOOTLOADER; + }else + { + r->bootloader = false; + r->s = s; + Mortrall::r->callStack = &Mortrall::callstacks[0]; + Mortrall::activeCallStackThread = PID_CALLSTACK; + } + Mortrall::pending_thread_switch = false; + Mortrall::top_thread_func = NULL; + Mortrall::update_itm_timestamp = update_itm_timestamp_input; + Mortrall::switch_itm_symbols = switch_itm_symbols; + if (v) + { + Mortrall::verbose = v; + }else + { + Mortrall::verbose = V_ERROR; + } + Mortrall::cycleCountThreshold = ccth; + Mortrall::_init(); + Mortrall::initialized = true; + /* Report successful initialization */ + _traceReport( V_DEBUG, "Mortrall initialized" EOL); + } + + static void inline dumpElement(char element) + { + /* Input function for decoding instruction trace packets, must be called from outside */ + if(Mortrall::initialized) + { + uint8_t byte = (uint8_t)element; + TRACEDecoderPump( &r->i, &byte, 1, _traceCB, r ); + } + } + + void inline finalize(auto *process_tree) + { + if(Mortrall::initialized) + { + /* Commit all changes */ + Mortrall::r->committed = true; + /* flush remaining buffer entries */ + Mortrall::_flush_proto_buffer(); + } + /* Initialize the protobuf categories */ + Mortrall::_init_protobuf(process_tree); + /* Print Debug Information */ + struct TRACECPUState *cpu = TRACECPUState( &Mortrall::r->i ); + /* The number of overflows is one metrics that represents the quality of the trace */ + /* If there are many overflows try implicit tracing */ + printf("Overflows: %llu - %llu\n",cpu->overflows,cpu->ASyncs); + if (cpu->overflows > 0) + { + printf("Warning: When tracing implicit, it is very likely overflows corrupt the Call Stack.\n"); + } + printf("PC bitmap cardinality = %llu\n", r1.cardinality()); + // Serialize the bitmap to a file + // Step 1: Get the size of the serialized bitmap + size_t size_in_bytes = r1.getSizeInBytes(); + + // Step 2: Create a buffer (vector of char) to hold the serialized bitmap + std::vector buffer(size_in_bytes); + + // Step 3: Serialize the bitmap into the buffer + r1.write(buffer.data()); + + // Step 4: Write the buffer to a binary file + std::ofstream out("bitmap.roar", std::ios::binary); + out.write(buffer.data(), buffer.size()); + out.close(); + printf("PC bitmap serialized to 'bitmap.roar'\n"); + delete Mortrall::r; + } + + void inline add_thread_switch(uint16_t tid) + { + /* Callback for itm decoding */ + Mortrall::thread_switches.push_back(tid); + thread_switches_size++; + } + + private: + +//--------------------------------------------------------------------------------------// +//-------------------------------- BEGIN REGION Callback -------------------------------// +//--------------------------------------------------------------------------------------// + // global variables for callStack handling in callback + static inline bool revertStack = false; + static inline bool inconsistent = false; + // Callback function if an "interesting" element has been received + static void inline _traceCB( void *d ) + /* Callback function for when valid TRACE decode is detected */ + { + Mortrall::r = ( RunTime * )d; + struct TRACECPUState *cpu = TRACECPUState( &Mortrall::r->i ); + uint32_t incAddr = 0; + uint32_t disposition; + uint32_t targetAddr = 0; /* Just to avoid unitialised variable warning */ + bool linearRun = false; + enum instructionClass ic; + symbolMemaddr newaddr; + + + /* Check for Cycle Count update to reset instruction count*/ + if (TRACEStateChanged( &Mortrall::r->i, EV_CH_CYCLECOUNT) ) + { + // Mortrall::_generate_protobuf_cycle_counts(); + Mortrall::_flush_proto_buffer(); + Mortrall::r->instruction_count = 0; + Mortrall::update_itm_timestamp(cpu->cycleCount); + _traceReport( V_DEBUG, "Cc: %lu\n",cpu->cycleCount ); + } + + + /* 2: Deal with exception entry */ + /* ============================ */ + if ( TRACEStateChanged( &Mortrall::r->i, EV_CH_EX_ENTRY ) ) + { + switch ( Mortrall::r->protocol ) + { + case TRACE_PROT_ETM4: + + /* For the ETM4 case we get a new address with the exception indication. This address is the preferred _return_ address, */ + /* there will be a further address packet, which is the jump destination, along shortly. Note that _this_ address */ + /* change indication will be consumed here, and won't hit the test below (which is correct behaviour. */ + if ( !TRACEStateChanged( &Mortrall::r->i, EV_CH_ADDRESS ) ) + { + _traceReport( V_DEBUG, "Exception occured without return address specification" ); + } + else + { + /* Sometimes an invalid exception address is transmitted. When this happens do not use it as preferred return address. */ + /* It seems like this can happen when there just has been a branch instruction before the exception and it is not clear if the jump will be executed*/ + /* An invalid address starts has hex: 0xf...*/ + /* Later I found out this a specific bug for px4v5x with too high clock frequency which causes packets to be corrupted. */ + /* With a perfect setup the else statement should never be executed.*/ + if (cpu->addr < 0xf0000000) + { + _appendToOPBuffer( Mortrall::r, NULL, Mortrall::r->op.currentLine, LT_EVENT, "========== Exception Entry (%d (%s) at 0x%08x return to 0x%08x ) ==========", + cpu->exception, TRACEExceptionName( cpu->exception ), Mortrall::r->op.workingAddr, cpu->addr ); + Mortrall::r->returnAddress = cpu->addr; + revertStack = (cpu->addr != Mortrall::r->callStack->stack[Mortrall::r->callStack->stackDepth]); + } + else + { + _appendToOPBuffer( Mortrall::r, NULL, Mortrall::r->op.currentLine, LT_EVENT, "========== Exception Entry (%d (%s) at 0x%08x with invalid return address (0x%08x) ) ==========", + cpu->exception, TRACEExceptionName( cpu->exception ), Mortrall::r->op.workingAddr, cpu->addr ); + Mortrall::r->returnAddress = Mortrall::r->op.workingAddr; + } + Mortrall::r->exceptionEntry = true; + Mortrall::r->exceptionId = cpu->exception; + } + break; + + default: + _traceReport( V_DEBUG, "Unrecognised trace protocol in exception handler" ); + break; + } + } + + + /* 3: Collect flow affecting changes introduced by this event */ + /* ========================================================== */ + if ( TRACEStateChanged( &Mortrall::r->i, EV_CH_ADDRESS ) ) + { + /* Make debug report if calculated and reported addresses differ. This is most useful for testing when exhaustive */ + /* address reporting is switched on. It will give 'false positives' for uncalculable instructions (e.g. bx lr) but */ + /* it's a decent safety net to be sure the jump decoder is working correctly. */ + if ( Mortrall::r->protocol != TRACE_PROT_MTB ) + { + inconsistent = ( Mortrall::r->op.workingAddr != cpu->addr ) && !r->resentStackSwitch; + _traceReport( V_DEBUG, "%sCommanded CPU Address change (Was:0x%08x Commanded:0x%08x)" EOL, + ( !inconsistent || Mortrall::r->exceptionEntry) ? "" : "***INCONSISTENT*** ", Mortrall::r->op.workingAddr, cpu->addr ); + _revertStackDel(revertStack,inconsistent); + _catchInconsistencies(inconsistent,cpu->addr); + Mortrall::r->committed = true; // This is needed to generate only protobuff entries when the jump was commited + r->resentStackSwitch = false; // Stack switch has been processed + revertStack = false; // Reset revertStack + } + /* As every Exception Packet is followed by an Address Packet we handle the exception here to keep track of thread switches */ + _handleExceptionEntry(); + /* After it is clear to what location the jump happened add the current function to the top of the stack and update in protobuf */ + _addTopToStack(Mortrall::r,cpu->addr); + /* Update the ProtoBuf entries */ + _generate_protobuf_entries_single(cpu->addr); + /* Report the Stack for debugging */ + _stackReport(Mortrall::r); + /* Whatever the state was, this is an explicit setting of an address, so we need to respect it */ + Mortrall::r->op.workingAddr = cpu->addr; // Update working address from addr packet + Mortrall::r->exceptionEntry = false; // Reset exception entry flag + } + + if ( TRACEStateChanged( &Mortrall::r->i, EV_CH_LINEAR ) ) + { + /* MTB-Specific mechanism: Execute instructions from the marked starting location to the indicated finishing one */ + /* Disposition is all 1's because every instruction is executed. */ + Mortrall::r->op.workingAddr = cpu->addr; + targetAddr = cpu->toAddr; + linearRun = true; + disposition = 0xffffffff; + _traceReport( V_DEBUG, "Linear run 0x%08x to 0x%08x" EOL, cpu->addr, cpu->toAddr ); + } + + if ( TRACEStateChanged( &Mortrall::r->i, EV_CH_ENATOMS ) ) + { + /* Atoms represent instruction steps...some of which will have been executed, some stepped over. The number of steps is the */ + /* total of the eatoms (executed) and natoms (not executed) and the disposition bitfield shows if each individual instruction */ + /* was executed or not. For ETM3 each 'run' of instructions is a single instruction with the disposition bit telling you if */ + /* it was executed or not. For ETM4 each 'run' of instructions is from the current address to the next possible change of */ + /* program flow (and which point the disposition bit tells you if that jump was taken or not). */ + incAddr = cpu->eatoms + cpu->natoms; + disposition = cpu->disposition; + } + + /* 4: Execute the flow instructions */ + /* ================================ */ + while ( ( incAddr && !linearRun ) || ( ( Mortrall::r->op.workingAddr <= targetAddr ) && linearRun ) ) + { + /* Firstly, lets get the source code line...*/ + struct symbolLineStore *l = symbolLineAt( Mortrall::r->s, Mortrall::r->op.workingAddr ); + struct symbolFunctionStore *func = symbolFunctionAt( Mortrall::r->s, Mortrall::r->op.workingAddr ); + + /* To be able to detect thread switches mortrall needs to be combined with itm tracing */ + /* Check whether itm detected a thread switch */ + _detect_thread_switch_pattern(func); + + /* To always have the correct running/top function on the stack update after every instruction */ + _addTopToStack(Mortrall::r,Mortrall::r->op.workingAddr); + /* Then print this update in perfetto */ + _generate_protobuf_entries_single(Mortrall::r->op.workingAddr); + + // New Atom Packets has been received therefore any stack changes are definitive + Mortrall::r->resentStackDel = false; // Stack delete has been processed + + if ( func ) + { + /* There is a valid function tag recognised here. If it's a change highlight it in the output. */ + if ( ( func->filename != Mortrall::r->op.currentFileindex ) || ( func != Mortrall::r->op.currentFunctionptr ) ) + { + _appendToOPBuffer( Mortrall::r, l, Mortrall::r->op.currentLine, LT_FILE, "%s::%s", symbolGetFilename( Mortrall::r->s, func->filename ), func->funcname ); + Mortrall::r->op.currentFileindex = func->filename; + Mortrall::r->op.currentFunctionptr = func; + Mortrall::r->op.currentLine = NO_LINE; + } + } + else + { + /* We didn't find a valid function, but we might have some information to work with.... */ + if ( ( NO_FILE != Mortrall::r->op.currentFileindex ) || ( NULL != Mortrall::r->op.currentFunctionptr ) ) + { + _appendToOPBuffer( Mortrall::r, l, Mortrall::r->op.currentLine, LT_FILE, "Unknown function" ); + Mortrall::r->op.currentFileindex = NO_FILE; + Mortrall::r->op.currentFunctionptr = NULL; + Mortrall::r->op.currentLine = NO_LINE; + } + } + + /* If we have changed line then output the new one */ + if ( l && ( ( l->startline != Mortrall::r->op.currentLine ) ) ) + { + Mortrall::r->op.currentLine = l->startline; + } + + /* This part caches the function symbolDisassembleLine with a lru-cache, which improves runtime heavily */ + char *a = NULL; + if(cache_lru.exists(Mortrall::r->op.workingAddr)) + { + auto val = cache_lru.get(Mortrall::r->op.workingAddr); + a = (char *)malloc(strlen(val.assembly) + 1); + strcpy(a,val.assembly); + ic = val.ic; + newaddr = val.newaddr; + } + else + { + /* Use capstone to disassemble current instruction and assign instructions types to ic */ + a = symbolDisassembleLine( Mortrall::r->s, &ic, Mortrall::r->op.workingAddr, &newaddr); + if (a) + { + struct capstoneCache *cs_element = new capstoneCache(); + cs_element->assembly = (char *)malloc(strlen(a) + 1); + strcpy(cs_element->assembly,a); + cs_element->ic = ic; + cs_element->newaddr = newaddr; + cache_lru.put(Mortrall::r->op.workingAddr, *cs_element); + } + } + + if ( a ) + { + _add_pc(Mortrall::r->op.workingAddr); + /* Calculate if this instruction was executed. This is slightly hairy depending on which protocol we're using; */ + /* * ETM3.5: Instructions are executed based on disposition bit (LSB in disposition word) */ + /* * ETM4 : ETM4 everything up to a branch is executed...decision about that branch is based on disposition bit */ + /* * MTB : Everything except jumps are executed, jumps are executed only if they are the last instruction in a run */ + bool insExecuted = ( + /* ETM3.5 case - dependent on disposition */ + ( ( !linearRun ) && ( Mortrall::r->i.protocol == TRACE_PROT_ETM35 ) && ( disposition & 1 ) ) || + + /* ETM4 case - either not a branch or disposition is 1 */ + ( ( !linearRun ) && ( Mortrall::r->i.protocol == TRACE_PROT_ETM4 ) && ( ( !( ic & LE_IC_JUMP ) ) || ( disposition & 1 ) ) ) || + + /* MTB case - a linear run to last address */ + ( ( linearRun ) && ( Mortrall::r->i.protocol == TRACE_PROT_MTB ) && + ( ( ( Mortrall::r->op.workingAddr != targetAddr ) && ( ! ( ic & LE_IC_JUMP ) ) ) || + ( Mortrall::r->op.workingAddr == targetAddr ) + ) ) ); + _appendToOPBuffer( Mortrall::r, l, Mortrall::r->op.currentLine, insExecuted ? LT_ASSEMBLY : LT_NASSEMBLY, a ); + /* Count instructions fot later interpolating between cycle count packets*/ + if(insExecuted) + { + Mortrall::r->instruction_count++; + } + /* Move addressing along */ + if ( ( Mortrall::r->i.protocol != TRACE_PROT_ETM4 ) || ( ic & LE_IC_JUMP ) || (ic & LE_IC_SYNC_BARRIER) ) + { + if ( Mortrall::r->i.protocol == TRACE_PROT_ETM4 ) + { + _traceReport( V_DEBUG, "Consumed, %sexecuted (%d left)", insExecuted ? "" : "not ", incAddr - 1 ); + } + + disposition >>= 1; + incAddr--; + } + + if ( ic & LE_IC_CALL ) + { + if ( insExecuted ) + { + /* Push the instruction after this if it's a subroutine or ISR */ + _addRetToStack( Mortrall::r, Mortrall::r->op.workingAddr + ( ( ic & LE_IC_4BYTE ) ? 4 : 2 )); + /* Add the called function on top of stack */ + _addTopToStack(Mortrall::r,newaddr); + /* Print the callStack for debugging */ + _traceReport( V_DEBUG, "Call to %08x", newaddr ); + _stackReport(Mortrall::r); + } + + Mortrall::r->op.workingAddr = insExecuted ? newaddr : Mortrall::r->op.workingAddr + ( ( ic & LE_IC_4BYTE ) ? 4 : 2 ); + } + else if ( ic & LE_IC_JUMP ) + { + _traceReport( V_DEBUG, "%sTAKEN JUMP", insExecuted ? "" : "NOT " ); + + if ( insExecuted ) + { + /* Update working address according to if jump was taken */ + if ( ic & LE_IC_IMMEDIATE ) + { + /* This is a jump, so we need to use the target address */ + _traceReport( V_DEBUG, "Immediate address %8x", newaddr ); + Mortrall::r->op.workingAddr = newaddr; + } + else + { + if(!_handleExceptionExit(func)) + { + /* We didn't get the address, so need to park the call stack address if we've got one. Either we won't */ + /* get an address (in which case this one was correct), or we wont (in which case, don't unstack this one). */ + if ( Mortrall::r->callStack->stackDepth ) + { + Mortrall::r->op.workingAddr= Mortrall::r->callStack->stack[Mortrall::r->callStack->stackDepth - 1]; + _traceReport( V_DEBUG, "Return with stacked candidate to %08x", Mortrall::r->op.workingAddr ); + } + else + { + _traceReport( V_DEBUG, "Return with no stacked candidate" ); + } + Mortrall::r->committed = false; + Mortrall::r->resentStackDel = true; + _removeRetFromStack(Mortrall::r); + } + } + _stackReport(Mortrall::r); + } + else + { + /* The branch wasn't taken, so just move along */ + Mortrall::r->op.workingAddr += ( ic & LE_IC_4BYTE ) ? 4 : 2; + } + } + else if (ic & LE_IC_SYNC_BARRIER) + { + /* This is a sync barrier, it has its own Atom packet like a jump instruction */ + _traceReport( V_DEBUG, "Sync Barrier. \n"); + Mortrall::r->op.workingAddr += ( ic & LE_IC_4BYTE ) ? 4 : 2; + } + else + { + /* Just a regular instruction, so just move along */ + Mortrall::r->op.workingAddr += ( ic & LE_IC_4BYTE ) ? 4 : 2; + } + } + else + { + /* If it is the first time assembly is not found switch elf file because we exceeded the address range of bootloader elf*/ + /* Check if trace is still executing bootloader */ + if (r->bootloader) + { + /* Switch to default elf file if the new address is available */ + if (symbolDisassembleLine( r->_s, &ic, Mortrall::r->op.workingAddr, &newaddr)) + { + /* switch symbols (elf File) */ + r->s = r->_s; + /* Start with main_thread by default */ + Mortrall::tid = 0; + /* Set the callStack to the main_thread */ + r->callStack = &callstacks[Mortrall::tid]; + Mortrall::activeCallStackThread = PID_CALLSTACK + Mortrall::tid; + /* Add the first function to the top of the stack */ + _addTopToStack(Mortrall::r,cpu->addr); + /* Update the ProtoBuf entries */ + _generate_protobuf_entries_single(cpu->addr); + /* reset bootloader flag */ + r->bootloader = false; + /* switch symbols for itm trace as well */ + Mortrall::switch_itm_symbols(); + /* Print Debug Information */ + _traceReport( V_DEBUG, "*** BOOTLOADER FINISHED *** \n"); + } + }else + { + _appendToOPBuffer( Mortrall::r, l, Mortrall::r->op.currentLine, LT_ASSEMBLY, "%8x:\tASSEMBLY NOT FOUND" EOL, Mortrall::r->op.workingAddr ); + Mortrall::r->op.workingAddr += 2; + disposition >>= 1; + incAddr--; + } + } + } + } + +//--------------------------------------------------------------------------------------// +//--------------------------------- END REGION Callback --------------------------------// +//--------------------------------------------------------------------------------------// + +//--------------------------------------------------------------------------------------// +//---------------------------------- BEGIN REGION Init ---------------------------------// +//--------------------------------------------------------------------------------------// + + static void inline _init() + { + /* Init ETM4 Decoder */ + TRACEprotocol trp = TRACE_PROT_ETM4; + Mortrall::Mortrall::r->protocol = trp; + TRACEDecoderInit( &Mortrall::Mortrall::r->i, trp, true, _traceReport ); + /* Init Debug counters */ + r->i.cpu.ASyncs = 0; + r->i.cpu.overflows = 0; + } + static void inline _init_protobuf(auto *process_tree) + { + { + auto *process = process_tree->add_processes(); + process->set_pid(PID_CALLSTACK); + process->add_cmdline("CallStack"); + // loop over callstacks + for (auto const& [key, val] : Mortrall::callstacks) + { + auto *thread = process_tree->add_threads(); + thread->set_tid(PID_CALLSTACK + key); + thread->set_tgid(PID_CALLSTACK); + thread->set_name("Thread"); + } + // Add additional thread for bootloader + auto *thread = process_tree->add_threads(); + thread->set_tid(PID_BOOTLOADER); + thread->set_tgid(PID_CALLSTACK); + thread->set_name("Bootloader"); + } + { + auto *process = process_tree->add_processes(); + process->set_pid(PID_EXCEPTION); + process->add_cmdline("EXCEPTIONS"); + for (auto&& [tid, name] : exception_names) + { + auto *thread = process_tree->add_threads(); + thread->set_tid(PID_EXCEPTION + tid); + thread->set_tgid(PID_EXCEPTION); + thread->set_name(name); + } + } + { + auto *process = process_tree->add_processes(); + process->set_pid(PID_PC); + process->add_cmdline("PC"); + auto *thread = process_tree->add_threads(); + thread->set_tid(PID_PC); + thread->set_tgid(PID_PC); + thread->set_name("PC"); + } + } + +//--------------------------------------------------------------------------------------// +//----------------------------------- END REGION Init ----------------------------------// +//--------------------------------------------------------------------------------------// + +//--------------------------------------------------------------------------------------// +//-------------------------------- BEGIN REGION Perfetto -------------------------------// +//--------------------------------------------------------------------------------------// + + static void inline _add_pc(uint32_t addr) + { + r1.add(addr); + } + + static void inline _appendTOProtoBuffer(auto *event, int offset = 0) + { + /* As the instruction count interpolation cannot be applied before the next cycle count is received store the event in a buffer */ + csb.proto_buffer[csb.proto_buffer_index] = event; + csb.instruction_counts[csb.proto_buffer_index] = Mortrall::r->instruction_count + offset; + if (Mortrall::r->i.cpu.cycleCount == COUNT_UNKNOWN && Mortrall::r->callStack->perfettoStackDepth == -1){ + /* The first packet sometimes still has an unknown cycle count however it is actually 0 */ + csb.global_interpolations[csb.proto_buffer_index] = 0; + }else + { + csb.global_interpolations[csb.proto_buffer_index] = Mortrall::r->i.cpu.cycleCount; + } + /* Increment Index */ + csb.proto_buffer_index++; + /* Update perfetto stack depth by a maximum of 1 */ + if (Mortrall::r->callStack->stackDepth < Mortrall::r->callStack->perfettoStackDepth){ + Mortrall::r->callStack->perfettoStackDepth--; + }else if (Mortrall::r->callStack->stackDepth > Mortrall::r->callStack->perfettoStackDepth){ + Mortrall::r->callStack->perfettoStackDepth++; + } + /* Check for buffer overflow */ + if (csb.proto_buffer_index == MAX_BUFFER_SIZE) + { + Mortrall::_flush_proto_buffer(); + } + } + + static void inline _handleInconsistentFunctionSwitch(struct symbolFunctionStore *next_func, uint32_t addr) + { + /* + This function inserts a stop and start element into the protobuf trace to compensate for function switches + within the same level of the call stack. This should not happen with a perfect instruction trace but seems + unavoidable with the current trace data. + */ + if(Mortrall::r->callStack->stackDepth >= 0) + { + /* First end current function and append to proto buffer */ + auto *event = ftrace->add_event(); // create Ftrace event + auto *print = event->mutable_print(); + char buffer[80]; + event->set_pid(Mortrall::activeCallStackThread); // set the pid of the event + snprintf(buffer, sizeof(buffer), "E|0"); + print->set_buf(buffer); + _appendTOProtoBuffer(event); + + /* Second begin current function and append to proto buffer */ + event = ftrace->add_event(); // create Ftrace event + print = event->mutable_print(); // add print + event->set_pid(Mortrall::activeCallStackThread); // set the pid of the event + snprintf(buffer, sizeof(buffer), "B|0|%s", next_func->funcname); + print->set_buf(buffer); + _appendTOProtoBuffer(event,1); // Use an offset of 1 to compensate for the missing instruction count + } + } + + static bool inline _inconsistentFunctionSwitch(uint32_t addr) + { + /* Check whether a function switch on the same stack depth happend */ + if((int)Mortrall::r->callStack->stackDepth == Mortrall::r->callStack->perfettoStackDepth){ + struct symbolFunctionStore *next_func = symbolFunctionAt( Mortrall::r->s, addr ); + if(top_thread_func && next_func && strcmp(next_func->funcname, top_thread_func->funcname)) + { + _traceReport( V_DEBUG, "Inconsistent function switch detected between functions: %s and %s\n", next_func->funcname, top_thread_func->funcname); + /* Switch the functions in perfetto trace */ + _handleInconsistentFunctionSwitch(next_func,addr); + /* Update top thread function after handling the inconsistency*/ + Mortrall::top_thread_func = symbolFunctionAt( Mortrall::r->s, Mortrall::r->callStack->stack[Mortrall::r->callStack->stackDepth] ); + } + return true; + } + return false; + } + + static void inline _generate_protobuf_entries_single(uint32_t addr) + { + /* Check whether the stack hight changed and if yes if the changes are commited*/ + if(!_inconsistentFunctionSwitch(addr) && Mortrall::r->committed) + { + /* Create Ftrace event */ + auto *event = ftrace->add_event(); + auto *print = event->mutable_print(); + char buffer[80]; + /* Get the function at the current address */ + Mortrall::top_thread_func = symbolFunctionAt( Mortrall::r->s, Mortrall::r->callStack->stack[Mortrall::r->callStack->stackDepth] ); + /* Check if the call stack reduced or increased */ + if(((int)Mortrall::r->callStack->stackDepth > Mortrall::r->callStack->perfettoStackDepth)) + { + /* set the pid of the event */ + event->set_pid(Mortrall::activeCallStackThread); + if (top_thread_func) + { + snprintf(buffer, sizeof(buffer), "B|0|%s", Mortrall::top_thread_func->funcname); + }else + { + snprintf(buffer, sizeof(buffer), "B|0|0x%08x", Mortrall::r->op.workingAddr); + } + } + else if(((int)Mortrall::r->callStack->stackDepth < Mortrall::r->callStack->perfettoStackDepth)) + { + /* set the pid of the event */ + event->set_pid(Mortrall::activeCallStackThread); + snprintf(buffer, sizeof(buffer), "E|0"); + } + print->set_buf(buffer); + /* append to buffer to set the timestamp on the next cycle count packet */ + _appendTOProtoBuffer(event); + } + } + + static void inline _generate_protobuf_cycle_counts() + { + /* Create Ftrace print event for cycle count packets */ + /* This is mainly for debugging and not needed for correct displaying of the callstack */ + auto *event = ftrace->add_event(); + uint64_t ns = (uint64_t)(((Mortrall::r->i.cpu.cycleCount * 1'000'000'000)/ Mortrall::cps)-1); + event->set_timestamp(ns); + event->set_pid(activeCallStackThread); + auto *print = event->mutable_print(); + char buffer[40]; + snprintf(buffer, sizeof(buffer), "I|0|CC: %llu",Mortrall::r->i.cpu.cycleCount); + print->set_buf(buffer); + } + + static double inline _get_ic_percentage(int i) + { + /* Compute the relative times between two cycle count packets depending on instruction count*/ + uint16_t ic = csb.instruction_counts[i]; + double ret = 0; + if (ic != 0) + { + ret = ((double)ic/(double)Mortrall::r->instruction_count) * (Mortrall::r->i.cpu.cycleCount - csb.lastCycleCount); + } + return ret; + } + + static inline uint64_t perf_prev_ns = 0; + static void inline _flush_proto_buffer() + { + /* Clear buffer and place all trace packets on their relative timestamp */ + for (int i = 0; i < csb.proto_buffer_index; i++) + { + auto *event = csb.proto_buffer[i]; + uint64_t interpolation = csb.global_interpolations[i] + (uint64_t)_get_ic_percentage(i); + uint64_t ns = (uint64_t)((interpolation * 1'000'000'000) / Mortrall::cps); + /* There cannot be two events at the same timestamp, therefore check if ns is smaller than previous ns */ + /* Note: this should not be necessary with a perfect instruction trace however */ + if(perf_prev_ns >= ns){ + ns = perf_prev_ns + 1; + } + perf_prev_ns = ns; + event->set_timestamp(ns); + } + /* Reset index after flushing */ + csb.proto_buffer_index = 0; + csb.lastCycleCount = Mortrall::r->i.cpu.cycleCount; + } + +//--------------------------------------------------------------------------------------// +//--------------------------------- END REGION Perfetto --------------------------------// +//--------------------------------------------------------------------------------------// + +//--------------------------------------------------------------------------------------// +//-------------------------------- BEGIN REGION CallStack ------------------------------// +//--------------------------------------------------------------------------------------// + + static void inline _handleExceptionEntry() + { + /* Handle the context switch after a exception */ + if (Mortrall::r->exceptionEntry) + { + Mortrall::_generate_protobuf_cycle_counts(); + /* Check if exception Id is in the map */ + if(!Mortrall::exception_names.contains(Mortrall::r->exceptionId)) + { + Mortrall::exception_names[Mortrall::r->exceptionId] = TRACEExceptionName(Mortrall::r->exceptionId); + } + /* Flush remaining perfetto events before switching context */ + _generate_protobuf_entries_single(Mortrall::r->op.workingAddr); + _flush_proto_buffer(); + _traceReport( V_DEBUG, "*** THREAD SWITCH *** (to exception: %u)" , Mortrall::r->exceptionId); + /* Switch current call stack to exception call stack */ + Mortrall::r->callStack = &r->exceptionCallStack; + Mortrall::activeCallStackThread = Mortrall::PID_EXCEPTION + Mortrall::r->exceptionId; + /* Set exception flag */ + r->exceptionActive = true; + } + } + + static bool inline _handleExceptionExit(struct symbolFunctionStore *func) + { + /* As there is no exception exit packet the exit has to be detected by ending of the "arm_exception" function */ + /* If highaddr is reached its the end of exception (highaddr might be offset by a 1/2 byte) */ + if(r->exceptionActive && func && strstr(func->funcname,"arm_exception") && (Mortrall::r->op.workingAddr >= (func->highaddr - 0x1f))&& (Mortrall::r->op.workingAddr <= (func->highaddr + 0xf))) + { + Mortrall::_generate_protobuf_cycle_counts(); + /* Clear the exception call stack */ + _removeRetFromStack(Mortrall::r); + _generate_protobuf_entries_single(Mortrall::r->op.workingAddr); + _flush_proto_buffer(); + _stackReport(Mortrall::r); + /* Check if the current elf file is still the bootloader */ + if(r->bootloader){ + /* Switch to bootloader */ + _traceReport( V_DEBUG, "*** THREAD SWITCH *** (to tid: %u)" , Mortrall::PID_BOOTLOADER); + /* set the current call stack in runtime */ + Mortrall::r->callStack = &r->bootloaderCallStack; + Mortrall::activeCallStackThread = Mortrall::PID_BOOTLOADER; + /* Reset pending thread switch flag */ + Mortrall::pending_thread_switch = false; + /* Set stack switch flag */ + r->resentStackSwitch = true; + /* Reset exception flag */ + r->exceptionActive = false; + /* Debug print Callstack */ + _stackReport(Mortrall::r); + return true; + } + /* Switch to new call stack */ + /* Check if we switched threads or are just returning from the exception to the old callStack */ + if (Mortrall::pending_thread_switch){ + Mortrall::tid = Mortrall::pending_tid; + } + _traceReport( V_DEBUG, "*** THREAD SWITCH *** (to tid: %u)" , Mortrall::tid); + /* set the current call stack in runtime */ + Mortrall::r->callStack = &Mortrall::callstacks[Mortrall::tid]; + Mortrall::activeCallStackThread = Mortrall::PID_CALLSTACK + Mortrall::tid; + /* Reset pending thread switch flag */ + Mortrall::pending_thread_switch = false; + /* Set stack switch flag */ + r->resentStackSwitch = true; + /* Reset exception flag */ + r->exceptionActive = false; + /* Debug print Callstack */ + _stackReport(Mortrall::r); + return true; + } + return false; + } + + + static void inline _revertStackDel(bool revertStack, bool inconsistent) + { + /* + There are multiple reasons why we would want to revert a stack deletion: + 1. There has been a jump instruction without immediate address this means we have taken a jump but are not sure where we jumped until the next addr packet + 2. There has been a exception right after a jump instruction this means it is not clear if the jump will be executed or the exception interrupted + */ + if ( Mortrall::r->resentStackDel && (revertStack || (inconsistent && !Mortrall::r->exceptionEntry))) + { + _traceReport( V_DEBUG, "Stack delete reverted" ); + if ( Mortrall::r->callStack->stackDepth < MAX_CALL_STACK - 1 ) + { + Mortrall::r->callStack->stackDepth++; + } + } + } + + static void inline _catchInconsistencies(bool inconsistent, uint32_t addr) + { + /* Check if we the took the right branch after receiving an address packet.*/ + /* This should not be necessary with a perfect trace and should ideally be removed */ + if (inconsistent) + { + /* The return can be removed if and explicit trace has been recorded. (needs to be set in gdb.init)*/ + /* If removed makes it can make the decoding more robust but might create some inconsistencies and jumps in the callStack */ + return; + } + if(Mortrall::r->callStack->stackDepth > 0) + { + /* Loop over the callstack stack and check for duplicate entries */ + struct symbolFunctionStore *new_func = symbolFunctionAt( Mortrall::r->s, addr); + for (int i = (Mortrall::r->callStack->stackDepth - 1); i >= 0; i--) + { + /* Check if function name address is the same as the new address */ + struct symbolFunctionStore *current_func = symbolFunctionAt( Mortrall::r->s, Mortrall::r->callStack->stack[i]); + if(current_func != NULL && new_func != NULL && strcmp(current_func->funcname,new_func->funcname) == 0) + { + /* Precise Debug prints to be able to reconstruct the code logic */ + _traceReport( V_DEBUG, "Inconsistency has been caught and reverted at stack depth: %i", i); + _traceReport( V_DEBUG, "New Func: %s [%08x]" , new_func->funcname, addr); + _traceReport( V_DEBUG, "Current Func: %s [%08x]" , current_func->funcname, Mortrall::r->callStack->stack[i]); + for (int j = (Mortrall::r->callStack->stackDepth - 1); j >= i; j--) + { + /* Collapse stack until the duplicate function is reached*/ + _removeRetFromStack(Mortrall::r); + /* Commit the changes */ + Mortrall::r->committed = true; + /* Update protobuf trace*/ + _generate_protobuf_entries_single(addr); + } + _stackReport(Mortrall::r); + } + } + } + } + + static void inline displayProgressBar(float progress) { + /* Basic c++ progress bar implementation */ + std::cout << "["; + int pos = 50 * progress; + for (int i = 0; i < 50; ++i) { + if (i < pos) std::cout << "="; + else if (i == pos) std::cout << ">"; + else std::cout << " "; + } + std::cout << "] " << std::fixed << std::setprecision(2) << (progress * 100.0) << " %\r"; + std::cout.flush(); + } + + static void inline _detect_thread_switch_pattern(struct symbolFunctionStore *func) + { + /* Detect if NuttX is about to switch context */ + /* Hint: This would need to be changed for work for other RTOS */ + if(!Mortrall::pending_thread_switch && func && (strcmp(func->funcname,"sched_note_resume") == 0)) + { + int remaining_switches = thread_switches.size(); + if (remaining_switches == 0) + { + /* If there are no more thread switches left return. */ + /* If this happens decoding was not correct */ + _traceReport( V_DEBUG, "No more thread switches." ); + return; + } + /* Switch to next thread id computed by prePumping the trace file */ + Mortrall::pending_tid = Mortrall::thread_switches.front(); + Mortrall::thread_switches.pop_front(); + /* Set pending thread switch flag */ + Mortrall::pending_thread_switch = true; + /* Debug print thread switch */ + _traceReport( V_DEBUG, "Thread switch pattern detected with pending tid: %u" , Mortrall::pending_tid); + /* Display the progress bar depending on how many context switches are left */ + displayProgressBar((float)(thread_switches_size-thread_switches.size()) / (float)thread_switches_size); + } + /* Set commited flag */ + Mortrall::r->committed = true; + } + + static void inline _addRetToStack( RunTime *r, symbolMemaddr p , int num = 0) + { + /* Check if Stack is full */ + if ( Mortrall::r->callStack->stackDepth == MAX_CALL_STACK - 1 ) + { + /* Make room for a new entry */ + memmove( &Mortrall::r->callStack->stack[0], &Mortrall::r->callStack->stack[1], sizeof( symbolMemaddr ) * ( MAX_CALL_STACK - 1 ) ); + } + + /* Add address to stack */ + Mortrall::r->callStack->stack[Mortrall::r->callStack->stackDepth] = p; + /* Debug print */ + _traceReport( V_DEBUG, "Pushed %08x to return stack", Mortrall::r->callStack->stack[Mortrall::r->callStack->stackDepth]); + /* Increment stack depth */ + if ( Mortrall::r->callStack->stackDepth < MAX_CALL_STACK - 1 ) + { + Mortrall::r->callStack->stackDepth++; + } + } + static void inline _removeRetFromStack(RunTime *r) + { + /* Decrease callStack */ + /* If Stack depth is '-1' the stack is uninitialized */ + /* This needs to be the case for every exception callStack after it is exited */ + if ( Mortrall::r->callStack->stackDepth >= 0 ) + { + Mortrall::r->callStack->stackDepth--; + _traceReport( V_DEBUG, "Popped %08x from return stack", Mortrall::r->callStack->stack[Mortrall::r->callStack->stackDepth]); + } + } + static void inline _addTopToStack(RunTime *r,symbolMemaddr p) + { + /* If the stack is uninitialized set the stack depth to 0 */ + if ( Mortrall::r->callStack->stackDepth == -1) + { + Mortrall::r->callStack->stackDepth = 0; + } + /* Just update the top address without changing the stack depth*/ + if ( Mortrall::r->callStack->stackDepth < MAX_CALL_STACK - 1 ) + { + Mortrall::r->callStack->stack[Mortrall::r->callStack->stackDepth] = p; + } + } + +//--------------------------------------------------------------------------------------// +//--------------------------------- END REGION CallStack -------------------------------// +//--------------------------------------------------------------------------------------// + + +//--------------------------------------------------------------------------------------// +//-------------------------------- BEGIN REGION Report ---------------------------------// +//--------------------------------------------------------------------------------------// + static inline bool debug_flag = false; + static void inline _appendToOPBuffer( struct RunTime *r, void *dat, int32_t lineno, enum LineType lt, const char *fmt, ... ) + /* Add line to output buffer, in a printf stylee */ + { + /* Only append if verbose is actually wanted to save execution time */ + if ( Mortrall::verbose != V_DEBUG ) + { + return; + } + char construct[SCRATCH_STRING_LEN]; + va_list va; + char *p; + + va_start( va , fmt ); + vsnprintf( construct, SCRATCH_STRING_LEN, fmt, va ); + va_end( va ); + + /* Make sure we didn't accidentially admit a CR or LF */ + for ( p = construct; ( ( *p ) && ( *p != '\n' ) && ( *p != '\r' ) ); p++ ); + + *p = 0; + + /* Relate the reporting to cycle count which can be set by an argument to get debug information for certain parts of the trace */ + if(Mortrall::r->i.cpu.cycleCount >= cycleCountThreshold) + { + genericsReport( V_DEBUG, "%s" EOL, construct ); + } + if(Mortrall::r->i.cpu.cycleCount == cycleCountThreshold && !debug_flag) + { + debug_flag = true; + genericsReport( V_INFO, "Debug Flag set." EOL ); + } + } + static void inline _traceReport( enum verbLevel l, const char *fmt, ... ) + /* Debug reporting stream */ + { + /* Only append if verbose is actually wanted to save execution time */ + if ( Mortrall::verbose != V_DEBUG ) + { + return; + } + static char op[SCRATCH_STRING_LEN]; + va_list va; + va_start( va, fmt ); + vsnprintf( op, SCRATCH_STRING_LEN, fmt, va ); + va_end( va ); + + /* Relate the reporting to cycle count which can be set by an argument to get debug information for certain parts of the trace */ + if(Mortrall::r->i.cpu.cycleCount >= cycleCountThreshold) + { + genericsReport( V_DEBUG, "%s" EOL, op ); + } + } + static void inline _stackReport(RunTime *r) + { + /* Only append if verbose is actually wanted to save execution time */ + if ( Mortrall::verbose != V_DEBUG ) + { + return; + } + /* Generate String which has a readable representation of the callStack */ + char CallStack[STACK_BUFFER_SIZE] = ""; + if ( Mortrall::r->callStack->stackDepth == 0 ) + { + Mortrall::strfcat(CallStack, "Stack %d is empty" EOL, Mortrall::tid); + if(Mortrall::r->callStack->stack[Mortrall::r->callStack->stackDepth]) + { + Mortrall::strfcat(CallStack, "Stack %d: %08x" EOL, Mortrall::r->callStack->stackDepth, Mortrall::r->callStack->stack[Mortrall::r->callStack->stackDepth]); + } + strcat(CallStack, EOL); + } + else + { + Mortrall::strfcat(CallStack, "Stack depth is %d with tid: %d" EOL, Mortrall::r->callStack->stackDepth, Mortrall::tid); + for ( int i = 0; i < Mortrall::r->callStack->stackDepth+1; i++ ) + { + struct symbolFunctionStore *running_func = symbolFunctionAt( Mortrall::r->s, Mortrall::r->callStack->stack[i] ); + if(running_func) + { + Mortrall::strfcat(CallStack, "Stack %d: %08x %s" EOL, i, Mortrall::r->callStack->stack[i], running_func->funcname); + } + else + { + Mortrall::strfcat(CallStack, "Stack %d: %08x" EOL, i, Mortrall::r->callStack->stack[i]); + } + } + strcat(CallStack, EOL); + } + /* Relate the reporting to cycle count which can be set by an argument to get debug information for certain parts of the trace */ + if(Mortrall::r->i.cpu.cycleCount >= cycleCountThreshold) + { + _traceReport( V_DEBUG, CallStack); + } + } + static void inline strfcat(char *str, const char *format, ...) + { + char buffer[STACK_BUFFER_SIZE]; + va_list args; + va_start(args, format); + vsnprintf(buffer,STACK_BUFFER_SIZE, format, args); + va_end(args); + strcat(str, buffer); + } + +//--------------------------------------------------------------------------------------// +//---------------------------------- END REGION Report ---------------------------------// +//--------------------------------------------------------------------------------------// + +//--------------------------------------------------------------------------------------// +//------------------------------------- BEGIN song -------------------------------------// +//--------------------------------------------------------------------------------------// + + /* This part is just for fun because I was stuck on with instruction tracing and Niklas was on vacation */ + /* and sadly no one else at Auterion can help me with that (GPT: why should they, no one has ever done that )*/ + static void inline _startSong() + { + /* Path to your audio file */ + const char *songPath = "/Users/lukasvonbriel/Music/Music/Media.localized/Music/Unknown Artist/Unknown Album/glass-of-wine-143532.mp3"; + + /* Command to play the song using afplay */ + char command[512]; + snprintf(command, sizeof(command), "osascript -e 'tell application \"Terminal\" to do script \"afplay \\\"%s\\\"\"'", songPath); + + /* Play the song */ + system(command); + } + + static void inline _endSong() + { + system("pkill afplay"); + } + +//--------------------------------------------------------------------------------------// +//------------------------------------- END song -------------------------------------// +//--------------------------------------------------------------------------------------// + +}; \ No newline at end of file diff --git a/ext/orbetto/src/orbetto.cpp b/ext/orbetto/src/orbetto.cpp index 6a22e19..537a143 100644 --- a/ext/orbetto/src/orbetto.cpp +++ b/ext/orbetto/src/orbetto.cpp @@ -30,6 +30,7 @@ using namespace std::string_literals; #include "stream.h" #include "loadelf.h" #include "device.hpp" +#include "mortrall.hpp" // To get the ITM channel list #include "../../src/emdbg/patch/data/itm.h" @@ -45,9 +46,13 @@ struct bool useTPIU{false}; uint32_t tpiuChannel{1}; uint64_t cps{0}; + uint64_t dbg_cc{0}; std::string file; std::string elfFile; + std::string elfBootloaderFile; bool outputDebugFile; + enum verbLevel verbose; + bool etm{false}; } options; struct @@ -60,13 +65,17 @@ struct struct TPIUPacket p; uint64_t timeStamp; /* Latest received time */ uint64_t ns; /* Latest received time in ns */ - struct symbol *symbols; /* symbols from the elf file */ + struct symbol *symbols; /* active symbols */ + struct symbol *symbols_main; /* symbols from the main elf file */ + struct symbol *symbols_bootloader; /* symbols from the bootloader elf file */ } _r; static Device device; static perfetto::protos::Trace *perfetto_trace; static perfetto::protos::FtraceEventBundle *ftrace; +Mortrall mortrall; + // ==================================================================================================== static constexpr uint32_t PID_TSK{0}; @@ -275,6 +284,16 @@ static void _handleSW( struct swMsg *m, struct ITMDecoder *i ) const uint8_t priority = m->value >> 16; const uint8_t prev_state = m->value >> 24; _switchTo(tid, true, priority, prev_state); + if(prev_tid < PID_STOP && prev_tid != 0) + { + auto *event = ftrace->add_event(); + event->set_timestamp(ns); + event->set_pid(PID_TSK + prev_tid); + auto *print = event->mutable_print(); + char buffer[100]; + snprintf(buffer, sizeof(buffer), "C|%u|Priorities %s|%u",PID_TSK,thread_names[prev_tid].c_str() ,priority); + print->set_buf(buffer); + } break; } case EMDBG_TASK_RUNNABLE: // ready @@ -310,7 +329,7 @@ static void _handleSW( struct swMsg *m, struct ITMDecoder *i ) { if (const char *name = (const char *) symbolCodeAt(_r.symbols, m->value, NULL); name) { - printf("Found Name %s for 0x%08x\n", name, m->value); + //printf("Found Name %s for 0x%08x\n", name, m->value); workqueue_names[m->value] = name; } else { @@ -624,12 +643,28 @@ static void _handleSW( struct swMsg *m, struct ITMDecoder *i ) } break; } + default: + { + // debug print the unknown source address + genericsReport( V_DEBUG, "Unknown SW message from %u\n", m->srcAddr ); + break; + } } } // ==================================================================================================== static void _handleTS( struct TSMsg *m, struct ITMDecoder *i ) { - _r.timeStamp += m->timeInc; + // only use if instruction trace data is not available + if( !options.etm ) + { + _r.timeStamp += m->timeInc; + _r.ns = (_r.timeStamp * 1e9) / options.cps; + } +} + +static void _handleTSFromETM(uint64_t cc) +{ + _r.timeStamp = cc; _r.ns = (_r.timeStamp * 1e9) / options.cps; } @@ -759,6 +794,11 @@ static void _itmPumpProcessPre( char c ) stopped_threads.insert(m->value); // printf("Thread %u stopped\n", m->value); } + // List all thread switches to construct call graph from instruction (etm) data later + if(m->srcAddr== EMDBG_TASK_RESUME) + { + mortrall.add_thread_switch(m->value & 0xfffful); + } } else if (p.genericMsg.msgtype == MSG_PC_SAMPLE) { @@ -806,6 +846,7 @@ static void _itmPumpProcess( char c ) if ( h[pp->genericMsg.msgtype] ) { + ( h[pp->genericMsg.msgtype] )( pp, &_r.i ); } } @@ -817,11 +858,11 @@ static void _itmPumpProcess( char c ) // ==================================================================================================== // ==================================================================================================== // ==================================================================================================== -static void _protocolPump( uint8_t c ) +static void _protocolPump( uint8_t c ,void ( *_pumpITMProcessGeneric )( char ),void ( *_pumpETMProcessGeneric )( char )) { if ( options.useTPIU ) { - switch ( TPIUPump( &_r.t, c ) ) + switch ( TPIUPump( &_r.t, c) ) { case TPIU_EV_NEWSYNC: case TPIU_EV_SYNCED: @@ -844,15 +885,23 @@ static void _protocolPump( uint8_t c ) for ( uint32_t g = 0; g < _r.p.len; g++ ) { - if ( _r.p.packet[g].s == options.tpiuChannel ) + if ( _r.p.packet[g].s == 2 ) { - _itmPumpProcess( _r.p.packet[g].d ); + if ( _pumpETMProcessGeneric ) + { + _pumpETMProcessGeneric( _r.p.packet[g].d ); + }else{ + options.etm = true; + } continue; } - - if ( _r.p.packet[g].s != 0 ) + else if ( _r.p.packet[g].s == 1 ) { - genericsReport( V_DEBUG, "Unknown TPIU channel %02x" EOL, _r.p.packet[g].s ); + _pumpITMProcessGeneric( (char)_r.p.packet[g].d ); + } + else + { + printf("Unknown TPIU channel %02x" EOL, _r.p.packet[g].s ); } } @@ -861,22 +910,34 @@ static void _protocolPump( uint8_t c ) case TPIU_EV_ERROR: genericsReport( V_WARN, "****ERROR****" EOL ); break; + default: + break; } } else { - _itmPumpProcess( c ); + _pumpITMProcessGeneric( c ); } } + +// ==================================================================================================== + +static void _switchSymbols() +{ + _r.symbols = _r.symbols_main; +} + // ==================================================================================================== static struct option _longOptions[] = { + {"cc", required_argument, NULL, 'a'}, {"cpufreq", required_argument, NULL, 'C'}, {"input-file", required_argument, NULL, 'f'}, {"help", no_argument, NULL, 'h'}, {"tpiu", required_argument, NULL, 't'}, {"elf", required_argument, NULL, 'e'}, - {"debug", no_argument, NULL, 'd'}, + {"elf_bootloader", required_argument, NULL, 'b'}, + {"debug", required_argument, NULL, 'd'}, {"verbose", required_argument, NULL, 'v'}, {"version", no_argument, NULL, 'V'}, {NULL, no_argument, NULL, 0} @@ -884,7 +945,7 @@ static struct option _longOptions[] = bool _processOptions( int argc, char *argv[] ) { int c, optionIndex = 0; - while ( ( c = getopt_long ( argc, argv, "C:Ef:de:hVt:v:", _longOptions, &optionIndex ) ) != -1 ) + while ( ( c = getopt_long ( argc, argv, "a:b:C:Ef:de:hVt:v:", _longOptions, &optionIndex ) ) != -1 ) switch ( c ) { // ------------------------------------ @@ -895,14 +956,15 @@ bool _processOptions( int argc, char *argv[] ) // ------------------------------------ case 'h': fprintf( stdout, "Usage: %s [options]" EOL, argv[0] ); - fprintf( stdout, " -C, --cpufreq: (Scaled) speed of the CPU" EOL ); - fprintf( stdout, " -f, --input-file: Take input from specified file" EOL ); - fprintf( stdout, " -h, --help: This help" EOL ); - fprintf( stdout, " -e, --elf: : Use this ELF file for information" EOL ); - fprintf( stdout, " -d, --debug: Output a human-readable protobuf file" EOL ); - fprintf( stdout, " -t, --tpiu: : Use TPIU decoder on specified channel (normally 1)" EOL ); - fprintf( stdout, " -v, --verbose: Verbose mode 0(errors)..3(debug)" EOL ); - fprintf( stdout, " -V, --version: Print version and exit" EOL ); + fprintf( stdout, " -C, --cpufreq: (Scaled) speed of the CPU" EOL ); + fprintf( stdout, " -f, --input-file: Take input from specified file" EOL ); + fprintf( stdout, " -h, --help: This help" EOL ); + fprintf( stdout, " -e, --elf: : Use this ELF file for information" EOL ); + fprintf( stdout, " -eb, --elf_bootloader: : Use this ELF file for bootloader information" EOL ); + fprintf( stdout, " -d, --debug: Output a human-readable protobuf file" EOL ); + fprintf( stdout, " -t, --tpiu: : Use TPIU decoder on specified channel (normally 1)" EOL ); + fprintf( stdout, " -v, --verbose: Verbose mode 0(errors)..3(debug)" EOL ); + fprintf( stdout, " -V, --version: Print version and exit" EOL ); return false; // ------------------------------------ @@ -915,6 +977,10 @@ bool _processOptions( int argc, char *argv[] ) options.outputDebugFile = true; break; + case 'a': + options.dbg_cc = atoi( optarg ); + break; + // ------------------------------------ case 'f': options.file = optarg; @@ -925,6 +991,10 @@ bool _processOptions( int argc, char *argv[] ) options.elfFile = optarg; break; + case 'b': + options.elfBootloaderFile = optarg; + break; + // ------------------------------------ case 't': options.useTPIU = true; @@ -940,6 +1010,7 @@ bool _processOptions( int argc, char *argv[] ) } genericsSetReportLevel( (enum verbLevel)atoi( optarg ) ); + options.verbose = (enum verbLevel) atoi( optarg ); break; // ------------------------------------ @@ -1012,6 +1083,7 @@ int main(int argc, char *argv[]) ftrace->set_cpu(0); struct Stream *stream = streamCreateFile( options.file.c_str() ); + genericsReport( V_INFO, "PreProcess Stream" EOL ); while ( true ) { size_t receivedSize; @@ -1024,23 +1096,42 @@ int main(int argc, char *argv[]) if (result == RECEIVE_RESULT_EOF or result == RECEIVE_RESULT_ERROR) break; unsigned char *c = cbw; - while (receivedSize--) _itmPumpProcessPre(*c++); + while (receivedSize--) _protocolPump(*c++,_itmPumpProcessPre,NULL); //_itmPumpProcessPre(*c++); fflush(stdout); } stream->close(stream); free(stream); printf("Loading ELF file %s with%s source lines\n", options.elfFile.c_str(), has_pc_samples ? "" : "out"); - _r.symbols = symbolAcquire((char*)options.elfFile.c_str(), true, has_pc_samples); - assert( _r.symbols ); - printf("Loaded ELF with %u sections:\n", _r.symbols->nsect_mem); - for (int ii = 0; ii < _r.symbols->nsect_mem; ii++) + _r.symbols_main = symbolAcquire((char*)options.elfFile.c_str(), true, has_pc_samples); + assert( _r.symbols_main ); + printf("Loaded ELF with %u sections:\n", _r.symbols_main->nsect_mem); + for (int ii = 0; ii < _r.symbols_main->nsect_mem; ii++) { - auto mem = _r.symbols->mem[ii]; + auto mem = _r.symbols_main->mem[ii]; printf(" Section '%s': [0x%08lx, 0x%08lx] (%lu)\n", mem.name, mem.start, mem.start + mem.len, mem.len); } + if(options.elfBootloaderFile.size()) + { + printf("Loading ELF file %s for bootloader with%s source lines\n", options.elfBootloaderFile.c_str(), has_pc_samples ? "" : "out"); + _r.symbols_bootloader = symbolAcquire((char*)options.elfBootloaderFile.c_str(), true, has_pc_samples); + assert( _r.symbols_bootloader ); + printf("Loaded ELF with %u sections:\n", _r.symbols_bootloader->nsect_mem); + for (int ii = 0; ii < _r.symbols_bootloader->nsect_mem; ii++) + { + auto mem = _r.symbols_bootloader->mem[ii]; + printf(" Section '%s': [0x%08lx, 0x%08lx] (%lu)\n", mem.name, mem.start, mem.start + mem.len, mem.len); + } + _r.symbols = _r.symbols_bootloader; + }else{ + _r.symbols = _r.symbols_main; + } + + printf("Initialize Mortrall (Instruction tracing)\n"); + mortrall.init(perfetto_trace,ftrace,options.cps, options.verbose,_r.symbols_main,_r.symbols_bootloader,_handleTSFromETM,_switchSymbols,options.dbg_cc); stream = streamCreateFile( options.file.c_str() ); + genericsReport( V_INFO, "Process Stream" EOL ); while ( true ) { size_t receivedSize; @@ -1053,7 +1144,7 @@ int main(int argc, char *argv[]) if (result == RECEIVE_RESULT_EOF or result == RECEIVE_RESULT_ERROR) break; unsigned char *c = cbw; - while (receivedSize--) _protocolPump(*c++); + while (receivedSize--) _protocolPump(*c++,_itmPumpProcess,mortrall.dumpElement); fflush(stdout); } stream->close(stream); @@ -1095,6 +1186,9 @@ int main(int argc, char *argv[]) thread->set_tid(tid); thread->set_tgid(PID_TSK); } + auto *thread = process_tree->add_threads(); + thread->set_tid(100); + thread->set_tgid(PID_TSK); } { auto *process = process_tree->add_processes(); @@ -1181,6 +1275,7 @@ int main(int argc, char *argv[]) // thread->set_name(buffer); // } } + mortrall.finalize(process_tree); } diff --git a/ext/orbetto/subprojects/croaring.wrap b/ext/orbetto/subprojects/croaring.wrap new file mode 100644 index 0000000..8ae8682 --- /dev/null +++ b/ext/orbetto/subprojects/croaring.wrap @@ -0,0 +1,4 @@ +[wrap-git] +directory = CRoaring +url = https://github.com/RoaringBitmap/CRoaring.git +revision = ad487efbb48637445e6a5eee10c81cc9fdd7bf76 \ No newline at end of file diff --git a/ext/orbetto/subprojects/orbuculum.wrap b/ext/orbetto/subprojects/orbuculum.wrap index 364798a..81b4938 100644 --- a/ext/orbetto/subprojects/orbuculum.wrap +++ b/ext/orbetto/subprojects/orbuculum.wrap @@ -1,3 +1,3 @@ [wrap-git] url = https://github.com/niklaut/orbuculum.git -revision = 81b64da +revision = Mortrall diff --git a/src/emdbg/bench/data/fmu_v5x_orbtrace.cfg b/src/emdbg/bench/data/fmu_v5x_orbtrace.cfg index 7a0b694..4b1cb4f 100644 --- a/src/emdbg/bench/data/fmu_v5x_orbtrace.cfg +++ b/src/emdbg/bench/data/fmu_v5x_orbtrace.cfg @@ -1,5 +1,5 @@ source [find interface/cmsis-dap.cfg] -cmsis-dap vid_pid 0x1209 0x3443 +# cmsis-dap vid_pid 0x1209 0x3443 transport select swd source [find target/stm32f7x.cfg] diff --git a/src/emdbg/bench/data/fmu_v6x_orbtrace.cfg b/src/emdbg/bench/data/fmu_v6x_orbtrace.cfg index f6c3bc0..678c0e5 100644 --- a/src/emdbg/bench/data/fmu_v6x_orbtrace.cfg +++ b/src/emdbg/bench/data/fmu_v6x_orbtrace.cfg @@ -1,5 +1,5 @@ source [find interface/cmsis-dap.cfg] -cmsis-dap vid_pid 0x1209 0x3443 +#cmsis-dap vid_pid 0x1209 0x3443 transport select swd source [find target/stm32h7x_dual_bank.cfg] diff --git a/src/emdbg/debug/data/cortex_m.gdb b/src/emdbg/debug/data/cortex_m.gdb index 7a7a732..75a4d4f 100644 --- a/src/emdbg/debug/data/cortex_m.gdb +++ b/src/emdbg/debug/data/cortex_m.gdb @@ -196,12 +196,29 @@ define px4_enable_trace_stm32h7 set *($TPIUBASE+0x304) = 0x00000102 end +define px4_etm_trace_tpiu_swo_stm32f7 + px4_reset + + enableSTM32TRACE 4 3 1 + + px4_configure_orbuculum + + startETM + + # -o trace.swo dumps the RAW data, not the demuxed data!!! + shell killall orbuculum + shell orbuculum -O "-T4" & + shell sleep 1 + shell nc localhost 3443 > trace.swo & +end + + define px4_trace_tpiu_swo_stm32f7 px4_reset tbreak nx_start continue - enableSTM32TRACE 4 3 + enableSTM32TRACE 4 3 1 px4_configure_orbuculum @@ -212,6 +229,21 @@ define px4_trace_tpiu_swo_stm32f7 shell nc localhost 3443 > trace.swo & end +define px4_etm_trace_tpiu_swo_stm32h7 + px4_reset + + px4_enable_trace_stm32h7 4 + + px4_configure_orbuculum + + startETM + + # -o trace.swo dumps the RAW data, not the demuxed data!!! + shell killall orbuculum + shell orbuculum -O "-T4" & + shell sleep 1 + shell nc localhost 3443 > trace.swo & +end define px4_trace_tpiu_swo_stm32h7 px4_reset diff --git a/src/emdbg/debug/data/orbuculum.gdb b/src/emdbg/debug/data/orbuculum.gdb index 6c1823b..f586a4c 100644 --- a/src/emdbg/debug/data/orbuculum.gdb +++ b/src/emdbg/debug/data/orbuculum.gdb @@ -88,6 +88,7 @@ set $ITMBASE=0xE0000000 set $TPIUBASE=0xE0040000 set $ETMBASE=0xE0041000 + define _setAddressesSTM32 # Locations in the memory map for interesting things on STM32 set $CPU = $CPU_STM32 @@ -165,6 +166,7 @@ set $TRCVDARCCTLR=$ETM4BASE+0x0a8 set $TRCDEVARCH=$ETM4BASE+0xFBC # Trace IDR base address set $TRCIDR0=$ETM4BASE+0x1E0 +set $TRCIDR3=$ETM4BASE+0x1EC end # Trace Address comparator Value registers set $TRCACVR0=$ETMBASE+0x400 @@ -257,6 +259,59 @@ define _startETMv4 end end # ==================================================================== +define _startETMv4_modified + echo ETMv4 Tracing is enabled\n + # Enable the return stack, global timestamping, Context ID, and Virtual context identifier tracing. + # Disabled global timestamp to not interfere with cycle count + set *($TRCCONFIGR) = 0x000010C1 + + # Disable all event tracing. + set *($TRCEVENTCTL0R) = 0 + set *($TRCEVENTCTL1R) = 0 + + # Stalling is not possible on h7 and f7 + # Disable or enable stalling for instructions, if implemented + set *($TRCSTALLCTLR) = (1<<13)|(1<<8)|(0x0f<<0) + + # Fixed on STM32f7 + # Trace sync every 1024 bytes of trace + set *($TRCSYNCPR) = 0x0c + + # Do we want branch broadcasting? + set *$TRCACVR0=0 + set *$TRCACVR1=0xFFFFFFFF + set *$TRCACATR0=0 + set *$TRCACATR1=0 + set *($TRCCONFIGR) |= ($br_out<<3) + set *($TRCBBCTLR) = ($br_out<<8)|0x03 + + # enable cycle count + set *($TRCCONFIGR) |= 0x10 + set *($TRCCCCTLR) |= 0x10 + + # Trace on ID 2 + set *($TRCTRACEIDR) = 2 + + # Disable timestamp event (This does not do anything) + set *($TRCTSCTLR) = 0 + + # Enable ViewInst to trace everything + set *($TRCVICTLR) = 0x201 + + # No address range filtering for ViewInst + set *($TRCVIIECTLR) = 0 + + # No start or stop points for ViewInst + set *($TRCVISSCTLR) = 0 + + # ...and start + set *($TRCPRGCTLR) |= (1<<0) + + while (((*$TRCSTATR)&(1<<0))==1) + echo Wait for trace not idle\n + end +end +# ==================================================================== define _startETMv35 # Allow access to device @@ -329,8 +384,10 @@ define startETM if (((*$TRCDEVARCH)&0xfff0ffff) ==0x47704a13) - _startETMv4 + echo ETMv4 version active \n + _startETMv4_modified else + echo ETMv3 version active \n _startETMv35 end @@ -891,6 +948,7 @@ define enableSTM32TRACE set $bits=4 set $drive=1 + set $remap=0 if $argc >= 1 set $bits = $arg0 @@ -907,6 +965,10 @@ define enableSTM32TRACE help enableSTM32TRACE end + if $argc >= 3 + set $remap = $arg2 + end + set $bits = $bits-1 set $CPU=$CPU_STM32 @@ -923,15 +985,20 @@ define enableSTM32TRACE enableSTM32Pin 4 2 $drive enableSTM32Pin 4 3 $drive - if ($bits>0) + if ($bits>=1) # Setup PE4 enableSTM32Pin 4 4 $drive end - if ($bits>1) - # Setup PE5 & PC12 - enableSTM32Pin 4 5 $drive - enableSTM32Pin 2 12 $drive + if ($bits>=2) + # Setup PE5 & PC12 + enableSTM32Pin 4 5 $drive + #enableSTM32Pin 4 6 $drive + if ($remap) + enableSTM32Pin 2 12 $drive + else + enableSTM32Pin 4 6 $drive + end end # Set number of bits in DBGMCU_CR