scripts/trace: use ijson to parse the traces

Since the trace files can get very large (several GBs), parsing them
with Python's json module might require an infeasible amount of memory,
as it has to load the whole file into memory first.  The ijson [1]
library provides interfaces for parsing files iteratively, only loading
a small portion of a file at a time.

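Parsing iteratively requires the input JSON to list the tsc_rate and the
definitions of the tracepoints before the tracepoint entries, because
the script stops looking for definitions once it reaches the entries
array.  It's not a big deal, as this is the order in which `spdk_trace`
generates the file, but it's worth noting, as passing that file through
something like `jq -S` (which sorts object keys, moving the entries
ahead of the definitions) might make it unreadable to the trace script.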

[1] https://pypi.org/project/ijson

Signed-off-by: Konrad Sztyber <konrad.sztyber@intel.com>
Change-Id: I03c0c3fb47091da615a3978b8d63edf4d876b811
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/8275
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Mellanox Build Bot
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Tomasz Zawadzki <tomasz.zawadzki@intel.com>
Reviewed-by: Monica Kenguva <monica.kenguva@intel.com>
This commit is contained in:
Konrad Sztyber 2021-06-10 12:35:05 +02:00 committed by Tomasz Zawadzki
parent e61fbe91e7
commit 597688b2b1
4 changed files with 39 additions and 10 deletions

View File

@ -4,7 +4,7 @@ from argparse import ArgumentParser
from dataclasses import dataclass, field
from itertools import islice
from typing import Dict, List, TypeVar
import json
import ijson
import os
import re
import subprocess
@ -168,21 +168,40 @@ class TraceEntry:
class Trace:
"""Stores, parses, and prints out SPDK traces"""
def __init__(self, file):
self._json = json.load(file)
self._parser = ijson.parse(file)
self._objects = []
self._argfmt = {TracepointArgument.TYPE_PTR: lambda a: f'0x{a:x}'}
self.tpoints = {t.id: t for t in self._parse_tpoints()}
self.tsc_rate = self._json['tsc_rate']
self.tpoints = {}
self._parse_defs()
def _parse_tpoints(self):
for tpoint in self._json.get('tpoints', []):
yield Tracepoint(
name=tpoint['name'], id=tpoint['id'],
def _parse_tpoints(self, tpoints):
for tpoint in tpoints:
tpoint_id = tpoint['id']
self.tpoints[tpoint_id] = Tracepoint(
name=tpoint['name'], id=tpoint_id,
new_object=tpoint['new_object'],
args=[TracepointArgument(name=a['name'],
argtype=a['type'])
for a in tpoint.get('args', [])])
def _parse_defs(self):
builder = None
for prefix, event, value in self._parser:
# If we reach entries array, there are no more tracepoint definitions
if prefix == 'entries':
break
elif prefix == 'tsc_rate':
self.tsc_rate = value
continue
if (prefix, event) == ('tpoints', 'start_array'):
builder = ijson.ObjectBuilder()
if builder is not None:
builder.event(event, value)
if (prefix, event) == ('tpoints', 'end_array'):
self._parse_tpoints(builder.value)
builder = None
def _parse_entry(self, entry):
tpoint = self.tpoints[entry['tpoint']]
obj = entry.get('object', {})
@ -193,8 +212,15 @@ class Trace:
args={n.name: v for n, v in zip(tpoint.args, entry.get('args', []))})
def _entries(self):
for entry in self._json.get('entries', []):
yield self._parse_entry(entry)
builder = None
for prefix, event, value in self._parser:
if (prefix, event) == ('entries.item', 'start_map'):
builder = ijson.ObjectBuilder()
if builder is not None:
builder.event(event, value)
if (prefix, event) == ('entries.item', 'end_map'):
yield self._parse_entry(builder.value)
builder = None
def _annotate_args(self, entry):
annotations = {}

View File

@ -7,6 +7,7 @@ pacman -Sy --needed --noconfirm gcc make cmake cunit libaio openssl \
pacman -Sy --needed --noconfirm python-pexpect python-pip libffi
pip install configshell_fb
pip install pyelftools
pip install ijson
# Additional dependencies for DPDK
pacman -Sy --needed --noconfirm numactl nasm
# Additional dependencies for ISA-L used in compression

View File

@ -13,6 +13,7 @@ if ! pip3 install meson; then
apt-get install -y meson
fi
pip3 install pyelftools
pip3 install ijson
# Additional dependencies for SPDK CLI - not available on older Ubuntus
apt-get install -y python3-configshell-fb python3-pexpect || echo \
"Note: Some SPDK CLI dependencies could not be installed."

View File

@ -95,6 +95,7 @@ yum install -y python3-pip
pip3 install ninja
pip3 install meson
pip3 install pyelftools
pip3 install ijson
# Additional dependencies for SPDK CLI - not available in rhel and centos
if ! echo "$ID $VERSION_ID" | grep -E -q 'rhel 7|centos 7'; then