// SPDX-License-Identifier: MIT
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include <getopt.h>
#include <errno.h>
#include <trace-cmd.h>
#include <traceeval-hist.h>

/* For filling in gaps to the function stack */
#define GAP_FUNC 1

static char *argv0;

static char *get_this_name(void)
{
	static char *this_name;
	char *arg;
	char *p;

	if (this_name)
		return this_name;

	arg = argv0;
	p = arg+strlen(arg);

	while (p >= arg && *p != '/')
		p--;
	p++;

	this_name = p;
	return p;
}

static void usage(void)
{
	char *p = get_this_name();

	printf("usage: %s trace.dat\n"
	       "\n",p);
	exit(-1);
}

static void __vdie(const char *fmt, va_list ap, int err)
{
	int ret = errno;
	char *p = get_this_name();

	if (err && errno)
		perror(p);
	else
		ret = -1;

	fprintf(stderr, "  ");
	vfprintf(stderr, fmt, ap);

	fprintf(stderr, "\n");
	exit(ret);
}

void die(const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	__vdie(fmt, ap, 0);
	va_end(ap);
}

void pdie(const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	__vdie(fmt, ap, 1);
	va_end(ap);
}

struct realloc_helper {
	long			size;
	char			data[];
};

struct stack_func {
	unsigned long long	func;
	unsigned long long	func_start_addr;
	unsigned long long	start_time;
	const char		*name;
	int			depth;
};

struct func_stack {
	unsigned long long	key;
	unsigned long long	time;
	unsigned long long	last_ts;
	unsigned long long	first_ts;
	struct stack_func	**funcs;
	struct traceeval	*partial_stacks;
	cpu_set_t		*missed_cpus;
	int			nr_funcs;
	int			pid;
	int			cpu;
};

struct func_data {
	struct tep_handle		*tep;
	struct tep_format_field		*pid_field;
	struct traceeval		*stacks;
	struct traceeval		*save_stacks;
	char				**skip;
	int				cpu_size;
	int				nr_cpus;
	int				nr_skips;
	int				nr_stacks;
	int				nr_save_stacks;
	int				dump;
};

/* Must be a power of 2 */
#define REALLOC_BLOCK_SIZE 256
#define REALLOC_BLOCK_MASK (REALLOC_BLOCK_SIZE - 1)

/* Only realloc if we have to (saves some time) */
static void *do_realloc(void *data, long size)
{
	struct realloc_helper *rdata;
	long rsize = size + sizeof(long);
	long start;

	/* Allocate 256 bytes at a time */
	rsize = (rsize + REALLOC_BLOCK_SIZE) & ~REALLOC_BLOCK_MASK;

	if (data)
		rdata = data - sizeof(long);
	else
		rdata = NULL;

	if (!rdata || rdata->size < rsize) {
		start = rdata ? rdata->size : 0;
		rdata = realloc(rdata, rsize);
		if (!rdata)
			return NULL;
		data = rdata;
		memset(data + start, 0, rsize - start);
		rdata->size = rsize;
	}
	return &rdata->data;
}

static void do_free(void *data)
{
	struct realloc_helper *rdata;

	if (!data)
		return;

	rdata = data - sizeof(long);
	free(rdata);
}

static int hash_stack(struct traceeval *teval, const struct traceeval_type *type,
		      const union traceeval_data *data)
{
	const struct func_stack *fstack = data[0].pointer;

	return fstack->key;
}

static int cmp_stacks(struct traceeval *teval, const struct traceeval_type *types,
		      const union traceeval_data *A, const union traceeval_data *B)
{
	const struct func_stack *a = A->pointer;
	const struct func_stack *b = B->pointer;
	int i;

	if (a->key < b->key)
		return -1;
	if (a->key > b->key)
		return 1;

	if (a->nr_funcs < b->nr_funcs)
		return -1;
	if (a->nr_funcs > b->nr_funcs)
		return 1;

	for (i = 0; i < a->nr_funcs; i++) {
		if (a->funcs[i]->func_start_addr < b->funcs[i]->func_start_addr)
			return -1;
		if (a->funcs[i]->func_start_addr > b->funcs[i]->func_start_addr)
			return 1;
	}
	return 0;
}

struct traceeval_type pid_keys[] = {
	{
		.type = TRACEEVAL_TYPE_NUMBER,
		.name = "PID",
	},
	{
		.type = TRACEEVAL_TYPE_NONE,
	}
};

struct traceeval_type pid_vals[] = {
	{
		.type = TRACEEVAL_TYPE_POINTER,
		.name = "STACK",
	},
	{
		.type = TRACEEVAL_TYPE_NONE,
	}
};

struct traceeval_type stack_keys[] = {
	{
		.type = TRACEEVAL_TYPE_POINTER,
		.name = "STACK",
		.hash = hash_stack,
		.cmp = cmp_stacks,
	},
	{
		.type = TRACEEVAL_TYPE_NONE,
	}
};

struct traceeval_type stack_vals[] = {
	{
		.type = TRACEEVAL_TYPE_POINTER,
		.name = "STACK",
	},
	{
		.type = TRACEEVAL_TYPE_NONE,
	}
};

static void free_stack(struct func_stack *fstack)
{
	int i;

	/* TODO add release of pointers */
	traceeval_release(fstack->partial_stacks);

	for (i = 0; i < fstack->nr_funcs; i++)
		free(fstack->funcs[i]);
	do_free(fstack->funcs);

	free(fstack);
}

static void display_fstack(struct trace_seq *s, struct tep_handle *tep,
			   struct func_stack *fstack)
{
	int i;

	trace_seq_reset(s);

	trace_seq_puts(s, tep_data_comm_from_pid(tep, fstack->pid));
	trace_seq_putc(s, ';');

	for (i = 0; i < fstack->nr_funcs; i++) {
		/* Skip padding due to interrupts */
		if (fstack->funcs[i]->func == GAP_FUNC)
			continue;
		if (fstack->funcs[i]->name) {
			trace_seq_puts(s, fstack->funcs[i]->name);
			trace_seq_putc(s, ';');
		} else
			trace_seq_printf(s, "0x%llx;", fstack->funcs[i]->func);
	}
	trace_seq_printf(s, " %lld\n", fstack->time);
	trace_seq_do_printf(s);
}

static void copy_stack(struct func_stack *dst, struct func_stack *src)
{
	int i;

	*dst = *src;

	/* Do not save the partial stacks */
	dst->partial_stacks = NULL;
	dst->missed_cpus = NULL;

	dst->funcs = do_realloc(NULL, sizeof(*dst->funcs) * src->nr_funcs);
	if (!dst->funcs)
		pdie("Could not allocate new func stack");

	for (i = 0; i < src->nr_funcs; i++) {
		dst->funcs[i] = calloc(1, sizeof(**dst->funcs));
		if (!dst->funcs[i])
			die("Could not allocate func");
		*dst->funcs[i] = *src->funcs[i];
	}
}

#if 0
static struct func_stack *find_stack_by_stack(struct func_stack **stacks, int nr_stacks,
					      struct func_stack *stack)
{
	struct func_stack **item;

	item = bint_find(stack, stacks, nr_stacks, sizeof(stack),
			 cmp_stacks);

	return item ? *item : NULL;
}
#endif

static struct func_stack *get_func_stack_pid(struct func_data *fdata, int pid)
{
	struct func_stack *fstack;
	union traceeval_data keys[] = {
		{
			.number = pid,
		}
	};
	union traceeval_data vals[1] = {};
	const union traceeval_data *results;

	if (traceeval_query(fdata->stacks, keys, &results) > 0) {
		fstack = results[0].pointer;
		traceeval_results_release(fdata->stacks, results);
		return fstack;
	}

	fstack = calloc(1, sizeof(*fstack));
	if (!fstack)
		die("Allocating func");
	memset(fstack, 0, sizeof(*fstack));
	fstack->pid = pid;

	vals[0].pointer = fstack;

	traceeval_insert(fdata->stacks, keys, vals);
	return fstack;
}

static struct func_stack *find_func_stack_pid(struct func_data *fdata, int pid)
{
	union traceeval_data keys[] = {
		{
			.number = pid,
		}
	};
	const union traceeval_data *results;
	struct func_stack *fstack;

	if (traceeval_query(fdata->stacks, keys, &results) <= 0)
		return NULL;

	fstack = results[0].pointer;
	traceeval_results_release(fdata->stacks, results);
	return fstack;
}

static struct func_stack *get_func_save_stack(struct func_data *fdata, struct func_stack *stack)
{
	union traceeval_data keys[] = {
		{
			.pointer = stack,
		}
	};
	union traceeval_data vals[1] = { };
	const union traceeval_data *results;
	struct func_stack *fstack;

	if (traceeval_query(fdata->save_stacks, keys, &results) > 0) {
		fstack = results[0].pointer;
		traceeval_results_release(fdata->save_stacks, results);
		return fstack;
	}

	fstack = calloc(1, sizeof(*fstack));
	if (!fstack)
		die("Allocating func");
	memset(fstack, 0, sizeof(*fstack));
	copy_stack(fstack, stack);

	vals[0].pointer = fstack;
	traceeval_insert(fdata->save_stacks, keys, vals);

	return fstack;
}

static struct func_stack *add_partial_stack(struct func_stack *fstack,
					    struct func_stack *new_stack)
{
	union traceeval_data keys[] = {
		{
			.pointer = new_stack,
		}
	};
	union traceeval_data vals[1] = { };
	const union traceeval_data *results;
	struct func_stack *stack;

	if (!fstack->partial_stacks)
		fstack->partial_stacks = traceeval_init(stack_keys, stack_vals);

	if (traceeval_query(fstack->partial_stacks, keys, &results) > 0) {
		stack = results[0].pointer;
		traceeval_results_release(fstack->partial_stacks, results);
		return stack;
	}

	stack = calloc(1, sizeof(*stack));
	if (!stack)
		pdie("Failed to allocate partial stack function");

	copy_stack(stack, new_stack);

	vals[0].pointer = stack;

	traceeval_insert(fstack->partial_stacks, keys, vals);

	return stack;
}

static struct func_stack *get_func_stack_key(struct func_data *fdata, struct func_stack *fstack,
					     unsigned long long delta)
{
	static struct trace_seq seq;
	struct stack_func *func;
	int i;

	/* If the user didn't want functions to be traced, skip them */
	if (fdata->nr_skips) {
		func = fstack->funcs[fstack->nr_funcs - 1];
		for (i = 0; i < fdata->nr_skips; i++) {
			if (func->name && strcmp(func->name, fdata->skip[i]) == 0)
				return fstack;
		}
	}
	/*
	 * If this is a partial stack, then save it on the current stack for later
	 * modifications
	 */
	if (!fstack->funcs[0]->func)
		return add_partial_stack(fstack, fstack);

	if (fdata->dump) {
		if (!seq.buffer)
			trace_seq_init(&seq);
		fstack->time = delta;
		display_fstack(&seq, fdata->tep, fstack);
		return NULL;
	}

	return get_func_save_stack(fdata, fstack);
}

/*
 * A partial function stack (where the stack is missing the beginning) needs
 * an update (the current pushed function is equal to or less than the last
 * saved function, meaning the last saved function did not finish).
 *
 * To handle this case, drop the top of the stack, but use its timestamp as
 * the timestamp of the next on the stack. If there isn't one, then just
 * drop this entire stack and start a new one by resetting the current one.
 */
static struct func_stack *save_partial_func(struct func_stack *fstack, int depth)
{
	struct stack_func *func;
	unsigned long long ts;
	int i;

	/* We can only salvage stacks with more than one function saved on it */
	if (fstack->nr_funcs < 2)
		goto return_fstack;

	/* If this only had one defined function, just reuse it */
	if (fstack->funcs[fstack->nr_funcs - 2]->func == 0)
		goto return_fstack;

	/*
	 * We can only salvage an unfinished stack if all funcs are filled in
	 * between this depth and the last
	 */
	if (fstack->nr_funcs > depth + 1 && !fstack->funcs[depth + 1]->func)
		goto return_fstack;

	/*
	 * Now use the timestamp of the last function as the last know time
	 * of the second to last function.
	 */
	func = fstack->funcs[fstack->nr_funcs - 1];
	ts = func->start_time;

	fstack->key -= func->func_start_addr;
	fstack->nr_funcs--;

	func = fstack->funcs[fstack->nr_funcs - 1];
	fstack->time = ts - func->start_time;

	add_partial_stack(fstack, fstack);

 return_fstack:
	for (i = 0; i < fstack->nr_funcs; i++)
		free(fstack->funcs[i]);
	fstack->nr_funcs = 0;
	fstack->key = 0;
	return fstack;
}

static void set_func(struct func_data *fdata, struct func_stack *fstack, struct stack_func *func,
		     unsigned long long ip, unsigned long long ts)
{
	func->func = ip;
	func->start_time = ts;

	func->name = tep_find_function(fdata->tep, ip);
	/* It's quicker to compare addresses than string */
	if (func->name)
		func->func_start_addr = tep_find_function_address(fdata->tep, ip);
	else
		func->func_start_addr = ip;

	fstack->key += func->func_start_addr;
}

static struct stack_func **add_funcs(struct func_stack *fstack, int depth,
				     unsigned long long ts)
{
	struct stack_func **funcs;
	struct stack_func *func;
	int nr_new_funcs;
	int i;

	nr_new_funcs = depth - (fstack->nr_funcs - 1);

	fstack->nr_funcs = depth + 1;

	funcs = do_realloc(fstack->funcs, sizeof(*funcs) * fstack->nr_funcs);
	if (!funcs)
		pdie("Could not allocate func");

	fstack->funcs = funcs;

	/* Fill in blanks */
	for (i = depth; nr_new_funcs > 0; nr_new_funcs--, i--) {
		fstack->funcs[i] = calloc(1, sizeof(*func));
		func = fstack->funcs[i];
		if (!func)
			die("Could not allocate function\n");
		func->func = 0;
		func->start_time = ts;
		func->depth = i;
	}
	return funcs;
}

static void push_func(struct func_data *fdata, int pid, int cpu, int depth,
		      unsigned long long ip, unsigned long long ts)
{
	struct func_stack *fstack;
	struct stack_func *func;

	fstack = get_func_stack_pid(fdata, pid);
	if (!fstack->funcs) {
		fstack->first_ts = ts;
		fstack->missed_cpus = CPU_ALLOC(fdata->nr_cpus);
		CPU_ZERO_S(fdata->cpu_size, fstack->missed_cpus);
	}

	/* Ignore functions that are called after the sched_switch tracepoint */
	if (fstack->nr_funcs && !fstack->funcs[fstack->nr_funcs - 1]->start_time)
		return;

	/* Due to missed events, this may not match */
	if (fstack->nr_funcs && (fstack->nr_funcs != depth)) {

		/*
		 * Interrupts can cause the depth not to be in sync, for this case
		 * we need to fill the gaps. (Should we warn if depth is less than
		 * nr_funcs?)
		 */
		if (!CPU_ISSET_S(cpu, fdata->cpu_size, fstack->missed_cpus)) {
			int i;
			for (i = fstack->nr_funcs; i < depth; i++) {
				add_funcs(fstack, i, ts);
				fstack->funcs[i]->func = GAP_FUNC;
			}
		} else {
			fstack = save_partial_func(fstack, depth);
		}
	}

	CPU_ZERO_S(fdata->cpu_size, fstack->missed_cpus);

	add_funcs(fstack, depth, ts);

	func = fstack->funcs[depth];
	func->depth = depth;
	set_func(fdata, fstack, func, ip, ts);
}

#if 0
static void print_stack(struct func_stack *stack)
{
	int i;

	printf("Add partial stack: ");
	for (i = 0; i < stack->nr_funcs; i++) {
		if (i)
			printf(", ");
		if (stack->funcs[i]->func)
			printf("%s", stack->funcs[i]->name);
		else
			printf("NULL");
	}
	printf("\n");
}
#endif

static void restore_missed_events(struct func_data *fdata, struct func_stack *fstack, int pid,
				  unsigned long long val, unsigned long long ts,
				  int depth)
{
	struct traceeval_iterator *iter;
	const union traceeval_data *keys;
	struct func_stack *stack;
	int i;

	if (!fstack->partial_stacks)
		return;

	iter = traceeval_iterator_get(fstack->partial_stacks);
	while (traceeval_iterator_next(iter, &keys) > 0) {
		const union traceeval_data *results;

		if (traceeval_query(fstack->partial_stacks, keys, &results) < 1)
			continue;

		stack = results[0].pointer;
		traceeval_results_release(fstack->partial_stacks, results);
		
		if (stack->nr_funcs <= depth + 1)
			continue;
		if (stack->funcs[depth]->func)
			continue;

		traceeval_remove(fstack->partial_stacks, keys);

		if (stack->funcs[depth + 1]->func) {
			set_func(fdata, stack, stack->funcs[depth], val, stack->first_ts);
			if (!depth)
				get_func_save_stack(fdata, stack);
			else
				add_partial_stack(fstack, stack);
			free_stack(stack);
			continue;
		}

		if (0) {
			/* There's a gap, we have to get rid of this stack */
			fprintf(stderr, "drop stack [%d] ", depth);
			for (i = 1; i < stack->nr_funcs; i++) {
				fprintf(stderr, "%s ", stack->funcs[i]->name ?
					stack->funcs[i]->name : "><");
			}
			fprintf(stderr, " %lld\n",
				i > 1 ? stack->funcs[i - 1]->start_time : 0);
			free_stack(stack);
		}
	}
	traceeval_iterator_put(iter);
}

static void save_func_stack(struct func_data *fdata, struct func_stack *fstack,
			    unsigned long long ts)
{
	struct func_stack *save_fstack;
	struct stack_func *func;
	long long delta;
	int i;

	func = fstack->funcs[fstack->nr_funcs - 1];

	/* If the scheduler stopped it, ignore it */
	if (!func->start_time)
		return;

	delta = ts - func->start_time;
	save_fstack = get_func_stack_key(fdata, fstack, delta);

	for (i = 0; i < fstack->nr_funcs; i++)
		fstack->funcs[i]->start_time += delta;

	if (save_fstack)
		save_fstack->time += delta;
}

static void pop_func(struct func_data *fdata, int pid, unsigned long long ip,
		      unsigned long long ts, unsigned long long depth)
{
	struct func_stack *fstack;
	struct stack_func *func;
	const char *name;

	fstack = find_func_stack_pid(fdata, pid);
	/* If not found, nothing was created for it. */
	if (!fstack)
		return;

	/* Ignore functions that are called after the sched_switch tracepoint */
	if (fstack->nr_funcs && !fstack->funcs[fstack->nr_funcs - 1]->start_time)
		return;

	restore_missed_events(fdata, fstack, pid, ip, ts, depth);

	/* TODO, handle missed functions here */
	if (fstack->nr_funcs < 1)
		return;

	func = fstack->funcs[fstack->nr_funcs - 1];

	/* Could be that the entry was dropped */
	if (!func->func)
		set_func(fdata, fstack, func, ip, ts);

	/*
	 * If the functions do not match, then we need to drop it, but
	 * if the function did not exist, then continue and finish it.
	 */
	if (func->func != ip) {
		/* TODO review this code */
		name = tep_find_function(fdata->tep, ip);
		if (!name ||
		    tep_find_function_address(fdata->tep, ip) != func->func_start_addr)
			return;
	}

	save_func_stack(fdata, fstack, ts);

	fstack->key -= func->func_start_addr;
	fstack->nr_funcs--;
	free(fstack->funcs[fstack->nr_funcs]);
}

static struct tep_format_field *get_field(struct tep_event *event, const char *name)
{
	static struct tep_format_field *field;

	field = tep_find_field(event, name);
	if (!field)
		die("Could not find field %s for %s",
		    name, event->name);

	return field;
}

static int get_pid(struct func_data *fdata, struct tep_record *record)
{
	unsigned long long val;

	tep_read_number_field(fdata->pid_field, record->data, &val);
	return (val);
}

static int fgraph_enter(struct tracecmd_input *handle, struct tep_event *event,
			struct tep_record *record, int cpu, void *data)
{
	static struct tep_format_field *func_field;
	static struct tep_format_field *depth_field;
	struct func_data *fdata = data;
	unsigned long long depth;
	unsigned long long ip;
	int pid;

	pid = get_pid(fdata, record);

	if (!func_field) {
		func_field = get_field(event, "func");
		depth_field = get_field(event, "depth");
	}

	tep_read_number_field(depth_field, record->data, &depth);
	tep_read_number_field(func_field, record->data, &ip);

	push_func(fdata, pid, record->cpu, depth, ip, record->ts);

	return 0;
}

static int fgraph_exit(struct tracecmd_input *handle, struct tep_event *event,
		       struct tep_record *record, int cpu, void *data)
{
	static struct tep_format_field *func_field;
	static struct tep_format_field *depth_field;
	struct func_data *fdata = data;
	unsigned long long depth;
	unsigned long long ip;
	int pid;

	pid = get_pid(fdata, record);

	if (!func_field) {
		func_field = get_field(event, "func");
		depth_field = get_field(event, "depth");
	}

	tep_read_number_field(func_field, record->data, &ip);
	tep_read_number_field(depth_field, record->data, &depth);

	pop_func(fdata, pid, ip, record->ts, depth);

	return 0;
}

static void add_func(struct func_data *fdata, struct func_stack *fstack,
		      int depth, unsigned long long ip, unsigned long long ts)
{
	struct stack_func *func;

	add_funcs(fstack, depth, ts);
	func = fstack->funcs[depth];
	func->depth = depth;
	set_func(fdata, fstack, func, ip, ts);
}

static void process_func(struct func_data *fdata, int pid, int cpu,
			 unsigned long long ip, unsigned long long pip,
			 unsigned long long ts)
{
	struct func_stack *fstack;
	int depth;
	int i;

	fstack = get_func_stack_pid(fdata, pid);
	if (!fstack->funcs || !fstack->nr_funcs) {
		if (!fstack->funcs) {
			fstack->missed_cpus = CPU_ALLOC(fdata->nr_cpus);
			CPU_ZERO_S(fdata->cpu_size, fstack->missed_cpus);
		}
		fstack->first_ts = ts;
		/* First of this process. Add the parent and the child */
		add_func(fdata, fstack, 0, pip, ts);
		add_func(fdata, fstack, 1, ip, ts);
		return;
	}

	/* Functions between sched switch? */
	if (!fstack->funcs[fstack->nr_funcs - 1]->start_time)
		return;

	if (fstack->funcs[fstack->nr_funcs - 1]->func_start_addr == pip) {
		depth = fstack->funcs[fstack->nr_funcs - 1]->depth + 1;
		add_func(fdata, fstack, depth, ip, ts);
		return;
	}

	save_func_stack(fdata, fstack, ts);

	/* Locate pip */
	for (i = fstack->nr_funcs - 1; i >= 0; i--) {
		if (fstack->funcs[i]->func_start_addr == pip)
			break;
		fstack->key -= fstack->funcs[i]->func_start_addr;
		free(fstack->funcs[i]);
	}
	fstack->nr_funcs = i + 1;

	depth = i + 1;
	if (!depth) {
		add_func(fdata, fstack, 0, pip, ts);
		depth++;
	}

	add_func(fdata, fstack, depth, ip, ts);
}

static int function(struct tracecmd_input *handle, struct tep_event *event,
		    struct tep_record *record, int cpu, void *data)
{
	static struct tep_format_field *ip_field;
	static struct tep_format_field *parent_ip_field;
	struct func_data *fdata = data;
	unsigned long long ip;
	unsigned long long pip;
	int pid;

	pid = get_pid(fdata, record);

	if (!ip_field) {
		ip_field = get_field(event, "ip");
		parent_ip_field = get_field(event, "parent_ip");
	}

	tep_read_number_field(ip_field, record->data, &ip);
	tep_read_number_field(parent_ip_field, record->data, &pip);

	/* Set ip and pip to the start of the function */
	if (!tep_find_function_info(fdata->tep, ip, NULL, &ip, NULL))
		return 0;

	if (!tep_find_function_info(fdata->tep, pip, NULL, &pip, NULL))
		return 0;

	process_func(fdata, pid, record->cpu, ip, pip, record->ts);

	return 0;
}

static int sched_switch(struct tracecmd_input *handle, struct tep_event *event,
			struct tep_record *record, int cpu, void *data)
{
	static struct tep_format_field *prev_state_field;
	static struct tep_format_field *prev_pid_field;
	static struct tep_format_field *next_pid_field;
	struct func_data *fdata = data;
	struct func_stack *fstack;
	unsigned long long prev_state;
	unsigned long long prev_pid;
	unsigned long long next_pid;

	if (!prev_state_field) {
		prev_state_field = get_field(event, "prev_state");
		prev_pid_field = get_field(event, "prev_pid");
		next_pid_field = get_field(event, "next_pid");
	}

	/*
	 * Check the next task to see if it was scheduled out and ts was cleared.
	 * If it was then add back in the ts.
	 */
	tep_read_number_field(next_pid_field, record->data, &next_pid);
	fstack = find_func_stack_pid(fdata, next_pid);
	if (fstack) {
		if (fstack->funcs && !fstack->funcs[fstack->nr_funcs - 1]->start_time)
			fstack->funcs[fstack->nr_funcs - 1]->start_time = record->ts;
	}

	tep_read_number_field(prev_pid_field, record->data, &prev_pid);
	fstack = find_func_stack_pid(fdata, prev_pid);
	/* If not found, nothing was created for it. */
	if (!fstack || !fstack->nr_funcs)
		return 0;

	tep_read_number_field(prev_state_field, record->data, &prev_state);

	/* We want to track blocked and sleeping time */
	if (prev_state & 3)
		return 0;

	/* if the task is preempted (running) then save the current stack */
	save_func_stack(fdata, fstack, record->ts);
	fstack->funcs[fstack->nr_funcs - 1]->start_time = 0;

	return 0;
}

static int missed_events(struct tracecmd_input *handle, struct tep_event *event,
			 struct tep_record *record, int cpu, void *data)
{
	struct func_data *fdata = data;
	struct traceeval_iterator *iter = traceeval_iterator_get(fdata->stacks);
	const union traceeval_data *keys;

	while (traceeval_iterator_next(iter, &keys) > 0) {
		const union traceeval_data *results;
		struct func_stack *fstack;

		if (traceeval_query(fdata->stacks, keys, &results) <= 0)
			continue;

		fstack = results[0].pointer;
		CPU_SET_S(cpu, fdata->cpu_size, fstack->missed_cpus);
	}
	return 0;
}

static void display(struct func_data *fdata)
{
	struct traceeval_iterator *iter = traceeval_iterator_get(fdata->save_stacks);
	const union traceeval_data *keys;
	struct trace_seq seq;

	trace_seq_init(&seq);

	while (traceeval_iterator_next(iter, &keys) > 0) {
		const union traceeval_data *results;
		struct func_stack *fstack;

		if (traceeval_query(fdata->save_stacks, keys, &results) < 1)
			continue;
		fstack = results[0].pointer;
		traceeval_results_release(fdata->save_stacks, results);
		
		display_fstack(&seq, fdata->tep, fstack);
	}
}

static void add_skip(struct func_data *fdata, char *skip)
{
	char *p = skip;
	int i;

	for (i = 1; (p = strchr(p, ',')); i++, p++)
		;

	fdata->nr_skips = i;
	fdata->skip = calloc(i, sizeof(char *));
	if (!fdata->skip)
		die("Failed to allocate skip array");

	p = skip;
	for (i = 0; i < fdata->nr_skips; i++) {
		fdata->skip[i] = strtok(p, ",");
		p = NULL;
	}
}

int main (int argc, char **argv)
{
	struct tracecmd_input *handle;
	struct tep_event *event;
	struct func_data data;
	int c;

	memset(&data, 0, sizeof(data));
	argv0 = argv[0];

	while ((c = getopt(argc, argv, "dhs:")) >= 0) {
		switch (c) {
		case 'd':
			data.dump = 1;
			break;
		case 's':
			add_skip(&data, optarg);
			break;
		case 'h':
		default:
			usage();
		}
	}

	argc -= optind;
	argv += optind;

	if (argc < 1)
		usage();

	tracecmd_set_loglevel(TEP_LOG_NONE);

	handle = tracecmd_open(argv[0], TRACECMD_FL_LOAD_NO_PLUGINS);
	if (!handle)
		die("Failed to open %s\n", argv[0]);

	data.tep = tracecmd_get_tep(handle);
	if (!data.tep)
		die("Could not get tep handle");

	event = tep_get_first_event(data.tep);
	if (!event)
		die("tep handle has no events?");

	data.pid_field = tep_find_any_field(event, "common_pid");
	if (!data.pid_field)
		die("common_pid field not found");

	data.nr_cpus = tep_get_cpus(data.tep);
	data.cpu_size = CPU_ALLOC_SIZE(data.nr_cpus);

	data.stacks = traceeval_init(pid_keys, pid_vals);
	data.save_stacks = traceeval_init(stack_keys, stack_vals);

	tracecmd_follow_missed_events(handle, missed_events, &data);
	tracecmd_follow_event(handle, "ftrace", "funcgraph_entry", fgraph_enter, &data);
	tracecmd_follow_event(handle, "ftrace", "funcgraph_exit", fgraph_exit, &data);

	tracecmd_follow_event(handle, "ftrace", "function", function, &data);

	tracecmd_follow_event(handle, "sched", "sched_switch", sched_switch, &data);

	tracecmd_iterate_events(handle, NULL, 0, NULL, NULL);

	display(&data);
}
