here's an example tap file for RHEL 4 update 2

classic Classic list List threaded Threaded
3 messages Options
Reply | Threaded
Open this post in threaded view
|

here's an example tap file for RHEL 4 update 2

Greg Bruno
i've recently been introduced to systemtap. and along the way, i found
the example files found on this list to be quite helpful. so, i
figured i'd post my tap file in the hopes that:

1) someone else will find it useful, and
2) someone will explain to me how i could have done things much more simply.

in the tap file, you'll see some code sections that i needed to work
around some issues with the version of systemtap that is shipped with
RHEL 4 update 2 (version 0.4) -- for example, i had to write some code
to get the return value and some more code for i386 to get the return
value if the function returns a 64-bit value.

this tap file also interacts with the proc file system in order to
dynamically change what is monitored (rather than reloading the tap
file).

the purpose of the tap file is to record all open, read, write, seek,
socket and close system calls for each program named in the
comma-separated list held in the global variable 'traced_apps'.

any and all comments are appreciated -- here's the code:

=======================================
/*
 * this code is used to put a file into the proc file system that is used
 * by this systemtap program to determine which programs to trace.
 *
 * the file is: /proc/sys/debug/traced-apps
 *
 * for example, to trace 'iozone', after starting this systemtap file,
 * on the command line execute:
 *
 *      echo "iozone" > /proc/sys/debug/traced-apps
 */

%{
#include <linux/sysctl.h>

#define MAX_TRACED_APPS_SIZE    1024

/*
 * comma-separated list of program names to trace. it is exposed through
 * the "traced-apps" sysctl entry below and starts out empty.
 */
char traced_apps[MAX_TRACED_APPS_SIZE] = "";

/*
 * scratch buffer, the same size as traced_apps, available for working on
 * a copy of the list.
 */
char working_traced_apps[MAX_TRACED_APPS_SIZE];

/*
 * the header handed back when the table is registered; it is needed again
 * to remove the table from the proc fs.
 */
struct ctl_table_header *app_trace_proc_fs;

/*
 * the leaf entry: a string sysctl named "traced-apps", mode 0644, backed
 * directly by the traced_apps buffer.
 */
static ctl_table app_trace_table[] = {
        {
                .procname       = "traced-apps",
                .ctl_name       = 1,
                .mode           = 0644,
                .data           = traced_apps,
                .maxlen         = sizeof(traced_apps),
                .proc_handler   = proc_dostring,
                .strategy       = sysctl_string,
        },

        { .ctl_name = 0 }
};

/*
 * the root table: hangs the leaf table under the "debug" directory
 * (CTL_DEBUG), giving /proc/sys/debug/traced-apps.
 */
static ctl_table app_trace_root_table[] = {
        {
                .procname = "debug",
                .ctl_name = CTL_DEBUG,
                .mode = 0555,
                .child = app_trace_table,
        },

        { .ctl_name = 0 }
};
%}


/*
 * fetch the function return value straight from the saved registers:
 * rax on x86_64, eax on i386. yields 0 when the probe context has no
 * register set.
 *
 * on i386 the register is cast to unsigned long before it is stored, so
 * callers see the raw 32-bit pattern (presumably zero-extended into the
 * script's long -- confirm against the translator's __retvalue type).
 */
function get_returnvalue:long () %{
        if (!CONTEXT->regs) {
                THIS->__retvalue = 0;
                return;
        }

#if defined (__x86_64__)
        THIS->__retvalue = CONTEXT->regs->rax;
#elif defined (__i386__)
        THIS->__retvalue = (unsigned long)CONTEXT->regs->eax;
#endif
%}

/*
 * on i386 a 'long long' result (e.g. from a seek) does not fit in one
 * register; this returns the upper 32 bits, read from edx. on x86_64
 * there is no separate high word, so the result is 0. it is also 0 when
 * the probe context has no register set.
 */
function get_hi_returnvalue:long () %{
        if (!CONTEXT->regs) {
                THIS->__retvalue = 0;
                return;
        }

#if defined (__x86_64__)
        THIS->__retvalue = 0;
#elif defined (__i386__)
        THIS->__retvalue = (unsigned long)CONTEXT->regs->edx;
#endif
%}


/*
 * get_fd() gives the file descriptor for the read, write, lseek, llseek
 * and close probes.
 *
 * on x86_64 it simply hands back the 'fd' value the probe passed in. on
 * i386 the argument is ignored and the value is read from ebx instead
 * (presumably a workaround for resolving $fd with systemtap 0.4 --
 * confirm). yields 0 when the probe context has no register set.
 */
function get_fd:long (fd) %{
        if (!CONTEXT->regs) {
                THIS->__retvalue = 0;
                return;
        }

#if defined (__x86_64__)
        THIS->__retvalue = THIS->fd;
#elif defined (__i386__)
        THIS->__retvalue = CONTEXT->regs->ebx;
#endif
%}

/*
 * return 1 if 'name' exactly matches one of the comma-separated entries
 * in traced_apps, otherwise 0.
 *
 * the list is scanned in place and never modified. it is deliberately not
 * copied into a shared scratch buffer and split with strsep(): probes can
 * fire at the same time on several cpus, and tokenizing one global buffer
 * from all of them lets each invocation corrupt the others' view of the
 * list.
 *
 * the scan is bounded by MAX_TRACED_APPS_SIZE so it stays inside the
 * buffer even if the string is being rewritten through /proc while a
 * probe runs.
 *
 * empty entries are compared as well ("a,,b" contains one), which only
 * matters when 'name' is itself empty.
 */
function is_traced_call:long (name:string) %{
        const char      *entry = traced_apps;
        const char      *limit = traced_apps + MAX_TRACED_APPS_SIZE;
        const char      *stop;
        size_t          want = strlen(THIS->name);

        for (;;) {
                /* find the end of the current entry */
                stop = entry;
                while (stop < limit && *stop != '\0' && *stop != ',')
                        stop++;

                /* same length and same bytes: an exact match */
                if ((size_t)(stop - entry) == want &&
                    !memcmp(entry, THIS->name, want)) {
                        THIS->__retvalue = 1;
                        return;
                }

                /* end of string (or of the buffer): no more entries */
                if (stop >= limit || *stop == '\0')
                        break;

                /* step over the comma */
                entry = stop + 1;
        }

        THIS->__retvalue = 0;
%}

/*
 * print the common prefix of a trace line: a microsecond timestamp, the
 * operation, the program name and the pid. no newline is printed; the
 * caller finishes the line.
 */
function output_header(op, name, pid) {
        stamp = string(gettimeofday_us());

        print(stamp . " op: " . op);
        print(" execname: " . name);
        print(" pid: " . pid);
}

/*
 * on return from sys_open in a traced program, log the returned fd.
 */
probe kernel.function("sys_open").return
{
        app = execname();

        if (is_traced_call(app)) {
                output_header("open", app, string(pid()));
                print(" fd: " . string(get_returnvalue()) . "\n");
        }
}

/*
 * on return from sys_socket in a traced program, log the returned fd.
 */
probe kernel.function("sys_socket").return
{
        app = execname();

        if (is_traced_call(app)) {
                output_header("socket", app, string(pid()));
                print(" fd: " . string(get_returnvalue()) . "\n");
        }
}

/*
 * on entry to sys_read in a traced program, log the fd and the requested
 * byte count.
 */
probe kernel.function("sys_read")
{
        app = execname();

        if (is_traced_call(app)) {
                output_header("read", app, string(pid()));
                print(" fd: " . string(get_fd($fd)));
                print(" count: " . string($count) .  "\n");
        }
}

/*
 * on entry to sys_write in a traced program, log the fd and the requested
 * byte count.
 */
probe kernel.function("sys_write")
{
        app = execname();

        if (is_traced_call(app)) {
                output_header("write", app, string(pid()));
                print(" fd: " . string(get_fd($fd)));
                print(" count: " . string($count) . "\n");
        }
}

/*
 * on entry to sys_lseek in a traced program, log the fd. the line is left
 * without a newline; the vfs_llseek return probe prints the offset and
 * ends it.
 */
probe kernel.function("sys_lseek")
{
        app = execname();

        if (is_traced_call(app)) {
                output_header("lseek", app, string(pid()));
                print(" fd: " . string(get_fd($fd)));
        }
}

/*
 * llseek is only called on i386.
 *
 * logs the fd on entry; like the lseek probe, the line is left without a
 * newline for the vfs_llseek return probe to finish.
 */
probe kernel.function("sys_llseek")
{
        app = execname();

        if (is_traced_call(app)) {
                output_header("llseek", app, string(pid()));
                print(" fd: " . string(get_fd($fd)));
        }
}

/*
 * on return from vfs_llseek in a traced program, print the resulting
 * offset followed by a newline. the offset is assembled from the low word
 * plus, when it is positive, the high word shifted up by 32 bits.
 */
probe kernel.function("vfs_llseek").return
{
        if (is_traced_call(execname())) {
                result = get_returnvalue();
                upper = get_hi_returnvalue();

                if (upper > 0) {
                        result = result + (upper << 32);
                }

                print(" offset: " . string(result) . "\n");
        }
}

/*
 * on entry to sys_close in a traced program, log the fd being closed.
 */
probe kernel.function("sys_close")
{
        app = execname();

        if (is_traced_call(app)) {
                output_header("close", app, string(pid()));
                print(" fd: " . string(get_fd($fd)) . "\n");
        }
}


/*
 * register app_trace_root_table with the sysctl layer and keep the
 * returned header in app_trace_proc_fs so that it can be unregistered
 * later. the result is not checked here.
 */
function init_proc_fs() %{
        app_trace_proc_fs = register_sysctl_table(app_trace_root_table, 1);
%}

/*
 * remove the sysctl table registered by init_proc_fs().
 *
 * the handle is checked first: it is still NULL if init_proc_fs() never
 * ran or if register_sysctl_table() failed, and unregistering a NULL
 * header would dereference it. the handle is cleared afterwards so a
 * second call is harmless.
 */
function deinit_proc_fs() %{
        if (app_trace_proc_fs) {
                unregister_sysctl_table(app_trace_proc_fs);
                app_trace_proc_fs = NULL;
        }
%}

/*
 * at startup: announce the trace and create the proc fs entry used to
 * select which programs are traced.
 */
probe begin
{
        log("trace-io: starting probe");
        init_proc_fs();
}

/*
 * at shutdown: announce the end of the trace and remove the proc fs entry
 * again.
 */
probe end
{
        log("trace-io: ending probe");
        deinit_proc_fs();
}

=======================================
Reply | Threaded
Open this post in threaded view
|

Re: here's an example tap file for RHEL 4 update 2

Martin Hunt
Thanks for sharing your script.  It is useful for us developers to see
how people are using systemtap and what problems they are having.  You
found some impressive workarounds for systemtap's limitations.

Systemtap is rapidly evolving, so there are some things you might be
able to do better if you can build a more recent systemtap. One thing I
can't think of is a simple way to support a list of executables to
monitor. Perhaps it's time to add a way to pass arguments to systemtap
scripts.

If you are only interested in tracing one executable at a time, you can
use the "-c" or "-x" options to stap.  You can also run multiple
systemtap scripts at once.

The syscall tapset is almost finished for i386 and is in the current
build tree. I don't know what its state was in the systemtap shipped
with RHEL4U2. Using that helps keep things cleaner.  So you might do
something like:

---
/* log each read issued by the target process (the pid given via -c/-x) */
probe kernel.syscall.read  {
  if(pid() == target())
        printf("read: %s\tfd: %d\tcount: %d\n", execname(), fd, count)
}
/* log each write issued by the target process (the pid given via -c/-x) */
probe kernel.syscall.write  {
  if(pid() == target())
        printf("write: %s\tfd: %d\tcount: %d\n", execname(), fd, count)
}
/*
 * log each open issued by the target process, with the file name copied
 * from user space and the open flags in hex
 */
probe kernel.syscall.open  {
  if(pid() == target())
        printf("open: %s\tfile: %s\tflags: %x\n", execname(),
                user_string(filename_uaddr), flags)
}
/* log the fd returned by each open issued by the target process */
probe kernel.syscall.open.return  {
  if(pid() == target())
        printf("open: %s\treturned fd: %d\n", execname(), retval())
}

---

Then when you use "-c" or "-x" with stap, target() returns the pid.
So you can use the above script and do
> stap -c foobar sys.stp OR
> stap -x 1234 sys.stp

Martin



Reply | Threaded
Open this post in threaded view
|

Re: here's an example tap file for RHEL 4 update 2

Frank Ch. Eigler

hunt wrote:

> [...] One thing I can't think of is a simple way to support a list
> of executables to monitor. Perhaps its time to add a way to pass
> arguments to systemtap scripts. [...]

When implemented, the feature in bug #1154 could make all the
on-the-fly configuration interaction look something like this:

/* set of program names to trace, keyed by name */
global probed
/*
 * writing a name to the file adds it to the set. the path matches the one
 * used by the read probe, so one file serves both directions.
 */
probe procfs("sys/debug/traced-apps").write {
  probed[$value] ++
}
/*
 * reading the file returns every name in 'probed', separated by single
 * spaces ('i' counts the names emitted so far, so no separator precedes
 * the first one)
 */
probe procfs("sys/debug/traced-apps").read {
  foreach (name in probed) $value .= (i++ > 0 ? " " : "") . name
}
/* true when 'name' has been added to the 'probed' set */
function is_traced_call(name) {
  return (name in probed)
}

Nice job with the rest of the simplified rewrite.

- FChE