Wednesday, August 14, 2013

Finding Linux Syscall Source Code

One of the joys of developing for Linux is the ability to look at the source code for syscalls.

Example 1: What happens when you call fsync() on a loopback fs?


First, check the glibc source for fsync(), to see if glibc adds any code, or directly calls into the kernel.

alex@laptop:~/src/glibc-2.15$ grep -r -n "^fsync" *
CANCEL-FCT-WAIVE:231:fsync
misc/fsync.c:24:fsync (fd)
sysdeps/unix/syscalls.list:16:fsync - fsync Ci:i __libc_fsync fsync
sysdeps/mach/hurd/fsync.c:26:fsync (fd)

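Opening misc/fsync.c, we see that this is just a stub file that always fails with ENOSYS. The real wrapper is generated from the fsync entry in sysdeps/unix/syscalls.list (see the grep output above), and therefore fsync is a thin wrapper around the syscall.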

#include <errno.h>
#include <unistd.h>

/* Make all changes done to FD actually appear on disk.
   Generic stub version: it performs no work on FD and always fails,
   reporting ENOSYS through __set_errno and returning -1.  */
int
fsync (fd)
     int fd;
{
  __set_errno (ENOSYS);
  return -1;
}


stub_warning (fsync)
#include <stub-tag.h>

The actual wrapper calls into the kernel; on error it returns -1 and sets errno from the error code returned by the kernel. See comments in sysdeps/unix/syscall-template.S for more information about glibc syscall stubs.

From here, we look for the syscall source code in the kernel.  The syscall source is wrapped with the SYSCALL_DEFINEn() macro, where n is the number of arguments.  In this case, SYSCALL_DEFINE1(fsync, unsigned int, fd):

alex@laptop:~/src/kernel$ grep -r -n -I "SYSCALL.*fsync" *
arch/xtensa/include/asm/unistd.h:74:__SYSCALL( 26, sys_fsync, 1)
arch/x86/include/asm/unistd_64.h:173:__SYSCALL(__NR_fsync, sys_fsync)
arch/powerpc/include/asm/systbl.h:124:SYSCALL_SPU(fsync)
arch/s390/kernel/syscalls.S:129:SYSCALL(sys_fsync,sys_fsync,sys32_fsync_wrapper)
fs/sync.c:201:SYSCALL_DEFINE1(fsync, unsigned int, fd)
include/asm-generic/unistd.h:253:__SYSCALL(__NR_fsync, sys_fsync)

Opening fs/sync.c, we see the actual source of fsync (function comments removed for brevity):

/*
 * Sync the range [start, end] of FILE by delegating to the file's own
 * fsync operation; start, end and datasync are passed through unchanged.
 * Returns -EINVAL when the file has no f_op table or no fsync hook,
 * otherwise whatever the hook returns.
 */
int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync)
{
/* Nothing to dispatch to without an fsync hook. */
if (!file->f_op || !file->f_op->fsync)
return -EINVAL;
return file->f_op->fsync(file, start, end, datasync);
}
EXPORT_SYMBOL(vfs_fsync_range);

/*
 * Convenience wrapper: calls vfs_fsync_range() with start 0 and end
 * LLONG_MAX, forwarding datasync, and returns its result.
 */
int vfs_fsync(struct file *file, int datasync)
{
return vfs_fsync_range(file, 0, LLONG_MAX, datasync);
}
EXPORT_SYMBOL(vfs_fsync);

/*
 * Look up the struct file for descriptor fd with fget() and run
 * vfs_fsync() on it.  Returns -EBADF when fget() yields no file,
 * otherwise the result of vfs_fsync().
 */
static int do_fsync(unsigned int fd, int datasync)
{
struct file *file;
/* Default result, returned unchanged if the lookup below fails. */
int ret = -EBADF;

file = fget(fd);
if (file) {
ret = vfs_fsync(file, datasync);
/* NOTE(review): presumably releases the reference taken by fget(). */
fput(file);
}
return ret;
}

/*
 * fsync syscall entry point, declared through SYSCALL_DEFINE1 with the
 * single argument fd.  Calls do_fsync() with datasync set to 0 and
 * returns its result.
 */
SYSCALL_DEFINE1(fsync, unsigned int, fd)
{
return do_fsync(fd, 0);
}

And down the rabbit hole we go.