Skip to content

Instantly share code, notes, and snippets.

@karthick18
Created December 28, 2010 09:26
Show Gist options
  • Save karthick18/757086 to your computer and use it in GitHub Desktop.
Save karthick18/757086 to your computer and use it in GitHub Desktop.
Test code to try and reproduce a kernel panic in __mark_inode_dirty triggered when USB is pulled while logs were being written as explained in the comments at the header of the code. The issue was first reproduced by my daughter through her pranks :-) She
/*
* The below code is an effort to reproduce a kernel panic. (not successful yet)
* Trying hard to reproduce a kernel panic in my laptop(SMP dual-core T7100, 4gig ram) running 2.6.36-rc8 (linus-git)
* The panic is consistently reproducible with our distributed product runtime and log directories
* mount binded to a USB drive in my laptop(ext4).
* The trick is to pull the USB drive (ejecting) while our processes are up.
* It always crashes the kernel in __mark_inode_dirty while trying to resolve a write protect page fault in our log server process
* which flushes from shared memory to disk (in this case, the disk was mount binded to the USB drive)
* The panic is a result of the inodes backing device marker becoming NULL (probably the inode flag check in __mark_inode_dirty: fs/fs-writeback.c: 978) for skipping an inode based on its state needs to be fixed.
* But can check that only if I am able to reproduce the issue through a
* test code like below which doesn't involve our product code. (isolation)
* Saw the issue first when my 4 and a half year old daughter pulled the USB cable while I was working :-)
* After that, efforts to isolate the problem by trying to crash the kernel with a generic code which tries
* to fork multiple flusher child processes that flush from the parents log shared segment to the file thats expected
* to be mounted to an USB drive thats supposed to be ejected while the log flushes are in progress.
* Unsuccessful till now while trying to eject the USB drive.
*
* To be run as: ./crash_kernel -c <num_log_files_to_be_created> -s <each_log_file_size>
* Example: ./crash_kernel -c 10 -s 25m would create 10 files with 10 log flusher processes flushing from its
* shared memory to backing storage mount binded to USB.
* The reason a shared segment flusher is used is to try and reproduce the timing to be more or less similar to our
* products log server behavior since this isn't reproducible even with a straight-forward "dd"
* This should be a timing issue considering that the inode state should have been valid for it to be eligible
* to be marked dirty and the panic backtrace shows that the kernel was trying to resolve a page fault on
* the mmapped page with a backing storage. Even with forced page faults on write errors like below, the issue isn't reproducible.
*
* Don't believe a git pull might resolve the issue even though I am running a slightly old kernel than the current
* since I don't see fixes/changes to fs-writeback.c in the latest.
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <assert.h>
#include <unistd.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <getopt.h>
#include <sys/stat.h>
#include <pthread.h>
#include <ctype.h>
#include <string.h>
#include <semaphore.h>
#include <time.h>
#include <signal.h>
#include <sys/wait.h>
#include <errno.h>
#undef MIN
#define MIN(a, b) ( (a) < (b) ? (a) : (b) )
#define USB_DIR "/root"
#define FLUSH_DIR "crash_test"
#define SHM_SEGMENT_SIZE (1 << 20)
#define SHM_SEGMENT_FLUSH_RATE (20)
#define LOG_FILE_SIZE_DEFAULT (50LL << 20) /* 50 mb */
#define LOG_FILES_DEFAULT 0x5
#define MAX_ROTATIONS (0x4) /* power of 2 */
#define ROTATION_MASK (MAX_ROTATIONS - 1)
#define SHM_WRITE_SIZE (SHM_SEGMENT_SIZE - sizeof(struct log_shm_ctl))
struct log_shm_ctl
{
sem_t sem;
int read_index;
int write_index;
} __attribute__ ((aligned(8)));
static volatile int exitme;
static char crash_dir[0xff+1];
static char *fault_map;
static const int fault_size = 1<<20U; /*faultin 1 mb during the write error*/
static unsigned long long filesize = LOG_FILE_SIZE_DEFAULT;
static int verbose;
#define LDEBUG(fmt, arg...) do { if(__builtin_expect(verbose, 0)) printf(fmt, ##arg); } while(0)
#ifdef SEM_RETRY /* required to test sem_init or futex initializes with non-process shared flag used with processes :P */
#define SEM_WAIT(sem) ({ \
int __err = 0; \
do { \
struct timespec __t; \
clock_gettime(CLOCK_REALTIME, &__t); \
__t.tv_sec += 1; \
__err = sem_timedwait((sem), &__t); \
} while (__err && ( errno == EINTR || errno == ETIMEDOUT)); \
__err; \
})
#else
#define SEM_WAIT(sem) sem_wait(sem)
#endif
#define SEM_POST(sem) sem_post(sem)
static void sigint_handler(int sig)
{
exitme = 1;
}
/*
* We halt when 1 byte of space is remaining as an indicator to throttle the writer or mark space_full
*/
static int space_free(struct log_shm_ctl *ctl)
{
if(ctl->read_index == ctl->write_index)
return SHM_WRITE_SIZE-1;
return (( ctl->read_index + SHM_WRITE_SIZE - ctl->write_index) % SHM_WRITE_SIZE) - 1;
}
/*
* Called with shm lock held.
*/
static void log_shm_write(struct log_shm_ctl *ctl, const char *fmt, ...)
{
char buf[0xff+1];
int len;
int space;
char *data;
va_list ptr;
int pid = getpid();
int err;
va_start(ptr, fmt);
len = vsnprintf(buf, sizeof(buf), fmt, ptr);
va_end(ptr);
data = (char*)(ctl+1);
/*
* Block till space is available.
*/
while(!exitme && !(space=space_free(ctl)))
{
SEM_POST(&ctl->sem);
usleep(1000);
LDEBUG("Log writer process [%d] going to grab the lock in shm_write as its full\n", pid);
err = SEM_WAIT(&ctl->sem);
if(!err)
LDEBUG("Log writer process [%d] grabbed the lock in shm_write as its full\n", pid);
else
LDEBUG("Log writer process [%d] failed to grab the lock in shm_write with error [%s]\n",
pid, strerror(errno));
}
if(exitme) return;
len = MIN(len, space);
if(ctl->write_index >= ctl->read_index)
len = MIN(len, SHM_WRITE_SIZE - ctl->write_index);
else
len = MIN(len, ctl->read_index - ctl->write_index - 1);
memcpy(data + ctl->write_index, buf, len);
data[ctl->write_index + len - 1] = '\n' ; /*truncate with a newline*/
ctl->write_index += len;
if(ctl->write_index == ctl->read_index)
ctl->write_index = 0;
}
/*
* Called with shm lock held.
*/
static void log_shm_read(struct log_shm_ctl *ctl, char *data, int *size)
{
int count;
int max_size = *size;
char *map = (char*)(ctl+1);
pid_t pid = getpid();
int err;
while(!exitme && ctl->read_index == ctl->write_index)
{
SEM_POST(&ctl->sem);
usleep(1000);
LDEBUG("Log flusher task [%d] going to grab the lock in shm read as shm is empty\n", pid);
err = SEM_WAIT(&ctl->sem);
if(!err)
LDEBUG("Log flusher task [%d] grabbed the lock in shm read as shm is empty\n", pid);
else
LDEBUG("Log flusher task [%d] failed to grab the lock in shm read with error [%s]\n",
pid, strerror(errno));
}
if(ctl->read_index <= ctl->write_index)
count = MIN(max_size, ctl->write_index - ctl->read_index);
else
count = MIN(max_size, SHM_WRITE_SIZE - ctl->read_index);
memcpy(data, map + ctl->read_index, count);
ctl->read_index += count;
if(ctl->read_index == SHM_WRITE_SIZE)
ctl->read_index = 0;
*size = count;
}
static void do_write(int fd, const char *buf, int bytes)
{
int n = bytes;
int len;
while(n > 0)
{
len = write(fd, buf, n);
if(len <= 0)
{
if(len < 0)
perror("write:");
/*
*force a break cow at this point
*/
if(fault_map)
{
memset(fault_map, 0xa5, fault_size);
msync(fault_map, fault_size, MS_SYNC);
}
break;
}
buf += len;
n -= len;
}
fsync(fd);
}
static int flusher_task(int tid)
{
int fd;
int shm_fd;
char filename[0xff+1];
int rotations = 0;
int cur_filesize = 0;
char *map;
struct log_shm_ctl *ctl;
static char *shared_data;
int count = 0;
pid_t pid = getpid();
int err;
fprintf(stderr, "Log flusher task [%d] for tid [%d]\n", pid, tid);
snprintf(filename, sizeof(filename), "/CRASH_%d", tid);
shm_fd = shm_open(filename, O_RDWR, 0777);
if(shm_fd < 0)
{
perror("shm_open:");
goto out;
}
map = mmap(0, SHM_SEGMENT_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, shm_fd, 0);
if(map == MAP_FAILED)
{
perror("mmap:");
goto out_close;
}
ctl = (struct log_shm_ctl*)map;
snprintf(filename, sizeof(filename), "%s/%d", crash_dir, tid);
fd = open(filename, O_CREAT | O_TRUNC | O_RDWR, 0777);
if(fd < 0)
{
perror("open:");
goto out_unmap;
}
shared_data = calloc(1, SHM_SEGMENT_SIZE);
assert(shared_data);
SEM_WAIT(&ctl->sem);
while(!exitme)
{
count = SHM_WRITE_SIZE;
log_shm_read(ctl, shared_data, &count);
if(!count) continue;
SEM_POST(&ctl->sem); /* drop the lock*/
if(cur_filesize + count < filesize)
{
do_write(fd, shared_data, count);
cur_filesize += count;
}
else
{
register int i;
int write_size;
char fromfile[0xff+1];
char tofile[0xff+1];
write_size = (int)(filesize - cur_filesize);
do_write(fd, shared_data, write_size);
fsync(fd); /* sync before rotate*/
count -= write_size;
for(i = rotations - 1; i >= 0; --i)
{
snprintf(fromfile, sizeof(fromfile), "%s/%d.%d", crash_dir, tid, i);
snprintf(tofile, sizeof(tofile), "%s/%d.%d", crash_dir, tid, i+1);
unlink(tofile);
rename(fromfile, tofile);
}
snprintf(fromfile, sizeof(fromfile), "%s/%d", crash_dir, tid);
snprintf(tofile, sizeof(tofile), "%s/%d.0", crash_dir, tid);
unlink(tofile);
rename(fromfile, tofile);
++rotations;
rotations &= ROTATION_MASK;
close(fd);
fd = open(filename, O_RDWR | O_CREAT | O_TRUNC, 0777);
if(count > 0)
{
do_write(fd, shared_data + write_size, count);
}
cur_filesize = count;
}
LDEBUG("Log flusher task [%d] going to grab the lock after shm write\n", pid);
err = SEM_WAIT(&ctl->sem);
if(!err)
LDEBUG("Log flusher task [%d] grabbed the lock after shm write\n", pid);
else
LDEBUG("Log flusher task [%d] failed to grab the lock after shm write with error [%s]\n",
pid, strerror(errno));
}
SEM_POST(&ctl->sem);
free(shared_data);
close(fd);
out_unmap:
munmap(map, SHM_SEGMENT_SIZE);
out_close:
close(shm_fd);
out:
return 0;
}
static void *log_thread(void *arg)
{
int tid = (int)(unsigned long)arg;
pid_t pid = getpid();
int fd;
char filename[0xff+1];
char *map;
register int i;
struct log_shm_ctl *ctl;
pid_t flusher_pid;
int err;
/*
* Mmap the shared segment and create a flusher thread.
*/
snprintf(filename, sizeof(filename), "/CRASH_%d", tid);
fd = shm_open(filename, O_RDWR, 0777);
if(fd < 0)
{
perror("shm_open:");
goto out;
}
map = mmap(0, SHM_SEGMENT_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
if(map == MAP_FAILED)
{
perror("mmap:");
goto out;
}
ctl = (struct log_shm_ctl*)map;
err = sem_init(&ctl->sem, 1, 1);
if(err < 0)
{
perror("sem_init:");
assert(0);
}
ctl->read_index = ctl->write_index = 0;
SEM_WAIT(&ctl->sem);
/*
* test write and create flusher.
*/
log_shm_write(ctl, "This is test write start\n");
if( !(flusher_pid = fork() ))
{
flusher_task(tid);
exit(0);
} else signal(SIGCHLD, SIG_IGN);
while(!exitme)
{
for(i = 0; i < SHM_SEGMENT_FLUSH_RATE; ++i)
log_shm_write(ctl, "This is test write %d to crash the kernel\n", i);
if(exitme) break;
SEM_POST(&ctl->sem);
usleep(1000); /* breather for the flusher to run */
LDEBUG("Log writer [%d], flusher [%d] grabbing the lock before batch shm write\n", pid, flusher_pid);
err = SEM_WAIT(&ctl->sem);
if(!err)
LDEBUG("Log writer [%d], flusher [%d] grabbed the lock after batch shm write\n", pid, flusher_pid);
else
LDEBUG("Log writer [%d], flusher [%d] failed to grab the lock after batch shm write with error [%s]\n",
pid, flusher_pid, strerror(errno));
}
SEM_POST(&ctl->sem);
waitpid(flusher_pid, NULL, 0);
sem_destroy(&ctl->sem);
munmap(map, SHM_SEGMENT_SIZE);
close(fd);
LDEBUG("Exiting log thread [%d]\n", tid);
out:
return NULL;
}
static void crash_kernel(const char *usb_mnt, int files)
{
register int i;
int fault_fd = -1;
pthread_t *tids;
char fault_file[0xff+1];
snprintf(crash_dir, sizeof(crash_dir), "%s/%s", USB_DIR, FLUSH_DIR);
snprintf(fault_file, sizeof(fault_file), "%s/pagefault_file", crash_dir);
if(!access(crash_dir, F_OK))
{
char cmdbuf[0xff+1];
snprintf(cmdbuf, sizeof(cmdbuf), "rm -rf %s", crash_dir);
if(system(cmdbuf))
perror("system: ");
}
if(mkdir(crash_dir, 0755))
{
perror("mkdir:");
fprintf(stderr, "Unable to create crash test directory [%s]. Exiting ...\n", crash_dir);
exit(127);
}
for(i = 0; i < files; ++i)
{
char filename[0xff+1];
int fd;
snprintf(filename, sizeof(filename), "/CRASH_%d", i);
fd = shm_open(filename, O_CREAT | O_RDWR | O_TRUNC, 0777);
if(fd < 0)
{
perror("shm_open:");
assert(0);
}
if(ftruncate(fd, SHM_SEGMENT_SIZE) < 0)
{
perror("ftruncate:");
assert(0);
}
close(fd);
}
fault_fd = open(fault_file, O_RDWR | O_CREAT | O_TRUNC, 0777);
if(fault_fd < 0)
perror("Unable to open fault_file:");
else
{
if(ftruncate(fault_fd, fault_size) < 0)
perror("Unable to truncate fault_file:");
else
{
fault_map = mmap(0, fault_size, PROT_READ | PROT_WRITE, MAP_SHARED, fault_fd, 0);
if(fault_map == MAP_FAILED)
perror("Unable to mmap fault file:");
}
}
tids = calloc(files, sizeof(*tids));
assert(tids);
for(i = 0; i < files; ++i)
{
int err = pthread_create(&tids[i], NULL, log_thread, (void*)(unsigned long)i);
assert(err == 0);
}
for(i =0 ; i < files; ++i)
pthread_join(tids[i], NULL);
for(i = 0; i < files; ++i)
{
char filename[0xff+1];
snprintf(filename, sizeof(filename), "/CRASH_%d", i);
if(shm_unlink(filename))
perror("shm_unlink:");
}
if(fault_map)
munmap(fault_map, fault_size);
if(fault_fd >= 0) close(fault_fd);
unlink(fault_file);
}
static char *prog;
static volatile int usage(void)
{
fprintf(stderr, "[%s] -s log_file_size -c num_log_files -v | verbose\n", prog);
exit(127);
}
static unsigned long long get_size(const char *s)
{
unsigned long long size = 0;
int shift = 10;
char *last_char = NULL;
size = strtoull(s, &last_char, 10);
if(*last_char)
{
switch( tolower(*last_char) )
{
case 'k':
shift = 10;
break;
case 'm':
shift = 20 ;
break;
case 'g':
shift = 30;
break;
default:
fprintf(stderr, "Invalid suffix [%c] to size string [%s]. Using KB\n", *last_char, s);
shift = 10;
break;
}
}
size <<= shift;
return size;
}
int main(int argc, char **argv)
{
int count = LOG_FILES_DEFAULT;
int c;
if ( ( prog = strrchr(argv[0], '/') ) )
++prog;
else prog = argv[0];
opterr = 0;
while ( ( c = getopt(argc, argv, "vs:c:h") ) != EOF )
switch(c)
{
case 's':
filesize = get_size(optarg);
if(!filesize) filesize = LOG_FILE_SIZE_DEFAULT;
break;
case 'c':
{
char *last_char = NULL;
count = strtol(optarg, &last_char, 10);
if(*last_char)
{
fprintf(stderr, "Invalid count specified [%s]. Should be an integer\n", optarg);
exit(127);
}
if(!count) count = LOG_FILES_DEFAULT;
}
break;
case 'v':
verbose = 1;
break;
case 'h':
case '?':
default:
{
usage();
}
break;
}
if(optind != argc) usage();
signal(SIGINT, sigint_handler);
printf("Testing shared memory flushes with size [%lld], count [%d]\n", filesize, count);
printf("Press Ctrl-C to exit\n");
crash_kernel(USB_DIR, count);
return 0;
}
/*
* Local variables:
* c-file-style: "linux"
* compile-command: "gcc -Wall -g -o crash_kernel crash_kernel.c -lrt -lpthread"
* tab-width: 4
* End:
*/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment