/* * KTCPVS An implementation of the TCP Virtual Server daemon inside * kernel for the LINUX operating system. KTCPVS can be used * to build a moderately scalable and highly available server * based on a cluster of servers, with more flexibility. * * tcp_vs_logger.c: log requests finished by KTCPVS. * * Version: $Id$ * * Authors: Hai Long * Wensong Zhang * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * */ #define __KERNEL_SYSCALLS__ /* for waitpid */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "tcp_vs.h" EXPORT_SYMBOL(tcp_vs_log); static spinlock_t log_lock = SPIN_LOCK_UNLOCKED; static unsigned int log_head, log_tail; static char *log_buffer = NULL; static int logger_pid = 0; static char ktcpvs_logfile[] = "/var/log/ktcpvs.log"; static int enable_log; static DECLARE_WAIT_QUEUE_HEAD(log_wait); /* * Note: many code of ktcpvs logger is taken from TUX logger. */ /* * KTCPVS logging architecture: * * All fast threads share a common log-ringbuffer. (default size 1MB) * Log entries are binary and are padded to be cacheline aligned, this * ensures that there is no cache-pingpong between fast threads. * * The logger thread writes out pending log entries within 1 second * (buffer-cache writes data out within 5 seconds). The logger thread * gets activated once we have more than 25% of the log ringbuffer * filled - or the 1 second log timeout expires. * if more than 95% of the ringbuffer is filled, the log requests * will be discarded. * * * In this architecture: * 1) logging is not reliable. 2) timely (touches disk within 5 seconds), * 3) in the log-contention case the saturation behavior is still * write-clustered. * 4) if the logger thread can keep up then the coupling is completely * asynchronous and parallel. * * The binary log format gives us about 50% saved IO/memory bandwith * and 50% less on-disk used log space than the traditional W3C ASCII * format. * * (We might switch to raw IO though to write the logfile.) */ #define LOG_LEN 1024*1024 #define SOFT_LIMIT (LOG_LEN*25/100) #define HARD_LIMIT (LOG_LEN*95/100) #define HARD_RELAX_LIMIT (LOG_LEN*90/100) #define KTCPVS_LOGENTRY_ALIGN L1_CACHE_SHIFT #define ROUND_UP(x) (((((x)-1) >> KTCPVS_LOGENTRY_ALIGN) + 1) \ << KTCPVS_LOGENTRY_ALIGN) /**************************************************************************** * stop logging */ static inline void __throttle_logging(void) { enable_log = 0; mb(); } /**************************************************************************** * allocate a buffer for logging. * */ char * log_entry_reserve(int module_id, int size) { unsigned int next_head, pad, inc; char *str; if (!enable_log) return NULL; spin_lock(&log_lock); inc = 3 * sizeof(u32) + size; next_head = ROUND_UP(log_head + inc); pad = next_head - log_head - inc; if (next_head < LOG_LEN) { str = log_buffer + log_head; if (str > log_buffer + LOG_LEN) BUG(); log_head = next_head; } else { if (log_head < LOG_LEN) memset(log_buffer + log_head, 0, LOG_LEN - log_head); str = log_buffer; log_head = ROUND_UP(size); } if (str < log_buffer || str + inc >= log_buffer + LOG_LEN) BUG(); spin_unlock(&log_lock); /* * Log record signature - this makes finding the next entry * easier (since record length is variable), and makes the * binary logfile more robust against potential data corruption * and other damage. The signature also servers as a log format * version identifier. */ *(u32 *) str = 0x8888cafe; str += sizeof(u32); /* Log the module ID */ *(u32 *) str = module_id; str += sizeof(u32); /* * Log the request timestamp, in units of 'seconds since 1970'. */ *(u32 *) str = CURRENT_TIME; str += sizeof(u32); /* * pad with space to next cacheline, with an ending newline. * (not needed for the user-space log utility, but results in * a more readable binary log file, and reduces the amount * of cache pingpong.) */ memset(str + size, ' ', pad); return str; } /**************************************************************************** * commit the log. * */ inline void log_entry_commit(void) { int pending; spin_lock(&log_lock); pending = (log_head - log_tail) % LOG_LEN; spin_unlock(&log_lock); if (pending >= SOFT_LIMIT) wake_up(&log_wait); if (pending >= HARD_LIMIT) __throttle_logging(); } /**************************************************************************** * tcp_vs_log: API provided to log message. * It first allocate a buffer (len = size), then call the callback log_func * to record the message into the buffer, finally, commit this log action. * */ int tcp_vs_log(int module_id, logger log_func, int size, void *arg1, void *arg2) { char *buf; buf = log_entry_reserve(module_id, size); if (buf == NULL) return 1; log_func(buf, size, arg1, arg2); log_entry_commit(); return 0; } void flush_all_signals(void) { spin_lock_irq(¤t->sigmask_lock); flush_signals(current); recalc_sigpending(current); spin_unlock_irq(¤t->sigmask_lock); } /**************************************************************************** * write log message from buffer to disk */ static unsigned int writeout_log(void) { unsigned int len, pending, next_log_tail; mm_segment_t oldmm = get_fs(); struct file *log_filp; char *str; unsigned int ret; TCP_VS_DBG(5, "KTCPVS logger: opening log file {%s}.\n", ktcpvs_logfile); log_filp = filp_open(ktcpvs_logfile, O_CREAT | O_APPEND | O_WRONLY | O_LARGEFILE, 0600); if (!log_filp) { TCP_VS_ERR("KTCPVS: could not open log file {%s}!\n", ktcpvs_logfile); __set_current_state(TASK_INTERRUPTIBLE); schedule_timeout(HZ); return 0; } spin_lock(&log_lock); str = log_buffer + log_tail; if (log_head < log_tail) { len = LOG_LEN - log_tail; next_log_tail = 0; } else { len = log_head - log_tail; next_log_tail = log_head; } if (!len) goto out; spin_unlock(&log_lock); set_fs(KERNEL_DS); ret = log_filp->f_op->write(log_filp, str, len, &log_filp->f_pos); set_fs(oldmm); if (len != ret) { if (ret == -ENOSPC) { TCP_VS_ERR("trying to write logfile %s, but " "filesystem is full! Lost %d bytes of " "log data.\n", ktcpvs_logfile, len); } else { TCP_VS_ERR("log write %d != %d.\n", ret, len); TCP_VS_ERR("log_filp: %p, str: %p, len: %d " "str[len-1]: %d.\n", log_filp, str, len, str[len - 1]); } goto out_lock; } /* * Sync log data to disk: */ if (log_filp->f_op && log_filp->f_op->fsync) { down(&log_filp->f_dentry->d_inode->i_sem); log_filp->f_op->fsync(log_filp, log_filp->f_dentry, 1); up(&log_filp->f_dentry->d_inode->i_sem); } out_lock: spin_lock(&log_lock); out: log_tail = next_log_tail; pending = (log_head - log_tail) % LOG_LEN; spin_unlock(&log_lock); if (pending < HARD_RELAX_LIMIT) { enable_log = 1; mb(); } fput(log_filp); return pending; } static DECLARE_WAIT_QUEUE_HEAD(stop_logger_wait); static int stop_logger = 0; /**************************************************************************** * logger thread */ static int logger_thread(void *data) { DECLARE_WAITQUEUE(wait, current); mm_segment_t oldmm; daemonize(); oldmm = get_fs(); set_fs(KERNEL_DS); TCP_VS_INFO("logger thread started.\n"); sprintf(current->comm, "ktcpvs logger"); spin_lock_irq(¤t->sigmask_lock); siginitsetinv(¤t->blocked, 0); recalc_sigpending(current); spin_unlock_irq(¤t->sigmask_lock); if (log_buffer) BUG(); log_buffer = vmalloc(LOG_LEN); memset(log_buffer, 0, LOG_LEN); log_head = log_tail = 0; current->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY; add_wait_queue(&log_wait, &wait); for (;;) { TCP_VS_DBG(5, "logger does writeout - stop:%d.\n", stop_logger); while (writeout_log() >= SOFT_LIMIT) { if (stop_logger) break; } if (stop_logger) break; /* nothing */ ; TCP_VS_DBG(5, "logger does sleep - stop:%d.\n", stop_logger); __set_current_state(TASK_INTERRUPTIBLE); if (log_head != log_tail) { __set_current_state(TASK_RUNNING); continue; } schedule_timeout(HZ); TCP_VS_DBG(5, "logger back from sleep - stop:%d.\n", stop_logger); if (signal_pending(current)) { flush_all_signals(); while (waitpid(-1, NULL, WNOHANG) > 0) /* nothing */ ; } } remove_wait_queue(&log_wait, &wait); vfree(log_buffer); log_buffer = NULL; stop_logger = 0; wake_up(&stop_logger_wait); set_fs(oldmm); return 0; } /**************************************************************************** * start ktcpvs log thread */ void tcp_vs_log_thread_start(void) { enable_log = 1; logger_pid = kernel_thread(logger_thread, NULL, 0); if (logger_pid < 0) BUG(); } /**************************************************************************** * stop ktcpvs log thread */ void tcp_vs_log_thread_stop(void) { DECLARE_WAITQUEUE(wait, current); int ret; TCP_VS_INFO("stopping logger thread %d ...\n", logger_pid); __set_current_state(TASK_UNINTERRUPTIBLE); add_wait_queue(&stop_logger_wait, &wait); stop_logger = 1; wake_up(&log_wait); schedule(); __set_current_state(TASK_RUNNING); remove_wait_queue(&stop_logger_wait, &wait); ret = waitpid(logger_pid, NULL, __WCLONE); if (stop_logger) BUG(); TCP_VS_INFO("logger thread stopped!\n"); }