/* * KTCPVS An implementation of the TCP Virtual Server daemon inside * kernel for the LINUX operating system. KTCPVS can be used * to build a moderately scalable and highly available server * based on a cluster of servers, with more flexibility. * * Version: $Id$ * * Authors: Wensong Zhang * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * */ #include #include #include #include #include #include #include #include #include #include #include #include "tcp_vs.h" /* * FIXME: Locking needs reconsideration in the near future!!!!!!!!!!! * current proc read/write to configure ktcpvs is ugly, * we will probably replace it with system call or setsockopt finally. */ /* sysctl variables */ int sysctl_ktcpvs_unload = 0; #ifdef CONFIG_TCP_VS_DEBUG static int sysctl_ktcpvs_debug_level = 0; int tcp_vs_get_debug_level(void) { return sysctl_ktcpvs_debug_level; } #endif static struct tcp_vs dummy_vs = { start: 0, stop: 0, name: "dummy_vs", serverport: 8080, maxSpareServers: 10, minSpareServers: 2, startservers: 6, maxClients: 1000, }; int tcp_vs_sysctl_register(struct tcp_vs *vs); int tcp_vs_sysctl_unregister(struct tcp_vs *vs); struct proc_dir_entry *proc_net_ktcpvs_vs_create(struct tcp_vs *vs); void proc_net_ktcpvs_vs_release(struct tcp_vs *vs); /* * Lookup destination by {addr,port} in the given service */ struct tcp_vs_dest * tcp_vs_lookup_dest(struct tcp_vs *vs, __u32 daddr, __u16 dport) { struct tcp_vs_dest *dest; struct list_head *l, *e; read_lock_bh(&__tcp_vs_lock); /* * Find the destination for the given virtual server */ l = &vs->destinations; for (e=l->next; e!=l; e=e->next) { dest = list_entry(e, struct tcp_vs_dest, n_list); if ((dest->addr == daddr) && (dest->port == dport)) { /* HIT */ read_unlock_bh(&__tcp_vs_lock); return dest; } } read_unlock_bh(&__tcp_vs_lock); return NULL; } /* * Add a destination into an TCP virtual server */ int tcp_vs_add_dest(struct tcp_vs *vs, __u32 daddr, __u16 dport, int weight) { struct tcp_vs_dest *dest; EnterFunction(2); if (weight < 0) { TCP_VS_ERR("server weight less than zero\n"); return -ERANGE; } /* * Check if the dest already exists in the list */ dest = tcp_vs_lookup_dest(vs, daddr, dport); if (dest != NULL) { TCP_VS_DBG(0, "tcp_vs_add_dest(): dest already exists\n"); return -EEXIST; } /* * Allocate and initialize the dest structure */ dest = kmalloc(sizeof(struct tcp_vs_dest), GFP_ATOMIC); if (dest == NULL) { TCP_VS_ERR("kmalloc failed.\n"); return -EFAULT; } memset(dest, 0, sizeof(struct tcp_vs_dest)); dest->addr = daddr; dest->port = dport; dest->weight = weight; atomic_set(&dest->conns, 0); atomic_set(&dest->refcnt, 0); INIT_LIST_HEAD(&dest->r_list); write_lock_bh(&__tcp_vs_lock); /* * Add the dest entry into the list */ list_add(&dest->n_list, &vs->destinations); atomic_inc(&dest->refcnt); write_unlock_bh(&__tcp_vs_lock); LeaveFunction(2); return 0; } /* * Edit a destination in the given virtual server */ int tcp_vs_edit_dest(struct tcp_vs *vs, __u32 daddr, __u16 dport, int weight) { struct tcp_vs_dest *dest; EnterFunction(2); if (weight < 0) { TCP_VS_ERR("server weight less than zero\n"); return -ERANGE; } /* * Lookup the destination list */ dest = tcp_vs_lookup_dest(vs, daddr, dport); if (dest == NULL) { TCP_VS_DBG(0, "destination not exist\n"); return -ENOENT; } write_lock_bh(&__tcp_vs_lock); dest->weight = weight; write_unlock_bh(&__tcp_vs_lock); LeaveFunction(2); return 0; } /* * Delete a destination from the given virtual server */ void __tcp_vs_del_dest(struct tcp_vs_dest *dest) { /* dest->flags &= ~TCP_VS_DEST_F_AVAILABLE; */ /* * Remove it from the d-linked destination list. */ list_del(&dest->n_list); list_del(&dest->r_list); /* * Decrease the refcnt of the dest, and free the dest * if nobody refers to it (refcnt=0). Otherwise, throw * the destination into the trash. */ if (atomic_dec_and_test(&dest->refcnt)) kfree(dest); } int tcp_vs_del_dest(struct tcp_vs *vs, __u32 daddr, __u16 dport) { struct tcp_vs_dest *dest; EnterFunction(2); /* * Lookup the destination list */ dest = tcp_vs_lookup_dest(vs, daddr, dport); if (dest == NULL) { TCP_VS_DBG(0, "tcp_vs_del_dest(): destination not found!\n"); return -ENOENT; } write_lock_bh(&__tcp_vs_lock); /* * Remove dest from the destination list */ __tcp_vs_del_dest(dest); /* * Called the update_service function of its scheduler */ vs->scheduler->update_vs(vs); write_unlock_bh(&__tcp_vs_lock); LeaveFunction(2); return 0; } static int tcp_vs_add_rule(struct tcp_vs *vs, char *pattern, __u32 addr, __u16 port) { struct tcp_vs_dest *dest; struct tcp_vs_rule *r; struct list_head *l; EnterFunction(2); /* * Lookup the destination list */ dest = tcp_vs_lookup_dest(vs, addr, port); if (dest == NULL) { TCP_VS_DBG(0, "destination not exist\n"); return -ENOENT; } if (!list_empty(&dest->r_list)) { TCP_VS_DBG(2, "sorry that a server can be " "added to rule twice\n"); return -EEXIST; } list_for_each(l, &vs->rule_list) { r = list_entry(l, struct tcp_vs_rule, list); if (!strncmp(pattern, r->pattern, r->len)) { TCP_VS_DBG(2, "add server into an existing rule\n"); list_add(&dest->r_list, &r->destinations); goto out; } } r = kmalloc(sizeof(struct tcp_vs_rule), GFP_ATOMIC); if (r == NULL) { TCP_VS_ERR("kmalloc failed.\n"); return -EFAULT; } memset(r, 0, sizeof(struct tcp_vs_rule)); INIT_LIST_HEAD(&r->destinations); if (tcp_vs_exp_comp(&r->exp, pattern)) { TCP_VS_ERR("pattern compiling failed\n"); kfree(r); goto out; } r->pattern = strdup(pattern); r->len = strlen(pattern); list_add(&dest->r_list, &r->destinations); /* add this new rule to rule_list finally */ list_add(&r->list, &vs->rule_list); out: LeaveFunction(2); return 0; } static int tcp_vs_del_rule(struct tcp_vs *vs, char *pattern, __u32 addr, __u16 port) { struct tcp_vs_dest *dest; struct tcp_vs_rule *r; struct list_head *l, *d; EnterFunction(2); list_for_each(l, &vs->rule_list) { r = list_entry(l, struct tcp_vs_rule, list); if (!strncmp(pattern, r->pattern, r->len)) { TCP_VS_DBG(2, "found the rule\n"); goto hit; } } return -EEXIST; hit: list_for_each(d, &r->destinations) { dest = list_entry(d, struct tcp_vs_dest, r_list); if (dest->addr == addr && dest->port == port) { TCP_VS_DBG(2, "found the dest\n"); list_del_init(&dest->r_list); if (list_empty(&r->destinations)) { TCP_VS_DBG(2, "release the rule\n"); list_del(&r->list); tcp_vs_exp_free(&r->exp); kfree(r->pattern); kfree(r); } break; } } LeaveFunction(2); return 0; } static void tcp_vs_flush_rule(struct tcp_vs *vs) { struct list_head *l, *d; struct tcp_vs_rule *r; struct tcp_vs_dest *dest; EnterFunction(2); for (l=&vs->rule_list; l->next!=l; ) { r = list_entry(l->next, struct tcp_vs_rule, list); for (d=&r->destinations; d->next!=d; ) { dest = list_entry(d->next, struct tcp_vs_dest, r_list); list_del_init(&dest->r_list); } list_del(&r->list); tcp_vs_exp_free(&r->exp); kfree(r->pattern); kfree(r); } LeaveFunction(2); } struct tcp_vs *tcp_vs_lookup_byname(const char *name) { struct list_head *e; struct tcp_vs *vs; list_for_each (e, &tcp_vs_list) { vs = list_entry(e, struct tcp_vs, list); if (!strcmp(name, vs->name)) /* HIT */ return vs; } return NULL; } int tcp_vs_add_virtualserver(struct tcp_vs_ctl *ctl) { struct tcp_vs *vs; struct tcp_vs_scheduler *sched; EnterFunction(2); /* lookup scheduler here */ sched = tcp_vs_get_scheduler(ctl->sched_name); if (sched == NULL) { TCP_VS_INFO("Scheduler module tcp_vs_%s.o not found\n", ctl->sched_name); return -EFAULT; } vs = kmalloc(sizeof(*vs), GFP_ATOMIC); if (!vs) { TCP_VS_ERR("no available memory\n"); return -EFAULT; } memcpy(vs, &dummy_vs, sizeof(*vs)); vs->name = strdup(ctl->name); vs->serverport = ctl->serverport; vs->timeout = ctl->timeout; INIT_LIST_HEAD(&vs->destinations); INIT_LIST_HEAD(&vs->rule_list); atomic_inc(&tcp_vs_index); vs->index = atomic_read(&tcp_vs_index); if (tcp_vs_sysctl_register(vs)) { atomic_dec(&tcp_vs_index); kfree(vs); return -EFAULT; } proc_net_ktcpvs_vs_create(vs); list_add(&vs->list, &tcp_vs_list); tcp_vs_bind_scheduler(vs, sched); tcp_vs_put_scheduler(sched); LeaveFunction(2); return 0; } int tcp_vs_edit_virtualserver(struct tcp_vs *vs, struct tcp_vs_ctl *ctl) { struct tcp_vs_scheduler *sched; EnterFunction(2); /* lookup scheduler here */ if (strcmp(vs->scheduler->name, ctl->sched_name)) { sched = tcp_vs_get_scheduler(ctl->sched_name); if (sched == NULL) { TCP_VS_INFO("Scheduler module tcp_vs_%s.o not found\n", ctl->sched_name); return -EFAULT; } tcp_vs_unbind_scheduler(vs); tcp_vs_bind_scheduler(vs, sched); tcp_vs_put_scheduler(sched); } /* more attribute should be assigned here!!!!!! */ vs->serverport = ctl->serverport; vs->timeout = ctl->timeout; LeaveFunction(2); return 0; } int tcp_vs_del_virtualserver(struct tcp_vs *vs) { EnterFunction(2); if (atomic_read(&vs->running)) { TCP_VS_ERR("The VS is running, you'd better stop it first" "before deleting it.\n"); return -1; } list_del(&vs->list); tcp_vs_flush_rule(vs); tcp_vs_unbind_scheduler(vs); tcp_vs_sysctl_unregister(vs); proc_net_ktcpvs_vs_release(vs); kfree(vs); LeaveFunction(2); return 0; } int tcp_vs_flush(void) { struct list_head *l; struct tcp_vs *vs; EnterFunction(2); for (l=&tcp_vs_list; l->next!=l; ) { vs = list_entry(l->next, struct tcp_vs, list); if (tcp_vs_del_virtualserver(vs) == -1) return -1; } LeaveFunction(2); return 0; } int tcp_vs_control(struct tcp_vs_ctl *ctl) { int ret = -EINVAL; struct tcp_vs *vs; EnterFunction(2); /* * Avoid the non-terminated string here */ ctl->name[KTCPVS_VSNAME_MAXLEN-1] = 0; ctl->sched_name[KTCPVS_SCHEDNAME_MAXLEN-1] = 0; TCP_VS_DBG(2, "name=%s, cmd=%d\n", ctl->name, ctl->cmd); /* * Flush all the TCP virtual servers... */ if (ctl->cmd == TCP_VS_CMD_FLUSH) return tcp_vs_flush(); vs = tcp_vs_lookup_byname(ctl->name); if (!vs) { if (ctl->cmd==TCP_VS_CMD_ADD) return tcp_vs_add_virtualserver(ctl); else return -ESRCH; } switch (ctl->cmd) { case TCP_VS_CMD_START: vs->start = 1; ret = 0; break; case TCP_VS_CMD_STOP: vs->stop = 0; ret = 0; break; case TCP_VS_CMD_SET: ret = tcp_vs_edit_virtualserver(vs, ctl); break; case TCP_VS_CMD_DEL: ret = tcp_vs_del_virtualserver(vs); break; case TCP_VS_CMD_ADD_DEST: ret = tcp_vs_add_dest(vs, ctl->daddr, ctl->dport, ctl->weight); break; case TCP_VS_CMD_SET_DEST: ret =tcp_vs_edit_dest(vs, ctl->daddr, ctl->dport, ctl->weight); break; case TCP_VS_CMD_DEL_DEST: ret = tcp_vs_del_dest(vs, ctl->daddr, ctl->dport); break; case TCP_VS_CMD_ADD_RULE: ret = tcp_vs_add_rule(vs, ctl->pattern, ctl->daddr, ctl->dport); break; case TCP_VS_CMD_DEL_RULE: ret = tcp_vs_del_rule(vs, ctl->pattern, ctl->daddr, ctl->dport); break; } LeaveFunction(2); return ret; } static struct ctl_table_header *ktcpvs_table_header; static ctl_table ktcpvs_table[] = { #ifdef CONFIG_TCP_VS_DEBUG {NET_KTCPVS_DEBUGLEVEL, "debug_level", &sysctl_ktcpvs_debug_level, sizeof(int), 0644, NULL, &proc_dointvec}, #endif {NET_KTCPVS_UNLOAD, "unload", &sysctl_ktcpvs_unload, sizeof(int), 0644, NULL, &proc_dointvec}, {0} }; static ctl_table ktcpvs_dir_table[] = { {NET_KTCPVS, "ktcpvs", NULL, 0, 0555, ktcpvs_table}, {0} }; static ctl_table ktcpvs_root_table[] = { {CTL_NET, "net", NULL, 0, 0555, ktcpvs_dir_table}, {0} }; static struct tcp_vs_sysctl_table dummy_vs_sysctl = { NULL, {{NET_KTCPVS_VS_START, "start", &dummy_vs.start, sizeof(int), 0644, NULL, &proc_dointvec}, {NET_KTCPVS_VS_STOP, "stop", &dummy_vs.stop, sizeof(int), 0644, NULL, &proc_dointvec}, {NET_KTCPVS_VS_SERVERPORT, "serverport", &dummy_vs.serverport, sizeof(int), 0644, NULL, &proc_dointvec}, {NET_KTCPVS_VS_STARTSERVERS, "startservers", &dummy_vs.startservers, sizeof(int), 0644, NULL, &proc_dointvec}, {0}}, {{NET_KTCPVS_DUMMYVS, "dummy_vs", NULL, 0, 0555, dummy_vs_sysctl.vs_vars}, {0}}, {{NET_KTCPVS, "ktcpvs", NULL, 0, 0555, dummy_vs_sysctl.vs_dir}, {0}}, {{CTL_NET, "net", NULL, 0, 0555, dummy_vs_sysctl.ktcpvs_dir}, {0}} }; int tcp_vs_sysctl_register(struct tcp_vs *vs) { int i; struct tcp_vs_sysctl_table *t; if (vs == NULL) return -1; t = kmalloc(sizeof(*t), GFP_ATOMIC); if (t == NULL) return -1; memcpy(t, &dummy_vs_sysctl, sizeof(*t)); for (i=0; ivs_vars)/sizeof(t->vs_vars[0])-1; i++) { t->vs_vars[i].data += (char*)vs - (char*)&dummy_vs; t->vs_vars[i].de = NULL; } t->vs_dir[0].procname = vs->name; t->vs_dir[0].ctl_name = vs->index; t->vs_dir[0].child = t->vs_vars; t->vs_dir[0].de = NULL; t->ktcpvs_dir[0].child = t->vs_dir; t->ktcpvs_dir[0].de = NULL; t->root_dir[0].child = t->ktcpvs_dir; t->root_dir[0].de = NULL; t->sysctl_header = register_sysctl_table(t->root_dir, 0); if (t->sysctl_header == NULL) { kfree(t); return -1; } else { vs->sysctl = t; return 0; } } int tcp_vs_sysctl_unregister(struct tcp_vs *vs) { if (vs->sysctl) { struct tcp_vs_sysctl_table *t = vs->sysctl; vs->sysctl = NULL; unregister_sysctl_table(t->sysctl_header); kfree(t); } return 0; } /* /proc/net/ktcpvs/config */ static int tcp_vs_get_info(char *buf, char **start, off_t offset, int length) { struct list_head *l, *e, *r, *d; struct tcp_vs *vs; struct tcp_vs_dest *dest; struct tcp_vs_rule *rule; off_t pos=0; int len=0; int size; EnterFunction(2); if (length < 0) return -EINVAL; list_for_each(l, &tcp_vs_list) { vs = list_entry(l, struct tcp_vs, list); size = sprintf(buf+len, "TCPVS %s %s\n" " scheduler = %s\n" " serverport = %d\n" " startservers = %d\n" " maxClients = %d\n", vs->name, atomic_read(&vs->running)?"RUNNING":"", vs->scheduler->name, vs->serverport, vs->startservers, vs->maxClients); len += size; pos += size; if (pos <= offset) len=0; if (pos >= offset+length) break; list_for_each(e, &vs->destinations) { dest = list_entry(e, struct tcp_vs_dest, n_list); size = sprintf(buf+len, " server = %08X:%X %d\n", ntohl(dest->addr), ntohs(dest->port), dest->weight); len += size; pos += size; if (pos <= offset) len=0; if (pos >= offset+length) goto done; } list_for_each(r, &vs->rule_list) { rule = list_entry(r, struct tcp_vs_rule, list); list_for_each(d, &rule->destinations) { dest = list_entry(d, struct tcp_vs_dest, r_list); size = sprintf(buf+len, " rule = pattern %s use server %08X:%X\n", rule->pattern, ntohl(dest->addr), ntohs(dest->port)); len += size; pos += size; if (pos <= offset) len=0; if (pos >= offset+length) goto done; } } } done: *start = buf+len-(pos-offset); /* Start of wanted data */ len = pos-offset; if (len > length) len = length; if (len < 0) len = 0; LeaveFunction(2); return len; } static int tcp_vs_write_proc(struct file *file, const char *buffer, unsigned long count, void *data) { struct tcp_vs_ctl vs_ctl; EnterFunction(2); if (count < sizeof(vs_ctl)) return -EINVAL; if (copy_from_user(&vs_ctl, buffer, sizeof(vs_ctl))) return -EFAULT; if (count > sizeof(vs_ctl)) { TCP_VS_DBG(2, "count is larger than sizeof(ctl)\n"); return -EFAULT; } file->f_pos += count; tcp_vs_control(&vs_ctl); LeaveFunction(2); return count; } /* per tcpvs info output (/proc/net/ktcpvs/) */ static ssize_t tcp_vs_pervs_read_proc(struct file * file, char * buf, size_t count, loff_t *ppos) { struct proc_dir_entry *de; struct list_head *l; struct tcp_vs *vs; struct tcp_vs_dest *dest; char *page; int len; EnterFunction(2); if (count == 0) return 0; if (file->f_pos) return 0; de = (struct proc_dir_entry*) file->f_dentry->d_inode->u.generic_ip; if (!de || !de->data) return -ENOTDIR; vs = (struct tcp_vs *) de->data; if (!(page = (char*) __get_free_page(GFP_ATOMIC))) return -ENOMEM; len = sprintf(page, "TCPVS %s %s\n" " scheduler = %s\n" " serverport = %d\n" " startservers = %d\n" " maxClients = %d\n", vs->name, atomic_read(&vs->running)?"RUNNING":"", vs->scheduler->name, vs->serverport, vs->startservers, vs->maxClients); list_for_each(l, &vs->destinations) { dest = list_entry(l, struct tcp_vs_dest, n_list); len += sprintf(buf+len, " server = %08X:%X %d\n", ntohl(dest->addr), ntohs(dest->port), dest->weight); if (len > PAGE_SIZE-80) break; } if (len <= count) { if (copy_to_user(buf, page, len)) return -EFAULT; file->f_pos += len; } else { len = 0; } TCP_VS_DBG(2, "count:%d ppos:%d len:%d\n", count, (int)*ppos, len); free_page((unsigned long) page); LeaveFunction(2); return len; } struct file_operations proc_net_ktcpvs_file_operations = { read: tcp_vs_pervs_read_proc, }; static struct proc_dir_entry *proc_net_ktcpvs; struct proc_dir_entry *proc_net_ktcpvs_vs_create(struct tcp_vs *vs) { struct proc_dir_entry *de; de = create_proc_entry(vs->name, S_IFREG|S_IRUGO, proc_net_ktcpvs); if (!de) return NULL; de->data = (void *) vs; de->proc_fops = &proc_net_ktcpvs_file_operations; return de; } void proc_net_ktcpvs_vs_release(struct tcp_vs *vs) { remove_proc_entry(vs->name, proc_net_ktcpvs); } void tcp_vs_control_start(void) { struct proc_dir_entry *ent; INIT_LIST_HEAD(&dummy_vs.destinations); INIT_LIST_HEAD(&dummy_vs.rule_list); proc_net_ktcpvs = proc_mkdir("ktcpvs", proc_net); ent = create_proc_entry("config", S_IFREG|S_IRUGO|S_IWUSR, proc_net_ktcpvs); if (ent) { ent->get_info = tcp_vs_get_info; ent->write_proc = tcp_vs_write_proc; } ktcpvs_table_header = register_sysctl_table(ktcpvs_root_table, 0); } void tcp_vs_control_stop(void) { unregister_sysctl_table(ktcpvs_table_header); remove_proc_entry("config", proc_net_ktcpvs); remove_proc_entry("ktcpvs", proc_net); }