/*
 * KTCPVS       An implementation of the TCP Virtual Server daemon inside
 *              kernel for the LINUX operating system. KTCPVS can be used
 *              to build a moderately scalable and highly available server
 *              based on a cluster of servers, with more flexibility.
 *
 * tcp_vs_phttp.c: KTCPVS content-based scheduling module for HTTP service
 *                 with persistent connection support
 *
 * Version:     $Id$
 *
 * Authors:     Wensong Zhang,
 *              Hai Long,
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 */

/*
 * NOTE(review): the header names of the following #include directives are
 * missing in this copy of the file; they have to be restored from the
 * upstream source before the file can be compiled.
 */
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

#include "tcp_vs.h"

/* ASCII codes of the characters that delimit HTTP lines and fields */
#define CR	13
#define LF	10
#define SP	' '

/* idle time in seconds (multiplied by HZ by the scheduler) after which a
   client connection without new data is dropped */
#define KEEP_ALIVE_TIMES	60

/* result codes of the parse_* and http_mime_parse functions */
#define PARSE_OK	0
#define PARSE_ERROR	1

#ifndef MIN
#define MIN(a,b)	(((a) < (b)) ? (a) : (b))
#endif

/* request methods; the values are used as indexes into http_methods[] */
typedef enum http_method_s {
	HTTP_M_UNKNOWN,
	HTTP_M_OPTIONS,
	HTTP_M_GET,
	HTTP_M_HEAD,
	HTTP_M_POST,
	HTTP_M_PUT,
	HTTP_M_DELETE,
	HTTP_M_TRACE,
	HTTP_M_MAX
} http_method_t;

/* one entry of the method table */
typedef struct methods {
	const short number;		/* http_method_t value */
	const char * const name;	/* method name as it appears on the wire */
	int len;			/* length of name */
} methods_t;

/*
 * Protocol version strings. The parsers compute the version as
 * "index + HTTP_V_0_9", so the order must match http_version_t.
 */
const char * http_protocol_strings[] = {
	"HTTP/0.9", /* The position is crucial & magic! do not insert, only add */
	"HTTP/1.0",
	"HTTP/1.1",
	NULL
};

/* method table, indexed by http_method_t */
const methods_t http_methods [HTTP_M_MAX] = {
	{HTTP_M_UNKNOWN, "UNKNOWN", 7},
	{HTTP_M_OPTIONS, "OPTIONS", 7},
	{HTTP_M_GET, "GET", 3},
	{HTTP_M_HEAD, "HEAD", 4},
	{HTTP_M_POST, "POST", 4},
	{HTTP_M_PUT, "PUT", 3},
	{HTTP_M_DELETE, "DELETE", 6},
	{HTTP_M_TRACE, "TRACE", 5}
};

/* protocol versions, in the order of http_protocol_strings[] (offset by one) */
typedef enum http_versions {
	HTTP_V_UNKNOWN,
	HTTP_V_0_9,
	HTTP_V_1_0,
	HTTP_V_1_1
} http_version_t;

/* the header fields that decide how a message body is delimited */
typedef struct http_mime_headers {
	int content_length;	/* value of Content-Length */
	int transfer_encoding;	/* set for a non-identity Transfer-Encoding */
	int connection_close;	/* set for "Connection: close" */
	char *sep;		/* THIS_STRING_SEPARATES */
} http_mime_header_t;

/* a parsed request line plus the MIME header summary of the request */
typedef struct http_request_s {
	const char *message;
	unsigned int message_len;
	unsigned int parsed_len;

	/* request method */
	http_method_t method;
	const char *method_str;
	unsigned int method_len;

	/* request URI */
	const char *uri_str;
	unsigned int uri_len;

	/* http version */
	http_version_t version;
	const char *version_str;
	unsigned int version_len;

	/* cookies */
	unsigned int cookies;
	const char *cookies_str;
	unsigned int cookies_len;

	/* MIME header */
	http_mime_header_t mime;
} http_request_t;

/* a parsed status line plus the MIME header summary of the response */
typedef struct http_response_s {
	/* http version */
	int version;

	/* response status code */
	int status_code;

	/* MIME header */
	http_mime_header_t mime;
} http_response_t;

/* state of buffered, line-oriented reading from one socket */
typedef struct http_read_ctl_block_s {
	struct socket *sock;	/* socket that message read from */
	char* buffer;		/* buffer to store read message */
	int len;		/* size of buffer */
	char* info;		/* point to the current information */
	int offset;		/* offset of remaining bytes */
	int remaining;		/* remaining bytes not return */
	int flag;		/* read flag */
} http_read_ctl_block_t;

/* a server connection kept open on behalf of one client connection */
typedef struct http_dest_server_s {
	struct list_head d_list;	/* server list for a client connection */
	struct tcp_vs_dest *dest;	/* dest server entry */
	struct socket *sock;		/* destination socket */
} http_dest_server_t;


/* service initialisation hook of the scheduler: nothing to do, returns 0 */
static int
tcp_vs_phttp_init_svc(struct tcp_vs_service *svc)
{
	return 0;
}


/* service tear-down hook of the scheduler: nothing to do, returns 0 */
static int
tcp_vs_phttp_done_svc(struct tcp_vs_service *svc)
{
	return 0;
}
static int tcp_vs_phttp_update_svc(struct tcp_vs_service *svc) { return 0; } static inline struct tcp_vs_dest * __tcp_vs_phttp_wlc_schedule(struct list_head *destinations) { register struct list_head *e; struct tcp_vs_dest *dest, *least; list_for_each (e, destinations) { least = list_entry(e, struct tcp_vs_dest, r_list); if (least->weight > 0) { goto nextstage; } } return NULL; /* * Find the destination with the least load. */ nextstage: for (e=e->next; e!=destinations; e=e->next) { dest = list_entry(e, struct tcp_vs_dest, r_list); if (atomic_read(&least->conns) * dest->weight > atomic_read(&dest->conns) * least->weight) { least = dest; } } return least; } /**************************************************************************** * skip whitespace */ static inline char * skip_lws (const char* buffer) { char* s = (char*)buffer; while ((*s == ' ') || (*s == '\t') || (*s == '\n') || (*s == '\r')) { s++; } return s; } /**************************************************************************** * This doesn't accept 0x if the radix is 16. 
The overflow code assumes * a 2's complement architecture */ #ifndef strtol static long strtol( char *string, char **endptr, int radix) { char *s; long value; long new_value; int sign; int increment; value = 0; sign = 1; s = string; if ((radix == 1) || (radix > 36) || (radix < 0)) { goto done; } /* skip whitespace */ while ((*s == ' ') || (*s == '\t') || (*s == '\n') || (*s == '\r')) { s++; } if (*s == '-') { sign = -1; s++; } else if (*s == '+') { s++; } if (radix == 0) { if (*s == '0') { s++; if ((*s == 'x') || (*s == 'X')) { s++; radix = 16; } else radix = 8; } else radix = 10; } /* read number */ while (1) { if ((*s >= '0') && (*s <= '9')) increment = *s - '0'; else if ((*s >= 'a') && (*s <= 'z')) increment = *s - 'a' + 10; else if ((*s >= 'A') && (*s <= 'Z')) increment = *s - 'A' + 10; else break; if (increment >= radix) break; new_value = value * radix + increment; /* detect overflow */ if ((new_value - increment)/radix != value) { s = string; value = -1 >> 1; if (sign < 0) value += 1; goto done; } value = new_value; s++; } done: if (endptr) *endptr = s; return value*sign; } #endif /**************************************************************************** * Parse a chunk extension, detect overflow. * There are two error cases: * 1) If the conversion would require too many bits, a -1 is returned. * 2) If the conversion used the correct number of bits, but an overflow * caused only the sign bit to flip, then that negative number is * returned. * In general, any negative number can be considered an overflow error. 
*/
static long
get_chunk_size(char *b)
{
	/*
	 * The digits are accumulated in an unsigned long: shifting into the
	 * top bit is well defined there, whereas it is not for a signed long.
	 * The final conversion back to long yields the negative value that
	 * callers interpret as overflow.
	 */
	unsigned long chunksize = 0;
	size_t chunkbits = sizeof(long) * 8;

	/* skip whitespace */
	while ((*b == ' ') || (*b == '\t') || (*b == '\n') || (*b == '\r')) {
		b++;
	}

	/* Skip leading zeros */
	while (*b == '0') {
		++b;
	}

	/* one hex digit consumes four of the available bits */
	while (isxdigit((unsigned char) *b) && (chunkbits > 0)) {
		unsigned long xvalue = 0;

		if (*b >= '0' && *b <= '9') {
			xvalue = *b - '0';
		} else if (*b >= 'A' && *b <= 'F') {
			xvalue = *b - 'A' + 0xa;
		} else if (*b >= 'a' && *b <= 'f') {
			xvalue = *b - 'a' + 0xa;
		}

		chunksize = (chunksize << 4) | xvalue;
		chunkbits -= 4;
		++b;
	}

	/* more digits than a long can hold */
	if (isxdigit((unsigned char) *b) && (chunkbits == 0)) {
		/* overflow */
		return -1;
	}

	return (long) chunksize;
}


/****************************************************************************
 *	Parse http request line. (request line is terminated by CRLF)
 *
 * RFC 2616, 19.3
 *    Clients SHOULD be tolerant in parsing the Status-Line and servers
 *    tolerant when parsing the Request-Line. In particular, they SHOULD
 *    accept any amount of SP or HT characters between fields, even though
 *    only a single SP is required.
* */
/*
 * buffer: the request line; buffer[len] must be writable (it is replaced
 *         by a NUL while parsing and restored before returning)
 * len:    length of the line without the trailing CRLF
 * req:    receives message, method, URI and version information
 *
 * Returns PARSE_OK, or PARSE_ERROR when the method is unknown, is not
 * followed by whitespace, or when the URI or a known HTTP version is
 * missing.
 */
static int
parse_http_request_line (char *buffer, size_t len, http_request_t *req)
{
	char *pos, c;
	int ret = PARSE_ERROR;
	int i;

	EnterFunction(5);

	/* terminate string */
	c = buffer[len];
	buffer[len] = 0;

	TCP_VS_DBG(5, "parsing request:\n");
	TCP_VS_DBG(5, "--------------------\n");
	TCP_VS_DBG(5, "%s\n", buffer);
	TCP_VS_DBG(5, "--------------------\n");

	req->message = buffer;
	req->message_len = len;

	/*
	 * RFC 2616, 5.1:
	 *      Request-Line = Method SP Request-URI SP HTTP-Version CRLF
	 */

	/* try to get method */
	pos = skip_lws (buffer);
	req->method = HTTP_M_UNKNOWN;	/* Default :) */
	for (i = 1; i < HTTP_M_MAX; i++) {
		if (strnicmp (pos, http_methods[i].name,
			      http_methods[i].len) == 0) {
			req->method = i;
			break;
		}
	}
	if (req->method == HTTP_M_UNKNOWN) {
		goto exit;
	}
	TCP_VS_DBG(6, "HTTP METHOD: %s\n", http_methods[i].name);

	req->method_str = pos;
	req->method_len = http_methods[i].len;
	pos += http_methods[i].len;

	/*
	 * The method has to be followed by SP or HT. This rejects tokens
	 * that merely start with a known method name and, because the
	 * terminating NUL is neither, keeps the parser from stepping past
	 * the end of a line that consists of the method only.
	 */
	if ((*pos != SP) && (*pos != '\t')) {
		goto exit;
	}

	/* get URI string */
	req->uri_str = skip_lws (pos);
	TCP_VS_DBG(6, "URI: %s\n", req->uri_str);
	if ((pos = strchr ((char*)req->uri_str, SP)) == NULL) {
		goto exit;
	}
	req->uri_len = pos - req->uri_str;

	/* get http version */
	req->version_str = skip_lws (pos + 1);
	req->version = HTTP_V_UNKNOWN;
	for (i = 0; http_protocol_strings[i] != NULL; i++) {
		if (strnicmp (req->version_str, http_protocol_strings[i],
			      strlen (http_protocol_strings[i])) == 0) {
			/* table index and enum differ by HTTP_V_0_9 */
			req->version = i + HTTP_V_0_9;
			break;
		}
	}
	if (req->version == HTTP_V_UNKNOWN) {
		goto exit;
	}
	req->version_len = strlen (http_protocol_strings[i]);
	TCP_VS_DBG(6, "HTTP VERSION: %s\n", http_protocol_strings[i]);

	ret = PARSE_OK;

  exit:
	buffer[len] = c;	/* restore string */

	LeaveFunction(5);
	return ret;
}


/****************************************************************************
 *	parse_http_status_line - parse the http status line.
 *
 * RFC 2616, 19.3
 *    Clients SHOULD be tolerant in parsing the Status-Line and servers
 *    tolerant when parsing the Request-Line. In particular, they SHOULD
 *    accept any amount of SP or HT characters between fields, even though
 *    only a single SP is required.
* */
/*
 * buffer: the status line; buffer[len] must be writable (it is replaced
 *         by a NUL while parsing and restored before returning)
 * len:    length of the line without the trailing CRLF
 * resp:   receives the protocol version and the status code
 *
 * Returns PARSE_OK, or PARSE_ERROR when the line does not start with a
 * known HTTP version or the status code is not a three-digit number.
 */
static int
parse_http_status_line (char *buffer, size_t len, http_response_t *resp)
{
	char *pos, c;
	int i, ret = PARSE_ERROR;
	long status;

	EnterFunction(5);

	assert(buffer != NULL);

	/* terminate string */
	c = buffer[len];
	buffer[len] = '\0';

	TCP_VS_DBG(5, "parsing response:\n");
	TCP_VS_DBG(5, "--------------------\n");
	TCP_VS_DBG(5, "%s\n", buffer);
	TCP_VS_DBG(5, "--------------------\n");

	/*
	 * RFC 2616, 6.1:
	 *	Status-Line = HTTP-Version SP Status-Code SP Reason-Phrase CRLF
	 */

	pos = skip_lws (buffer);

	resp->version = HTTP_V_UNKNOWN;
	for (i = 0; http_protocol_strings[i] != NULL; i++) {
		if (strnicmp (pos, http_protocol_strings[i],
			      strlen (http_protocol_strings[i])) == 0) {
			/* table index and enum differ by HTTP_V_0_9 */
			resp->version = i + HTTP_V_0_9;
			break;
		}
	}
	if (resp->version == HTTP_V_UNKNOWN) {
		goto exit;
	}

	/*
	 * get the status code. Only the version string is stepped over:
	 * strtol() skips the separating whitespace itself, so the parser
	 * never looks beyond the terminator of a line that ends right
	 * after the version.
	 */
	pos += strlen (http_protocol_strings[i]);
	status = strtol (pos, NULL, 10);

	/* the value comes from the network: validate it, do not assert */
	if ((status < 100) || (status > 999)) {
		TCP_VS_ERR("Invalid http status code\n");
		goto exit;
	}
	resp->status_code = status;
	TCP_VS_DBG(6, "Status Code: %d\n", resp->status_code);

	ret = PARSE_OK;

  exit:
	buffer[len] = c;	/* restore string */

	LeaveFunction(5);
	return ret;
}


/*
 * Select a destination server for a request by its URI.
 *
 * The URI is copied into a NUL-terminated scratch string and tried against
 * the regular expression of each rule of the service in list order, with
 * the service lock held for reading. The first rule whose expression
 * matches decides: the result is the weighted least-connection choice among
 * that rule's destinations.
 *
 * Returns NULL when the scratch string cannot be allocated, when no rule
 * matches, or when the matching rule has no usable destination.
 */
static struct tcp_vs_dest *
tcp_vs_phttp_matchrule(struct tcp_vs_service *svc, http_request_t *req)
{
	struct list_head *l;
	struct tcp_vs_rule *r;
	struct tcp_vs_dest *dest = NULL;
	char *uri;

	/* uri_str is not terminated inside the request line: make a copy */
	if (!(uri = kmalloc(req->uri_len+1, GFP_KERNEL))) {
		TCP_VS_ERR("No memory!\n");
		return NULL;
	}
	memcpy(uri, req->uri_str, req->uri_len);
	uri[req->uri_len] = '\0';
	TCP_VS_DBG(5, "matching request URI: %s\n", uri);

	read_lock(&svc->lock);
	list_for_each(l, &svc->rule_list) {
		r = list_entry(l, struct tcp_vs_rule, list);
		if (!regexec(&r->rx, uri, 0, NULL, 0)) {
			/* HIT */
			dest = __tcp_vs_phttp_wlc_schedule(&r->destinations);
			break;
		}
	}
	read_unlock(&svc->lock);

	kfree(uri);
	return dest;
}


/****************************************************************************
 *
 * http_read_line - read a line from socket.
 *
 * Try to get the line from the remaining bytes. then read max_line_size bytes
 * from socket and get a line.
(the line delimeter is "CRLF") * Return the len of the line (not including CRLF), or -1 if failed. * * Note: 1, http_read_line does not terminate the line with '\0', it still end * with CRLF. * 2, the acutul search length may bigger than max_line_size. */ static int http_read_line (http_read_ctl_block_t *ctl_blk, int max_line_size) { char* buf; int nbytes, i, offset, reads; int len = -1; DECLARE_WAIT_QUEUE_HEAD(WQ); EnterFunction(5); assert(max_line_size < ctl_blk->len); assert(ctl_blk->remaining <= (ctl_blk->len - ctl_blk->offset)); ctl_blk->info = NULL; if (ctl_blk->remaining == 0) { ctl_blk->offset = 0; } offset = ctl_blk->offset; buf = ctl_blk->buffer + offset; /* try to get a line from the remaining bytes */ for (i = 0; i < ctl_blk->remaining -1; i++) { if ((buf[i] == CR) && (buf[i + 1] == LF)) { len = i; goto done; } } /* overflow? */ if ((offset + max_line_size) > ctl_blk->len) { memmove (ctl_blk->buffer, buf, ctl_blk->remaining); ctl_blk->offset = 0; buf = ctl_blk->buffer; } nbytes = max_line_size - ctl_blk->remaining; /* try to read a line from the socket */ while ((nbytes > 0) && (len < 0)) { /* go out if the connection is closed */ if (ctl_blk->sock->sk->state != TCP_ESTABLISHED && ctl_blk->sock->sk->state != TCP_CLOSE_WAIT) { if (len > 0) break; else goto exit; } assert(ctl_blk->remaining <= (ctl_blk->len - ctl_blk->offset)); reads = tcp_vs_recvbuffer(ctl_blk->sock, ctl_blk->buffer + ctl_blk->offset + ctl_blk->remaining, ctl_blk->len - ctl_blk->offset - ctl_blk->remaining, ctl_blk->flag); if (reads == 0) { interruptible_sleep_on_timeout(&WQ, HZ); TCP_VS_DBG(5, "Read 0 bytes while reading a line\n"); continue; } if (reads < 0) { TCP_VS_ERR("Error in reading a line\n"); goto exit; } ctl_blk->remaining += reads; /* try to get a line from the remaing bytes */ for (; i < ctl_blk->remaining -1; i++) { if ((buf[i] == CR) && (buf[i + 1] == LF)) { len = i; goto done; } } nbytes -= reads; } done: ctl_blk->info = buf; ctl_blk->offset += len + 2; ctl_blk->remaining 
-= len + 2; assert(ctl_blk->remaining >= 0); assert(ctl_blk->offset < ctl_blk->len); exit: LeaveFunction(5); return len; } /**************************************************************************** * http_line_unescape - convert escaped characters in buffer to ASCII * * This routine can be used to convert an "escaped" form of a URL or * parameter (appended to a URL) to standard ASCII format. * The escaping is done by the browser on the client side, for * transferring characters not allowed by the HTTP protocol. * For example, a whitespace character is not allowed in an URL. * It must be substituted by an escape sequence to be transferred. * * ESCAPING * When you want to include any character, not part of the standard * set allowed in URLs, you can do this by specifying its hex value * in the format %xx, where xx is the hex representation. * In addition, every '+' character will be substituted by a space. * */ static void http_line_unescape ( char *string, /* escaped string to unescape */ int len /* length of the string */ ) { int i = 0; char buffer[3]; char c; EnterFunction(5); assert(string != NULL) while (i < len) { if (string[i] == '+') { string[i] = ' '; /* replace '+' by spaces */ } if ((string[i] == '%') && (i < len-2)) { if (isxdigit (string[i+1]) && isxdigit (string[i+2])) { strncpy (buffer, &(string[i+1]), 2); buffer[2] = 0; c = (char) strtol (buffer, NULL, 16); if (c != 0) { memmove (&(string [i]), /* move string 2 chars */ &(string[i+2]), 2); string[i] = c; /* replace % by new char */ len -= 2; } } } i++; } LeaveFunction(5); return; } /****************************************************************************** * http_mime_parse - parse MIME line in a buffer * * This routine parses the MIME line in a buffer. * * NOTE: Some MIME headers (host, Referer) need be considered again, tbd. 
* */ static int http_mime_parse ( char *buffer, int len, http_mime_header_t *mime, int partial ) { char *pos, *buf, c; int l, ret = PARSE_ERROR; EnterFunction(5); assert(buffer != NULL) /* terminate string */ c = buffer[len]; buffer[len] = 0; buffer = skip_lws (buffer); if ((pos = strchr (buffer, ':')) == NULL) { goto exit; } l = pos - buffer; TCP_VS_DBG(5, "MIME Header: %s\n", buffer); /* * Messages MUST NOT include both a Content-Length header field and * a non-identity transfer-coding. If the message does include a non- * identity transfer-coding, the Content-Length MUST be ignored. */ if (strnicmp (buffer, "Transfer-Encoding", l) == 0) { /* maybe strlen is better than 'l' */ pos = skip_lws (pos + 1); if (strnicmp (pos, "identity", strlen ("identity")) != 0) { mime->transfer_encoding = 1; TCP_VS_DBG(5, "Transfer-Encoding: chunked\n"); } } else if (strnicmp (buffer, "Content-Length", l) == 0) { mime->content_length = strtol (pos + 1, NULL, 10); TCP_VS_DBG(5, "Content-Length: %d\n", mime->content_length); } else if (strnicmp (buffer, "Connection", l) == 0) { pos = skip_lws (pos + 1); if (strnicmp (pos, "close", strlen ("close")) == 0) { mime->connection_close = 1; TCP_VS_DBG(5, "Connection: close\n"); } } else if (partial && (strnicmp (buffer, "Content-type", l) == 0)){ l = strlen ("multipart/byteranges"); pos = skip_lws (pos + 1); if (strnicmp (pos, "multipart/byteranges", l)== 0){ TCP_VS_DBG(5, "multipart/byteranges\n"); pos += l + 1; /* skip ';' */ pos = skip_lws (pos + 1); l = strlen ("boundary="); if (strnicmp (pos, "boundary=", l) != 0) { goto exit; } buf = pos + l; /* the rest of this line is THIS_STRING_SEPARATES */ l = buffer + len - buf; if ((mime->sep = kmalloc (l + 1, GFP_KERNEL)) == NULL) { goto exit; } /* RFC 2046 [40] permits the boundary string to be quoted */ if (buf[0] == '"' || buf[0] == '\'') { buf++; l--; } strncpy (mime->sep, buf, l); mime->sep[l] = 0; TCP_VS_DBG(5, "THIS_STRING_SEPARATES : %s\n", mime->sep); } } ret = PARSE_OK; exit: 
buffer[len] = c; /* restore the string */ LeaveFunction(5); return ret; } /**************************************************************************** * relay data between source socket and destination socket */ static int relay_http_data ( struct socket* dsock, /* destination socket */ http_read_ctl_block_t *ctl_blk, /* read control block with source socket */ int len /* relay data length */ ) { int nbytes, reads, w = 0; int ret = -1; DECLARE_WAIT_QUEUE_HEAD(WQ); EnterFunction(5); assert(ctl_blk->remaining <= (ctl_blk->len - ctl_blk->offset)) assert(len > 0) /* if there is enough data in read buffer */ nbytes = len - ctl_blk->remaining; if (nbytes <= 0) { if (tcp_vs_xmit (dsock, ctl_blk->buffer + ctl_blk->offset, len, MSG_MORE) < 0) { TCP_VS_ERR("Error in xmitting message body\n"); goto exit; } ctl_blk->offset += len; ctl_blk->remaining -= len; goto done; } /* xmit the remaining bytes */ if (ctl_blk->remaining > 0) { if (tcp_vs_xmit (dsock, ctl_blk->buffer + ctl_blk->offset, ctl_blk->remaining, MSG_MORE) < 0) { TCP_VS_ERR("Error in xmitting remaining bytes\n"); goto exit; } } do { reads = tcp_vs_recvbuffer (ctl_blk->sock, ctl_blk->buffer, ctl_blk->len, ctl_blk->flag); if (reads == 0) { interruptible_sleep_on_timeout(&WQ, HZ); TCP_VS_DBG (5, "Reads 0 bytes while relay\n"); continue; } if (reads < 0) { TCP_VS_ERR("Error in reading while relaying\n"); goto exit; } w = MIN(nbytes, reads); if (tcp_vs_xmit (dsock, ctl_blk->buffer, w, MSG_MORE) < 0) { TCP_VS_ERR("Error in relaying bytes\n"); goto exit; } nbytes -= w; }while (nbytes > 0); ctl_blk->offset = w; ctl_blk->remaining = reads - w; assert(ctl_blk->remaining >= 0) assert(ctl_blk->offset < ctl_blk->len) done: ret = 0; exit: LeaveFunction(5); return ret; } /**************************************************************************** * search the seperator in a string */ static char* search_sep (const char * s, int len, const char * sep) { int l, ll; l = strlen(sep); if (!l) return (char *)s; ll = len; while (ll >= 
l) { ll--; if (!memcmp(s, sep, l)) return (char *) s; s++; } return NULL; } /**************************************************************************** * relay_multiparts: relay multipart/byteranges body * * relay all data until "CRLF--THIS_STRING_SEPARATES--CRLF" is found. * * Note: there may be a endless loop if the separate string is not found, tbd. * */ static int relay_multiparts ( struct socket* dsock, http_read_ctl_block_t *ctl_blk, http_mime_header_t *mime ) { int len, sep_len, l, reads; int ret = -1; char *buf, *pos; char *sep = NULL; DECLARE_WAIT_QUEUE_HEAD(WQ); EnterFunction(5); sep_len = strlen (mime->sep) + 8; if ((sep = kmalloc (sep_len + 1, GFP_KERNEL)) == NULL) { goto exit; } snprintf (sep, sep_len + 1, "\r\n--%s--\r\n" , mime->sep); /* deal with the remaining bytes */ buf = ctl_blk->buffer + ctl_blk->offset; len = ctl_blk->remaining; if ((len > 0) && (tcp_vs_xmit (dsock, buf, len, MSG_MORE) < 0)) { TCP_VS_ERR("Error in xmitting multiparts (remaining)\n"); goto exit; } pos = search_sep (buf, len, sep); if (pos != NULL) { goto done; } l = MIN(len, sep_len); memmove (ctl_blk->buffer, buf + len - l, l); /* search for CRLF--THIS_STRING_SEPARATES--CRLF */ while (1) { reads = tcp_vs_recvbuffer (ctl_blk->sock, ctl_blk->buffer + l, ctl_blk->len - l, 0); if (reads == 0) { interruptible_sleep_on_timeout(&WQ, HZ); TCP_VS_DBG (5, "Reads 0 bytes while relaying multiparts\n"); } if (reads < 0) { TCP_VS_ERR("Error in receiving multiparts\n"); goto exit; } if (tcp_vs_xmit (dsock, ctl_blk->buffer + l, reads, MSG_MORE) < 0) { TCP_VS_ERR("Error in xmitting multiparts\n"); goto exit; } len = l + reads; pos = search_sep (ctl_blk->buffer, len, sep); if (pos != NULL) { goto done; } l = MIN(len, sep_len); memmove (ctl_blk->buffer, ctl_blk->buffer + len - l, l); } done: ret = 0; exit: if (sep) { kfree (sep); } LeaveFunction(5); return ret; } /**************************************************************************** * transfer http message body. 
*
* When a message-body is included with a message, the transfer-length of that
* body is determined by one of the following (in order of precedence):
* 1. Any response message which "MUST NOT" include a message-body (such as
* the 1xx, 204, and 304 responses and any response to a HEAD request) is always
* terminated by the first empty line after the header fields, regardless of the
* entity-header fields present in the message.
* 2. If a Transfer-Encoding header field (section 14.41) is present and has
* any value other than "identity", then the transfer-length is defined by use of
* the "chunked" transfer-coding (section 3.6), unless the message is terminated
* by closing the connection.
* 3. If a Content-Length header field (section 14.13) is present, its decimal
* value in OCTETs represents both the entity-length and the transfer-length. The
* Content-Length header field MUST NOT be sent if these two lengths are different
* (i.e., if a Transfer-Encoding header field is present). If a message is received
* with both a Transfer-Encoding header field and a Content-Length header field,
* the latter MUST be ignored.
* 4. If the message uses the media type "multipart/byteranges", and the transfer-
* length is not otherwise specified, then this self-delimiting media type defines
* the transfer-length. This media type MUST NOT be used unless the sender knows
* that the recipient can parse it; the presence in a request of a Range header with
* multiple byte-range specifiers from a 1.1 client implies that the client can parse
* multipart/byteranges responses.
* A range header might be forwarded by a 1.0 proxy that does not understand
* multipart/byteranges; in this case the server MUST delimit the message using
* methods defined in items 1, 3 or 5 of this section.
* 5. By the server closing the connection. (Closing the connection cannot be
* used to indicate the end of a request body, since that would leave no possibility
* for the server to send back a response.)
* */
/*
 * Relays the body that follows an already relayed header from the socket
 * of 'ctl_blk' to 'dsock'. The delimiting method is taken from 'mime':
 * chunked transfer-coding first, then Content-Length, then a
 * multipart/byteranges separator. Without any of them nothing is relayed.
 *
 * Returns 0 on success and -1 on error.
 */
static int
relay_http_message_body (
	struct socket* dsock,		/* destination socket */
	http_read_ctl_block_t *ctl_blk,	/* read control block */
	http_mime_header_t *mime
	)
{
	int ret = -1;

	EnterFunction(5);

	if (mime->transfer_encoding) {
		/*
		 * 19.4.6 Introduction of Transfer-Encoding
		 * HTTP/1.1 introduces the Transfer-Encoding header field
		 * (section 14.41). Proxies/gateways MUST remove any
		 * transfer-coding prior to forwarding a message via a
		 * MIME-compliant protocol.
		 * A process for decoding the "chunked" transfer-coding
		 * (section 3.6) can be represented in pseudo-code as:
		 *	length := 0
		 *	read chunk-size, chunk-extension (if any) and CRLF
		 *	while (chunk-size > 0) {
		 *		read chunk-data and CRLF
		 *		append chunk-data to entity-body
		 *		length := length + chunk-size
		 *		read chunk-size and CRLF
		 *	}
		 *	read entity-header
		 *	while (entity-header not empty) {
		 *		append entity-header to existing header fields
		 *		read entity-header
		 *	}
		 *	Content-Length := length
		 *	Remove "chunked" from Transfer-Encoding
		 */
		int len;
		long chunk_size;

		do {
			len = http_read_line (ctl_blk, HTTP_MAX_MIMLINE_SIZE);
			if (len < 0) {
				TCP_VS_ERR("Error in reading chunk size from client\n");
				goto exit;
			}

			if (tcp_vs_xmit (dsock, ctl_blk->info, len + 2,
					 MSG_MORE) < 0) {
				TCP_VS_ERR("Error in xmitting chunk size & extension\n");
				goto exit;
			}

			/* the line has been sent; its CR may be overwritten
			   to terminate the size for parsing */
			ctl_blk->info[len] = 0;
			chunk_size = get_chunk_size (ctl_blk->info);
			TCP_VS_DBG(5, "Chunked line: %s\n", ctl_blk->info);

			/*
			 * A negative size means overflow. The size plus the
			 * CRLF behind the chunk data must also fit the int
			 * length taken by relay_http_data().
			 */
			if ((chunk_size < 0)
			    || (chunk_size > (long) (~0U >> 1) - 2)) {
				TCP_VS_ERR("Invalid chunk size\n");
				goto exit;
			}

			if (chunk_size > 0) {
				/* chunk data and the CRLF that follows it */
				if (relay_http_data (dsock, ctl_blk,
						     (int) chunk_size + 2) < 0) {
					TCP_VS_ERR("Error in xmitting chunk data\n");
					goto exit;
				}
			}
		} while (chunk_size > 0);

		/* relay the trailer */
		do {
			len = http_read_line (ctl_blk, HTTP_MAX_MIMLINE_SIZE);
			if (len < 0) {
				TCP_VS_ERR("Error in reading trailer.\n");
				goto exit;
			}

			if (tcp_vs_xmit (dsock, ctl_blk->info, len + 2,
					 MSG_MORE) < 0) {
				TCP_VS_ERR("Error in xmitting trailer\n");
				goto exit;
			}
		} while (len != 0);	/* the empty line ends the trailer */

		ret = 0;
	} else if (mime->content_length > 0) {
		ret = relay_http_data (dsock, ctl_blk, mime->content_length);
	} else if (mime->content_length < 0) {
		/* ret stays -1 */
		TCP_VS_ERR("Invalid content length\n");
	} else if (mime->sep) {
		ret = relay_multiparts (dsock, ctl_blk, mime);
	} else {
		ret = 0;	/* ? */
	}

  exit:
	LeaveFunction(5);
	return ret;
}


/****************************************************************************
 *	get response from the specified server
 *
 * Reads one response from 'dsock' through 'buffer' ('buflen' bytes) and
 * relays status line, header and body to 'csock'.
 *
 * req:   the request that is being answered (a HEAD request has no
 *        response body)
 * close: set to 1 when the server sent "Connection: close", else to 0
 *
 * Returns 0 (or the result of relaying the body) on success, -1 on error.
 */
static int
http_get_response (struct socket *csock, struct socket* dsock,
		   http_request_t *req, char* buffer, int buflen, int *close)
{
	http_read_ctl_block_t read_ctl_blk;
	http_response_t resp;
	int len, ret = -1;

	EnterFunction(5);

	memset (&read_ctl_blk, 0, sizeof (read_ctl_blk));
	read_ctl_blk.buffer = buffer;
	read_ctl_blk.len = buflen;
	read_ctl_blk.sock = dsock;
	*close = 0;

	/* cleared before the first "goto exit": the exit path frees
	   resp.mime.sep */
	memset (&resp, 0, sizeof(resp));

	/* Do we have data ? */
	while (skb_queue_empty(&(dsock->sk->receive_queue))) {
		/* do not wait for ever on a connection that is gone */
		if (dsock->sk->state != TCP_ESTABLISHED
		    && dsock->sk->state != TCP_CLOSE_WAIT) {
			TCP_VS_ERR("Server connection closed before response\n");
			goto exit;
		}
		interruptible_sleep_on_timeout(&dsock->wait, HZ);
	}

	/* read status line from server */
	len = http_read_line (&read_ctl_blk, HTTP_MAX_STALINE_SIZE);
	if (len < 0) {
		TCP_VS_ERR("Error in reading status line from server\n");
		goto exit;
	}

	/* xmit status line to client (2 more bytes for CRLF) */
	if (tcp_vs_xmit (csock, read_ctl_blk.info, len + 2, MSG_MORE) < 0) {
		TCP_VS_ERR("Error in sending status line\n");
		goto exit;
	}

	/* parse status line */
	if (parse_http_status_line (read_ctl_blk.info, len, &resp)
	    == PARSE_ERROR) {
		goto exit;
	}

	/* parse MIME header */
	do {
		if ((len = http_read_line (&read_ctl_blk,
					   HTTP_MAX_MIMLINE_SIZE)) < 0) {
			goto exit;
		}

		/* xmit MIME header (2 more bytes for CRLF) */
		if (tcp_vs_xmit (csock, read_ctl_blk.info, len + 2,
				 MSG_MORE) < 0) {
			TCP_VS_ERR("Error in sending MIME header\n");
			goto exit;
		}

		/*
		 * NOTE(review): unescaping turns every '+' of a header line
		 * into a space, a multipart boundary included -- confirm
		 * that this is intended for response headers.
		 */
		http_line_unescape (read_ctl_blk.info, len);
		http_mime_parse (read_ctl_blk.info, len, &resp.mime,
				 resp.status_code == 206);
	} while (len != 0);	/* http header end with CRLF,CRLF */

	*close = resp.mime.connection_close;

	/*
	 * Any response message which "MUST NOT" include a message-body (such
	 * as the 1xx, 204, and 304 responses and any response to a HEAD
	 * request) is always terminated by the first empty line after the
	 * header fields, regardless of the entity-header fields present in
	 * the message.
	 */
	if ((req->method == HTTP_M_HEAD)
	    || (resp.status_code < 200)
	    || (resp.status_code == 204 || resp.status_code == 304)) {
		ret = 0;
		goto exit;
	}

	ret = relay_http_message_body (csock, &read_ctl_blk, &resp.mime);

  exit:
	/* the boundary string is owned here, whatever path was taken */
	if (resp.mime.sep) {
		kfree (resp.mime.sep);
	}
	LeaveFunction(5);
	return ret;
}


/****************************************************************************
 *	Is there any data in socket?
 *
 * Bytes that are already buffered in the read control block count as
 * available data: they can hold the next request of a client that sends
 * several requests without waiting for the responses.
 *
 * return:
 *	-1, Socket error
 *	 0, No data can read from socket
 *	 1, Data available
 */
static inline int
data_available (http_read_ctl_block_t *ctl_blk)
{
	char buf[10];
	int ret = 0;

	EnterFunction(6);

	if (ctl_blk->remaining > 0) {
		ret = 1;
		goto out;
	}

	/* Do we have data ? */
	if (skb_queue_empty(&(ctl_blk->sock->sk->receive_queue)))
		goto out;

	/* peek only: the data stays queued for the real read */
	ret = tcp_vs_recvbuffer(ctl_blk->sock, buf, 10, MSG_PEEK);
	if (ret > 0)
		ret = 1;
	else if (ret < 0)
		ret = -1;	/* callers only know -1, 0 and 1 */

  out:
	LeaveFunction(6);
	return ret;
}


/****************************************************************************
 *    HTTP content-based scheduling:
 *      1, For http 1.0 request, parse the http request, select a server
 *	according to the request, and create a socket the server finally.
 *	2, For http 1.1 request, do all the work by itself. Parse every http
 *      message header and direct each message to the right server according
 *	to the scheduling rule. For a worker thread to get response by order
 *	from the server.
* returns: * 0, success, schedule just chose a dest server * 1, success, schedule has done all the jobs * -1, redirect to the local server * -2, error */ static int tcp_vs_phttp_schedule(struct tcp_vs_conn *conn, struct tcp_vs_service *svc) { http_request_t req; http_read_ctl_block_t read_ctl_blk; char* buffer = NULL; /* store data from server */ int ret = 1; /* scheduler has done all the jobs */ int len; unsigned long last_read; int close_server = 0; http_dest_server_t *server = NULL; struct list_head dest_list; /* destination server list */ struct list_head *l; struct tcp_vs_dest *dest; struct socket *dsock; DECLARE_WAIT_QUEUE_HEAD(WQ); EnterFunction(5); memset (&read_ctl_blk, 0, sizeof (read_ctl_blk)); read_ctl_blk.buffer = conn->buffer; read_ctl_blk.len = conn->buflen; read_ctl_blk.flag = MSG_PEEK; read_ctl_blk.sock = conn->csock; conn->dest = NULL; conn->dsock = NULL; INIT_LIST_HEAD(&dest_list); if (read_ctl_blk.len < HTTP_MAX_REQLINE_SIZE) { TCP_VS_ERR("Error: buffer is too small!\n"); goto exit; } /* Do we have data ? 
*/ while (skb_queue_empty(&(conn->csock->sk->receive_queue))) { interruptible_sleep_on_timeout(&conn->csock->wait, HZ); } /* allocate buffer to store data that get from servers */ buffer = (char*) get_free_page(GFP_KERNEL); if (buffer == NULL) { goto exit; } last_read = jiffies; do { switch (data_available(&read_ctl_blk)) { case -1: TCP_VS_DBG(5, "Socket error before reading request line.\n"); ret = -2; goto done; case 0: /* check if the service is stopped or system is unloaded */ if (svc->stop != 0 || sysctl_ktcpvs_unload != 0) { TCP_VS_DBG(5, "phttp scheduling exit (pid=%d)\n", current->pid); goto done; } interruptible_sleep_on_timeout(&WQ, HZ); if ((jiffies - last_read) > KEEP_ALIVE_TIMES*HZ) { TCP_VS_DBG(5, "Timeout, disconnect.\n"); goto done; } continue; case 1: last_read = jiffies; break; } /* read request line from client socket */ len = http_read_line(&read_ctl_blk, HTTP_MAX_REQLINE_SIZE); if (len < 0) { TCP_VS_ERR("Error reading request line from client\n"); ret = -2; goto done; } /* parse the http request line */ memset(&req, 0, sizeof (req)); if (parse_http_request_line(read_ctl_blk.info, len, &req) != PARSE_OK) { TCP_VS_ERR("Cannot parse http request\n"); ret = -2; goto done; } /* select a server */ dest = tcp_vs_phttp_matchrule(svc, &req); if (!dest) { TCP_VS_DBG(5, "Can't find a right server\n"); if (read_ctl_blk.flag == MSG_PEEK) { ret = -1; goto exit; /* redirect to a local port */ } else { ret = -2; goto done; } } if (req.version <= HTTP_V_1_0) { TCP_VS_DBG(5, "HTTP: server %d.%d.%d.%d:%d " "conns %d refcnt %d weight %d\n", NIPQUAD(dest->addr), ntohs(dest->port), atomic_read(&dest->conns), atomic_read (&dest->refcnt), dest->weight); conn->dsock = tcp_vs_connect2dest(dest); if (conn->dsock == NULL) { TCP_VS_ERR("The destination is not available!\n"); goto exit; } atomic_inc(&dest->conns); conn->dest = dest; ret = 0; goto exit; } /* * For http 1.1 client, continue processing for * persistent connections */ /* find the dest server from server list, 
delete it if it's a dead one */ dsock = NULL; list_for_each (l, &dest_list) { server = list_entry (l, http_dest_server_t, d_list); if (server->dest == dest) { assert(server->sock != NULL); if (server->sock->sk->state != TCP_ESTABLISHED && server->sock->sk->state != TCP_CLOSE_WAIT) { sock_release(server->sock); list_del(&server->d_list); atomic_dec(&server->dest->conns); kfree(server); } else { dsock = server->sock; } break; } } if (dsock == NULL) { /* open socket for a new server */ dsock = tcp_vs_connect2dest(dest); if (dsock == NULL) { TCP_VS_ERR("The destination is not available\n"); if (read_ctl_blk.flag == MSG_PEEK) { ret = -1; goto exit; /* redirect to a local port */ } else { ret = -2; goto done; } } if ((server = kmalloc(sizeof(http_dest_server_t), GFP_ATOMIC)) == NULL) { ret = -2; goto done; } server->dest = dest; server->sock = dsock; atomic_inc(&dest->conns); list_add_tail(&server->d_list, &dest_list); } /* re-read the peeked data for the first http request of a connection */ if (read_ctl_blk.flag == MSG_PEEK) { read_ctl_blk.flag = 0; read_ctl_blk.offset = 0; read_ctl_blk.remaining = 0; if (tcp_vs_recvbuffer(conn->csock, read_ctl_blk.buffer, len + 2, 0) != (len + 2)) { TCP_VS_ERR("Error in re-reading http request line\n"); goto exit; } } /* xmit request line (2 more bytes for CRLF) */ if (tcp_vs_xmit(dsock, read_ctl_blk.info, len + 2, MSG_MORE) < 0) { TCP_VS_ERR("Error in sending request line\n"); goto done; } /* Process MIME header */ do { len = http_read_line (&read_ctl_blk, HTTP_MAX_MIMLINE_SIZE); if (len < 0) { goto done; } /* xmit MIME header (2 more bytes for CRLF) */ if (tcp_vs_xmit (dsock, read_ctl_blk.info, len + 2, MSG_MORE) < 0) { TCP_VS_ERR("Error in sending MIME header\n"); goto done; } http_line_unescape (read_ctl_blk.info, len); http_mime_parse (read_ctl_blk.info, len, &req.mime, 0); } while (len != 0); /* http header end with CRLF,CRLF */ if (relay_http_message_body(dsock, &read_ctl_blk, &req.mime) != 0) { TCP_VS_ERR("Error in sending 
http message body\n"); goto done; } if (http_get_response(conn->csock, dsock, &req, buffer, PAGE_SIZE, &close_server) < 0) { goto done; } if (close_server) { sock_release(dsock); list_del (&server->d_list); atomic_dec (&server->dest->conns); kfree(server); TCP_VS_DBG(5, "Close server connection.\n"); break; /* close the connection? tbd */ } } while (req.mime.connection_close != 1); done: for (l = &dest_list; l->next != l; ) { server = list_entry(l->next, http_dest_server_t, d_list); if (server->sock) { sock_release(server->sock); } list_del(&server->d_list); if (server->dest) atomic_dec(&server->dest->conns); kfree(server); } exit: if (buffer) { free_page ((unsigned long)buffer); } LeaveFunction(5); return ret; } static struct tcp_vs_scheduler tcp_vs_phttp_scheduler = { {0}, /* n_list */ "phttp", /* name */ THIS_MODULE, /* this module */ tcp_vs_phttp_init_svc, /* initializer */ tcp_vs_phttp_done_svc, /* done */ tcp_vs_phttp_update_svc,/* update */ tcp_vs_phttp_schedule, /* select a server by http request */ }; static int __init tcp_vs_phttp_init(void) { INIT_LIST_HEAD(&tcp_vs_phttp_scheduler.n_list); return register_tcp_vs_scheduler(&tcp_vs_phttp_scheduler); } static void __exit tcp_vs_phttp_cleanup(void) { unregister_tcp_vs_scheduler(&tcp_vs_phttp_scheduler); } module_init(tcp_vs_phttp_init); module_exit(tcp_vs_phttp_cleanup); MODULE_LICENSE("GPL");