Gist danp/66185 — created February 18, 2009.

Backport/update patch for the IPVS (IP Virtual Server) subsystem, taking
net/ipv4/ipvs from linux-2.6.20.y to linux-2.6.27.y. It covers Kconfig
(menu -> menuconfig conversion, removal of per-option "depends on IP_VS"),
ip_vs_app.c, ip_vs_conn.c, and ip_vs_core.c (sk_buff ** -> sk_buff * hook
signature changes, ip_hdr()/skb_make_writable() conversions, net-namespace
proc registration, and connection-sync improvements).

NOTE (review): the unified diff below was scraped from a rendered web page.
Each line carries a trailing " | |" table artifact, and blank context lines
inside hunks were lost, so the hunk line counts no longer match their
content. This text will NOT apply with patch(1) as-is; fetch the raw gist
for a usable copy.
diff -u linux-2.6.20.y/net/ipv4/ipvs/Kconfig linux-2.6.27.y/net/ipv4/ipvs/Kconfig | |
--- linux-2.6.20.y/net/ipv4/ipvs/Kconfig 2009-02-16 11:57:22.000000000 -0400 | |
+++ linux-2.6.27.y/net/ipv4/ipvs/Kconfig 2009-02-16 12:56:22.000000000 -0400 | |
@@ -1,10 +1,7 @@ | |
# | |
# IP Virtual Server configuration | |
# | |
-menu "IP: Virtual Server Configuration" | |
- depends on NETFILTER | |
- | |
-config IP_VS | |
+menuconfig IP_VS | |
tristate "IP virtual server support (EXPERIMENTAL)" | |
depends on NETFILTER | |
---help--- | |
@@ -25,9 +22,10 @@ | |
If you want to compile it in kernel, say Y. To compile it as a | |
module, choose M here. If unsure, say N. | |
+if IP_VS | |
+ | |
config IP_VS_DEBUG | |
bool "IP virtual server debugging" | |
- depends on IP_VS | |
---help--- | |
Say Y here if you want to get additional messages useful in | |
debugging the IP virtual server code. You can change the debug | |
@@ -35,7 +33,6 @@ | |
config IP_VS_TAB_BITS | |
int "IPVS connection table size (the Nth power of 2)" | |
- depends on IP_VS | |
default "12" | |
---help--- | |
The IPVS connection hash table uses the chaining scheme to handle | |
@@ -61,42 +58,35 @@ | |
needed for your box. | |
comment "IPVS transport protocol load balancing support" | |
- depends on IP_VS | |
config IP_VS_PROTO_TCP | |
bool "TCP load balancing support" | |
- depends on IP_VS | |
---help--- | |
This option enables support for load balancing TCP transport | |
protocol. Say Y if unsure. | |
config IP_VS_PROTO_UDP | |
bool "UDP load balancing support" | |
- depends on IP_VS | |
---help--- | |
This option enables support for load balancing UDP transport | |
protocol. Say Y if unsure. | |
config IP_VS_PROTO_ESP | |
bool "ESP load balancing support" | |
- depends on IP_VS | |
---help--- | |
This option enables support for load balancing ESP (Encapsulation | |
Security Payload) transport protocol. Say Y if unsure. | |
config IP_VS_PROTO_AH | |
bool "AH load balancing support" | |
- depends on IP_VS | |
---help--- | |
This option enables support for load balancing AH (Authentication | |
Header) transport protocol. Say Y if unsure. | |
comment "IPVS scheduler" | |
- depends on IP_VS | |
config IP_VS_RR | |
tristate "round-robin scheduling" | |
- depends on IP_VS | |
---help--- | |
The robin-robin scheduling algorithm simply directs network | |
connections to different real servers in a round-robin manner. | |
@@ -106,7 +96,6 @@ | |
config IP_VS_WRR | |
tristate "weighted round-robin scheduling" | |
- depends on IP_VS | |
---help--- | |
The weighted robin-robin scheduling algorithm directs network | |
connections to different real servers based on server weights | |
@@ -120,7 +109,6 @@ | |
config IP_VS_LC | |
tristate "least-connection scheduling" | |
- depends on IP_VS | |
---help--- | |
The least-connection scheduling algorithm directs network | |
connections to the server with the least number of active | |
@@ -131,7 +119,6 @@ | |
config IP_VS_WLC | |
tristate "weighted least-connection scheduling" | |
- depends on IP_VS | |
---help--- | |
The weighted least-connection scheduling algorithm directs network | |
connections to the server with the least active connections | |
@@ -142,7 +129,6 @@ | |
config IP_VS_LBLC | |
tristate "locality-based least-connection scheduling" | |
- depends on IP_VS | |
---help--- | |
The locality-based least-connection scheduling algorithm is for | |
destination IP load balancing. It is usually used in cache cluster. | |
@@ -157,7 +143,6 @@ | |
config IP_VS_LBLCR | |
tristate "locality-based least-connection with replication scheduling" | |
- depends on IP_VS | |
---help--- | |
The locality-based least-connection with replication scheduling | |
algorithm is also for destination IP load balancing. It is | |
@@ -176,7 +161,6 @@ | |
config IP_VS_DH | |
tristate "destination hashing scheduling" | |
- depends on IP_VS | |
---help--- | |
The destination hashing scheduling algorithm assigns network | |
connections to the servers through looking up a statically assigned | |
@@ -187,7 +171,6 @@ | |
config IP_VS_SH | |
tristate "source hashing scheduling" | |
- depends on IP_VS | |
---help--- | |
The source hashing scheduling algorithm assigns network | |
connections to the servers through looking up a statically assigned | |
@@ -198,7 +181,6 @@ | |
config IP_VS_SED | |
tristate "shortest expected delay scheduling" | |
- depends on IP_VS | |
---help--- | |
The shortest expected delay scheduling algorithm assigns network | |
connections to the server with the shortest expected delay. The | |
@@ -212,7 +194,6 @@ | |
config IP_VS_NQ | |
tristate "never queue scheduling" | |
- depends on IP_VS | |
---help--- | |
The never queue scheduling algorithm adopts a two-speed model. | |
When there is an idle server available, the job will be sent to | |
@@ -225,11 +206,10 @@ | |
module, choose M here. If unsure, say N. | |
comment 'IPVS application helper' | |
- depends on IP_VS | |
config IP_VS_FTP | |
tristate "FTP protocol helper" | |
- depends on IP_VS && IP_VS_PROTO_TCP | |
+ depends on IP_VS_PROTO_TCP | |
---help--- | |
FTP is a protocol that transfers IP address and/or port number in | |
the payload. In the virtual server via Network Address Translation, | |
@@ -241,4 +221,4 @@ | |
If you want to compile it in kernel, say Y. To compile it as a | |
module, choose M here. If unsure, say N. | |
-endmenu | |
+endif # IP_VS | |
diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_app.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_app.c | |
--- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_app.c 2009-02-16 11:57:22.000000000 -0400 | |
+++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_app.c 2009-02-16 12:56:22.000000000 -0400 | |
@@ -1,8 +1,6 @@ | |
/* | |
* ip_vs_app.c: Application module support for IPVS | |
* | |
- * Version: $Id: ip_vs_app.c,v 1.17 2003/03/22 06:31:21 wensong Exp $ | |
- * | |
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org> | |
* | |
* This program is free software; you can redistribute it and/or | |
@@ -25,6 +23,8 @@ | |
#include <linux/skbuff.h> | |
#include <linux/in.h> | |
#include <linux/ip.h> | |
+#include <linux/netfilter.h> | |
+#include <net/net_namespace.h> | |
#include <net/protocol.h> | |
#include <net/tcp.h> | |
#include <asm/system.h> | |
@@ -49,18 +49,13 @@ | |
*/ | |
static inline int ip_vs_app_get(struct ip_vs_app *app) | |
{ | |
- /* test and get the module atomically */ | |
- if (app->module) | |
- return try_module_get(app->module); | |
- else | |
- return 1; | |
+ return try_module_get(app->module); | |
} | |
static inline void ip_vs_app_put(struct ip_vs_app *app) | |
{ | |
- if (app->module) | |
- module_put(app->module); | |
+ module_put(app->module); | |
} | |
@@ -327,18 +322,18 @@ | |
spin_unlock(&cp->lock); | |
} | |
-static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff **pskb, | |
+static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb, | |
struct ip_vs_app *app) | |
{ | |
int diff; | |
- unsigned int tcp_offset = (*pskb)->nh.iph->ihl*4; | |
+ const unsigned int tcp_offset = ip_hdrlen(skb); | |
struct tcphdr *th; | |
__u32 seq; | |
- if (!ip_vs_make_skb_writable(pskb, tcp_offset + sizeof(*th))) | |
+ if (!skb_make_writable(skb, tcp_offset + sizeof(*th))) | |
return 0; | |
- th = (struct tcphdr *)((*pskb)->nh.raw + tcp_offset); | |
+ th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset); | |
/* | |
* Remember seq number in case this pkt gets resized | |
@@ -359,7 +354,7 @@ | |
if (app->pkt_out == NULL) | |
return 1; | |
- if (!app->pkt_out(app, cp, pskb, &diff)) | |
+ if (!app->pkt_out(app, cp, skb, &diff)) | |
return 0; | |
/* | |
@@ -377,7 +372,7 @@ | |
* called by ipvs packet handler, assumes previously checked cp!=NULL | |
* returns false if it can't handle packet (oom) | |
*/ | |
-int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff **pskb) | |
+int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb) | |
{ | |
struct ip_vs_app *app; | |
@@ -390,7 +385,7 @@ | |
/* TCP is complicated */ | |
if (cp->protocol == IPPROTO_TCP) | |
- return app_tcp_pkt_out(cp, pskb, app); | |
+ return app_tcp_pkt_out(cp, skb, app); | |
/* | |
* Call private output hook function | |
@@ -398,22 +393,22 @@ | |
if (app->pkt_out == NULL) | |
return 1; | |
- return app->pkt_out(app, cp, pskb, NULL); | |
+ return app->pkt_out(app, cp, skb, NULL); | |
} | |
-static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff **pskb, | |
+static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb, | |
struct ip_vs_app *app) | |
{ | |
int diff; | |
- unsigned int tcp_offset = (*pskb)->nh.iph->ihl*4; | |
+ const unsigned int tcp_offset = ip_hdrlen(skb); | |
struct tcphdr *th; | |
__u32 seq; | |
- if (!ip_vs_make_skb_writable(pskb, tcp_offset + sizeof(*th))) | |
+ if (!skb_make_writable(skb, tcp_offset + sizeof(*th))) | |
return 0; | |
- th = (struct tcphdr *)((*pskb)->nh.raw + tcp_offset); | |
+ th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset); | |
/* | |
* Remember seq number in case this pkt gets resized | |
@@ -434,7 +429,7 @@ | |
if (app->pkt_in == NULL) | |
return 1; | |
- if (!app->pkt_in(app, cp, pskb, &diff)) | |
+ if (!app->pkt_in(app, cp, skb, &diff)) | |
return 0; | |
/* | |
@@ -452,7 +447,7 @@ | |
* called by ipvs packet handler, assumes previously checked cp!=NULL. | |
* returns false if can't handle packet (oom). | |
*/ | |
-int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff **pskb) | |
+int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb) | |
{ | |
struct ip_vs_app *app; | |
@@ -465,7 +460,7 @@ | |
/* TCP is complicated */ | |
if (cp->protocol == IPPROTO_TCP) | |
- return app_tcp_pkt_in(cp, pskb, app); | |
+ return app_tcp_pkt_in(cp, skb, app); | |
/* | |
* Call private input hook function | |
@@ -473,7 +468,7 @@ | |
if (app->pkt_in == NULL) | |
return 1; | |
- return app->pkt_in(app, cp, pskb, NULL); | |
+ return app->pkt_in(app, cp, skb, NULL); | |
} | |
@@ -549,7 +544,7 @@ | |
return 0; | |
} | |
-static struct seq_operations ip_vs_app_seq_ops = { | |
+static const struct seq_operations ip_vs_app_seq_ops = { | |
.start = ip_vs_app_seq_start, | |
.next = ip_vs_app_seq_next, | |
.stop = ip_vs_app_seq_stop, | |
@@ -561,7 +556,7 @@ | |
return seq_open(file, &ip_vs_app_seq_ops); | |
} | |
-static struct file_operations ip_vs_app_fops = { | |
+static const struct file_operations ip_vs_app_fops = { | |
.owner = THIS_MODULE, | |
.open = ip_vs_app_open, | |
.read = seq_read, | |
@@ -577,7 +572,6 @@ | |
int ip_vs_skb_replace(struct sk_buff *skb, gfp_t pri, | |
char *o_buf, int o_len, char *n_buf, int n_len) | |
{ | |
- struct iphdr *iph; | |
int diff; | |
int o_offset; | |
int o_left; | |
@@ -603,27 +597,26 @@ | |
skb_put(skb, diff); | |
memmove(skb->data + o_offset + n_len, | |
skb->data + o_offset + o_len, o_left); | |
- memcpy(skb->data + o_offset, n_buf, n_len); | |
+ skb_copy_to_linear_data_offset(skb, o_offset, n_buf, n_len); | |
} | |
/* must update the iph total length here */ | |
- iph = skb->nh.iph; | |
- iph->tot_len = htons(skb->len); | |
+ ip_hdr(skb)->tot_len = htons(skb->len); | |
LeaveFunction(9); | |
return 0; | |
} | |
-int ip_vs_app_init(void) | |
+int __init ip_vs_app_init(void) | |
{ | |
/* we will replace it with proc_net_ipvs_create() soon */ | |
- proc_net_fops_create("ip_vs_app", 0, &ip_vs_app_fops); | |
+ proc_net_fops_create(&init_net, "ip_vs_app", 0, &ip_vs_app_fops); | |
return 0; | |
} | |
void ip_vs_app_cleanup(void) | |
{ | |
- proc_net_remove("ip_vs_app"); | |
+ proc_net_remove(&init_net, "ip_vs_app"); | |
} | |
diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_conn.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_conn.c | |
--- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_conn.c 2009-02-16 11:57:22.000000000 -0400 | |
+++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_conn.c 2009-02-16 12:56:22.000000000 -0400 | |
@@ -5,8 +5,6 @@ | |
* high-performance and highly available server based on a | |
* cluster of servers. | |
* | |
- * Version: $Id: ip_vs_conn.c,v 1.31 2003/04/18 09:03:16 wensong Exp $ | |
- * | |
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org> | |
* Peter Kese <peter.kese@ijs.si> | |
* Julian Anastasov <ja@ssi.bg> | |
@@ -35,6 +33,7 @@ | |
#include <linux/jhash.h> | |
#include <linux/random.h> | |
+#include <net/net_namespace.h> | |
#include <net/ip_vs.h> | |
@@ -392,7 +391,15 @@ | |
atomic_inc(&dest->refcnt); | |
/* Bind with the destination and its corresponding transmitter */ | |
- cp->flags |= atomic_read(&dest->conn_flags); | |
+ if ((cp->flags & IP_VS_CONN_F_SYNC) && | |
+ (!(cp->flags & IP_VS_CONN_F_TEMPLATE))) | |
+ /* if the connection is not template and is created | |
+ * by sync, preserve the activity flag. | |
+ */ | |
+ cp->flags |= atomic_read(&dest->conn_flags) & | |
+ (~IP_VS_CONN_F_INACTIVE); | |
+ else | |
+ cp->flags |= atomic_read(&dest->conn_flags); | |
cp->dest = dest; | |
IP_VS_DBG(7, "Bind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d " | |
@@ -411,7 +418,11 @@ | |
/* It is a normal connection, so increase the inactive | |
connection counter because it is in TCP SYNRECV | |
state (inactive) or other protocol inacive state */ | |
- atomic_inc(&dest->inactconns); | |
+ if ((cp->flags & IP_VS_CONN_F_SYNC) && | |
+ (!(cp->flags & IP_VS_CONN_F_INACTIVE))) | |
+ atomic_inc(&dest->activeconns); | |
+ else | |
+ atomic_inc(&dest->inactconns); | |
} else { | |
/* It is a persistent connection/template, so increase | |
the peristent connection counter */ | |
@@ -425,6 +436,24 @@ | |
/* | |
+ * Check if there is a destination for the connection, if so | |
+ * bind the connection to the destination. | |
+ */ | |
+struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp) | |
+{ | |
+ struct ip_vs_dest *dest; | |
+ | |
+ if ((cp) && (!cp->dest)) { | |
+ dest = ip_vs_find_dest(cp->daddr, cp->dport, | |
+ cp->vaddr, cp->vport, cp->protocol); | |
+ ip_vs_bind_dest(cp, dest); | |
+ return dest; | |
+ } else | |
+ return NULL; | |
+} | |
+ | |
+ | |
+/* | |
* Unbind a connection entry with its VS destination | |
* Called by the ip_vs_conn_expire function. | |
*/ | |
@@ -494,8 +523,8 @@ | |
* Checking the dest server status. | |
*/ | |
if ((dest == NULL) || | |
- !(dest->flags & IP_VS_DEST_F_AVAILABLE) || | |
- (sysctl_ip_vs_expire_quiescent_template && | |
+ !(dest->flags & IP_VS_DEST_F_AVAILABLE) || | |
+ (sysctl_ip_vs_expire_quiescent_template && | |
(atomic_read(&dest->weight) == 0))) { | |
IP_VS_DBG(9, "check_template: dest not available for " | |
"protocol %s s:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d " | |
@@ -603,17 +632,14 @@ | |
struct ip_vs_conn *cp; | |
struct ip_vs_protocol *pp = ip_vs_proto_get(proto); | |
- cp = kmem_cache_alloc(ip_vs_conn_cachep, GFP_ATOMIC); | |
+ cp = kmem_cache_zalloc(ip_vs_conn_cachep, GFP_ATOMIC); | |
if (cp == NULL) { | |
IP_VS_ERR_RL("ip_vs_conn_new: no memory available.\n"); | |
return NULL; | |
} | |
- memset(cp, 0, sizeof(*cp)); | |
INIT_LIST_HEAD(&cp->c_list); | |
- init_timer(&cp->timer); | |
- cp->timer.data = (unsigned long)cp; | |
- cp->timer.function = ip_vs_conn_expire; | |
+ setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp); | |
cp->protocol = proto; | |
cp->caddr = caddr; | |
cp->cport = cport; | |
@@ -667,7 +693,7 @@ | |
{ | |
int idx; | |
struct ip_vs_conn *cp; | |
- | |
+ | |
for(idx = 0; idx < IP_VS_CONN_TAB_SIZE; idx++) { | |
ct_read_lock_bh(idx); | |
list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { | |
@@ -695,7 +721,7 @@ | |
int idx; | |
++*pos; | |
- if (v == SEQ_START_TOKEN) | |
+ if (v == SEQ_START_TOKEN) | |
return ip_vs_conn_array(seq, 0); | |
/* more on same hash chain? */ | |
@@ -710,7 +736,7 @@ | |
list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { | |
seq->private = &ip_vs_conn_tab[idx]; | |
return cp; | |
- } | |
+ } | |
ct_read_unlock_bh(idx); | |
} | |
seq->private = NULL; | |
@@ -746,7 +772,7 @@ | |
return 0; | |
} | |
-static struct seq_operations ip_vs_conn_seq_ops = { | |
+static const struct seq_operations ip_vs_conn_seq_ops = { | |
.start = ip_vs_conn_seq_start, | |
.next = ip_vs_conn_seq_next, | |
.stop = ip_vs_conn_seq_stop, | |
@@ -758,13 +784,64 @@ | |
return seq_open(file, &ip_vs_conn_seq_ops); | |
} | |
-static struct file_operations ip_vs_conn_fops = { | |
+static const struct file_operations ip_vs_conn_fops = { | |
.owner = THIS_MODULE, | |
.open = ip_vs_conn_open, | |
.read = seq_read, | |
.llseek = seq_lseek, | |
.release = seq_release, | |
}; | |
+ | |
+static const char *ip_vs_origin_name(unsigned flags) | |
+{ | |
+ if (flags & IP_VS_CONN_F_SYNC) | |
+ return "SYNC"; | |
+ else | |
+ return "LOCAL"; | |
+} | |
+ | |
+static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v) | |
+{ | |
+ | |
+ if (v == SEQ_START_TOKEN) | |
+ seq_puts(seq, | |
+ "Pro FromIP FPrt ToIP TPrt DestIP DPrt State Origin Expires\n"); | |
+ else { | |
+ const struct ip_vs_conn *cp = v; | |
+ | |
+ seq_printf(seq, | |
+ "%-3s %08X %04X %08X %04X %08X %04X %-11s %-6s %7lu\n", | |
+ ip_vs_proto_name(cp->protocol), | |
+ ntohl(cp->caddr), ntohs(cp->cport), | |
+ ntohl(cp->vaddr), ntohs(cp->vport), | |
+ ntohl(cp->daddr), ntohs(cp->dport), | |
+ ip_vs_state_name(cp->protocol, cp->state), | |
+ ip_vs_origin_name(cp->flags), | |
+ (cp->timer.expires-jiffies)/HZ); | |
+ } | |
+ return 0; | |
+} | |
+ | |
+static const struct seq_operations ip_vs_conn_sync_seq_ops = { | |
+ .start = ip_vs_conn_seq_start, | |
+ .next = ip_vs_conn_seq_next, | |
+ .stop = ip_vs_conn_seq_stop, | |
+ .show = ip_vs_conn_sync_seq_show, | |
+}; | |
+ | |
+static int ip_vs_conn_sync_open(struct inode *inode, struct file *file) | |
+{ | |
+ return seq_open(file, &ip_vs_conn_sync_seq_ops); | |
+} | |
+ | |
+static const struct file_operations ip_vs_conn_sync_fops = { | |
+ .owner = THIS_MODULE, | |
+ .open = ip_vs_conn_sync_open, | |
+ .read = seq_read, | |
+ .llseek = seq_lseek, | |
+ .release = seq_release, | |
+}; | |
+ | |
#endif | |
@@ -888,7 +965,7 @@ | |
} | |
-int ip_vs_conn_init(void) | |
+int __init ip_vs_conn_init(void) | |
{ | |
int idx; | |
@@ -902,7 +979,7 @@ | |
/* Allocate ip_vs_conn slab cache */ | |
ip_vs_conn_cachep = kmem_cache_create("ip_vs_conn", | |
sizeof(struct ip_vs_conn), 0, | |
- SLAB_HWCACHE_ALIGN, NULL, NULL); | |
+ SLAB_HWCACHE_ALIGN, NULL); | |
if (!ip_vs_conn_cachep) { | |
vfree(ip_vs_conn_tab); | |
return -ENOMEM; | |
@@ -923,7 +1000,8 @@ | |
rwlock_init(&__ip_vs_conntbl_lock_array[idx].l); | |
} | |
- proc_net_fops_create("ip_vs_conn", 0, &ip_vs_conn_fops); | |
+ proc_net_fops_create(&init_net, "ip_vs_conn", 0, &ip_vs_conn_fops); | |
+ proc_net_fops_create(&init_net, "ip_vs_conn_sync", 0, &ip_vs_conn_sync_fops); | |
/* calculate the random value for connection hash */ | |
get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd)); | |
@@ -939,6 +1017,7 @@ | |
/* Release the empty cache */ | |
kmem_cache_destroy(ip_vs_conn_cachep); | |
- proc_net_remove("ip_vs_conn"); | |
+ proc_net_remove(&init_net, "ip_vs_conn"); | |
+ proc_net_remove(&init_net, "ip_vs_conn_sync"); | |
vfree(ip_vs_conn_tab); | |
} | |
diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_core.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_core.c | |
--- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_core.c 2009-02-16 11:57:22.000000000 -0400 | |
+++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_core.c 2009-02-16 12:56:22.000000000 -0400 | |
@@ -5,8 +5,6 @@ | |
* high-performance and highly available server based on a | |
* cluster of servers. | |
* | |
- * Version: $Id: ip_vs_core.c,v 1.34 2003/05/10 03:05:23 wensong Exp $ | |
- * | |
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org> | |
* Peter Kese <peter.kese@ijs.si> | |
* Julian Anastasov <ja@ssi.bg> | |
@@ -58,7 +56,6 @@ | |
#ifdef CONFIG_IP_VS_DEBUG | |
EXPORT_SYMBOL(ip_vs_get_debug_level); | |
#endif | |
-EXPORT_SYMBOL(ip_vs_make_skb_writable); | |
/* ID used in ICMP lookups */ | |
@@ -163,42 +160,6 @@ | |
} | |
-int ip_vs_make_skb_writable(struct sk_buff **pskb, int writable_len) | |
-{ | |
- struct sk_buff *skb = *pskb; | |
- | |
- /* skb is already used, better copy skb and its payload */ | |
- if (unlikely(skb_shared(skb) || skb->sk)) | |
- goto copy_skb; | |
- | |
- /* skb data is already used, copy it */ | |
- if (unlikely(skb_cloned(skb))) | |
- goto copy_data; | |
- | |
- return pskb_may_pull(skb, writable_len); | |
- | |
- copy_data: | |
- if (unlikely(writable_len > skb->len)) | |
- return 0; | |
- return !pskb_expand_head(skb, 0, 0, GFP_ATOMIC); | |
- | |
- copy_skb: | |
- if (unlikely(writable_len > skb->len)) | |
- return 0; | |
- skb = skb_copy(skb, GFP_ATOMIC); | |
- if (!skb) | |
- return 0; | |
- BUG_ON(skb_is_nonlinear(skb)); | |
- | |
- /* Rest of kernel will get very unhappy if we pass it a | |
- suddenly-orphaned skbuff */ | |
- if ((*pskb)->sk) | |
- skb_set_owner_w(skb, (*pskb)->sk); | |
- kfree_skb(*pskb); | |
- *pskb = skb; | |
- return 1; | |
-} | |
- | |
/* | |
* IPVS persistent scheduling function | |
* It creates a connection entry according to its template if exists, | |
@@ -212,7 +173,7 @@ | |
__be16 ports[2]) | |
{ | |
struct ip_vs_conn *cp = NULL; | |
- struct iphdr *iph = skb->nh.iph; | |
+ struct iphdr *iph = ip_hdr(skb); | |
struct ip_vs_dest *dest; | |
struct ip_vs_conn *ct; | |
__be16 dport; /* destination port to forward */ | |
@@ -381,7 +342,7 @@ | |
ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) | |
{ | |
struct ip_vs_conn *cp = NULL; | |
- struct iphdr *iph = skb->nh.iph; | |
+ struct iphdr *iph = ip_hdr(skb); | |
struct ip_vs_dest *dest; | |
__be16 _ports[2], *pptr; | |
@@ -447,7 +408,7 @@ | |
struct ip_vs_protocol *pp) | |
{ | |
__be16 _ports[2], *pptr; | |
- struct iphdr *iph = skb->nh.iph; | |
+ struct iphdr *iph = ip_hdr(skb); | |
pptr = skb_header_pointer(skb, iph->ihl*4, | |
sizeof(_ports), _ports); | |
@@ -460,7 +421,7 @@ | |
and the destination is RTN_UNICAST (and not local), then create | |
a cache_bypass connection entry */ | |
if (sysctl_ip_vs_cache_bypass && svc->fwmark | |
- && (inet_addr_type(iph->daddr) == RTN_UNICAST)) { | |
+ && (inet_addr_type(&init_net, iph->daddr) == RTN_UNICAST)) { | |
int ret, cs; | |
struct ip_vs_conn *cp; | |
@@ -518,19 +479,19 @@ | |
/* | |
- * It is hooked before NF_IP_PRI_NAT_SRC at the NF_IP_POST_ROUTING | |
+ * It is hooked before NF_IP_PRI_NAT_SRC at the NF_INET_POST_ROUTING | |
* chain, and is used for VS/NAT. | |
* It detects packets for VS/NAT connections and sends the packets | |
* immediately. This can avoid that iptable_nat mangles the packets | |
* for VS/NAT. | |
*/ | |
static unsigned int ip_vs_post_routing(unsigned int hooknum, | |
- struct sk_buff **pskb, | |
+ struct sk_buff *skb, | |
const struct net_device *in, | |
const struct net_device *out, | |
int (*okfn)(struct sk_buff *)) | |
{ | |
- if (!((*pskb)->ipvs_property)) | |
+ if (!skb->ipvs_property) | |
return NF_ACCEPT; | |
/* The packet was sent from IPVS, exit this chain */ | |
return NF_STOP; | |
@@ -541,13 +502,14 @@ | |
return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0)); | |
} | |
-static inline struct sk_buff * | |
-ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user) | |
+static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user) | |
{ | |
- skb = ip_defrag(skb, user); | |
- if (skb) | |
- ip_send_check(skb->nh.iph); | |
- return skb; | |
+ int err = ip_defrag(skb, user); | |
+ | |
+ if (!err) | |
+ ip_send_check(ip_hdr(skb)); | |
+ | |
+ return err; | |
} | |
/* | |
@@ -557,9 +519,10 @@ | |
void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp, | |
struct ip_vs_conn *cp, int inout) | |
{ | |
- struct iphdr *iph = skb->nh.iph; | |
+ struct iphdr *iph = ip_hdr(skb); | |
unsigned int icmp_offset = iph->ihl*4; | |
- struct icmphdr *icmph = (struct icmphdr *)(skb->nh.raw + icmp_offset); | |
+ struct icmphdr *icmph = (struct icmphdr *)(skb_network_header(skb) + | |
+ icmp_offset); | |
struct iphdr *ciph = (struct iphdr *)(icmph + 1); | |
if (inout) { | |
@@ -604,9 +567,8 @@ | |
* Currently handles error types - unreachable, quench, ttl exceeded. | |
* (Only used in VS/NAT) | |
*/ | |
-static int ip_vs_out_icmp(struct sk_buff **pskb, int *related) | |
+static int ip_vs_out_icmp(struct sk_buff *skb, int *related) | |
{ | |
- struct sk_buff *skb = *pskb; | |
struct iphdr *iph; | |
struct icmphdr _icmph, *ic; | |
struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */ | |
@@ -617,14 +579,12 @@ | |
*related = 1; | |
/* reassemble IP fragments */ | |
- if (skb->nh.iph->frag_off & __constant_htons(IP_MF|IP_OFFSET)) { | |
- skb = ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT); | |
- if (!skb) | |
+ if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { | |
+ if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT)) | |
return NF_STOLEN; | |
- *pskb = skb; | |
} | |
- iph = skb->nh.iph; | |
+ iph = ip_hdr(skb); | |
offset = ihl = iph->ihl * 4; | |
ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph); | |
if (ic == NULL) | |
@@ -659,7 +619,7 @@ | |
return NF_ACCEPT; | |
/* Is the embedded protocol header present? */ | |
- if (unlikely(cih->frag_off & __constant_htons(IP_OFFSET) && | |
+ if (unlikely(cih->frag_off & htons(IP_OFFSET) && | |
pp->dont_defrag)) | |
return NF_ACCEPT; | |
@@ -675,13 +635,12 @@ | |
verdict = NF_DROP; | |
if (IP_VS_FWD_METHOD(cp) != 0) { | |
- IP_VS_ERR("shouldn't reach here, because the box is on the" | |
+ IP_VS_ERR("shouldn't reach here, because the box is on the " | |
"half connection in the tun/dr module.\n"); | |
} | |
/* Ensure the checksum is correct */ | |
- if (skb->ip_summed != CHECKSUM_UNNECESSARY && | |
- ip_vs_checksum_complete(skb, ihl)) { | |
+ if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) { | |
/* Failed checksum! */ | |
IP_VS_DBG(1, "Forward ICMP: failed checksum from %d.%d.%d.%d!\n", | |
NIPQUAD(iph->saddr)); | |
@@ -690,9 +649,8 @@ | |
if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol) | |
offset += 2 * sizeof(__u16); | |
- if (!ip_vs_make_skb_writable(pskb, offset)) | |
+ if (!skb_make_writable(skb, offset)) | |
goto out; | |
- skb = *pskb; | |
ip_vs_nat_icmp(skb, pp, cp, 1); | |
@@ -712,24 +670,22 @@ | |
{ | |
struct tcphdr _tcph, *th; | |
- th = skb_header_pointer(skb, skb->nh.iph->ihl * 4, | |
- sizeof(_tcph), &_tcph); | |
+ th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph); | |
if (th == NULL) | |
return 0; | |
return th->rst; | |
} | |
/* | |
- * It is hooked at the NF_IP_FORWARD chain, used only for VS/NAT. | |
+ * It is hooked at the NF_INET_FORWARD chain, used only for VS/NAT. | |
* Check if outgoing packet belongs to the established ip_vs_conn, | |
* rewrite addresses of the packet and send it on its way... | |
*/ | |
static unsigned int | |
-ip_vs_out(unsigned int hooknum, struct sk_buff **pskb, | |
+ip_vs_out(unsigned int hooknum, struct sk_buff *skb, | |
const struct net_device *in, const struct net_device *out, | |
int (*okfn)(struct sk_buff *)) | |
{ | |
- struct sk_buff *skb = *pskb; | |
struct iphdr *iph; | |
struct ip_vs_protocol *pp; | |
struct ip_vs_conn *cp; | |
@@ -740,14 +696,13 @@ | |
if (skb->ipvs_property) | |
return NF_ACCEPT; | |
- iph = skb->nh.iph; | |
+ iph = ip_hdr(skb); | |
if (unlikely(iph->protocol == IPPROTO_ICMP)) { | |
- int related, verdict = ip_vs_out_icmp(pskb, &related); | |
+ int related, verdict = ip_vs_out_icmp(skb, &related); | |
if (related) | |
return verdict; | |
- skb = *pskb; | |
- iph = skb->nh.iph; | |
+ iph = ip_hdr(skb); | |
} | |
pp = ip_vs_proto_get(iph->protocol); | |
@@ -755,13 +710,11 @@ | |
return NF_ACCEPT; | |
/* reassemble IP fragments */ | |
- if (unlikely(iph->frag_off & __constant_htons(IP_MF|IP_OFFSET) && | |
+ if (unlikely(iph->frag_off & htons(IP_MF|IP_OFFSET) && | |
!pp->dont_defrag)) { | |
- skb = ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT); | |
- if (!skb) | |
+ if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT)) | |
return NF_STOLEN; | |
- iph = skb->nh.iph; | |
- *pskb = skb; | |
+ iph = ip_hdr(skb); | |
} | |
ihl = iph->ihl << 2; | |
@@ -803,25 +756,23 @@ | |
IP_VS_DBG_PKT(11, pp, skb, 0, "Outgoing packet"); | |
- if (!ip_vs_make_skb_writable(pskb, ihl)) | |
+ if (!skb_make_writable(skb, ihl)) | |
goto drop; | |
/* mangle the packet */ | |
- if (pp->snat_handler && !pp->snat_handler(pskb, pp, cp)) | |
+ if (pp->snat_handler && !pp->snat_handler(skb, pp, cp)) | |
+ goto drop; | |
+ ip_hdr(skb)->saddr = cp->vaddr; | |
+ ip_send_check(ip_hdr(skb)); | |
+ | |
+ /* For policy routing, packets originating from this | |
+ * machine itself may be routed differently to packets | |
+ * passing through. We want this packet to be routed as | |
+ * if it came from this machine itself. So re-compute | |
+ * the routing information. | |
+ */ | |
+ if (ip_route_me_harder(skb, RTN_LOCAL) != 0) | |
goto drop; | |
- skb = *pskb; | |
- skb->nh.iph->saddr = cp->vaddr; | |
- ip_send_check(skb->nh.iph); | |
- | |
- /* For policy routing, packets originating from this | |
- * machine itself may be routed differently to packets | |
- * passing through. We want this packet to be routed as | |
- * if it came from this machine itself. So re-compute | |
- * the routing information. | |
- */ | |
- if (ip_route_me_harder(pskb, RTN_LOCAL) != 0) | |
- goto drop; | |
- skb = *pskb; | |
IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT"); | |
@@ -836,7 +787,7 @@ | |
drop: | |
ip_vs_conn_put(cp); | |
- kfree_skb(*pskb); | |
+ kfree_skb(skb); | |
return NF_STOLEN; | |
} | |
@@ -847,10 +798,9 @@ | |
* forward to the right destination host if relevant. | |
* Currently handles error types - unreachable, quench, ttl exceeded. | |
*/ | |
-static int | |
-ip_vs_in_icmp(struct sk_buff **pskb, int *related, unsigned int hooknum) | |
+static int | |
+ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) | |
{ | |
- struct sk_buff *skb = *pskb; | |
struct iphdr *iph; | |
struct icmphdr _icmph, *ic; | |
struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */ | |
@@ -861,16 +811,13 @@ | |
*related = 1; | |
/* reassemble IP fragments */ | |
- if (skb->nh.iph->frag_off & __constant_htons(IP_MF|IP_OFFSET)) { | |
- skb = ip_vs_gather_frags(skb, | |
- hooknum == NF_IP_LOCAL_IN ? | |
- IP_DEFRAG_VS_IN : IP_DEFRAG_VS_FWD); | |
- if (!skb) | |
+ if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { | |
+ if (ip_vs_gather_frags(skb, hooknum == NF_INET_LOCAL_IN ? | |
+ IP_DEFRAG_VS_IN : IP_DEFRAG_VS_FWD)) | |
return NF_STOLEN; | |
- *pskb = skb; | |
} | |
- iph = skb->nh.iph; | |
+ iph = ip_hdr(skb); | |
offset = ihl = iph->ihl * 4; | |
ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph); | |
if (ic == NULL) | |
@@ -905,7 +852,7 @@ | |
return NF_ACCEPT; | |
/* Is the embedded protocol header present? */ | |
- if (unlikely(cih->frag_off & __constant_htons(IP_OFFSET) && | |
+ if (unlikely(cih->frag_off & htons(IP_OFFSET) && | |
pp->dont_defrag)) | |
return NF_ACCEPT; | |
@@ -921,8 +868,7 @@ | |
verdict = NF_DROP; | |
/* Ensure the checksum is correct */ | |
- if (skb->ip_summed != CHECKSUM_UNNECESSARY && | |
- ip_vs_checksum_complete(skb, ihl)) { | |
+ if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) { | |
/* Failed checksum! */ | |
IP_VS_DBG(1, "Incoming ICMP: failed checksum from %d.%d.%d.%d!\n", | |
NIPQUAD(iph->saddr)); | |
@@ -947,11 +893,10 @@ | |
* and send it on its way... | |
*/ | |
static unsigned int | |
-ip_vs_in(unsigned int hooknum, struct sk_buff **pskb, | |
+ip_vs_in(unsigned int hooknum, struct sk_buff *skb, | |
const struct net_device *in, const struct net_device *out, | |
int (*okfn)(struct sk_buff *)) | |
{ | |
- struct sk_buff *skb = *pskb; | |
struct iphdr *iph; | |
struct ip_vs_protocol *pp; | |
struct ip_vs_conn *cp; | |
@@ -963,22 +908,21 @@ | |
* ... don't know why 1st test DOES NOT include 2nd (?) | |
*/ | |
if (unlikely(skb->pkt_type != PACKET_HOST | |
- || skb->dev == &loopback_dev || skb->sk)) { | |
+ || skb->dev->flags & IFF_LOOPBACK || skb->sk)) { | |
IP_VS_DBG(12, "packet type=%d proto=%d daddr=%d.%d.%d.%d ignored\n", | |
skb->pkt_type, | |
- skb->nh.iph->protocol, | |
- NIPQUAD(skb->nh.iph->daddr)); | |
+ ip_hdr(skb)->protocol, | |
+ NIPQUAD(ip_hdr(skb)->daddr)); | |
return NF_ACCEPT; | |
} | |
- iph = skb->nh.iph; | |
+ iph = ip_hdr(skb); | |
if (unlikely(iph->protocol == IPPROTO_ICMP)) { | |
- int related, verdict = ip_vs_in_icmp(pskb, &related, hooknum); | |
+ int related, verdict = ip_vs_in_icmp(skb, &related, hooknum); | |
if (related) | |
return verdict; | |
- skb = *pskb; | |
- iph = skb->nh.iph; | |
+ iph = ip_hdr(skb); | |
} | |
/* Protocol supported? */ | |
@@ -1033,15 +977,24 @@ | |
ret = NF_ACCEPT; | |
} | |
- /* increase its packet counter and check if it is needed | |
- to be synchronized */ | |
+ /* Increase its packet counter and check if it is needed | |
+ * to be synchronized | |
+ * | |
+ * Sync connection if it is about to close to | |
+	 * encourage the standby servers to update the connections timeout | |
+ */ | |
atomic_inc(&cp->in_pkts); | |
if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && | |
- (cp->protocol != IPPROTO_TCP || | |
- cp->state == IP_VS_TCP_S_ESTABLISHED) && | |
- (atomic_read(&cp->in_pkts) % sysctl_ip_vs_sync_threshold[1] | |
- == sysctl_ip_vs_sync_threshold[0])) | |
+ (((cp->protocol != IPPROTO_TCP || | |
+ cp->state == IP_VS_TCP_S_ESTABLISHED) && | |
+ (atomic_read(&cp->in_pkts) % sysctl_ip_vs_sync_threshold[1] | |
+ == sysctl_ip_vs_sync_threshold[0])) || | |
+ ((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) && | |
+ ((cp->state == IP_VS_TCP_S_FIN_WAIT) || | |
+ (cp->state == IP_VS_TCP_S_CLOSE_WAIT) || | |
+ (cp->state == IP_VS_TCP_S_TIME_WAIT))))) | |
ip_vs_sync_conn(cp); | |
+ cp->old_state = cp->state; | |
ip_vs_conn_put(cp); | |
return ret; | |
@@ -1049,65 +1002,64 @@ | |
/* | |
- * It is hooked at the NF_IP_FORWARD chain, in order to catch ICMP | |
+ * It is hooked at the NF_INET_FORWARD chain, in order to catch ICMP | |
* related packets destined for 0.0.0.0/0. | |
* When fwmark-based virtual service is used, such as transparent | |
* cache cluster, TCP packets can be marked and routed to ip_vs_in, | |
* but ICMP destined for 0.0.0.0/0 cannot not be easily marked and | |
- * sent to ip_vs_in_icmp. So, catch them at the NF_IP_FORWARD chain | |
+ * sent to ip_vs_in_icmp. So, catch them at the NF_INET_FORWARD chain | |
* and send them to ip_vs_in_icmp. | |
*/ | |
static unsigned int | |
-ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff **pskb, | |
+ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb, | |
const struct net_device *in, const struct net_device *out, | |
int (*okfn)(struct sk_buff *)) | |
{ | |
int r; | |
- if ((*pskb)->nh.iph->protocol != IPPROTO_ICMP) | |
+ if (ip_hdr(skb)->protocol != IPPROTO_ICMP) | |
return NF_ACCEPT; | |
- return ip_vs_in_icmp(pskb, &r, hooknum); | |
+ return ip_vs_in_icmp(skb, &r, hooknum); | |
} | |
-/* After packet filtering, forward packet through VS/DR, VS/TUN, | |
- or VS/NAT(change destination), so that filtering rules can be | |
- applied to IPVS. */ | |
-static struct nf_hook_ops ip_vs_in_ops = { | |
- .hook = ip_vs_in, | |
- .owner = THIS_MODULE, | |
- .pf = PF_INET, | |
- .hooknum = NF_IP_LOCAL_IN, | |
- .priority = 100, | |
-}; | |
- | |
-/* After packet filtering, change source only for VS/NAT */ | |
-static struct nf_hook_ops ip_vs_out_ops = { | |
- .hook = ip_vs_out, | |
- .owner = THIS_MODULE, | |
- .pf = PF_INET, | |
- .hooknum = NF_IP_FORWARD, | |
- .priority = 100, | |
-}; | |
- | |
-/* After packet filtering (but before ip_vs_out_icmp), catch icmp | |
- destined for 0.0.0.0/0, which is for incoming IPVS connections */ | |
-static struct nf_hook_ops ip_vs_forward_icmp_ops = { | |
- .hook = ip_vs_forward_icmp, | |
- .owner = THIS_MODULE, | |
- .pf = PF_INET, | |
- .hooknum = NF_IP_FORWARD, | |
- .priority = 99, | |
-}; | |
- | |
-/* Before the netfilter connection tracking, exit from POST_ROUTING */ | |
-static struct nf_hook_ops ip_vs_post_routing_ops = { | |
- .hook = ip_vs_post_routing, | |
- .owner = THIS_MODULE, | |
- .pf = PF_INET, | |
- .hooknum = NF_IP_POST_ROUTING, | |
- .priority = NF_IP_PRI_NAT_SRC-1, | |
+static struct nf_hook_ops ip_vs_ops[] __read_mostly = { | |
+ /* After packet filtering, forward packet through VS/DR, VS/TUN, | |
+ * or VS/NAT(change destination), so that filtering rules can be | |
+ * applied to IPVS. */ | |
+ { | |
+ .hook = ip_vs_in, | |
+ .owner = THIS_MODULE, | |
+ .pf = PF_INET, | |
+ .hooknum = NF_INET_LOCAL_IN, | |
+ .priority = 100, | |
+ }, | |
+ /* After packet filtering, change source only for VS/NAT */ | |
+ { | |
+ .hook = ip_vs_out, | |
+ .owner = THIS_MODULE, | |
+ .pf = PF_INET, | |
+ .hooknum = NF_INET_FORWARD, | |
+ .priority = 100, | |
+ }, | |
+ /* After packet filtering (but before ip_vs_out_icmp), catch icmp | |
+ * destined for 0.0.0.0/0, which is for incoming IPVS connections */ | |
+ { | |
+ .hook = ip_vs_forward_icmp, | |
+ .owner = THIS_MODULE, | |
+ .pf = PF_INET, | |
+ .hooknum = NF_INET_FORWARD, | |
+ .priority = 99, | |
+ }, | |
+ /* Before the netfilter connection tracking, exit from POST_ROUTING */ | |
+ { | |
+ .hook = ip_vs_post_routing, | |
+ .owner = THIS_MODULE, | |
+ .pf = PF_INET, | |
+ .hooknum = NF_INET_POST_ROUTING, | |
+ .priority = NF_IP_PRI_NAT_SRC-1, | |
+ }, | |
}; | |
@@ -1138,37 +1090,15 @@ | |
goto cleanup_app; | |
} | |
- ret = nf_register_hook(&ip_vs_in_ops); | |
+ ret = nf_register_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); | |
if (ret < 0) { | |
- IP_VS_ERR("can't register in hook.\n"); | |
+ IP_VS_ERR("can't register hooks.\n"); | |
goto cleanup_conn; | |
} | |
- ret = nf_register_hook(&ip_vs_out_ops); | |
- if (ret < 0) { | |
- IP_VS_ERR("can't register out hook.\n"); | |
- goto cleanup_inops; | |
- } | |
- ret = nf_register_hook(&ip_vs_post_routing_ops); | |
- if (ret < 0) { | |
- IP_VS_ERR("can't register post_routing hook.\n"); | |
- goto cleanup_outops; | |
- } | |
- ret = nf_register_hook(&ip_vs_forward_icmp_ops); | |
- if (ret < 0) { | |
- IP_VS_ERR("can't register forward_icmp hook.\n"); | |
- goto cleanup_postroutingops; | |
- } | |
- | |
IP_VS_INFO("ipvs loaded.\n"); | |
return ret; | |
- cleanup_postroutingops: | |
- nf_unregister_hook(&ip_vs_post_routing_ops); | |
- cleanup_outops: | |
- nf_unregister_hook(&ip_vs_out_ops); | |
- cleanup_inops: | |
- nf_unregister_hook(&ip_vs_in_ops); | |
cleanup_conn: | |
ip_vs_conn_cleanup(); | |
cleanup_app: | |
@@ -1182,10 +1112,7 @@ | |
static void __exit ip_vs_cleanup(void) | |
{ | |
- nf_unregister_hook(&ip_vs_forward_icmp_ops); | |
- nf_unregister_hook(&ip_vs_post_routing_ops); | |
- nf_unregister_hook(&ip_vs_out_ops); | |
- nf_unregister_hook(&ip_vs_in_ops); | |
+ nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); | |
ip_vs_conn_cleanup(); | |
ip_vs_app_cleanup(); | |
ip_vs_protocol_cleanup(); | |
diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_ctl.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_ctl.c | |
--- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_ctl.c 2009-02-16 11:57:22.000000000 -0400 | |
+++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_ctl.c 2009-02-16 12:56:22.000000000 -0400 | |
@@ -5,8 +5,6 @@ | |
* high-performance and highly available server based on a | |
* cluster of servers. | |
* | |
- * Version: $Id: ip_vs_ctl.c,v 1.36 2003/06/08 09:31:19 wensong Exp $ | |
- * | |
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org> | |
* Peter Kese <peter.kese@ijs.si> | |
* Julian Anastasov <ja@ssi.bg> | |
@@ -29,13 +27,13 @@ | |
#include <linux/proc_fs.h> | |
#include <linux/workqueue.h> | |
#include <linux/swap.h> | |
-#include <linux/proc_fs.h> | |
#include <linux/seq_file.h> | |
#include <linux/netfilter.h> | |
#include <linux/netfilter_ipv4.h> | |
#include <linux/mutex.h> | |
+#include <net/net_namespace.h> | |
#include <net/ip.h> | |
#include <net/route.h> | |
#include <net/sock.h> | |
@@ -579,6 +577,31 @@ | |
return NULL; | |
} | |
+/* | |
+ * Find destination by {daddr,dport,vaddr,protocol} | |
+ * Cretaed to be used in ip_vs_process_message() in | |
+ * the backup synchronization daemon. It finds the | |
+ * destination to be bound to the received connection | |
+ * on the backup. | |
+ * | |
+ * ip_vs_lookup_real_service() looked promising, but | |
+ * seems not working as expected. | |
+ */ | |
+struct ip_vs_dest *ip_vs_find_dest(__be32 daddr, __be16 dport, | |
+ __be32 vaddr, __be16 vport, __u16 protocol) | |
+{ | |
+ struct ip_vs_dest *dest; | |
+ struct ip_vs_service *svc; | |
+ | |
+ svc = ip_vs_service_get(0, protocol, vaddr, vport); | |
+ if (!svc) | |
+ return NULL; | |
+ dest = ip_vs_lookup_dest(svc, daddr, dport); | |
+ if (dest) | |
+ atomic_inc(&dest->refcnt); | |
+ ip_vs_service_put(svc); | |
+ return dest; | |
+} | |
/* | |
* Lookup dest by {svc,addr,port} in the destination trash. | |
@@ -660,9 +683,22 @@ | |
ip_vs_zero_stats(struct ip_vs_stats *stats) | |
{ | |
spin_lock_bh(&stats->lock); | |
- memset(stats, 0, (char *)&stats->lock - (char *)stats); | |
- spin_unlock_bh(&stats->lock); | |
+ | |
+ stats->conns = 0; | |
+ stats->inpkts = 0; | |
+ stats->outpkts = 0; | |
+ stats->inbytes = 0; | |
+ stats->outbytes = 0; | |
+ | |
+ stats->cps = 0; | |
+ stats->inpps = 0; | |
+ stats->outpps = 0; | |
+ stats->inbps = 0; | |
+ stats->outbps = 0; | |
+ | |
ip_vs_zero_estimator(stats); | |
+ | |
+ spin_unlock_bh(&stats->lock); | |
} | |
/* | |
@@ -679,7 +715,7 @@ | |
conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE; | |
/* check if local node and update the flags */ | |
- if (inet_addr_type(udest->addr) == RTN_LOCAL) { | |
+ if (inet_addr_type(&init_net, udest->addr) == RTN_LOCAL) { | |
conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK) | |
| IP_VS_CONN_F_LOCALNODE; | |
} | |
@@ -731,7 +767,7 @@ | |
EnterFunction(2); | |
- atype = inet_addr_type(udest->addr); | |
+ atype = inet_addr_type(&init_net, udest->addr); | |
if (atype != RTN_LOCAL && atype != RTN_UNICAST) | |
return -EINVAL; | |
@@ -909,7 +945,7 @@ | |
write_lock_bh(&__ip_vs_svc_lock); | |
/* Wait until all other svc users go away */ | |
- while (atomic_read(&svc->usecnt) > 1) {}; | |
+ IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1); | |
/* call the update_service, because server weight may be changed */ | |
svc->scheduler->update_service(svc); | |
@@ -1399,7 +1435,6 @@ | |
static struct ctl_table vs_vars[] = { | |
{ | |
- .ctl_name = NET_IPV4_VS_AMEMTHRESH, | |
.procname = "amemthresh", | |
.data = &sysctl_ip_vs_amemthresh, | |
.maxlen = sizeof(int), | |
@@ -1408,7 +1443,6 @@ | |
}, | |
#ifdef CONFIG_IP_VS_DEBUG | |
{ | |
- .ctl_name = NET_IPV4_VS_DEBUG_LEVEL, | |
.procname = "debug_level", | |
.data = &sysctl_ip_vs_debug_level, | |
.maxlen = sizeof(int), | |
@@ -1417,7 +1451,6 @@ | |
}, | |
#endif | |
{ | |
- .ctl_name = NET_IPV4_VS_AMDROPRATE, | |
.procname = "am_droprate", | |
.data = &sysctl_ip_vs_am_droprate, | |
.maxlen = sizeof(int), | |
@@ -1425,7 +1458,6 @@ | |
.proc_handler = &proc_dointvec, | |
}, | |
{ | |
- .ctl_name = NET_IPV4_VS_DROP_ENTRY, | |
.procname = "drop_entry", | |
.data = &sysctl_ip_vs_drop_entry, | |
.maxlen = sizeof(int), | |
@@ -1433,7 +1465,6 @@ | |
.proc_handler = &proc_do_defense_mode, | |
}, | |
{ | |
- .ctl_name = NET_IPV4_VS_DROP_PACKET, | |
.procname = "drop_packet", | |
.data = &sysctl_ip_vs_drop_packet, | |
.maxlen = sizeof(int), | |
@@ -1441,7 +1472,6 @@ | |
.proc_handler = &proc_do_defense_mode, | |
}, | |
{ | |
- .ctl_name = NET_IPV4_VS_SECURE_TCP, | |
.procname = "secure_tcp", | |
.data = &sysctl_ip_vs_secure_tcp, | |
.maxlen = sizeof(int), | |
@@ -1450,7 +1480,6 @@ | |
}, | |
#if 0 | |
{ | |
- .ctl_name = NET_IPV4_VS_TO_ES, | |
.procname = "timeout_established", | |
.data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED], | |
.maxlen = sizeof(int), | |
@@ -1458,7 +1487,6 @@ | |
.proc_handler = &proc_dointvec_jiffies, | |
}, | |
{ | |
- .ctl_name = NET_IPV4_VS_TO_SS, | |
.procname = "timeout_synsent", | |
.data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT], | |
.maxlen = sizeof(int), | |
@@ -1466,7 +1494,6 @@ | |
.proc_handler = &proc_dointvec_jiffies, | |
}, | |
{ | |
- .ctl_name = NET_IPV4_VS_TO_SR, | |
.procname = "timeout_synrecv", | |
.data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV], | |
.maxlen = sizeof(int), | |
@@ -1474,7 +1501,6 @@ | |
.proc_handler = &proc_dointvec_jiffies, | |
}, | |
{ | |
- .ctl_name = NET_IPV4_VS_TO_FW, | |
.procname = "timeout_finwait", | |
.data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT], | |
.maxlen = sizeof(int), | |
@@ -1482,7 +1508,6 @@ | |
.proc_handler = &proc_dointvec_jiffies, | |
}, | |
{ | |
- .ctl_name = NET_IPV4_VS_TO_TW, | |
.procname = "timeout_timewait", | |
.data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT], | |
.maxlen = sizeof(int), | |
@@ -1490,7 +1515,6 @@ | |
.proc_handler = &proc_dointvec_jiffies, | |
}, | |
{ | |
- .ctl_name = NET_IPV4_VS_TO_CL, | |
.procname = "timeout_close", | |
.data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE], | |
.maxlen = sizeof(int), | |
@@ -1498,7 +1522,6 @@ | |
.proc_handler = &proc_dointvec_jiffies, | |
}, | |
{ | |
- .ctl_name = NET_IPV4_VS_TO_CW, | |
.procname = "timeout_closewait", | |
.data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT], | |
.maxlen = sizeof(int), | |
@@ -1506,7 +1529,6 @@ | |
.proc_handler = &proc_dointvec_jiffies, | |
}, | |
{ | |
- .ctl_name = NET_IPV4_VS_TO_LA, | |
.procname = "timeout_lastack", | |
.data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK], | |
.maxlen = sizeof(int), | |
@@ -1514,7 +1536,6 @@ | |
.proc_handler = &proc_dointvec_jiffies, | |
}, | |
{ | |
- .ctl_name = NET_IPV4_VS_TO_LI, | |
.procname = "timeout_listen", | |
.data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN], | |
.maxlen = sizeof(int), | |
@@ -1522,7 +1543,6 @@ | |
.proc_handler = &proc_dointvec_jiffies, | |
}, | |
{ | |
- .ctl_name = NET_IPV4_VS_TO_SA, | |
.procname = "timeout_synack", | |
.data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK], | |
.maxlen = sizeof(int), | |
@@ -1530,7 +1550,6 @@ | |
.proc_handler = &proc_dointvec_jiffies, | |
}, | |
{ | |
- .ctl_name = NET_IPV4_VS_TO_UDP, | |
.procname = "timeout_udp", | |
.data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP], | |
.maxlen = sizeof(int), | |
@@ -1538,7 +1557,6 @@ | |
.proc_handler = &proc_dointvec_jiffies, | |
}, | |
{ | |
- .ctl_name = NET_IPV4_VS_TO_ICMP, | |
.procname = "timeout_icmp", | |
.data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP], | |
.maxlen = sizeof(int), | |
@@ -1547,7 +1565,6 @@ | |
}, | |
#endif | |
{ | |
- .ctl_name = NET_IPV4_VS_CACHE_BYPASS, | |
.procname = "cache_bypass", | |
.data = &sysctl_ip_vs_cache_bypass, | |
.maxlen = sizeof(int), | |
@@ -1555,7 +1572,6 @@ | |
.proc_handler = &proc_dointvec, | |
}, | |
{ | |
- .ctl_name = NET_IPV4_VS_EXPIRE_NODEST_CONN, | |
.procname = "expire_nodest_conn", | |
.data = &sysctl_ip_vs_expire_nodest_conn, | |
.maxlen = sizeof(int), | |
@@ -1563,7 +1579,6 @@ | |
.proc_handler = &proc_dointvec, | |
}, | |
{ | |
- .ctl_name = NET_IPV4_VS_EXPIRE_QUIESCENT_TEMPLATE, | |
.procname = "expire_quiescent_template", | |
.data = &sysctl_ip_vs_expire_quiescent_template, | |
.maxlen = sizeof(int), | |
@@ -1571,7 +1586,6 @@ | |
.proc_handler = &proc_dointvec, | |
}, | |
{ | |
- .ctl_name = NET_IPV4_VS_SYNC_THRESHOLD, | |
.procname = "sync_threshold", | |
.data = &sysctl_ip_vs_sync_threshold, | |
.maxlen = sizeof(sysctl_ip_vs_sync_threshold), | |
@@ -1579,7 +1593,6 @@ | |
.proc_handler = &proc_do_sync_threshold, | |
}, | |
{ | |
- .ctl_name = NET_IPV4_VS_NAT_ICMP_SEND, | |
.procname = "nat_icmp_send", | |
.data = &sysctl_ip_vs_nat_icmp_send, | |
.maxlen = sizeof(int), | |
@@ -1589,35 +1602,13 @@ | |
{ .ctl_name = 0 } | |
}; | |
-static ctl_table vs_table[] = { | |
- { | |
- .ctl_name = NET_IPV4_VS, | |
- .procname = "vs", | |
- .mode = 0555, | |
- .child = vs_vars | |
- }, | |
- { .ctl_name = 0 } | |
-}; | |
- | |
-static ctl_table ipvs_ipv4_table[] = { | |
- { | |
- .ctl_name = NET_IPV4, | |
- .procname = "ipv4", | |
- .mode = 0555, | |
- .child = vs_table, | |
- }, | |
- { .ctl_name = 0 } | |
-}; | |
- | |
-static ctl_table vs_root_table[] = { | |
- { | |
- .ctl_name = CTL_NET, | |
- .procname = "net", | |
- .mode = 0555, | |
- .child = ipvs_ipv4_table, | |
- }, | |
- { .ctl_name = 0 } | |
+const struct ctl_path net_vs_ctl_path[] = { | |
+ { .procname = "net", .ctl_name = CTL_NET, }, | |
+ { .procname = "ipv4", .ctl_name = NET_IPV4, }, | |
+ { .procname = "vs", }, | |
+ { } | |
}; | |
+EXPORT_SYMBOL_GPL(net_vs_ctl_path); | |
static struct ctl_table_header * sysctl_header; | |
@@ -1783,7 +1774,7 @@ | |
return 0; | |
} | |
-static struct seq_operations ip_vs_info_seq_ops = { | |
+static const struct seq_operations ip_vs_info_seq_ops = { | |
.start = ip_vs_info_seq_start, | |
.next = ip_vs_info_seq_next, | |
.stop = ip_vs_info_seq_stop, | |
@@ -1792,27 +1783,11 @@ | |
static int ip_vs_info_open(struct inode *inode, struct file *file) | |
{ | |
- struct seq_file *seq; | |
- int rc = -ENOMEM; | |
- struct ip_vs_iter *s = kzalloc(sizeof(*s), GFP_KERNEL); | |
- | |
- if (!s) | |
- goto out; | |
- | |
- rc = seq_open(file, &ip_vs_info_seq_ops); | |
- if (rc) | |
- goto out_kfree; | |
- | |
- seq = file->private_data; | |
- seq->private = s; | |
-out: | |
- return rc; | |
-out_kfree: | |
- kfree(s); | |
- goto out; | |
+ return seq_open_private(file, &ip_vs_info_seq_ops, | |
+ sizeof(struct ip_vs_iter)); | |
} | |
-static struct file_operations ip_vs_info_fops = { | |
+static const struct file_operations ip_vs_info_fops = { | |
.owner = THIS_MODULE, | |
.open = ip_vs_info_open, | |
.read = seq_read, | |
@@ -1822,7 +1797,9 @@ | |
#endif | |
-struct ip_vs_stats ip_vs_stats; | |
+struct ip_vs_stats ip_vs_stats = { | |
+ .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock), | |
+}; | |
#ifdef CONFIG_PROC_FS | |
static int ip_vs_stats_show(struct seq_file *seq, void *v) | |
@@ -1859,7 +1836,7 @@ | |
return single_open(file, ip_vs_stats_show, NULL); | |
} | |
-static struct file_operations ip_vs_stats_fops = { | |
+static const struct file_operations ip_vs_stats_fops = { | |
.owner = THIS_MODULE, | |
.open = ip_vs_stats_seq_open, | |
.read = seq_read, | |
@@ -2340,10 +2317,11 @@ | |
.get_optmin = IP_VS_BASE_CTL, | |
.get_optmax = IP_VS_SO_GET_MAX+1, | |
.get = do_ip_vs_get_ctl, | |
+ .owner = THIS_MODULE, | |
}; | |
-int ip_vs_control_init(void) | |
+int __init ip_vs_control_init(void) | |
{ | |
int ret; | |
int idx; | |
@@ -2356,10 +2334,10 @@ | |
return ret; | |
} | |
- proc_net_fops_create("ip_vs", 0, &ip_vs_info_fops); | |
- proc_net_fops_create("ip_vs_stats",0, &ip_vs_stats_fops); | |
+ proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops); | |
+ proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops); | |
- sysctl_header = register_sysctl_table(vs_root_table, 0); | |
+ sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars); | |
/* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */ | |
for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { | |
@@ -2370,8 +2348,6 @@ | |
INIT_LIST_HEAD(&ip_vs_rtable[idx]); | |
} | |
- memset(&ip_vs_stats, 0, sizeof(ip_vs_stats)); | |
- spin_lock_init(&ip_vs_stats.lock); | |
ip_vs_new_estimator(&ip_vs_stats); | |
/* Hook the defense timer */ | |
@@ -2387,10 +2363,11 @@ | |
EnterFunction(2); | |
ip_vs_trash_cleanup(); | |
cancel_rearming_delayed_work(&defense_work); | |
+ cancel_work_sync(&defense_work.work); | |
ip_vs_kill_estimator(&ip_vs_stats); | |
unregister_sysctl_table(sysctl_header); | |
- proc_net_remove("ip_vs_stats"); | |
- proc_net_remove("ip_vs"); | |
+ proc_net_remove(&init_net, "ip_vs_stats"); | |
+ proc_net_remove(&init_net, "ip_vs"); | |
nf_unregister_sockopt(&ip_vs_sockopts); | |
LeaveFunction(2); | |
} | |
diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_dh.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_dh.c | |
--- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_dh.c 2009-02-16 11:57:22.000000000 -0400 | |
+++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_dh.c 2009-02-16 12:56:22.000000000 -0400 | |
@@ -1,8 +1,6 @@ | |
/* | |
* IPVS: Destination Hashing scheduling module | |
* | |
- * Version: $Id: ip_vs_dh.c,v 1.5 2002/09/15 08:14:08 wensong Exp $ | |
- * | |
* Authors: Wensong Zhang <wensong@gnuchina.org> | |
* | |
* Inspired by the consistent hashing scheduler patch from | |
@@ -204,7 +202,7 @@ | |
{ | |
struct ip_vs_dest *dest; | |
struct ip_vs_dh_bucket *tbl; | |
- struct iphdr *iph = skb->nh.iph; | |
+ struct iphdr *iph = ip_hdr(skb); | |
IP_VS_DBG(6, "ip_vs_dh_schedule(): Scheduling...\n"); | |
@@ -235,6 +233,7 @@ | |
.name = "dh", | |
.refcnt = ATOMIC_INIT(0), | |
.module = THIS_MODULE, | |
+ .n_list = LIST_HEAD_INIT(ip_vs_dh_scheduler.n_list), | |
.init_service = ip_vs_dh_init_svc, | |
.done_service = ip_vs_dh_done_svc, | |
.update_service = ip_vs_dh_update_svc, | |
@@ -244,7 +243,6 @@ | |
static int __init ip_vs_dh_init(void) | |
{ | |
- INIT_LIST_HEAD(&ip_vs_dh_scheduler.n_list); | |
return register_ip_vs_scheduler(&ip_vs_dh_scheduler); | |
} | |
diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_est.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_est.c | |
--- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_est.c 2009-02-16 11:57:22.000000000 -0400 | |
+++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_est.c 2009-02-16 12:56:22.000000000 -0400 | |
@@ -1,8 +1,6 @@ | |
/* | |
* ip_vs_est.c: simple rate estimator for IPVS | |
* | |
- * Version: $Id: ip_vs_est.c,v 1.4 2002/11/30 01:50:35 wensong Exp $ | |
- * | |
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org> | |
* | |
* This program is free software; you can redistribute it and/or | |
@@ -18,6 +16,8 @@ | |
#include <linux/slab.h> | |
#include <linux/types.h> | |
#include <linux/interrupt.h> | |
+#include <linux/sysctl.h> | |
+#include <linux/list.h> | |
#include <net/ip_vs.h> | |
@@ -45,28 +45,11 @@ | |
*/ | |
-struct ip_vs_estimator | |
-{ | |
- struct ip_vs_estimator *next; | |
- struct ip_vs_stats *stats; | |
+static void estimation_timer(unsigned long arg); | |
- u32 last_conns; | |
- u32 last_inpkts; | |
- u32 last_outpkts; | |
- u64 last_inbytes; | |
- u64 last_outbytes; | |
- | |
- u32 cps; | |
- u32 inpps; | |
- u32 outpps; | |
- u32 inbps; | |
- u32 outbps; | |
-}; | |
- | |
- | |
-static struct ip_vs_estimator *est_list = NULL; | |
-static DEFINE_RWLOCK(est_lock); | |
-static struct timer_list est_timer; | |
+static LIST_HEAD(est_list); | |
+static DEFINE_SPINLOCK(est_lock); | |
+static DEFINE_TIMER(est_timer, estimation_timer, 0, 0); | |
static void estimation_timer(unsigned long arg) | |
{ | |
@@ -77,9 +60,9 @@ | |
u64 n_inbytes, n_outbytes; | |
u32 rate; | |
- read_lock(&est_lock); | |
- for (e = est_list; e; e = e->next) { | |
- s = e->stats; | |
+ spin_lock(&est_lock); | |
+ list_for_each_entry(e, &est_list, list) { | |
+ s = container_of(e, struct ip_vs_stats, est); | |
spin_lock(&s->lock); | |
n_conns = s->conns; | |
@@ -115,19 +98,16 @@ | |
s->outbps = (e->outbps+0xF)>>5; | |
spin_unlock(&s->lock); | |
} | |
- read_unlock(&est_lock); | |
+ spin_unlock(&est_lock); | |
mod_timer(&est_timer, jiffies + 2*HZ); | |
} | |
-int ip_vs_new_estimator(struct ip_vs_stats *stats) | |
+void ip_vs_new_estimator(struct ip_vs_stats *stats) | |
{ | |
- struct ip_vs_estimator *est; | |
+ struct ip_vs_estimator *est = &stats->est; | |
- est = kzalloc(sizeof(*est), GFP_KERNEL); | |
- if (est == NULL) | |
- return -ENOMEM; | |
+ INIT_LIST_HEAD(&est->list); | |
- est->stats = stats; | |
est->last_conns = stats->conns; | |
est->cps = stats->cps<<10; | |
@@ -143,60 +123,40 @@ | |
est->last_outbytes = stats->outbytes; | |
est->outbps = stats->outbps<<5; | |
- write_lock_bh(&est_lock); | |
- est->next = est_list; | |
- if (est->next == NULL) { | |
- init_timer(&est_timer); | |
- est_timer.expires = jiffies + 2*HZ; | |
- est_timer.function = estimation_timer; | |
- add_timer(&est_timer); | |
- } | |
- est_list = est; | |
- write_unlock_bh(&est_lock); | |
- return 0; | |
+ spin_lock_bh(&est_lock); | |
+ if (list_empty(&est_list)) | |
+ mod_timer(&est_timer, jiffies + 2 * HZ); | |
+ list_add(&est->list, &est_list); | |
+ spin_unlock_bh(&est_lock); | |
} | |
void ip_vs_kill_estimator(struct ip_vs_stats *stats) | |
{ | |
- struct ip_vs_estimator *est, **pest; | |
- int killed = 0; | |
+ struct ip_vs_estimator *est = &stats->est; | |
- write_lock_bh(&est_lock); | |
- pest = &est_list; | |
- while ((est=*pest) != NULL) { | |
- if (est->stats != stats) { | |
- pest = &est->next; | |
- continue; | |
- } | |
- *pest = est->next; | |
- kfree(est); | |
- killed++; | |
+ spin_lock_bh(&est_lock); | |
+ list_del(&est->list); | |
+ while (list_empty(&est_list) && try_to_del_timer_sync(&est_timer) < 0) { | |
+ spin_unlock_bh(&est_lock); | |
+ cpu_relax(); | |
+ spin_lock_bh(&est_lock); | |
} | |
- if (killed && est_list == NULL) | |
- del_timer_sync(&est_timer); | |
- write_unlock_bh(&est_lock); | |
+ spin_unlock_bh(&est_lock); | |
} | |
void ip_vs_zero_estimator(struct ip_vs_stats *stats) | |
{ | |
- struct ip_vs_estimator *e; | |
+ struct ip_vs_estimator *est = &stats->est; | |
- write_lock_bh(&est_lock); | |
- for (e = est_list; e; e = e->next) { | |
- if (e->stats != stats) | |
- continue; | |
- | |
- /* set counters zero */ | |
- e->last_conns = 0; | |
- e->last_inpkts = 0; | |
- e->last_outpkts = 0; | |
- e->last_inbytes = 0; | |
- e->last_outbytes = 0; | |
- e->cps = 0; | |
- e->inpps = 0; | |
- e->outpps = 0; | |
- e->inbps = 0; | |
- e->outbps = 0; | |
- } | |
- write_unlock_bh(&est_lock); | |
+ /* set counters zero, caller must hold the stats->lock lock */ | |
+ est->last_inbytes = 0; | |
+ est->last_outbytes = 0; | |
+ est->last_conns = 0; | |
+ est->last_inpkts = 0; | |
+ est->last_outpkts = 0; | |
+ est->cps = 0; | |
+ est->inpps = 0; | |
+ est->outpps = 0; | |
+ est->inbps = 0; | |
+ est->outbps = 0; | |
} | |
diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_ftp.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_ftp.c | |
--- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_ftp.c 2009-02-16 11:57:22.000000000 -0400 | |
+++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_ftp.c 2009-02-16 12:56:22.000000000 -0400 | |
@@ -1,8 +1,6 @@ | |
/* | |
* ip_vs_ftp.c: IPVS ftp application module | |
* | |
- * Version: $Id: ip_vs_ftp.c,v 1.13 2002/09/15 08:14:08 wensong Exp $ | |
- * | |
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org> | |
* | |
* Changes: | |
@@ -30,6 +28,7 @@ | |
#include <linux/skbuff.h> | |
#include <linux/in.h> | |
#include <linux/ip.h> | |
+#include <linux/netfilter.h> | |
#include <net/protocol.h> | |
#include <net/tcp.h> | |
#include <asm/unaligned.h> | |
@@ -135,7 +134,7 @@ | |
* xxx,xxx,xxx,xxx is the server address, ppp,ppp is the server port number. | |
*/ | |
static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, | |
- struct sk_buff **pskb, int *diff) | |
+ struct sk_buff *skb, int *diff) | |
{ | |
struct iphdr *iph; | |
struct tcphdr *th; | |
@@ -155,14 +154,14 @@ | |
return 1; | |
/* Linear packets are much easier to deal with. */ | |
- if (!ip_vs_make_skb_writable(pskb, (*pskb)->len)) | |
+ if (!skb_make_writable(skb, skb->len)) | |
return 0; | |
if (cp->app_data == &ip_vs_ftp_pasv) { | |
- iph = (*pskb)->nh.iph; | |
+ iph = ip_hdr(skb); | |
th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]); | |
data = (char *)th + (th->doff << 2); | |
- data_limit = (*pskb)->tail; | |
+ data_limit = skb_tail_pointer(skb); | |
if (ip_vs_ftp_get_addrport(data, data_limit, | |
SERVER_STRING, | |
@@ -213,7 +212,7 @@ | |
memcpy(start, buf, buf_len); | |
ret = 1; | |
} else { | |
- ret = !ip_vs_skb_replace(*pskb, GFP_ATOMIC, start, | |
+ ret = !ip_vs_skb_replace(skb, GFP_ATOMIC, start, | |
end-start, buf, buf_len); | |
} | |
@@ -238,7 +237,7 @@ | |
* the client. | |
*/ | |
static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, | |
- struct sk_buff **pskb, int *diff) | |
+ struct sk_buff *skb, int *diff) | |
{ | |
struct iphdr *iph; | |
struct tcphdr *th; | |
@@ -256,20 +255,20 @@ | |
return 1; | |
/* Linear packets are much easier to deal with. */ | |
- if (!ip_vs_make_skb_writable(pskb, (*pskb)->len)) | |
+ if (!skb_make_writable(skb, skb->len)) | |
return 0; | |
/* | |
* Detecting whether it is passive | |
*/ | |
- iph = (*pskb)->nh.iph; | |
+ iph = ip_hdr(skb); | |
th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]); | |
/* Since there may be OPTIONS in the TCP packet and the HLEN is | |
the length of the header in 32-bit multiples, it is accurate | |
to calculate data address by th+HLEN*4 */ | |
data = data_start = (char *)th + (th->doff << 2); | |
- data_limit = (*pskb)->tail; | |
+ data_limit = skb_tail_pointer(skb); | |
while (data <= data_limit - 6) { | |
if (strnicmp(data, "PASV\r\n", 6) == 0) { | |
@@ -370,7 +369,7 @@ | |
if (ret) | |
break; | |
IP_VS_INFO("%s: loaded support on port[%d] = %d\n", | |
- app->name, i, ports[i]); | |
+ app->name, i, ports[i]); | |
} | |
if (ret) | |
diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_lblc.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_lblc.c | |
--- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_lblc.c 2009-02-16 11:57:22.000000000 -0400 | |
+++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_lblc.c 2009-02-16 12:56:22.000000000 -0400 | |
@@ -1,8 +1,6 @@ | |
/* | |
* IPVS: Locality-Based Least-Connection scheduling module | |
* | |
- * Version: $Id: ip_vs_lblc.c,v 1.10 2002/09/15 08:14:08 wensong Exp $ | |
- * | |
* Authors: Wensong Zhang <wensong@gnuchina.org> | |
* | |
* This program is free software; you can redistribute it and/or | |
@@ -114,46 +112,15 @@ | |
static ctl_table vs_vars_table[] = { | |
{ | |
- .ctl_name = NET_IPV4_VS_LBLC_EXPIRE, | |
.procname = "lblc_expiration", | |
.data = &sysctl_ip_vs_lblc_expiration, | |
.maxlen = sizeof(int), | |
- .mode = 0644, | |
+ .mode = 0644, | |
.proc_handler = &proc_dointvec_jiffies, | |
}, | |
{ .ctl_name = 0 } | |
}; | |
-static ctl_table vs_table[] = { | |
- { | |
- .ctl_name = NET_IPV4_VS, | |
- .procname = "vs", | |
- .mode = 0555, | |
- .child = vs_vars_table | |
- }, | |
- { .ctl_name = 0 } | |
-}; | |
- | |
-static ctl_table ipvs_ipv4_table[] = { | |
- { | |
- .ctl_name = NET_IPV4, | |
- .procname = "ipv4", | |
- .mode = 0555, | |
- .child = vs_table | |
- }, | |
- { .ctl_name = 0 } | |
-}; | |
- | |
-static ctl_table lblc_root_table[] = { | |
- { | |
- .ctl_name = CTL_NET, | |
- .procname = "net", | |
- .mode = 0555, | |
- .child = ipvs_ipv4_table | |
- }, | |
- { .ctl_name = 0 } | |
-}; | |
- | |
static struct ctl_table_header * sysctl_header; | |
/* | |
@@ -288,7 +255,7 @@ | |
write_lock(&tbl->lock); | |
list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { | |
- if (time_before(now, | |
+ if (time_before(now, | |
en->lastuse + sysctl_ip_vs_lblc_expiration)) | |
continue; | |
@@ -393,9 +360,8 @@ | |
/* | |
* Hook periodic timer for garbage collection | |
*/ | |
- init_timer(&tbl->periodic_timer); | |
- tbl->periodic_timer.data = (unsigned long)tbl; | |
- tbl->periodic_timer.function = ip_vs_lblc_check_expire; | |
+ setup_timer(&tbl->periodic_timer, ip_vs_lblc_check_expire, | |
+ (unsigned long)tbl); | |
tbl->periodic_timer.expires = jiffies+CHECK_EXPIRE_INTERVAL; | |
add_timer(&tbl->periodic_timer); | |
@@ -521,7 +487,7 @@ | |
struct ip_vs_dest *dest; | |
struct ip_vs_lblc_table *tbl; | |
struct ip_vs_lblc_entry *en; | |
- struct iphdr *iph = skb->nh.iph; | |
+ struct iphdr *iph = ip_hdr(skb); | |
IP_VS_DBG(6, "ip_vs_lblc_schedule(): Scheduling...\n"); | |
@@ -573,6 +539,7 @@ | |
.name = "lblc", | |
.refcnt = ATOMIC_INIT(0), | |
.module = THIS_MODULE, | |
+ .n_list = LIST_HEAD_INIT(ip_vs_lblc_scheduler.n_list), | |
.init_service = ip_vs_lblc_init_svc, | |
.done_service = ip_vs_lblc_done_svc, | |
.update_service = ip_vs_lblc_update_svc, | |
@@ -582,9 +549,13 @@ | |
static int __init ip_vs_lblc_init(void) | |
{ | |
- INIT_LIST_HEAD(&ip_vs_lblc_scheduler.n_list); | |
- sysctl_header = register_sysctl_table(lblc_root_table, 0); | |
- return register_ip_vs_scheduler(&ip_vs_lblc_scheduler); | |
+ int ret; | |
+ | |
+ sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table); | |
+ ret = register_ip_vs_scheduler(&ip_vs_lblc_scheduler); | |
+ if (ret) | |
+ unregister_sysctl_table(sysctl_header); | |
+ return ret; | |
} | |
diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_lblcr.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_lblcr.c | |
--- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_lblcr.c 2009-02-16 11:57:22.000000000 -0400 | |
+++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_lblcr.c 2009-02-16 12:56:22.000000000 -0400 | |
@@ -1,8 +1,6 @@ | |
/* | |
* IPVS: Locality-Based Least-Connection with Replication scheduler | |
* | |
- * Version: $Id: ip_vs_lblcr.c,v 1.11 2002/09/15 08:14:08 wensong Exp $ | |
- * | |
* Authors: Wensong Zhang <wensong@gnuchina.org> | |
* | |
* This program is free software; you can redistribute it and/or | |
@@ -48,8 +46,7 @@ | |
/* for sysctl */ | |
#include <linux/fs.h> | |
#include <linux/sysctl.h> | |
-/* for proc_net_create/proc_net_remove */ | |
-#include <linux/proc_fs.h> | |
+#include <net/net_namespace.h> | |
#include <net/ip_vs.h> | |
@@ -303,46 +300,15 @@ | |
static ctl_table vs_vars_table[] = { | |
{ | |
- .ctl_name = NET_IPV4_VS_LBLCR_EXPIRE, | |
.procname = "lblcr_expiration", | |
.data = &sysctl_ip_vs_lblcr_expiration, | |
.maxlen = sizeof(int), | |
- .mode = 0644, | |
+ .mode = 0644, | |
.proc_handler = &proc_dointvec_jiffies, | |
}, | |
{ .ctl_name = 0 } | |
}; | |
-static ctl_table vs_table[] = { | |
- { | |
- .ctl_name = NET_IPV4_VS, | |
- .procname = "vs", | |
- .mode = 0555, | |
- .child = vs_vars_table | |
- }, | |
- { .ctl_name = 0 } | |
-}; | |
- | |
-static ctl_table ipvs_ipv4_table[] = { | |
- { | |
- .ctl_name = NET_IPV4, | |
- .procname = "ipv4", | |
- .mode = 0555, | |
- .child = vs_table | |
- }, | |
- { .ctl_name = 0 } | |
-}; | |
- | |
-static ctl_table lblcr_root_table[] = { | |
- { | |
- .ctl_name = CTL_NET, | |
- .procname = "net", | |
- .mode = 0555, | |
- .child = ipvs_ipv4_table | |
- }, | |
- { .ctl_name = 0 } | |
-}; | |
- | |
static struct ctl_table_header * sysctl_header; | |
/* | |
@@ -546,71 +512,6 @@ | |
mod_timer(&tbl->periodic_timer, jiffies+CHECK_EXPIRE_INTERVAL); | |
} | |
- | |
-#ifdef CONFIG_IP_VS_LBLCR_DEBUG | |
-static struct ip_vs_lblcr_table *lblcr_table_list; | |
- | |
-/* | |
- * /proc/net/ip_vs_lblcr to display the mappings of | |
- * destination IP address <==> its serverSet | |
- */ | |
-static int | |
-ip_vs_lblcr_getinfo(char *buffer, char **start, off_t offset, int length) | |
-{ | |
- off_t pos=0, begin; | |
- int len=0, size; | |
- struct ip_vs_lblcr_table *tbl; | |
- unsigned long now = jiffies; | |
- int i; | |
- struct ip_vs_lblcr_entry *en; | |
- | |
- tbl = lblcr_table_list; | |
- | |
- size = sprintf(buffer, "LastTime Dest IP address Server set\n"); | |
- pos += size; | |
- len += size; | |
- | |
- for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) { | |
- read_lock_bh(&tbl->lock); | |
- list_for_each_entry(en, &tbl->bucket[i], list) { | |
- char tbuf[16]; | |
- struct ip_vs_dest_list *d; | |
- | |
- sprintf(tbuf, "%u.%u.%u.%u", NIPQUAD(en->addr)); | |
- size = sprintf(buffer+len, "%8lu %-16s ", | |
- now-en->lastuse, tbuf); | |
- | |
- read_lock(&en->set.lock); | |
- for (d=en->set.list; d!=NULL; d=d->next) { | |
- size += sprintf(buffer+len+size, | |
- "%u.%u.%u.%u ", | |
- NIPQUAD(d->dest->addr)); | |
- } | |
- read_unlock(&en->set.lock); | |
- size += sprintf(buffer+len+size, "\n"); | |
- len += size; | |
- pos += size; | |
- if (pos <= offset) | |
- len=0; | |
- if (pos >= offset+length) { | |
- read_unlock_bh(&tbl->lock); | |
- goto done; | |
- } | |
- } | |
- read_unlock_bh(&tbl->lock); | |
- } | |
- | |
- done: | |
- begin = len - (pos - offset); | |
- *start = buffer + begin; | |
- len -= begin; | |
- if(len>length) | |
- len = length; | |
- return len; | |
-} | |
-#endif | |
- | |
- | |
static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc) | |
{ | |
int i; | |
@@ -643,15 +544,11 @@ | |
/* | |
* Hook periodic timer for garbage collection | |
*/ | |
- init_timer(&tbl->periodic_timer); | |
- tbl->periodic_timer.data = (unsigned long)tbl; | |
- tbl->periodic_timer.function = ip_vs_lblcr_check_expire; | |
+ setup_timer(&tbl->periodic_timer, ip_vs_lblcr_check_expire, | |
+ (unsigned long)tbl); | |
tbl->periodic_timer.expires = jiffies+CHECK_EXPIRE_INTERVAL; | |
add_timer(&tbl->periodic_timer); | |
-#ifdef CONFIG_IP_VS_LBLCR_DEBUG | |
- lblcr_table_list = tbl; | |
-#endif | |
return 0; | |
} | |
@@ -775,7 +672,7 @@ | |
struct ip_vs_dest *dest; | |
struct ip_vs_lblcr_table *tbl; | |
struct ip_vs_lblcr_entry *en; | |
- struct iphdr *iph = skb->nh.iph; | |
+ struct iphdr *iph = ip_hdr(skb); | |
IP_VS_DBG(6, "ip_vs_lblcr_schedule(): Scheduling...\n"); | |
@@ -831,6 +728,7 @@ | |
.name = "lblcr", | |
.refcnt = ATOMIC_INIT(0), | |
.module = THIS_MODULE, | |
+ .n_list = LIST_HEAD_INIT(ip_vs_lblcr_scheduler.n_list), | |
.init_service = ip_vs_lblcr_init_svc, | |
.done_service = ip_vs_lblcr_done_svc, | |
.update_service = ip_vs_lblcr_update_svc, | |
@@ -840,20 +738,18 @@ | |
static int __init ip_vs_lblcr_init(void) | |
{ | |
- INIT_LIST_HEAD(&ip_vs_lblcr_scheduler.n_list); | |
- sysctl_header = register_sysctl_table(lblcr_root_table, 0); | |
-#ifdef CONFIG_IP_VS_LBLCR_DEBUG | |
- proc_net_create("ip_vs_lblcr", 0, ip_vs_lblcr_getinfo); | |
-#endif | |
- return register_ip_vs_scheduler(&ip_vs_lblcr_scheduler); | |
+ int ret; | |
+ | |
+ sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table); | |
+ ret = register_ip_vs_scheduler(&ip_vs_lblcr_scheduler); | |
+ if (ret) | |
+ unregister_sysctl_table(sysctl_header); | |
+ return ret; | |
} | |
static void __exit ip_vs_lblcr_cleanup(void) | |
{ | |
-#ifdef CONFIG_IP_VS_LBLCR_DEBUG | |
- proc_net_remove("ip_vs_lblcr"); | |
-#endif | |
unregister_sysctl_table(sysctl_header); | |
unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler); | |
} | |
diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_lc.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_lc.c | |
--- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_lc.c 2009-02-16 11:57:22.000000000 -0400 | |
+++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_lc.c 2009-02-16 12:56:22.000000000 -0400 | |
@@ -1,8 +1,6 @@ | |
/* | |
* IPVS: Least-Connection Scheduling module | |
* | |
- * Version: $Id: ip_vs_lc.c,v 1.10 2003/04/18 09:03:16 wensong Exp $ | |
- * | |
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org> | |
* | |
* This program is free software; you can redistribute it and/or | |
@@ -100,6 +98,7 @@ | |
.name = "lc", | |
.refcnt = ATOMIC_INIT(0), | |
.module = THIS_MODULE, | |
+ .n_list = LIST_HEAD_INIT(ip_vs_lc_scheduler.n_list), | |
.init_service = ip_vs_lc_init_svc, | |
.done_service = ip_vs_lc_done_svc, | |
.update_service = ip_vs_lc_update_svc, | |
@@ -109,7 +108,6 @@ | |
static int __init ip_vs_lc_init(void) | |
{ | |
- INIT_LIST_HEAD(&ip_vs_lc_scheduler.n_list); | |
return register_ip_vs_scheduler(&ip_vs_lc_scheduler) ; | |
} | |
diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_nq.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_nq.c | |
--- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_nq.c 2009-02-16 11:57:22.000000000 -0400 | |
+++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_nq.c 2009-02-16 12:56:22.000000000 -0400 | |
@@ -1,8 +1,6 @@ | |
/* | |
* IPVS: Never Queue scheduling module | |
* | |
- * Version: $Id: ip_vs_nq.c,v 1.2 2003/06/08 09:31:19 wensong Exp $ | |
- * | |
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org> | |
* | |
* This program is free software; you can redistribute it and/or | |
@@ -138,6 +136,7 @@ | |
.name = "nq", | |
.refcnt = ATOMIC_INIT(0), | |
.module = THIS_MODULE, | |
+ .n_list = LIST_HEAD_INIT(ip_vs_nq_scheduler.n_list), | |
.init_service = ip_vs_nq_init_svc, | |
.done_service = ip_vs_nq_done_svc, | |
.update_service = ip_vs_nq_update_svc, | |
@@ -147,7 +146,6 @@ | |
static int __init ip_vs_nq_init(void) | |
{ | |
- INIT_LIST_HEAD(&ip_vs_nq_scheduler.n_list); | |
return register_ip_vs_scheduler(&ip_vs_nq_scheduler); | |
} | |
diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_proto.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_proto.c | |
--- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_proto.c 2009-02-16 11:57:22.000000000 -0400 | |
+++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_proto.c 2009-02-16 12:56:22.000000000 -0400 | |
@@ -1,8 +1,6 @@ | |
/* | |
* ip_vs_proto.c: transport protocol load balancing support for IPVS | |
* | |
- * Version: $Id: ip_vs_proto.c,v 1.2 2003/04/18 09:03:16 wensong Exp $ | |
- * | |
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org> | |
* Julian Anastasov <ja@ssi.bg> | |
* | |
@@ -45,7 +43,7 @@ | |
/* | |
* register an ipvs protocol | |
*/ | |
-static int register_ip_vs_protocol(struct ip_vs_protocol *pp) | |
+static int __used __init register_ip_vs_protocol(struct ip_vs_protocol *pp) | |
{ | |
unsigned hash = IP_VS_PROTO_HASH(pp->protocol); | |
@@ -148,7 +146,7 @@ | |
struct ip_vs_protocol *pp = ip_vs_proto_get(proto); | |
if (pp == NULL || pp->state_name == NULL) | |
- return "ERR!"; | |
+ return (IPPROTO_IP == proto) ? "NONE" : "ERR!"; | |
return pp->state_name(state); | |
} | |
@@ -165,7 +163,7 @@ | |
ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph); | |
if (ih == NULL) | |
sprintf(buf, "%s TRUNCATED", pp->name); | |
- else if (ih->frag_off & __constant_htons(IP_OFFSET)) | |
+ else if (ih->frag_off & htons(IP_OFFSET)) | |
sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u frag", | |
pp->name, NIPQUAD(ih->saddr), | |
NIPQUAD(ih->daddr)); | |
@@ -192,7 +190,7 @@ | |
} | |
-int ip_vs_protocol_init(void) | |
+int __init ip_vs_protocol_init(void) | |
{ | |
char protocols[64]; | |
#define REGISTER_PROTOCOL(p) \ | |
diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_proto_ah.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_proto_ah.c | |
--- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_proto_ah.c 2009-02-16 11:57:22.000000000 -0400 | |
+++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_proto_ah.c 2009-02-16 12:56:22.000000000 -0400 | |
@@ -1,8 +1,6 @@ | |
/* | |
* ip_vs_proto_ah.c: AH IPSec load balancing support for IPVS | |
* | |
- * Version: $Id: ip_vs_proto_ah.c,v 1.1 2003/07/04 15:04:37 wensong Exp $ | |
- * | |
* Authors: Julian Anastasov <ja@ssi.bg>, February 2002 | |
* Wensong Zhang <wensong@linuxvirtualserver.org> | |
* | |
@@ -52,15 +50,15 @@ | |
if (likely(!inverse)) { | |
cp = ip_vs_conn_in_get(IPPROTO_UDP, | |
iph->saddr, | |
- __constant_htons(PORT_ISAKMP), | |
+ htons(PORT_ISAKMP), | |
iph->daddr, | |
- __constant_htons(PORT_ISAKMP)); | |
+ htons(PORT_ISAKMP)); | |
} else { | |
cp = ip_vs_conn_in_get(IPPROTO_UDP, | |
iph->daddr, | |
- __constant_htons(PORT_ISAKMP), | |
+ htons(PORT_ISAKMP), | |
iph->saddr, | |
- __constant_htons(PORT_ISAKMP)); | |
+ htons(PORT_ISAKMP)); | |
} | |
if (!cp) { | |
@@ -89,15 +87,15 @@ | |
if (likely(!inverse)) { | |
cp = ip_vs_conn_out_get(IPPROTO_UDP, | |
iph->saddr, | |
- __constant_htons(PORT_ISAKMP), | |
+ htons(PORT_ISAKMP), | |
iph->daddr, | |
- __constant_htons(PORT_ISAKMP)); | |
+ htons(PORT_ISAKMP)); | |
} else { | |
cp = ip_vs_conn_out_get(IPPROTO_UDP, | |
iph->daddr, | |
- __constant_htons(PORT_ISAKMP), | |
+ htons(PORT_ISAKMP), | |
iph->saddr, | |
- __constant_htons(PORT_ISAKMP)); | |
+ htons(PORT_ISAKMP)); | |
} | |
if (!cp) { | |
@@ -160,6 +158,7 @@ | |
struct ip_vs_protocol ip_vs_protocol_ah = { | |
.name = "AH", | |
.protocol = IPPROTO_AH, | |
+ .num_states = 1, | |
.dont_defrag = 1, | |
.init = ah_init, | |
.exit = ah_exit, | |
diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_proto_esp.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_proto_esp.c | |
--- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_proto_esp.c 2009-02-16 11:57:22.000000000 -0400 | |
+++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_proto_esp.c 2009-02-16 12:56:22.000000000 -0400 | |
@@ -1,8 +1,6 @@ | |
/* | |
* ip_vs_proto_esp.c: ESP IPSec load balancing support for IPVS | |
* | |
- * Version: $Id: ip_vs_proto_esp.c,v 1.1 2003/07/04 15:04:37 wensong Exp $ | |
- * | |
* Authors: Julian Anastasov <ja@ssi.bg>, February 2002 | |
* Wensong Zhang <wensong@linuxvirtualserver.org> | |
* | |
@@ -52,15 +50,15 @@ | |
if (likely(!inverse)) { | |
cp = ip_vs_conn_in_get(IPPROTO_UDP, | |
iph->saddr, | |
- __constant_htons(PORT_ISAKMP), | |
+ htons(PORT_ISAKMP), | |
iph->daddr, | |
- __constant_htons(PORT_ISAKMP)); | |
+ htons(PORT_ISAKMP)); | |
} else { | |
cp = ip_vs_conn_in_get(IPPROTO_UDP, | |
iph->daddr, | |
- __constant_htons(PORT_ISAKMP), | |
+ htons(PORT_ISAKMP), | |
iph->saddr, | |
- __constant_htons(PORT_ISAKMP)); | |
+ htons(PORT_ISAKMP)); | |
} | |
if (!cp) { | |
@@ -89,15 +87,15 @@ | |
if (likely(!inverse)) { | |
cp = ip_vs_conn_out_get(IPPROTO_UDP, | |
iph->saddr, | |
- __constant_htons(PORT_ISAKMP), | |
+ htons(PORT_ISAKMP), | |
iph->daddr, | |
- __constant_htons(PORT_ISAKMP)); | |
+ htons(PORT_ISAKMP)); | |
} else { | |
cp = ip_vs_conn_out_get(IPPROTO_UDP, | |
iph->daddr, | |
- __constant_htons(PORT_ISAKMP), | |
+ htons(PORT_ISAKMP), | |
iph->saddr, | |
- __constant_htons(PORT_ISAKMP)); | |
+ htons(PORT_ISAKMP)); | |
} | |
if (!cp) { | |
@@ -159,6 +157,7 @@ | |
struct ip_vs_protocol ip_vs_protocol_esp = { | |
.name = "ESP", | |
.protocol = IPPROTO_ESP, | |
+ .num_states = 1, | |
.dont_defrag = 1, | |
.init = esp_init, | |
.exit = esp_exit, | |
diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_proto_tcp.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_proto_tcp.c | |
--- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_proto_tcp.c 2009-02-16 11:57:22.000000000 -0400 | |
+++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_proto_tcp.c 2009-02-16 12:56:22.000000000 -0400 | |
@@ -1,8 +1,6 @@ | |
/* | |
* ip_vs_proto_tcp.c: TCP load balancing support for IPVS | |
* | |
- * Version: $Id: ip_vs_proto_tcp.c,v 1.3 2002/11/30 01:50:35 wensong Exp $ | |
- * | |
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org> | |
* Julian Anastasov <ja@ssi.bg> | |
* | |
@@ -20,6 +18,7 @@ | |
#include <linux/tcp.h> /* for tcphdr */ | |
#include <net/ip.h> | |
#include <net/tcp.h> /* for csum_tcpudp_magic */ | |
+#include <linux/netfilter.h> | |
#include <linux/netfilter_ipv4.h> | |
#include <net/ip_vs.h> | |
@@ -76,16 +75,15 @@ | |
struct ip_vs_service *svc; | |
struct tcphdr _tcph, *th; | |
- th = skb_header_pointer(skb, skb->nh.iph->ihl*4, | |
- sizeof(_tcph), &_tcph); | |
+ th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph); | |
if (th == NULL) { | |
*verdict = NF_DROP; | |
return 0; | |
} | |
if (th->syn && | |
- (svc = ip_vs_service_get(skb->mark, skb->nh.iph->protocol, | |
- skb->nh.iph->daddr, th->dest))) { | |
+ (svc = ip_vs_service_get(skb->mark, ip_hdr(skb)->protocol, | |
+ ip_hdr(skb)->daddr, th->dest))) { | |
if (ip_vs_todrop()) { | |
/* | |
* It seems that we are very loaded. | |
@@ -123,27 +121,27 @@ | |
static int | |
-tcp_snat_handler(struct sk_buff **pskb, | |
+tcp_snat_handler(struct sk_buff *skb, | |
struct ip_vs_protocol *pp, struct ip_vs_conn *cp) | |
{ | |
struct tcphdr *tcph; | |
- unsigned int tcphoff = (*pskb)->nh.iph->ihl * 4; | |
+ const unsigned int tcphoff = ip_hdrlen(skb); | |
/* csum_check requires unshared skb */ | |
- if (!ip_vs_make_skb_writable(pskb, tcphoff+sizeof(*tcph))) | |
+ if (!skb_make_writable(skb, tcphoff+sizeof(*tcph))) | |
return 0; | |
if (unlikely(cp->app != NULL)) { | |
/* Some checks before mangling */ | |
- if (pp->csum_check && !pp->csum_check(*pskb, pp)) | |
+ if (pp->csum_check && !pp->csum_check(skb, pp)) | |
return 0; | |
/* Call application helper if needed */ | |
- if (!ip_vs_app_pkt_out(cp, pskb)) | |
+ if (!ip_vs_app_pkt_out(cp, skb)) | |
return 0; | |
} | |
- tcph = (void *)(*pskb)->nh.iph + tcphoff; | |
+ tcph = (void *)ip_hdr(skb) + tcphoff; | |
tcph->source = cp->vport; | |
/* Adjust TCP checksums */ | |
@@ -151,17 +149,15 @@ | |
/* Only port and addr are changed, do fast csum update */ | |
tcp_fast_csum_update(tcph, cp->daddr, cp->vaddr, | |
cp->dport, cp->vport); | |
- if ((*pskb)->ip_summed == CHECKSUM_COMPLETE) | |
- (*pskb)->ip_summed = CHECKSUM_NONE; | |
+ if (skb->ip_summed == CHECKSUM_COMPLETE) | |
+ skb->ip_summed = CHECKSUM_NONE; | |
} else { | |
/* full checksum calculation */ | |
tcph->check = 0; | |
- (*pskb)->csum = skb_checksum(*pskb, tcphoff, | |
- (*pskb)->len - tcphoff, 0); | |
+ skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0); | |
tcph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr, | |
- (*pskb)->len - tcphoff, | |
- cp->protocol, | |
- (*pskb)->csum); | |
+ skb->len - tcphoff, | |
+ cp->protocol, skb->csum); | |
IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n", | |
pp->name, tcph->check, | |
(char*)&(tcph->check) - (char*)tcph); | |
@@ -171,30 +167,30 @@ | |
static int | |
-tcp_dnat_handler(struct sk_buff **pskb, | |
+tcp_dnat_handler(struct sk_buff *skb, | |
struct ip_vs_protocol *pp, struct ip_vs_conn *cp) | |
{ | |
struct tcphdr *tcph; | |
- unsigned int tcphoff = (*pskb)->nh.iph->ihl * 4; | |
+ const unsigned int tcphoff = ip_hdrlen(skb); | |
/* csum_check requires unshared skb */ | |
- if (!ip_vs_make_skb_writable(pskb, tcphoff+sizeof(*tcph))) | |
+ if (!skb_make_writable(skb, tcphoff+sizeof(*tcph))) | |
return 0; | |
if (unlikely(cp->app != NULL)) { | |
/* Some checks before mangling */ | |
- if (pp->csum_check && !pp->csum_check(*pskb, pp)) | |
+ if (pp->csum_check && !pp->csum_check(skb, pp)) | |
return 0; | |
/* | |
* Attempt ip_vs_app call. | |
* It will fix ip_vs_conn and iph ack_seq stuff | |
*/ | |
- if (!ip_vs_app_pkt_in(cp, pskb)) | |
+ if (!ip_vs_app_pkt_in(cp, skb)) | |
return 0; | |
} | |
- tcph = (void *)(*pskb)->nh.iph + tcphoff; | |
+ tcph = (void *)ip_hdr(skb) + tcphoff; | |
tcph->dest = cp->dport; | |
/* | |
@@ -204,18 +200,16 @@ | |
/* Only port and addr are changed, do fast csum update */ | |
tcp_fast_csum_update(tcph, cp->vaddr, cp->daddr, | |
cp->vport, cp->dport); | |
- if ((*pskb)->ip_summed == CHECKSUM_COMPLETE) | |
- (*pskb)->ip_summed = CHECKSUM_NONE; | |
+ if (skb->ip_summed == CHECKSUM_COMPLETE) | |
+ skb->ip_summed = CHECKSUM_NONE; | |
} else { | |
/* full checksum calculation */ | |
tcph->check = 0; | |
- (*pskb)->csum = skb_checksum(*pskb, tcphoff, | |
- (*pskb)->len - tcphoff, 0); | |
+ skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0); | |
tcph->check = csum_tcpudp_magic(cp->caddr, cp->daddr, | |
- (*pskb)->len - tcphoff, | |
- cp->protocol, | |
- (*pskb)->csum); | |
- (*pskb)->ip_summed = CHECKSUM_UNNECESSARY; | |
+ skb->len - tcphoff, | |
+ cp->protocol, skb->csum); | |
+ skb->ip_summed = CHECKSUM_UNNECESSARY; | |
} | |
return 1; | |
} | |
@@ -224,15 +218,15 @@ | |
static int | |
tcp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp) | |
{ | |
- unsigned int tcphoff = skb->nh.iph->ihl*4; | |
+ const unsigned int tcphoff = ip_hdrlen(skb); | |
switch (skb->ip_summed) { | |
case CHECKSUM_NONE: | |
skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0); | |
case CHECKSUM_COMPLETE: | |
- if (csum_tcpudp_magic(skb->nh.iph->saddr, skb->nh.iph->daddr, | |
+ if (csum_tcpudp_magic(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, | |
skb->len - tcphoff, | |
- skb->nh.iph->protocol, skb->csum)) { | |
+ ip_hdr(skb)->protocol, skb->csum)) { | |
IP_VS_DBG_RL_PKT(0, pp, skb, 0, | |
"Failed checksum for"); | |
return 0; | |
@@ -467,8 +461,7 @@ | |
{ | |
struct tcphdr _tcph, *th; | |
- th = skb_header_pointer(skb, skb->nh.iph->ihl*4, | |
- sizeof(_tcph), &_tcph); | |
+ th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph); | |
if (th == NULL) | |
return 0; | |
@@ -555,7 +548,7 @@ | |
IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->" | |
"%u.%u.%u.%u:%u to app %s on port %u\n", | |
- __FUNCTION__, | |
+ __func__, | |
NIPQUAD(cp->caddr), ntohs(cp->cport), | |
NIPQUAD(cp->vaddr), ntohs(cp->vport), | |
inc->name, ntohs(inc->port)); | |
@@ -599,6 +592,7 @@ | |
struct ip_vs_protocol ip_vs_protocol_tcp = { | |
.name = "TCP", | |
.protocol = IPPROTO_TCP, | |
+ .num_states = IP_VS_TCP_S_LAST, | |
.dont_defrag = 0, | |
.appcnt = ATOMIC_INIT(0), | |
.init = ip_vs_tcp_init, | |
diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_proto_udp.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_proto_udp.c | |
--- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_proto_udp.c 2009-02-16 11:57:22.000000000 -0400 | |
+++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_proto_udp.c 2009-02-16 12:56:22.000000000 -0400 | |
@@ -1,8 +1,6 @@ | |
/* | |
* ip_vs_proto_udp.c: UDP load balancing support for IPVS | |
* | |
- * Version: $Id: ip_vs_proto_udp.c,v 1.3 2002/11/30 01:50:35 wensong Exp $ | |
- * | |
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org> | |
* Julian Anastasov <ja@ssi.bg> | |
* | |
@@ -18,11 +16,12 @@ | |
#include <linux/in.h> | |
#include <linux/ip.h> | |
#include <linux/kernel.h> | |
+#include <linux/netfilter.h> | |
#include <linux/netfilter_ipv4.h> | |
#include <linux/udp.h> | |
#include <net/ip_vs.h> | |
- | |
+#include <net/ip.h> | |
static struct ip_vs_conn * | |
udp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, | |
@@ -56,7 +55,7 @@ | |
struct ip_vs_conn *cp; | |
__be16 _ports[2], *pptr; | |
- pptr = skb_header_pointer(skb, skb->nh.iph->ihl*4, | |
+ pptr = skb_header_pointer(skb, ip_hdrlen(skb), | |
sizeof(_ports), _ports); | |
if (pptr == NULL) | |
return NULL; | |
@@ -82,15 +81,15 @@ | |
struct ip_vs_service *svc; | |
struct udphdr _udph, *uh; | |
- uh = skb_header_pointer(skb, skb->nh.iph->ihl*4, | |
+ uh = skb_header_pointer(skb, ip_hdrlen(skb), | |
sizeof(_udph), &_udph); | |
if (uh == NULL) { | |
*verdict = NF_DROP; | |
return 0; | |
} | |
- if ((svc = ip_vs_service_get(skb->mark, skb->nh.iph->protocol, | |
- skb->nh.iph->daddr, uh->dest))) { | |
+ if ((svc = ip_vs_service_get(skb->mark, ip_hdr(skb)->protocol, | |
+ ip_hdr(skb)->daddr, uh->dest))) { | |
if (ip_vs_todrop()) { | |
/* | |
* It seems that we are very loaded. | |
@@ -129,29 +128,29 @@ | |
} | |
static int | |
-udp_snat_handler(struct sk_buff **pskb, | |
+udp_snat_handler(struct sk_buff *skb, | |
struct ip_vs_protocol *pp, struct ip_vs_conn *cp) | |
{ | |
struct udphdr *udph; | |
- unsigned int udphoff = (*pskb)->nh.iph->ihl * 4; | |
+ const unsigned int udphoff = ip_hdrlen(skb); | |
/* csum_check requires unshared skb */ | |
- if (!ip_vs_make_skb_writable(pskb, udphoff+sizeof(*udph))) | |
+ if (!skb_make_writable(skb, udphoff+sizeof(*udph))) | |
return 0; | |
if (unlikely(cp->app != NULL)) { | |
/* Some checks before mangling */ | |
- if (pp->csum_check && !pp->csum_check(*pskb, pp)) | |
+ if (pp->csum_check && !pp->csum_check(skb, pp)) | |
return 0; | |
/* | |
* Call application helper if needed | |
*/ | |
- if (!ip_vs_app_pkt_out(cp, pskb)) | |
+ if (!ip_vs_app_pkt_out(cp, skb)) | |
return 0; | |
} | |
- udph = (void *)(*pskb)->nh.iph + udphoff; | |
+ udph = (void *)ip_hdr(skb) + udphoff; | |
udph->source = cp->vport; | |
/* | |
@@ -161,17 +160,15 @@ | |
/* Only port and addr are changed, do fast csum update */ | |
udp_fast_csum_update(udph, cp->daddr, cp->vaddr, | |
cp->dport, cp->vport); | |
- if ((*pskb)->ip_summed == CHECKSUM_COMPLETE) | |
- (*pskb)->ip_summed = CHECKSUM_NONE; | |
+ if (skb->ip_summed == CHECKSUM_COMPLETE) | |
+ skb->ip_summed = CHECKSUM_NONE; | |
} else { | |
/* full checksum calculation */ | |
udph->check = 0; | |
- (*pskb)->csum = skb_checksum(*pskb, udphoff, | |
- (*pskb)->len - udphoff, 0); | |
+ skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0); | |
udph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr, | |
- (*pskb)->len - udphoff, | |
- cp->protocol, | |
- (*pskb)->csum); | |
+ skb->len - udphoff, | |
+ cp->protocol, skb->csum); | |
if (udph->check == 0) | |
udph->check = CSUM_MANGLED_0; | |
IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n", | |
@@ -183,30 +180,30 @@ | |
static int | |
-udp_dnat_handler(struct sk_buff **pskb, | |
+udp_dnat_handler(struct sk_buff *skb, | |
struct ip_vs_protocol *pp, struct ip_vs_conn *cp) | |
{ | |
struct udphdr *udph; | |
- unsigned int udphoff = (*pskb)->nh.iph->ihl * 4; | |
+ unsigned int udphoff = ip_hdrlen(skb); | |
/* csum_check requires unshared skb */ | |
- if (!ip_vs_make_skb_writable(pskb, udphoff+sizeof(*udph))) | |
+ if (!skb_make_writable(skb, udphoff+sizeof(*udph))) | |
return 0; | |
if (unlikely(cp->app != NULL)) { | |
/* Some checks before mangling */ | |
- if (pp->csum_check && !pp->csum_check(*pskb, pp)) | |
+ if (pp->csum_check && !pp->csum_check(skb, pp)) | |
return 0; | |
/* | |
* Attempt ip_vs_app call. | |
* It will fix ip_vs_conn | |
*/ | |
- if (!ip_vs_app_pkt_in(cp, pskb)) | |
+ if (!ip_vs_app_pkt_in(cp, skb)) | |
return 0; | |
} | |
- udph = (void *)(*pskb)->nh.iph + udphoff; | |
+ udph = (void *)ip_hdr(skb) + udphoff; | |
udph->dest = cp->dport; | |
/* | |
@@ -216,20 +213,18 @@ | |
/* Only port and addr are changed, do fast csum update */ | |
udp_fast_csum_update(udph, cp->vaddr, cp->daddr, | |
cp->vport, cp->dport); | |
- if ((*pskb)->ip_summed == CHECKSUM_COMPLETE) | |
- (*pskb)->ip_summed = CHECKSUM_NONE; | |
+ if (skb->ip_summed == CHECKSUM_COMPLETE) | |
+ skb->ip_summed = CHECKSUM_NONE; | |
} else { | |
/* full checksum calculation */ | |
udph->check = 0; | |
- (*pskb)->csum = skb_checksum(*pskb, udphoff, | |
- (*pskb)->len - udphoff, 0); | |
+ skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0); | |
udph->check = csum_tcpudp_magic(cp->caddr, cp->daddr, | |
- (*pskb)->len - udphoff, | |
- cp->protocol, | |
- (*pskb)->csum); | |
+ skb->len - udphoff, | |
+ cp->protocol, skb->csum); | |
if (udph->check == 0) | |
udph->check = CSUM_MANGLED_0; | |
- (*pskb)->ip_summed = CHECKSUM_UNNECESSARY; | |
+ skb->ip_summed = CHECKSUM_UNNECESSARY; | |
} | |
return 1; | |
} | |
@@ -239,7 +234,7 @@ | |
udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp) | |
{ | |
struct udphdr _udph, *uh; | |
- unsigned int udphoff = skb->nh.iph->ihl*4; | |
+ const unsigned int udphoff = ip_hdrlen(skb); | |
uh = skb_header_pointer(skb, udphoff, sizeof(_udph), &_udph); | |
if (uh == NULL) | |
@@ -251,10 +246,10 @@ | |
skb->csum = skb_checksum(skb, udphoff, | |
skb->len - udphoff, 0); | |
case CHECKSUM_COMPLETE: | |
- if (csum_tcpudp_magic(skb->nh.iph->saddr, | |
- skb->nh.iph->daddr, | |
+ if (csum_tcpudp_magic(ip_hdr(skb)->saddr, | |
+ ip_hdr(skb)->daddr, | |
skb->len - udphoff, | |
- skb->nh.iph->protocol, | |
+ ip_hdr(skb)->protocol, | |
skb->csum)) { | |
IP_VS_DBG_RL_PKT(0, pp, skb, 0, | |
"Failed checksum for"); | |
@@ -347,7 +342,7 @@ | |
IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->" | |
"%u.%u.%u.%u:%u to app %s on port %u\n", | |
- __FUNCTION__, | |
+ __func__, | |
NIPQUAD(cp->caddr), ntohs(cp->cport), | |
NIPQUAD(cp->vaddr), ntohs(cp->vport), | |
inc->name, ntohs(inc->port)); | |
@@ -412,6 +407,7 @@ | |
struct ip_vs_protocol ip_vs_protocol_udp = { | |
.name = "UDP", | |
.protocol = IPPROTO_UDP, | |
+ .num_states = IP_VS_UDP_S_LAST, | |
.dont_defrag = 0, | |
.init = udp_init, | |
.exit = udp_exit, | |
diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_rr.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_rr.c | |
--- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_rr.c 2009-02-16 11:57:22.000000000 -0400 | |
+++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_rr.c 2009-02-16 12:56:22.000000000 -0400 | |
@@ -1,8 +1,6 @@ | |
/* | |
* IPVS: Round-Robin Scheduling module | |
* | |
- * Version: $Id: ip_vs_rr.c,v 1.9 2002/09/15 08:14:08 wensong Exp $ | |
- * | |
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org> | |
* Peter Kese <peter.kese@ijs.si> | |
* | |
@@ -68,7 +66,7 @@ | |
q = q->next; | |
continue; | |
} | |
- | |
+ | |
dest = list_entry(q, struct ip_vs_dest, n_list); | |
if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) && | |
atomic_read(&dest->weight) > 0) | |
@@ -96,6 +94,7 @@ | |
.name = "rr", /* name */ | |
.refcnt = ATOMIC_INIT(0), | |
.module = THIS_MODULE, | |
+ .n_list = LIST_HEAD_INIT(ip_vs_rr_scheduler.n_list), | |
.init_service = ip_vs_rr_init_svc, | |
.done_service = ip_vs_rr_done_svc, | |
.update_service = ip_vs_rr_update_svc, | |
@@ -104,7 +103,6 @@ | |
static int __init ip_vs_rr_init(void) | |
{ | |
- INIT_LIST_HEAD(&ip_vs_rr_scheduler.n_list); | |
return register_ip_vs_scheduler(&ip_vs_rr_scheduler); | |
} | |
diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_sched.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_sched.c | |
--- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_sched.c 2009-02-16 11:57:22.000000000 -0400 | |
+++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_sched.c 2009-02-16 12:56:22.000000000 -0400 | |
@@ -5,8 +5,6 @@ | |
* high-performance and highly available server based on a | |
* cluster of servers. | |
* | |
- * Version: $Id: ip_vs_sched.c,v 1.13 2003/05/10 03:05:23 wensong Exp $ | |
- * | |
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org> | |
* Peter Kese <peter.kese@ijs.si> | |
* | |
@@ -20,11 +18,11 @@ | |
*/ | |
#include <linux/module.h> | |
-#include <linux/sched.h> | |
#include <linux/spinlock.h> | |
#include <linux/interrupt.h> | |
#include <asm/string.h> | |
#include <linux/kmod.h> | |
+#include <linux/sysctl.h> | |
#include <net/ip_vs.h> | |
@@ -184,22 +182,9 @@ | |
/* increase the module use count */ | |
ip_vs_use_count_inc(); | |
- /* | |
- * Make sure that the scheduler with this name doesn't exist | |
- * in the scheduler list. | |
- */ | |
- sched = ip_vs_sched_getbyname(scheduler->name); | |
- if (sched) { | |
- ip_vs_scheduler_put(sched); | |
- ip_vs_use_count_dec(); | |
- IP_VS_ERR("register_ip_vs_scheduler(): [%s] scheduler " | |
- "already existed in the system\n", scheduler->name); | |
- return -EINVAL; | |
- } | |
- | |
write_lock_bh(&__ip_vs_sched_lock); | |
- if (scheduler->n_list.next != &scheduler->n_list) { | |
+ if (!list_empty(&scheduler->n_list)) { | |
write_unlock_bh(&__ip_vs_sched_lock); | |
ip_vs_use_count_dec(); | |
IP_VS_ERR("register_ip_vs_scheduler(): [%s] scheduler " | |
@@ -208,6 +193,20 @@ | |
} | |
/* | |
+ * Make sure that the scheduler with this name doesn't exist | |
+ * in the scheduler list. | |
+ */ | |
+ list_for_each_entry(sched, &ip_vs_schedulers, n_list) { | |
+ if (strcmp(scheduler->name, sched->name) == 0) { | |
+ write_unlock_bh(&__ip_vs_sched_lock); | |
+ ip_vs_use_count_dec(); | |
+ IP_VS_ERR("register_ip_vs_scheduler(): [%s] scheduler " | |
+ "already existed in the system\n", | |
+ scheduler->name); | |
+ return -EINVAL; | |
+ } | |
+ } | |
+ /* | |
* Add it into the d-linked scheduler list | |
*/ | |
list_add(&scheduler->n_list, &ip_vs_schedulers); | |
@@ -230,7 +229,7 @@ | |
} | |
write_lock_bh(&__ip_vs_sched_lock); | |
- if (scheduler->n_list.next == &scheduler->n_list) { | |
+ if (list_empty(&scheduler->n_list)) { | |
write_unlock_bh(&__ip_vs_sched_lock); | |
IP_VS_ERR("unregister_ip_vs_scheduler(): [%s] scheduler " | |
"is not in the list. failed\n", scheduler->name); | |
diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_sed.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_sed.c | |
--- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_sed.c 2009-02-16 11:57:22.000000000 -0400 | |
+++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_sed.c 2009-02-16 12:56:22.000000000 -0400 | |
@@ -1,8 +1,6 @@ | |
/* | |
* IPVS: Shortest Expected Delay scheduling module | |
* | |
- * Version: $Id: ip_vs_sed.c,v 1.1 2003/05/10 03:06:08 wensong Exp $ | |
- * | |
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org> | |
* | |
* This program is free software; you can redistribute it and/or | |
@@ -18,7 +16,7 @@ | |
* The SED algorithm attempts to minimize each job's expected delay until | |
* completion. The expected delay that the job will experience is | |
* (Ci + 1) / Ui if sent to the ith server, in which Ci is the number of | |
- * jobs on the the ith server and Ui is the fixed service rate (weight) of | |
+ * jobs on the ith server and Ui is the fixed service rate (weight) of | |
* the ith server. The SED algorithm adopts a greedy policy that each does | |
* what is in its own best interest, i.e. to join the queue which would | |
* minimize its expected delay of completion. | |
@@ -140,6 +138,7 @@ | |
.name = "sed", | |
.refcnt = ATOMIC_INIT(0), | |
.module = THIS_MODULE, | |
+ .n_list = LIST_HEAD_INIT(ip_vs_sed_scheduler.n_list), | |
.init_service = ip_vs_sed_init_svc, | |
.done_service = ip_vs_sed_done_svc, | |
.update_service = ip_vs_sed_update_svc, | |
@@ -149,7 +148,6 @@ | |
static int __init ip_vs_sed_init(void) | |
{ | |
- INIT_LIST_HEAD(&ip_vs_sed_scheduler.n_list); | |
return register_ip_vs_scheduler(&ip_vs_sed_scheduler); | |
} | |
diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_sh.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_sh.c | |
--- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_sh.c 2009-02-16 11:57:22.000000000 -0400 | |
+++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_sh.c 2009-02-16 12:56:22.000000000 -0400 | |
@@ -1,8 +1,6 @@ | |
/* | |
* IPVS: Source Hashing scheduling module | |
* | |
- * Version: $Id: ip_vs_sh.c,v 1.5 2002/09/15 08:14:08 wensong Exp $ | |
- * | |
* Authors: Wensong Zhang <wensong@gnuchina.org> | |
* | |
* This program is free software; you can redistribute it and/or | |
@@ -201,7 +199,7 @@ | |
{ | |
struct ip_vs_dest *dest; | |
struct ip_vs_sh_bucket *tbl; | |
- struct iphdr *iph = skb->nh.iph; | |
+ struct iphdr *iph = ip_hdr(skb); | |
IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n"); | |
@@ -232,6 +230,7 @@ | |
.name = "sh", | |
.refcnt = ATOMIC_INIT(0), | |
.module = THIS_MODULE, | |
+ .n_list = LIST_HEAD_INIT(ip_vs_sh_scheduler.n_list), | |
.init_service = ip_vs_sh_init_svc, | |
.done_service = ip_vs_sh_done_svc, | |
.update_service = ip_vs_sh_update_svc, | |
@@ -241,7 +240,6 @@ | |
static int __init ip_vs_sh_init(void) | |
{ | |
- INIT_LIST_HEAD(&ip_vs_sh_scheduler.n_list); | |
return register_ip_vs_scheduler(&ip_vs_sh_scheduler); | |
} | |
diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_sync.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_sync.c | |
--- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_sync.c 2009-02-16 11:57:22.000000000 -0400 | |
+++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_sync.c 2009-02-16 12:56:22.000000000 -0400 | |
@@ -5,8 +5,6 @@ | |
* high-performance and highly available server based on a | |
* cluster of servers. | |
* | |
- * Version: $Id: ip_vs_sync.c,v 1.13 2003/06/08 09:31:19 wensong Exp $ | |
- * | |
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org> | |
* | |
* ip_vs_sync: sync connection info from master load balancer to backups | |
@@ -29,10 +27,12 @@ | |
#include <linux/in.h> | |
#include <linux/igmp.h> /* for ip_mc_join_group */ | |
#include <linux/udp.h> | |
+#include <linux/err.h> | |
+#include <linux/kthread.h> | |
+#include <linux/wait.h> | |
#include <net/ip.h> | |
#include <net/sock.h> | |
-#include <asm/uaccess.h> /* for get_fs and set_fs */ | |
#include <net/ip_vs.h> | |
@@ -67,7 +67,11 @@ | |
struct ip_vs_seq out_seq; /* outgoing seq. struct */ | |
}; | |
-#define IP_VS_SYNC_CONN_TIMEOUT (3*60*HZ) | |
+struct ip_vs_sync_thread_data { | |
+ struct socket *sock; | |
+ char *buf; | |
+}; | |
+ | |
#define SIMPLE_CONN_SIZE (sizeof(struct ip_vs_sync_conn)) | |
#define FULL_CONN_SIZE \ | |
(sizeof(struct ip_vs_sync_conn) + sizeof(struct ip_vs_sync_conn_options)) | |
@@ -136,18 +140,19 @@ | |
char ip_vs_master_mcast_ifn[IP_VS_IFNAME_MAXLEN]; | |
char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN]; | |
-/* multicast addr */ | |
-static struct sockaddr_in mcast_addr; | |
+/* sync daemon tasks */ | |
+static struct task_struct *sync_master_thread; | |
+static struct task_struct *sync_backup_thread; | |
+/* multicast addr */ | |
+static struct sockaddr_in mcast_addr = { | |
+ .sin_family = AF_INET, | |
+ .sin_port = __constant_htons(IP_VS_SYNC_PORT), | |
+ .sin_addr.s_addr = __constant_htonl(IP_VS_SYNC_GROUP), | |
+}; | |
-static inline void sb_queue_tail(struct ip_vs_sync_buff *sb) | |
-{ | |
- spin_lock(&ip_vs_sync_lock); | |
- list_add_tail(&sb->list, &ip_vs_sync_queue); | |
- spin_unlock(&ip_vs_sync_lock); | |
-} | |
-static inline struct ip_vs_sync_buff * sb_dequeue(void) | |
+static inline struct ip_vs_sync_buff *sb_dequeue(void) | |
{ | |
struct ip_vs_sync_buff *sb; | |
@@ -191,6 +196,16 @@ | |
kfree(sb); | |
} | |
+static inline void sb_queue_tail(struct ip_vs_sync_buff *sb) | |
+{ | |
+ spin_lock(&ip_vs_sync_lock); | |
+ if (ip_vs_sync_state & IP_VS_STATE_MASTER) | |
+ list_add_tail(&sb->list, &ip_vs_sync_queue); | |
+ else | |
+ ip_vs_sync_buff_release(sb); | |
+ spin_unlock(&ip_vs_sync_lock); | |
+} | |
+ | |
/* | |
* Get the current sync buffer if it has been created for more | |
* than the specified time or the specified time is zero. | |
@@ -279,14 +294,21 @@ | |
struct ip_vs_sync_conn *s; | |
struct ip_vs_sync_conn_options *opt; | |
struct ip_vs_conn *cp; | |
+ struct ip_vs_protocol *pp; | |
+ struct ip_vs_dest *dest; | |
char *p; | |
int i; | |
+ if (buflen < sizeof(struct ip_vs_sync_mesg)) { | |
+ IP_VS_ERR_RL("sync message header too short\n"); | |
+ return; | |
+ } | |
+ | |
/* Convert size back to host byte order */ | |
m->size = ntohs(m->size); | |
if (buflen != m->size) { | |
- IP_VS_ERR("bogus message\n"); | |
+ IP_VS_ERR_RL("bogus sync message size\n"); | |
return; | |
} | |
@@ -299,10 +321,50 @@ | |
p = (char *)buffer + sizeof(struct ip_vs_sync_mesg); | |
for (i=0; i<m->nr_conns; i++) { | |
- unsigned flags; | |
+ unsigned flags, state; | |
+ | |
+ if (p + SIMPLE_CONN_SIZE > buffer+buflen) { | |
+ IP_VS_ERR_RL("bogus conn in sync message\n"); | |
+ return; | |
+ } | |
+ s = (struct ip_vs_sync_conn *) p; | |
+ flags = ntohs(s->flags) | IP_VS_CONN_F_SYNC; | |
+ flags &= ~IP_VS_CONN_F_HASHED; | |
+ if (flags & IP_VS_CONN_F_SEQ_MASK) { | |
+ opt = (struct ip_vs_sync_conn_options *)&s[1]; | |
+ p += FULL_CONN_SIZE; | |
+ if (p > buffer+buflen) { | |
+ IP_VS_ERR_RL("bogus conn options in sync message\n"); | |
+ return; | |
+ } | |
+ } else { | |
+ opt = NULL; | |
+ p += SIMPLE_CONN_SIZE; | |
+ } | |
+ | |
+ state = ntohs(s->state); | |
+ if (!(flags & IP_VS_CONN_F_TEMPLATE)) { | |
+ pp = ip_vs_proto_get(s->protocol); | |
+ if (!pp) { | |
+ IP_VS_ERR_RL("Unsupported protocol %u in sync msg\n", | |
+ s->protocol); | |
+ continue; | |
+ } | |
+ if (state >= pp->num_states) { | |
+ IP_VS_DBG(2, "Invalid %s state %u in sync msg\n", | |
+ pp->name, state); | |
+ continue; | |
+ } | |
+ } else { | |
+ /* protocol in templates is not used for state/timeout */ | |
+ pp = NULL; | |
+ if (state > 0) { | |
+ IP_VS_DBG(2, "Invalid template state %u in sync msg\n", | |
+ state); | |
+ state = 0; | |
+ } | |
+ } | |
- s = (struct ip_vs_sync_conn *)p; | |
- flags = ntohs(s->flags); | |
if (!(flags & IP_VS_CONN_F_TEMPLATE)) | |
cp = ip_vs_conn_in_get(s->protocol, | |
s->caddr, s->cport, | |
@@ -312,38 +374,69 @@ | |
s->caddr, s->cport, | |
s->vaddr, s->vport); | |
if (!cp) { | |
+ /* | |
+ * Find the appropriate destination for the connection. | |
+ * If it is not found the connection will remain unbound | |
+ * but still handled. | |
+ */ | |
+ dest = ip_vs_find_dest(s->daddr, s->dport, | |
+ s->vaddr, s->vport, | |
+ s->protocol); | |
+ /* Set the appropriate activity flag */ | 
+ if (s->protocol == IPPROTO_TCP) { | |
+ if (state != IP_VS_TCP_S_ESTABLISHED) | |
+ flags |= IP_VS_CONN_F_INACTIVE; | |
+ else | |
+ flags &= ~IP_VS_CONN_F_INACTIVE; | |
+ } | |
cp = ip_vs_conn_new(s->protocol, | |
s->caddr, s->cport, | |
s->vaddr, s->vport, | |
s->daddr, s->dport, | |
- flags, NULL); | |
+ flags, dest); | |
+ if (dest) | |
+ atomic_dec(&dest->refcnt); | |
if (!cp) { | |
IP_VS_ERR("ip_vs_conn_new failed\n"); | |
return; | |
} | |
- cp->state = ntohs(s->state); | |
} else if (!cp->dest) { | |
- /* it is an entry created by the synchronization */ | |
- cp->state = ntohs(s->state); | |
- cp->flags = flags | IP_VS_CONN_F_HASHED; | |
- } /* Note that we don't touch its state and flags | |
- if it is a normal entry. */ | |
+ dest = ip_vs_try_bind_dest(cp); | |
+ if (dest) | |
+ atomic_dec(&dest->refcnt); | |
+ } else if ((cp->dest) && (cp->protocol == IPPROTO_TCP) && | |
+ (cp->state != state)) { | |
+ /* update active/inactive flag for the connection */ | |
+ dest = cp->dest; | |
+ if (!(cp->flags & IP_VS_CONN_F_INACTIVE) && | |
+ (state != IP_VS_TCP_S_ESTABLISHED)) { | |
+ atomic_dec(&dest->activeconns); | |
+ atomic_inc(&dest->inactconns); | |
+ cp->flags |= IP_VS_CONN_F_INACTIVE; | |
+ } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) && | |
+ (state == IP_VS_TCP_S_ESTABLISHED)) { | |
+ atomic_inc(&dest->activeconns); | |
+ atomic_dec(&dest->inactconns); | |
+ cp->flags &= ~IP_VS_CONN_F_INACTIVE; | |
+ } | |
+ } | |
- if (flags & IP_VS_CONN_F_SEQ_MASK) { | |
- opt = (struct ip_vs_sync_conn_options *)&s[1]; | |
+ if (opt) | |
memcpy(&cp->in_seq, opt, sizeof(*opt)); | |
- p += FULL_CONN_SIZE; | |
- } else | |
- p += SIMPLE_CONN_SIZE; | |
- | |
atomic_set(&cp->in_pkts, sysctl_ip_vs_sync_threshold[0]); | |
- cp->timeout = IP_VS_SYNC_CONN_TIMEOUT; | |
+ cp->state = state; | |
+ cp->old_state = cp->state; | |
+ /* | |
+ * We can not recover the right timeout for templates | |
+ * in all cases, we can not find the right fwmark | |
+ * virtual service. If needed, we can do it for | |
+ * non-fwmark persistent services. | |
+ */ | |
+ if (!(flags & IP_VS_CONN_F_TEMPLATE) && pp->timeout_table) | |
+ cp->timeout = pp->timeout_table[state]; | |
+ else | |
+ cp->timeout = (3*60*HZ); | |
ip_vs_conn_put(cp); | |
- | |
- if (p > buffer+buflen) { | |
- IP_VS_ERR("bogus message\n"); | |
- return; | |
- } | |
} | |
} | |
@@ -382,7 +475,7 @@ | |
struct net_device *dev; | |
struct inet_sock *inet = inet_sk(sk); | |
- if ((dev = __dev_get_by_name(ifname)) == NULL) | |
+ if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL) | |
return -ENODEV; | |
if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) | |
@@ -407,7 +500,7 @@ | |
int num; | |
if (sync_state == IP_VS_STATE_MASTER) { | |
- if ((dev = __dev_get_by_name(ip_vs_master_mcast_ifn)) == NULL) | |
+ if ((dev = __dev_get_by_name(&init_net, ip_vs_master_mcast_ifn)) == NULL) | |
return -ENODEV; | |
num = (dev->mtu - sizeof(struct iphdr) - | |
@@ -418,7 +511,7 @@ | |
IP_VS_DBG(7, "setting the maximum length of sync sending " | |
"message %d.\n", sync_send_mesg_maxlen); | |
} else if (sync_state == IP_VS_STATE_BACKUP) { | |
- if ((dev = __dev_get_by_name(ip_vs_backup_mcast_ifn)) == NULL) | |
+ if ((dev = __dev_get_by_name(&init_net, ip_vs_backup_mcast_ifn)) == NULL) | |
return -ENODEV; | |
sync_recv_mesg_maxlen = dev->mtu - | |
@@ -446,7 +539,7 @@ | |
memset(&mreq, 0, sizeof(mreq)); | |
memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr)); | |
- if ((dev = __dev_get_by_name(ifname)) == NULL) | |
+ if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL) | |
return -ENODEV; | |
if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) | |
return -EINVAL; | |
@@ -467,7 +560,7 @@ | |
__be32 addr; | |
struct sockaddr_in sin; | |
- if ((dev = __dev_get_by_name(ifname)) == NULL) | |
+ if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL) | |
return -ENODEV; | |
addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE); | |
@@ -492,14 +585,17 @@ | |
static struct socket * make_send_sock(void) | |
{ | |
struct socket *sock; | |
+ int result; | |
/* First create a socket */ | |
- if (sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock) < 0) { | |
+ result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); | |
+ if (result < 0) { | |
IP_VS_ERR("Error during creation of socket; terminating\n"); | |
- return NULL; | |
+ return ERR_PTR(result); | |
} | |
- if (set_mcast_if(sock->sk, ip_vs_master_mcast_ifn) < 0) { | |
+ result = set_mcast_if(sock->sk, ip_vs_master_mcast_ifn); | |
+ if (result < 0) { | |
IP_VS_ERR("Error setting outbound mcast interface\n"); | |
goto error; | |
} | |
@@ -507,14 +603,15 @@ | |
set_mcast_loop(sock->sk, 0); | |
set_mcast_ttl(sock->sk, 1); | |
- if (bind_mcastif_addr(sock, ip_vs_master_mcast_ifn) < 0) { | |
+ result = bind_mcastif_addr(sock, ip_vs_master_mcast_ifn); | |
+ if (result < 0) { | |
IP_VS_ERR("Error binding address of the mcast interface\n"); | |
goto error; | |
} | |
- if (sock->ops->connect(sock, | |
- (struct sockaddr*)&mcast_addr, | |
- sizeof(struct sockaddr), 0) < 0) { | |
+ result = sock->ops->connect(sock, (struct sockaddr *) &mcast_addr, | |
+ sizeof(struct sockaddr), 0); | |
+ if (result < 0) { | |
IP_VS_ERR("Error connecting to the multicast addr\n"); | |
goto error; | |
} | |
@@ -523,7 +620,7 @@ | |
error: | |
sock_release(sock); | |
- return NULL; | |
+ return ERR_PTR(result); | |
} | |
@@ -533,27 +630,30 @@ | |
static struct socket * make_receive_sock(void) | |
{ | |
struct socket *sock; | |
+ int result; | |
/* First create a socket */ | |
- if (sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock) < 0) { | |
+ result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); | |
+ if (result < 0) { | |
IP_VS_ERR("Error during creation of socket; terminating\n"); | |
- return NULL; | |
+ return ERR_PTR(result); | |
} | |
/* it is equivalent to the REUSEADDR option in user-space */ | |
sock->sk->sk_reuse = 1; | |
- if (sock->ops->bind(sock, | |
- (struct sockaddr*)&mcast_addr, | |
- sizeof(struct sockaddr)) < 0) { | |
+ result = sock->ops->bind(sock, (struct sockaddr *) &mcast_addr, | |
+ sizeof(struct sockaddr)); | |
+ if (result < 0) { | |
IP_VS_ERR("Error binding to the multicast addr\n"); | |
goto error; | |
} | |
/* join the multicast group */ | |
- if (join_mcast_group(sock->sk, | |
- (struct in_addr*)&mcast_addr.sin_addr, | |
- ip_vs_backup_mcast_ifn) < 0) { | |
+ result = join_mcast_group(sock->sk, | |
+ (struct in_addr *) &mcast_addr.sin_addr, | |
+ ip_vs_backup_mcast_ifn); | |
+ if (result < 0) { | |
IP_VS_ERR("Error joining to the multicast group\n"); | |
goto error; | |
} | |
@@ -562,7 +662,7 @@ | |
error: | |
sock_release(sock); | |
- return NULL; | |
+ return ERR_PTR(result); | |
} | |
@@ -620,44 +720,29 @@ | |
} | |
-static DECLARE_WAIT_QUEUE_HEAD(sync_wait); | |
-static pid_t sync_master_pid = 0; | |
-static pid_t sync_backup_pid = 0; | |
- | |
-static DECLARE_WAIT_QUEUE_HEAD(stop_sync_wait); | |
-static int stop_master_sync = 0; | |
-static int stop_backup_sync = 0; | |
- | |
-static void sync_master_loop(void) | |
+static int sync_thread_master(void *data) | |
{ | |
- struct socket *sock; | |
+ struct ip_vs_sync_thread_data *tinfo = data; | |
struct ip_vs_sync_buff *sb; | |
- /* create the sending multicast socket */ | |
- sock = make_send_sock(); | |
- if (!sock) | |
- return; | |
- | |
IP_VS_INFO("sync thread started: state = MASTER, mcast_ifn = %s, " | |
"syncid = %d\n", | |
ip_vs_master_mcast_ifn, ip_vs_master_syncid); | |
- for (;;) { | |
- while ((sb=sb_dequeue())) { | |
- ip_vs_send_sync_msg(sock, sb->mesg); | |
+ while (!kthread_should_stop()) { | |
+ while ((sb = sb_dequeue())) { | |
+ ip_vs_send_sync_msg(tinfo->sock, sb->mesg); | |
ip_vs_sync_buff_release(sb); | |
} | |
/* check if entries stay in curr_sb for 2 seconds */ | |
- if ((sb = get_curr_sync_buff(2*HZ))) { | |
- ip_vs_send_sync_msg(sock, sb->mesg); | |
+ sb = get_curr_sync_buff(2 * HZ); | |
+ if (sb) { | |
+ ip_vs_send_sync_msg(tinfo->sock, sb->mesg); | |
ip_vs_sync_buff_release(sb); | |
} | |
- if (stop_master_sync) | |
- break; | |
- | |
- msleep_interruptible(1000); | |
+ schedule_timeout_interruptible(HZ); | |
} | |
/* clean up the sync_buff queue */ | |
@@ -671,235 +756,175 @@ | |
} | |
/* release the sending multicast socket */ | |
- sock_release(sock); | |
+ sock_release(tinfo->sock); | |
+ kfree(tinfo); | |
+ | |
+ return 0; | |
} | |
-static void sync_backup_loop(void) | |
+static int sync_thread_backup(void *data) | |
{ | |
- struct socket *sock; | |
- char *buf; | |
+ struct ip_vs_sync_thread_data *tinfo = data; | |
int len; | |
- if (!(buf = kmalloc(sync_recv_mesg_maxlen, GFP_ATOMIC))) { | |
- IP_VS_ERR("sync_backup_loop: kmalloc error\n"); | |
- return; | |
- } | |
- | |
- /* create the receiving multicast socket */ | |
- sock = make_receive_sock(); | |
- if (!sock) | |
- goto out; | |
- | |
IP_VS_INFO("sync thread started: state = BACKUP, mcast_ifn = %s, " | |
"syncid = %d\n", | |
ip_vs_backup_mcast_ifn, ip_vs_backup_syncid); | |
- for (;;) { | |
- /* do you have data now? */ | |
- while (!skb_queue_empty(&(sock->sk->sk_receive_queue))) { | |
- if ((len = | |
- ip_vs_receive(sock, buf, | |
- sync_recv_mesg_maxlen)) <= 0) { | |
+ while (!kthread_should_stop()) { | |
+ wait_event_interruptible(*tinfo->sock->sk->sk_sleep, | |
+ !skb_queue_empty(&tinfo->sock->sk->sk_receive_queue) | |
+ || kthread_should_stop()); | |
+ | |
+ /* do we have data now? */ | |
+ while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) { | |
+ len = ip_vs_receive(tinfo->sock, tinfo->buf, | |
+ sync_recv_mesg_maxlen); | |
+ if (len <= 0) { | |
IP_VS_ERR("receiving message error\n"); | |
break; | |
} | |
- /* disable bottom half, because it accessed the data | |
+ | |
+ /* disable bottom half, because it accesses the data | |
shared by softirq while getting/creating conns */ | |
local_bh_disable(); | |
- ip_vs_process_message(buf, len); | |
+ ip_vs_process_message(tinfo->buf, len); | |
local_bh_enable(); | |
} | |
- | |
- if (stop_backup_sync) | |
- break; | |
- | |
- msleep_interruptible(1000); | |
} | |
/* release the sending multicast socket */ | |
- sock_release(sock); | |
+ sock_release(tinfo->sock); | |
+ kfree(tinfo->buf); | |
+ kfree(tinfo); | |
- out: | |
- kfree(buf); | |
+ return 0; | |
} | |
-static void set_sync_pid(int sync_state, pid_t sync_pid) | |
-{ | |
- if (sync_state == IP_VS_STATE_MASTER) | |
- sync_master_pid = sync_pid; | |
- else if (sync_state == IP_VS_STATE_BACKUP) | |
- sync_backup_pid = sync_pid; | |
-} | |
- | |
-static void set_stop_sync(int sync_state, int set) | |
+int start_sync_thread(int state, char *mcast_ifn, __u8 syncid) | |
{ | |
- if (sync_state == IP_VS_STATE_MASTER) | |
- stop_master_sync = set; | |
- else if (sync_state == IP_VS_STATE_BACKUP) | |
- stop_backup_sync = set; | |
- else { | |
- stop_master_sync = set; | |
- stop_backup_sync = set; | |
- } | |
-} | |
+ struct ip_vs_sync_thread_data *tinfo; | |
+ struct task_struct **realtask, *task; | |
+ struct socket *sock; | |
+ char *name, *buf = NULL; | |
+ int (*threadfn)(void *data); | |
+ int result = -ENOMEM; | |
-static int sync_thread(void *startup) | |
-{ | |
- DECLARE_WAITQUEUE(wait, current); | |
- mm_segment_t oldmm; | |
- int state; | |
- const char *name; | |
+ IP_VS_DBG(7, "%s: pid %d\n", __func__, task_pid_nr(current)); | |
+ IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n", | |
+ sizeof(struct ip_vs_sync_conn)); | |
- /* increase the module use count */ | |
- ip_vs_use_count_inc(); | |
+ if (state == IP_VS_STATE_MASTER) { | |
+ if (sync_master_thread) | |
+ return -EEXIST; | |
- if (ip_vs_sync_state & IP_VS_STATE_MASTER && !sync_master_pid) { | |
- state = IP_VS_STATE_MASTER; | |
+ strlcpy(ip_vs_master_mcast_ifn, mcast_ifn, | |
+ sizeof(ip_vs_master_mcast_ifn)); | |
+ ip_vs_master_syncid = syncid; | |
+ realtask = &sync_master_thread; | |
name = "ipvs_syncmaster"; | |
- } else if (ip_vs_sync_state & IP_VS_STATE_BACKUP && !sync_backup_pid) { | |
- state = IP_VS_STATE_BACKUP; | |
+ threadfn = sync_thread_master; | |
+ sock = make_send_sock(); | |
+ } else if (state == IP_VS_STATE_BACKUP) { | |
+ if (sync_backup_thread) | |
+ return -EEXIST; | |
+ | |
+ strlcpy(ip_vs_backup_mcast_ifn, mcast_ifn, | |
+ sizeof(ip_vs_backup_mcast_ifn)); | |
+ ip_vs_backup_syncid = syncid; | |
+ realtask = &sync_backup_thread; | |
name = "ipvs_syncbackup"; | |
+ threadfn = sync_thread_backup; | |
+ sock = make_receive_sock(); | |
} else { | |
- IP_VS_BUG(); | |
- ip_vs_use_count_dec(); | |
return -EINVAL; | |
} | |
- daemonize(name); | |
- | |
- oldmm = get_fs(); | |
- set_fs(KERNEL_DS); | |
- | |
- /* Block all signals */ | |
- spin_lock_irq(¤t->sighand->siglock); | |
- siginitsetinv(¤t->blocked, 0); | |
- recalc_sigpending(); | |
- spin_unlock_irq(¤t->sighand->siglock); | |
+ if (IS_ERR(sock)) { | |
+ result = PTR_ERR(sock); | |
+ goto out; | |
+ } | |
- /* set the maximum length of sync message */ | |
set_sync_mesg_maxlen(state); | |
+ if (state == IP_VS_STATE_BACKUP) { | |
+ buf = kmalloc(sync_recv_mesg_maxlen, GFP_KERNEL); | |
+ if (!buf) | |
+ goto outsocket; | |
+ } | |
- /* set up multicast address */ | |
- mcast_addr.sin_family = AF_INET; | |
- mcast_addr.sin_port = htons(IP_VS_SYNC_PORT); | |
- mcast_addr.sin_addr.s_addr = htonl(IP_VS_SYNC_GROUP); | |
- | |
- add_wait_queue(&sync_wait, &wait); | |
- | |
- set_sync_pid(state, current->pid); | |
- complete((struct completion *)startup); | |
- | |
- /* processing master/backup loop here */ | |
- if (state == IP_VS_STATE_MASTER) | |
- sync_master_loop(); | |
- else if (state == IP_VS_STATE_BACKUP) | |
- sync_backup_loop(); | |
- else IP_VS_BUG(); | |
- | |
- remove_wait_queue(&sync_wait, &wait); | |
- | |
- /* thread exits */ | |
- set_sync_pid(state, 0); | |
- IP_VS_INFO("sync thread stopped!\n"); | |
- | |
- set_fs(oldmm); | |
- | |
- /* decrease the module use count */ | |
- ip_vs_use_count_dec(); | |
- | |
- set_stop_sync(state, 0); | |
- wake_up(&stop_sync_wait); | |
+ tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL); | |
+ if (!tinfo) | |
+ goto outbuf; | |
- return 0; | |
-} | |
+ tinfo->sock = sock; | |
+ tinfo->buf = buf; | |
+ task = kthread_run(threadfn, tinfo, name); | |
+ if (IS_ERR(task)) { | |
+ result = PTR_ERR(task); | |
+ goto outtinfo; | |
+ } | |
-static int fork_sync_thread(void *startup) | |
-{ | |
- pid_t pid; | |
+ /* mark as active */ | |
+ *realtask = task; | |
+ ip_vs_sync_state |= state; | |
- /* fork the sync thread here, then the parent process of the | |
- sync thread is the init process after this thread exits. */ | |
- repeat: | |
- if ((pid = kernel_thread(sync_thread, startup, 0)) < 0) { | |
- IP_VS_ERR("could not create sync_thread due to %d... " | |
- "retrying.\n", pid); | |
- msleep_interruptible(1000); | |
- goto repeat; | |
- } | |
+ /* increase the module use count */ | |
+ ip_vs_use_count_inc(); | |
return 0; | |
+ | |
+outtinfo: | |
+ kfree(tinfo); | |
+outbuf: | |
+ kfree(buf); | |
+outsocket: | |
+ sock_release(sock); | |
+out: | |
+ return result; | |
} | |
-int start_sync_thread(int state, char *mcast_ifn, __u8 syncid) | |
+int stop_sync_thread(int state) | |
{ | |
- DECLARE_COMPLETION_ONSTACK(startup); | |
- pid_t pid; | |
+ IP_VS_DBG(7, "%s: pid %d\n", __func__, task_pid_nr(current)); | |
- if ((state == IP_VS_STATE_MASTER && sync_master_pid) || | |
- (state == IP_VS_STATE_BACKUP && sync_backup_pid)) | |
- return -EEXIST; | |
+ if (state == IP_VS_STATE_MASTER) { | |
+ if (!sync_master_thread) | |
+ return -ESRCH; | |
- IP_VS_DBG(7, "%s: pid %d\n", __FUNCTION__, current->pid); | |
- IP_VS_DBG(7, "Each ip_vs_sync_conn entry need %Zd bytes\n", | |
- sizeof(struct ip_vs_sync_conn)); | |
+ IP_VS_INFO("stopping master sync thread %d ...\n", | |
+ task_pid_nr(sync_master_thread)); | |
- ip_vs_sync_state |= state; | |
- if (state == IP_VS_STATE_MASTER) { | |
- strlcpy(ip_vs_master_mcast_ifn, mcast_ifn, | |
- sizeof(ip_vs_master_mcast_ifn)); | |
- ip_vs_master_syncid = syncid; | |
+ /* | |
+ * The lock synchronizes with sb_queue_tail(), so that we don't | |
+ * add sync buffers to the queue, when we are already in | |
+ * progress of stopping the master sync daemon. | |
+ */ | |
+ | |
+ spin_lock_bh(&ip_vs_sync_lock); | |
+ ip_vs_sync_state &= ~IP_VS_STATE_MASTER; | |
+ spin_unlock_bh(&ip_vs_sync_lock); | |
+ kthread_stop(sync_master_thread); | |
+ sync_master_thread = NULL; | |
+ } else if (state == IP_VS_STATE_BACKUP) { | |
+ if (!sync_backup_thread) | |
+ return -ESRCH; | |
+ | |
+ IP_VS_INFO("stopping backup sync thread %d ...\n", | |
+ task_pid_nr(sync_backup_thread)); | |
+ | |
+ ip_vs_sync_state &= ~IP_VS_STATE_BACKUP; | |
+ kthread_stop(sync_backup_thread); | |
+ sync_backup_thread = NULL; | |
} else { | |
- strlcpy(ip_vs_backup_mcast_ifn, mcast_ifn, | |
- sizeof(ip_vs_backup_mcast_ifn)); | |
- ip_vs_backup_syncid = syncid; | |
- } | |
- | |
- repeat: | |
- if ((pid = kernel_thread(fork_sync_thread, &startup, 0)) < 0) { | |
- IP_VS_ERR("could not create fork_sync_thread due to %d... " | |
- "retrying.\n", pid); | |
- msleep_interruptible(1000); | |
- goto repeat; | |
+ return -EINVAL; | |
} | |
- wait_for_completion(&startup); | |
- | |
- return 0; | |
-} | |
- | |
- | |
-int stop_sync_thread(int state) | |
-{ | |
- DECLARE_WAITQUEUE(wait, current); | |
- | |
- if ((state == IP_VS_STATE_MASTER && !sync_master_pid) || | |
- (state == IP_VS_STATE_BACKUP && !sync_backup_pid)) | |
- return -ESRCH; | |
- | |
- IP_VS_DBG(7, "%s: pid %d\n", __FUNCTION__, current->pid); | |
- IP_VS_INFO("stopping sync thread %d ...\n", | |
- (state == IP_VS_STATE_MASTER) ? | |
- sync_master_pid : sync_backup_pid); | |
- | |
- __set_current_state(TASK_UNINTERRUPTIBLE); | |
- add_wait_queue(&stop_sync_wait, &wait); | |
- set_stop_sync(state, 1); | |
- ip_vs_sync_state -= state; | |
- wake_up(&sync_wait); | |
- schedule(); | |
- __set_current_state(TASK_RUNNING); | |
- remove_wait_queue(&stop_sync_wait, &wait); | |
- | |
- /* Note: no need to reap the sync thread, because its parent | |
- process is the init process */ | |
- | |
- if ((state == IP_VS_STATE_MASTER && stop_master_sync) || | |
- (state == IP_VS_STATE_BACKUP && stop_backup_sync)) | |
- IP_VS_BUG(); | |
+ /* decrease the module use count */ | |
+ ip_vs_use_count_dec(); | |
return 0; | |
} | |
diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_wlc.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_wlc.c | |
--- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_wlc.c 2009-02-16 11:57:22.000000000 -0400 | |
+++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_wlc.c 2009-02-16 12:56:22.000000000 -0400 | |
@@ -1,8 +1,6 @@ | |
/* | |
* IPVS: Weighted Least-Connection Scheduling module | |
* | |
- * Version: $Id: ip_vs_wlc.c,v 1.13 2003/04/18 09:03:16 wensong Exp $ | |
- * | |
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org> | |
* Peter Kese <peter.kese@ijs.si> | |
* | |
@@ -128,6 +126,7 @@ | |
.name = "wlc", | |
.refcnt = ATOMIC_INIT(0), | |
.module = THIS_MODULE, | |
+ .n_list = LIST_HEAD_INIT(ip_vs_wlc_scheduler.n_list), | |
.init_service = ip_vs_wlc_init_svc, | |
.done_service = ip_vs_wlc_done_svc, | |
.update_service = ip_vs_wlc_update_svc, | |
@@ -137,7 +136,6 @@ | |
static int __init ip_vs_wlc_init(void) | |
{ | |
- INIT_LIST_HEAD(&ip_vs_wlc_scheduler.n_list); | |
return register_ip_vs_scheduler(&ip_vs_wlc_scheduler); | |
} | |
diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_wrr.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_wrr.c | |
--- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_wrr.c 2009-02-16 11:57:22.000000000 -0400 | |
+++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_wrr.c 2009-02-16 12:56:22.000000000 -0400 | |
@@ -1,8 +1,6 @@ | |
/* | |
* IPVS: Weighted Round-Robin Scheduling module | |
* | |
- * Version: $Id: ip_vs_wrr.c,v 1.12 2002/09/15 08:14:08 wensong Exp $ | |
- * | |
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org> | |
* | |
* This program is free software; you can redistribute it and/or | |
@@ -22,6 +20,7 @@ | |
#include <linux/module.h> | |
#include <linux/kernel.h> | |
+#include <linux/net.h> | |
#include <net/ip_vs.h> | |
@@ -169,7 +168,7 @@ | |
*/ | |
if (mark->cw == 0) { | |
mark->cl = &svc->destinations; | |
- IP_VS_INFO("ip_vs_wrr_schedule(): " | |
+ IP_VS_ERR_RL("ip_vs_wrr_schedule(): " | |
"no available servers\n"); | |
dest = NULL; | |
goto out; | |
@@ -213,6 +212,7 @@ | |
.name = "wrr", | |
.refcnt = ATOMIC_INIT(0), | |
.module = THIS_MODULE, | |
+ .n_list = LIST_HEAD_INIT(ip_vs_wrr_scheduler.n_list), | |
.init_service = ip_vs_wrr_init_svc, | |
.done_service = ip_vs_wrr_done_svc, | |
.update_service = ip_vs_wrr_update_svc, | |
@@ -221,7 +221,6 @@ | |
static int __init ip_vs_wrr_init(void) | |
{ | |
- INIT_LIST_HEAD(&ip_vs_wrr_scheduler.n_list); | |
return register_ip_vs_scheduler(&ip_vs_wrr_scheduler) ; | |
} | |
diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_xmit.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_xmit.c | |
--- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_xmit.c 2009-02-16 11:57:22.000000000 -0400 | |
+++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_xmit.c 2009-02-16 12:56:22.000000000 -0400 | |
@@ -1,8 +1,6 @@ | |
/* | |
* ip_vs_xmit.c: various packet transmitters for IPVS | |
* | |
- * Version: $Id: ip_vs_xmit.c,v 1.2 2002/11/30 01:50:35 wensong Exp $ | |
- * | |
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org> | |
* Julian Anastasov <ja@ssi.bg> | |
* | |
@@ -16,8 +14,8 @@ | |
*/ | |
#include <linux/kernel.h> | |
-#include <linux/ip.h> | |
#include <linux/tcp.h> /* for tcphdr */ | |
+#include <net/ip.h> | |
#include <net/tcp.h> /* for csum_tcpudp_magic */ | |
#include <net/udp.h> | |
#include <net/icmp.h> /* for icmp_send */ | |
@@ -59,7 +57,7 @@ | |
return dst; | |
} | |
-static inline struct rtable * | |
+static struct rtable * | |
__ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos) | |
{ | |
struct rtable *rt; /* Route to the other host */ | |
@@ -78,7 +76,7 @@ | |
.tos = rtos, } }, | |
}; | |
- if (ip_route_output_key(&rt, &fl)) { | |
+ if (ip_route_output_key(&init_net, &rt, &fl)) { | |
spin_unlock(&dest->dst_lock); | |
IP_VS_DBG_RL("ip_route_output error, " | |
"dest: %u.%u.%u.%u\n", | |
@@ -101,7 +99,7 @@ | |
.tos = rtos, } }, | |
}; | |
- if (ip_route_output_key(&rt, &fl)) { | |
+ if (ip_route_output_key(&init_net, &rt, &fl)) { | |
IP_VS_DBG_RL("ip_route_output error, dest: " | |
"%u.%u.%u.%u\n", NIPQUAD(cp->daddr)); | |
return NULL; | |
@@ -128,8 +126,8 @@ | |
#define IP_VS_XMIT(skb, rt) \ | |
do { \ | |
(skb)->ipvs_property = 1; \ | |
- (skb)->ip_summed = CHECKSUM_NONE; \ | |
- NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, (skb), NULL, \ | |
+ skb_forward_csum(skb); \ | |
+ NF_HOOK(PF_INET, NF_INET_LOCAL_OUT, (skb), NULL, \ | |
(rt)->u.dst.dev, dst_output); \ | |
} while (0) | |
@@ -156,7 +154,7 @@ | |
struct ip_vs_protocol *pp) | |
{ | |
struct rtable *rt; /* Route to the other host */ | |
- struct iphdr *iph = skb->nh.iph; | |
+ struct iphdr *iph = ip_hdr(skb); | |
u8 tos = iph->tos; | |
int mtu; | |
struct flowi fl = { | |
@@ -170,7 +168,7 @@ | |
EnterFunction(10); | |
- if (ip_route_output_key(&rt, &fl)) { | |
+ if (ip_route_output_key(&init_net, &rt, &fl)) { | |
IP_VS_DBG_RL("ip_vs_bypass_xmit(): ip_route_output error, " | |
"dest: %u.%u.%u.%u\n", NIPQUAD(iph->daddr)); | |
goto tx_error_icmp; | |
@@ -178,7 +176,7 @@ | |
/* MTU checking */ | |
mtu = dst_mtu(&rt->u.dst); | |
- if ((skb->len > mtu) && (iph->frag_off&__constant_htons(IP_DF))) { | |
+ if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) { | |
ip_rt_put(rt); | |
icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); | |
IP_VS_DBG_RL("ip_vs_bypass_xmit(): frag needed\n"); | |
@@ -193,7 +191,7 @@ | |
ip_rt_put(rt); | |
return NF_STOLEN; | |
} | |
- ip_send_check(skb->nh.iph); | |
+ ip_send_check(ip_hdr(skb)); | |
/* drop old route */ | |
dst_release(skb->dst); | |
@@ -226,7 +224,7 @@ | |
{ | |
struct rtable *rt; /* Route to the other host */ | |
int mtu; | |
- struct iphdr *iph = skb->nh.iph; | |
+ struct iphdr *iph = ip_hdr(skb); | |
EnterFunction(10); | |
@@ -245,7 +243,7 @@ | |
/* MTU checking */ | |
mtu = dst_mtu(&rt->u.dst); | |
- if ((skb->len > mtu) && (iph->frag_off&__constant_htons(IP_DF))) { | |
+ if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) { | |
ip_rt_put(rt); | |
icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); | |
IP_VS_DBG_RL_PKT(0, pp, skb, 0, "ip_vs_nat_xmit(): frag needed for"); | |
@@ -253,7 +251,7 @@ | |
} | |
/* copy-on-write the packet before mangling it */ | |
- if (!ip_vs_make_skb_writable(&skb, sizeof(struct iphdr))) | |
+ if (!skb_make_writable(skb, sizeof(struct iphdr))) | |
goto tx_error_put; | |
if (skb_cow(skb, rt->u.dst.dev->hard_header_len)) | |
@@ -264,10 +262,10 @@ | |
skb->dst = &rt->u.dst; | |
/* mangle the packet */ | |
- if (pp->dnat_handler && !pp->dnat_handler(&skb, pp, cp)) | |
+ if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) | |
goto tx_error; | |
- skb->nh.iph->daddr = cp->daddr; | |
- ip_send_check(skb->nh.iph); | |
+ ip_hdr(skb)->daddr = cp->daddr; | |
+ ip_send_check(ip_hdr(skb)); | |
IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); | |
@@ -320,19 +318,20 @@ | |
{ | |
struct rtable *rt; /* Route to the other host */ | |
struct net_device *tdev; /* Device to other host */ | |
- struct iphdr *old_iph = skb->nh.iph; | |
+ struct iphdr *old_iph = ip_hdr(skb); | |
u8 tos = old_iph->tos; | |
__be16 df = old_iph->frag_off; | |
+ sk_buff_data_t old_transport_header = skb->transport_header; | |
struct iphdr *iph; /* Our new IP header */ | |
- int max_headroom; /* The extra header space needed */ | |
+ unsigned int max_headroom; /* The extra header space needed */ | |
int mtu; | |
EnterFunction(10); | |
- if (skb->protocol != __constant_htons(ETH_P_IP)) { | |
+ if (skb->protocol != htons(ETH_P_IP)) { | |
IP_VS_DBG_RL("ip_vs_tunnel_xmit(): protocol error, " | |
"ETH_P_IP: %d, skb protocol: %d\n", | |
- __constant_htons(ETH_P_IP), skb->protocol); | |
+ htons(ETH_P_IP), skb->protocol); | |
goto tx_error; | |
} | |
@@ -350,9 +349,9 @@ | |
if (skb->dst) | |
skb->dst->ops->update_pmtu(skb->dst, mtu); | |
- df |= (old_iph->frag_off&__constant_htons(IP_DF)); | |
+ df |= (old_iph->frag_off & htons(IP_DF)); | |
- if ((old_iph->frag_off&__constant_htons(IP_DF)) | |
+ if ((old_iph->frag_off & htons(IP_DF)) | |
&& mtu < ntohs(old_iph->tot_len)) { | |
icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); | |
ip_rt_put(rt); | |
@@ -377,15 +376,16 @@ | |
} | |
kfree_skb(skb); | |
skb = new_skb; | |
- old_iph = skb->nh.iph; | |
+ old_iph = ip_hdr(skb); | |
} | |
- skb->h.raw = (void *) old_iph; | |
+ skb->transport_header = old_transport_header; | |
/* fix old IP header checksum */ | |
ip_send_check(old_iph); | |
- skb->nh.raw = skb_push(skb, sizeof(struct iphdr)); | |
+ skb_push(skb, sizeof(struct iphdr)); | |
+ skb_reset_network_header(skb); | |
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); | |
/* drop old route */ | |
@@ -395,7 +395,7 @@ | |
/* | |
* Push down and install the IPIP header. | |
*/ | |
- iph = skb->nh.iph; | |
+ iph = ip_hdr(skb); | |
iph->version = 4; | |
iph->ihl = sizeof(struct iphdr)>>2; | |
iph->frag_off = df; | |
@@ -404,14 +404,12 @@ | |
iph->daddr = rt->rt_dst; | |
iph->saddr = rt->rt_src; | |
iph->ttl = old_iph->ttl; | |
- iph->tot_len = htons(skb->len); | |
ip_select_ident(iph, &rt->u.dst, NULL); | |
- ip_send_check(iph); | |
/* Another hack: avoid icmp_send in ip_fragment */ | |
skb->local_df = 1; | |
- IP_VS_XMIT(skb, rt); | |
+ ip_local_out(skb); | |
LeaveFunction(10); | |
@@ -435,7 +433,7 @@ | |
struct ip_vs_protocol *pp) | |
{ | |
struct rtable *rt; /* Route to the other host */ | |
- struct iphdr *iph = skb->nh.iph; | |
+ struct iphdr *iph = ip_hdr(skb); | |
int mtu; | |
EnterFunction(10); | |
@@ -445,7 +443,7 @@ | |
/* MTU checking */ | |
mtu = dst_mtu(&rt->u.dst); | |
- if ((iph->frag_off&__constant_htons(IP_DF)) && skb->len > mtu) { | |
+ if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu) { | |
icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); | |
ip_rt_put(rt); | |
IP_VS_DBG_RL("ip_vs_dr_xmit(): frag needed\n"); | |
@@ -460,7 +458,7 @@ | |
ip_rt_put(rt); | |
return NF_STOLEN; | |
} | |
- ip_send_check(skb->nh.iph); | |
+ ip_send_check(ip_hdr(skb)); | |
/* drop old route */ | |
dst_release(skb->dst); | |
@@ -514,12 +512,12 @@ | |
* mangle and send the packet here (only for VS/NAT) | |
*/ | |
- if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(skb->nh.iph->tos)))) | |
+ if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(ip_hdr(skb)->tos)))) | |
goto tx_error_icmp; | |
/* MTU checking */ | |
mtu = dst_mtu(&rt->u.dst); | |
- if ((skb->len > mtu) && (skb->nh.iph->frag_off&__constant_htons(IP_DF))) { | |
+ if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) { | |
ip_rt_put(rt); | |
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); | |
IP_VS_DBG_RL("ip_vs_in_icmp(): frag needed\n"); | |
@@ -527,7 +525,7 @@ | |
} | |
/* copy-on-write the packet before mangling it */ | |
- if (!ip_vs_make_skb_writable(&skb, offset)) | |
+ if (!skb_make_writable(skb, offset)) | |
goto tx_error_put; | |
if (skb_cow(skb, rt->u.dst.dev->hard_header_len)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment