Skip to content

Instantly share code, notes, and snippets.

@danp
Created February 18, 2009 04:30
Show Gist options
  • Save danp/66185 to your computer and use it in GitHub Desktop.
Save danp/66185 to your computer and use it in GitHub Desktop.
diff -u linux-2.6.20.y/net/ipv4/ipvs/Kconfig linux-2.6.27.y/net/ipv4/ipvs/Kconfig
--- linux-2.6.20.y/net/ipv4/ipvs/Kconfig 2009-02-16 11:57:22.000000000 -0400
+++ linux-2.6.27.y/net/ipv4/ipvs/Kconfig 2009-02-16 12:56:22.000000000 -0400
@@ -1,10 +1,7 @@
#
# IP Virtual Server configuration
#
-menu "IP: Virtual Server Configuration"
- depends on NETFILTER
-
-config IP_VS
+menuconfig IP_VS
tristate "IP virtual server support (EXPERIMENTAL)"
depends on NETFILTER
---help---
@@ -25,9 +22,10 @@
If you want to compile it in kernel, say Y. To compile it as a
module, choose M here. If unsure, say N.
+if IP_VS
+
config IP_VS_DEBUG
bool "IP virtual server debugging"
- depends on IP_VS
---help---
Say Y here if you want to get additional messages useful in
debugging the IP virtual server code. You can change the debug
@@ -35,7 +33,6 @@
config IP_VS_TAB_BITS
int "IPVS connection table size (the Nth power of 2)"
- depends on IP_VS
default "12"
---help---
The IPVS connection hash table uses the chaining scheme to handle
@@ -61,42 +58,35 @@
needed for your box.
comment "IPVS transport protocol load balancing support"
- depends on IP_VS
config IP_VS_PROTO_TCP
bool "TCP load balancing support"
- depends on IP_VS
---help---
This option enables support for load balancing TCP transport
protocol. Say Y if unsure.
config IP_VS_PROTO_UDP
bool "UDP load balancing support"
- depends on IP_VS
---help---
This option enables support for load balancing UDP transport
protocol. Say Y if unsure.
config IP_VS_PROTO_ESP
bool "ESP load balancing support"
- depends on IP_VS
---help---
This option enables support for load balancing ESP (Encapsulation
Security Payload) transport protocol. Say Y if unsure.
config IP_VS_PROTO_AH
bool "AH load balancing support"
- depends on IP_VS
---help---
This option enables support for load balancing AH (Authentication
Header) transport protocol. Say Y if unsure.
comment "IPVS scheduler"
- depends on IP_VS
config IP_VS_RR
tristate "round-robin scheduling"
- depends on IP_VS
---help---
The robin-robin scheduling algorithm simply directs network
connections to different real servers in a round-robin manner.
@@ -106,7 +96,6 @@
config IP_VS_WRR
tristate "weighted round-robin scheduling"
- depends on IP_VS
---help---
The weighted robin-robin scheduling algorithm directs network
connections to different real servers based on server weights
@@ -120,7 +109,6 @@
config IP_VS_LC
tristate "least-connection scheduling"
- depends on IP_VS
---help---
The least-connection scheduling algorithm directs network
connections to the server with the least number of active
@@ -131,7 +119,6 @@
config IP_VS_WLC
tristate "weighted least-connection scheduling"
- depends on IP_VS
---help---
The weighted least-connection scheduling algorithm directs network
connections to the server with the least active connections
@@ -142,7 +129,6 @@
config IP_VS_LBLC
tristate "locality-based least-connection scheduling"
- depends on IP_VS
---help---
The locality-based least-connection scheduling algorithm is for
destination IP load balancing. It is usually used in cache cluster.
@@ -157,7 +143,6 @@
config IP_VS_LBLCR
tristate "locality-based least-connection with replication scheduling"
- depends on IP_VS
---help---
The locality-based least-connection with replication scheduling
algorithm is also for destination IP load balancing. It is
@@ -176,7 +161,6 @@
config IP_VS_DH
tristate "destination hashing scheduling"
- depends on IP_VS
---help---
The destination hashing scheduling algorithm assigns network
connections to the servers through looking up a statically assigned
@@ -187,7 +171,6 @@
config IP_VS_SH
tristate "source hashing scheduling"
- depends on IP_VS
---help---
The source hashing scheduling algorithm assigns network
connections to the servers through looking up a statically assigned
@@ -198,7 +181,6 @@
config IP_VS_SED
tristate "shortest expected delay scheduling"
- depends on IP_VS
---help---
The shortest expected delay scheduling algorithm assigns network
connections to the server with the shortest expected delay. The
@@ -212,7 +194,6 @@
config IP_VS_NQ
tristate "never queue scheduling"
- depends on IP_VS
---help---
The never queue scheduling algorithm adopts a two-speed model.
When there is an idle server available, the job will be sent to
@@ -225,11 +206,10 @@
module, choose M here. If unsure, say N.
comment 'IPVS application helper'
- depends on IP_VS
config IP_VS_FTP
tristate "FTP protocol helper"
- depends on IP_VS && IP_VS_PROTO_TCP
+ depends on IP_VS_PROTO_TCP
---help---
FTP is a protocol that transfers IP address and/or port number in
the payload. In the virtual server via Network Address Translation,
@@ -241,4 +221,4 @@
If you want to compile it in kernel, say Y. To compile it as a
module, choose M here. If unsure, say N.
-endmenu
+endif # IP_VS
diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_app.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_app.c
--- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_app.c 2009-02-16 11:57:22.000000000 -0400
+++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_app.c 2009-02-16 12:56:22.000000000 -0400
@@ -1,8 +1,6 @@
/*
* ip_vs_app.c: Application module support for IPVS
*
- * Version: $Id: ip_vs_app.c,v 1.17 2003/03/22 06:31:21 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
*
* This program is free software; you can redistribute it and/or
@@ -25,6 +23,8 @@
#include <linux/skbuff.h>
#include <linux/in.h>
#include <linux/ip.h>
+#include <linux/netfilter.h>
+#include <net/net_namespace.h>
#include <net/protocol.h>
#include <net/tcp.h>
#include <asm/system.h>
@@ -49,18 +49,13 @@
*/
static inline int ip_vs_app_get(struct ip_vs_app *app)
{
- /* test and get the module atomically */
- if (app->module)
- return try_module_get(app->module);
- else
- return 1;
+ return try_module_get(app->module);
}
static inline void ip_vs_app_put(struct ip_vs_app *app)
{
- if (app->module)
- module_put(app->module);
+ module_put(app->module);
}
@@ -327,18 +322,18 @@
spin_unlock(&cp->lock);
}
-static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff **pskb,
+static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb,
struct ip_vs_app *app)
{
int diff;
- unsigned int tcp_offset = (*pskb)->nh.iph->ihl*4;
+ const unsigned int tcp_offset = ip_hdrlen(skb);
struct tcphdr *th;
__u32 seq;
- if (!ip_vs_make_skb_writable(pskb, tcp_offset + sizeof(*th)))
+ if (!skb_make_writable(skb, tcp_offset + sizeof(*th)))
return 0;
- th = (struct tcphdr *)((*pskb)->nh.raw + tcp_offset);
+ th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset);
/*
* Remember seq number in case this pkt gets resized
@@ -359,7 +354,7 @@
if (app->pkt_out == NULL)
return 1;
- if (!app->pkt_out(app, cp, pskb, &diff))
+ if (!app->pkt_out(app, cp, skb, &diff))
return 0;
/*
@@ -377,7 +372,7 @@
* called by ipvs packet handler, assumes previously checked cp!=NULL
* returns false if it can't handle packet (oom)
*/
-int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff **pskb)
+int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb)
{
struct ip_vs_app *app;
@@ -390,7 +385,7 @@
/* TCP is complicated */
if (cp->protocol == IPPROTO_TCP)
- return app_tcp_pkt_out(cp, pskb, app);
+ return app_tcp_pkt_out(cp, skb, app);
/*
* Call private output hook function
@@ -398,22 +393,22 @@
if (app->pkt_out == NULL)
return 1;
- return app->pkt_out(app, cp, pskb, NULL);
+ return app->pkt_out(app, cp, skb, NULL);
}
-static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff **pskb,
+static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb,
struct ip_vs_app *app)
{
int diff;
- unsigned int tcp_offset = (*pskb)->nh.iph->ihl*4;
+ const unsigned int tcp_offset = ip_hdrlen(skb);
struct tcphdr *th;
__u32 seq;
- if (!ip_vs_make_skb_writable(pskb, tcp_offset + sizeof(*th)))
+ if (!skb_make_writable(skb, tcp_offset + sizeof(*th)))
return 0;
- th = (struct tcphdr *)((*pskb)->nh.raw + tcp_offset);
+ th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset);
/*
* Remember seq number in case this pkt gets resized
@@ -434,7 +429,7 @@
if (app->pkt_in == NULL)
return 1;
- if (!app->pkt_in(app, cp, pskb, &diff))
+ if (!app->pkt_in(app, cp, skb, &diff))
return 0;
/*
@@ -452,7 +447,7 @@
* called by ipvs packet handler, assumes previously checked cp!=NULL.
* returns false if can't handle packet (oom).
*/
-int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff **pskb)
+int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb)
{
struct ip_vs_app *app;
@@ -465,7 +460,7 @@
/* TCP is complicated */
if (cp->protocol == IPPROTO_TCP)
- return app_tcp_pkt_in(cp, pskb, app);
+ return app_tcp_pkt_in(cp, skb, app);
/*
* Call private input hook function
@@ -473,7 +468,7 @@
if (app->pkt_in == NULL)
return 1;
- return app->pkt_in(app, cp, pskb, NULL);
+ return app->pkt_in(app, cp, skb, NULL);
}
@@ -549,7 +544,7 @@
return 0;
}
-static struct seq_operations ip_vs_app_seq_ops = {
+static const struct seq_operations ip_vs_app_seq_ops = {
.start = ip_vs_app_seq_start,
.next = ip_vs_app_seq_next,
.stop = ip_vs_app_seq_stop,
@@ -561,7 +556,7 @@
return seq_open(file, &ip_vs_app_seq_ops);
}
-static struct file_operations ip_vs_app_fops = {
+static const struct file_operations ip_vs_app_fops = {
.owner = THIS_MODULE,
.open = ip_vs_app_open,
.read = seq_read,
@@ -577,7 +572,6 @@
int ip_vs_skb_replace(struct sk_buff *skb, gfp_t pri,
char *o_buf, int o_len, char *n_buf, int n_len)
{
- struct iphdr *iph;
int diff;
int o_offset;
int o_left;
@@ -603,27 +597,26 @@
skb_put(skb, diff);
memmove(skb->data + o_offset + n_len,
skb->data + o_offset + o_len, o_left);
- memcpy(skb->data + o_offset, n_buf, n_len);
+ skb_copy_to_linear_data_offset(skb, o_offset, n_buf, n_len);
}
/* must update the iph total length here */
- iph = skb->nh.iph;
- iph->tot_len = htons(skb->len);
+ ip_hdr(skb)->tot_len = htons(skb->len);
LeaveFunction(9);
return 0;
}
-int ip_vs_app_init(void)
+int __init ip_vs_app_init(void)
{
/* we will replace it with proc_net_ipvs_create() soon */
- proc_net_fops_create("ip_vs_app", 0, &ip_vs_app_fops);
+ proc_net_fops_create(&init_net, "ip_vs_app", 0, &ip_vs_app_fops);
return 0;
}
void ip_vs_app_cleanup(void)
{
- proc_net_remove("ip_vs_app");
+ proc_net_remove(&init_net, "ip_vs_app");
}
diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_conn.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_conn.c
--- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_conn.c 2009-02-16 11:57:22.000000000 -0400
+++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_conn.c 2009-02-16 12:56:22.000000000 -0400
@@ -5,8 +5,6 @@
* high-performance and highly available server based on a
* cluster of servers.
*
- * Version: $Id: ip_vs_conn.c,v 1.31 2003/04/18 09:03:16 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
* Peter Kese <peter.kese@ijs.si>
* Julian Anastasov <ja@ssi.bg>
@@ -35,6 +33,7 @@
#include <linux/jhash.h>
#include <linux/random.h>
+#include <net/net_namespace.h>
#include <net/ip_vs.h>
@@ -392,7 +391,15 @@
atomic_inc(&dest->refcnt);
/* Bind with the destination and its corresponding transmitter */
- cp->flags |= atomic_read(&dest->conn_flags);
+ if ((cp->flags & IP_VS_CONN_F_SYNC) &&
+ (!(cp->flags & IP_VS_CONN_F_TEMPLATE)))
+ /* if the connection is not template and is created
+ * by sync, preserve the activity flag.
+ */
+ cp->flags |= atomic_read(&dest->conn_flags) &
+ (~IP_VS_CONN_F_INACTIVE);
+ else
+ cp->flags |= atomic_read(&dest->conn_flags);
cp->dest = dest;
IP_VS_DBG(7, "Bind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d "
@@ -411,7 +418,11 @@
/* It is a normal connection, so increase the inactive
connection counter because it is in TCP SYNRECV
state (inactive) or other protocol inacive state */
- atomic_inc(&dest->inactconns);
+ if ((cp->flags & IP_VS_CONN_F_SYNC) &&
+ (!(cp->flags & IP_VS_CONN_F_INACTIVE)))
+ atomic_inc(&dest->activeconns);
+ else
+ atomic_inc(&dest->inactconns);
} else {
/* It is a persistent connection/template, so increase
the peristent connection counter */
@@ -425,6 +436,24 @@
/*
+ * Check if there is a destination for the connection, if so
+ * bind the connection to the destination.
+ */
+struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp)
+{
+ struct ip_vs_dest *dest;
+
+ if ((cp) && (!cp->dest)) {
+ dest = ip_vs_find_dest(cp->daddr, cp->dport,
+ cp->vaddr, cp->vport, cp->protocol);
+ ip_vs_bind_dest(cp, dest);
+ return dest;
+ } else
+ return NULL;
+}
+
+
+/*
* Unbind a connection entry with its VS destination
* Called by the ip_vs_conn_expire function.
*/
@@ -494,8 +523,8 @@
* Checking the dest server status.
*/
if ((dest == NULL) ||
- !(dest->flags & IP_VS_DEST_F_AVAILABLE) ||
- (sysctl_ip_vs_expire_quiescent_template &&
+ !(dest->flags & IP_VS_DEST_F_AVAILABLE) ||
+ (sysctl_ip_vs_expire_quiescent_template &&
(atomic_read(&dest->weight) == 0))) {
IP_VS_DBG(9, "check_template: dest not available for "
"protocol %s s:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d "
@@ -603,17 +632,14 @@
struct ip_vs_conn *cp;
struct ip_vs_protocol *pp = ip_vs_proto_get(proto);
- cp = kmem_cache_alloc(ip_vs_conn_cachep, GFP_ATOMIC);
+ cp = kmem_cache_zalloc(ip_vs_conn_cachep, GFP_ATOMIC);
if (cp == NULL) {
IP_VS_ERR_RL("ip_vs_conn_new: no memory available.\n");
return NULL;
}
- memset(cp, 0, sizeof(*cp));
INIT_LIST_HEAD(&cp->c_list);
- init_timer(&cp->timer);
- cp->timer.data = (unsigned long)cp;
- cp->timer.function = ip_vs_conn_expire;
+ setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp);
cp->protocol = proto;
cp->caddr = caddr;
cp->cport = cport;
@@ -667,7 +693,7 @@
{
int idx;
struct ip_vs_conn *cp;
-
+
for(idx = 0; idx < IP_VS_CONN_TAB_SIZE; idx++) {
ct_read_lock_bh(idx);
list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
@@ -695,7 +721,7 @@
int idx;
++*pos;
- if (v == SEQ_START_TOKEN)
+ if (v == SEQ_START_TOKEN)
return ip_vs_conn_array(seq, 0);
/* more on same hash chain? */
@@ -710,7 +736,7 @@
list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
seq->private = &ip_vs_conn_tab[idx];
return cp;
- }
+ }
ct_read_unlock_bh(idx);
}
seq->private = NULL;
@@ -746,7 +772,7 @@
return 0;
}
-static struct seq_operations ip_vs_conn_seq_ops = {
+static const struct seq_operations ip_vs_conn_seq_ops = {
.start = ip_vs_conn_seq_start,
.next = ip_vs_conn_seq_next,
.stop = ip_vs_conn_seq_stop,
@@ -758,13 +784,64 @@
return seq_open(file, &ip_vs_conn_seq_ops);
}
-static struct file_operations ip_vs_conn_fops = {
+static const struct file_operations ip_vs_conn_fops = {
.owner = THIS_MODULE,
.open = ip_vs_conn_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release,
};
+
+static const char *ip_vs_origin_name(unsigned flags)
+{
+ if (flags & IP_VS_CONN_F_SYNC)
+ return "SYNC";
+ else
+ return "LOCAL";
+}
+
+static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
+{
+
+ if (v == SEQ_START_TOKEN)
+ seq_puts(seq,
+ "Pro FromIP FPrt ToIP TPrt DestIP DPrt State Origin Expires\n");
+ else {
+ const struct ip_vs_conn *cp = v;
+
+ seq_printf(seq,
+ "%-3s %08X %04X %08X %04X %08X %04X %-11s %-6s %7lu\n",
+ ip_vs_proto_name(cp->protocol),
+ ntohl(cp->caddr), ntohs(cp->cport),
+ ntohl(cp->vaddr), ntohs(cp->vport),
+ ntohl(cp->daddr), ntohs(cp->dport),
+ ip_vs_state_name(cp->protocol, cp->state),
+ ip_vs_origin_name(cp->flags),
+ (cp->timer.expires-jiffies)/HZ);
+ }
+ return 0;
+}
+
+static const struct seq_operations ip_vs_conn_sync_seq_ops = {
+ .start = ip_vs_conn_seq_start,
+ .next = ip_vs_conn_seq_next,
+ .stop = ip_vs_conn_seq_stop,
+ .show = ip_vs_conn_sync_seq_show,
+};
+
+static int ip_vs_conn_sync_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &ip_vs_conn_sync_seq_ops);
+}
+
+static const struct file_operations ip_vs_conn_sync_fops = {
+ .owner = THIS_MODULE,
+ .open = ip_vs_conn_sync_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
#endif
@@ -888,7 +965,7 @@
}
-int ip_vs_conn_init(void)
+int __init ip_vs_conn_init(void)
{
int idx;
@@ -902,7 +979,7 @@
/* Allocate ip_vs_conn slab cache */
ip_vs_conn_cachep = kmem_cache_create("ip_vs_conn",
sizeof(struct ip_vs_conn), 0,
- SLAB_HWCACHE_ALIGN, NULL, NULL);
+ SLAB_HWCACHE_ALIGN, NULL);
if (!ip_vs_conn_cachep) {
vfree(ip_vs_conn_tab);
return -ENOMEM;
@@ -923,7 +1000,8 @@
rwlock_init(&__ip_vs_conntbl_lock_array[idx].l);
}
- proc_net_fops_create("ip_vs_conn", 0, &ip_vs_conn_fops);
+ proc_net_fops_create(&init_net, "ip_vs_conn", 0, &ip_vs_conn_fops);
+ proc_net_fops_create(&init_net, "ip_vs_conn_sync", 0, &ip_vs_conn_sync_fops);
/* calculate the random value for connection hash */
get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd));
@@ -939,6 +1017,7 @@
/* Release the empty cache */
kmem_cache_destroy(ip_vs_conn_cachep);
- proc_net_remove("ip_vs_conn");
+ proc_net_remove(&init_net, "ip_vs_conn");
+ proc_net_remove(&init_net, "ip_vs_conn_sync");
vfree(ip_vs_conn_tab);
}
diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_core.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_core.c
--- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_core.c 2009-02-16 11:57:22.000000000 -0400
+++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_core.c 2009-02-16 12:56:22.000000000 -0400
@@ -5,8 +5,6 @@
* high-performance and highly available server based on a
* cluster of servers.
*
- * Version: $Id: ip_vs_core.c,v 1.34 2003/05/10 03:05:23 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
* Peter Kese <peter.kese@ijs.si>
* Julian Anastasov <ja@ssi.bg>
@@ -58,7 +56,6 @@
#ifdef CONFIG_IP_VS_DEBUG
EXPORT_SYMBOL(ip_vs_get_debug_level);
#endif
-EXPORT_SYMBOL(ip_vs_make_skb_writable);
/* ID used in ICMP lookups */
@@ -163,42 +160,6 @@
}
-int ip_vs_make_skb_writable(struct sk_buff **pskb, int writable_len)
-{
- struct sk_buff *skb = *pskb;
-
- /* skb is already used, better copy skb and its payload */
- if (unlikely(skb_shared(skb) || skb->sk))
- goto copy_skb;
-
- /* skb data is already used, copy it */
- if (unlikely(skb_cloned(skb)))
- goto copy_data;
-
- return pskb_may_pull(skb, writable_len);
-
- copy_data:
- if (unlikely(writable_len > skb->len))
- return 0;
- return !pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
-
- copy_skb:
- if (unlikely(writable_len > skb->len))
- return 0;
- skb = skb_copy(skb, GFP_ATOMIC);
- if (!skb)
- return 0;
- BUG_ON(skb_is_nonlinear(skb));
-
- /* Rest of kernel will get very unhappy if we pass it a
- suddenly-orphaned skbuff */
- if ((*pskb)->sk)
- skb_set_owner_w(skb, (*pskb)->sk);
- kfree_skb(*pskb);
- *pskb = skb;
- return 1;
-}
-
/*
* IPVS persistent scheduling function
* It creates a connection entry according to its template if exists,
@@ -212,7 +173,7 @@
__be16 ports[2])
{
struct ip_vs_conn *cp = NULL;
- struct iphdr *iph = skb->nh.iph;
+ struct iphdr *iph = ip_hdr(skb);
struct ip_vs_dest *dest;
struct ip_vs_conn *ct;
__be16 dport; /* destination port to forward */
@@ -381,7 +342,7 @@
ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
{
struct ip_vs_conn *cp = NULL;
- struct iphdr *iph = skb->nh.iph;
+ struct iphdr *iph = ip_hdr(skb);
struct ip_vs_dest *dest;
__be16 _ports[2], *pptr;
@@ -447,7 +408,7 @@
struct ip_vs_protocol *pp)
{
__be16 _ports[2], *pptr;
- struct iphdr *iph = skb->nh.iph;
+ struct iphdr *iph = ip_hdr(skb);
pptr = skb_header_pointer(skb, iph->ihl*4,
sizeof(_ports), _ports);
@@ -460,7 +421,7 @@
and the destination is RTN_UNICAST (and not local), then create
a cache_bypass connection entry */
if (sysctl_ip_vs_cache_bypass && svc->fwmark
- && (inet_addr_type(iph->daddr) == RTN_UNICAST)) {
+ && (inet_addr_type(&init_net, iph->daddr) == RTN_UNICAST)) {
int ret, cs;
struct ip_vs_conn *cp;
@@ -518,19 +479,19 @@
/*
- * It is hooked before NF_IP_PRI_NAT_SRC at the NF_IP_POST_ROUTING
+ * It is hooked before NF_IP_PRI_NAT_SRC at the NF_INET_POST_ROUTING
* chain, and is used for VS/NAT.
* It detects packets for VS/NAT connections and sends the packets
* immediately. This can avoid that iptable_nat mangles the packets
* for VS/NAT.
*/
static unsigned int ip_vs_post_routing(unsigned int hooknum,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- if (!((*pskb)->ipvs_property))
+ if (!skb->ipvs_property)
return NF_ACCEPT;
/* The packet was sent from IPVS, exit this chain */
return NF_STOP;
@@ -541,13 +502,14 @@
return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0));
}
-static inline struct sk_buff *
-ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
+static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
{
- skb = ip_defrag(skb, user);
- if (skb)
- ip_send_check(skb->nh.iph);
- return skb;
+ int err = ip_defrag(skb, user);
+
+ if (!err)
+ ip_send_check(ip_hdr(skb));
+
+ return err;
}
/*
@@ -557,9 +519,10 @@
void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp,
struct ip_vs_conn *cp, int inout)
{
- struct iphdr *iph = skb->nh.iph;
+ struct iphdr *iph = ip_hdr(skb);
unsigned int icmp_offset = iph->ihl*4;
- struct icmphdr *icmph = (struct icmphdr *)(skb->nh.raw + icmp_offset);
+ struct icmphdr *icmph = (struct icmphdr *)(skb_network_header(skb) +
+ icmp_offset);
struct iphdr *ciph = (struct iphdr *)(icmph + 1);
if (inout) {
@@ -604,9 +567,8 @@
* Currently handles error types - unreachable, quench, ttl exceeded.
* (Only used in VS/NAT)
*/
-static int ip_vs_out_icmp(struct sk_buff **pskb, int *related)
+static int ip_vs_out_icmp(struct sk_buff *skb, int *related)
{
- struct sk_buff *skb = *pskb;
struct iphdr *iph;
struct icmphdr _icmph, *ic;
struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */
@@ -617,14 +579,12 @@
*related = 1;
/* reassemble IP fragments */
- if (skb->nh.iph->frag_off & __constant_htons(IP_MF|IP_OFFSET)) {
- skb = ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT);
- if (!skb)
+ if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
+ if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT))
return NF_STOLEN;
- *pskb = skb;
}
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
offset = ihl = iph->ihl * 4;
ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
if (ic == NULL)
@@ -659,7 +619,7 @@
return NF_ACCEPT;
/* Is the embedded protocol header present? */
- if (unlikely(cih->frag_off & __constant_htons(IP_OFFSET) &&
+ if (unlikely(cih->frag_off & htons(IP_OFFSET) &&
pp->dont_defrag))
return NF_ACCEPT;
@@ -675,13 +635,12 @@
verdict = NF_DROP;
if (IP_VS_FWD_METHOD(cp) != 0) {
- IP_VS_ERR("shouldn't reach here, because the box is on the"
+ IP_VS_ERR("shouldn't reach here, because the box is on the "
"half connection in the tun/dr module.\n");
}
/* Ensure the checksum is correct */
- if (skb->ip_summed != CHECKSUM_UNNECESSARY &&
- ip_vs_checksum_complete(skb, ihl)) {
+ if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) {
/* Failed checksum! */
IP_VS_DBG(1, "Forward ICMP: failed checksum from %d.%d.%d.%d!\n",
NIPQUAD(iph->saddr));
@@ -690,9 +649,8 @@
if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol)
offset += 2 * sizeof(__u16);
- if (!ip_vs_make_skb_writable(pskb, offset))
+ if (!skb_make_writable(skb, offset))
goto out;
- skb = *pskb;
ip_vs_nat_icmp(skb, pp, cp, 1);
@@ -712,24 +670,22 @@
{
struct tcphdr _tcph, *th;
- th = skb_header_pointer(skb, skb->nh.iph->ihl * 4,
- sizeof(_tcph), &_tcph);
+ th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
if (th == NULL)
return 0;
return th->rst;
}
/*
- * It is hooked at the NF_IP_FORWARD chain, used only for VS/NAT.
+ * It is hooked at the NF_INET_FORWARD chain, used only for VS/NAT.
* Check if outgoing packet belongs to the established ip_vs_conn,
* rewrite addresses of the packet and send it on its way...
*/
static unsigned int
-ip_vs_out(unsigned int hooknum, struct sk_buff **pskb,
+ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- struct sk_buff *skb = *pskb;
struct iphdr *iph;
struct ip_vs_protocol *pp;
struct ip_vs_conn *cp;
@@ -740,14 +696,13 @@
if (skb->ipvs_property)
return NF_ACCEPT;
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
if (unlikely(iph->protocol == IPPROTO_ICMP)) {
- int related, verdict = ip_vs_out_icmp(pskb, &related);
+ int related, verdict = ip_vs_out_icmp(skb, &related);
if (related)
return verdict;
- skb = *pskb;
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
}
pp = ip_vs_proto_get(iph->protocol);
@@ -755,13 +710,11 @@
return NF_ACCEPT;
/* reassemble IP fragments */
- if (unlikely(iph->frag_off & __constant_htons(IP_MF|IP_OFFSET) &&
+ if (unlikely(iph->frag_off & htons(IP_MF|IP_OFFSET) &&
!pp->dont_defrag)) {
- skb = ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT);
- if (!skb)
+ if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT))
return NF_STOLEN;
- iph = skb->nh.iph;
- *pskb = skb;
+ iph = ip_hdr(skb);
}
ihl = iph->ihl << 2;
@@ -803,25 +756,23 @@
IP_VS_DBG_PKT(11, pp, skb, 0, "Outgoing packet");
- if (!ip_vs_make_skb_writable(pskb, ihl))
+ if (!skb_make_writable(skb, ihl))
goto drop;
/* mangle the packet */
- if (pp->snat_handler && !pp->snat_handler(pskb, pp, cp))
+ if (pp->snat_handler && !pp->snat_handler(skb, pp, cp))
+ goto drop;
+ ip_hdr(skb)->saddr = cp->vaddr;
+ ip_send_check(ip_hdr(skb));
+
+ /* For policy routing, packets originating from this
+ * machine itself may be routed differently to packets
+ * passing through. We want this packet to be routed as
+ * if it came from this machine itself. So re-compute
+ * the routing information.
+ */
+ if (ip_route_me_harder(skb, RTN_LOCAL) != 0)
goto drop;
- skb = *pskb;
- skb->nh.iph->saddr = cp->vaddr;
- ip_send_check(skb->nh.iph);
-
- /* For policy routing, packets originating from this
- * machine itself may be routed differently to packets
- * passing through. We want this packet to be routed as
- * if it came from this machine itself. So re-compute
- * the routing information.
- */
- if (ip_route_me_harder(pskb, RTN_LOCAL) != 0)
- goto drop;
- skb = *pskb;
IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT");
@@ -836,7 +787,7 @@
drop:
ip_vs_conn_put(cp);
- kfree_skb(*pskb);
+ kfree_skb(skb);
return NF_STOLEN;
}
@@ -847,10 +798,9 @@
* forward to the right destination host if relevant.
* Currently handles error types - unreachable, quench, ttl exceeded.
*/
-static int
-ip_vs_in_icmp(struct sk_buff **pskb, int *related, unsigned int hooknum)
+static int
+ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
{
- struct sk_buff *skb = *pskb;
struct iphdr *iph;
struct icmphdr _icmph, *ic;
struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */
@@ -861,16 +811,13 @@
*related = 1;
/* reassemble IP fragments */
- if (skb->nh.iph->frag_off & __constant_htons(IP_MF|IP_OFFSET)) {
- skb = ip_vs_gather_frags(skb,
- hooknum == NF_IP_LOCAL_IN ?
- IP_DEFRAG_VS_IN : IP_DEFRAG_VS_FWD);
- if (!skb)
+ if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
+ if (ip_vs_gather_frags(skb, hooknum == NF_INET_LOCAL_IN ?
+ IP_DEFRAG_VS_IN : IP_DEFRAG_VS_FWD))
return NF_STOLEN;
- *pskb = skb;
}
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
offset = ihl = iph->ihl * 4;
ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
if (ic == NULL)
@@ -905,7 +852,7 @@
return NF_ACCEPT;
/* Is the embedded protocol header present? */
- if (unlikely(cih->frag_off & __constant_htons(IP_OFFSET) &&
+ if (unlikely(cih->frag_off & htons(IP_OFFSET) &&
pp->dont_defrag))
return NF_ACCEPT;
@@ -921,8 +868,7 @@
verdict = NF_DROP;
/* Ensure the checksum is correct */
- if (skb->ip_summed != CHECKSUM_UNNECESSARY &&
- ip_vs_checksum_complete(skb, ihl)) {
+ if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) {
/* Failed checksum! */
IP_VS_DBG(1, "Incoming ICMP: failed checksum from %d.%d.%d.%d!\n",
NIPQUAD(iph->saddr));
@@ -947,11 +893,10 @@
* and send it on its way...
*/
static unsigned int
-ip_vs_in(unsigned int hooknum, struct sk_buff **pskb,
+ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- struct sk_buff *skb = *pskb;
struct iphdr *iph;
struct ip_vs_protocol *pp;
struct ip_vs_conn *cp;
@@ -963,22 +908,21 @@
* ... don't know why 1st test DOES NOT include 2nd (?)
*/
if (unlikely(skb->pkt_type != PACKET_HOST
- || skb->dev == &loopback_dev || skb->sk)) {
+ || skb->dev->flags & IFF_LOOPBACK || skb->sk)) {
IP_VS_DBG(12, "packet type=%d proto=%d daddr=%d.%d.%d.%d ignored\n",
skb->pkt_type,
- skb->nh.iph->protocol,
- NIPQUAD(skb->nh.iph->daddr));
+ ip_hdr(skb)->protocol,
+ NIPQUAD(ip_hdr(skb)->daddr));
return NF_ACCEPT;
}
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
if (unlikely(iph->protocol == IPPROTO_ICMP)) {
- int related, verdict = ip_vs_in_icmp(pskb, &related, hooknum);
+ int related, verdict = ip_vs_in_icmp(skb, &related, hooknum);
if (related)
return verdict;
- skb = *pskb;
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
}
/* Protocol supported? */
@@ -1033,15 +977,24 @@
ret = NF_ACCEPT;
}
- /* increase its packet counter and check if it is needed
- to be synchronized */
+ /* Increase its packet counter and check if it is needed
+ * to be synchronized
+ *
+ * Sync connection if it is about to close to
+ * encorage the standby servers to update the connections timeout
+ */
atomic_inc(&cp->in_pkts);
if ((ip_vs_sync_state & IP_VS_STATE_MASTER) &&
- (cp->protocol != IPPROTO_TCP ||
- cp->state == IP_VS_TCP_S_ESTABLISHED) &&
- (atomic_read(&cp->in_pkts) % sysctl_ip_vs_sync_threshold[1]
- == sysctl_ip_vs_sync_threshold[0]))
+ (((cp->protocol != IPPROTO_TCP ||
+ cp->state == IP_VS_TCP_S_ESTABLISHED) &&
+ (atomic_read(&cp->in_pkts) % sysctl_ip_vs_sync_threshold[1]
+ == sysctl_ip_vs_sync_threshold[0])) ||
+ ((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) &&
+ ((cp->state == IP_VS_TCP_S_FIN_WAIT) ||
+ (cp->state == IP_VS_TCP_S_CLOSE_WAIT) ||
+ (cp->state == IP_VS_TCP_S_TIME_WAIT)))))
ip_vs_sync_conn(cp);
+ cp->old_state = cp->state;
ip_vs_conn_put(cp);
return ret;
@@ -1049,65 +1002,64 @@
/*
- * It is hooked at the NF_IP_FORWARD chain, in order to catch ICMP
+ * It is hooked at the NF_INET_FORWARD chain, in order to catch ICMP
* related packets destined for 0.0.0.0/0.
* When fwmark-based virtual service is used, such as transparent
* cache cluster, TCP packets can be marked and routed to ip_vs_in,
* but ICMP destined for 0.0.0.0/0 cannot not be easily marked and
- * sent to ip_vs_in_icmp. So, catch them at the NF_IP_FORWARD chain
+ * sent to ip_vs_in_icmp. So, catch them at the NF_INET_FORWARD chain
* and send them to ip_vs_in_icmp.
*/
static unsigned int
-ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff **pskb,
+ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
int r;
- if ((*pskb)->nh.iph->protocol != IPPROTO_ICMP)
+ if (ip_hdr(skb)->protocol != IPPROTO_ICMP)
return NF_ACCEPT;
- return ip_vs_in_icmp(pskb, &r, hooknum);
+ return ip_vs_in_icmp(skb, &r, hooknum);
}
-/* After packet filtering, forward packet through VS/DR, VS/TUN,
- or VS/NAT(change destination), so that filtering rules can be
- applied to IPVS. */
-static struct nf_hook_ops ip_vs_in_ops = {
- .hook = ip_vs_in,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_LOCAL_IN,
- .priority = 100,
-};
-
-/* After packet filtering, change source only for VS/NAT */
-static struct nf_hook_ops ip_vs_out_ops = {
- .hook = ip_vs_out,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_FORWARD,
- .priority = 100,
-};
-
-/* After packet filtering (but before ip_vs_out_icmp), catch icmp
- destined for 0.0.0.0/0, which is for incoming IPVS connections */
-static struct nf_hook_ops ip_vs_forward_icmp_ops = {
- .hook = ip_vs_forward_icmp,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_FORWARD,
- .priority = 99,
-};
-
-/* Before the netfilter connection tracking, exit from POST_ROUTING */
-static struct nf_hook_ops ip_vs_post_routing_ops = {
- .hook = ip_vs_post_routing,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_POST_ROUTING,
- .priority = NF_IP_PRI_NAT_SRC-1,
+static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
+ /* After packet filtering, forward packet through VS/DR, VS/TUN,
+ * or VS/NAT(change destination), so that filtering rules can be
+ * applied to IPVS. */
+ {
+ .hook = ip_vs_in,
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_INET_LOCAL_IN,
+ .priority = 100,
+ },
+ /* After packet filtering, change source only for VS/NAT */
+ {
+ .hook = ip_vs_out,
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_INET_FORWARD,
+ .priority = 100,
+ },
+ /* After packet filtering (but before ip_vs_out_icmp), catch icmp
+ * destined for 0.0.0.0/0, which is for incoming IPVS connections */
+ {
+ .hook = ip_vs_forward_icmp,
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_INET_FORWARD,
+ .priority = 99,
+ },
+ /* Before the netfilter connection tracking, exit from POST_ROUTING */
+ {
+ .hook = ip_vs_post_routing,
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_INET_POST_ROUTING,
+ .priority = NF_IP_PRI_NAT_SRC-1,
+ },
};
@@ -1138,37 +1090,15 @@
goto cleanup_app;
}
- ret = nf_register_hook(&ip_vs_in_ops);
+ ret = nf_register_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
if (ret < 0) {
- IP_VS_ERR("can't register in hook.\n");
+ IP_VS_ERR("can't register hooks.\n");
goto cleanup_conn;
}
- ret = nf_register_hook(&ip_vs_out_ops);
- if (ret < 0) {
- IP_VS_ERR("can't register out hook.\n");
- goto cleanup_inops;
- }
- ret = nf_register_hook(&ip_vs_post_routing_ops);
- if (ret < 0) {
- IP_VS_ERR("can't register post_routing hook.\n");
- goto cleanup_outops;
- }
- ret = nf_register_hook(&ip_vs_forward_icmp_ops);
- if (ret < 0) {
- IP_VS_ERR("can't register forward_icmp hook.\n");
- goto cleanup_postroutingops;
- }
-
IP_VS_INFO("ipvs loaded.\n");
return ret;
- cleanup_postroutingops:
- nf_unregister_hook(&ip_vs_post_routing_ops);
- cleanup_outops:
- nf_unregister_hook(&ip_vs_out_ops);
- cleanup_inops:
- nf_unregister_hook(&ip_vs_in_ops);
cleanup_conn:
ip_vs_conn_cleanup();
cleanup_app:
@@ -1182,10 +1112,7 @@
static void __exit ip_vs_cleanup(void)
{
- nf_unregister_hook(&ip_vs_forward_icmp_ops);
- nf_unregister_hook(&ip_vs_post_routing_ops);
- nf_unregister_hook(&ip_vs_out_ops);
- nf_unregister_hook(&ip_vs_in_ops);
+ nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
ip_vs_conn_cleanup();
ip_vs_app_cleanup();
ip_vs_protocol_cleanup();
diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_ctl.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_ctl.c
--- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_ctl.c 2009-02-16 11:57:22.000000000 -0400
+++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_ctl.c 2009-02-16 12:56:22.000000000 -0400
@@ -5,8 +5,6 @@
* high-performance and highly available server based on a
* cluster of servers.
*
- * Version: $Id: ip_vs_ctl.c,v 1.36 2003/06/08 09:31:19 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
* Peter Kese <peter.kese@ijs.si>
* Julian Anastasov <ja@ssi.bg>
@@ -29,13 +27,13 @@
#include <linux/proc_fs.h>
#include <linux/workqueue.h>
#include <linux/swap.h>
-#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/mutex.h>
+#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/route.h>
#include <net/sock.h>
@@ -579,6 +577,31 @@
return NULL;
}
+/*
+ * Find destination by {daddr,dport,vaddr,protocol}
+ * Cretaed to be used in ip_vs_process_message() in
+ * the backup synchronization daemon. It finds the
+ * destination to be bound to the received connection
+ * on the backup.
+ *
+ * ip_vs_lookup_real_service() looked promissing, but
+ * seems not working as expected.
+ */
+struct ip_vs_dest *ip_vs_find_dest(__be32 daddr, __be16 dport,
+ __be32 vaddr, __be16 vport, __u16 protocol)
+{
+ struct ip_vs_dest *dest;
+ struct ip_vs_service *svc;
+
+ svc = ip_vs_service_get(0, protocol, vaddr, vport);
+ if (!svc)
+ return NULL;
+ dest = ip_vs_lookup_dest(svc, daddr, dport);
+ if (dest)
+ atomic_inc(&dest->refcnt);
+ ip_vs_service_put(svc);
+ return dest;
+}
/*
* Lookup dest by {svc,addr,port} in the destination trash.
@@ -660,9 +683,22 @@
ip_vs_zero_stats(struct ip_vs_stats *stats)
{
spin_lock_bh(&stats->lock);
- memset(stats, 0, (char *)&stats->lock - (char *)stats);
- spin_unlock_bh(&stats->lock);
+
+ stats->conns = 0;
+ stats->inpkts = 0;
+ stats->outpkts = 0;
+ stats->inbytes = 0;
+ stats->outbytes = 0;
+
+ stats->cps = 0;
+ stats->inpps = 0;
+ stats->outpps = 0;
+ stats->inbps = 0;
+ stats->outbps = 0;
+
ip_vs_zero_estimator(stats);
+
+ spin_unlock_bh(&stats->lock);
}
/*
@@ -679,7 +715,7 @@
conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE;
/* check if local node and update the flags */
- if (inet_addr_type(udest->addr) == RTN_LOCAL) {
+ if (inet_addr_type(&init_net, udest->addr) == RTN_LOCAL) {
conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
| IP_VS_CONN_F_LOCALNODE;
}
@@ -731,7 +767,7 @@
EnterFunction(2);
- atype = inet_addr_type(udest->addr);
+ atype = inet_addr_type(&init_net, udest->addr);
if (atype != RTN_LOCAL && atype != RTN_UNICAST)
return -EINVAL;
@@ -909,7 +945,7 @@
write_lock_bh(&__ip_vs_svc_lock);
/* Wait until all other svc users go away */
- while (atomic_read(&svc->usecnt) > 1) {};
+ IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
/* call the update_service, because server weight may be changed */
svc->scheduler->update_service(svc);
@@ -1399,7 +1435,6 @@
static struct ctl_table vs_vars[] = {
{
- .ctl_name = NET_IPV4_VS_AMEMTHRESH,
.procname = "amemthresh",
.data = &sysctl_ip_vs_amemthresh,
.maxlen = sizeof(int),
@@ -1408,7 +1443,6 @@
},
#ifdef CONFIG_IP_VS_DEBUG
{
- .ctl_name = NET_IPV4_VS_DEBUG_LEVEL,
.procname = "debug_level",
.data = &sysctl_ip_vs_debug_level,
.maxlen = sizeof(int),
@@ -1417,7 +1451,6 @@
},
#endif
{
- .ctl_name = NET_IPV4_VS_AMDROPRATE,
.procname = "am_droprate",
.data = &sysctl_ip_vs_am_droprate,
.maxlen = sizeof(int),
@@ -1425,7 +1458,6 @@
.proc_handler = &proc_dointvec,
},
{
- .ctl_name = NET_IPV4_VS_DROP_ENTRY,
.procname = "drop_entry",
.data = &sysctl_ip_vs_drop_entry,
.maxlen = sizeof(int),
@@ -1433,7 +1465,6 @@
.proc_handler = &proc_do_defense_mode,
},
{
- .ctl_name = NET_IPV4_VS_DROP_PACKET,
.procname = "drop_packet",
.data = &sysctl_ip_vs_drop_packet,
.maxlen = sizeof(int),
@@ -1441,7 +1472,6 @@
.proc_handler = &proc_do_defense_mode,
},
{
- .ctl_name = NET_IPV4_VS_SECURE_TCP,
.procname = "secure_tcp",
.data = &sysctl_ip_vs_secure_tcp,
.maxlen = sizeof(int),
@@ -1450,7 +1480,6 @@
},
#if 0
{
- .ctl_name = NET_IPV4_VS_TO_ES,
.procname = "timeout_established",
.data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
.maxlen = sizeof(int),
@@ -1458,7 +1487,6 @@
.proc_handler = &proc_dointvec_jiffies,
},
{
- .ctl_name = NET_IPV4_VS_TO_SS,
.procname = "timeout_synsent",
.data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
.maxlen = sizeof(int),
@@ -1466,7 +1494,6 @@
.proc_handler = &proc_dointvec_jiffies,
},
{
- .ctl_name = NET_IPV4_VS_TO_SR,
.procname = "timeout_synrecv",
.data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
.maxlen = sizeof(int),
@@ -1474,7 +1501,6 @@
.proc_handler = &proc_dointvec_jiffies,
},
{
- .ctl_name = NET_IPV4_VS_TO_FW,
.procname = "timeout_finwait",
.data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
.maxlen = sizeof(int),
@@ -1482,7 +1508,6 @@
.proc_handler = &proc_dointvec_jiffies,
},
{
- .ctl_name = NET_IPV4_VS_TO_TW,
.procname = "timeout_timewait",
.data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
.maxlen = sizeof(int),
@@ -1490,7 +1515,6 @@
.proc_handler = &proc_dointvec_jiffies,
},
{
- .ctl_name = NET_IPV4_VS_TO_CL,
.procname = "timeout_close",
.data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
.maxlen = sizeof(int),
@@ -1498,7 +1522,6 @@
.proc_handler = &proc_dointvec_jiffies,
},
{
- .ctl_name = NET_IPV4_VS_TO_CW,
.procname = "timeout_closewait",
.data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
.maxlen = sizeof(int),
@@ -1506,7 +1529,6 @@
.proc_handler = &proc_dointvec_jiffies,
},
{
- .ctl_name = NET_IPV4_VS_TO_LA,
.procname = "timeout_lastack",
.data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
.maxlen = sizeof(int),
@@ -1514,7 +1536,6 @@
.proc_handler = &proc_dointvec_jiffies,
},
{
- .ctl_name = NET_IPV4_VS_TO_LI,
.procname = "timeout_listen",
.data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
.maxlen = sizeof(int),
@@ -1522,7 +1543,6 @@
.proc_handler = &proc_dointvec_jiffies,
},
{
- .ctl_name = NET_IPV4_VS_TO_SA,
.procname = "timeout_synack",
.data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
.maxlen = sizeof(int),
@@ -1530,7 +1550,6 @@
.proc_handler = &proc_dointvec_jiffies,
},
{
- .ctl_name = NET_IPV4_VS_TO_UDP,
.procname = "timeout_udp",
.data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
.maxlen = sizeof(int),
@@ -1538,7 +1557,6 @@
.proc_handler = &proc_dointvec_jiffies,
},
{
- .ctl_name = NET_IPV4_VS_TO_ICMP,
.procname = "timeout_icmp",
.data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
.maxlen = sizeof(int),
@@ -1547,7 +1565,6 @@
},
#endif
{
- .ctl_name = NET_IPV4_VS_CACHE_BYPASS,
.procname = "cache_bypass",
.data = &sysctl_ip_vs_cache_bypass,
.maxlen = sizeof(int),
@@ -1555,7 +1572,6 @@
.proc_handler = &proc_dointvec,
},
{
- .ctl_name = NET_IPV4_VS_EXPIRE_NODEST_CONN,
.procname = "expire_nodest_conn",
.data = &sysctl_ip_vs_expire_nodest_conn,
.maxlen = sizeof(int),
@@ -1563,7 +1579,6 @@
.proc_handler = &proc_dointvec,
},
{
- .ctl_name = NET_IPV4_VS_EXPIRE_QUIESCENT_TEMPLATE,
.procname = "expire_quiescent_template",
.data = &sysctl_ip_vs_expire_quiescent_template,
.maxlen = sizeof(int),
@@ -1571,7 +1586,6 @@
.proc_handler = &proc_dointvec,
},
{
- .ctl_name = NET_IPV4_VS_SYNC_THRESHOLD,
.procname = "sync_threshold",
.data = &sysctl_ip_vs_sync_threshold,
.maxlen = sizeof(sysctl_ip_vs_sync_threshold),
@@ -1579,7 +1593,6 @@
.proc_handler = &proc_do_sync_threshold,
},
{
- .ctl_name = NET_IPV4_VS_NAT_ICMP_SEND,
.procname = "nat_icmp_send",
.data = &sysctl_ip_vs_nat_icmp_send,
.maxlen = sizeof(int),
@@ -1589,35 +1602,13 @@
{ .ctl_name = 0 }
};
-static ctl_table vs_table[] = {
- {
- .ctl_name = NET_IPV4_VS,
- .procname = "vs",
- .mode = 0555,
- .child = vs_vars
- },
- { .ctl_name = 0 }
-};
-
-static ctl_table ipvs_ipv4_table[] = {
- {
- .ctl_name = NET_IPV4,
- .procname = "ipv4",
- .mode = 0555,
- .child = vs_table,
- },
- { .ctl_name = 0 }
-};
-
-static ctl_table vs_root_table[] = {
- {
- .ctl_name = CTL_NET,
- .procname = "net",
- .mode = 0555,
- .child = ipvs_ipv4_table,
- },
- { .ctl_name = 0 }
+const struct ctl_path net_vs_ctl_path[] = {
+ { .procname = "net", .ctl_name = CTL_NET, },
+ { .procname = "ipv4", .ctl_name = NET_IPV4, },
+ { .procname = "vs", },
+ { }
};
+EXPORT_SYMBOL_GPL(net_vs_ctl_path);
static struct ctl_table_header * sysctl_header;
@@ -1783,7 +1774,7 @@
return 0;
}
-static struct seq_operations ip_vs_info_seq_ops = {
+static const struct seq_operations ip_vs_info_seq_ops = {
.start = ip_vs_info_seq_start,
.next = ip_vs_info_seq_next,
.stop = ip_vs_info_seq_stop,
@@ -1792,27 +1783,11 @@
static int ip_vs_info_open(struct inode *inode, struct file *file)
{
- struct seq_file *seq;
- int rc = -ENOMEM;
- struct ip_vs_iter *s = kzalloc(sizeof(*s), GFP_KERNEL);
-
- if (!s)
- goto out;
-
- rc = seq_open(file, &ip_vs_info_seq_ops);
- if (rc)
- goto out_kfree;
-
- seq = file->private_data;
- seq->private = s;
-out:
- return rc;
-out_kfree:
- kfree(s);
- goto out;
+ return seq_open_private(file, &ip_vs_info_seq_ops,
+ sizeof(struct ip_vs_iter));
}
-static struct file_operations ip_vs_info_fops = {
+static const struct file_operations ip_vs_info_fops = {
.owner = THIS_MODULE,
.open = ip_vs_info_open,
.read = seq_read,
@@ -1822,7 +1797,9 @@
#endif
-struct ip_vs_stats ip_vs_stats;
+struct ip_vs_stats ip_vs_stats = {
+ .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
+};
#ifdef CONFIG_PROC_FS
static int ip_vs_stats_show(struct seq_file *seq, void *v)
@@ -1859,7 +1836,7 @@
return single_open(file, ip_vs_stats_show, NULL);
}
-static struct file_operations ip_vs_stats_fops = {
+static const struct file_operations ip_vs_stats_fops = {
.owner = THIS_MODULE,
.open = ip_vs_stats_seq_open,
.read = seq_read,
@@ -2340,10 +2317,11 @@
.get_optmin = IP_VS_BASE_CTL,
.get_optmax = IP_VS_SO_GET_MAX+1,
.get = do_ip_vs_get_ctl,
+ .owner = THIS_MODULE,
};
-int ip_vs_control_init(void)
+int __init ip_vs_control_init(void)
{
int ret;
int idx;
@@ -2356,10 +2334,10 @@
return ret;
}
- proc_net_fops_create("ip_vs", 0, &ip_vs_info_fops);
- proc_net_fops_create("ip_vs_stats",0, &ip_vs_stats_fops);
+ proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
+ proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
- sysctl_header = register_sysctl_table(vs_root_table, 0);
+ sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
/* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
@@ -2370,8 +2348,6 @@
INIT_LIST_HEAD(&ip_vs_rtable[idx]);
}
- memset(&ip_vs_stats, 0, sizeof(ip_vs_stats));
- spin_lock_init(&ip_vs_stats.lock);
ip_vs_new_estimator(&ip_vs_stats);
/* Hook the defense timer */
@@ -2387,10 +2363,11 @@
EnterFunction(2);
ip_vs_trash_cleanup();
cancel_rearming_delayed_work(&defense_work);
+ cancel_work_sync(&defense_work.work);
ip_vs_kill_estimator(&ip_vs_stats);
unregister_sysctl_table(sysctl_header);
- proc_net_remove("ip_vs_stats");
- proc_net_remove("ip_vs");
+ proc_net_remove(&init_net, "ip_vs_stats");
+ proc_net_remove(&init_net, "ip_vs");
nf_unregister_sockopt(&ip_vs_sockopts);
LeaveFunction(2);
}
diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_dh.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_dh.c
--- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_dh.c 2009-02-16 11:57:22.000000000 -0400
+++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_dh.c 2009-02-16 12:56:22.000000000 -0400
@@ -1,8 +1,6 @@
/*
* IPVS: Destination Hashing scheduling module
*
- * Version: $Id: ip_vs_dh.c,v 1.5 2002/09/15 08:14:08 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@gnuchina.org>
*
* Inspired by the consistent hashing scheduler patch from
@@ -204,7 +202,7 @@
{
struct ip_vs_dest *dest;
struct ip_vs_dh_bucket *tbl;
- struct iphdr *iph = skb->nh.iph;
+ struct iphdr *iph = ip_hdr(skb);
IP_VS_DBG(6, "ip_vs_dh_schedule(): Scheduling...\n");
@@ -235,6 +233,7 @@
.name = "dh",
.refcnt = ATOMIC_INIT(0),
.module = THIS_MODULE,
+ .n_list = LIST_HEAD_INIT(ip_vs_dh_scheduler.n_list),
.init_service = ip_vs_dh_init_svc,
.done_service = ip_vs_dh_done_svc,
.update_service = ip_vs_dh_update_svc,
@@ -244,7 +243,6 @@
static int __init ip_vs_dh_init(void)
{
- INIT_LIST_HEAD(&ip_vs_dh_scheduler.n_list);
return register_ip_vs_scheduler(&ip_vs_dh_scheduler);
}
diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_est.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_est.c
--- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_est.c 2009-02-16 11:57:22.000000000 -0400
+++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_est.c 2009-02-16 12:56:22.000000000 -0400
@@ -1,8 +1,6 @@
/*
* ip_vs_est.c: simple rate estimator for IPVS
*
- * Version: $Id: ip_vs_est.c,v 1.4 2002/11/30 01:50:35 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
*
* This program is free software; you can redistribute it and/or
@@ -18,6 +16,8 @@
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/interrupt.h>
+#include <linux/sysctl.h>
+#include <linux/list.h>
#include <net/ip_vs.h>
@@ -45,28 +45,11 @@
*/
-struct ip_vs_estimator
-{
- struct ip_vs_estimator *next;
- struct ip_vs_stats *stats;
+static void estimation_timer(unsigned long arg);
- u32 last_conns;
- u32 last_inpkts;
- u32 last_outpkts;
- u64 last_inbytes;
- u64 last_outbytes;
-
- u32 cps;
- u32 inpps;
- u32 outpps;
- u32 inbps;
- u32 outbps;
-};
-
-
-static struct ip_vs_estimator *est_list = NULL;
-static DEFINE_RWLOCK(est_lock);
-static struct timer_list est_timer;
+static LIST_HEAD(est_list);
+static DEFINE_SPINLOCK(est_lock);
+static DEFINE_TIMER(est_timer, estimation_timer, 0, 0);
static void estimation_timer(unsigned long arg)
{
@@ -77,9 +60,9 @@
u64 n_inbytes, n_outbytes;
u32 rate;
- read_lock(&est_lock);
- for (e = est_list; e; e = e->next) {
- s = e->stats;
+ spin_lock(&est_lock);
+ list_for_each_entry(e, &est_list, list) {
+ s = container_of(e, struct ip_vs_stats, est);
spin_lock(&s->lock);
n_conns = s->conns;
@@ -115,19 +98,16 @@
s->outbps = (e->outbps+0xF)>>5;
spin_unlock(&s->lock);
}
- read_unlock(&est_lock);
+ spin_unlock(&est_lock);
mod_timer(&est_timer, jiffies + 2*HZ);
}
-int ip_vs_new_estimator(struct ip_vs_stats *stats)
+void ip_vs_new_estimator(struct ip_vs_stats *stats)
{
- struct ip_vs_estimator *est;
+ struct ip_vs_estimator *est = &stats->est;
- est = kzalloc(sizeof(*est), GFP_KERNEL);
- if (est == NULL)
- return -ENOMEM;
+ INIT_LIST_HEAD(&est->list);
- est->stats = stats;
est->last_conns = stats->conns;
est->cps = stats->cps<<10;
@@ -143,60 +123,40 @@
est->last_outbytes = stats->outbytes;
est->outbps = stats->outbps<<5;
- write_lock_bh(&est_lock);
- est->next = est_list;
- if (est->next == NULL) {
- init_timer(&est_timer);
- est_timer.expires = jiffies + 2*HZ;
- est_timer.function = estimation_timer;
- add_timer(&est_timer);
- }
- est_list = est;
- write_unlock_bh(&est_lock);
- return 0;
+ spin_lock_bh(&est_lock);
+ if (list_empty(&est_list))
+ mod_timer(&est_timer, jiffies + 2 * HZ);
+ list_add(&est->list, &est_list);
+ spin_unlock_bh(&est_lock);
}
void ip_vs_kill_estimator(struct ip_vs_stats *stats)
{
- struct ip_vs_estimator *est, **pest;
- int killed = 0;
+ struct ip_vs_estimator *est = &stats->est;
- write_lock_bh(&est_lock);
- pest = &est_list;
- while ((est=*pest) != NULL) {
- if (est->stats != stats) {
- pest = &est->next;
- continue;
- }
- *pest = est->next;
- kfree(est);
- killed++;
+ spin_lock_bh(&est_lock);
+ list_del(&est->list);
+ while (list_empty(&est_list) && try_to_del_timer_sync(&est_timer) < 0) {
+ spin_unlock_bh(&est_lock);
+ cpu_relax();
+ spin_lock_bh(&est_lock);
}
- if (killed && est_list == NULL)
- del_timer_sync(&est_timer);
- write_unlock_bh(&est_lock);
+ spin_unlock_bh(&est_lock);
}
void ip_vs_zero_estimator(struct ip_vs_stats *stats)
{
- struct ip_vs_estimator *e;
+ struct ip_vs_estimator *est = &stats->est;
- write_lock_bh(&est_lock);
- for (e = est_list; e; e = e->next) {
- if (e->stats != stats)
- continue;
-
- /* set counters zero */
- e->last_conns = 0;
- e->last_inpkts = 0;
- e->last_outpkts = 0;
- e->last_inbytes = 0;
- e->last_outbytes = 0;
- e->cps = 0;
- e->inpps = 0;
- e->outpps = 0;
- e->inbps = 0;
- e->outbps = 0;
- }
- write_unlock_bh(&est_lock);
+ /* set counters zero, caller must hold the stats->lock lock */
+ est->last_inbytes = 0;
+ est->last_outbytes = 0;
+ est->last_conns = 0;
+ est->last_inpkts = 0;
+ est->last_outpkts = 0;
+ est->cps = 0;
+ est->inpps = 0;
+ est->outpps = 0;
+ est->inbps = 0;
+ est->outbps = 0;
}
diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_ftp.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_ftp.c
--- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_ftp.c 2009-02-16 11:57:22.000000000 -0400
+++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_ftp.c 2009-02-16 12:56:22.000000000 -0400
@@ -1,8 +1,6 @@
/*
* ip_vs_ftp.c: IPVS ftp application module
*
- * Version: $Id: ip_vs_ftp.c,v 1.13 2002/09/15 08:14:08 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
*
* Changes:
@@ -30,6 +28,7 @@
#include <linux/skbuff.h>
#include <linux/in.h>
#include <linux/ip.h>
+#include <linux/netfilter.h>
#include <net/protocol.h>
#include <net/tcp.h>
#include <asm/unaligned.h>
@@ -135,7 +134,7 @@
* xxx,xxx,xxx,xxx is the server address, ppp,ppp is the server port number.
*/
static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
- struct sk_buff **pskb, int *diff)
+ struct sk_buff *skb, int *diff)
{
struct iphdr *iph;
struct tcphdr *th;
@@ -155,14 +154,14 @@
return 1;
/* Linear packets are much easier to deal with. */
- if (!ip_vs_make_skb_writable(pskb, (*pskb)->len))
+ if (!skb_make_writable(skb, skb->len))
return 0;
if (cp->app_data == &ip_vs_ftp_pasv) {
- iph = (*pskb)->nh.iph;
+ iph = ip_hdr(skb);
th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]);
data = (char *)th + (th->doff << 2);
- data_limit = (*pskb)->tail;
+ data_limit = skb_tail_pointer(skb);
if (ip_vs_ftp_get_addrport(data, data_limit,
SERVER_STRING,
@@ -213,7 +212,7 @@
memcpy(start, buf, buf_len);
ret = 1;
} else {
- ret = !ip_vs_skb_replace(*pskb, GFP_ATOMIC, start,
+ ret = !ip_vs_skb_replace(skb, GFP_ATOMIC, start,
end-start, buf, buf_len);
}
@@ -238,7 +237,7 @@
* the client.
*/
static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
- struct sk_buff **pskb, int *diff)
+ struct sk_buff *skb, int *diff)
{
struct iphdr *iph;
struct tcphdr *th;
@@ -256,20 +255,20 @@
return 1;
/* Linear packets are much easier to deal with. */
- if (!ip_vs_make_skb_writable(pskb, (*pskb)->len))
+ if (!skb_make_writable(skb, skb->len))
return 0;
/*
* Detecting whether it is passive
*/
- iph = (*pskb)->nh.iph;
+ iph = ip_hdr(skb);
th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]);
/* Since there may be OPTIONS in the TCP packet and the HLEN is
the length of the header in 32-bit multiples, it is accurate
to calculate data address by th+HLEN*4 */
data = data_start = (char *)th + (th->doff << 2);
- data_limit = (*pskb)->tail;
+ data_limit = skb_tail_pointer(skb);
while (data <= data_limit - 6) {
if (strnicmp(data, "PASV\r\n", 6) == 0) {
@@ -370,7 +369,7 @@
if (ret)
break;
IP_VS_INFO("%s: loaded support on port[%d] = %d\n",
- app->name, i, ports[i]);
+ app->name, i, ports[i]);
}
if (ret)
diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_lblc.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_lblc.c
--- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_lblc.c 2009-02-16 11:57:22.000000000 -0400
+++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_lblc.c 2009-02-16 12:56:22.000000000 -0400
@@ -1,8 +1,6 @@
/*
* IPVS: Locality-Based Least-Connection scheduling module
*
- * Version: $Id: ip_vs_lblc.c,v 1.10 2002/09/15 08:14:08 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@gnuchina.org>
*
* This program is free software; you can redistribute it and/or
@@ -114,46 +112,15 @@
static ctl_table vs_vars_table[] = {
{
- .ctl_name = NET_IPV4_VS_LBLC_EXPIRE,
.procname = "lblc_expiration",
.data = &sysctl_ip_vs_lblc_expiration,
.maxlen = sizeof(int),
- .mode = 0644,
+ .mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
},
{ .ctl_name = 0 }
};
-static ctl_table vs_table[] = {
- {
- .ctl_name = NET_IPV4_VS,
- .procname = "vs",
- .mode = 0555,
- .child = vs_vars_table
- },
- { .ctl_name = 0 }
-};
-
-static ctl_table ipvs_ipv4_table[] = {
- {
- .ctl_name = NET_IPV4,
- .procname = "ipv4",
- .mode = 0555,
- .child = vs_table
- },
- { .ctl_name = 0 }
-};
-
-static ctl_table lblc_root_table[] = {
- {
- .ctl_name = CTL_NET,
- .procname = "net",
- .mode = 0555,
- .child = ipvs_ipv4_table
- },
- { .ctl_name = 0 }
-};
-
static struct ctl_table_header * sysctl_header;
/*
@@ -288,7 +255,7 @@
write_lock(&tbl->lock);
list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
- if (time_before(now,
+ if (time_before(now,
en->lastuse + sysctl_ip_vs_lblc_expiration))
continue;
@@ -393,9 +360,8 @@
/*
* Hook periodic timer for garbage collection
*/
- init_timer(&tbl->periodic_timer);
- tbl->periodic_timer.data = (unsigned long)tbl;
- tbl->periodic_timer.function = ip_vs_lblc_check_expire;
+ setup_timer(&tbl->periodic_timer, ip_vs_lblc_check_expire,
+ (unsigned long)tbl);
tbl->periodic_timer.expires = jiffies+CHECK_EXPIRE_INTERVAL;
add_timer(&tbl->periodic_timer);
@@ -521,7 +487,7 @@
struct ip_vs_dest *dest;
struct ip_vs_lblc_table *tbl;
struct ip_vs_lblc_entry *en;
- struct iphdr *iph = skb->nh.iph;
+ struct iphdr *iph = ip_hdr(skb);
IP_VS_DBG(6, "ip_vs_lblc_schedule(): Scheduling...\n");
@@ -573,6 +539,7 @@
.name = "lblc",
.refcnt = ATOMIC_INIT(0),
.module = THIS_MODULE,
+ .n_list = LIST_HEAD_INIT(ip_vs_lblc_scheduler.n_list),
.init_service = ip_vs_lblc_init_svc,
.done_service = ip_vs_lblc_done_svc,
.update_service = ip_vs_lblc_update_svc,
@@ -582,9 +549,13 @@
static int __init ip_vs_lblc_init(void)
{
- INIT_LIST_HEAD(&ip_vs_lblc_scheduler.n_list);
- sysctl_header = register_sysctl_table(lblc_root_table, 0);
- return register_ip_vs_scheduler(&ip_vs_lblc_scheduler);
+ int ret;
+
+ sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table);
+ ret = register_ip_vs_scheduler(&ip_vs_lblc_scheduler);
+ if (ret)
+ unregister_sysctl_table(sysctl_header);
+ return ret;
}
diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_lblcr.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_lblcr.c
--- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_lblcr.c 2009-02-16 11:57:22.000000000 -0400
+++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_lblcr.c 2009-02-16 12:56:22.000000000 -0400
@@ -1,8 +1,6 @@
/*
* IPVS: Locality-Based Least-Connection with Replication scheduler
*
- * Version: $Id: ip_vs_lblcr.c,v 1.11 2002/09/15 08:14:08 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@gnuchina.org>
*
* This program is free software; you can redistribute it and/or
@@ -48,8 +46,7 @@
/* for sysctl */
#include <linux/fs.h>
#include <linux/sysctl.h>
-/* for proc_net_create/proc_net_remove */
-#include <linux/proc_fs.h>
+#include <net/net_namespace.h>
#include <net/ip_vs.h>
@@ -303,46 +300,15 @@
static ctl_table vs_vars_table[] = {
{
- .ctl_name = NET_IPV4_VS_LBLCR_EXPIRE,
.procname = "lblcr_expiration",
.data = &sysctl_ip_vs_lblcr_expiration,
.maxlen = sizeof(int),
- .mode = 0644,
+ .mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
},
{ .ctl_name = 0 }
};
-static ctl_table vs_table[] = {
- {
- .ctl_name = NET_IPV4_VS,
- .procname = "vs",
- .mode = 0555,
- .child = vs_vars_table
- },
- { .ctl_name = 0 }
-};
-
-static ctl_table ipvs_ipv4_table[] = {
- {
- .ctl_name = NET_IPV4,
- .procname = "ipv4",
- .mode = 0555,
- .child = vs_table
- },
- { .ctl_name = 0 }
-};
-
-static ctl_table lblcr_root_table[] = {
- {
- .ctl_name = CTL_NET,
- .procname = "net",
- .mode = 0555,
- .child = ipvs_ipv4_table
- },
- { .ctl_name = 0 }
-};
-
static struct ctl_table_header * sysctl_header;
/*
@@ -546,71 +512,6 @@
mod_timer(&tbl->periodic_timer, jiffies+CHECK_EXPIRE_INTERVAL);
}
-
-#ifdef CONFIG_IP_VS_LBLCR_DEBUG
-static struct ip_vs_lblcr_table *lblcr_table_list;
-
-/*
- * /proc/net/ip_vs_lblcr to display the mappings of
- * destination IP address <==> its serverSet
- */
-static int
-ip_vs_lblcr_getinfo(char *buffer, char **start, off_t offset, int length)
-{
- off_t pos=0, begin;
- int len=0, size;
- struct ip_vs_lblcr_table *tbl;
- unsigned long now = jiffies;
- int i;
- struct ip_vs_lblcr_entry *en;
-
- tbl = lblcr_table_list;
-
- size = sprintf(buffer, "LastTime Dest IP address Server set\n");
- pos += size;
- len += size;
-
- for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) {
- read_lock_bh(&tbl->lock);
- list_for_each_entry(en, &tbl->bucket[i], list) {
- char tbuf[16];
- struct ip_vs_dest_list *d;
-
- sprintf(tbuf, "%u.%u.%u.%u", NIPQUAD(en->addr));
- size = sprintf(buffer+len, "%8lu %-16s ",
- now-en->lastuse, tbuf);
-
- read_lock(&en->set.lock);
- for (d=en->set.list; d!=NULL; d=d->next) {
- size += sprintf(buffer+len+size,
- "%u.%u.%u.%u ",
- NIPQUAD(d->dest->addr));
- }
- read_unlock(&en->set.lock);
- size += sprintf(buffer+len+size, "\n");
- len += size;
- pos += size;
- if (pos <= offset)
- len=0;
- if (pos >= offset+length) {
- read_unlock_bh(&tbl->lock);
- goto done;
- }
- }
- read_unlock_bh(&tbl->lock);
- }
-
- done:
- begin = len - (pos - offset);
- *start = buffer + begin;
- len -= begin;
- if(len>length)
- len = length;
- return len;
-}
-#endif
-
-
static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc)
{
int i;
@@ -643,15 +544,11 @@
/*
* Hook periodic timer for garbage collection
*/
- init_timer(&tbl->periodic_timer);
- tbl->periodic_timer.data = (unsigned long)tbl;
- tbl->periodic_timer.function = ip_vs_lblcr_check_expire;
+ setup_timer(&tbl->periodic_timer, ip_vs_lblcr_check_expire,
+ (unsigned long)tbl);
tbl->periodic_timer.expires = jiffies+CHECK_EXPIRE_INTERVAL;
add_timer(&tbl->periodic_timer);
-#ifdef CONFIG_IP_VS_LBLCR_DEBUG
- lblcr_table_list = tbl;
-#endif
return 0;
}
@@ -775,7 +672,7 @@
struct ip_vs_dest *dest;
struct ip_vs_lblcr_table *tbl;
struct ip_vs_lblcr_entry *en;
- struct iphdr *iph = skb->nh.iph;
+ struct iphdr *iph = ip_hdr(skb);
IP_VS_DBG(6, "ip_vs_lblcr_schedule(): Scheduling...\n");
@@ -831,6 +728,7 @@
.name = "lblcr",
.refcnt = ATOMIC_INIT(0),
.module = THIS_MODULE,
+ .n_list = LIST_HEAD_INIT(ip_vs_lblcr_scheduler.n_list),
.init_service = ip_vs_lblcr_init_svc,
.done_service = ip_vs_lblcr_done_svc,
.update_service = ip_vs_lblcr_update_svc,
@@ -840,20 +738,18 @@
static int __init ip_vs_lblcr_init(void)
{
- INIT_LIST_HEAD(&ip_vs_lblcr_scheduler.n_list);
- sysctl_header = register_sysctl_table(lblcr_root_table, 0);
-#ifdef CONFIG_IP_VS_LBLCR_DEBUG
- proc_net_create("ip_vs_lblcr", 0, ip_vs_lblcr_getinfo);
-#endif
- return register_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
+ int ret;
+
+ sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table);
+ ret = register_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
+ if (ret)
+ unregister_sysctl_table(sysctl_header);
+ return ret;
}
static void __exit ip_vs_lblcr_cleanup(void)
{
-#ifdef CONFIG_IP_VS_LBLCR_DEBUG
- proc_net_remove("ip_vs_lblcr");
-#endif
unregister_sysctl_table(sysctl_header);
unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
}
diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_lc.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_lc.c
--- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_lc.c 2009-02-16 11:57:22.000000000 -0400
+++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_lc.c 2009-02-16 12:56:22.000000000 -0400
@@ -1,8 +1,6 @@
/*
* IPVS: Least-Connection Scheduling module
*
- * Version: $Id: ip_vs_lc.c,v 1.10 2003/04/18 09:03:16 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
*
* This program is free software; you can redistribute it and/or
@@ -100,6 +98,7 @@
.name = "lc",
.refcnt = ATOMIC_INIT(0),
.module = THIS_MODULE,
+ .n_list = LIST_HEAD_INIT(ip_vs_lc_scheduler.n_list),
.init_service = ip_vs_lc_init_svc,
.done_service = ip_vs_lc_done_svc,
.update_service = ip_vs_lc_update_svc,
@@ -109,7 +108,6 @@
static int __init ip_vs_lc_init(void)
{
- INIT_LIST_HEAD(&ip_vs_lc_scheduler.n_list);
return register_ip_vs_scheduler(&ip_vs_lc_scheduler) ;
}
diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_nq.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_nq.c
--- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_nq.c 2009-02-16 11:57:22.000000000 -0400
+++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_nq.c 2009-02-16 12:56:22.000000000 -0400
@@ -1,8 +1,6 @@
/*
* IPVS: Never Queue scheduling module
*
- * Version: $Id: ip_vs_nq.c,v 1.2 2003/06/08 09:31:19 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
*
* This program is free software; you can redistribute it and/or
@@ -138,6 +136,7 @@
.name = "nq",
.refcnt = ATOMIC_INIT(0),
.module = THIS_MODULE,
+ .n_list = LIST_HEAD_INIT(ip_vs_nq_scheduler.n_list),
.init_service = ip_vs_nq_init_svc,
.done_service = ip_vs_nq_done_svc,
.update_service = ip_vs_nq_update_svc,
@@ -147,7 +146,6 @@
static int __init ip_vs_nq_init(void)
{
- INIT_LIST_HEAD(&ip_vs_nq_scheduler.n_list);
return register_ip_vs_scheduler(&ip_vs_nq_scheduler);
}
diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_proto.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_proto.c
--- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_proto.c 2009-02-16 11:57:22.000000000 -0400
+++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_proto.c 2009-02-16 12:56:22.000000000 -0400
@@ -1,8 +1,6 @@
/*
* ip_vs_proto.c: transport protocol load balancing support for IPVS
*
- * Version: $Id: ip_vs_proto.c,v 1.2 2003/04/18 09:03:16 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
* Julian Anastasov <ja@ssi.bg>
*
@@ -45,7 +43,7 @@
/*
* register an ipvs protocol
*/
-static int register_ip_vs_protocol(struct ip_vs_protocol *pp)
+static int __used __init register_ip_vs_protocol(struct ip_vs_protocol *pp)
{
unsigned hash = IP_VS_PROTO_HASH(pp->protocol);
@@ -148,7 +146,7 @@
struct ip_vs_protocol *pp = ip_vs_proto_get(proto);
if (pp == NULL || pp->state_name == NULL)
- return "ERR!";
+ return (IPPROTO_IP == proto) ? "NONE" : "ERR!";
return pp->state_name(state);
}
@@ -165,7 +163,7 @@
ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
if (ih == NULL)
sprintf(buf, "%s TRUNCATED", pp->name);
- else if (ih->frag_off & __constant_htons(IP_OFFSET))
+ else if (ih->frag_off & htons(IP_OFFSET))
sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u frag",
pp->name, NIPQUAD(ih->saddr),
NIPQUAD(ih->daddr));
@@ -192,7 +190,7 @@
}
-int ip_vs_protocol_init(void)
+int __init ip_vs_protocol_init(void)
{
char protocols[64];
#define REGISTER_PROTOCOL(p) \
diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_proto_ah.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_proto_ah.c
--- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_proto_ah.c 2009-02-16 11:57:22.000000000 -0400
+++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_proto_ah.c 2009-02-16 12:56:22.000000000 -0400
@@ -1,8 +1,6 @@
/*
* ip_vs_proto_ah.c: AH IPSec load balancing support for IPVS
*
- * Version: $Id: ip_vs_proto_ah.c,v 1.1 2003/07/04 15:04:37 wensong Exp $
- *
* Authors: Julian Anastasov <ja@ssi.bg>, February 2002
* Wensong Zhang <wensong@linuxvirtualserver.org>
*
@@ -52,15 +50,15 @@
if (likely(!inverse)) {
cp = ip_vs_conn_in_get(IPPROTO_UDP,
iph->saddr,
- __constant_htons(PORT_ISAKMP),
+ htons(PORT_ISAKMP),
iph->daddr,
- __constant_htons(PORT_ISAKMP));
+ htons(PORT_ISAKMP));
} else {
cp = ip_vs_conn_in_get(IPPROTO_UDP,
iph->daddr,
- __constant_htons(PORT_ISAKMP),
+ htons(PORT_ISAKMP),
iph->saddr,
- __constant_htons(PORT_ISAKMP));
+ htons(PORT_ISAKMP));
}
if (!cp) {
@@ -89,15 +87,15 @@
if (likely(!inverse)) {
cp = ip_vs_conn_out_get(IPPROTO_UDP,
iph->saddr,
- __constant_htons(PORT_ISAKMP),
+ htons(PORT_ISAKMP),
iph->daddr,
- __constant_htons(PORT_ISAKMP));
+ htons(PORT_ISAKMP));
} else {
cp = ip_vs_conn_out_get(IPPROTO_UDP,
iph->daddr,
- __constant_htons(PORT_ISAKMP),
+ htons(PORT_ISAKMP),
iph->saddr,
- __constant_htons(PORT_ISAKMP));
+ htons(PORT_ISAKMP));
}
if (!cp) {
@@ -160,6 +158,7 @@
struct ip_vs_protocol ip_vs_protocol_ah = {
.name = "AH",
.protocol = IPPROTO_AH,
+ .num_states = 1,
.dont_defrag = 1,
.init = ah_init,
.exit = ah_exit,
diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_proto_esp.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_proto_esp.c
--- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_proto_esp.c 2009-02-16 11:57:22.000000000 -0400
+++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_proto_esp.c 2009-02-16 12:56:22.000000000 -0400
@@ -1,8 +1,6 @@
/*
* ip_vs_proto_esp.c: ESP IPSec load balancing support for IPVS
*
- * Version: $Id: ip_vs_proto_esp.c,v 1.1 2003/07/04 15:04:37 wensong Exp $
- *
* Authors: Julian Anastasov <ja@ssi.bg>, February 2002
* Wensong Zhang <wensong@linuxvirtualserver.org>
*
@@ -52,15 +50,15 @@
if (likely(!inverse)) {
cp = ip_vs_conn_in_get(IPPROTO_UDP,
iph->saddr,
- __constant_htons(PORT_ISAKMP),
+ htons(PORT_ISAKMP),
iph->daddr,
- __constant_htons(PORT_ISAKMP));
+ htons(PORT_ISAKMP));
} else {
cp = ip_vs_conn_in_get(IPPROTO_UDP,
iph->daddr,
- __constant_htons(PORT_ISAKMP),
+ htons(PORT_ISAKMP),
iph->saddr,
- __constant_htons(PORT_ISAKMP));
+ htons(PORT_ISAKMP));
}
if (!cp) {
@@ -89,15 +87,15 @@
if (likely(!inverse)) {
cp = ip_vs_conn_out_get(IPPROTO_UDP,
iph->saddr,
- __constant_htons(PORT_ISAKMP),
+ htons(PORT_ISAKMP),
iph->daddr,
- __constant_htons(PORT_ISAKMP));
+ htons(PORT_ISAKMP));
} else {
cp = ip_vs_conn_out_get(IPPROTO_UDP,
iph->daddr,
- __constant_htons(PORT_ISAKMP),
+ htons(PORT_ISAKMP),
iph->saddr,
- __constant_htons(PORT_ISAKMP));
+ htons(PORT_ISAKMP));
}
if (!cp) {
@@ -159,6 +157,7 @@
struct ip_vs_protocol ip_vs_protocol_esp = {
.name = "ESP",
.protocol = IPPROTO_ESP,
+ .num_states = 1,
.dont_defrag = 1,
.init = esp_init,
.exit = esp_exit,
diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_proto_tcp.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_proto_tcp.c
--- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_proto_tcp.c 2009-02-16 11:57:22.000000000 -0400
+++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_proto_tcp.c 2009-02-16 12:56:22.000000000 -0400
@@ -1,8 +1,6 @@
/*
* ip_vs_proto_tcp.c: TCP load balancing support for IPVS
*
- * Version: $Id: ip_vs_proto_tcp.c,v 1.3 2002/11/30 01:50:35 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
* Julian Anastasov <ja@ssi.bg>
*
@@ -20,6 +18,7 @@
#include <linux/tcp.h> /* for tcphdr */
#include <net/ip.h>
#include <net/tcp.h> /* for csum_tcpudp_magic */
+#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <net/ip_vs.h>
@@ -76,16 +75,15 @@
struct ip_vs_service *svc;
struct tcphdr _tcph, *th;
- th = skb_header_pointer(skb, skb->nh.iph->ihl*4,
- sizeof(_tcph), &_tcph);
+ th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
if (th == NULL) {
*verdict = NF_DROP;
return 0;
}
if (th->syn &&
- (svc = ip_vs_service_get(skb->mark, skb->nh.iph->protocol,
- skb->nh.iph->daddr, th->dest))) {
+ (svc = ip_vs_service_get(skb->mark, ip_hdr(skb)->protocol,
+ ip_hdr(skb)->daddr, th->dest))) {
if (ip_vs_todrop()) {
/*
* It seems that we are very loaded.
@@ -123,27 +121,27 @@
static int
-tcp_snat_handler(struct sk_buff **pskb,
+tcp_snat_handler(struct sk_buff *skb,
struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
{
struct tcphdr *tcph;
- unsigned int tcphoff = (*pskb)->nh.iph->ihl * 4;
+ const unsigned int tcphoff = ip_hdrlen(skb);
/* csum_check requires unshared skb */
- if (!ip_vs_make_skb_writable(pskb, tcphoff+sizeof(*tcph)))
+ if (!skb_make_writable(skb, tcphoff+sizeof(*tcph)))
return 0;
if (unlikely(cp->app != NULL)) {
/* Some checks before mangling */
- if (pp->csum_check && !pp->csum_check(*pskb, pp))
+ if (pp->csum_check && !pp->csum_check(skb, pp))
return 0;
/* Call application helper if needed */
- if (!ip_vs_app_pkt_out(cp, pskb))
+ if (!ip_vs_app_pkt_out(cp, skb))
return 0;
}
- tcph = (void *)(*pskb)->nh.iph + tcphoff;
+ tcph = (void *)ip_hdr(skb) + tcphoff;
tcph->source = cp->vport;
/* Adjust TCP checksums */
@@ -151,17 +149,15 @@
/* Only port and addr are changed, do fast csum update */
tcp_fast_csum_update(tcph, cp->daddr, cp->vaddr,
cp->dport, cp->vport);
- if ((*pskb)->ip_summed == CHECKSUM_COMPLETE)
- (*pskb)->ip_summed = CHECKSUM_NONE;
+ if (skb->ip_summed == CHECKSUM_COMPLETE)
+ skb->ip_summed = CHECKSUM_NONE;
} else {
/* full checksum calculation */
tcph->check = 0;
- (*pskb)->csum = skb_checksum(*pskb, tcphoff,
- (*pskb)->len - tcphoff, 0);
+ skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
tcph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr,
- (*pskb)->len - tcphoff,
- cp->protocol,
- (*pskb)->csum);
+ skb->len - tcphoff,
+ cp->protocol, skb->csum);
IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
pp->name, tcph->check,
(char*)&(tcph->check) - (char*)tcph);
@@ -171,30 +167,30 @@
static int
-tcp_dnat_handler(struct sk_buff **pskb,
+tcp_dnat_handler(struct sk_buff *skb,
struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
{
struct tcphdr *tcph;
- unsigned int tcphoff = (*pskb)->nh.iph->ihl * 4;
+ const unsigned int tcphoff = ip_hdrlen(skb);
/* csum_check requires unshared skb */
- if (!ip_vs_make_skb_writable(pskb, tcphoff+sizeof(*tcph)))
+ if (!skb_make_writable(skb, tcphoff+sizeof(*tcph)))
return 0;
if (unlikely(cp->app != NULL)) {
/* Some checks before mangling */
- if (pp->csum_check && !pp->csum_check(*pskb, pp))
+ if (pp->csum_check && !pp->csum_check(skb, pp))
return 0;
/*
* Attempt ip_vs_app call.
* It will fix ip_vs_conn and iph ack_seq stuff
*/
- if (!ip_vs_app_pkt_in(cp, pskb))
+ if (!ip_vs_app_pkt_in(cp, skb))
return 0;
}
- tcph = (void *)(*pskb)->nh.iph + tcphoff;
+ tcph = (void *)ip_hdr(skb) + tcphoff;
tcph->dest = cp->dport;
/*
@@ -204,18 +200,16 @@
/* Only port and addr are changed, do fast csum update */
tcp_fast_csum_update(tcph, cp->vaddr, cp->daddr,
cp->vport, cp->dport);
- if ((*pskb)->ip_summed == CHECKSUM_COMPLETE)
- (*pskb)->ip_summed = CHECKSUM_NONE;
+ if (skb->ip_summed == CHECKSUM_COMPLETE)
+ skb->ip_summed = CHECKSUM_NONE;
} else {
/* full checksum calculation */
tcph->check = 0;
- (*pskb)->csum = skb_checksum(*pskb, tcphoff,
- (*pskb)->len - tcphoff, 0);
+ skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
tcph->check = csum_tcpudp_magic(cp->caddr, cp->daddr,
- (*pskb)->len - tcphoff,
- cp->protocol,
- (*pskb)->csum);
- (*pskb)->ip_summed = CHECKSUM_UNNECESSARY;
+ skb->len - tcphoff,
+ cp->protocol, skb->csum);
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
}
return 1;
}
@@ -224,15 +218,15 @@
static int
tcp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
{
- unsigned int tcphoff = skb->nh.iph->ihl*4;
+ const unsigned int tcphoff = ip_hdrlen(skb);
switch (skb->ip_summed) {
case CHECKSUM_NONE:
skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
case CHECKSUM_COMPLETE:
- if (csum_tcpudp_magic(skb->nh.iph->saddr, skb->nh.iph->daddr,
+ if (csum_tcpudp_magic(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
skb->len - tcphoff,
- skb->nh.iph->protocol, skb->csum)) {
+ ip_hdr(skb)->protocol, skb->csum)) {
IP_VS_DBG_RL_PKT(0, pp, skb, 0,
"Failed checksum for");
return 0;
@@ -467,8 +461,7 @@
{
struct tcphdr _tcph, *th;
- th = skb_header_pointer(skb, skb->nh.iph->ihl*4,
- sizeof(_tcph), &_tcph);
+ th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
if (th == NULL)
return 0;
@@ -555,7 +548,7 @@
IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->"
"%u.%u.%u.%u:%u to app %s on port %u\n",
- __FUNCTION__,
+ __func__,
NIPQUAD(cp->caddr), ntohs(cp->cport),
NIPQUAD(cp->vaddr), ntohs(cp->vport),
inc->name, ntohs(inc->port));
@@ -599,6 +592,7 @@
struct ip_vs_protocol ip_vs_protocol_tcp = {
.name = "TCP",
.protocol = IPPROTO_TCP,
+ .num_states = IP_VS_TCP_S_LAST,
.dont_defrag = 0,
.appcnt = ATOMIC_INIT(0),
.init = ip_vs_tcp_init,
diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_proto_udp.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_proto_udp.c
--- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_proto_udp.c 2009-02-16 11:57:22.000000000 -0400
+++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_proto_udp.c 2009-02-16 12:56:22.000000000 -0400
@@ -1,8 +1,6 @@
/*
* ip_vs_proto_udp.c: UDP load balancing support for IPVS
*
- * Version: $Id: ip_vs_proto_udp.c,v 1.3 2002/11/30 01:50:35 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
* Julian Anastasov <ja@ssi.bg>
*
@@ -18,11 +16,12 @@
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/kernel.h>
+#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/udp.h>
#include <net/ip_vs.h>
-
+#include <net/ip.h>
static struct ip_vs_conn *
udp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
@@ -56,7 +55,7 @@
struct ip_vs_conn *cp;
__be16 _ports[2], *pptr;
- pptr = skb_header_pointer(skb, skb->nh.iph->ihl*4,
+ pptr = skb_header_pointer(skb, ip_hdrlen(skb),
sizeof(_ports), _ports);
if (pptr == NULL)
return NULL;
@@ -82,15 +81,15 @@
struct ip_vs_service *svc;
struct udphdr _udph, *uh;
- uh = skb_header_pointer(skb, skb->nh.iph->ihl*4,
+ uh = skb_header_pointer(skb, ip_hdrlen(skb),
sizeof(_udph), &_udph);
if (uh == NULL) {
*verdict = NF_DROP;
return 0;
}
- if ((svc = ip_vs_service_get(skb->mark, skb->nh.iph->protocol,
- skb->nh.iph->daddr, uh->dest))) {
+ if ((svc = ip_vs_service_get(skb->mark, ip_hdr(skb)->protocol,
+ ip_hdr(skb)->daddr, uh->dest))) {
if (ip_vs_todrop()) {
/*
* It seems that we are very loaded.
@@ -129,29 +128,29 @@
}
static int
-udp_snat_handler(struct sk_buff **pskb,
+udp_snat_handler(struct sk_buff *skb,
struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
{
struct udphdr *udph;
- unsigned int udphoff = (*pskb)->nh.iph->ihl * 4;
+ const unsigned int udphoff = ip_hdrlen(skb);
/* csum_check requires unshared skb */
- if (!ip_vs_make_skb_writable(pskb, udphoff+sizeof(*udph)))
+ if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
return 0;
if (unlikely(cp->app != NULL)) {
/* Some checks before mangling */
- if (pp->csum_check && !pp->csum_check(*pskb, pp))
+ if (pp->csum_check && !pp->csum_check(skb, pp))
return 0;
/*
* Call application helper if needed
*/
- if (!ip_vs_app_pkt_out(cp, pskb))
+ if (!ip_vs_app_pkt_out(cp, skb))
return 0;
}
- udph = (void *)(*pskb)->nh.iph + udphoff;
+ udph = (void *)ip_hdr(skb) + udphoff;
udph->source = cp->vport;
/*
@@ -161,17 +160,15 @@
/* Only port and addr are changed, do fast csum update */
udp_fast_csum_update(udph, cp->daddr, cp->vaddr,
cp->dport, cp->vport);
- if ((*pskb)->ip_summed == CHECKSUM_COMPLETE)
- (*pskb)->ip_summed = CHECKSUM_NONE;
+ if (skb->ip_summed == CHECKSUM_COMPLETE)
+ skb->ip_summed = CHECKSUM_NONE;
} else {
/* full checksum calculation */
udph->check = 0;
- (*pskb)->csum = skb_checksum(*pskb, udphoff,
- (*pskb)->len - udphoff, 0);
+ skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0);
udph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr,
- (*pskb)->len - udphoff,
- cp->protocol,
- (*pskb)->csum);
+ skb->len - udphoff,
+ cp->protocol, skb->csum);
if (udph->check == 0)
udph->check = CSUM_MANGLED_0;
IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
@@ -183,30 +180,30 @@
static int
-udp_dnat_handler(struct sk_buff **pskb,
+udp_dnat_handler(struct sk_buff *skb,
struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
{
struct udphdr *udph;
- unsigned int udphoff = (*pskb)->nh.iph->ihl * 4;
+ unsigned int udphoff = ip_hdrlen(skb);
/* csum_check requires unshared skb */
- if (!ip_vs_make_skb_writable(pskb, udphoff+sizeof(*udph)))
+ if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
return 0;
if (unlikely(cp->app != NULL)) {
/* Some checks before mangling */
- if (pp->csum_check && !pp->csum_check(*pskb, pp))
+ if (pp->csum_check && !pp->csum_check(skb, pp))
return 0;
/*
* Attempt ip_vs_app call.
* It will fix ip_vs_conn
*/
- if (!ip_vs_app_pkt_in(cp, pskb))
+ if (!ip_vs_app_pkt_in(cp, skb))
return 0;
}
- udph = (void *)(*pskb)->nh.iph + udphoff;
+ udph = (void *)ip_hdr(skb) + udphoff;
udph->dest = cp->dport;
/*
@@ -216,20 +213,18 @@
/* Only port and addr are changed, do fast csum update */
udp_fast_csum_update(udph, cp->vaddr, cp->daddr,
cp->vport, cp->dport);
- if ((*pskb)->ip_summed == CHECKSUM_COMPLETE)
- (*pskb)->ip_summed = CHECKSUM_NONE;
+ if (skb->ip_summed == CHECKSUM_COMPLETE)
+ skb->ip_summed = CHECKSUM_NONE;
} else {
/* full checksum calculation */
udph->check = 0;
- (*pskb)->csum = skb_checksum(*pskb, udphoff,
- (*pskb)->len - udphoff, 0);
+ skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0);
udph->check = csum_tcpudp_magic(cp->caddr, cp->daddr,
- (*pskb)->len - udphoff,
- cp->protocol,
- (*pskb)->csum);
+ skb->len - udphoff,
+ cp->protocol, skb->csum);
if (udph->check == 0)
udph->check = CSUM_MANGLED_0;
- (*pskb)->ip_summed = CHECKSUM_UNNECESSARY;
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
}
return 1;
}
@@ -239,7 +234,7 @@
udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
{
struct udphdr _udph, *uh;
- unsigned int udphoff = skb->nh.iph->ihl*4;
+ const unsigned int udphoff = ip_hdrlen(skb);
uh = skb_header_pointer(skb, udphoff, sizeof(_udph), &_udph);
if (uh == NULL)
@@ -251,10 +246,10 @@
skb->csum = skb_checksum(skb, udphoff,
skb->len - udphoff, 0);
case CHECKSUM_COMPLETE:
- if (csum_tcpudp_magic(skb->nh.iph->saddr,
- skb->nh.iph->daddr,
+ if (csum_tcpudp_magic(ip_hdr(skb)->saddr,
+ ip_hdr(skb)->daddr,
skb->len - udphoff,
- skb->nh.iph->protocol,
+ ip_hdr(skb)->protocol,
skb->csum)) {
IP_VS_DBG_RL_PKT(0, pp, skb, 0,
"Failed checksum for");
@@ -347,7 +342,7 @@
IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->"
"%u.%u.%u.%u:%u to app %s on port %u\n",
- __FUNCTION__,
+ __func__,
NIPQUAD(cp->caddr), ntohs(cp->cport),
NIPQUAD(cp->vaddr), ntohs(cp->vport),
inc->name, ntohs(inc->port));
@@ -412,6 +407,7 @@
struct ip_vs_protocol ip_vs_protocol_udp = {
.name = "UDP",
.protocol = IPPROTO_UDP,
+ .num_states = IP_VS_UDP_S_LAST,
.dont_defrag = 0,
.init = udp_init,
.exit = udp_exit,
diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_rr.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_rr.c
--- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_rr.c 2009-02-16 11:57:22.000000000 -0400
+++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_rr.c 2009-02-16 12:56:22.000000000 -0400
@@ -1,8 +1,6 @@
/*
* IPVS: Round-Robin Scheduling module
*
- * Version: $Id: ip_vs_rr.c,v 1.9 2002/09/15 08:14:08 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
* Peter Kese <peter.kese@ijs.si>
*
@@ -68,7 +66,7 @@
q = q->next;
continue;
}
-
+
dest = list_entry(q, struct ip_vs_dest, n_list);
if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
atomic_read(&dest->weight) > 0)
@@ -96,6 +94,7 @@
.name = "rr", /* name */
.refcnt = ATOMIC_INIT(0),
.module = THIS_MODULE,
+ .n_list = LIST_HEAD_INIT(ip_vs_rr_scheduler.n_list),
.init_service = ip_vs_rr_init_svc,
.done_service = ip_vs_rr_done_svc,
.update_service = ip_vs_rr_update_svc,
@@ -104,7 +103,6 @@
static int __init ip_vs_rr_init(void)
{
- INIT_LIST_HEAD(&ip_vs_rr_scheduler.n_list);
return register_ip_vs_scheduler(&ip_vs_rr_scheduler);
}
diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_sched.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_sched.c
--- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_sched.c 2009-02-16 11:57:22.000000000 -0400
+++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_sched.c 2009-02-16 12:56:22.000000000 -0400
@@ -5,8 +5,6 @@
* high-performance and highly available server based on a
* cluster of servers.
*
- * Version: $Id: ip_vs_sched.c,v 1.13 2003/05/10 03:05:23 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
* Peter Kese <peter.kese@ijs.si>
*
@@ -20,11 +18,11 @@
*/
#include <linux/module.h>
-#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/interrupt.h>
#include <asm/string.h>
#include <linux/kmod.h>
+#include <linux/sysctl.h>
#include <net/ip_vs.h>
@@ -184,22 +182,9 @@
/* increase the module use count */
ip_vs_use_count_inc();
- /*
- * Make sure that the scheduler with this name doesn't exist
- * in the scheduler list.
- */
- sched = ip_vs_sched_getbyname(scheduler->name);
- if (sched) {
- ip_vs_scheduler_put(sched);
- ip_vs_use_count_dec();
- IP_VS_ERR("register_ip_vs_scheduler(): [%s] scheduler "
- "already existed in the system\n", scheduler->name);
- return -EINVAL;
- }
-
write_lock_bh(&__ip_vs_sched_lock);
- if (scheduler->n_list.next != &scheduler->n_list) {
+ if (!list_empty(&scheduler->n_list)) {
write_unlock_bh(&__ip_vs_sched_lock);
ip_vs_use_count_dec();
IP_VS_ERR("register_ip_vs_scheduler(): [%s] scheduler "
@@ -208,6 +193,20 @@
}
/*
+ * Make sure that the scheduler with this name doesn't exist
+ * in the scheduler list.
+ */
+ list_for_each_entry(sched, &ip_vs_schedulers, n_list) {
+ if (strcmp(scheduler->name, sched->name) == 0) {
+ write_unlock_bh(&__ip_vs_sched_lock);
+ ip_vs_use_count_dec();
+ IP_VS_ERR("register_ip_vs_scheduler(): [%s] scheduler "
+ "already existed in the system\n",
+ scheduler->name);
+ return -EINVAL;
+ }
+ }
+ /*
* Add it into the d-linked scheduler list
*/
list_add(&scheduler->n_list, &ip_vs_schedulers);
@@ -230,7 +229,7 @@
}
write_lock_bh(&__ip_vs_sched_lock);
- if (scheduler->n_list.next == &scheduler->n_list) {
+ if (list_empty(&scheduler->n_list)) {
write_unlock_bh(&__ip_vs_sched_lock);
IP_VS_ERR("unregister_ip_vs_scheduler(): [%s] scheduler "
"is not in the list. failed\n", scheduler->name);
diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_sed.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_sed.c
--- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_sed.c 2009-02-16 11:57:22.000000000 -0400
+++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_sed.c 2009-02-16 12:56:22.000000000 -0400
@@ -1,8 +1,6 @@
/*
* IPVS: Shortest Expected Delay scheduling module
*
- * Version: $Id: ip_vs_sed.c,v 1.1 2003/05/10 03:06:08 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
*
* This program is free software; you can redistribute it and/or
@@ -18,7 +16,7 @@
* The SED algorithm attempts to minimize each job's expected delay until
* completion. The expected delay that the job will experience is
* (Ci + 1) / Ui if sent to the ith server, in which Ci is the number of
- * jobs on the the ith server and Ui is the fixed service rate (weight) of
+ * jobs on the ith server and Ui is the fixed service rate (weight) of
* the ith server. The SED algorithm adopts a greedy policy that each does
* what is in its own best interest, i.e. to join the queue which would
* minimize its expected delay of completion.
@@ -140,6 +138,7 @@
.name = "sed",
.refcnt = ATOMIC_INIT(0),
.module = THIS_MODULE,
+ .n_list = LIST_HEAD_INIT(ip_vs_sed_scheduler.n_list),
.init_service = ip_vs_sed_init_svc,
.done_service = ip_vs_sed_done_svc,
.update_service = ip_vs_sed_update_svc,
@@ -149,7 +148,6 @@
static int __init ip_vs_sed_init(void)
{
- INIT_LIST_HEAD(&ip_vs_sed_scheduler.n_list);
return register_ip_vs_scheduler(&ip_vs_sed_scheduler);
}
diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_sh.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_sh.c
--- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_sh.c 2009-02-16 11:57:22.000000000 -0400
+++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_sh.c 2009-02-16 12:56:22.000000000 -0400
@@ -1,8 +1,6 @@
/*
* IPVS: Source Hashing scheduling module
*
- * Version: $Id: ip_vs_sh.c,v 1.5 2002/09/15 08:14:08 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@gnuchina.org>
*
* This program is free software; you can redistribute it and/or
@@ -201,7 +199,7 @@
{
struct ip_vs_dest *dest;
struct ip_vs_sh_bucket *tbl;
- struct iphdr *iph = skb->nh.iph;
+ struct iphdr *iph = ip_hdr(skb);
IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n");
@@ -232,6 +230,7 @@
.name = "sh",
.refcnt = ATOMIC_INIT(0),
.module = THIS_MODULE,
+ .n_list = LIST_HEAD_INIT(ip_vs_sh_scheduler.n_list),
.init_service = ip_vs_sh_init_svc,
.done_service = ip_vs_sh_done_svc,
.update_service = ip_vs_sh_update_svc,
@@ -241,7 +240,6 @@
static int __init ip_vs_sh_init(void)
{
- INIT_LIST_HEAD(&ip_vs_sh_scheduler.n_list);
return register_ip_vs_scheduler(&ip_vs_sh_scheduler);
}
diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_sync.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_sync.c
--- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_sync.c 2009-02-16 11:57:22.000000000 -0400
+++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_sync.c 2009-02-16 12:56:22.000000000 -0400
@@ -5,8 +5,6 @@
* high-performance and highly available server based on a
* cluster of servers.
*
- * Version: $Id: ip_vs_sync.c,v 1.13 2003/06/08 09:31:19 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
*
* ip_vs_sync: sync connection info from master load balancer to backups
@@ -29,10 +27,12 @@
#include <linux/in.h>
#include <linux/igmp.h> /* for ip_mc_join_group */
#include <linux/udp.h>
+#include <linux/err.h>
+#include <linux/kthread.h>
+#include <linux/wait.h>
#include <net/ip.h>
#include <net/sock.h>
-#include <asm/uaccess.h> /* for get_fs and set_fs */
#include <net/ip_vs.h>
@@ -67,7 +67,11 @@
struct ip_vs_seq out_seq; /* outgoing seq. struct */
};
-#define IP_VS_SYNC_CONN_TIMEOUT (3*60*HZ)
+struct ip_vs_sync_thread_data {
+ struct socket *sock;
+ char *buf;
+};
+
#define SIMPLE_CONN_SIZE (sizeof(struct ip_vs_sync_conn))
#define FULL_CONN_SIZE \
(sizeof(struct ip_vs_sync_conn) + sizeof(struct ip_vs_sync_conn_options))
@@ -136,18 +140,19 @@
char ip_vs_master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
-/* multicast addr */
-static struct sockaddr_in mcast_addr;
+/* sync daemon tasks */
+static struct task_struct *sync_master_thread;
+static struct task_struct *sync_backup_thread;
+/* multicast addr */
+static struct sockaddr_in mcast_addr = {
+ .sin_family = AF_INET,
+ .sin_port = __constant_htons(IP_VS_SYNC_PORT),
+ .sin_addr.s_addr = __constant_htonl(IP_VS_SYNC_GROUP),
+};
-static inline void sb_queue_tail(struct ip_vs_sync_buff *sb)
-{
- spin_lock(&ip_vs_sync_lock);
- list_add_tail(&sb->list, &ip_vs_sync_queue);
- spin_unlock(&ip_vs_sync_lock);
-}
-static inline struct ip_vs_sync_buff * sb_dequeue(void)
+static inline struct ip_vs_sync_buff *sb_dequeue(void)
{
struct ip_vs_sync_buff *sb;
@@ -191,6 +196,16 @@
kfree(sb);
}
+static inline void sb_queue_tail(struct ip_vs_sync_buff *sb)
+{
+ spin_lock(&ip_vs_sync_lock);
+ if (ip_vs_sync_state & IP_VS_STATE_MASTER)
+ list_add_tail(&sb->list, &ip_vs_sync_queue);
+ else
+ ip_vs_sync_buff_release(sb);
+ spin_unlock(&ip_vs_sync_lock);
+}
+
/*
* Get the current sync buffer if it has been created for more
* than the specified time or the specified time is zero.
@@ -279,14 +294,21 @@
struct ip_vs_sync_conn *s;
struct ip_vs_sync_conn_options *opt;
struct ip_vs_conn *cp;
+ struct ip_vs_protocol *pp;
+ struct ip_vs_dest *dest;
char *p;
int i;
+ if (buflen < sizeof(struct ip_vs_sync_mesg)) {
+ IP_VS_ERR_RL("sync message header too short\n");
+ return;
+ }
+
/* Convert size back to host byte order */
m->size = ntohs(m->size);
if (buflen != m->size) {
- IP_VS_ERR("bogus message\n");
+ IP_VS_ERR_RL("bogus sync message size\n");
return;
}
@@ -299,10 +321,50 @@
p = (char *)buffer + sizeof(struct ip_vs_sync_mesg);
for (i=0; i<m->nr_conns; i++) {
- unsigned flags;
+ unsigned flags, state;
+
+ if (p + SIMPLE_CONN_SIZE > buffer+buflen) {
+ IP_VS_ERR_RL("bogus conn in sync message\n");
+ return;
+ }
+ s = (struct ip_vs_sync_conn *) p;
+ flags = ntohs(s->flags) | IP_VS_CONN_F_SYNC;
+ flags &= ~IP_VS_CONN_F_HASHED;
+ if (flags & IP_VS_CONN_F_SEQ_MASK) {
+ opt = (struct ip_vs_sync_conn_options *)&s[1];
+ p += FULL_CONN_SIZE;
+ if (p > buffer+buflen) {
+ IP_VS_ERR_RL("bogus conn options in sync message\n");
+ return;
+ }
+ } else {
+ opt = NULL;
+ p += SIMPLE_CONN_SIZE;
+ }
+
+ state = ntohs(s->state);
+ if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
+ pp = ip_vs_proto_get(s->protocol);
+ if (!pp) {
+ IP_VS_ERR_RL("Unsupported protocol %u in sync msg\n",
+ s->protocol);
+ continue;
+ }
+ if (state >= pp->num_states) {
+ IP_VS_DBG(2, "Invalid %s state %u in sync msg\n",
+ pp->name, state);
+ continue;
+ }
+ } else {
+ /* protocol in templates is not used for state/timeout */
+ pp = NULL;
+ if (state > 0) {
+ IP_VS_DBG(2, "Invalid template state %u in sync msg\n",
+ state);
+ state = 0;
+ }
+ }
- s = (struct ip_vs_sync_conn *)p;
- flags = ntohs(s->flags);
if (!(flags & IP_VS_CONN_F_TEMPLATE))
cp = ip_vs_conn_in_get(s->protocol,
s->caddr, s->cport,
@@ -312,38 +374,69 @@
s->caddr, s->cport,
s->vaddr, s->vport);
if (!cp) {
+ /*
+ * Find the appropriate destination for the connection.
+ * If it is not found the connection will remain unbound
+ * but still handled.
+ */
+ dest = ip_vs_find_dest(s->daddr, s->dport,
+ s->vaddr, s->vport,
+ s->protocol);
+ /* Set the approprite ativity flag */
+ if (s->protocol == IPPROTO_TCP) {
+ if (state != IP_VS_TCP_S_ESTABLISHED)
+ flags |= IP_VS_CONN_F_INACTIVE;
+ else
+ flags &= ~IP_VS_CONN_F_INACTIVE;
+ }
cp = ip_vs_conn_new(s->protocol,
s->caddr, s->cport,
s->vaddr, s->vport,
s->daddr, s->dport,
- flags, NULL);
+ flags, dest);
+ if (dest)
+ atomic_dec(&dest->refcnt);
if (!cp) {
IP_VS_ERR("ip_vs_conn_new failed\n");
return;
}
- cp->state = ntohs(s->state);
} else if (!cp->dest) {
- /* it is an entry created by the synchronization */
- cp->state = ntohs(s->state);
- cp->flags = flags | IP_VS_CONN_F_HASHED;
- } /* Note that we don't touch its state and flags
- if it is a normal entry. */
+ dest = ip_vs_try_bind_dest(cp);
+ if (dest)
+ atomic_dec(&dest->refcnt);
+ } else if ((cp->dest) && (cp->protocol == IPPROTO_TCP) &&
+ (cp->state != state)) {
+ /* update active/inactive flag for the connection */
+ dest = cp->dest;
+ if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
+ (state != IP_VS_TCP_S_ESTABLISHED)) {
+ atomic_dec(&dest->activeconns);
+ atomic_inc(&dest->inactconns);
+ cp->flags |= IP_VS_CONN_F_INACTIVE;
+ } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
+ (state == IP_VS_TCP_S_ESTABLISHED)) {
+ atomic_inc(&dest->activeconns);
+ atomic_dec(&dest->inactconns);
+ cp->flags &= ~IP_VS_CONN_F_INACTIVE;
+ }
+ }
- if (flags & IP_VS_CONN_F_SEQ_MASK) {
- opt = (struct ip_vs_sync_conn_options *)&s[1];
+ if (opt)
memcpy(&cp->in_seq, opt, sizeof(*opt));
- p += FULL_CONN_SIZE;
- } else
- p += SIMPLE_CONN_SIZE;
-
atomic_set(&cp->in_pkts, sysctl_ip_vs_sync_threshold[0]);
- cp->timeout = IP_VS_SYNC_CONN_TIMEOUT;
+ cp->state = state;
+ cp->old_state = cp->state;
+ /*
+ * We can not recover the right timeout for templates
+ * in all cases, we can not find the right fwmark
+ * virtual service. If needed, we can do it for
+ * non-fwmark persistent services.
+ */
+ if (!(flags & IP_VS_CONN_F_TEMPLATE) && pp->timeout_table)
+ cp->timeout = pp->timeout_table[state];
+ else
+ cp->timeout = (3*60*HZ);
ip_vs_conn_put(cp);
-
- if (p > buffer+buflen) {
- IP_VS_ERR("bogus message\n");
- return;
- }
}
}
@@ -382,7 +475,7 @@
struct net_device *dev;
struct inet_sock *inet = inet_sk(sk);
- if ((dev = __dev_get_by_name(ifname)) == NULL)
+ if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL)
return -ENODEV;
if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
@@ -407,7 +500,7 @@
int num;
if (sync_state == IP_VS_STATE_MASTER) {
- if ((dev = __dev_get_by_name(ip_vs_master_mcast_ifn)) == NULL)
+ if ((dev = __dev_get_by_name(&init_net, ip_vs_master_mcast_ifn)) == NULL)
return -ENODEV;
num = (dev->mtu - sizeof(struct iphdr) -
@@ -418,7 +511,7 @@
IP_VS_DBG(7, "setting the maximum length of sync sending "
"message %d.\n", sync_send_mesg_maxlen);
} else if (sync_state == IP_VS_STATE_BACKUP) {
- if ((dev = __dev_get_by_name(ip_vs_backup_mcast_ifn)) == NULL)
+ if ((dev = __dev_get_by_name(&init_net, ip_vs_backup_mcast_ifn)) == NULL)
return -ENODEV;
sync_recv_mesg_maxlen = dev->mtu -
@@ -446,7 +539,7 @@
memset(&mreq, 0, sizeof(mreq));
memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr));
- if ((dev = __dev_get_by_name(ifname)) == NULL)
+ if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL)
return -ENODEV;
if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
return -EINVAL;
@@ -467,7 +560,7 @@
__be32 addr;
struct sockaddr_in sin;
- if ((dev = __dev_get_by_name(ifname)) == NULL)
+ if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL)
return -ENODEV;
addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
@@ -492,14 +585,17 @@
static struct socket * make_send_sock(void)
{
struct socket *sock;
+ int result;
/* First create a socket */
- if (sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock) < 0) {
+ result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
+ if (result < 0) {
IP_VS_ERR("Error during creation of socket; terminating\n");
- return NULL;
+ return ERR_PTR(result);
}
- if (set_mcast_if(sock->sk, ip_vs_master_mcast_ifn) < 0) {
+ result = set_mcast_if(sock->sk, ip_vs_master_mcast_ifn);
+ if (result < 0) {
IP_VS_ERR("Error setting outbound mcast interface\n");
goto error;
}
@@ -507,14 +603,15 @@
set_mcast_loop(sock->sk, 0);
set_mcast_ttl(sock->sk, 1);
- if (bind_mcastif_addr(sock, ip_vs_master_mcast_ifn) < 0) {
+ result = bind_mcastif_addr(sock, ip_vs_master_mcast_ifn);
+ if (result < 0) {
IP_VS_ERR("Error binding address of the mcast interface\n");
goto error;
}
- if (sock->ops->connect(sock,
- (struct sockaddr*)&mcast_addr,
- sizeof(struct sockaddr), 0) < 0) {
+ result = sock->ops->connect(sock, (struct sockaddr *) &mcast_addr,
+ sizeof(struct sockaddr), 0);
+ if (result < 0) {
IP_VS_ERR("Error connecting to the multicast addr\n");
goto error;
}
@@ -523,7 +620,7 @@
error:
sock_release(sock);
- return NULL;
+ return ERR_PTR(result);
}
@@ -533,27 +630,30 @@
static struct socket * make_receive_sock(void)
{
struct socket *sock;
+ int result;
/* First create a socket */
- if (sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock) < 0) {
+ result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
+ if (result < 0) {
IP_VS_ERR("Error during creation of socket; terminating\n");
- return NULL;
+ return ERR_PTR(result);
}
/* it is equivalent to the REUSEADDR option in user-space */
sock->sk->sk_reuse = 1;
- if (sock->ops->bind(sock,
- (struct sockaddr*)&mcast_addr,
- sizeof(struct sockaddr)) < 0) {
+ result = sock->ops->bind(sock, (struct sockaddr *) &mcast_addr,
+ sizeof(struct sockaddr));
+ if (result < 0) {
IP_VS_ERR("Error binding to the multicast addr\n");
goto error;
}
/* join the multicast group */
- if (join_mcast_group(sock->sk,
- (struct in_addr*)&mcast_addr.sin_addr,
- ip_vs_backup_mcast_ifn) < 0) {
+ result = join_mcast_group(sock->sk,
+ (struct in_addr *) &mcast_addr.sin_addr,
+ ip_vs_backup_mcast_ifn);
+ if (result < 0) {
IP_VS_ERR("Error joining to the multicast group\n");
goto error;
}
@@ -562,7 +662,7 @@
error:
sock_release(sock);
- return NULL;
+ return ERR_PTR(result);
}
@@ -620,44 +720,29 @@
}
-static DECLARE_WAIT_QUEUE_HEAD(sync_wait);
-static pid_t sync_master_pid = 0;
-static pid_t sync_backup_pid = 0;
-
-static DECLARE_WAIT_QUEUE_HEAD(stop_sync_wait);
-static int stop_master_sync = 0;
-static int stop_backup_sync = 0;
-
-static void sync_master_loop(void)
+static int sync_thread_master(void *data)
{
- struct socket *sock;
+ struct ip_vs_sync_thread_data *tinfo = data;
struct ip_vs_sync_buff *sb;
- /* create the sending multicast socket */
- sock = make_send_sock();
- if (!sock)
- return;
-
IP_VS_INFO("sync thread started: state = MASTER, mcast_ifn = %s, "
"syncid = %d\n",
ip_vs_master_mcast_ifn, ip_vs_master_syncid);
- for (;;) {
- while ((sb=sb_dequeue())) {
- ip_vs_send_sync_msg(sock, sb->mesg);
+ while (!kthread_should_stop()) {
+ while ((sb = sb_dequeue())) {
+ ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
ip_vs_sync_buff_release(sb);
}
/* check if entries stay in curr_sb for 2 seconds */
- if ((sb = get_curr_sync_buff(2*HZ))) {
- ip_vs_send_sync_msg(sock, sb->mesg);
+ sb = get_curr_sync_buff(2 * HZ);
+ if (sb) {
+ ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
ip_vs_sync_buff_release(sb);
}
- if (stop_master_sync)
- break;
-
- msleep_interruptible(1000);
+ schedule_timeout_interruptible(HZ);
}
/* clean up the sync_buff queue */
@@ -671,235 +756,175 @@
}
/* release the sending multicast socket */
- sock_release(sock);
+ sock_release(tinfo->sock);
+ kfree(tinfo);
+
+ return 0;
}
-static void sync_backup_loop(void)
+static int sync_thread_backup(void *data)
{
- struct socket *sock;
- char *buf;
+ struct ip_vs_sync_thread_data *tinfo = data;
int len;
- if (!(buf = kmalloc(sync_recv_mesg_maxlen, GFP_ATOMIC))) {
- IP_VS_ERR("sync_backup_loop: kmalloc error\n");
- return;
- }
-
- /* create the receiving multicast socket */
- sock = make_receive_sock();
- if (!sock)
- goto out;
-
IP_VS_INFO("sync thread started: state = BACKUP, mcast_ifn = %s, "
"syncid = %d\n",
ip_vs_backup_mcast_ifn, ip_vs_backup_syncid);
- for (;;) {
- /* do you have data now? */
- while (!skb_queue_empty(&(sock->sk->sk_receive_queue))) {
- if ((len =
- ip_vs_receive(sock, buf,
- sync_recv_mesg_maxlen)) <= 0) {
+ while (!kthread_should_stop()) {
+ wait_event_interruptible(*tinfo->sock->sk->sk_sleep,
+ !skb_queue_empty(&tinfo->sock->sk->sk_receive_queue)
+ || kthread_should_stop());
+
+ /* do we have data now? */
+ while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) {
+ len = ip_vs_receive(tinfo->sock, tinfo->buf,
+ sync_recv_mesg_maxlen);
+ if (len <= 0) {
IP_VS_ERR("receiving message error\n");
break;
}
- /* disable bottom half, because it accessed the data
+
+ /* disable bottom half, because it accesses the data
shared by softirq while getting/creating conns */
local_bh_disable();
- ip_vs_process_message(buf, len);
+ ip_vs_process_message(tinfo->buf, len);
local_bh_enable();
}
-
- if (stop_backup_sync)
- break;
-
- msleep_interruptible(1000);
}
/* release the sending multicast socket */
- sock_release(sock);
+ sock_release(tinfo->sock);
+ kfree(tinfo->buf);
+ kfree(tinfo);
- out:
- kfree(buf);
+ return 0;
}
-static void set_sync_pid(int sync_state, pid_t sync_pid)
-{
- if (sync_state == IP_VS_STATE_MASTER)
- sync_master_pid = sync_pid;
- else if (sync_state == IP_VS_STATE_BACKUP)
- sync_backup_pid = sync_pid;
-}
-
-static void set_stop_sync(int sync_state, int set)
+int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
{
- if (sync_state == IP_VS_STATE_MASTER)
- stop_master_sync = set;
- else if (sync_state == IP_VS_STATE_BACKUP)
- stop_backup_sync = set;
- else {
- stop_master_sync = set;
- stop_backup_sync = set;
- }
-}
+ struct ip_vs_sync_thread_data *tinfo;
+ struct task_struct **realtask, *task;
+ struct socket *sock;
+ char *name, *buf = NULL;
+ int (*threadfn)(void *data);
+ int result = -ENOMEM;
-static int sync_thread(void *startup)
-{
- DECLARE_WAITQUEUE(wait, current);
- mm_segment_t oldmm;
- int state;
- const char *name;
+ IP_VS_DBG(7, "%s: pid %d\n", __func__, task_pid_nr(current));
+ IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n",
+ sizeof(struct ip_vs_sync_conn));
- /* increase the module use count */
- ip_vs_use_count_inc();
+ if (state == IP_VS_STATE_MASTER) {
+ if (sync_master_thread)
+ return -EEXIST;
- if (ip_vs_sync_state & IP_VS_STATE_MASTER && !sync_master_pid) {
- state = IP_VS_STATE_MASTER;
+ strlcpy(ip_vs_master_mcast_ifn, mcast_ifn,
+ sizeof(ip_vs_master_mcast_ifn));
+ ip_vs_master_syncid = syncid;
+ realtask = &sync_master_thread;
name = "ipvs_syncmaster";
- } else if (ip_vs_sync_state & IP_VS_STATE_BACKUP && !sync_backup_pid) {
- state = IP_VS_STATE_BACKUP;
+ threadfn = sync_thread_master;
+ sock = make_send_sock();
+ } else if (state == IP_VS_STATE_BACKUP) {
+ if (sync_backup_thread)
+ return -EEXIST;
+
+ strlcpy(ip_vs_backup_mcast_ifn, mcast_ifn,
+ sizeof(ip_vs_backup_mcast_ifn));
+ ip_vs_backup_syncid = syncid;
+ realtask = &sync_backup_thread;
name = "ipvs_syncbackup";
+ threadfn = sync_thread_backup;
+ sock = make_receive_sock();
} else {
- IP_VS_BUG();
- ip_vs_use_count_dec();
return -EINVAL;
}
- daemonize(name);
-
- oldmm = get_fs();
- set_fs(KERNEL_DS);
-
- /* Block all signals */
- spin_lock_irq(&current->sighand->siglock);
- siginitsetinv(&current->blocked, 0);
- recalc_sigpending();
- spin_unlock_irq(&current->sighand->siglock);
+ if (IS_ERR(sock)) {
+ result = PTR_ERR(sock);
+ goto out;
+ }
- /* set the maximum length of sync message */
set_sync_mesg_maxlen(state);
+ if (state == IP_VS_STATE_BACKUP) {
+ buf = kmalloc(sync_recv_mesg_maxlen, GFP_KERNEL);
+ if (!buf)
+ goto outsocket;
+ }
- /* set up multicast address */
- mcast_addr.sin_family = AF_INET;
- mcast_addr.sin_port = htons(IP_VS_SYNC_PORT);
- mcast_addr.sin_addr.s_addr = htonl(IP_VS_SYNC_GROUP);
-
- add_wait_queue(&sync_wait, &wait);
-
- set_sync_pid(state, current->pid);
- complete((struct completion *)startup);
-
- /* processing master/backup loop here */
- if (state == IP_VS_STATE_MASTER)
- sync_master_loop();
- else if (state == IP_VS_STATE_BACKUP)
- sync_backup_loop();
- else IP_VS_BUG();
-
- remove_wait_queue(&sync_wait, &wait);
-
- /* thread exits */
- set_sync_pid(state, 0);
- IP_VS_INFO("sync thread stopped!\n");
-
- set_fs(oldmm);
-
- /* decrease the module use count */
- ip_vs_use_count_dec();
-
- set_stop_sync(state, 0);
- wake_up(&stop_sync_wait);
+ tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL);
+ if (!tinfo)
+ goto outbuf;
- return 0;
-}
+ tinfo->sock = sock;
+ tinfo->buf = buf;
+ task = kthread_run(threadfn, tinfo, name);
+ if (IS_ERR(task)) {
+ result = PTR_ERR(task);
+ goto outtinfo;
+ }
-static int fork_sync_thread(void *startup)
-{
- pid_t pid;
+ /* mark as active */
+ *realtask = task;
+ ip_vs_sync_state |= state;
- /* fork the sync thread here, then the parent process of the
- sync thread is the init process after this thread exits. */
- repeat:
- if ((pid = kernel_thread(sync_thread, startup, 0)) < 0) {
- IP_VS_ERR("could not create sync_thread due to %d... "
- "retrying.\n", pid);
- msleep_interruptible(1000);
- goto repeat;
- }
+ /* increase the module use count */
+ ip_vs_use_count_inc();
return 0;
+
+outtinfo:
+ kfree(tinfo);
+outbuf:
+ kfree(buf);
+outsocket:
+ sock_release(sock);
+out:
+ return result;
}
-int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
+int stop_sync_thread(int state)
{
- DECLARE_COMPLETION_ONSTACK(startup);
- pid_t pid;
+ IP_VS_DBG(7, "%s: pid %d\n", __func__, task_pid_nr(current));
- if ((state == IP_VS_STATE_MASTER && sync_master_pid) ||
- (state == IP_VS_STATE_BACKUP && sync_backup_pid))
- return -EEXIST;
+ if (state == IP_VS_STATE_MASTER) {
+ if (!sync_master_thread)
+ return -ESRCH;
- IP_VS_DBG(7, "%s: pid %d\n", __FUNCTION__, current->pid);
- IP_VS_DBG(7, "Each ip_vs_sync_conn entry need %Zd bytes\n",
- sizeof(struct ip_vs_sync_conn));
+ IP_VS_INFO("stopping master sync thread %d ...\n",
+ task_pid_nr(sync_master_thread));
- ip_vs_sync_state |= state;
- if (state == IP_VS_STATE_MASTER) {
- strlcpy(ip_vs_master_mcast_ifn, mcast_ifn,
- sizeof(ip_vs_master_mcast_ifn));
- ip_vs_master_syncid = syncid;
+ /*
+ * The lock synchronizes with sb_queue_tail(), so that we don't
+ * add sync buffers to the queue, when we are already in
+ * progress of stopping the master sync daemon.
+ */
+
+ spin_lock_bh(&ip_vs_sync_lock);
+ ip_vs_sync_state &= ~IP_VS_STATE_MASTER;
+ spin_unlock_bh(&ip_vs_sync_lock);
+ kthread_stop(sync_master_thread);
+ sync_master_thread = NULL;
+ } else if (state == IP_VS_STATE_BACKUP) {
+ if (!sync_backup_thread)
+ return -ESRCH;
+
+ IP_VS_INFO("stopping backup sync thread %d ...\n",
+ task_pid_nr(sync_backup_thread));
+
+ ip_vs_sync_state &= ~IP_VS_STATE_BACKUP;
+ kthread_stop(sync_backup_thread);
+ sync_backup_thread = NULL;
} else {
- strlcpy(ip_vs_backup_mcast_ifn, mcast_ifn,
- sizeof(ip_vs_backup_mcast_ifn));
- ip_vs_backup_syncid = syncid;
- }
-
- repeat:
- if ((pid = kernel_thread(fork_sync_thread, &startup, 0)) < 0) {
- IP_VS_ERR("could not create fork_sync_thread due to %d... "
- "retrying.\n", pid);
- msleep_interruptible(1000);
- goto repeat;
+ return -EINVAL;
}
- wait_for_completion(&startup);
-
- return 0;
-}
-
-
-int stop_sync_thread(int state)
-{
- DECLARE_WAITQUEUE(wait, current);
-
- if ((state == IP_VS_STATE_MASTER && !sync_master_pid) ||
- (state == IP_VS_STATE_BACKUP && !sync_backup_pid))
- return -ESRCH;
-
- IP_VS_DBG(7, "%s: pid %d\n", __FUNCTION__, current->pid);
- IP_VS_INFO("stopping sync thread %d ...\n",
- (state == IP_VS_STATE_MASTER) ?
- sync_master_pid : sync_backup_pid);
-
- __set_current_state(TASK_UNINTERRUPTIBLE);
- add_wait_queue(&stop_sync_wait, &wait);
- set_stop_sync(state, 1);
- ip_vs_sync_state -= state;
- wake_up(&sync_wait);
- schedule();
- __set_current_state(TASK_RUNNING);
- remove_wait_queue(&stop_sync_wait, &wait);
-
- /* Note: no need to reap the sync thread, because its parent
- process is the init process */
-
- if ((state == IP_VS_STATE_MASTER && stop_master_sync) ||
- (state == IP_VS_STATE_BACKUP && stop_backup_sync))
- IP_VS_BUG();
+ /* decrease the module use count */
+ ip_vs_use_count_dec();
return 0;
}
diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_wlc.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_wlc.c
--- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_wlc.c 2009-02-16 11:57:22.000000000 -0400
+++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_wlc.c 2009-02-16 12:56:22.000000000 -0400
@@ -1,8 +1,6 @@
/*
* IPVS: Weighted Least-Connection Scheduling module
*
- * Version: $Id: ip_vs_wlc.c,v 1.13 2003/04/18 09:03:16 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
* Peter Kese <peter.kese@ijs.si>
*
@@ -128,6 +126,7 @@
.name = "wlc",
.refcnt = ATOMIC_INIT(0),
.module = THIS_MODULE,
+ .n_list = LIST_HEAD_INIT(ip_vs_wlc_scheduler.n_list),
.init_service = ip_vs_wlc_init_svc,
.done_service = ip_vs_wlc_done_svc,
.update_service = ip_vs_wlc_update_svc,
@@ -137,7 +136,6 @@
static int __init ip_vs_wlc_init(void)
{
- INIT_LIST_HEAD(&ip_vs_wlc_scheduler.n_list);
return register_ip_vs_scheduler(&ip_vs_wlc_scheduler);
}
diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_wrr.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_wrr.c
--- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_wrr.c 2009-02-16 11:57:22.000000000 -0400
+++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_wrr.c 2009-02-16 12:56:22.000000000 -0400
@@ -1,8 +1,6 @@
/*
* IPVS: Weighted Round-Robin Scheduling module
*
- * Version: $Id: ip_vs_wrr.c,v 1.12 2002/09/15 08:14:08 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
*
* This program is free software; you can redistribute it and/or
@@ -22,6 +20,7 @@
#include <linux/module.h>
#include <linux/kernel.h>
+#include <linux/net.h>
#include <net/ip_vs.h>
@@ -169,7 +168,7 @@
*/
if (mark->cw == 0) {
mark->cl = &svc->destinations;
- IP_VS_INFO("ip_vs_wrr_schedule(): "
+ IP_VS_ERR_RL("ip_vs_wrr_schedule(): "
"no available servers\n");
dest = NULL;
goto out;
@@ -213,6 +212,7 @@
.name = "wrr",
.refcnt = ATOMIC_INIT(0),
.module = THIS_MODULE,
+ .n_list = LIST_HEAD_INIT(ip_vs_wrr_scheduler.n_list),
.init_service = ip_vs_wrr_init_svc,
.done_service = ip_vs_wrr_done_svc,
.update_service = ip_vs_wrr_update_svc,
@@ -221,7 +221,6 @@
static int __init ip_vs_wrr_init(void)
{
- INIT_LIST_HEAD(&ip_vs_wrr_scheduler.n_list);
return register_ip_vs_scheduler(&ip_vs_wrr_scheduler) ;
}
diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_xmit.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_xmit.c
--- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_xmit.c 2009-02-16 11:57:22.000000000 -0400
+++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_xmit.c 2009-02-16 12:56:22.000000000 -0400
@@ -1,8 +1,6 @@
/*
* ip_vs_xmit.c: various packet transmitters for IPVS
*
- * Version: $Id: ip_vs_xmit.c,v 1.2 2002/11/30 01:50:35 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
* Julian Anastasov <ja@ssi.bg>
*
@@ -16,8 +14,8 @@
*/
#include <linux/kernel.h>
-#include <linux/ip.h>
#include <linux/tcp.h> /* for tcphdr */
+#include <net/ip.h>
#include <net/tcp.h> /* for csum_tcpudp_magic */
#include <net/udp.h>
#include <net/icmp.h> /* for icmp_send */
@@ -59,7 +57,7 @@
return dst;
}
-static inline struct rtable *
+static struct rtable *
__ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos)
{
struct rtable *rt; /* Route to the other host */
@@ -78,7 +76,7 @@
.tos = rtos, } },
};
- if (ip_route_output_key(&rt, &fl)) {
+ if (ip_route_output_key(&init_net, &rt, &fl)) {
spin_unlock(&dest->dst_lock);
IP_VS_DBG_RL("ip_route_output error, "
"dest: %u.%u.%u.%u\n",
@@ -101,7 +99,7 @@
.tos = rtos, } },
};
- if (ip_route_output_key(&rt, &fl)) {
+ if (ip_route_output_key(&init_net, &rt, &fl)) {
IP_VS_DBG_RL("ip_route_output error, dest: "
"%u.%u.%u.%u\n", NIPQUAD(cp->daddr));
return NULL;
@@ -128,8 +126,8 @@
#define IP_VS_XMIT(skb, rt) \
do { \
(skb)->ipvs_property = 1; \
- (skb)->ip_summed = CHECKSUM_NONE; \
- NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, (skb), NULL, \
+ skb_forward_csum(skb); \
+ NF_HOOK(PF_INET, NF_INET_LOCAL_OUT, (skb), NULL, \
(rt)->u.dst.dev, dst_output); \
} while (0)
@@ -156,7 +154,7 @@
struct ip_vs_protocol *pp)
{
struct rtable *rt; /* Route to the other host */
- struct iphdr *iph = skb->nh.iph;
+ struct iphdr *iph = ip_hdr(skb);
u8 tos = iph->tos;
int mtu;
struct flowi fl = {
@@ -170,7 +168,7 @@
EnterFunction(10);
- if (ip_route_output_key(&rt, &fl)) {
+ if (ip_route_output_key(&init_net, &rt, &fl)) {
IP_VS_DBG_RL("ip_vs_bypass_xmit(): ip_route_output error, "
"dest: %u.%u.%u.%u\n", NIPQUAD(iph->daddr));
goto tx_error_icmp;
@@ -178,7 +176,7 @@
/* MTU checking */
mtu = dst_mtu(&rt->u.dst);
- if ((skb->len > mtu) && (iph->frag_off&__constant_htons(IP_DF))) {
+ if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
ip_rt_put(rt);
icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
IP_VS_DBG_RL("ip_vs_bypass_xmit(): frag needed\n");
@@ -193,7 +191,7 @@
ip_rt_put(rt);
return NF_STOLEN;
}
- ip_send_check(skb->nh.iph);
+ ip_send_check(ip_hdr(skb));
/* drop old route */
dst_release(skb->dst);
@@ -226,7 +224,7 @@
{
struct rtable *rt; /* Route to the other host */
int mtu;
- struct iphdr *iph = skb->nh.iph;
+ struct iphdr *iph = ip_hdr(skb);
EnterFunction(10);
@@ -245,7 +243,7 @@
/* MTU checking */
mtu = dst_mtu(&rt->u.dst);
- if ((skb->len > mtu) && (iph->frag_off&__constant_htons(IP_DF))) {
+ if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
ip_rt_put(rt);
icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
IP_VS_DBG_RL_PKT(0, pp, skb, 0, "ip_vs_nat_xmit(): frag needed for");
@@ -253,7 +251,7 @@
}
/* copy-on-write the packet before mangling it */
- if (!ip_vs_make_skb_writable(&skb, sizeof(struct iphdr)))
+ if (!skb_make_writable(skb, sizeof(struct iphdr)))
goto tx_error_put;
if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
@@ -264,10 +262,10 @@
skb->dst = &rt->u.dst;
/* mangle the packet */
- if (pp->dnat_handler && !pp->dnat_handler(&skb, pp, cp))
+ if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
goto tx_error;
- skb->nh.iph->daddr = cp->daddr;
- ip_send_check(skb->nh.iph);
+ ip_hdr(skb)->daddr = cp->daddr;
+ ip_send_check(ip_hdr(skb));
IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");
@@ -320,19 +318,20 @@
{
struct rtable *rt; /* Route to the other host */
struct net_device *tdev; /* Device to other host */
- struct iphdr *old_iph = skb->nh.iph;
+ struct iphdr *old_iph = ip_hdr(skb);
u8 tos = old_iph->tos;
__be16 df = old_iph->frag_off;
+ sk_buff_data_t old_transport_header = skb->transport_header;
struct iphdr *iph; /* Our new IP header */
- int max_headroom; /* The extra header space needed */
+ unsigned int max_headroom; /* The extra header space needed */
int mtu;
EnterFunction(10);
- if (skb->protocol != __constant_htons(ETH_P_IP)) {
+ if (skb->protocol != htons(ETH_P_IP)) {
IP_VS_DBG_RL("ip_vs_tunnel_xmit(): protocol error, "
"ETH_P_IP: %d, skb protocol: %d\n",
- __constant_htons(ETH_P_IP), skb->protocol);
+ htons(ETH_P_IP), skb->protocol);
goto tx_error;
}
@@ -350,9 +349,9 @@
if (skb->dst)
skb->dst->ops->update_pmtu(skb->dst, mtu);
- df |= (old_iph->frag_off&__constant_htons(IP_DF));
+ df |= (old_iph->frag_off & htons(IP_DF));
- if ((old_iph->frag_off&__constant_htons(IP_DF))
+ if ((old_iph->frag_off & htons(IP_DF))
&& mtu < ntohs(old_iph->tot_len)) {
icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
ip_rt_put(rt);
@@ -377,15 +376,16 @@
}
kfree_skb(skb);
skb = new_skb;
- old_iph = skb->nh.iph;
+ old_iph = ip_hdr(skb);
}
- skb->h.raw = (void *) old_iph;
+ skb->transport_header = old_transport_header;
/* fix old IP header checksum */
ip_send_check(old_iph);
- skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
+ skb_push(skb, sizeof(struct iphdr));
+ skb_reset_network_header(skb);
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
/* drop old route */
@@ -395,7 +395,7 @@
/*
* Push down and install the IPIP header.
*/
- iph = skb->nh.iph;
+ iph = ip_hdr(skb);
iph->version = 4;
iph->ihl = sizeof(struct iphdr)>>2;
iph->frag_off = df;
@@ -404,14 +404,12 @@
iph->daddr = rt->rt_dst;
iph->saddr = rt->rt_src;
iph->ttl = old_iph->ttl;
- iph->tot_len = htons(skb->len);
ip_select_ident(iph, &rt->u.dst, NULL);
- ip_send_check(iph);
/* Another hack: avoid icmp_send in ip_fragment */
skb->local_df = 1;
- IP_VS_XMIT(skb, rt);
+ ip_local_out(skb);
LeaveFunction(10);
@@ -435,7 +433,7 @@
struct ip_vs_protocol *pp)
{
struct rtable *rt; /* Route to the other host */
- struct iphdr *iph = skb->nh.iph;
+ struct iphdr *iph = ip_hdr(skb);
int mtu;
EnterFunction(10);
@@ -445,7 +443,7 @@
/* MTU checking */
mtu = dst_mtu(&rt->u.dst);
- if ((iph->frag_off&__constant_htons(IP_DF)) && skb->len > mtu) {
+ if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu) {
icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
ip_rt_put(rt);
IP_VS_DBG_RL("ip_vs_dr_xmit(): frag needed\n");
@@ -460,7 +458,7 @@
ip_rt_put(rt);
return NF_STOLEN;
}
- ip_send_check(skb->nh.iph);
+ ip_send_check(ip_hdr(skb));
/* drop old route */
dst_release(skb->dst);
@@ -514,12 +512,12 @@
* mangle and send the packet here (only for VS/NAT)
*/
- if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(skb->nh.iph->tos))))
+ if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(ip_hdr(skb)->tos))))
goto tx_error_icmp;
/* MTU checking */
mtu = dst_mtu(&rt->u.dst);
- if ((skb->len > mtu) && (skb->nh.iph->frag_off&__constant_htons(IP_DF))) {
+ if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) {
ip_rt_put(rt);
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
IP_VS_DBG_RL("ip_vs_in_icmp(): frag needed\n");
@@ -527,7 +525,7 @@
}
/* copy-on-write the packet before mangling it */
- if (!ip_vs_make_skb_writable(&skb, offset))
+ if (!skb_make_writable(skb, offset))
goto tx_error_put;
if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment