Last active
June 9, 2020 15:27
-
-
Save Ansuel/a1c5ff13d97e62223721392db643ecf5 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--- a/include/linux/if_bridge.h | |
+++ b/include/linux/if_bridge.h | |
@@ -45,10 +45,26 @@ struct br_ip_list { | |
#define BR_PROXYARP BIT(8) | |
#define BR_LEARNING_SYNC BIT(9) | |
#define BR_PROXYARP_WIFI BIT(10) | |
+#define BR_ISOLATE_MODE BIT(11) | |
+#define BR_MULTICAST_TO_UCAST BIT(12) | |
#define BR_DEFAULT_AGEING_TIME (300 * HZ) | |
+struct net_bridge_port; | |
+ | |
extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *)); | |
+extern struct net_device *br_port_dev_get(struct net_device *dev, | |
+ unsigned char *addr, | |
+ struct sk_buff *skb, | |
+ unsigned int cookie); | |
+extern void br_refresh_fdb_entry(struct net_device *dev, const char *addr); | |
+extern void br_dev_update_stats(struct net_device *dev, | |
+ struct rtnl_link_stats64 *nlstats); | |
+extern struct net_bridge_fdb_entry *br_fdb_has_entry(struct net_device *dev, | |
+ const char *addr, | |
+ __u16 vid); | |
+extern void br_fdb_update_register_notify(struct notifier_block *nb); | |
+extern void br_fdb_update_unregister_notify(struct notifier_block *nb); | |
typedef int br_should_route_hook_t(struct sk_buff *skb); | |
extern br_should_route_hook_t __rcu *br_should_route_hook; | |
@@ -76,4 +92,36 @@ static inline bool br_multicast_has_querier_adjacent(struct net_device *dev, | |
} | |
#endif | |
+typedef struct net_bridge_port *br_port_dev_get_hook_t(struct net_device *dev, | |
+ struct sk_buff *skb, | |
+ unsigned char *addr, | |
+ unsigned int cookie); | |
+extern br_port_dev_get_hook_t __rcu *br_port_dev_get_hook; | |
+ | |
+typedef void (br_notify_hook_t)(int group, int event, const void *ptr); | |
+extern br_notify_hook_t __rcu *br_notify_hook; | |
+typedef int (br_multicast_handle_hook_t)(const struct net_bridge_port *src, | |
+ struct sk_buff *skb); | |
+extern br_multicast_handle_hook_t __rcu *br_multicast_handle_hook; | |
+ | |
+#define BR_FDB_EVENT_ADD 0x01 | |
+#define BR_FDB_EVENT_DEL 0x02 | |
+struct br_fdb_event { | |
+ unsigned char addr[6]; | |
+ unsigned char is_local; | |
+ struct net_device *dev; | |
+ struct net_bridge *br; | |
+ struct net_device *orig_dev; | |
+}; | |
+extern void br_fdb_register_notify(struct notifier_block *nb); | |
+extern void br_fdb_unregister_notify(struct notifier_block *nb); | |
+extern struct net_device *br_fdb_bridge_dev_get_and_hold(struct net_bridge *br); | |
+ | |
+typedef struct net_bridge_port *br_get_dst_hook_t( | |
+ const struct net_bridge_port *src, | |
+ struct sk_buff **skb); | |
+extern br_get_dst_hook_t __rcu *br_get_dst_hook; | |
+ | |
#endif | |
--- a/include/linux/if_pppol2tp.h | |
+++ b/include/linux/if_pppol2tp.h | |
@@ -18,4 +18,27 @@ | |
#include <linux/in6.h> | |
#include <uapi/linux/if_pppol2tp.h> | |
+/* | |
+ * Holds L2TP channel info | |
+ */ | |
+struct pppol2tp_common_addr { | |
+ int tunnel_version; /* v2 or v3 */ | |
+ __u32 local_tunnel_id, remote_tunnel_id; /* tunnel id */ | |
+ __u32 local_session_id, remote_session_id; /* session id */ | |
+ struct sockaddr_in local_addr, remote_addr; /* ip address and port */ | |
+}; | |
+ | |
+/* | |
+ * L2TP channel operations | |
+ */ | |
+struct pppol2tp_channel_ops { | |
+ struct ppp_channel_ops ops; /* ppp channel ops */ | |
+}; | |
+ | |
+/* | |
+ * exported function which calls pppol2tp channel's get addressing | |
+ * function | |
+ */ | |
+extern int pppol2tp_channel_addressing_get(struct ppp_channel *, | |
+ struct pppol2tp_common_addr *); | |
#endif | |
--- a/include/linux/if_pppox.h | |
+++ b/include/linux/if_pppox.h | |
@@ -1,6 +1,22 @@ | |
+/* | |
+ ************************************************************************** | |
+ * Copyright (c) 2016, The Linux Foundation. All rights reserved. | |
+ * Permission to use, copy, modify, and/or distribute this software for | |
+ * any purpose with or without fee is hereby granted, provided that the | |
+ * above copyright notice and this permission notice appear in all copies. | |
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | |
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | |
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | |
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | |
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | |
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT | |
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | |
+ ************************************************************************** | |
+ */ | |
+ | |
/*************************************************************************** | |
* Linux PPP over X - Generic PPP transport layer sockets | |
- * Linux PPP over Ethernet (PPPoE) Socket Implementation (RFC 2516) | |
+ * Linux PPP over Ethernet (PPPoE) Socket Implementation (RFC 2516) | |
* | |
* This file supplies definitions required by the PPP over Ethernet driver | |
* (pppox.c). All version information wrt this file is located in pppox.c | |
@@ -12,6 +28,7 @@ | |
* 2 of the License, or (at your option) any later version. | |
* | |
*/ | |
+ | |
#ifndef __LINUX_IF_PPPOX_H | |
#define __LINUX_IF_PPPOX_H | |
@@ -42,6 +59,7 @@ struct pptp_opt { | |
u32 ack_sent, ack_recv; | |
u32 seq_sent, seq_recv; | |
int ppp_flags; | |
+ bool pptp_offload_mode; | |
}; | |
#include <net/sock.h> | |
@@ -95,4 +113,45 @@ enum { | |
PPPOX_DEAD = 16 /* dead, useless, please clean me up!*/ | |
}; | |
+/* | |
+ * PPPoE Channel specific operations | |
+ */ | |
+struct pppoe_channel_ops { | |
+ /* Must be first - general to all PPP channels */ | |
+ struct ppp_channel_ops ops; | |
+ void (*get_addressing)(struct ppp_channel *, struct pppoe_opt *); | |
+}; | |
+ | |
+/* PPTP client callback */ | |
+typedef int (*pptp_gre_seq_offload_callback_t)(struct sk_buff *skb, | |
+ struct net_device *pptp_dev); | |
+ | |
+/* Return PPPoE channel specific addressing information */ | |
+extern void pppoe_channel_addressing_get(struct ppp_channel *chan, | |
+ struct pppoe_opt *addressing); | |
+ | |
+/* Lookup PPTP session info and return PPTP session */ | |
+extern int pptp_session_find(struct pptp_opt *opt, __be16 peer_call_id, | |
+ __be32 peer_ip_addr); | |
+ | |
+/* Return PPTP session information given the channel */ | |
+extern void pptp_channel_addressing_get(struct pptp_opt *opt, | |
+ struct ppp_channel *chan); | |
+ | |
+/* Enable the PPTP session offload flag */ | |
+extern int pptp_session_enable_offload_mode(__be16 peer_call_id, | |
+ __be32 peer_ip_addr); | |
+ | |
+/* Disable the PPTP session offload flag */ | |
+extern int pptp_session_disable_offload_mode(__be16 peer_call_id, | |
+ __be32 peer_ip_addr); | |
+ | |
+/* Register the PPTP GRE packets sequence number offload callback */ | |
+extern int | |
+pptp_register_gre_seq_offload_callback(pptp_gre_seq_offload_callback_t | |
+ pptp_client_cb); | |
+ | |
+/* Unregister the PPTP GRE packets sequence number offload callback */ | |
+extern void pptp_unregister_gre_seq_offload_callback(void); | |
+ | |
#endif /* !(__LINUX_IF_PPPOX_H) */ | |
--- a/include/linux/if_tun.h | |
+++ b/include/linux/if_tun.h | |
@@ -19,6 +19,12 @@ | |
#if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE) | |
struct socket *tun_get_socket(struct file *); | |
+#ifdef __KERNEL__ | |
+typedef void (*tun_get_offload_stats_t)(struct net_device *dev, | |
+ struct rtnl_link_stats64 *stats); | |
+void tun_register_offload_stats_callback(tun_get_offload_stats_t stats_cb); | |
+void tun_unregister_offload_stats_callback(void); | |
+#endif | |
#else | |
#include <linux/err.h> | |
#include <linux/errno.h> | |
--- a/include/linux/if_vlan.h | |
+++ b/include/linux/if_vlan.h | |
@@ -108,7 +108,15 @@ struct vlan_pcpu_stats { | |
extern struct net_device *__vlan_find_dev_deep_rcu(struct net_device *real_dev, | |
__be16 vlan_proto, u16 vlan_id); | |
+extern void __vlan_dev_update_accel_stats(struct net_device *dev, | |
+ struct rtnl_link_stats64 *stats); | |
+ | |
+extern u16 vlan_dev_get_egress_prio(struct net_device *dev, u32 skb_prio); | |
+ | |
+extern struct net_device *__vlan_find_dev_deep(struct net_device *real_dev, | |
+ __be16 vlan_proto, u16 vlan_id); | |
extern struct net_device *vlan_dev_real_dev(const struct net_device *dev); | |
+extern struct net_device *vlan_dev_next_dev(const struct net_device *dev); | |
extern u16 vlan_dev_vlan_id(const struct net_device *dev); | |
extern __be16 vlan_dev_vlan_proto(const struct net_device *dev); | |
@@ -204,6 +212,19 @@ static inline int vlan_get_encap_level(struct net_device *dev) | |
return vlan_dev_priv(dev)->nest_level; | |
} | |
#else | |
+static inline void __vlan_dev_update_accel_stats(struct net_device *dev, | |
+ struct rtnl_link_stats64 *stats) | |
+{ | |
+ | |
+} | |
+ | |
+static inline u16 vlan_dev_get_egress_prio(struct net_device *dev, | |
+ u32 skb_prio) | |
+{ | |
+ return 0; | |
+} | |
+ | |
+ | |
static inline struct net_device * | |
__vlan_find_dev_deep_rcu(struct net_device *real_dev, | |
__be16 vlan_proto, u16 vlan_id) | |
--- a/net/8021q/vlan_core.c | |
+++ b/net/8021q/vlan_core.c | |
@@ -64,6 +64,46 @@ bool vlan_do_receive(struct sk_buff **skbp) | |
return true; | |
} | |
+/* Update the VLAN device with statistics from network offload engines */ | |
+void __vlan_dev_update_accel_stats(struct net_device *dev, | |
+ struct rtnl_link_stats64 *nlstats) | |
+{ | |
+ struct vlan_pcpu_stats *stats; | |
+ | |
+ if (!is_vlan_dev(dev)) | |
+ return; | |
+ | |
+ stats = per_cpu_ptr(vlan_dev_priv(dev)->vlan_pcpu_stats, 0); | |
+ | |
+ u64_stats_update_begin(&stats->syncp); | |
+ stats->rx_packets += nlstats->rx_packets; | |
+ stats->rx_bytes += nlstats->rx_bytes; | |
+ stats->tx_packets += nlstats->tx_packets; | |
+ stats->tx_bytes += nlstats->tx_bytes; | |
+ u64_stats_update_end(&stats->syncp); | |
+} | |
+EXPORT_SYMBOL(__vlan_dev_update_accel_stats); | |
+ | |
+/* Lookup the 802.1p egress_map table and return the 802.1p value */ | |
+u16 vlan_dev_get_egress_prio(struct net_device *dev, u32 skb_prio) | |
+{ | |
+ struct vlan_priority_tci_mapping *mp; | |
+ | |
+ mp = vlan_dev_priv(dev)->egress_priority_map[(skb_prio & 0xf)]; | |
+ while (mp) { | |
+ if (mp->priority == skb_prio) { | |
+ /* This should already be shifted | |
+ * to mask correctly with the | |
+ * VLAN's TCI | |
+ */ | |
+ return mp->vlan_qos; | |
+ } | |
+ mp = mp->next; | |
+ } | |
+ return 0; | |
+} | |
+EXPORT_SYMBOL(vlan_dev_get_egress_prio); | |
+ | |
/* Must be invoked with rcu_read_lock. */ | |
struct net_device *__vlan_find_dev_deep_rcu(struct net_device *dev, | |
__be16 vlan_proto, u16 vlan_id) | |
@@ -102,6 +142,12 @@ struct net_device *vlan_dev_real_dev(const struct net_device *dev) | |
} | |
EXPORT_SYMBOL(vlan_dev_real_dev); | |
+struct net_device *vlan_dev_next_dev(const struct net_device *dev) | |
+{ | |
+ return vlan_dev_priv(dev)->real_dev; | |
+} | |
+EXPORT_SYMBOL(vlan_dev_next_dev); | |
+ | |
u16 vlan_dev_vlan_id(const struct net_device *dev) | |
{ | |
return vlan_dev_priv(dev)->vlan_id; | |
--- a/net/8021q/vlanproc.c | |
+++ b/net/8021q/vlanproc.c | |
@@ -127,6 +127,9 @@ void vlan_proc_cleanup(struct net *net) | |
{ | |
struct vlan_net *vn = net_generic(net, vlan_net_id); | |
+ if (IS_ENABLED(CONFIG_PROC_STRIPPED)) | |
+ return; | |
+ | |
if (vn->proc_vlan_conf) | |
remove_proc_entry(name_conf, vn->proc_vlan_dir); | |
@@ -146,6 +149,9 @@ int __net_init vlan_proc_init(struct net *net) | |
{ | |
struct vlan_net *vn = net_generic(net, vlan_net_id); | |
+ if (IS_ENABLED(CONFIG_PROC_STRIPPED)) | |
+ return 0; | |
+ | |
vn->proc_vlan_dir = proc_net_mkdir(net, name_root, net->proc_net); | |
if (!vn->proc_vlan_dir) | |
goto err; | |
--- a/net/Kconfig | |
+++ b/net/Kconfig | |
@@ -25,6 +25,12 @@ menuconfig NET | |
if NET | |
+config ETHERNET_PACKET_MANGLE | |
+ bool | |
+ help | |
+ This option can be selected by phy drivers that need to mangle | |
+ packets going in or out of an ethernet device. | |
+ | |
config WANT_COMPAT_NETLINK_MESSAGES | |
bool | |
help | |
@@ -86,6 +92,9 @@ source "net/netlabel/Kconfig" | |
endif # if INET | |
+config SOCK_DIAG | |
+ bool | |
+ | |
config NETWORK_SECMARK | |
bool "Security Marking" | |
help | |
@@ -233,6 +242,8 @@ source "net/mpls/Kconfig" | |
source "net/hsr/Kconfig" | |
source "net/switchdev/Kconfig" | |
source "net/l3mdev/Kconfig" | |
+source "net/rmnet_data/Kconfig" | |
+source "net/qrtr/Kconfig" | |
config RPS | |
bool | |
@@ -297,6 +308,45 @@ config NET_FLOW_LIMIT | |
with many clients some protection against DoS by a single (spoofed) | |
flow that greatly exceeds average workload. | |
+config SKB_RECYCLER | |
+ bool "Generic skb recycling" | |
+ default y | |
+ ---help--- | |
+ SKB_RECYCLER is used to implement RX-to-RX skb recycling. | |
+ This config enables the recycling scheme for bridging and | |
+ routing workloads. It can reduce skbuff freeing or | |
+ reallocation overhead. | |
+ | |
+config SKB_RECYCLER_MULTI_CPU | |
+ bool "Cross-CPU recycling for CPU-locked workloads" | |
+ depends on SMP && SKB_RECYCLER | |
+ default n | |
+ | |
+config SKB_RECYCLER_PREALLOC | |
+ bool "Enable preallocation of SKBs" | |
+ depends on SKB_RECYCLER | |
+ default n | |
+ ---help--- | |
+ Preallocates SKBs in recycling lists and the number of | |
+ SKBs are configured through CONFIG_SKB_RECYCLE_MAX_PREALLOC_SKBS. | |
+ This needs SKB_RECYCLER to be enabled. | |
+ The number of preallocated SKBs can be passed using | |
+ SKB_RECYCLE_MAX_PREALLOC_SKBS. | |
+ | |
+config SKB_RECYCLE_MAX_PREALLOC_SKBS | |
+ int "Number of SKBs to be preallocated" | |
+ depends on SKB_RECYCLER_PREALLOC | |
+ default 16384 | |
+ ---help--- | |
+ Number of SKBs each of 4K size to be preallocated for recycling | |
+ | |
+config ALLOC_SKB_PAGE_FRAG_DISABLE | |
+ bool "Disable page fragment based skbuff payload allocations" | |
+ depends on !SKB_RECYCLER | |
+ default n | |
+ ---help--- | |
+ Disable page fragment based allocations for skbuff payloads. | |
+ | |
menu "Network testing" | |
config NET_PKTGEN | |
@@ -383,6 +433,8 @@ config LWTUNNEL | |
weight tunnel endpoint. Tunnel encapsulation parameters are stored | |
with light weight tunnel state associated with fib routes. | |
+source "drivers/soc/qcom/ipc_router/Kconfig" | |
+ | |
endif # if NET | |
# Used by archs to tell that they support BPF_JIT | |
--- a/net/Makefile | |
+++ b/net/Makefile | |
@@ -5,6 +5,8 @@ | |
# Rewritten to use lists instead of if-statements. | |
# | |
+KBUILD_CFLAGS_KERNEL := $(filter-out -Werror, $(KBUILD_CFLAGS_KERNEL)) | |
+ | |
obj-$(CONFIG_NET) := socket.o core/ | |
tmp-$(CONFIG_COMPAT) := compat.o | |
@@ -77,3 +79,5 @@ endif | |
ifneq ($(CONFIG_NET_L3_MASTER_DEV),) | |
obj-y += l3mdev/ | |
endif | |
+obj-$(CONFIG_RMNET_DATA) += rmnet_data/ | |
+obj-$(CONFIG_QRTR) += qrtr/ | |
--- a/net/bluetooth/hidp/core.c | |
+++ b/net/bluetooth/hidp/core.c | |
@@ -431,7 +431,7 @@ static void hidp_del_timer(struct hidp_session *session) | |
} | |
static void hidp_process_report(struct hidp_session *session, | |
- int type, const u8 *data, int len, int intr) | |
+ int type, const u8 *data, unsigned int len, int intr) | |
{ | |
if (len > HID_MAX_BUFFER_SIZE) | |
len = HID_MAX_BUFFER_SIZE; | |
--- a/net/bridge/br.c | |
+++ b/net/bridge/br.c | |
@@ -266,6 +266,10 @@ static void __exit br_deinit(void) | |
br_fdb_fini(); | |
} | |
+/* Hook for bridge event notifications */ | |
+br_notify_hook_t __rcu *br_notify_hook __read_mostly; | |
+EXPORT_SYMBOL_GPL(br_notify_hook); | |
+ | |
module_init(br_init) | |
module_exit(br_deinit) | |
MODULE_LICENSE("GPL"); | |
--- a/net/bridge/br_device.c | |
+++ b/net/bridge/br_device.c | |
@@ -40,6 +40,8 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) | |
struct pcpu_sw_netstats *brstats = this_cpu_ptr(br->stats); | |
const struct nf_br_ops *nf_ops; | |
u16 vid = 0; | |
+ struct net_bridge_port *pdst; | |
+ br_get_dst_hook_t *get_dst_hook; | |
rcu_read_lock(); | |
nf_ops = rcu_dereference(nf_br_ops); | |
@@ -61,9 +63,16 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) | |
if (!br_allowed_ingress(br, br_vlan_group_rcu(br), skb, &vid)) | |
goto out; | |
+ get_dst_hook = rcu_dereference(br_get_dst_hook); | |
+ | |
if (is_broadcast_ether_addr(dest)) | |
br_flood_deliver(br, skb, false); | |
else if (is_multicast_ether_addr(dest)) { | |
+ br_multicast_handle_hook_t *multicast_handle_hook = | |
+ rcu_dereference(br_multicast_handle_hook); | |
+ if (!__br_get(multicast_handle_hook, true, NULL, skb)) | |
+ goto out; | |
+ | |
if (unlikely(netpoll_tx_running(dev))) { | |
br_flood_deliver(br, skb, false); | |
goto out; | |
@@ -79,10 +88,20 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) | |
br_multicast_deliver(mdst, skb); | |
else | |
br_flood_deliver(br, skb, false); | |
- } else if ((dst = __br_fdb_get(br, dest, vid)) != NULL) | |
- br_deliver(dst->dst, skb); | |
- else | |
- br_flood_deliver(br, skb, true); | |
+ } else { | |
+ pdst = __br_get(get_dst_hook, NULL, NULL, &skb); | |
+ if (pdst) { | |
+ if (!skb) | |
+ goto out; | |
+ br_deliver(pdst, skb); | |
+ } else { | |
+ dst = __br_fdb_get(br, dest, vid); | |
+ if (dst) | |
+ br_deliver(dst->dst, skb); | |
+ else | |
+ br_flood_deliver(br, skb, true); | |
+ } | |
+ } | |
out: | |
rcu_read_unlock(); | |
--- a/net/bridge/br_fdb.c | |
+++ b/net/bridge/br_fdb.c | |
@@ -38,6 +38,20 @@ static void fdb_notify(struct net_bridge *br, | |
static u32 fdb_salt __read_mostly; | |
+ATOMIC_NOTIFIER_HEAD(br_fdb_notifier_list); | |
+ | |
+void br_fdb_register_notify(struct notifier_block *nb) | |
+{ | |
+ atomic_notifier_chain_register(&br_fdb_notifier_list, nb); | |
+} | |
+EXPORT_SYMBOL_GPL(br_fdb_register_notify); | |
+ | |
+void br_fdb_unregister_notify(struct notifier_block *nb) | |
+{ | |
+ atomic_notifier_chain_unregister(&br_fdb_notifier_list, nb); | |
+} | |
+EXPORT_SYMBOL_GPL(br_fdb_unregister_notify); | |
+ | |
int __init br_fdb_init(void) | |
{ | |
br_fdb_cache = kmem_cache_create("bridge_fdb_cache", | |
@@ -289,12 +303,27 @@ out: | |
spin_unlock_bh(&br->hash_lock); | |
} | |
+ATOMIC_NOTIFIER_HEAD(br_fdb_update_notifier_list); | |
+ | |
+void br_fdb_update_register_notify(struct notifier_block *nb) | |
+{ | |
+ atomic_notifier_chain_register(&br_fdb_update_notifier_list, nb); | |
+} | |
+EXPORT_SYMBOL_GPL(br_fdb_update_register_notify); | |
+ | |
+void br_fdb_update_unregister_notify(struct notifier_block *nb) | |
+{ | |
+ atomic_notifier_chain_unregister(&br_fdb_update_notifier_list, nb); | |
+} | |
+EXPORT_SYMBOL_GPL(br_fdb_update_unregister_notify); | |
+ | |
void br_fdb_cleanup(unsigned long _data) | |
{ | |
struct net_bridge *br = (struct net_bridge *)_data; | |
unsigned long delay = hold_time(br); | |
unsigned long next_timer = jiffies + br->ageing_time; | |
int i; | |
+ struct br_fdb_event fdb_event; | |
spin_lock(&br->hash_lock); | |
for (i = 0; i < BR_HASH_SIZE; i++) { | |
@@ -308,10 +337,16 @@ void br_fdb_cleanup(unsigned long _data) | |
if (f->added_by_external_learn) | |
continue; | |
this_timer = f->updated + delay; | |
- if (time_before_eq(this_timer, jiffies)) | |
+ if (time_before_eq(this_timer, jiffies)) { | |
+ memset(&fdb_event, 0, sizeof(fdb_event)); | |
+ ether_addr_copy(fdb_event.addr, f->addr.addr); | |
fdb_delete(br, f); | |
- else if (time_before(this_timer, next_timer)) | |
+ atomic_notifier_call_chain( | |
+ &br_fdb_update_notifier_list, 0, | |
+ (void *)&fdb_event); | |
+ } else if (time_before(this_timer, next_timer)) { | |
next_timer = this_timer; | |
+ } | |
} | |
} | |
spin_unlock(&br->hash_lock); | |
@@ -389,6 +424,7 @@ struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br, | |
return NULL; | |
} | |
+EXPORT_SYMBOL_GPL(__br_fdb_get); | |
#if IS_ENABLED(CONFIG_ATM_LANE) | |
/* Interface used by ATM LANE hook to test | |
@@ -561,12 +597,21 @@ int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source, | |
return ret; | |
} | |
+/* Get the bridge device */ | |
+struct net_device *br_fdb_bridge_dev_get_and_hold(struct net_bridge *br) | |
+{ | |
+ dev_hold(br->dev); | |
+ return br->dev; | |
+} | |
+EXPORT_SYMBOL_GPL(br_fdb_bridge_dev_get_and_hold); | |
+ | |
void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, | |
const unsigned char *addr, u16 vid, bool added_by_user) | |
{ | |
struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)]; | |
struct net_bridge_fdb_entry *fdb; | |
bool fdb_modified = false; | |
+ struct br_fdb_event fdb_event; | |
/* some users want to always flood. */ | |
if (hold_time(br) == 0) | |
@@ -588,8 +633,16 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, | |
} else { | |
/* fastpath: update of existing entry */ | |
if (unlikely(source != fdb->dst)) { | |
+ ether_addr_copy(fdb_event.addr, addr); | |
+ fdb_event.br = br; | |
+ fdb_event.orig_dev = fdb->dst->dev; | |
+ fdb_event.dev = source->dev; | |
fdb->dst = source; | |
fdb_modified = true; | |
+ | |
+ atomic_notifier_call_chain( | |
+ &br_fdb_update_notifier_list, | |
+ 0, (void *)&fdb_event); | |
} | |
fdb->updated = jiffies; | |
if (unlikely(added_by_user)) | |
@@ -614,8 +667,46 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, | |
} | |
} | |
+/* Refresh FDB entries for bridge packets being forwarded by offload engines */ | |
+void br_refresh_fdb_entry(struct net_device *dev, const char *addr) | |
+{ | |
+ struct net_bridge_port *p = br_port_get_rcu(dev); | |
+ | |
+ if (!p || p->state == BR_STATE_DISABLED) | |
+ return; | |
+ | |
+ if (!is_valid_ether_addr(addr)) { | |
+ pr_info("bridge: Attempt to refresh with invalid ether address %pM\n", | |
+ addr); | |
+ return; | |
+ } | |
+ | |
+ rcu_read_lock(); | |
+ br_fdb_update(p->br, p, addr, 0, true); | |
+ rcu_read_unlock(); | |
+} | |
+EXPORT_SYMBOL_GPL(br_refresh_fdb_entry); | |
+ | |
+/* Look up the MAC address in the device's bridge fdb table */ | |
+struct net_bridge_fdb_entry *br_fdb_has_entry(struct net_device *dev, | |
+ const char *addr, __u16 vid) | |
+{ | |
+ struct net_bridge_port *p = br_port_get_rcu(dev); | |
+ struct net_bridge_fdb_entry *fdb; | |
+ | |
+ if (!p || p->state == BR_STATE_DISABLED) | |
+ return NULL; | |
+ | |
+ rcu_read_lock(); | |
+ fdb = fdb_find_rcu(&p->br->hash[br_mac_hash(addr, vid)], addr, vid); | |
+ rcu_read_unlock(); | |
+ | |
+ return fdb; | |
+} | |
+EXPORT_SYMBOL_GPL(br_fdb_has_entry); | |
+ | |
static int fdb_to_nud(const struct net_bridge *br, | |
- const struct net_bridge_fdb_entry *fdb) | |
+ const struct net_bridge_fdb_entry *fdb) | |
{ | |
if (fdb->is_local) | |
return NUD_PERMANENT; | |
@@ -687,6 +778,23 @@ static void fdb_notify(struct net_bridge *br, | |
struct sk_buff *skb; | |
int err = -ENOBUFS; | |
+ if (fdb->dst) { | |
+ int event; | |
+ struct br_fdb_event fdb_event; | |
+ | |
+ if (type == RTM_NEWNEIGH) | |
+ event = BR_FDB_EVENT_ADD; | |
+ else | |
+ event = BR_FDB_EVENT_DEL; | |
+ | |
+ fdb_event.dev = fdb->dst->dev; | |
+ ether_addr_copy(fdb_event.addr, fdb->addr.addr); | |
+ fdb_event.is_local = fdb->is_local; | |
+ atomic_notifier_call_chain(&br_fdb_notifier_list, | |
+ event, | |
+ (void *)&fdb_event); | |
+ } | |
+ | |
skb = nlmsg_new(fdb_nlmsg_size(), GFP_ATOMIC); | |
if (skb == NULL) | |
goto errout; | |
@@ -698,6 +806,7 @@ static void fdb_notify(struct net_bridge *br, | |
kfree_skb(skb); | |
goto errout; | |
} | |
+ __br_notify(RTNLGRP_NEIGH, type, fdb); | |
rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC); | |
return; | |
errout: | |
--- a/net/bridge/br_forward.c | |
+++ b/net/bridge/br_forward.c | |
@@ -33,7 +33,8 @@ static inline int should_deliver(const struct net_bridge_port *p, | |
struct net_bridge_vlan_group *vg; | |
vg = nbp_vlan_group_rcu(p); | |
- return ((p->flags & BR_HAIRPIN_MODE) || skb->dev != p->dev) && | |
+ return ((skb->dev != p->dev) || ((p->flags & BR_HAIRPIN_MODE) && | |
+ (!is_multicast_ether_addr(eth_hdr(skb)->h_dest)))) && | |
br_allowed_egress(vg, skb) && p->state == BR_STATE_FORWARDING; | |
} | |
@@ -69,7 +70,7 @@ EXPORT_SYMBOL_GPL(br_dev_queue_push_xmit); | |
int br_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb) | |
{ | |
- return NF_HOOK(NFPROTO_BRIDGE, NF_BR_POST_ROUTING, | |
+ return BR_HOOK(NFPROTO_BRIDGE, NF_BR_POST_ROUTING, | |
net, sk, skb, NULL, skb->dev, | |
br_dev_queue_push_xmit); | |
@@ -97,7 +98,7 @@ static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb) | |
return; | |
} | |
- NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, | |
+ BR_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, | |
dev_net(skb->dev), NULL, skb,NULL, skb->dev, | |
br_forward_finish); | |
} | |
@@ -121,7 +122,7 @@ static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb) | |
skb->dev = to->dev; | |
skb_forward_csum(skb); | |
- NF_HOOK(NFPROTO_BRIDGE, NF_BR_FORWARD, | |
+ BR_HOOK(NFPROTO_BRIDGE, NF_BR_FORWARD, | |
dev_net(indev), NULL, skb, indev, skb->dev, | |
br_forward_finish); | |
} | |
@@ -136,12 +137,11 @@ void br_deliver(const struct net_bridge_port *to, struct sk_buff *skb) | |
kfree_skb(skb); | |
} | |
-EXPORT_SYMBOL_GPL(br_deliver); | |
/* called with rcu_read_lock */ | |
void br_forward(const struct net_bridge_port *to, struct sk_buff *skb, struct sk_buff *skb0) | |
{ | |
- if (to && should_deliver(to, skb)) { | |
+ if (to && should_deliver(to, skb) && !(to->flags & BR_ISOLATE_MODE)) { | |
if (skb0) | |
deliver_clone(to, skb, __br_forward); | |
else | |
@@ -192,12 +192,40 @@ out: | |
return p; | |
} | |
+static struct net_bridge_port *maybe_deliver_addr( | |
+ struct net_bridge_port *prev, struct net_bridge_port *p, | |
+ struct sk_buff *skb, const unsigned char *addr, | |
+ void (*__packet_hook)(const struct net_bridge_port *p, | |
+ struct sk_buff *skb)) | |
+{ | |
+ struct net_device *dev = BR_INPUT_SKB_CB(skb)->brdev; | |
+ const unsigned char *src = eth_hdr(skb)->h_source; | |
+ | |
+ if (!should_deliver(p, skb)) | |
+ return prev; | |
+ | |
+ /* Even with hairpin, no soliloquies - prevent breaking IPv6 DAD */ | |
+ if (skb->dev == p->dev && ether_addr_equal(src, addr)) | |
+ return prev; | |
+ | |
+ skb = skb_copy(skb, GFP_ATOMIC); | |
+ if (!skb) { | |
+ dev->stats.tx_dropped++; | |
+ return prev; | |
+ } | |
+ | |
+ memcpy(eth_hdr(skb)->h_dest, addr, ETH_ALEN); | |
+ __packet_hook(p, skb); | |
+ | |
+ return prev; | |
+} | |
+ | |
/* called under bridge lock */ | |
static void br_flood(struct net_bridge *br, struct sk_buff *skb, | |
struct sk_buff *skb0, | |
void (*__packet_hook)(const struct net_bridge_port *p, | |
struct sk_buff *skb), | |
- bool unicast) | |
+ bool unicast, bool forward) | |
{ | |
struct net_bridge_port *p; | |
struct net_bridge_port *prev; | |
@@ -205,6 +233,8 @@ static void br_flood(struct net_bridge *br, struct sk_buff *skb, | |
prev = NULL; | |
list_for_each_entry_rcu(p, &br->port_list, list) { | |
+ if (forward && (p->flags & BR_ISOLATE_MODE)) | |
+ continue; | |
/* Do not flood unicast traffic to ports that turn it off */ | |
if (unicast && !(p->flags & BR_FLOOD)) | |
continue; | |
@@ -239,15 +269,17 @@ out: | |
/* called with rcu_read_lock */ | |
void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb, bool unicast) | |
{ | |
- br_flood(br, skb, NULL, __br_deliver, unicast); | |
+ br_flood(br, skb, NULL, __br_deliver, unicast, false); | |
} | |
+EXPORT_SYMBOL_GPL(br_deliver); | |
/* called under bridge lock */ | |
void br_flood_forward(struct net_bridge *br, struct sk_buff *skb, | |
struct sk_buff *skb2, bool unicast) | |
{ | |
- br_flood(br, skb, skb2, __br_forward, unicast); | |
+ br_flood(br, skb, skb2, __br_forward, unicast, true); | |
} | |
+EXPORT_SYMBOL_GPL(br_forward); | |
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING | |
/* called with rcu_read_lock */ | |
@@ -262,6 +294,7 @@ static void br_multicast_flood(struct net_bridge_mdb_entry *mdst, | |
struct net_bridge_port *prev = NULL; | |
struct net_bridge_port_group *p; | |
struct hlist_node *rp; | |
+ const unsigned char *addr; | |
rp = rcu_dereference(hlist_first_rcu(&br->router_list)); | |
p = mdst ? rcu_dereference(mdst->ports) : NULL; | |
@@ -272,10 +305,19 @@ static void br_multicast_flood(struct net_bridge_mdb_entry *mdst, | |
rport = rp ? hlist_entry(rp, struct net_bridge_port, rlist) : | |
NULL; | |
- port = (unsigned long)lport > (unsigned long)rport ? | |
- lport : rport; | |
+ if ((unsigned long)lport > (unsigned long)rport) { | |
+ port = lport; | |
+ addr = p->unicast ? p->eth_addr : NULL; | |
+ } else { | |
+ port = rport; | |
+ addr = NULL; | |
+ } | |
- prev = maybe_deliver(prev, port, skb, __packet_hook); | |
+ if (addr) | |
+ prev = maybe_deliver_addr(prev, port, skb, addr, | |
+ __packet_hook); | |
+ else | |
+ prev = maybe_deliver(prev, port, skb, __packet_hook); | |
if (IS_ERR(prev)) | |
goto out; | |
--- a/net/bridge/br_if.c | |
+++ b/net/bridge/br_if.c | |
@@ -1,3 +1,9 @@ | |
+/* | |
+ ************************************************************************** | |
+ * Copyright (c) 2015-2016, The Linux Foundation. All rights reserved. | |
+ ************************************************************************** | |
+ */ | |
+ | |
/* | |
* Userspace interface | |
* Linux ethernet bridge | |
@@ -28,6 +34,10 @@ | |
#include "br_private.h" | |
+/* Hook for external forwarding logic */ | |
+br_port_dev_get_hook_t __rcu *br_port_dev_get_hook __read_mostly; | |
+EXPORT_SYMBOL_GPL(br_port_dev_get_hook); | |
+ | |
/* | |
* Determine initial path cost based on speed. | |
* using recommendations from 802.1d standard | |
@@ -456,8 +466,8 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) | |
if (dev->netdev_ops->ndo_start_xmit == br_dev_xmit) | |
return -ELOOP; | |
- /* Device is already being bridged */ | |
- if (br_port_exists(dev)) | |
+ /* Device has master upper dev */ | |
+ if (netdev_master_upper_dev_get(dev)) | |
return -EBUSY; | |
/* No bridging devices that dislike that (e.g. wireless) */ | |
@@ -530,6 +540,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) | |
dev_set_mtu(br->dev, br_min_mtu(br)); | |
kobject_uevent(&p->kobj, KOBJ_ADD); | |
+ call_netdevice_notifiers(NETDEV_BR_JOIN, dev); | |
return 0; | |
@@ -561,6 +572,8 @@ int br_del_if(struct net_bridge *br, struct net_device *dev) | |
if (!p || p->br != br) | |
return -EINVAL; | |
+ call_netdevice_notifiers(NETDEV_BR_LEAVE, dev); | |
+ | |
/* Since more than one interface can be attached to a bridge, | |
* there still maybe an alternate path for netconsole to use; | |
* therefore there is no reason for a NETDEV_RELEASE event. | |
@@ -588,3 +601,86 @@ void br_port_flags_change(struct net_bridge_port *p, unsigned long mask) | |
if (mask & BR_AUTO_MASK) | |
nbp_update_port_count(br); | |
} | |
+ | |
+/* br_port_dev_get() | |
+ * If a skb is provided, and the br_port_dev_get_hook_t hook exists, | |
+ * use that to try and determine the egress port for that skb. | |
+ * If not, or no egress port could be determined, use the given addr | |
+ * to identify the port to which it is reachable, | |
+ * returning a reference to the net device associated with that port. | |
+ * | |
+ * NOTE: Return NULL if given dev is not a bridge or the mac has no | |
+ * associated port. | |
+ */ | |
+struct net_device *br_port_dev_get(struct net_device *dev, unsigned char *addr, | |
+ struct sk_buff *skb, | |
+ unsigned int cookie) | |
+{ | |
+ struct net_bridge_fdb_entry *fdbe; | |
+ struct net_bridge *br; | |
+ struct net_device *netdev = NULL; | |
+ | |
+ /* Is this a bridge? */ | |
+ if (!(dev->priv_flags & IFF_EBRIDGE)) | |
+ return NULL; | |
+ | |
+ rcu_read_lock(); | |
+ | |
+ /* If the hook exists and the skb isn't NULL, try and get the port */ | |
+ if (skb) { | |
+ br_port_dev_get_hook_t *port_dev_get_hook; | |
+ | |
+ port_dev_get_hook = rcu_dereference(br_port_dev_get_hook); | |
+ if (port_dev_get_hook) { | |
+ struct net_bridge_port *pdst = | |
+ __br_get(port_dev_get_hook, NULL, dev, skb, | |
+ addr, cookie); | |
+ if (pdst) { | |
+ dev_hold(pdst->dev); | |
+ netdev = pdst->dev; | |
+ goto out; | |
+ } | |
+ } | |
+ } | |
+ | |
+ /* Either there is no hook, or can't | |
+ * determine the port to use - fall back to using FDB | |
+ */ | |
+ | |
+ br = netdev_priv(dev); | |
+ | |
+ /* Lookup the fdb entry and get reference to the port dev */ | |
+ fdbe = __br_fdb_get(br, addr, 0); | |
+ if (fdbe && fdbe->dst) { | |
+ netdev = fdbe->dst->dev; /* port device */ | |
+ dev_hold(netdev); | |
+ } | |
+out: | |
+ rcu_read_unlock(); | |
+ return netdev; | |
+} | |
+EXPORT_SYMBOL_GPL(br_port_dev_get); | |
+ | |
+/* Update bridge statistics for bridge packets processed by offload engines */ | |
+void br_dev_update_stats(struct net_device *dev, | |
+ struct rtnl_link_stats64 *nlstats) | |
+{ | |
+ struct net_bridge *br; | |
+ struct pcpu_sw_netstats *stats; | |
+ | |
+ /* Is this a bridge? */ | |
+ if (!(dev->priv_flags & IFF_EBRIDGE)) | |
+ return; | |
+ | |
+ br = netdev_priv(dev); | |
+ stats = per_cpu_ptr(br->stats, 0); | |
+ | |
+ u64_stats_update_begin(&stats->syncp); | |
+ stats->rx_packets += nlstats->rx_packets; | |
+ stats->rx_bytes += nlstats->rx_bytes; | |
+ stats->tx_packets += nlstats->tx_packets; | |
+ stats->tx_bytes += nlstats->tx_bytes; | |
+ u64_stats_update_end(&stats->syncp); | |
+} | |
+EXPORT_SYMBOL_GPL(br_dev_update_stats); | |
+ | |
--- a/net/bridge/br_input.c | |
+++ b/net/bridge/br_input.c | |
@@ -33,7 +33,15 @@ br_netif_receive_skb(struct net *net, struct sock *sk, struct sk_buff *skb) | |
return netif_receive_skb(skb); | |
} | |
-static int br_pass_frame_up(struct sk_buff *skb) | |
+/* Hook for external Multicast handler */ | |
+br_multicast_handle_hook_t __rcu *br_multicast_handle_hook __read_mostly; | |
+EXPORT_SYMBOL_GPL(br_multicast_handle_hook); | |
+ | |
+/* Hook for external forwarding logic */ | |
+br_get_dst_hook_t __rcu *br_get_dst_hook __read_mostly; | |
+EXPORT_SYMBOL_GPL(br_get_dst_hook); | |
+ | |
+int br_pass_frame_up(struct sk_buff *skb) | |
{ | |
struct net_device *indev, *brdev = BR_INPUT_SKB_CB(skb)->brdev; | |
struct net_bridge *br = netdev_priv(brdev); | |
@@ -62,10 +70,11 @@ static int br_pass_frame_up(struct sk_buff *skb) | |
if (!skb) | |
return NET_RX_DROP; | |
- return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, | |
+ return BR_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, | |
dev_net(indev), NULL, skb, indev, NULL, | |
br_netif_receive_skb); | |
} | |
+EXPORT_SYMBOL_GPL(br_pass_frame_up); | |
static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br, | |
u16 vid, struct net_bridge_port *p) | |
@@ -135,6 +144,8 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb | |
struct net_bridge_fdb_entry *dst; | |
struct net_bridge_mdb_entry *mdst; | |
struct sk_buff *skb2; | |
+ struct net_bridge_port *pdst = NULL; | |
+ br_get_dst_hook_t *get_dst_hook = rcu_dereference(br_get_dst_hook); | |
bool unicast = true; | |
u16 vid = 0; | |
@@ -153,7 +164,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb | |
br_multicast_rcv(br, p, skb, vid)) | |
goto drop; | |
- if (p->state == BR_STATE_LEARNING) | |
+ if ((p->state == BR_STATE_LEARNING) && skb->protocol != htons(ETH_P_PAE)) | |
goto drop; | |
BR_INPUT_SKB_CB(skb)->brdev = br->dev; | |
@@ -169,10 +180,19 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb | |
if (IS_ENABLED(CONFIG_INET) && skb->protocol == htons(ETH_P_ARP)) | |
br_do_proxy_arp(skb, br, vid, p); | |
- if (is_broadcast_ether_addr(dest)) { | |
+ if (skb->protocol == htons(ETH_P_PAE)) { | |
+ skb2 = skb; | |
+ /* Do not forward 802.1x/EAP frames */ | |
+ skb = NULL; | |
+ } else if (is_broadcast_ether_addr(dest)) { | |
skb2 = skb; | |
unicast = false; | |
} else if (is_multicast_ether_addr(dest)) { | |
+ br_multicast_handle_hook_t *multicast_handle_hook = | |
+ rcu_dereference(br_multicast_handle_hook); | |
+ if (!__br_get(multicast_handle_hook, true, p, skb)) | |
+ goto out; | |
+ | |
mdst = br_mdb_get(br, skb, vid); | |
if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) && | |
br_multicast_querier_exists(br, eth_hdr(skb))) { | |
@@ -188,18 +208,31 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb | |
unicast = false; | |
br->dev->stats.multicast++; | |
- } else if ((dst = __br_fdb_get(br, dest, vid)) && | |
- dst->is_local) { | |
- skb2 = skb; | |
- /* Do not forward the packet since it's local. */ | |
- skb = NULL; | |
+ } else { | |
+ pdst = __br_get(get_dst_hook, NULL, p, &skb); | |
+ if (pdst) { | |
+ if (!skb) | |
+ goto out; | |
+ } else { | |
+ dst = __br_fdb_get(br, dest, vid); | |
+ if ((p->flags & BR_ISOLATE_MODE) || | |
+ (dst && dst->is_local)) { | |
+ skb2 = skb; | |
+ /* Do not forward the packet since it's local. */ | |
+ skb = NULL; | |
+ } | |
+ } | |
} | |
if (skb) { | |
if (dst) { | |
dst->used = jiffies; | |
- br_forward(dst->dst, skb, skb2); | |
- } else | |
+ pdst = dst->dst; | |
+ } | |
+ | |
+ if (pdst) | |
+ br_forward(pdst, skb, skb2); | |
+ else | |
br_flood_forward(br, skb, skb2, unicast); | |
} | |
@@ -218,11 +251,13 @@ EXPORT_SYMBOL_GPL(br_handle_frame_finish); | |
static int br_handle_local_finish(struct net *net, struct sock *sk, struct sk_buff *skb) | |
{ | |
struct net_bridge_port *p = br_port_get_rcu(skb->dev); | |
- u16 vid = 0; | |
+ if (p->state != BR_STATE_DISABLED) { | |
+ u16 vid = 0; | |
- /* check if vlan is allowed, to avoid spoofing */ | |
- if (p->flags & BR_LEARNING && br_should_learn(p, skb, &vid)) | |
- br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid, false); | |
+ /* check if vlan is allowed, to avoid spoofing */ | |
+ if (p->flags & BR_LEARNING && br_should_learn(p, skb, &vid)) | |
+ br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid, false); | |
+ } | |
return 0; /* process further */ | |
} | |
@@ -285,7 +320,7 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb) | |
} | |
/* Deliver packet to local host only */ | |
- if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, | |
+ if (BR_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, | |
dev_net(skb->dev), NULL, skb, skb->dev, NULL, | |
br_handle_local_finish)) { | |
return RX_HANDLER_CONSUMED; /* consumed by filter */ | |
@@ -297,6 +332,21 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb) | |
forward: | |
switch (p->state) { | |
+ case BR_STATE_DISABLED: | |
+ if (skb->protocol == htons(ETH_P_PAE)) { | |
+ if (ether_addr_equal(p->br->dev->dev_addr, dest)) | |
+ skb->pkt_type = PACKET_HOST; | |
+ | |
+ if (BR_HOOK(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, dev_net(skb->dev), NULL, | |
+ skb, skb->dev, NULL, br_handle_local_finish)) | |
+ break; | |
+ | |
+ BR_INPUT_SKB_CB(skb)->brdev = p->br->dev; | |
+ br_pass_frame_up(skb); | |
+ break; | |
+ } | |
+ goto drop; | |
+ | |
case BR_STATE_FORWARDING: | |
rhook = rcu_dereference(br_should_route_hook); | |
if (rhook) { | |
@@ -311,7 +361,7 @@ forward: | |
if (ether_addr_equal(p->br->dev->dev_addr, dest)) | |
skb->pkt_type = PACKET_HOST; | |
- NF_HOOK(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, | |
+ BR_HOOK(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, | |
dev_net(skb->dev), NULL, skb, skb->dev, NULL, | |
br_handle_frame_finish); | |
break; | |
--- a/net/bridge/br_mdb.c | |
+++ b/net/bridge/br_mdb.c | |
@@ -416,7 +416,7 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, | |
break; | |
} | |
- p = br_multicast_new_port_group(port, group, *pp, state); | |
+ p = br_multicast_new_port_group(port, group, *pp, state, NULL); | |
if (unlikely(!p)) | |
return -ENOMEM; | |
rcu_assign_pointer(*pp, p); | |
--- a/net/bridge/br_multicast.c | |
+++ b/net/bridge/br_multicast.c | |
@@ -30,6 +30,7 @@ | |
#include <net/ipv6.h> | |
#include <net/mld.h> | |
#include <net/ip6_checksum.h> | |
+#include <net/ip6_route.h> | |
#include <net/addrconf.h> | |
#endif | |
@@ -42,12 +43,13 @@ static void br_multicast_add_router(struct net_bridge *br, | |
static void br_ip4_multicast_leave_group(struct net_bridge *br, | |
struct net_bridge_port *port, | |
__be32 group, | |
- __u16 vid); | |
+ __u16 vid, | |
+ const unsigned char *src); | |
#if IS_ENABLED(CONFIG_IPV6) | |
static void br_ip6_multicast_leave_group(struct net_bridge *br, | |
struct net_bridge_port *port, | |
const struct in6_addr *group, | |
- __u16 vid); | |
+ __u16 vid, const unsigned char *src); | |
#endif | |
unsigned int br_mdb_rehash_seq; | |
@@ -652,7 +654,8 @@ struct net_bridge_port_group *br_multicast_new_port_group( | |
struct net_bridge_port *port, | |
struct br_ip *group, | |
struct net_bridge_port_group __rcu *next, | |
- unsigned char state) | |
+ unsigned char state, | |
+ const unsigned char *src) | |
{ | |
struct net_bridge_port_group *p; | |
@@ -667,12 +670,33 @@ struct net_bridge_port_group *br_multicast_new_port_group( | |
hlist_add_head(&p->mglist, &port->mglist); | |
setup_timer(&p->timer, br_multicast_port_group_expired, | |
(unsigned long)p); | |
+ if ((port->flags & BR_MULTICAST_TO_UCAST) && src) { | |
+ memcpy(p->eth_addr, src, ETH_ALEN); | |
+ p->unicast = true; | |
+ } | |
return p; | |
} | |
+static bool br_port_group_equal(struct net_bridge_port_group *p, | |
+ struct net_bridge_port *port, | |
+ const unsigned char *src) | |
+{ | |
+ if (p->port != port) | |
+ return false; | |
+ | |
+ if (!p->unicast) | |
+ return true; | |
+ | |
+ if (!src) | |
+ return false; | |
+ | |
+ return ether_addr_equal(src, p->eth_addr); | |
+} | |
+ | |
static int br_multicast_add_group(struct net_bridge *br, | |
struct net_bridge_port *port, | |
- struct br_ip *group) | |
+ struct br_ip *group, | |
+ const unsigned char *src) | |
{ | |
struct net_bridge_mdb_entry *mp; | |
struct net_bridge_port_group *p; | |
@@ -699,13 +723,13 @@ static int br_multicast_add_group(struct net_bridge *br, | |
for (pp = &mp->ports; | |
(p = mlock_dereference(*pp, br)) != NULL; | |
pp = &p->next) { | |
- if (p->port == port) | |
+ if (br_port_group_equal(p, port, src)) | |
goto found; | |
if ((unsigned long)p->port < (unsigned long)port) | |
break; | |
} | |
- p = br_multicast_new_port_group(port, group, *pp, MDB_TEMPORARY); | |
+ p = br_multicast_new_port_group(port, group, *pp, MDB_TEMPORARY, src); | |
if (unlikely(!p)) | |
goto err; | |
rcu_assign_pointer(*pp, p); | |
@@ -724,7 +748,7 @@ err: | |
static int br_ip4_multicast_add_group(struct net_bridge *br, | |
struct net_bridge_port *port, | |
__be32 group, | |
- __u16 vid) | |
+ __u16 vid, const unsigned char *src) | |
{ | |
struct br_ip br_group; | |
@@ -735,14 +759,14 @@ static int br_ip4_multicast_add_group(struct net_bridge *br, | |
br_group.proto = htons(ETH_P_IP); | |
br_group.vid = vid; | |
- return br_multicast_add_group(br, port, &br_group); | |
+ return br_multicast_add_group(br, port, &br_group, src); | |
} | |
#if IS_ENABLED(CONFIG_IPV6) | |
static int br_ip6_multicast_add_group(struct net_bridge *br, | |
struct net_bridge_port *port, | |
const struct in6_addr *group, | |
- __u16 vid) | |
+ __u16 vid, const unsigned char *src) | |
{ | |
struct br_ip br_group; | |
@@ -753,7 +777,7 @@ static int br_ip6_multicast_add_group(struct net_bridge *br, | |
br_group.proto = htons(ETH_P_IPV6); | |
br_group.vid = vid; | |
- return br_multicast_add_group(br, port, &br_group); | |
+ return br_multicast_add_group(br, port, &br_group, src); | |
} | |
#endif | |
@@ -832,7 +856,7 @@ static void __br_multicast_send_query(struct net_bridge *br, | |
if (port) { | |
skb->dev = port->dev; | |
- NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, | |
+ BR_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, | |
dev_net(port->dev), NULL, skb, NULL, skb->dev, | |
br_dev_queue_push_xmit); | |
} else { | |
@@ -1003,6 +1027,7 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br, | |
struct sk_buff *skb, | |
u16 vid) | |
{ | |
+ const unsigned char *src = eth_hdr(skb)->h_source; | |
struct igmpv3_report *ih; | |
struct igmpv3_grec *grec; | |
int i; | |
@@ -1046,9 +1071,9 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br, | |
if ((type == IGMPV3_CHANGE_TO_INCLUDE || | |
type == IGMPV3_MODE_IS_INCLUDE) && | |
ntohs(grec->grec_nsrcs) == 0) { | |
- br_ip4_multicast_leave_group(br, port, group, vid); | |
+ br_ip4_multicast_leave_group(br, port, group, vid, src); | |
} else { | |
- err = br_ip4_multicast_add_group(br, port, group, vid); | |
+ err = br_ip4_multicast_add_group(br, port, group, vid, src); | |
if (err) | |
break; | |
} | |
@@ -1063,6 +1088,7 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, | |
struct sk_buff *skb, | |
u16 vid) | |
{ | |
+ const unsigned char *src = eth_hdr(skb)->h_source; | |
struct icmp6hdr *icmp6h; | |
struct mld2_grec *grec; | |
int i; | |
@@ -1114,10 +1140,10 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, | |
grec->grec_type == MLD2_MODE_IS_INCLUDE) && | |
ntohs(*nsrcs) == 0) { | |
br_ip6_multicast_leave_group(br, port, &grec->grec_mca, | |
- vid); | |
+ vid, src); | |
} else { | |
err = br_ip6_multicast_add_group(br, port, | |
- &grec->grec_mca, vid); | |
+ &grec->grec_mca, vid, src); | |
if (err) | |
break; | |
} | |
@@ -1432,7 +1458,8 @@ br_multicast_leave_group(struct net_bridge *br, | |
struct net_bridge_port *port, | |
struct br_ip *group, | |
struct bridge_mcast_other_query *other_query, | |
- struct bridge_mcast_own_query *own_query) | |
+ struct bridge_mcast_own_query *own_query, | |
+ const unsigned char *src) | |
{ | |
struct net_bridge_mdb_htable *mdb; | |
struct net_bridge_mdb_entry *mp; | |
@@ -1456,7 +1483,7 @@ br_multicast_leave_group(struct net_bridge *br, | |
for (pp = &mp->ports; | |
(p = mlock_dereference(*pp, br)) != NULL; | |
pp = &p->next) { | |
- if (p->port != port) | |
+ if (!br_port_group_equal(p, port, src)) | |
continue; | |
rcu_assign_pointer(*pp, p->next); | |
@@ -1519,7 +1546,7 @@ br_multicast_leave_group(struct net_bridge *br, | |
for (p = mlock_dereference(mp->ports, br); | |
p != NULL; | |
p = mlock_dereference(p->next, br)) { | |
- if (p->port != port) | |
+ if (!br_port_group_equal(p, port, src)) | |
continue; | |
if (!hlist_unhashed(&p->mglist) && | |
@@ -1537,8 +1564,8 @@ out: | |
static void br_ip4_multicast_leave_group(struct net_bridge *br, | |
struct net_bridge_port *port, | |
- __be32 group, | |
- __u16 vid) | |
+ __be32 group, __u16 vid, | |
+ const unsigned char *src) | |
{ | |
struct br_ip br_group; | |
struct bridge_mcast_own_query *own_query; | |
@@ -1553,14 +1580,14 @@ static void br_ip4_multicast_leave_group(struct net_bridge *br, | |
br_group.vid = vid; | |
br_multicast_leave_group(br, port, &br_group, &br->ip4_other_query, | |
- own_query); | |
+ own_query, src); | |
} | |
#if IS_ENABLED(CONFIG_IPV6) | |
static void br_ip6_multicast_leave_group(struct net_bridge *br, | |
struct net_bridge_port *port, | |
const struct in6_addr *group, | |
- __u16 vid) | |
+ __u16 vid, const unsigned char *src) | |
{ | |
struct br_ip br_group; | |
struct bridge_mcast_own_query *own_query; | |
@@ -1575,7 +1602,7 @@ static void br_ip6_multicast_leave_group(struct net_bridge *br, | |
br_group.vid = vid; | |
br_multicast_leave_group(br, port, &br_group, &br->ip6_other_query, | |
- own_query); | |
+ own_query, src); | |
} | |
#endif | |
@@ -1584,6 +1611,7 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, | |
struct sk_buff *skb, | |
u16 vid) | |
{ | |
+ const unsigned char *src; | |
struct sk_buff *skb_trimmed = NULL; | |
struct igmphdr *ih; | |
int err; | |
@@ -1600,12 +1628,13 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, | |
BR_INPUT_SKB_CB(skb)->igmp = 1; | |
ih = igmp_hdr(skb); | |
+ src = eth_hdr(skb)->h_source; | |
switch (ih->type) { | |
case IGMP_HOST_MEMBERSHIP_REPORT: | |
case IGMPV2_HOST_MEMBERSHIP_REPORT: | |
BR_INPUT_SKB_CB(skb)->mrouters_only = 1; | |
- err = br_ip4_multicast_add_group(br, port, ih->group, vid); | |
+ err = br_ip4_multicast_add_group(br, port, ih->group, vid, src); | |
break; | |
case IGMPV3_HOST_MEMBERSHIP_REPORT: | |
err = br_ip4_multicast_igmp3_report(br, port, skb_trimmed, vid); | |
@@ -1614,7 +1643,7 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, | |
err = br_ip4_multicast_query(br, port, skb_trimmed, vid); | |
break; | |
case IGMP_HOST_LEAVE_MESSAGE: | |
- br_ip4_multicast_leave_group(br, port, ih->group, vid); | |
+ br_ip4_multicast_leave_group(br, port, ih->group, vid, src); | |
break; | |
} | |
@@ -1625,11 +1654,268 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, | |
} | |
#if IS_ENABLED(CONFIG_IPV6) | |
+static int br_ndisc_send_na_finish(struct net *net, struct sock *sk, | |
+ struct sk_buff *skb) | |
+{ | |
+ return dev_queue_xmit(skb); | |
+} | |
+ | |
+static int br_ndisc_send_na(struct net_device *dev, | |
+ const struct in6_addr *daddr, | |
+ const struct in6_addr *solicited_addr, | |
+ const u8 *target_lladdr, bool solicited, | |
+ bool override, const u8 *dest_hw) | |
+{ | |
+ struct sk_buff *skb; | |
+ struct nd_msg *msg; | |
+ int hlen = LL_RESERVED_SPACE(dev); | |
+ int tlen = dev->needed_tailroom; | |
+ struct dst_entry *dst; | |
+ struct net *net = dev_net(dev); | |
+ struct sock *sk = net->ipv6.ndisc_sk; | |
+ struct inet6_dev *idev; | |
+ int err; | |
+ struct ipv6hdr *hdr; | |
+ struct icmp6hdr *icmp6h; | |
+ u8 type; | |
+ const struct in6_addr *saddr = solicited_addr; | |
+ int pad, data_len, space; | |
+ u8 *opt; | |
+ | |
+ skb = alloc_skb(hlen + sizeof(struct ipv6hdr) + sizeof(*msg) + | |
+ ndisc_opt_addr_space(dev) + tlen, GFP_ATOMIC); | |
+ if (!skb) | |
+ return -ENOMEM; | |
+ | |
+ skb->protocol = htons(ETH_P_IPV6); | |
+ skb->dev = dev; | |
+ | |
+ skb_reserve(skb, hlen + sizeof(struct ipv6hdr)); | |
+ skb_reset_transport_header(skb); | |
+ | |
+ /* Manually assign socket ownership as we avoid calling | |
+ * sock_alloc_send_pskb() to bypass wmem buffer limits | |
+ */ | |
+ skb_set_owner_w(skb, sk); | |
+ | |
+ msg = (struct nd_msg *)skb_put(skb, sizeof(*msg)); | |
+ *msg = (struct nd_msg) { | |
+ .icmph = { | |
+ .icmp6_type = ICMPV6_NDISC_NBR_ADVERTISEMENT, | |
+ .icmp6_router = false, | |
+ .icmp6_solicited = solicited, | |
+ .icmp6_override = override, | |
+ }, | |
+ .target = *solicited_addr, | |
+ }; | |
+ | |
+ /* We are replying on behalf of another entity. Let that entity's | |
+ * address be the target ll addr and src_addr. | |
+ */ | |
+ pad = ndisc_addr_option_pad(skb->dev->type); | |
+ data_len = skb->dev->addr_len; | |
+ space = ndisc_opt_addr_space(skb->dev); | |
+ opt = skb_put(skb, space); | |
+ | |
+ opt[0] = ND_OPT_TARGET_LL_ADDR; | |
+ opt[1] = space >> 3; | |
+ | |
+ memset(opt + 2, 0, pad); | |
+ opt += pad; | |
+ space -= pad; | |
+ | |
+ memcpy(opt + 2, target_lladdr, dev->addr_len); | |
+ data_len += 2; | |
+ opt += data_len; | |
+ space -= data_len; | |
+ if (space > 0) | |
+ memset(opt, 0, space); | |
+ | |
+ dst = skb_dst(skb); | |
+ icmp6h = icmp6_hdr(skb); | |
+ | |
+ type = icmp6h->icmp6_type; | |
+ | |
+ if (!dst) { | |
+ struct flowi6 fl6; | |
+ | |
+ icmpv6_flow_init(sk, &fl6, type, saddr, daddr, | |
+ skb->dev->ifindex); | |
+ dst = icmp6_dst_alloc(skb->dev, &fl6); | |
+ if (IS_ERR(dst)) | |
+ goto out; | |
+ | |
+ skb_dst_set(skb, dst); | |
+ } | |
+ | |
+ icmp6h->icmp6_cksum = csum_ipv6_magic(saddr, daddr, skb->len, | |
+ IPPROTO_ICMPV6, | |
+ csum_partial(icmp6h, | |
+ skb->len, 0)); | |
+ | |
+ skb_push(skb, sizeof(*hdr)); | |
+ skb_reset_network_header(skb); | |
+ hdr = ipv6_hdr(skb); | |
+ | |
+ ip6_flow_hdr(hdr, 0, 0); | |
+ | |
+ hdr->payload_len = htons(skb->len - sizeof(*hdr)); | |
+ hdr->nexthdr = IPPROTO_ICMPV6; | |
+ hdr->hop_limit = inet6_sk(sk)->hop_limit; | |
+ | |
+ hdr->saddr = *saddr; | |
+ hdr->daddr = *daddr; | |
+ | |
+ /* We are replying on behalf of another entity. Use that entity's | |
+ * address as the source link layer address if we have all the needed | |
+ * information to build the link layer header. | |
+ */ | |
+ if (dest_hw && | |
+ dev_hard_header(skb, dev, ETH_P_IPV6, dest_hw, target_lladdr, | |
+ skb->len) < 0) | |
+ goto out; | |
+ | |
+ rcu_read_lock(); | |
+ idev = __in6_dev_get(dst->dev); | |
+ IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); | |
+ | |
+ err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, NULL, | |
+ dst->dev, dest_hw ? br_ndisc_send_na_finish : dst_output); | |
+ | |
+ if (!err) { | |
+ ICMP6MSGOUT_INC_STATS(net, idev, type); | |
+ ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); | |
+ } | |
+ | |
+ rcu_read_unlock(); | |
+ return 0; | |
+ | |
+out: | |
+ kfree_skb(skb); | |
+ return -EINVAL; | |
+} | |
+ | |
+static const u8 *br_get_ndisc_lladdr(const u8 *opt, int opt_len, | |
+ unsigned int alen) | |
+{ | |
+ const struct nd_opt_hdr *nd_opt = (const struct nd_opt_hdr *)opt; | |
+ | |
+ while (opt_len > sizeof(struct nd_opt_hdr)) { | |
+ int l; | |
+ | |
+ l = nd_opt->nd_opt_len << 3; | |
+ if (opt_len < l || l == 0) | |
+ return NULL; | |
+ | |
+ if (nd_opt->nd_opt_type == ND_OPT_SOURCE_LL_ADDR) { | |
+ if (l >= 2 + alen) | |
+ return (const u8 *)(nd_opt + 1); | |
+ } | |
+ | |
+ opt_len -= l; | |
+ nd_opt = ((void *)nd_opt) + l; | |
+ } | |
+ | |
+ return NULL; | |
+} | |
+ | |
+static void br_do_proxy_ndisc(struct sk_buff *skb, struct net_bridge *br, | |
+ u16 vid, struct net_bridge_port *p) | |
+{ | |
+ struct net_device *dev = br->dev; | |
+ struct nd_msg *msg; | |
+ const struct ipv6hdr *iphdr; | |
+ const struct in6_addr *saddr, *daddr; | |
+ struct neighbour *n, *n_sender = NULL; | |
+ struct net_bridge_fdb_entry *f; | |
+ int ndoptlen; | |
+ bool override = false, solicited = true; | |
+ bool dad; | |
+ const struct in6_addr *daddr_na; | |
+ const u8 *dest_hw = NULL; | |
+ | |
+ BR_INPUT_SKB_CB(skb)->proxyarp_replied = false; | |
+ | |
+ if (!p) | |
+ return; | |
+ | |
+ if (!pskb_may_pull(skb, skb->len)) | |
+ return; | |
+ | |
+ iphdr = ipv6_hdr(skb); | |
+ saddr = &iphdr->saddr; | |
+ daddr = &iphdr->daddr; | |
+ | |
+ msg = (struct nd_msg *)skb_transport_header(skb); | |
+ if (msg->icmph.icmp6_code != 0 || | |
+ msg->icmph.icmp6_type != ICMPV6_NDISC_NBR_SOLICITATION) | |
+ return; | |
+ | |
+ if (ipv6_addr_loopback(daddr) || | |
+ ipv6_addr_is_multicast(&msg->target)) | |
+ return; | |
+ | |
+ n = neigh_lookup(&nd_tbl, &msg->target, dev); | |
+ if (!n) | |
+ return; | |
+ | |
+ if (!(n->nud_state & NUD_VALID)) | |
+ goto out; | |
+ | |
+ f = __br_fdb_get(br, n->ha, vid); | |
+ if (!f) | |
+ goto out; | |
+ | |
+ if (!(p->flags & BR_PROXYARP) && | |
+ !(f->dst && (f->dst->flags & BR_PROXYARP_WIFI))) | |
+ goto out; | |
+ | |
+ dad = ipv6_addr_any(saddr); | |
+ daddr_na = saddr; | |
+ | |
+ if (dad && !ipv6_addr_is_solict_mult(daddr)) | |
+ goto out; | |
+ | |
+ if (dad) { | |
+ override = true; | |
+ solicited = false; | |
+ daddr_na = &in6addr_linklocal_allnodes; | |
+ } | |
+ | |
+ if (!(p->flags & BR_PROXYARP)) { | |
+ ndoptlen = skb_tail_pointer(skb) - | |
+ (skb_transport_header(skb) + | |
+ offsetof(struct nd_msg, opt)); | |
+ dest_hw = br_get_ndisc_lladdr(msg->opt, ndoptlen, | |
+ dev->addr_len); | |
+ if (!dest_hw && !dad) { | |
+ n_sender = neigh_lookup(&nd_tbl, saddr, dev); | |
+ if (n_sender) | |
+ dest_hw = n_sender->ha; | |
+ } | |
+ | |
+ if (dest_hw && is_multicast_ether_addr(dest_hw)) | |
+ dest_hw = NULL; | |
+ } | |
+ | |
+ if (br_ndisc_send_na(dev, daddr_na, &msg->target, n->ha, solicited, | |
+ override, dest_hw)) | |
+ goto out; | |
+ | |
+ BR_INPUT_SKB_CB(skb)->proxyarp_replied = true; | |
+ | |
+out: | |
+ neigh_release(n); | |
+ if (n_sender) | |
+ neigh_release(n_sender); | |
+} | |
+ | |
static int br_multicast_ipv6_rcv(struct net_bridge *br, | |
struct net_bridge_port *port, | |
struct sk_buff *skb, | |
u16 vid) | |
{ | |
+ const unsigned char *src; | |
struct sk_buff *skb_trimmed = NULL; | |
struct mld_msg *mld; | |
int err; | |
@@ -1649,8 +1935,9 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, | |
switch (mld->mld_type) { | |
case ICMPV6_MGM_REPORT: | |
+ src = eth_hdr(skb)->h_source; | |
BR_INPUT_SKB_CB(skb)->mrouters_only = 1; | |
- err = br_ip6_multicast_add_group(br, port, &mld->mld_mca, vid); | |
+ err = br_ip6_multicast_add_group(br, port, &mld->mld_mca, vid, src); | |
break; | |
case ICMPV6_MLD2_REPORT: | |
err = br_ip6_multicast_mld2_report(br, port, skb_trimmed, vid); | |
@@ -1659,7 +1946,11 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, | |
err = br_ip6_multicast_query(br, port, skb_trimmed, vid); | |
break; | |
case ICMPV6_MGM_REDUCTION: | |
- br_ip6_multicast_leave_group(br, port, &mld->mld_mca, vid); | |
+ src = eth_hdr(skb)->h_source; | |
+ br_ip6_multicast_leave_group(br, port, &mld->mld_mca, vid, src); | |
+ break; | |
+ case ICMPV6_NDISC_NBR_SOLICITATION: | |
+ br_do_proxy_ndisc(skb, br, vid, port); | |
break; | |
} | |
--- a/net/bridge/br_netfilter_hooks.c | |
+++ b/net/bridge/br_netfilter_hooks.c | |
@@ -49,6 +49,7 @@ static struct ctl_table_header *brnf_sysctl_header; | |
static int brnf_call_iptables __read_mostly = 1; | |
static int brnf_call_ip6tables __read_mostly = 1; | |
static int brnf_call_arptables __read_mostly = 1; | |
+static int brnf_call_custom __read_mostly; | |
static int brnf_filter_vlan_tagged __read_mostly; | |
static int brnf_filter_pppoe_tagged __read_mostly; | |
static int brnf_pass_vlan_indev __read_mostly; | |
@@ -56,6 +57,7 @@ static int brnf_pass_vlan_indev __read_mostly; | |
#define brnf_call_iptables 1 | |
#define brnf_call_ip6tables 1 | |
#define brnf_call_arptables 1 | |
+#define brnf_call_custom 1 | |
#define brnf_filter_vlan_tagged 0 | |
#define brnf_filter_pppoe_tagged 0 | |
#define brnf_pass_vlan_indev 0 | |
@@ -70,6 +72,15 @@ static int brnf_pass_vlan_indev __read_mostly; | |
#define IS_ARP(skb) \ | |
(!skb_vlan_tag_present(skb) && skb->protocol == htons(ETH_P_ARP)) | |
+int brnf_call_ebtables __read_mostly; | |
+EXPORT_SYMBOL_GPL(brnf_call_ebtables); | |
+ | |
+bool br_netfilter_run_hooks(void) | |
+{ | |
+ return brnf_call_iptables | brnf_call_ip6tables | brnf_call_arptables | | |
+ brnf_call_ebtables | brnf_call_custom; | |
+} | |
+ | |
static inline __be16 vlan_proto(const struct sk_buff *skb) | |
{ | |
if (skb_vlan_tag_present(skb)) | |
@@ -974,6 +985,13 @@ static struct ctl_table brnf_table[] = { | |
.mode = 0644, | |
.proc_handler = brnf_sysctl_call_tables, | |
}, | |
+ { | |
+ .procname = "bridge-nf-call-custom", | |
+ .data = &brnf_call_custom, | |
+ .maxlen = sizeof(int), | |
+ .mode = 0644, | |
+ .proc_handler = brnf_sysctl_call_tables, | |
+ }, | |
{ } | |
}; | |
#endif | |
--- a/net/bridge/br_netlink.c | |
+++ b/net/bridge/br_netlink.c | |
@@ -449,6 +449,7 @@ void br_ifinfo_notify(int event, struct net_bridge_port *port) | |
kfree_skb(skb); | |
goto errout; | |
} | |
+ __br_notify(RTNLGRP_LINK, event, port); | |
rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); | |
return; | |
errout: | |
--- a/net/bridge/br_private.h | |
+++ b/net/bridge/br_private.h | |
@@ -21,6 +21,8 @@ | |
#include <net/ip6_fib.h> | |
#include <linux/if_vlan.h> | |
#include <linux/rhashtable.h> | |
+#include <linux/export.h> | |
+#include <linux/netfilter.h> | |
#define BR_HASH_BITS 8 | |
#define BR_HASH_SIZE (1 << BR_HASH_BITS) | |
@@ -158,6 +160,9 @@ struct net_bridge_port_group { | |
struct timer_list timer; | |
struct br_ip addr; | |
unsigned char state; | |
+ | |
+ unsigned char eth_addr[ETH_ALEN]; | |
+ bool unicast; | |
}; | |
struct net_bridge_mdb_entry | |
@@ -504,6 +509,7 @@ void br_port_flags_change(struct net_bridge_port *port, unsigned long mask); | |
void br_manage_promisc(struct net_bridge *br); | |
/* br_input.c */ | |
+int br_pass_frame_up(struct sk_buff *skb); | |
int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb); | |
rx_handler_result_t br_handle_frame(struct sk_buff **pskb); | |
@@ -555,7 +561,8 @@ void br_multicast_free_pg(struct rcu_head *head); | |
struct net_bridge_port_group * | |
br_multicast_new_port_group(struct net_bridge_port *port, struct br_ip *group, | |
struct net_bridge_port_group __rcu *next, | |
- unsigned char state); | |
+ unsigned char state, | |
+ const unsigned char *src); | |
void br_mdb_init(void); | |
void br_mdb_uninit(void); | |
void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port, | |
@@ -902,15 +909,29 @@ extern const struct nf_br_ops __rcu *nf_br_ops; | |
/* br_netfilter.c */ | |
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) | |
+extern int brnf_call_ebtables; | |
int br_nf_core_init(void); | |
void br_nf_core_fini(void); | |
void br_netfilter_rtable_init(struct net_bridge *); | |
+bool br_netfilter_run_hooks(void); | |
#else | |
static inline int br_nf_core_init(void) { return 0; } | |
static inline void br_nf_core_fini(void) {} | |
#define br_netfilter_rtable_init(x) | |
+#define br_netfilter_run_hooks() false | |
#endif | |
+static inline int | |
+BR_HOOK(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, | |
+ struct sk_buff *skb, struct net_device *in, struct net_device *out, | |
+ int (*okfn)(struct net *, struct sock *, struct sk_buff *)) | |
+{ | |
+ if (!br_netfilter_run_hooks()) | |
+ return okfn(net, sk, skb); | |
+ | |
+ return NF_HOOK(pf, hook, net, sk, skb, in, out, okfn); | |
+} | |
+ | |
/* br_stp.c */ | |
void br_log_state(const struct net_bridge_port *p); | |
void br_set_state(struct net_bridge_port *p, unsigned int state); | |
@@ -981,4 +1002,15 @@ static inline int br_sysfs_addbr(struct net_device *dev) { return 0; } | |
static inline void br_sysfs_delbr(struct net_device *dev) { return; } | |
#endif /* CONFIG_SYSFS */ | |
+#define __br_get(__hook, __default, __args ...) \ | |
+ (__hook ? (__hook(__args)) : (__default)) | |
+ | |
+static inline void __br_notify(int group, int type, const void *data) | |
+{ | |
+ br_notify_hook_t *notify_hook = rcu_dereference(br_notify_hook); | |
+ | |
+ if (notify_hook) | |
+ notify_hook(group, type, data); | |
+} | |
+ | |
#endif | |
--- a/net/bridge/br_stp_bpdu.c | |
+++ b/net/bridge/br_stp_bpdu.c | |
@@ -60,7 +60,7 @@ static void br_send_bpdu(struct net_bridge_port *p, | |
skb_reset_mac_header(skb); | |
- NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, | |
+ BR_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, | |
dev_net(p->dev), NULL, skb, NULL, skb->dev, | |
br_send_bpdu_finish); | |
} | |
--- a/net/bridge/br_stp_if.c | |
+++ b/net/bridge/br_stp_if.c | |
@@ -166,6 +166,7 @@ static void br_stp_start(struct net_bridge *br) | |
br_debug(br, "using kernel STP\n"); | |
/* To start timers on any ports left in blocking */ | |
+ mod_timer(&br->hello_timer, jiffies + br->hello_time); | |
br_port_state_selection(br); | |
} | |
--- a/net/bridge/br_stp_timer.c | |
+++ b/net/bridge/br_stp_timer.c | |
@@ -40,7 +40,7 @@ static void br_hello_timer_expired(unsigned long arg) | |
if (br->dev->flags & IFF_UP) { | |
br_config_bpdu_generation(br); | |
- if (br->stp_enabled != BR_USER_STP) | |
+ if (br->stp_enabled == BR_KERNEL_STP) | |
mod_timer(&br->hello_timer, | |
round_jiffies(jiffies + br->hello_time)); | |
} | |
--- a/net/bridge/br_sysfs_if.c | |
+++ b/net/bridge/br_sysfs_if.c | |
@@ -173,6 +173,22 @@ BRPORT_ATTR_FLAG(unicast_flood, BR_FLOOD); | |
BRPORT_ATTR_FLAG(proxyarp, BR_PROXYARP); | |
BRPORT_ATTR_FLAG(proxyarp_wifi, BR_PROXYARP_WIFI); | |
+static ssize_t show_isolate_mode(struct net_bridge_port *p, char *buf) | |
+{ | |
+ int isolate_mode = (p->flags & BR_ISOLATE_MODE) ? 1 : 0; | |
+ return sprintf(buf, "%d\n", isolate_mode); | |
+} | |
+static int store_isolate_mode(struct net_bridge_port *p, unsigned long v) | |
+{ | |
+ if (v) | |
+ p->flags |= BR_ISOLATE_MODE; | |
+ else | |
+ p->flags &= ~BR_ISOLATE_MODE; | |
+ return 0; | |
+} | |
+static BRPORT_ATTR(isolate_mode, S_IRUGO | S_IWUSR, | |
+ show_isolate_mode, store_isolate_mode); | |
+ | |
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING | |
static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf) | |
{ | |
@@ -188,6 +204,7 @@ static BRPORT_ATTR(multicast_router, S_IRUGO | S_IWUSR, show_multicast_router, | |
store_multicast_router); | |
BRPORT_ATTR_FLAG(multicast_fast_leave, BR_MULTICAST_FAST_LEAVE); | |
+BRPORT_ATTR_FLAG(multicast_to_unicast, BR_MULTICAST_TO_UCAST); | |
#endif | |
static const struct brport_attribute *brport_attrs[] = { | |
@@ -214,9 +231,11 @@ static const struct brport_attribute *brport_attrs[] = { | |
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING | |
&brport_attr_multicast_router, | |
&brport_attr_multicast_fast_leave, | |
+ &brport_attr_multicast_to_unicast, | |
#endif | |
&brport_attr_proxyarp, | |
&brport_attr_proxyarp_wifi, | |
+ &brport_attr_isolate_mode, | |
NULL | |
}; | |
--- a/net/bridge/netfilter/ebtables.c | |
+++ b/net/bridge/netfilter/ebtables.c | |
@@ -2416,11 +2416,13 @@ static int __init ebtables_init(void) | |
} | |
printk(KERN_INFO "Ebtables v2.0 registered\n"); | |
+ brnf_call_ebtables = 1; | |
return 0; | |
} | |
static void __exit ebtables_fini(void) | |
{ | |
+ brnf_call_ebtables = 0; | |
nf_unregister_sockopt(&ebt_sockopts); | |
xt_unregister_target(&ebt_standard_target); | |
printk(KERN_INFO "Ebtables v2.0 unregistered\n"); | |
--- a/net/core/Makefile | |
+++ b/net/core/Makefile | |
@@ -9,8 +9,9 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o | |
obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \ | |
neighbour.o rtnetlink.o utils.o link_watch.o filter.o \ | |
- sock_diag.o dev_ioctl.o tso.o | |
+ dev_ioctl.o tso.o | |
+obj-$(CONFIG_SOCK_DIAG) += sock_diag.o | |
obj-$(CONFIG_XFRM) += flow.o | |
obj-y += net-sysfs.o | |
obj-$(CONFIG_PROC_FS) += net-procfs.o | |
@@ -24,3 +25,5 @@ obj-$(CONFIG_NET_PTP_CLASSIFY) += ptp_classifier.o | |
obj-$(CONFIG_CGROUP_NET_PRIO) += netprio_cgroup.o | |
obj-$(CONFIG_CGROUP_NET_CLASSID) += netclassid_cgroup.o | |
obj-$(CONFIG_LWTUNNEL) += lwtunnel.o | |
+obj-$(CONFIG_SKB_RECYCLER) += skbuff_recycle.o | |
+obj-$(CONFIG_DEBUG_OBJECTS_SKBUFF) += skbuff_debug.o skbuff_notifier.o | |
--- a/net/core/dev.c | |
+++ b/net/core/dev.c | |
@@ -139,6 +139,7 @@ | |
#include <linux/netfilter_ingress.h> | |
#include "net-sysfs.h" | |
+#include "skbuff_debug.h" | |
/* Instead of increasing this, you should create a hash table. */ | |
#define MAX_GRO_SKBS 8 | |
@@ -2732,13 +2733,28 @@ static int xmit_one(struct sk_buff *skb, struct net_device *dev, | |
unsigned int len; | |
int rc; | |
- if (!list_empty(&ptype_all) || !list_empty(&dev->ptype_all)) | |
- dev_queue_xmit_nit(skb, dev); | |
+ /* If this skb has been fast forwarded then we don't want it to | |
+ * go to any taps (by definition we're trying to bypass them). | |
+ */ | |
+ if (unlikely(!skb->fast_forwarded)) { | |
+ if (!list_empty(&ptype_all) || !list_empty(&dev->ptype_all)) | |
+ dev_queue_xmit_nit(skb, dev); | |
+ } | |
- len = skb->len; | |
- trace_net_dev_start_xmit(skb, dev); | |
- rc = netdev_start_xmit(skb, dev, txq, more); | |
- trace_net_dev_xmit(skb, rc, dev, len); | |
+#ifdef CONFIG_ETHERNET_PACKET_MANGLE | |
+ if (!dev->eth_mangle_tx || | |
+ (skb = dev->eth_mangle_tx(dev, skb)) != NULL) | |
+#else | |
+ if (1) | |
+#endif | |
+ { | |
+ len = skb->len; | |
+ trace_net_dev_start_xmit(skb, dev); | |
+ rc = netdev_start_xmit(skb, dev, txq, more); | |
+ trace_net_dev_xmit(skb, rc, dev, len); | |
+ } else { | |
+ rc = NETDEV_TX_OK; | |
+ } | |
return rc; | |
} | |
@@ -3813,6 +3829,9 @@ void netdev_rx_handler_unregister(struct net_device *dev) | |
} | |
EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister); | |
+int (*athrs_fast_nat_recv)(struct sk_buff *skb) __rcu __read_mostly; | |
+EXPORT_SYMBOL_GPL(athrs_fast_nat_recv); | |
+ | |
/* | |
* Limit the use of PFMEMALLOC reserves to those protocols that implement | |
* the special handling of PFMEMALLOC skbs. | |
@@ -3855,6 +3874,7 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc) | |
bool deliver_exact = false; | |
int ret = NET_RX_DROP; | |
__be16 type; | |
+ int (*fast_recv)(struct sk_buff *skb); | |
net_timestamp_check(!netdev_tstamp_prequeue, skb); | |
@@ -3881,6 +3901,14 @@ another_round: | |
goto out; | |
} | |
+ fast_recv = rcu_dereference(athrs_fast_nat_recv); | |
+ if (fast_recv) { | |
+ if (fast_recv(skb)) { | |
+ ret = NET_RX_SUCCESS; | |
+ goto out; | |
+ } | |
+ } | |
+ | |
#ifdef CONFIG_NET_CLS_ACT | |
if (skb->tc_verd & TC_NCLS) { | |
skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); | |
@@ -4246,6 +4274,9 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff | |
enum gro_result ret; | |
int grow; | |
+ if (skb->gro_skip) | |
+ goto normal; | |
+ | |
if (!(skb->dev->features & NETIF_F_GRO)) | |
goto normal; | |
@@ -4388,6 +4419,7 @@ static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) | |
if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) { | |
skb_dst_drop(skb); | |
kmem_cache_free(skbuff_head_cache, skb); | |
+ skbuff_debugobj_deactivate(skb); | |
} else { | |
__kfree_skb(skb); | |
} | |
@@ -4824,6 +4856,14 @@ void netif_napi_del(struct napi_struct *napi) | |
} | |
EXPORT_SYMBOL(netif_napi_del); | |
+struct napi_struct *get_current_napi_context(void) | |
+{ | |
+ struct softnet_data *sd = this_cpu_ptr(&softnet_data); | |
+ | |
+ return sd->current_napi; | |
+} | |
+EXPORT_SYMBOL(get_current_napi_context); | |
+ | |
static int napi_poll(struct napi_struct *n, struct list_head *repoll) | |
{ | |
void *have; | |
@@ -5405,6 +5445,48 @@ static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev, | |
&upper_dev->adj_list.lower); | |
} | |
+static void __netdev_addr_mask(unsigned char *mask, const unsigned char *addr, | |
+ struct net_device *dev) | |
+{ | |
+ int i; | |
+ | |
+ for (i = 0; i < dev->addr_len; i++) | |
+ mask[i] |= addr[i] ^ dev->dev_addr[i]; | |
+} | |
+ | |
+static void __netdev_upper_mask(unsigned char *mask, struct net_device *dev, | |
+ struct net_device *lower) | |
+{ | |
+ struct net_device *cur; | |
+ struct list_head *iter; | |
+ | |
+ netdev_for_each_upper_dev_rcu(dev, cur, iter) { | |
+ __netdev_addr_mask(mask, cur->dev_addr, lower); | |
+ __netdev_upper_mask(mask, cur, lower); | |
+ } | |
+} | |
+ | |
+static void __netdev_update_addr_mask(struct net_device *dev) | |
+{ | |
+ unsigned char mask[MAX_ADDR_LEN]; | |
+ struct net_device *cur; | |
+ struct list_head *iter; | |
+ | |
+ memset(mask, 0, sizeof(mask)); | |
+ __netdev_upper_mask(mask, dev, dev); | |
+ memcpy(dev->local_addr_mask, mask, dev->addr_len); | |
+ | |
+ netdev_for_each_lower_dev(dev, cur, iter) | |
+ __netdev_update_addr_mask(cur); | |
+} | |
+ | |
+static void netdev_update_addr_mask(struct net_device *dev) | |
+{ | |
+ rcu_read_lock(); | |
+ __netdev_update_addr_mask(dev); | |
+ rcu_read_unlock(); | |
+} | |
+ | |
static int __netdev_upper_dev_link(struct net_device *dev, | |
struct net_device *upper_dev, bool master, | |
void *private) | |
@@ -5476,6 +5558,7 @@ static int __netdev_upper_dev_link(struct net_device *dev, | |
goto rollback_lower_mesh; | |
} | |
+ netdev_update_addr_mask(dev); | |
call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev, | |
&changeupper_info.info); | |
return 0; | |
@@ -5602,6 +5685,7 @@ void netdev_upper_dev_unlink(struct net_device *dev, | |
list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) | |
__netdev_adjacent_dev_unlink(dev, i->dev, i->ref_nr); | |
+ netdev_update_addr_mask(dev); | |
call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev, | |
&changeupper_info.info); | |
} | |
@@ -6142,6 +6226,7 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa) | |
if (err) | |
return err; | |
dev->addr_assign_type = NET_ADDR_SET; | |
+ netdev_update_addr_mask(dev); | |
call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); | |
add_device_randomness(dev->dev_addr, dev->addr_len); | |
return 0; | |
@@ -6453,6 +6538,18 @@ static netdev_features_t netdev_fix_features(struct net_device *dev, | |
#endif | |
features &= ~NETIF_F_BUSY_POLL; | |
+ if (!(features & NETIF_F_RXCSUM)) { | |
+ /* NETIF_F_GRO_HW implies doing RXCSUM since every packet | |
+ * successfully merged by hardware must also have the | |
+ * checksum verified by hardware. If the user does not | |
+ * want to enable RXCSUM, logically, we should disable GRO_HW. | |
+ */ | |
+ if (features & NETIF_F_GRO_HW) { | |
+ netdev_dbg(dev, "Dropping NETIF_F_GRO_HW since no RXCSUM feature.\n"); | |
+ features &= ~NETIF_F_GRO_HW; | |
+ } | |
+ } | |
+ | |
return features; | |
} | |
--- a/net/core/ethtool.c | |
+++ b/net/core/ethtool.c | |
@@ -73,6 +73,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] | |
[NETIF_F_LLTX_BIT] = "tx-lockless", | |
[NETIF_F_NETNS_LOCAL_BIT] = "netns-local", | |
[NETIF_F_GRO_BIT] = "rx-gro", | |
+ [NETIF_F_GRO_HW_BIT] = "rx-gro-hw", | |
[NETIF_F_LRO_BIT] = "rx-lro", | |
[NETIF_F_TSO_BIT] = "tx-tcp-segmentation", | |
--- a/net/core/flow_dissector.c | |
+++ b/net/core/flow_dissector.c | |
@@ -95,7 +95,7 @@ __be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto, | |
ports = __skb_header_pointer(skb, thoff + poff, | |
sizeof(_ports), data, hlen, &_ports); | |
if (ports) | |
- return *ports; | |
+ return (__be32)net_hdr_word(ports); | |
} | |
return 0; | |
--- a/net/core/neighbour.c | |
+++ b/net/core/neighbour.c | |
@@ -687,7 +687,7 @@ void neigh_destroy(struct neighbour *neigh) | |
NEIGH_CACHE_STAT_INC(neigh->tbl, destroys); | |
if (!neigh->dead) { | |
- pr_warn("Destroying alive neighbour %p\n", neigh); | |
+ pr_warn("Destroying alive neighbour %pK\n", neigh); | |
dump_stack(); | |
return; | |
} | |
@@ -1049,7 +1049,19 @@ static void neigh_update_hhs(struct neighbour *neigh) | |
} | |
} | |
+ATOMIC_NOTIFIER_HEAD(neigh_mac_update_notifier_list); | |
+void neigh_mac_update_register_notify(struct notifier_block *nb) | |
+{ | |
+ atomic_notifier_chain_register(&neigh_mac_update_notifier_list, nb); | |
+} | |
+EXPORT_SYMBOL_GPL(neigh_mac_update_register_notify); | |
+ | |
+void neigh_mac_update_unregister_notify(struct notifier_block *nb) | |
+{ | |
+ atomic_notifier_chain_unregister(&neigh_mac_update_notifier_list, nb); | |
+} | |
+EXPORT_SYMBOL_GPL(neigh_mac_update_unregister_notify); | |
/* Generic update routine. | |
-- lladdr is new lladdr or NULL, if it is not supplied. | |
@@ -1080,6 +1092,7 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, | |
int notify = 0; | |
struct net_device *dev; | |
int update_isrouter = 0; | |
+ struct neigh_mac_update nmu; | |
write_lock_bh(&neigh->lock); | |
@@ -1087,6 +1100,8 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, | |
old = neigh->nud_state; | |
err = -EPERM; | |
+ memset(&nmu, 0, sizeof(struct neigh_mac_update)); | |
+ | |
if (!(flags & NEIGH_UPDATE_F_ADMIN) && | |
(old & (NUD_NOARP | NUD_PERMANENT))) | |
goto out; | |
@@ -1117,7 +1132,11 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, | |
and a new address is proposed: | |
- compare new & old | |
- if they are different, check override flag | |
+ - copy old and new addresses for neigh update notification | |
*/ | |
+ memcpy(nmu.old_mac, neigh->ha, dev->addr_len); | |
+ memcpy(nmu.update_mac, lladdr, dev->addr_len); | |
+ | |
if ((old & NUD_VALID) && | |
!memcmp(lladdr, neigh->ha, dev->addr_len)) | |
lladdr = neigh->ha; | |
@@ -1231,8 +1250,11 @@ out: | |
} | |
write_unlock_bh(&neigh->lock); | |
- if (notify) | |
+ if (notify) { | |
neigh_update_notify(neigh); | |
+ atomic_notifier_call_chain(&neigh_mac_update_notifier_list, 0, | |
+ (struct neigh_mac_update *)&nmu); | |
+ } | |
return err; | |
} | |
@@ -3225,4 +3247,3 @@ static int __init neigh_init(void) | |
} | |
subsys_initcall(neigh_init); | |
- | |
--- a/net/core/net-procfs.c | |
+++ b/net/core/net-procfs.c | |
@@ -318,10 +318,12 @@ static int __net_init dev_proc_net_init(struct net *net) | |
if (!proc_create("dev", S_IRUGO, net->proc_net, &dev_seq_fops)) | |
goto out; | |
- if (!proc_create("softnet_stat", S_IRUGO, net->proc_net, | |
+ if (!IS_ENABLED(CONFIG_PROC_STRIPPED) && | |
+ !proc_create("softnet_stat", S_IRUGO, net->proc_net, | |
&softnet_seq_fops)) | |
goto out_dev; | |
- if (!proc_create("ptype", S_IRUGO, net->proc_net, &ptype_seq_fops)) | |
+ if (!IS_ENABLED(CONFIG_PROC_STRIPPED) && | |
+ !proc_create("ptype", S_IRUGO, net->proc_net, &ptype_seq_fops)) | |
goto out_softnet; | |
if (wext_proc_init(net)) | |
@@ -330,9 +332,11 @@ static int __net_init dev_proc_net_init(struct net *net) | |
out: | |
return rc; | |
out_ptype: | |
- remove_proc_entry("ptype", net->proc_net); | |
+ if (!IS_ENABLED(CONFIG_PROC_STRIPPED)) | |
+ remove_proc_entry("ptype", net->proc_net); | |
out_softnet: | |
- remove_proc_entry("softnet_stat", net->proc_net); | |
+ if (!IS_ENABLED(CONFIG_PROC_STRIPPED)) | |
+ remove_proc_entry("softnet_stat", net->proc_net); | |
out_dev: | |
remove_proc_entry("dev", net->proc_net); | |
goto out; | |
@@ -342,8 +346,10 @@ static void __net_exit dev_proc_net_exit(struct net *net) | |
{ | |
wext_proc_exit(net); | |
- remove_proc_entry("ptype", net->proc_net); | |
- remove_proc_entry("softnet_stat", net->proc_net); | |
+ if (!IS_ENABLED(CONFIG_PROC_STRIPPED)) { | |
+ remove_proc_entry("ptype", net->proc_net); | |
+ remove_proc_entry("softnet_stat", net->proc_net); | |
+ } | |
remove_proc_entry("dev", net->proc_net); | |
} | |
--- a/net/core/net_namespace.c | |
+++ b/net/core/net_namespace.c | |
@@ -261,7 +261,7 @@ struct net *get_net_ns_by_id(struct net *net, int id) | |
spin_lock_irqsave(&net->nsid_lock, flags); | |
peer = idr_find(&net->netns_ids, id); | |
if (peer) | |
- get_net(peer); | |
+ peer = maybe_get_net(peer); | |
spin_unlock_irqrestore(&net->nsid_lock, flags); | |
rcu_read_unlock(); | |
--- a/net/core/secure_seq.c | |
+++ b/net/core/secure_seq.c | |
@@ -46,11 +46,12 @@ __u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr, | |
u32 secret[MD5_MESSAGE_BYTES / 4]; | |
u32 hash[MD5_DIGEST_WORDS]; | |
u32 i; | |
+ const struct in6_addr *daddr6 = (struct in6_addr *) daddr; | |
net_secret_init(); | |
memcpy(hash, saddr, 16); | |
for (i = 0; i < 4; i++) | |
- secret[i] = net_secret[i] + (__force u32)daddr[i]; | |
+ secret[i] = net_secret[i] + (__force u32)daddr6->s6_addr32[i]; | |
secret[4] = net_secret[4] + | |
(((__force u16)sport << 16) + (__force u16)dport); | |
for (i = 5; i < MD5_MESSAGE_BYTES / 4; i++) | |
@@ -68,11 +69,12 @@ u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, | |
u32 secret[MD5_MESSAGE_BYTES / 4]; | |
u32 hash[MD5_DIGEST_WORDS]; | |
u32 i; | |
+ const struct in6_addr *daddr6 = (struct in6_addr *) daddr; | |
net_secret_init(); | |
memcpy(hash, saddr, 16); | |
for (i = 0; i < 4; i++) | |
- secret[i] = net_secret[i] + (__force u32) daddr[i]; | |
+ secret[i] = net_secret[i] + (__force u32) daddr6->s6_addr32[i]; | |
secret[4] = net_secret[4] + (__force u32)dport; | |
for (i = 5; i < MD5_MESSAGE_BYTES / 4; i++) | |
secret[i] = net_secret[i]; | |
@@ -146,6 +148,7 @@ EXPORT_SYMBOL(secure_dccp_sequence_number); | |
u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr, | |
__be16 sport, __be16 dport) | |
{ | |
+ const struct in6_addr *daddr6 = (struct in6_addr *) daddr; | |
u32 secret[MD5_MESSAGE_BYTES / 4]; | |
u32 hash[MD5_DIGEST_WORDS]; | |
u64 seq; | |
@@ -154,7 +157,7 @@ u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr, | |
net_secret_init(); | |
memcpy(hash, saddr, 16); | |
for (i = 0; i < 4; i++) | |
- secret[i] = net_secret[i] + (__force u32)daddr[i]; | |
+ secret[i] = net_secret[i] + (__force u32)daddr6->s6_addr32[i]; | |
secret[4] = net_secret[4] + | |
(((__force u16)sport << 16) + (__force u16)dport); | |
for (i = 5; i < MD5_MESSAGE_BYTES / 4; i++) | |
--- a/net/core/skbuff.c | |
+++ b/net/core/skbuff.c | |
@@ -63,6 +63,7 @@ | |
#include <linux/errqueue.h> | |
#include <linux/prefetch.h> | |
#include <linux/if_vlan.h> | |
+#include <linux/if.h> | |
#include <net/protocol.h> | |
#include <net/dst.h> | |
@@ -77,6 +78,9 @@ | |
#include <linux/capability.h> | |
#include <linux/user_namespace.h> | |
+#include "skbuff_recycle.h" | |
+#include "skbuff_debug.h" | |
+ | |
struct kmem_cache *skbuff_head_cache __read_mostly; | |
static struct kmem_cache *skbuff_fclone_cache __read_mostly; | |
int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS; | |
@@ -166,6 +170,7 @@ struct sk_buff *__alloc_skb_head(gfp_t gfp_mask, int node) | |
gfp_mask & ~__GFP_DMA, node); | |
if (!skb) | |
goto out; | |
+ skbuff_debugobj_init_and_activate(skb); | |
/* | |
* Only clear those fields we need to clear, not those that we will | |
@@ -218,6 +223,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, | |
skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node); | |
if (!skb) | |
goto out; | |
+ skbuff_debugobj_init_and_activate(skb); | |
prefetchw(skb); | |
/* We do our best to align skb_shared_info on a separate cache | |
@@ -275,6 +281,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, | |
out: | |
return skb; | |
nodata: | |
+ skbuff_debugobj_deactivate(skb); | |
kmem_cache_free(cache, skb); | |
skb = NULL; | |
goto out; | |
@@ -309,6 +316,7 @@ struct sk_buff *__build_skb(void *data, unsigned int frag_size) | |
skb = kmem_cache_alloc(skbuff_head_cache, GFP_ATOMIC); | |
if (!skb) | |
return NULL; | |
+ skbuff_debugobj_init_and_activate(skb); | |
size -= SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); | |
@@ -394,7 +402,7 @@ EXPORT_SYMBOL(napi_alloc_frag); | |
/** | |
* __netdev_alloc_skb - allocate an skbuff for rx on a specific device | |
* @dev: network device to receive on | |
- * @len: length to allocate | |
+ * @length: length to allocate | |
* @gfp_mask: get_free_pages mask, passed to alloc_skb | |
* | |
* Allocate a new &sk_buff and assign it a usage count of one. The | |
@@ -404,19 +412,56 @@ EXPORT_SYMBOL(napi_alloc_frag); | |
* | |
* %NULL is returned if there is no free memory. | |
*/ | |
-struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len, | |
- gfp_t gfp_mask) | |
+struct sk_buff *__netdev_alloc_skb(struct net_device *dev, | |
+ unsigned int length, gfp_t gfp_mask) | |
{ | |
+#ifndef CONFIG_SKB_RECYCLER | |
struct page_frag_cache *nc; | |
unsigned long flags; | |
- struct sk_buff *skb; | |
bool pfmemalloc; | |
+ bool page_frag_alloc_enable = true; | |
void *data; | |
+#endif | |
+ | |
+ struct sk_buff *skb; | |
+ unsigned int len = length; | |
+ | |
+#ifdef CONFIG_SKB_RECYCLER | |
+ skb = skb_recycler_alloc(dev, length); | |
+ if (likely(skb)) { | |
+ /* SKBs in the recycler are from various unknown sources. | |
+ * Their truesize is unknown. We should set truesize | |
+ * as the needed buffer size before using it. | |
+ */ | |
+ skb->truesize = SKB_TRUESIZE(SKB_DATA_ALIGN(len + NET_SKB_PAD)); | |
+ return skb; | |
+ } | |
+ | |
+ len = SKB_RECYCLE_SIZE; | |
+ if (unlikely(length > SKB_RECYCLE_SIZE)) | |
+ len = length; | |
+ | |
+ skb = __alloc_skb(len + NET_SKB_PAD, gfp_mask, | |
+ SKB_ALLOC_RX, NUMA_NO_NODE); | |
+ if (!skb) | |
+ goto skb_fail; | |
+ | |
+ /* Set truesize as the needed buffer size | |
+ * rather than the allocated size by __alloc_skb(). | |
+ */ | |
+ if (length + NET_SKB_PAD < SKB_WITH_OVERHEAD(PAGE_SIZE)) | |
+ skb->truesize = SKB_TRUESIZE(SKB_DATA_ALIGN(length + NET_SKB_PAD)); | |
+ goto skb_success; | |
+#else | |
len += NET_SKB_PAD; | |
+#ifdef CONFIG_ALLOC_SKB_PAGE_FRAG_DISABLE | |
+ page_frag_alloc_enable = false; | |
+#endif | |
if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) || | |
- (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) { | |
+ (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA)) || | |
+ !page_frag_alloc_enable) { | |
skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE); | |
if (!skb) | |
goto skb_fail; | |
@@ -450,6 +495,7 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len, | |
if (pfmemalloc) | |
skb->pfmemalloc = 1; | |
skb->head_frag = 1; | |
+#endif | |
skb_success: | |
skb_reserve(skb, NET_SKB_PAD); | |
@@ -520,6 +566,22 @@ skb_fail: | |
} | |
EXPORT_SYMBOL(__napi_alloc_skb); | |
+struct sk_buff *__netdev_alloc_skb_ip_align(struct net_device *dev, | |
+ unsigned int length, gfp_t gfp) | |
+{ | |
+ struct sk_buff *skb = __netdev_alloc_skb(dev, length + NET_IP_ALIGN, gfp); | |
+ | |
+#ifdef CONFIG_ETHERNET_PACKET_MANGLE | |
+ if (dev && (dev->priv_flags & IFF_NO_IP_ALIGN)) | |
+ return skb; | |
+#endif | |
+ | |
+ if (NET_IP_ALIGN && skb) | |
+ skb_reserve(skb, NET_IP_ALIGN); | |
+ return skb; | |
+} | |
+EXPORT_SYMBOL(__netdev_alloc_skb_ip_align); | |
+ | |
void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off, | |
int size, unsigned int truesize) | |
{ | |
@@ -571,7 +633,7 @@ static void skb_free_head(struct sk_buff *skb) | |
kfree(head); | |
} | |
-static void skb_release_data(struct sk_buff *skb) | |
+void skb_release_data(struct sk_buff *skb) | |
{ | |
struct skb_shared_info *shinfo = skb_shinfo(skb); | |
int i; | |
@@ -605,12 +667,13 @@ static void skb_release_data(struct sk_buff *skb) | |
/* | |
* Free an skbuff by memory without cleaning the state. | |
*/ | |
-static void kfree_skbmem(struct sk_buff *skb) | |
+void kfree_skbmem(struct sk_buff *skb) | |
{ | |
struct sk_buff_fclones *fclones; | |
switch (skb->fclone) { | |
case SKB_FCLONE_UNAVAILABLE: | |
+ skbuff_debugobj_deactivate(skb); | |
kmem_cache_free(skbuff_head_cache, skb); | |
return; | |
@@ -631,7 +694,9 @@ static void kfree_skbmem(struct sk_buff *skb) | |
} | |
if (!atomic_dec_and_test(&fclones->fclone_ref)) | |
return; | |
+ | |
fastpath: | |
+ skbuff_debugobj_deactivate(&fclones->skb1); | |
kmem_cache_free(skbuff_fclone_cache, fclones); | |
} | |
@@ -740,12 +805,38 @@ void consume_skb(struct sk_buff *skb) | |
{ | |
if (unlikely(!skb)) | |
return; | |
+ | |
+ prefetch(&skb->destructor); | |
+ | |
if (likely(atomic_read(&skb->users) == 1)) | |
smp_rmb(); | |
else if (likely(!atomic_dec_and_test(&skb->users))) | |
return; | |
+ | |
+ /* If possible we'd like to recycle any skb rather than just free it, | |
+ * but in order to do that we need to release any head state too. | |
+ * We don't want to do this later because we'll be in a pre-emption | |
+ * disabled state. | |
+ */ | |
+ skb_release_head_state(skb); | |
+ | |
+ /* Can we recycle this skb? If we can then it will be much faster | |
+ * for us to recycle this one later than to allocate a new one | |
+ * from scratch. | |
+ */ | |
+ if (likely(skb->head) && likely(skb_recycler_consume(skb))) | |
+ return; | |
+ | |
trace_consume_skb(skb); | |
- __kfree_skb(skb); | |
+ | |
+ /* We're not recycling so now we need to do the rest of what we would | |
+ * have done in __kfree_skb (above and beyond the skb_release_head_state | |
+ * that we already did). | |
+ */ | |
+ if (likely(skb->head)) | |
+ skb_release_data(skb); | |
+ | |
+ kfree_skbmem(skb); | |
} | |
EXPORT_SYMBOL(consume_skb); | |
@@ -956,6 +1047,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) | |
n = kmem_cache_alloc(skbuff_head_cache, gfp_mask); | |
if (!n) | |
return NULL; | |
+ skbuff_debugobj_init_and_activate(n); | |
kmemcheck_annotate_bitfield(n, flags1); | |
n->fclone = SKB_FCLONE_UNAVAILABLE; | |
@@ -3327,6 +3419,7 @@ void __init skb_init(void) | |
0, | |
SLAB_HWCACHE_ALIGN|SLAB_PANIC, | |
NULL); | |
+ skb_recycler_init(); | |
} | |
/** | |
@@ -4115,6 +4208,7 @@ void kfree_skb_partial(struct sk_buff *skb, bool head_stolen) | |
{ | |
if (head_stolen) { | |
skb_release_head_state(skb); | |
+ skbuff_debugobj_deactivate(skb); | |
kmem_cache_free(skbuff_head_cache, skb); | |
} else { | |
__kfree_skb(skb); | |
new file mode 100644 | |
--- /dev/null | |
+++ b/net/core/skbuff_debug.c | |
@@ -0,0 +1,316 @@ | |
+/* Copyright (c) 2015-2016, The Linux Foundation. All rights reserved. | |
+ * | |
+ * Permission to use, copy, modify, and/or distribute this software for any | |
+ * purpose with or without fee is hereby granted, provided that the above | |
+ * copyright notice and this permission notice appear in all copies. | |
+ * | |
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | |
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | |
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | |
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | |
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | |
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | |
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | |
+ */ | |
+ | |
+#include <asm/stacktrace.h> | |
+#include <asm/current.h> | |
+#include <linux/sched.h> | |
+#include <linux/module.h> | |
+#include <linux/smp.h> | |
+ | |
+#include "skbuff_debug.h" | |
+#include "skbuff_notifier.h" | |
+#include "skbuff_recycle.h" | |
+ | |
+static int skbuff_debugobj_enabled __read_mostly = 1; | |
+ | |
+static int skbuff_debug_event_handler(struct notifier_block *nb, | |
+ unsigned long action, void *data); | |
+static struct notifier_block skbuff_debug_notify = { | |
+ .notifier_call = skbuff_debug_event_handler, | |
+ .priority = 0 | |
+}; | |
+ | |
+inline u32 skbuff_debugobj_sum(struct sk_buff *skb) | |
+{ | |
+ int pos = offsetof(struct sk_buff, free_addr); | |
+ u32 sum = 0; | |
+ | |
+ while (pos--) | |
+ sum += ((u8 *)skb)[pos]; | |
+ | |
+ return sum; | |
+} | |
+ | |
+struct skbuff_debugobj_walking { | |
+ int pos; | |
+ void **d; | |
+}; | |
+ | |
+static int skbuff_debugobj_walkstack(struct stackframe *frame, void *p) | |
+{ | |
+ struct skbuff_debugobj_walking *w = (struct skbuff_debugobj_walking *)p; | |
+ unsigned long pc = frame->pc; | |
+ | |
+ if (w->pos < DEBUG_OBJECTS_SKBUFF_STACKSIZE - 1) { | |
+ w->d[w->pos++] = (void *)pc; | |
+ return 0; | |
+ } | |
+ | |
+ return -ENOENT; | |
+} | |
+ | |
+#if defined(CONFIG_ARM) || defined(CONFIG_ARM64) | |
+static void skbuff_debugobj_get_stack(void **ret) | |
+{ | |
+ struct stackframe frame; | |
+ | |
+ register unsigned long current_sp asm ("sp"); | |
+ struct skbuff_debugobj_walking w = {0, ret}; | |
+ void *p = &w; | |
+ | |
+ frame.fp = (unsigned long)__builtin_frame_address(0); | |
+ frame.sp = current_sp; | |
+ | |
+#ifdef CONFIG_ARM | |
+ frame.lr = (unsigned long)__builtin_return_address(0); | |
+#endif | |
+ | |
+ frame.pc = (unsigned long)skbuff_debugobj_get_stack; | |
+ | |
+ walk_stackframe(&frame, skbuff_debugobj_walkstack, p); | |
+ | |
+ ret[w.pos] = NULL; | |
+} | |
+#else | |
+#warning "skbuff debug objects: stack unwinding not supported on this architecture" | 
+static void skbuff_debugobj_get_stack(void **ret) | |
+{ | |
+ /* not supported */ | |
+	ret[0] = (void *)0xdeadbeef; | 
+} | |
+#endif | |
+ | |
+void skbuff_debugobj_print_stack(void *const *stack) | |
+{ | |
+ int i; | |
+ | |
+ for (i = 0; stack[i]; i++) | |
+ pr_emerg("\t %pS (0x%p)\n", stack[i], stack[i]); | |
+} | |
+ | |
+static const char *skbuff_debugobj_state_name(const struct sk_buff *skb) | |
+{ | |
+ int obj_state; | |
+ | |
+ obj_state = debug_object_get_state((struct sk_buff *)skb); | |
+ switch (obj_state) { | |
+ case ODEBUG_STATE_NONE: | |
+ return "none"; | |
+ case ODEBUG_STATE_INIT: | |
+ return "init"; | |
+ case ODEBUG_STATE_INACTIVE: | |
+ return "inactive"; | |
+ case ODEBUG_STATE_ACTIVE: | |
+ return "active"; | |
+ case ODEBUG_STATE_DESTROYED: | |
+ return "destroyed"; | |
+ case ODEBUG_STATE_NOTAVAILABLE: | |
+ return "not available"; | |
+ default: | |
+ return "invalid"; | |
+ } | |
+} | |
+ | |
+void skbuff_debugobj_print_skb(const struct sk_buff *skb) | |
+{ | |
+ pr_emerg("skb_debug: current process = %s (pid %i)\n", | |
+ current->comm, current->pid); | |
+ pr_emerg("skb_debug: skb 0x%p, next 0x%p, prev 0x%p, state = %s\n", skb, | |
+ skb->next, skb->prev, skbuff_debugobj_state_name(skb)); | |
+ pr_emerg("skb_debug: free stack:\n"); | |
+ skbuff_debugobj_print_stack(skb->free_addr); | |
+ pr_emerg("skb_debug: alloc stack:\n"); | |
+ skbuff_debugobj_print_stack(skb->alloc_addr); | |
+} | |
+EXPORT_SYMBOL(skbuff_debugobj_print_skb); | |
+ | |
+/* skbuff_debugobj_fixup(): | |
+ * Called when an error is detected in the state machine for | |
+ * the objects | |
+ */ | |
+static int skbuff_debugobj_fixup(void *addr, enum debug_obj_state state) | |
+{ | |
+ struct sk_buff *skb = (struct sk_buff *)addr; | |
+ ftrace_dump(DUMP_ALL); | |
+ WARN(1, "skb_debug: state = %d, skb = 0x%p sum = %d (now %d)\n", | |
+ state, skb, skb->sum, skbuff_debugobj_sum(skb)); | |
+ skb_recycler_notifier_send_event(SKB_RECYCLER_NOTIFIER_FSM, skb); | |
+ | |
+ return 0; | |
+} | |
+ | |
+static struct debug_obj_descr skbuff_debug_descr = { | |
+ .name = "sk_buff_struct", | |
+ .fixup_init = skbuff_debugobj_fixup, | |
+ .fixup_activate = skbuff_debugobj_fixup, | |
+ .fixup_destroy = skbuff_debugobj_fixup, | |
+ .fixup_free = skbuff_debugobj_fixup, | |
+}; | |
+ | |
+inline void skbuff_debugobj_activate(struct sk_buff *skb) | |
+{ | |
+ int ret = 0; | |
+ | |
+ if (!skbuff_debugobj_enabled) | |
+ return; | |
+ | |
+ skbuff_debugobj_get_stack(skb->alloc_addr); | |
+ ret = debug_object_activate(skb, &skbuff_debug_descr); | |
+ if (ret) | |
+ goto err_act; | |
+ | |
+ skbuff_debugobj_sum_validate(skb); | |
+ | |
+ return; | |
+ | |
+err_act: | |
+ ftrace_dump(DUMP_ALL); | |
+ WARN(1, "skb_debug: failed to activate err = %d skb = 0x%p sum = %d (now %d)\n", | |
+ ret, skb, skb->sum, skbuff_debugobj_sum(skb)); | |
+ skb_recycler_notifier_send_event(SKB_RECYCLER_NOTIFIER_DBLALLOC, skb); | |
+} | |
+ | |
+inline void skbuff_debugobj_init_and_activate(struct sk_buff *skb) | |
+{ | |
+ if (!skbuff_debugobj_enabled) | |
+ return; | |
+ | |
+ /* if we're coming from the slab, the skb->sum might | |
+ * be invalid anyways | |
+ */ | |
+ skb->sum = skbuff_debugobj_sum(skb); | |
+ | |
+ debug_object_init(skb, &skbuff_debug_descr); | |
+ skbuff_debugobj_activate(skb); | |
+} | |
+ | |
+inline void skbuff_debugobj_deactivate(struct sk_buff *skb) | |
+{ | |
+ int obj_state; | |
+ | |
+ if (!skbuff_debugobj_enabled) | |
+ return; | |
+ | |
+ skb->sum = skbuff_debugobj_sum(skb); | |
+ | |
+ obj_state = debug_object_get_state(skb); | |
+ | |
+ if (obj_state == ODEBUG_STATE_ACTIVE) { | |
+ debug_object_deactivate(skb, &skbuff_debug_descr); | |
+ skbuff_debugobj_get_stack(skb->free_addr); | |
+ return; | |
+ } | |
+ | |
+ ftrace_dump(DUMP_ALL); | |
+ WARN(1, "skb_debug: deactivating inactive object skb=0x%p state=%d sum = %d (now %d)\n", | |
+ skb, obj_state, skb->sum, skbuff_debugobj_sum(skb)); | |
+ skb_recycler_notifier_send_event(SKB_RECYCLER_NOTIFIER_DBLFREE, skb); | |
+} | |
+ | |
+inline void _skbuff_debugobj_sum_validate(struct sk_buff *skb, | |
+ const char *var, const char *src, | |
+ int line, const char *fxn) | |
+{ | |
+ if (!skbuff_debugobj_enabled || !skb) | |
+ return; | |
+ | |
+ if (skb->sum == skbuff_debugobj_sum(skb)) | |
+ return; | |
+ | |
+ ftrace_dump(DUMP_ALL); | |
+ WARN(1, "skb_debug: skb sum changed skb = 0x%p sum = %d (now %d)\n", | |
+ skb, skb->sum, skbuff_debugobj_sum(skb)); | |
+ pr_emerg("skb_debug: %s() checking %s in %s:%d\n", fxn, var, src, line); | |
+ skb_recycler_notifier_send_event(SKB_RECYCLER_NOTIFIER_SUMERR, skb); | |
+} | |
+ | |
+inline void skbuff_debugobj_sum_update(struct sk_buff *skb) | |
+{ | |
+ if (!skbuff_debugobj_enabled || !skb) | |
+ return; | |
+ | |
+ skb->sum = skbuff_debugobj_sum(skb); | |
+} | |
+ | |
+inline void skbuff_debugobj_destroy(struct sk_buff *skb) | |
+{ | |
+ if (!skbuff_debugobj_enabled) | |
+ return; | |
+ | |
+ debug_object_destroy(skb, &skbuff_debug_descr); | |
+} | |
+ | |
+static int __init disable_object_debug(char *str) | |
+{ | |
+ skbuff_debugobj_enabled = 0; | |
+ | |
+ pr_info("skb_debug: debug objects is disabled\n"); | |
+ return 0; | |
+} | |
+ | |
+early_param("no_skbuff_debug_objects", disable_object_debug); | |
+ | |
+void skbuff_debugobj_print_skb_list(const struct sk_buff *skb_list, | |
+ const char *list_title, int cpu) | |
+{ | |
+ int count; | |
+ struct sk_buff *skb_i = (struct sk_buff *)skb_list; | |
+ u32 sum_i, sum_now; | |
+ int obj_state; | |
+ | |
+ if (cpu < 0) { | |
+ cpu = get_cpu(); | |
+ put_cpu(); | |
+ } | |
+ pr_emerg("skb_debug: start skb list '%s' [CPU#%d]\n", list_title, cpu); | |
+ count = 0; | |
+ if (skb_list) { | |
+ do { | |
+ obj_state = | |
+ debug_object_get_state(skb_i); | |
+ if (obj_state < ODEBUG_STATE_NOTAVAILABLE) { | |
+ sum_i = skb_i->sum; | |
+ sum_now = skbuff_debugobj_sum(skb_i); | |
+ } else { | |
+ sum_i = 0; | |
+ sum_now = 0; | |
+ } | |
+ pr_emerg("skb_debug: [%02d] skb 0x%p, next 0x%p, prev 0x%p, state %d (%s), sum %d (now %d)\n", | |
+ count, skb_i, skb_i->next, skb_i->prev, | |
+ obj_state, skbuff_debugobj_state_name(skb_i), | |
+ sum_i, sum_now); | |
+ skb_i = skb_i->next; | |
+ count++; | |
+ } while (skb_list != skb_i); | |
+ } | |
+ pr_emerg("skb_debug: end skb list '%s'\n", list_title); | |
+} | |
+ | |
+void skbuff_debugobj_register_callback(void) | |
+{ | |
+ skb_recycler_notifier_register(&skbuff_debug_notify); | |
+} | |
+ | |
+int skbuff_debug_event_handler(struct notifier_block *nb, unsigned long action, | |
+ void *data) | |
+{ | |
+ struct sk_buff *skb = (struct sk_buff *)data; | |
+ | |
+ pr_emerg("skb_debug: notifier event %lu\n", action); | |
+ skbuff_debugobj_print_skb(skb); | |
+ skb_recycler_print_all_lists(); | |
+ | |
+ return NOTIFY_DONE; | |
+} | |
new file mode 100644 | |
--- /dev/null | |
+++ b/net/core/skbuff_debug.h | |
@@ -0,0 +1,49 @@ | |
+/* Copyright (c) 2015, The Linux Foundation. All rights reserved. | |
+ * | |
+ * Permission to use, copy, modify, and/or distribute this software for any | |
+ * purpose with or without fee is hereby granted, provided that the above | |
+ * copyright notice and this permission notice appear in all copies. | |
+ * | |
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | |
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | |
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | |
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | |
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | |
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | |
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | |
+ */ | |
+ | |
+#include <linux/skbuff.h> | |
+#include <linux/debugobjects.h> | |
+ | |
+#ifndef _LINUX_SKBBUFF_DEBUG_OBJECTS | |
+#define _LINUX_SKBBUFF_DEBUG_OBJECTS | |
+ | |
+#ifdef CONFIG_DEBUG_OBJECTS_SKBUFF | |
+void skbuff_debugobj_init_and_activate(struct sk_buff *skb); | |
+void skbuff_debugobj_activate(struct sk_buff *skb); | |
+void skbuff_debugobj_deactivate(struct sk_buff *skb); | |
+void skbuff_debugobj_destroy(struct sk_buff *skb); | |
+#define skbuff_debugobj_sum_validate(skb) _skbuff_debugobj_sum_validate(skb, \ | |
+ #skb, __FILE__, __LINE__, __func__) | |
+void _skbuff_debugobj_sum_validate(struct sk_buff *skb, const char *var, | |
+ const char *src, int line, const char *fxn); | |
+void skbuff_debugobj_sum_update(struct sk_buff *skb); | |
+void skbuff_debugobj_print_skb(const struct sk_buff *skb); | |
+void skbuff_debugobj_print_skb_list(const struct sk_buff *skb_list, | |
+ const char *list_title, int cpu); | |
+void skbuff_debugobj_register_callback(void); | |
+#else | |
+static inline void skbuff_debugobj_init_and_activate(struct sk_buff *skb) { } | |
+static inline void skbuff_debugobj_activate(struct sk_buff *skb) { } | |
+static inline void skbuff_debugobj_deactivate(struct sk_buff *skb) { } | |
+static inline void skbuff_debugobj_destroy(struct sk_buff *skb) { } | |
+static inline void skbuff_debugobj_sum_validate(struct sk_buff *skb) { } | |
+static inline void skbuff_debugobj_sum_update(struct sk_buff *skb) { } | |
+static inline void skbuff_debugobj_print_skb(const struct sk_buff *skb) { } | |
+static inline void skbuff_debugobj_print_skb_list | |
+ (const struct sk_buff *skb_list, const char *list_title, int cpu) { } | |
+static inline void skbuff_debugobj_register_callback(void) { } | |
+#endif | |
+ | |
+#endif /* _LINUX_SKBBUFF_DEBUG_OBJECTS */ | |
new file mode 100644 | |
--- /dev/null | |
+++ b/net/core/skbuff_notifier.c | |
@@ -0,0 +1,41 @@ | |
+/* Copyright (c) 2016, The Linux Foundation. All rights reserved. | |
+ * | |
+ * Permission to use, copy, modify, and/or distribute this software for any | |
+ * purpose with or without fee is hereby granted, provided that the above | |
+ * copyright notice and this permission notice appear in all copies. | |
+ * | |
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | |
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | |
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | |
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | |
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | |
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | |
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | |
+ */ | |
+ | |
+/* Notifier interface for the SKB Recycler */ | |
+ | |
+#include "skbuff_notifier.h" | |
+ | |
+static BLOCKING_NOTIFIER_HEAD(skb_recycler_notifier); | |
+ | |
+int skb_recycler_notifier_register(struct notifier_block *nb) | |
+{ | |
+ return blocking_notifier_chain_register(&skb_recycler_notifier, nb); | |
+} | |
+EXPORT_SYMBOL(skb_recycler_notifier_register); | |
+ | |
+int skb_recycler_notifier_unregister(struct notifier_block *nb) | |
+{ | |
+ return blocking_notifier_chain_unregister(&skb_recycler_notifier, nb); | |
+} | |
+EXPORT_SYMBOL(skb_recycler_notifier_unregister); | |
+ | |
+int skb_recycler_notifier_send_event(unsigned long action, struct sk_buff *skb) | 
+{ | 
+	int ret; | 
+ | 
+	ret = blocking_notifier_call_chain(&skb_recycler_notifier, action, skb); | 
+ | 
+	return ret; | 
+} | 
new file mode 100644 | |
--- /dev/null | |
+++ b/net/core/skbuff_notifier.h | |
@@ -0,0 +1,51 @@ | |
+/* Copyright (c) 2016, The Linux Foundation. All rights reserved. | 
+ * | 
+ * Permission to use, copy, modify, and/or distribute this software for any | 
+ * purpose with or without fee is hereby granted, provided that the above | 
+ * copyright notice and this permission notice appear in all copies. | 
+ * | 
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | 
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | 
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | 
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | 
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | 
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | 
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | 
+ */ | 
+ | |
+#ifndef SKBUFF_NOTIFIER_H | |
+#define SKBUFF_NOTIFIER_H | |
+ | |
+#include <linux/notifier.h> | |
+#include <linux/skbuff.h> | |
+ | |
+/* notifier events */ | |
+#define SKB_RECYCLER_NOTIFIER_SUMERR 0x0001 | |
+#define SKB_RECYCLER_NOTIFIER_DBLFREE 0x0002 | |
+#define SKB_RECYCLER_NOTIFIER_DBLALLOC 0x0004 | |
+#define SKB_RECYCLER_NOTIFIER_FSM 0x0008 | |
+ | |
+#if defined(CONFIG_DEBUG_OBJECTS_SKBUFF) | |
+int skb_recycler_notifier_register(struct notifier_block *nb); | |
+int skb_recycler_notifier_unregister(struct notifier_block *nb); | |
+int skb_recycler_notifier_send_event(unsigned long action, | |
+ struct sk_buff *skb); | |
+#else | |
+static inline int skb_recycler_notifier_register(struct notifier_block *nb) | |
+{ | |
+ return 0; | |
+} | |
+ | |
+static inline int skb_recycler_notifier_unregister(struct notifier_block *nb) | |
+{ | |
+ return 0; | |
+} | |
+ | |
+static inline int skb_recycler_notifier_send_event(unsigned long action, | |
+ struct sk_buff *skb) | |
+{ | |
+ return 1; | |
+} | |
+#endif /* CONFIG_DEBUG_OBJECTS_SKBUFF */ | |
+ | |
+#endif /* SKBUFF_NOTIFIER_H */ | |
new file mode 100644 | |
--- /dev/null | |
+++ b/net/core/skbuff_recycle.c | |
@@ -0,0 +1,582 @@ | |
+/* Copyright (c) 2013-2016, 2019, The Linux Foundation. All rights reserved. | |
+ * | |
+ * Permission to use, copy, modify, and/or distribute this software for any | |
+ * purpose with or without fee is hereby granted, provided that the above | |
+ * copyright notice and this permission notice appear in all copies. | |
+ * | |
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | |
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | |
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | |
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | |
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | |
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | |
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | |
+ */ | |
+/* Generic skb recycler */ | |
+#include "skbuff_recycle.h" | |
+#include <linux/proc_fs.h> | |
+#include <linux/string.h> | |
+ | |
+#include "skbuff_debug.h" | |
+ | |
+static struct proc_dir_entry *proc_net_skbrecycler; | |
+ | |
+static DEFINE_PER_CPU(struct sk_buff_head, recycle_list); | |
+static int skb_recycle_max_skbs = SKB_RECYCLE_MAX_SKBS; | |
+ | |
+#ifdef CONFIG_SKB_RECYCLER_MULTI_CPU | |
+static DEFINE_PER_CPU(struct sk_buff_head, recycle_spare_list); | |
+static struct global_recycler glob_recycler; | |
+static int skb_recycle_spare_max_skbs = SKB_RECYCLE_SPARE_MAX_SKBS; | |
+#endif | |
+ | |
+inline struct sk_buff *skb_recycler_alloc(struct net_device *dev, | |
+ unsigned int length) | |
+{ | |
+ unsigned long flags; | |
+ struct sk_buff_head *h; | |
+ struct sk_buff *skb = NULL; | |
+ struct sk_buff *ln = NULL; | |
+ | |
+ if (unlikely(length > SKB_RECYCLE_SIZE)) | |
+ return NULL; | |
+ | |
+ h = &get_cpu_var(recycle_list); | |
+ local_irq_save(flags); | |
+ skb = skb_peek(h); | |
+ if (skb) { | |
+ ln = skb_peek_next(skb, h); | |
+ skbuff_debugobj_activate(skb); | |
+ /* Recalculate the sum for skb->next as next and prev pointers | |
+ * of skb->next will be updated in __skb_unlink | |
+ */ | |
+ skbuff_debugobj_sum_validate(ln); | |
+ __skb_unlink(skb, h); | |
+ skbuff_debugobj_sum_update(ln); | |
+ } | |
+#ifdef CONFIG_SKB_RECYCLER_MULTI_CPU | |
+ if (unlikely(!skb)) { | |
+ u8 head; | |
+ | |
+ spin_lock(&glob_recycler.lock); | |
+ /* If global recycle list is not empty, use global buffers */ | |
+ head = glob_recycler.head; | |
+ if (unlikely(head == glob_recycler.tail)) { | |
+ spin_unlock(&glob_recycler.lock); | |
+ } else { | |
+ struct sk_buff *gn = glob_recycler.pool[head].next; | |
+ struct sk_buff *gp = glob_recycler.pool[head].prev; | |
+ | |
+ /* Move SKBs from global list to CPU pool */ | |
+ skbuff_debugobj_sum_validate(gn); | |
+ skbuff_debugobj_sum_validate(gp); | |
+ skb_queue_splice_init(&glob_recycler.pool[head], h); | |
+ skbuff_debugobj_sum_update(gn); | |
+ skbuff_debugobj_sum_update(gp); | |
+ | |
+ head = (head + 1) & SKB_RECYCLE_MAX_SHARED_POOLS_MASK; | |
+ glob_recycler.head = head; | |
+ spin_unlock(&glob_recycler.lock); | |
+ /* We have refilled the CPU pool - dequeue */ | |
+ skb = skb_peek(h); | |
+ if (skb) { | |
+ /* Recalculate the sum for skb->next as next and | |
+ * prev pointers of skb->next will be updated | |
+ * in __skb_unlink | |
+ */ | |
+ ln = skb_peek_next(skb, h); | |
+ skbuff_debugobj_activate(skb); | |
+ skbuff_debugobj_sum_validate(ln); | |
+ __skb_unlink(skb, h); | |
+ skbuff_debugobj_sum_update(ln); | |
+ } | |
+ } | |
+ } | |
+#endif | |
+ local_irq_restore(flags); | |
+ put_cpu_var(recycle_list); | |
+ | |
+ if (likely(skb)) { | |
+ struct skb_shared_info *shinfo; | |
+ | |
+ /* We're about to write a large amount to the skb to | |
+ * zero most of the structure so prefetch the start | |
+ * of the shinfo region now so it's in the D-cache | |
+ * before we start to write that. | |
+ */ | |
+ shinfo = skb_shinfo(skb); | |
+ prefetchw(shinfo); | |
+ | |
+ zero_struct(skb, offsetof(struct sk_buff, tail)); | |
+ atomic_set(&skb->users, 1); | |
+ skb->mac_header = (typeof(skb->mac_header))~0U; | |
+ skb->transport_header = (typeof(skb->transport_header))~0U; | |
+ zero_struct(shinfo, offsetof(struct skb_shared_info, dataref)); | |
+ atomic_set(&shinfo->dataref, 1); | |
+ | |
+ skb->data = skb->head + NET_SKB_PAD; | |
+ skb_reset_tail_pointer(skb); | |
+ | |
+ skb->dev = dev; | |
+ } | |
+ | |
+ return skb; | |
+} | |
+ | |
+inline bool skb_recycler_consume(struct sk_buff *skb) | |
+{ | |
+ unsigned long flags; | |
+ struct sk_buff_head *h; | |
+ struct sk_buff *ln = NULL; | |
+ /* Can we recycle this skb? If not, simply return that we cannot */ | |
+ if (unlikely(!consume_skb_can_recycle(skb, SKB_RECYCLE_MIN_SIZE, | |
+ SKB_RECYCLE_MAX_SIZE))) | |
+ return false; | |
+ | |
+ /* If we can, then it will be much faster for us to recycle this one | |
+ * later than to allocate a new one from scratch. | |
+ */ | |
+ h = &get_cpu_var(recycle_list); | |
+ local_irq_save(flags); | |
+ /* Attempt to enqueue the CPU hot recycle list first */ | |
+ if (likely(skb_queue_len(h) < skb_recycle_max_skbs)) { | |
+ ln = skb_peek(h); | |
+ /* Recalculate the sum for peek of list as next and prev | |
+ * pointers of skb->next will be updated in __skb_queue_head | |
+ */ | |
+ skbuff_debugobj_sum_validate(ln); | |
+ __skb_queue_head(h, skb); | |
+ skbuff_debugobj_deactivate(skb); | |
+ skbuff_debugobj_sum_update(ln); | |
+ local_irq_restore(flags); | |
+ preempt_enable(); | |
+ return true; | |
+ } | |
+#ifdef CONFIG_SKB_RECYCLER_MULTI_CPU | |
+ h = this_cpu_ptr(&recycle_spare_list); | |
+ | |
+ /* The CPU hot recycle list was full; if the spare list is also full, | |
+ * attempt to move the spare list to the global list for other CPUs to | |
+ * use. | |
+ */ | |
+ if (unlikely(skb_queue_len(h) >= skb_recycle_spare_max_skbs)) { | |
+ u8 cur_tail, next_tail; | |
+ | |
+ spin_lock(&glob_recycler.lock); | |
+ cur_tail = glob_recycler.tail; | |
+ next_tail = (cur_tail + 1) & SKB_RECYCLE_MAX_SHARED_POOLS_MASK; | |
+ if (next_tail != glob_recycler.head) { | |
+ struct sk_buff_head *p = &glob_recycler.pool[cur_tail]; | |
+ struct sk_buff *hn = h->next, *hp = h->prev; | |
+ | |
+ /* Move SKBs from CPU pool to Global pool*/ | |
+ skbuff_debugobj_sum_validate(hp); | |
+ skbuff_debugobj_sum_validate(hn); | |
+ skb_queue_splice_init(h, p); | |
+ skbuff_debugobj_sum_update(hp); | |
+ skbuff_debugobj_sum_update(hn); | |
+ | |
+ /* Done with global list init */ | |
+ glob_recycler.tail = next_tail; | |
+ spin_unlock(&glob_recycler.lock); | |
+ | |
+ /* Recalculate the sum for peek of list as next and prev | |
+ * pointers of skb->next will be updated in | |
+ * __skb_queue_head | |
+ */ | |
+ ln = skb_peek(h); | |
+ skbuff_debugobj_sum_validate(ln); | |
+ /* We have now cleared room in the spare; | |
+ * Initialize and enqueue skb into spare | |
+ */ | |
+ __skb_queue_head(h, skb); | |
+ skbuff_debugobj_sum_update(ln); | |
+ skbuff_debugobj_deactivate(skb); | |
+ | |
+ local_irq_restore(flags); | |
+ preempt_enable(); | |
+ return true; | |
+ } | |
+ /* We still have a full spare because the global is also full */ | |
+ spin_unlock(&glob_recycler.lock); | |
+ } else { | |
+ /* We have room in the spare list; enqueue to spare list */ | |
+ ln = skb_peek(h); | |
+ /* Recalculate the sum for peek of list as next and prev | |
+ * pointers of skb->next will be updated in __skb_queue_head | |
+ */ | |
+ skbuff_debugobj_sum_validate(ln); | |
+ __skb_queue_head(h, skb); | |
+ skbuff_debugobj_deactivate(skb); | |
+ skbuff_debugobj_sum_update(ln); | |
+ local_irq_restore(flags); | |
+ preempt_enable(); | |
+ return true; | |
+ } | |
+#endif | |
+ | |
+ local_irq_restore(flags); | |
+ preempt_enable(); | |
+ | |
+ return false; | |
+} | |
+ | |
+static void skb_recycler_free_skb(struct sk_buff_head *list) | |
+{ | |
+ struct sk_buff *skb = NULL; | |
+ unsigned long flags; | |
+ | |
+ spin_lock_irqsave(&list->lock, flags); | |
+ while ((skb = skb_peek(list)) != NULL) { | |
+ skbuff_debugobj_activate(skb); | |
+ __skb_unlink(skb, list); | |
+ skb_release_data(skb); | |
+ kfree_skbmem(skb); | |
+ } | |
+ spin_unlock_irqrestore(&list->lock, flags); | |
+} | |
+ | |
+static int skb_cpu_callback(struct notifier_block *nfb, | |
+ unsigned long action, void *ocpu) | |
+{ | |
+ unsigned long oldcpu = (unsigned long)ocpu; | |
+ | |
+ if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { | |
+ skb_recycler_free_skb(&per_cpu(recycle_list, oldcpu)); | |
+#ifdef CONFIG_SKB_RECYCLER_MULTI_CPU | |
+ spin_lock(&glob_recycler.lock); | |
+ skb_recycler_free_skb(&per_cpu(recycle_spare_list, oldcpu)); | |
+ spin_unlock(&glob_recycler.lock); | |
+#endif | |
+ } | |
+ | |
+ return NOTIFY_OK; | |
+} | |
+ | |
+#ifdef CONFIG_SKB_RECYCLER_PREALLOC | |
+static int __init skb_prealloc_init_list(void) | |
+{ | |
+ int i; | |
+ struct sk_buff *skb; | |
+ | |
+ for (i = 0; i < SKB_RECYCLE_MAX_PREALLOC_SKBS; i++) { | |
+ skb = __alloc_skb(SKB_RECYCLE_MAX_SIZE + NET_SKB_PAD, | |
+ GFP_KERNEL, 0, NUMA_NO_NODE); | |
+ if (unlikely(!skb)) | |
+ return -ENOMEM; | |
+ | |
+ skb_reserve(skb, NET_SKB_PAD); | |
+ | |
+ skb_recycler_consume(skb); | |
+ } | |
+ return 0; | |
+} | |
+#endif | |
+ | |
+/* procfs: count | |
+ * Show skb counts | |
+ */ | |
+static int proc_skb_count_show(struct seq_file *seq, void *v) | |
+{ | |
+ int cpu; | |
+ int len; | |
+ int total; | |
+#ifdef CONFIG_SKB_RECYCLER_MULTI_CPU | |
+ unsigned int i; | |
+ unsigned long flags; | |
+#endif | |
+ | |
+ total = 0; | |
+ | |
+ for_each_online_cpu(cpu) { | |
+ len = skb_queue_len(&per_cpu(recycle_list, cpu)); | |
+ seq_printf(seq, "recycle_list[%d]: %d\n", cpu, len); | |
+ total += len; | |
+ } | |
+ | |
+#ifdef CONFIG_SKB_RECYCLER_MULTI_CPU | |
+ for_each_online_cpu(cpu) { | |
+ len = skb_queue_len(&per_cpu(recycle_spare_list, cpu)); | |
+ seq_printf(seq, "recycle_spare_list[%d]: %d\n", cpu, len); | |
+ total += len; | |
+ } | |
+ | |
+ for (i = 0; i < SKB_RECYCLE_MAX_SHARED_POOLS; i++) { | |
+ spin_lock_irqsave(&glob_recycler.lock, flags); | |
+ len = skb_queue_len(&glob_recycler.pool[i]); | |
+ spin_unlock_irqrestore(&glob_recycler.lock, flags); | |
+ seq_printf(seq, "global_list[%d]: %d\n", i, len); | |
+ total += len; | |
+ } | |
+#endif | |
+ | |
+ seq_printf(seq, "total: %d\n", total); | |
+ return 0; | |
+} | |
+ | |
+static int proc_skb_count_open(struct inode *inode, struct file *file) | |
+{ | |
+ return single_open(file, proc_skb_count_show, PDE_DATA(inode)); | |
+} | |
+ | |
+static const struct file_operations proc_skb_count_fops = { | |
+ .owner = THIS_MODULE, | |
+ .open = proc_skb_count_open, | |
+ .read = seq_read, | |
+ .llseek = seq_lseek, | |
+ .release = single_release, | |
+}; | |
+ | |
+/* procfs: flush | |
+ * Flush skbs | |
+ */ | |
+static void skb_recycler_flush_task(struct work_struct *work) | |
+{ | |
+ unsigned long flags; | |
+ struct sk_buff_head *h; | |
+ struct sk_buff_head tmp; | |
+ | |
+ skb_queue_head_init(&tmp); | |
+ | |
+ h = &get_cpu_var(recycle_list); | |
+ local_irq_save(flags); | |
+ skb_queue_splice_init(h, &tmp); | |
+ local_irq_restore(flags); | |
+ put_cpu_var(recycle_list); | |
+ skb_recycler_free_skb(&tmp); | |
+ | |
+#ifdef CONFIG_SKB_RECYCLER_MULTI_CPU | |
+ h = &get_cpu_var(recycle_spare_list); | |
+ local_irq_save(flags); | |
+ skb_queue_splice_init(h, &tmp); | |
+ local_irq_restore(flags); | |
+ put_cpu_var(recycle_spare_list); | |
+ skb_recycler_free_skb(&tmp); | |
+#endif | |
+} | |
+ | |
+static ssize_t proc_skb_flush_write(struct file *file, | |
+ const char __user *buf, | |
+ size_t count, | |
+ loff_t *ppos) | |
+{ | |
+#ifdef CONFIG_SKB_RECYCLER_MULTI_CPU | |
+ unsigned int i; | |
+ unsigned long flags; | |
+#endif | |
+ schedule_on_each_cpu(&skb_recycler_flush_task); | |
+ | |
+#ifdef CONFIG_SKB_RECYCLER_MULTI_CPU | |
+ spin_lock_irqsave(&glob_recycler.lock, flags); | |
+ for (i = 0; i < SKB_RECYCLE_MAX_SHARED_POOLS; i++) | |
+ skb_recycler_free_skb(&glob_recycler.pool[i]); | |
+ glob_recycler.head = 0; | |
+ glob_recycler.tail = 0; | |
+ spin_unlock_irqrestore(&glob_recycler.lock, flags); | |
+#endif | |
+ return count; | |
+} | |
+ | |
+static const struct file_operations proc_skb_flush_fops = { | |
+ .owner = THIS_MODULE, | |
+ .write = proc_skb_flush_write, | |
+ .open = simple_open, | |
+ .llseek = noop_llseek, | |
+}; | |
+ | |
+/* procfs: max_skbs | |
+ * Show max skbs | |
+ */ | |
+static int proc_skb_max_skbs_show(struct seq_file *seq, void *v) | |
+{ | |
+ seq_printf(seq, "%d\n", skb_recycle_max_skbs); | |
+ return 0; | |
+} | |
+ | |
+static int proc_skb_max_skbs_open(struct inode *inode, struct file *file) | |
+{ | |
+ return single_open(file, proc_skb_max_skbs_show, PDE_DATA(inode)); | |
+} | |
+ | |
+static ssize_t proc_skb_max_skbs_write(struct file *file, | |
+ const char __user *buf, | |
+ size_t count, | |
+ loff_t *ppos) | |
+{ | |
+ int ret; | |
+ int max; | |
+ char buffer[13]; | |
+ | |
+ memset(buffer, 0, sizeof(buffer)); | |
+ if (count > sizeof(buffer) - 1) | |
+ count = sizeof(buffer) - 1; | |
+ if (copy_from_user(buffer, buf, count) != 0) | |
+ return -EFAULT; | |
+ ret = kstrtoint(strstrip(buffer), 10, &max); | |
+ if (ret == 0 && max >= 0) | |
+ skb_recycle_max_skbs = max; | |
+ | |
+ return count; | |
+} | |
+ | |
+static const struct file_operations proc_skb_max_skbs_fops = { | |
+ .owner = THIS_MODULE, | |
+ .open = proc_skb_max_skbs_open, | |
+ .read = seq_read, | |
+ .write = proc_skb_max_skbs_write, | |
+ .release = single_release, | |
+}; | |
+ | |
+#ifdef CONFIG_SKB_RECYCLER_MULTI_CPU | |
+/* procfs: max_spare_skbs | |
+ * Show max spare skbs | |
+ */ | |
+static int proc_skb_max_spare_skbs_show(struct seq_file *seq, void *v) | |
+{ | |
+ seq_printf(seq, "%d\n", skb_recycle_spare_max_skbs); | |
+ return 0; | |
+} | |
+ | |
+static int proc_skb_max_spare_skbs_open(struct inode *inode, struct file *file) | |
+{ | |
+ return single_open(file, | |
+ proc_skb_max_spare_skbs_show, | |
+ PDE_DATA(inode)); | |
+} | |
+ | |
+static ssize_t | |
+proc_skb_max_spare_skbs_write(struct file *file, | |
+ const char __user *buf, | |
+ size_t count, | |
+ loff_t *ppos) | |
+{ | |
+ int ret; | |
+ int max; | |
+ char buffer[13]; | |
+ | |
+ memset(buffer, 0, sizeof(buffer)); | |
+ if (count > sizeof(buffer) - 1) | |
+ count = sizeof(buffer) - 1; | |
+ if (copy_from_user(buffer, buf, count) != 0) | |
+ return -EFAULT; | |
+ ret = kstrtoint(strstrip(buffer), 10, &max); | |
+ if (ret == 0 && max >= 0) | |
+ skb_recycle_spare_max_skbs = max; | |
+ | |
+ return count; | |
+} | |
+ | |
+static const struct file_operations proc_skb_max_spare_skbs_fops = { | |
+ .owner = THIS_MODULE, | |
+ .open = proc_skb_max_spare_skbs_open, | |
+ .read = seq_read, | |
+ .write = proc_skb_max_spare_skbs_write, | |
+ .release = single_release, | |
+}; | |
+#endif /* CONFIG_SKB_RECYCLER_MULTI_CPU */ | |
+ | |
+static void skb_recycler_init_procfs(void) | |
+{ | |
+ proc_net_skbrecycler = proc_mkdir("skb_recycler", init_net.proc_net); | |
+ if (!proc_net_skbrecycler) { | |
+ pr_err("cannot create skb_recycle proc dir"); | |
+ return; | |
+ } | |
+ | |
+ if (!proc_create("count", | |
+ S_IRUGO, | |
+ proc_net_skbrecycler, | |
+ &proc_skb_count_fops)) | |
+ pr_err("cannot create proc net skb_recycle held\n"); | |
+ | |
+ if (!proc_create("flush", | |
+ S_IWUGO, | |
+ proc_net_skbrecycler, | |
+ &proc_skb_flush_fops)) | |
+ pr_err("cannot create proc net skb_recycle flush\n"); | |
+ | |
+ if (!proc_create("max_skbs", | |
+ S_IRUGO | S_IWUGO, | |
+ proc_net_skbrecycler, | |
+ &proc_skb_max_skbs_fops)) | |
+ pr_err("cannot create proc net skb_recycle max_skbs\n"); | |
+ | |
+#ifdef CONFIG_SKB_RECYCLER_MULTI_CPU | |
+ if (!proc_create("max_spare_skbs", | |
+ S_IRUGO | S_IWUGO, | |
+ proc_net_skbrecycler, | |
+ &proc_skb_max_spare_skbs_fops)) | |
+ pr_err("cannot create proc net skb_recycle max_spare_skbs\n"); | |
+#endif | |
+} | |
+ | |
+void __init skb_recycler_init(void) | |
+{ | |
+ int cpu; | |
+#ifdef CONFIG_SKB_RECYCLER_MULTI_CPU | |
+ unsigned int i; | |
+#endif | |
+ | |
+ for_each_possible_cpu(cpu) { | |
+ skb_queue_head_init(&per_cpu(recycle_list, cpu)); | |
+ } | |
+ | |
+#ifdef CONFIG_SKB_RECYCLER_MULTI_CPU | |
+ for_each_possible_cpu(cpu) { | |
+ skb_queue_head_init(&per_cpu(recycle_spare_list, cpu)); | |
+ } | |
+ | |
+ spin_lock_init(&glob_recycler.lock); | |
+ | |
+ for (i = 0; i < SKB_RECYCLE_MAX_SHARED_POOLS; i++) | |
+ skb_queue_head_init(&glob_recycler.pool[i]); | |
+ glob_recycler.head = 0; | |
+ glob_recycler.tail = 0; | |
+#endif | |
+ | |
+#ifdef CONFIG_SKB_RECYCLER_PREALLOC | |
+ if (skb_prealloc_init_list()) | |
+ pr_err("Failed to preallocate SKBs for recycle list\n"); | |
+#endif | |
+ | |
+ hotcpu_notifier(skb_cpu_callback, 0); | |
+ skbuff_debugobj_register_callback(); | |
+ skb_recycler_init_procfs(); | |
+} | |
+ | |
+void skb_recycler_print_all_lists(void) | |
+{ | |
+ unsigned long flags; | |
+ int cpu; | |
+#ifdef CONFIG_SKB_RECYCLER_MULTI_CPU | |
+ int i; | |
+ | |
+ spin_lock_irqsave(&glob_recycler.lock, flags); | |
+ for (i = 0; i < SKB_RECYCLE_MAX_SHARED_POOLS; i++) | |
+ skbuff_debugobj_print_skb_list((&glob_recycler.pool[i])->next, | |
+ "Global Pool", -1); | |
+ spin_unlock_irqrestore(&glob_recycler.lock, flags); | |
+ | |
+ preempt_disable(); | |
+ local_irq_save(flags); | |
+ for_each_possible_cpu(cpu) { | |
+ struct sk_buff_head *h; | |
+ | |
+ h = &per_cpu(recycle_spare_list, cpu); | |
+ skbuff_debugobj_print_skb_list(h->next, "Recycle Spare", cpu); | |
+ } | |
+ local_irq_restore(flags); | |
+ preempt_enable(); | |
+#endif | |
+ | |
+ preempt_disable(); | |
+ local_irq_save(flags); | |
+ for_each_possible_cpu(cpu) { | |
+ struct sk_buff_head *h; | |
+ | |
+ h = &per_cpu(recycle_list, cpu); | |
+ skbuff_debugobj_print_skb_list(h->next, "Recycle List", cpu); | |
+ } | |
+ local_irq_restore(flags); | |
+ preempt_enable(); | |
+} | |
new file mode 100644 | |
--- /dev/null | |
+++ b/net/core/skbuff_recycle.h | |
@@ -0,0 +1,170 @@ | |
+/* Copyright (c) 2013-2017, The Linux Foundation. All rights reserved. | |
+ * | |
+ * Permission to use, copy, modify, and/or distribute this software for any | |
+ * purpose with or without fee is hereby granted, provided that the above | |
+ * copyright notice and this permission notice appear in all copies. | |
+ * | |
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | |
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | |
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | |
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | |
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | |
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | |
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | |
+ * | |
+ */ | |
+/* Definitions for the skb recycler functions */ | |
+#ifndef _LINUX_SKBUFF_RECYCLE_H | |
+#define _LINUX_SKBUFF_RECYCLE_H | |
+ | |
+#include <linux/module.h> | |
+#include <linux/types.h> | |
+#include <linux/cpu.h> | |
+#include <linux/module.h> | |
+#include <linux/types.h> | |
+#include <linux/kernel.h> | |
+#include <linux/kmemcheck.h> | |
+#include <linux/mm.h> | |
+#include <linux/interrupt.h> | |
+#include <linux/in.h> | |
+#include <linux/inet.h> | |
+#include <linux/slab.h> | |
+#include <linux/netdevice.h> | |
+#ifdef CONFIG_NET_CLS_ACT | |
+#include <net/pkt_sched.h> | |
+#endif | |
+#include <linux/string.h> | |
+#include <linux/skbuff.h> | |
+#include <linux/splice.h> | |
+#include <linux/init.h> | |
+#include <linux/prefetch.h> | |
+#include <linux/if.h> | |
+ | |
+#define SKB_RECYCLE_SIZE 2304 | |
+#define SKB_RECYCLE_MIN_SIZE SKB_RECYCLE_SIZE | |
+#define SKB_RECYCLE_MAX_SIZE (3904 - NET_SKB_PAD) | |
+#define SKB_RECYCLE_MAX_SKBS 1024 | |
+ | |
+#define SKB_RECYCLE_SPARE_MAX_SKBS 256 | |
+ | |
+#ifdef CONFIG_SKB_RECYCLER_PREALLOC | |
+#define SKB_RECYCLE_MAX_PREALLOC_SKBS CONFIG_SKB_RECYCLE_MAX_PREALLOC_SKBS | |
+#define SKB_RECYCLE_MAX_SHARED_POOLS \ | |
+ DIV_ROUND_UP(SKB_RECYCLE_MAX_PREALLOC_SKBS, \ | |
+ SKB_RECYCLE_SPARE_MAX_SKBS) | |
+#else | |
+#define SKB_RECYCLE_MAX_SHARED_POOLS 8 | |
+#endif | |
+ | |
+#define SKB_RECYCLE_MAX_SHARED_POOLS_MASK \ | |
+ (SKB_RECYCLE_MAX_SHARED_POOLS - 1) | |
+ | |
+#ifdef CONFIG_SKB_RECYCLER_MULTI_CPU | |
+struct global_recycler { | |
+ /* Global circular list which holds the shared skb pools */ | |
+ struct sk_buff_head pool[SKB_RECYCLE_MAX_SHARED_POOLS]; | |
+ u8 head; /* head of the circular list */ | |
+ u8 tail; /* tail of the circular list */ | |
+ spinlock_t lock; | |
+}; | |
+#endif | |
+ | |
+static __always_inline void zero_struct(void *v, int size) | |
+{ | |
+ u32 *s = (u32 *)v; | |
+ | |
+ /* We assume that size is word aligned; in fact, it's constant */ | |
+ WARN_ON((size & 3) != 0); | |
+ | |
+ /* This looks odd but we "know" size is a constant, and so the | |
+ * compiler can fold away all of the conditionals. The compiler is | |
+ * pretty smart here, and can fold away the loop, too! | |
+ */ | |
+ while (size > 0) { | |
+ if (size >= 4) | |
+ s[0] = 0; | |
+ if (size >= 8) | |
+ s[1] = 0; | |
+ if (size >= 12) | |
+ s[2] = 0; | |
+ if (size >= 16) | |
+ s[3] = 0; | |
+ if (size >= 20) | |
+ s[4] = 0; | |
+ if (size >= 24) | |
+ s[5] = 0; | |
+ if (size >= 28) | |
+ s[6] = 0; | |
+ if (size >= 32) | |
+ s[7] = 0; | |
+ if (size >= 36) | |
+ s[8] = 0; | |
+ if (size >= 40) | |
+ s[9] = 0; | |
+ if (size >= 44) | |
+ s[10] = 0; | |
+ if (size >= 48) | |
+ s[11] = 0; | |
+ if (size >= 52) | |
+ s[12] = 0; | |
+ if (size >= 56) | |
+ s[13] = 0; | |
+ if (size >= 60) | |
+ s[14] = 0; | |
+ if (size >= 64) | |
+ s[15] = 0; | |
+ size -= 64; | |
+ s += 16; | |
+ } | |
+} | |
+ | |
+static inline bool consume_skb_can_recycle(const struct sk_buff *skb, | |
+ int min_skb_size, int max_skb_size) | |
+{ | |
+ if (unlikely(irqs_disabled())) | |
+ return false; | |
+ | |
+ if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY)) | |
+ return false; | |
+ | |
+ if (unlikely(skb_is_nonlinear(skb))) | |
+ return false; | |
+ | |
+ if (unlikely(skb_shinfo(skb)->frag_list)) | |
+ return false; | |
+ | |
+ if (unlikely(skb_shinfo(skb)->nr_frags)) | |
+ return false; | |
+ | |
+ if (unlikely(skb->fclone != SKB_FCLONE_UNAVAILABLE)) | |
+ return false; | |
+ | |
+ min_skb_size = SKB_DATA_ALIGN(min_skb_size + NET_SKB_PAD); | |
+ if (unlikely(skb_end_pointer(skb) - skb->head < min_skb_size)) | |
+ return false; | |
+ | |
+ max_skb_size = SKB_DATA_ALIGN(max_skb_size + NET_SKB_PAD); | |
+ if (unlikely(skb_end_pointer(skb) - skb->head > max_skb_size)) | |
+ return false; | |
+ | |
+ if (unlikely(skb_cloned(skb))) | |
+ return false; | |
+ | |
+ if (unlikely(skb_pfmemalloc(skb))) | |
+ return false; | |
+ | |
+ return true; | |
+} | |
+ | |
+#ifdef CONFIG_SKB_RECYCLER | 
+void __init skb_recycler_init(void); | 
+struct sk_buff *skb_recycler_alloc(struct net_device *dev, unsigned int length); | 
+bool skb_recycler_consume(struct sk_buff *skb); | 
+void skb_recycler_print_all_lists(void); | 
+#else | 
+#define skb_recycler_init() do { } while (0) | 
+#define skb_recycler_alloc(dev, len) NULL | 
+#define skb_recycler_consume(skb) false | 
+#define skb_recycler_print_all_lists() do { } while (0) | 
+#endif | 
+#endif | |
--- a/net/core/sock.c | |
+++ b/net/core/sock.c | |
@@ -1474,9 +1474,11 @@ void sk_destruct(struct sock *sk) | |
static void __sk_free(struct sock *sk) | |
{ | |
+#ifdef CONFIG_SOCK_DIAG | |
if (unlikely(sock_diag_has_destroy_listeners(sk) && sk->sk_net_refcnt)) | |
sock_diag_broadcast_destroy(sk); | |
else | |
+#endif | |
sk_destruct(sk); | |
} | |
@@ -3040,6 +3042,8 @@ static __net_initdata struct pernet_operations proto_net_ops = { | |
static int __init proto_init(void) | |
{ | |
+ if (IS_ENABLED(CONFIG_PROC_STRIPPED)) | |
+ return 0; | |
return register_pernet_subsys(&proto_net_ops); | |
} | |
--- a/net/core/timestamping.c | |
+++ b/net/core/timestamping.c | |
@@ -63,6 +63,9 @@ bool skb_defer_rx_timestamp(struct sk_buff *skb) | |
if (!skb->dev || !skb->dev->phydev || !skb->dev->phydev->drv) | |
return false; | |
+ if (!(skb->dev->phydev->advertising & ADVERTISED_PTP)) | |
+ return false; | |
+ | |
if (skb_headroom(skb) < ETH_HLEN) | |
return false; | |
--- a/net/dccp/ipv6.c | |
+++ b/net/dccp/ipv6.c | |
@@ -422,6 +422,9 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk, | |
newsk->sk_backlog_rcv = dccp_v4_do_rcv; | |
newnp->pktoptions = NULL; | |
newnp->opt = NULL; | |
+ newnp->ipv6_mc_list = NULL; | |
+ newnp->ipv6_ac_list = NULL; | |
+ newnp->ipv6_fl_list = NULL; | |
newnp->mcast_oif = inet6_iif(skb); | |
newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; | |
@@ -486,6 +489,9 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk, | |
/* Clone RX bits */ | |
newnp->rxopt.all = np->rxopt.all; | |
+ newnp->ipv6_mc_list = NULL; | |
+ newnp->ipv6_ac_list = NULL; | |
+ newnp->ipv6_fl_list = NULL; | |
newnp->pktoptions = NULL; | |
newnp->opt = NULL; | |
newnp->mcast_oif = inet6_iif(skb); | |
new file mode 100644 | |
--- /dev/null | |
+++ b/net/dsa/mv88e6063.c | |
@@ -0,0 +1,294 @@ | |
+/* | |
+ * net/dsa/mv88e6063.c - Driver for Marvell 88e6063 switch chips | |
+ * Copyright (c) 2009 Gabor Juhos <juhosg@openwrt.org> | |
+ * | |
+ * This driver was based on: net/dsa/mv88e6060.c | |
+ * net/dsa/mv88e6060.c - Driver for Marvell 88e6060 switch chips | |
+ * Copyright (c) 2008-2009 Marvell Semiconductor | |
+ * | |
+ * This program is free software; you can redistribute it and/or modify | |
+ * it under the terms of the GNU General Public License as published by | |
+ * the Free Software Foundation; either version 2 of the License, or | |
+ * (at your option) any later version. | |
+ */ | |
+ | |
+#include <linux/list.h> | |
+#include <linux/netdevice.h> | |
+#include <linux/phy.h> | |
+#include "dsa_priv.h" | |
+ | |
+#define REG_BASE 0x10 | |
+#define REG_PHY(p) (REG_BASE + (p)) | |
+#define REG_PORT(p) (REG_BASE + 8 + (p)) | |
+#define REG_GLOBAL (REG_BASE + 0x0f) | |
+#define NUM_PORTS 7 | |
+ | |
+static int reg_read(struct dsa_switch *ds, int addr, int reg) | |
+{ | |
+ return mdiobus_read(ds->master_mii_bus, addr, reg); | |
+} | |
+ | |
+#define REG_READ(addr, reg) \ | |
+ ({ \ | |
+ int __ret; \ | |
+ \ | |
+ __ret = reg_read(ds, addr, reg); \ | |
+ if (__ret < 0) \ | |
+ return __ret; \ | |
+ __ret; \ | |
+ }) | |
+ | |
+ | |
+static int reg_write(struct dsa_switch *ds, int addr, int reg, u16 val) | |
+{ | |
+ return mdiobus_write(ds->master_mii_bus, addr, reg, val); | |
+} | |
+ | |
+#define REG_WRITE(addr, reg, val) \ | |
+ ({ \ | |
+ int __ret; \ | |
+ \ | |
+ __ret = reg_write(ds, addr, reg, val); \ | |
+ if (__ret < 0) \ | |
+ return __ret; \ | |
+ }) | |
+ | |
+static char *mv88e6063_probe(struct mii_bus *bus, int sw_addr) | |
+{ | |
+ int ret; | |
+ | |
+ ret = mdiobus_read(bus, REG_PORT(0), 0x03); | |
+ if (ret >= 0) { | |
+ ret &= 0xfff0; | |
+ if (ret == 0x1530) | |
+ return "Marvell 88E6063"; | |
+ } | |
+ | |
+ return NULL; | |
+} | |
+ | |
+static int mv88e6063_switch_reset(struct dsa_switch *ds) | |
+{ | |
+ int i; | |
+ int ret; | |
+ | |
+ /* | |
+ * Set all ports to the disabled state. | |
+ */ | |
+ for (i = 0; i < NUM_PORTS; i++) { | |
+ ret = REG_READ(REG_PORT(i), 0x04); | |
+ REG_WRITE(REG_PORT(i), 0x04, ret & 0xfffc); | |
+ } | |
+ | |
+ /* | |
+ * Wait for transmit queues to drain. | |
+ */ | |
+ msleep(2); | |
+ | |
+ /* | |
+ * Reset the switch. | |
+ */ | |
+ REG_WRITE(REG_GLOBAL, 0x0a, 0xa130); | |
+ | |
+ /* | |
+ * Wait up to one second for reset to complete. | |
+ */ | |
+ for (i = 0; i < 1000; i++) { | |
+ ret = REG_READ(REG_GLOBAL, 0x00); | |
+ if ((ret & 0x8000) == 0x0000) | |
+ break; | |
+ | |
+ msleep(1); | |
+ } | |
+ if (i == 1000) | |
+ return -ETIMEDOUT; | |
+ | |
+ return 0; | |
+} | |
+ | |
+static int mv88e6063_setup_global(struct dsa_switch *ds) | |
+{ | |
+ /* | |
+ * Disable discarding of frames with excessive collisions, | |
+ * set the maximum frame size to 1536 bytes, and mask all | |
+ * interrupt sources. | |
+ */ | |
+ REG_WRITE(REG_GLOBAL, 0x04, 0x0800); | |
+ | |
+ /* | |
+ * Enable automatic address learning, set the address | |
+ * database size to 1024 entries, and set the default aging | |
+ * time to 5 minutes. | |
+ */ | |
+ REG_WRITE(REG_GLOBAL, 0x0a, 0x2130); | |
+ | |
+ return 0; | |
+} | |
+ | |
+static int mv88e6063_setup_port(struct dsa_switch *ds, int p) | |
+{ | |
+ int addr = REG_PORT(p); | |
+ | |
+ /* | |
+ * Do not force flow control, disable Ingress and Egress | |
+ * Header tagging, disable VLAN tunneling, and set the port | |
+ * state to Forwarding. Additionally, if this is the CPU | |
+ * port, enable Ingress and Egress Trailer tagging mode. | |
+ */ | |
+ REG_WRITE(addr, 0x04, dsa_is_cpu_port(ds, p) ? 0x4103 : 0x0003); | |
+ | |
+ /* | |
+ * Port based VLAN map: give each port its own address | |
+ * database, allow the CPU port to talk to each of the 'real' | |
+ * ports, and allow each of the 'real' ports to only talk to | |
+ * the CPU port. | |
+ */ | |
+ REG_WRITE(addr, 0x06, | |
+ ((p & 0xf) << 12) | | |
+ (dsa_is_cpu_port(ds, p) ? | |
+ ds->phys_port_mask : | |
+ (1 << ds->dst->cpu_port))); | |
+ | |
+ /* | |
+ * Port Association Vector: when learning source addresses | |
+ * of packets, add the address to the address database using | |
+ * a port bitmap that has only the bit for this port set and | |
+ * the other bits clear. | |
+ */ | |
+ REG_WRITE(addr, 0x0b, 1 << p); | |
+ | |
+ return 0; | |
+} | |
+ | |
+static int mv88e6063_setup(struct dsa_switch *ds) | |
+{ | |
+ int i; | |
+ int ret; | |
+ | |
+ ret = mv88e6063_switch_reset(ds); | |
+ if (ret < 0) | |
+ return ret; | |
+ | |
+ /* @@@ initialise atu */ | |
+ | |
+ ret = mv88e6063_setup_global(ds); | |
+ if (ret < 0) | |
+ return ret; | |
+ | |
+ for (i = 0; i < NUM_PORTS; i++) { | |
+ ret = mv88e6063_setup_port(ds, i); | |
+ if (ret < 0) | |
+ return ret; | |
+ } | |
+ | |
+ return 0; | |
+} | |
+ | |
+static int mv88e6063_set_addr(struct dsa_switch *ds, u8 *addr) | |
+{ | |
+ REG_WRITE(REG_GLOBAL, 0x01, (addr[0] << 8) | addr[1]); | |
+ REG_WRITE(REG_GLOBAL, 0x02, (addr[2] << 8) | addr[3]); | |
+ REG_WRITE(REG_GLOBAL, 0x03, (addr[4] << 8) | addr[5]); | |
+ | |
+ return 0; | |
+} | |
+ | |
+static int mv88e6063_port_to_phy_addr(int port) | |
+{ | |
+ if (port >= 0 && port < NUM_PORTS) | |
+ return REG_PHY(port); | |
+ return -1; | |
+} | |
+ | |
+static int mv88e6063_phy_read(struct dsa_switch *ds, int port, int regnum) | |
+{ | |
+ int addr; | |
+ | |
+ addr = mv88e6063_port_to_phy_addr(port); | |
+ if (addr == -1) | |
+ return 0xffff; | |
+ | |
+ return reg_read(ds, addr, regnum); | |
+} | |
+ | |
+static int | |
+mv88e6063_phy_write(struct dsa_switch *ds, int port, int regnum, u16 val) | |
+{ | |
+ int addr; | |
+ | |
+ addr = mv88e6063_port_to_phy_addr(port); | |
+ if (addr == -1) | |
+ return 0xffff; | |
+ | |
+ return reg_write(ds, addr, regnum, val); | |
+} | |
+ | |
+static void mv88e6063_poll_link(struct dsa_switch *ds) | |
+{ | |
+ int i; | |
+ | |
+ for (i = 0; i < DSA_MAX_PORTS; i++) { | |
+ struct net_device *dev; | |
+ int uninitialized_var(port_status); | |
+ int link; | |
+ int speed; | |
+ int duplex; | |
+ int fc; | |
+ | |
+ dev = ds->ports[i]; | |
+ if (dev == NULL) | |
+ continue; | |
+ | |
+ link = 0; | |
+ if (dev->flags & IFF_UP) { | |
+ port_status = reg_read(ds, REG_PORT(i), 0x00); | |
+ if (port_status < 0) | |
+ continue; | |
+ | |
+ link = !!(port_status & 0x1000); | |
+ } | |
+ | |
+ if (!link) { | |
+ if (netif_carrier_ok(dev)) { | |
+ printk(KERN_INFO "%s: link down\n", dev->name); | |
+ netif_carrier_off(dev); | |
+ } | |
+ continue; | |
+ } | |
+ | |
+ speed = (port_status & 0x0100) ? 100 : 10; | |
+ duplex = (port_status & 0x0200) ? 1 : 0; | |
+ fc = ((port_status & 0xc000) == 0xc000) ? 1 : 0; | |
+ | |
+ if (!netif_carrier_ok(dev)) { | |
+ printk(KERN_INFO "%s: link up, %d Mb/s, %s duplex, " | |
+ "flow control %sabled\n", dev->name, | |
+ speed, duplex ? "full" : "half", | |
+ fc ? "en" : "dis"); | |
+ netif_carrier_on(dev); | |
+ } | |
+ } | |
+} | |
+ | |
+static struct dsa_switch_driver mv88e6063_switch_driver = { | |
+ .tag_protocol = htons(ETH_P_TRAILER), | |
+ .probe = mv88e6063_probe, | |
+ .setup = mv88e6063_setup, | |
+ .set_addr = mv88e6063_set_addr, | |
+ .phy_read = mv88e6063_phy_read, | |
+ .phy_write = mv88e6063_phy_write, | |
+ .poll_link = mv88e6063_poll_link, | |
+}; | |
+ | |
+static int __init mv88e6063_init(void) | |
+{ | |
+ register_switch_driver(&mv88e6063_switch_driver); | |
+ return 0; | |
+} | |
+module_init(mv88e6063_init); | |
+ | |
+static void __exit mv88e6063_cleanup(void) | |
+{ | |
+ unregister_switch_driver(&mv88e6063_switch_driver); | |
+} | |
+module_exit(mv88e6063_cleanup); | |
--- a/net/ethernet/eth.c | |
+++ b/net/ethernet/eth.c | |
@@ -140,6 +140,18 @@ u32 eth_get_headlen(void *data, unsigned int len) | |
} | |
EXPORT_SYMBOL(eth_get_headlen); | |
+static inline bool | |
+eth_check_local_mask(const void *addr1, const void *addr2, const void *mask) | |
+{ | |
+ const u16 *a1 = addr1; | |
+ const u16 *a2 = addr2; | |
+ const u16 *m = mask; | |
+ | |
+ return (((a1[0] ^ a2[0]) & ~m[0]) | | |
+ ((a1[1] ^ a2[1]) & ~m[1]) | | |
+ ((a1[2] ^ a2[2]) & ~m[2])); | |
+} | |
+ | |
/** | |
* eth_type_trans - determine the packet's protocol ID. | |
* @skb: received socket data | |
@@ -156,6 +168,12 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) | |
const struct ethhdr *eth; | |
skb->dev = dev; | |
+ | |
+#ifdef CONFIG_ETHERNET_PACKET_MANGLE | |
+ if (dev->eth_mangle_rx) | |
+ dev->eth_mangle_rx(dev, skb); | |
+#endif | |
+ | |
skb_reset_mac_header(skb); | |
eth = (struct ethhdr *)skb->data; | |
@@ -168,8 +186,12 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) | |
skb->pkt_type = PACKET_MULTICAST; | |
} | |
else if (unlikely(!ether_addr_equal_64bits(eth->h_dest, | |
- dev->dev_addr))) | |
+ dev->dev_addr))) { | |
skb->pkt_type = PACKET_OTHERHOST; | |
+ if (eth_check_local_mask(eth->h_dest, dev->dev_addr, | |
+ dev->local_addr_mask)) | |
+ skb->gro_skip = 1; | |
+ } | |
/* | |
* Some variants of DSA tagging don't have an ethertype field | |
--- a/net/ipv4/Kconfig | |
+++ b/net/ipv4/Kconfig | |
@@ -414,6 +414,7 @@ config INET_LRO | |
config INET_DIAG | |
tristate "INET: socket monitoring interface" | |
+ select SOCK_DIAG | |
default y | |
---help--- | |
Support for INET (TCP, DCCP, etc) socket monitoring interface used by | |
--- a/net/ipv4/af_inet.c | |
+++ b/net/ipv4/af_inet.c | |
@@ -1321,8 +1321,8 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, | |
if (unlikely(ip_fast_csum((u8 *)iph, 5))) | |
goto out_unlock; | |
- id = ntohl(*(__be32 *)&iph->id); | |
- flush = (u16)((ntohl(*(__be32 *)iph) ^ skb_gro_len(skb)) | (id & ~IP_DF)); | |
+ id = ntohl(net_hdr_word(&iph->id)); | |
+ flush = (u16)((ntohl(net_hdr_word(iph)) ^ skb_gro_len(skb)) | (id & ~IP_DF)); | |
id >>= 16; | |
for (p = *head; p; p = p->next) { | |
--- a/net/ipv4/esp4.c | |
+++ b/net/ipv4/esp4.c | |
@@ -150,6 +150,7 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) | |
int assoclen; | |
int extralen; | |
__be64 seqno; | |
+ bool nosupp_sg; | |
/* skb is pure payload to encrypt */ | |
@@ -157,6 +158,12 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) | |
alen = crypto_aead_authsize(aead); | |
ivlen = crypto_aead_ivsize(aead); | |
+ nosupp_sg = crypto_tfm_alg_flags(&aead->base) & CRYPTO_ALG_NOSUPP_SG; | |
+ if (nosupp_sg && skb_linearize(skb)) { | |
+ err = -ENOMEM; | |
+ goto error; | |
+ } | |
+ | |
tfclen = 0; | |
if (x->tfcpad) { | |
struct xfrm_dst *dst = (struct xfrm_dst *)skb_dst(skb); | |
@@ -430,6 +437,7 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) | |
u8 *iv; | |
struct scatterlist *sg; | |
int err = -EINVAL; | |
+ bool nosupp_sg; | |
if (!pskb_may_pull(skb, sizeof(*esph) + ivlen)) | |
goto out; | |
@@ -437,6 +445,12 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) | |
if (elen <= 0) | |
goto out; | |
+ nosupp_sg = crypto_tfm_alg_flags(&aead->base) & CRYPTO_ALG_NOSUPP_SG; | |
+ if (nosupp_sg && skb_linearize(skb)) { | |
+ err = -ENOMEM; | |
+ goto out; | |
+ } | |
+ | |
err = skb_cow_data(skb, 0, &trailer); | |
if (err < 0) | |
goto out; | |
--- a/net/ipv4/fib_semantics.c | |
+++ b/net/ipv4/fib_semantics.c | |
@@ -138,6 +138,10 @@ const struct fib_prop fib_props[RTN_MAX + 1] = { | |
.error = -EINVAL, | |
.scope = RT_SCOPE_NOWHERE, | |
}, | |
+ [RTN_POLICY_FAILED] = { | |
+ .error = -EACCES, | |
+ .scope = RT_SCOPE_UNIVERSE, | |
+ }, | |
}; | |
static void rt_fibinfo_free(struct rtable __rcu **rtp) | |
--- a/net/ipv4/fib_trie.c | |
+++ b/net/ipv4/fib_trie.c | |
@@ -1077,6 +1077,9 @@ static int fib_insert_alias(struct trie *t, struct key_vector *tp, | |
return 0; | |
} | |
+/* Define route change notification chain. */ | |
+static BLOCKING_NOTIFIER_HEAD(iproute_chain); | |
+ | |
/* Caller must hold RTNL. */ | |
int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) | |
{ | |
@@ -1246,6 +1249,8 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) | |
rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, new_fa->tb_id, | |
&cfg->fc_nlinfo, nlflags); | |
succeeded: | |
+ blocking_notifier_call_chain(&iproute_chain, | |
+ RTM_NEWROUTE, fi); | |
return 0; | |
out_sw_fib_del: | |
@@ -1554,6 +1559,8 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) | |
if (fa_to_delete->fa_state & FA_S_ACCESSED) | |
rt_cache_flush(cfg->fc_nlinfo.nl_net); | |
+ blocking_notifier_call_chain(&iproute_chain, | |
+ RTM_DELROUTE, fa_to_delete->fa_info); | |
fib_release_info(fa_to_delete->fa_info); | |
alias_free_mem_rcu(fa_to_delete); | |
return 0; | |
@@ -1982,6 +1989,18 @@ void __init fib_trie_init(void) | |
0, SLAB_PANIC, NULL); | |
} | |
+int ip_rt_register_notifier(struct notifier_block *nb) | |
+{ | |
+ return blocking_notifier_chain_register(&iproute_chain, nb); | |
+} | |
+EXPORT_SYMBOL(ip_rt_register_notifier); | |
+ | |
+int ip_rt_unregister_notifier(struct notifier_block *nb) | |
+{ | |
+ return blocking_notifier_chain_unregister(&iproute_chain, nb); | |
+} | |
+EXPORT_SYMBOL(ip_rt_unregister_notifier); | |
+ | |
struct fib_table *fib_trie_table(u32 id, struct fib_table *alias) | |
{ | |
struct fib_table *tb; | |
@@ -2368,6 +2387,7 @@ static const char *const rtn_type_names[__RTN_MAX] = { | |
[RTN_THROW] = "THROW", | |
[RTN_NAT] = "NAT", | |
[RTN_XRESOLVE] = "XRESOLVE", | |
+ [RTN_POLICY_FAILED] = "POLICY_FAILED", | |
}; | |
static inline const char *rtn_type(char *buf, size_t len, unsigned int t) | |
@@ -2638,10 +2658,12 @@ static const struct file_operations fib_route_fops = { | |
int __net_init fib_proc_init(struct net *net) | |
{ | |
- if (!proc_create("fib_trie", S_IRUGO, net->proc_net, &fib_trie_fops)) | |
+ if (!IS_ENABLED(CONFIG_PROC_STRIPPED) && | |
+ !proc_create("fib_trie", S_IRUGO, net->proc_net, &fib_trie_fops)) | |
goto out1; | |
- if (!proc_create("fib_triestat", S_IRUGO, net->proc_net, | |
+ if (!IS_ENABLED(CONFIG_PROC_STRIPPED) && | |
+ !proc_create("fib_triestat", S_IRUGO, net->proc_net, | |
&fib_triestat_fops)) | |
goto out2; | |
@@ -2651,17 +2673,21 @@ int __net_init fib_proc_init(struct net *net) | |
return 0; | |
out3: | |
- remove_proc_entry("fib_triestat", net->proc_net); | |
+ if (!IS_ENABLED(CONFIG_PROC_STRIPPED)) | |
+ remove_proc_entry("fib_triestat", net->proc_net); | |
out2: | |
- remove_proc_entry("fib_trie", net->proc_net); | |
+ if (!IS_ENABLED(CONFIG_PROC_STRIPPED)) | |
+ remove_proc_entry("fib_trie", net->proc_net); | |
out1: | |
return -ENOMEM; | |
} | |
void __net_exit fib_proc_exit(struct net *net) | |
{ | |
- remove_proc_entry("fib_trie", net->proc_net); | |
- remove_proc_entry("fib_triestat", net->proc_net); | |
+ if (!IS_ENABLED(CONFIG_PROC_STRIPPED)) { | |
+ remove_proc_entry("fib_trie", net->proc_net); | |
+ remove_proc_entry("fib_triestat", net->proc_net); | |
+ } | |
remove_proc_entry("route", net->proc_net); | |
} | |
--- a/net/ipv4/igmp.c | |
+++ b/net/ipv4/igmp.c | |
@@ -505,7 +505,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, | |
if (!skb) | |
return NULL; | |
psrc = (__be32 *)skb_put(skb, sizeof(__be32)); | |
- *psrc = psf->sf_inaddr; | |
+ net_hdr_word(psrc) = psf->sf_inaddr; | |
scount++; stotal++; | |
if ((type == IGMPV3_ALLOW_NEW_SOURCES || | |
type == IGMPV3_BLOCK_OLD_SOURCES) && psf->sf_crcount) { | |
--- a/net/ipv4/inet_connection_sock.c | |
+++ b/net/ipv4/inet_connection_sock.c | |
@@ -669,6 +669,8 @@ struct sock *inet_csk_clone_lock(const struct sock *sk, | |
inet_sk(newsk)->inet_sport = htons(inet_rsk(req)->ir_num); | |
newsk->sk_write_space = sk_stream_write_space; | |
+ inet_sk(newsk)->mc_list = NULL; | |
+ | |
newsk->sk_mark = inet_rsk(req)->ir_mark; | |
atomic64_set(&newsk->sk_cookie, | |
atomic64_read(&inet_rsk(req)->ir_cookie)); | |
--- a/net/ipv4/ip_forward.c | |
+++ b/net/ipv4/ip_forward.c | |
@@ -39,6 +39,9 @@ | |
#include <net/route.h> | |
#include <net/xfrm.h> | |
+int sysctl_ip_use_legacy_tos __read_mostly = 1; | |
+EXPORT_SYMBOL(sysctl_ip_use_legacy_tos); | |
+ | |
static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu) | |
{ | |
if (skb->len <= mtu) | |
@@ -143,7 +146,11 @@ int ip_forward(struct sk_buff *skb) | |
!skb_sec_path(skb)) | |
ip_rt_send_redirect(skb); | |
- skb->priority = rt_tos2priority(iph->tos); | |
+ /* | |
+ * Set skb priority using legacy ToS method if required. | |
+ */ | |
+ if (sysctl_ip_use_legacy_tos != 0) | |
+ skb->priority = rt_tos2priority(iph->tos); | |
return NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, | |
net, NULL, skb, skb->dev, rt->dst.dev, | |
--- a/net/ipv4/ip_gre.c | |
+++ b/net/ipv4/ip_gre.c | |
@@ -633,6 +633,8 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb, | |
if (IS_ERR(skb)) | |
goto out; | |
+ skb->skb_iif = dev->ifindex; | |
+ | |
__gre_xmit(skb, dev, tnl_params, skb->protocol); | |
return NETDEV_TX_OK; | |
@@ -660,6 +662,8 @@ static netdev_tx_t gre_tap_xmit(struct sk_buff *skb, | |
if (skb_cow_head(skb, dev->needed_headroom)) | |
goto free_skb; | |
+ skb->skb_iif = dev->ifindex; | |
+ | |
__gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB)); | |
return NETDEV_TX_OK; | |
@@ -703,7 +707,6 @@ static int ipgre_tunnel_ioctl(struct net_device *dev, | |
It allows to construct virtual multiprotocol broadcast "LAN" | |
over the Internet, provided multicast routing is tuned. | |
- | |
I have no idea was this bicycle invented before me, | |
so that I had to set ARPHRD_IPGRE to a random value. | |
I have an impression, that Cisco could make something similar, | |
@@ -1062,7 +1065,7 @@ static void ipgre_tap_setup(struct net_device *dev) | |
{ | |
ether_setup(dev); | |
dev->netdev_ops = &gre_tap_netdev_ops; | |
- dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; | |
+ dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_GRE_V4_TAP; | |
ip_tunnel_setup(dev, gre_tap_net_id); | |
} | |
--- a/net/ipv4/ip_output.c | |
+++ b/net/ipv4/ip_output.c | |
@@ -922,10 +922,12 @@ static int __ip_append_data(struct sock *sk, | |
csummode = CHECKSUM_PARTIAL; | |
cork->length += length; | |
- if (((length > mtu) || (skb && skb_is_gso(skb))) && | |
+ if ((skb && skb_is_gso(skb)) || | |
+ (((length + (skb ? skb->len : fragheaderlen)) > mtu) && | |
+ (skb_queue_len(queue) <= 1) && | |
(sk->sk_protocol == IPPROTO_UDP) && | |
- (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len && | |
- (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx) { | |
+ (rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) && | |
+ (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx)) { | |
err = ip_ufo_append_data(sk, queue, getfrag, from, length, | |
hh_len, fragheaderlen, transhdrlen, | |
maxfraglen, flags); | |
@@ -1241,6 +1243,7 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, | |
return -EINVAL; | |
if ((size + skb->len > mtu) && | |
+ (skb_queue_len(&sk->sk_write_queue) == 1) && | |
(sk->sk_protocol == IPPROTO_UDP) && | |
(rt->dst.dev->features & NETIF_F_UFO)) { | |
if (skb->ip_summed != CHECKSUM_PARTIAL) | |
--- a/net/ipv4/ip_tunnel_core.c | |
+++ b/net/ipv4/ip_tunnel_core.c | |
@@ -47,6 +47,7 @@ | |
#include <net/netns/generic.h> | |
#include <net/rtnetlink.h> | |
#include <net/dst_metadata.h> | |
+#include <net/vxlan.h> | |
int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, | |
__be32 src, __be32 dst, __u8 proto, | |
@@ -55,7 +56,12 @@ int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, | |
int pkt_len = skb->len - skb_inner_network_offset(skb); | |
struct net *net = dev_net(rt->dst.dev); | |
struct iphdr *iph; | |
+ struct net_device *in_dev = NULL; | |
int err; | |
+ int skb_iif; | |
+ | |
+ /* Save input interface index */ | |
+ skb_iif = skb->skb_iif; | |
skb_scrub_packet(skb, xnet); | |
@@ -79,7 +85,16 @@ int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, | |
iph->ttl = ttl; | |
__ip_select_ident(net, iph, skb_shinfo(skb)->gso_segs ?: 1); | |
+ /* Get input interface */ | |
+ if (skb_iif) | |
+ in_dev = __dev_get_by_index(&init_net, skb_iif); | |
+ | |
+ if (proto == IPPROTO_IPV6 || proto == IPPROTO_GRE || | |
+ is_vxlan_dev(in_dev)) | |
+ skb->skb_iif = skb_iif; | |
+ | |
err = ip_local_out(net, sk, skb); | |
+ | |
if (unlikely(net_xmit_eval(err))) | |
pkt_len = 0; | |
return pkt_len; | |
--- a/net/ipv4/ipmr.c | |
+++ b/net/ipv4/ipmr.c | |
@@ -136,6 +136,9 @@ static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc, | |
int cmd); | |
static void mroute_clean_tables(struct mr_table *mrt, bool all); | |
static void ipmr_expire_process(unsigned long arg); | |
+static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt, __be32 origin, | |
+ __be32 mcastgrp); | |
+static ipmr_mfc_event_offload_callback_t __rcu ipmr_mfc_event_offload_callback; | |
#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES | |
#define ipmr_for_each_table(mrt, net) \ | |
@@ -182,6 +185,7 @@ static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp, | |
case FR_ACT_UNREACHABLE: | |
return -ENETUNREACH; | |
case FR_ACT_PROHIBIT: | |
+ case FR_ACT_POLICY_FAILED: | |
return -EACCES; | |
case FR_ACT_BLACKHOLE: | |
default: | |
@@ -225,6 +229,78 @@ static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb, | |
return 0; | |
} | |
+/* ipmr_sync_entry_update() | |
+ * Call the registered offload callback to report an update to a multicast | |
+ * route entry. The callback receives the list of destination interfaces and | |
+ * the interface count | |
+ */ | |
+static void ipmr_sync_entry_update(struct mr_table *mrt, | |
+ struct mfc_cache *cache) | |
+{ | |
+ int vifi, dest_if_count = 0; | |
+ u32 dest_dev[MAXVIFS]; | |
+ __be32 origin; | |
+ __be32 group; | |
+ ipmr_mfc_event_offload_callback_t offload_update_cb_f; | |
+ | |
+ memset(dest_dev, 0, sizeof(dest_dev)); | |
+ | |
+ origin = cache->mfc_origin; | |
+ group = cache->mfc_mcastgrp; | |
+ | |
+ read_lock(&mrt_lock); | |
+ for (vifi = 0; vifi < cache->mfc_un.res.maxvif; vifi++) { | |
+ if (!((cache->mfc_un.res.ttls[vifi] > 0) && | |
+ (cache->mfc_un.res.ttls[vifi] < 255))) { | |
+ continue; | |
+ } | |
+ if (dest_if_count == MAXVIFS) { | |
+ read_unlock(&mrt_lock); | |
+ return; | |
+ } | |
+ | |
+ if (!VIF_EXISTS(mrt, vifi)) { | |
+ read_unlock(&mrt_lock); | |
+ return; | |
+ } | |
+ dest_dev[dest_if_count] = mrt->vif_table[vifi].dev->ifindex; | |
+ dest_if_count++; | |
+ } | |
+ read_unlock(&mrt_lock); | |
+ | |
+ rcu_read_lock(); | |
+ offload_update_cb_f = rcu_dereference(ipmr_mfc_event_offload_callback); | |
+ | |
+ if (!offload_update_cb_f) { | |
+ rcu_read_unlock(); | |
+ return; | |
+ } | |
+ | |
+ offload_update_cb_f(group, origin, dest_if_count, dest_dev, | |
+ IPMR_MFC_EVENT_UPDATE); | |
+ rcu_read_unlock(); | |
+} | |
+ | |
+/* ipmr_sync_entry_delete() | |
+ * Call the registered offload callback to inform of a multicast route entry | |
+ * delete event | |
+ */ | |
+static void ipmr_sync_entry_delete(__be32 origin, __be32 group) | |
+{ | |
+ ipmr_mfc_event_offload_callback_t offload_update_cb_f; | |
+ | |
+ rcu_read_lock(); | |
+ offload_update_cb_f = rcu_dereference(ipmr_mfc_event_offload_callback); | |
+ | |
+ if (!offload_update_cb_f) { | |
+ rcu_read_unlock(); | |
+ return; | |
+ } | |
+ | |
+ offload_update_cb_f(group, origin, 0, NULL, IPMR_MFC_EVENT_DELETE); | |
+ rcu_read_unlock(); | |
+} | |
+ | |
static const struct fib_rules_ops __net_initconst ipmr_rules_ops_template = { | |
.family = RTNL_FAMILY_IPMR, | |
.rule_size = sizeof(struct ipmr_rule), | |
@@ -239,6 +315,150 @@ static const struct fib_rules_ops __net_initconst ipmr_rules_ops_template = { | |
.owner = THIS_MODULE, | |
}; | |
+/* ipmr_register_mfc_event_offload_callback() | |
+ * Register the IPv4 Multicast update offload callback with IPMR | |
+ */ | |
+bool ipmr_register_mfc_event_offload_callback( | |
+ ipmr_mfc_event_offload_callback_t mfc_offload_cb) | |
+{ | |
+ ipmr_mfc_event_offload_callback_t offload_update_cb_f; | |
+ | |
+ rcu_read_lock(); | |
+ offload_update_cb_f = rcu_dereference(ipmr_mfc_event_offload_callback); | |
+ | |
+ if (offload_update_cb_f) { | |
+ rcu_read_unlock(); | |
+ return false; | |
+ } | |
+ | |
+ rcu_assign_pointer(ipmr_mfc_event_offload_callback, mfc_offload_cb); | |
+ rcu_read_unlock(); | |
+ return true; | |
+} | |
+EXPORT_SYMBOL(ipmr_register_mfc_event_offload_callback); | |
+ | |
+/* ipmr_unregister_mfc_event_offload_callback() | |
+ * De-register the IPv4 Multicast update offload callback with IPMR | |
+ */ | |
+void ipmr_unregister_mfc_event_offload_callback(void) | |
+{ | |
+ rcu_assign_pointer(ipmr_mfc_event_offload_callback, NULL); | |
+ | |
+ /* Wait for in-flight readers before the callback owner goes away */ | |
+ synchronize_rcu(); | |
+} | |
+EXPORT_SYMBOL(ipmr_unregister_mfc_event_offload_callback); | |
+ | |
+/* ipmr_find_mfc_entry() | |
+ * Returns destination interface list for a particular multicast flow, and | |
+ * the number of interfaces in the list | |
+ */ | |
+int ipmr_find_mfc_entry(struct net *net, __be32 origin, __be32 group, | |
+ u32 max_dest_cnt, u32 dest_dev[]) | |
+{ | |
+ int vifi, dest_if_count = 0; | |
+ struct mr_table *mrt; | |
+ struct mfc_cache *cache; | |
+ | |
+ mrt = ipmr_get_table(net, RT_TABLE_DEFAULT); | |
+ if (!mrt) | |
+ return -ENOENT; | |
+ | |
+ rcu_read_lock(); | |
+ cache = ipmr_cache_find(mrt, origin, group); | |
+ if (!cache) { | |
+ rcu_read_unlock(); | |
+ return -ENOENT; | |
+ } | |
+ | |
+ read_lock(&mrt_lock); | |
+ for (vifi = 0; vifi < cache->mfc_un.res.maxvif; vifi++) { | |
+ if (!((cache->mfc_un.res.ttls[vifi] > 0) && | |
+ (cache->mfc_un.res.ttls[vifi] < 255))) { | |
+ continue; | |
+ } | |
+ | |
+ /* We have another valid destination interface entry. Check if | |
+ * the number of the destination interfaces for the route is | |
+ * exceeding the size of the array given to us | |
+ */ | |
+ if (dest_if_count == max_dest_cnt) { | |
+ read_unlock(&mrt_lock); | |
+ rcu_read_unlock(); | |
+ return -EINVAL; | |
+ } | |
+ | |
+ if (!VIF_EXISTS(mrt, vifi)) { | |
+ read_unlock(&mrt_lock); | |
+ rcu_read_unlock(); | |
+ return -EINVAL; | |
+ } | |
+ | |
+ dest_dev[dest_if_count] = mrt->vif_table[vifi].dev->ifindex; | |
+ dest_if_count++; | |
+ } | |
+ read_unlock(&mrt_lock); | |
+ rcu_read_unlock(); | |
+ | |
+ return dest_if_count; | |
+} | |
+EXPORT_SYMBOL(ipmr_find_mfc_entry); | |
+ | |
+/* ipmr_mfc_stats_update() | |
+ * Update the MFC/VIF statistics for offloaded flows | |
+ */ | |
+int ipmr_mfc_stats_update(struct net *net, __be32 origin, __be32 group, | |
+ u64 pkts_in, u64 bytes_in, | |
+ u64 pkts_out, u64 bytes_out) | |
+{ | |
+ int vif, vifi; | |
+ struct mr_table *mrt; | |
+ struct mfc_cache *cache; | |
+ | |
+ mrt = ipmr_get_table(net, RT_TABLE_DEFAULT); | |
+ if (!mrt) | |
+ return -ENOENT; | |
+ | |
+ rcu_read_lock(); | |
+ cache = ipmr_cache_find(mrt, origin, group); | |
+ if (!cache) { | |
+ rcu_read_unlock(); | |
+ return -ENOENT; | |
+ } | |
+ | |
+ vif = cache->mfc_parent; | |
+ | |
+ read_lock(&mrt_lock); | |
+ if (!VIF_EXISTS(mrt, vif)) { | |
+ read_unlock(&mrt_lock); | |
+ rcu_read_unlock(); | |
+ return -EINVAL; | |
+ } | |
+ | |
+ mrt->vif_table[vif].pkt_in += pkts_in; | |
+ mrt->vif_table[vif].bytes_in += bytes_in; | |
+ cache->mfc_un.res.pkt += pkts_out; | |
+ cache->mfc_un.res.bytes += bytes_out; | |
+ | |
+ for (vifi = cache->mfc_un.res.minvif; | |
+ vifi < cache->mfc_un.res.maxvif; vifi++) { | |
+ if ((cache->mfc_un.res.ttls[vifi] > 0) && | |
+ (cache->mfc_un.res.ttls[vifi] < 255)) { | |
+ if (!VIF_EXISTS(mrt, vifi)) { | |
+ read_unlock(&mrt_lock); | |
+ rcu_read_unlock(); | |
+ return -EINVAL; | |
+ } | |
+ mrt->vif_table[vifi].pkt_out += pkts_out; | |
+ mrt->vif_table[vifi].bytes_out += bytes_out; | |
+ } | |
+ } | |
+ read_unlock(&mrt_lock); | |
+ rcu_read_unlock(); | |
+ | |
+ return 0; | |
+} | |
+EXPORT_SYMBOL(ipmr_mfc_stats_update); | |
+ | |
static int __net_init ipmr_rules_init(struct net *net) | |
{ | |
struct fib_rules_ops *ops; | |
@@ -1106,6 +1326,7 @@ static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent) | |
{ | |
int line; | |
struct mfc_cache *c, *next; | |
+ __be32 origin, group; | |
line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr); | |
@@ -1113,9 +1334,14 @@ static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent) | |
if (c->mfc_origin == mfc->mfcc_origin.s_addr && | |
c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr && | |
(parent == -1 || parent == c->mfc_parent)) { | |
+ origin = c->mfc_origin; | |
+ group = c->mfc_mcastgrp; | |
list_del_rcu(&c->list); | |
mroute_netlink_event(mrt, c, RTM_DELROUTE); | |
ipmr_cache_free(c); | |
+ | |
+ /* Inform offload modules of the delete event */ | |
+ ipmr_sync_entry_delete(origin, group); | |
return 0; | |
} | |
} | |
@@ -1151,6 +1377,9 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, | |
c->mfc_flags |= MFC_STATIC; | |
write_unlock_bh(&mrt_lock); | |
mroute_netlink_event(mrt, c, RTM_NEWROUTE); | |
+ | |
+ /* Inform offload modules of the update event */ | |
+ ipmr_sync_entry_update(mrt, c); | |
return 0; | |
} | |
@@ -1207,6 +1436,7 @@ static void mroute_clean_tables(struct mr_table *mrt, bool all) | |
int i; | |
LIST_HEAD(list); | |
struct mfc_cache *c, *next; | |
+ __be32 origin, group; | |
/* Shut down all active vif entries */ | |
@@ -1223,9 +1453,14 @@ static void mroute_clean_tables(struct mr_table *mrt, bool all) | |
list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) { | |
if (!all && (c->mfc_flags & MFC_STATIC)) | |
continue; | |
+ origin = c->mfc_origin; | |
+ group = c->mfc_mcastgrp; | |
list_del_rcu(&c->list); | |
mroute_netlink_event(mrt, c, RTM_DELROUTE); | |
ipmr_cache_free(c); | |
+ | |
+ /* Inform offload modules of the delete event */ | |
+ ipmr_sync_entry_delete(origin, group); | |
} | |
} | |
@@ -2496,7 +2731,7 @@ static int ipmr_vif_seq_show(struct seq_file *seq, void *v) | |
const char *name = vif->dev ? vif->dev->name : "none"; | |
seq_printf(seq, | |
- "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n", | |
+ "%2Zd %-10s %8llu %7llu %8llu %7llu %05X %08X %08X\n", | |
vif - mrt->vif_table, | |
name, vif->bytes_in, vif->pkt_in, | |
vif->bytes_out, vif->pkt_out, | |
--- a/net/ipv4/netfilter/arp_tables.c | |
+++ b/net/ipv4/netfilter/arp_tables.c | |
@@ -329,6 +329,10 @@ unsigned int arpt_do_table(struct sk_buff *skb, | |
} | |
if (table_base + v | |
!= arpt_next_entry(e)) { | |
+ if (unlikely(stackidx >= private->stacksize)) { | |
+ verdict = NF_DROP; | |
+ break; | |
+ } | |
jumpstack[stackidx++] = e; | |
} | |
--- a/net/ipv4/netfilter/ip_tables.c | |
+++ b/net/ipv4/netfilter/ip_tables.c | |
@@ -82,9 +82,14 @@ ip_packet_match(const struct iphdr *ip, | |
#define FWINV(bool, invflg) ((bool) ^ !!(ipinfo->invflags & (invflg))) | |
- if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr, | |
+ if (ipinfo->flags & IPT_F_NO_DEF_MATCH) | |
+ return true; | |
+ | |
+ if (FWINV(ipinfo->smsk.s_addr && | |
+ (ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr, | |
IPT_INV_SRCIP) || | |
- FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr, | |
+ FWINV(ipinfo->dmsk.s_addr && | |
+ (ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr, | |
IPT_INV_DSTIP)) { | |
dprintf("Source or dest mismatch.\n"); | |
@@ -135,6 +140,29 @@ ip_packet_match(const struct iphdr *ip, | |
return true; | |
} | |
+static void | |
+ip_checkdefault(struct ipt_ip *ip) | |
+{ | |
+ static const char iface_mask[IFNAMSIZ] = {}; | |
+ | |
+ if (ip->invflags || ip->flags & IPT_F_FRAG) | |
+ return; | |
+ | |
+ if (memcmp(ip->iniface_mask, iface_mask, IFNAMSIZ) != 0) | |
+ return; | |
+ | |
+ if (memcmp(ip->outiface_mask, iface_mask, IFNAMSIZ) != 0) | |
+ return; | |
+ | |
+ if (ip->smsk.s_addr || ip->dmsk.s_addr) | |
+ return; | |
+ | |
+ if (ip->proto) | |
+ return; | |
+ | |
+ ip->flags |= IPT_F_NO_DEF_MATCH; | |
+} | |
+ | |
static bool | |
ip_checkentry(const struct ipt_ip *ip) | |
{ | |
@@ -282,6 +310,33 @@ struct ipt_entry *ipt_next_entry(const struct ipt_entry *entry) | |
return (void *)entry + entry->next_offset; | |
} | |
+static bool | |
+ipt_handle_default_rule(struct ipt_entry *e, unsigned int *verdict) | |
+{ | |
+ struct xt_entry_target *t; | |
+ struct xt_standard_target *st; | |
+ | |
+ if (e->target_offset != sizeof(struct ipt_entry)) | |
+ return false; | |
+ | |
+ if (!(e->ip.flags & IPT_F_NO_DEF_MATCH)) | |
+ return false; | |
+ | |
+ t = ipt_get_target(e); | |
+ if (t->u.kernel.target->target) | |
+ return false; | |
+ | |
+ st = (struct xt_standard_target *) t; | |
+ if (st->verdict == XT_RETURN) | |
+ return false; | |
+ | |
+ if (st->verdict >= 0) | |
+ return false; | |
+ | |
+ *verdict = (unsigned)(-st->verdict) - 1; | |
+ return true; | |
+} | |
+ | |
/* Returns one of the generic firewall policies, like NF_ACCEPT. */ | |
unsigned int | |
ipt_do_table(struct sk_buff *skb, | |
@@ -302,28 +357,8 @@ ipt_do_table(struct sk_buff *skb, | |
unsigned int addend; | |
/* Initialization */ | |
- stackidx = 0; | |
- ip = ip_hdr(skb); | |
- indev = state->in ? state->in->name : nulldevname; | |
- outdev = state->out ? state->out->name : nulldevname; | |
- /* We handle fragments by dealing with the first fragment as | |
- * if it was a normal packet. All other fragments are treated | |
- * normally, except that they will NEVER match rules that ask | |
- * things we don't know, ie. tcp syn flag or ports). If the | |
- * rule is also a fragment-specific rule, non-fragments won't | |
- * match it. */ | |
- acpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET; | |
- acpar.thoff = ip_hdrlen(skb); | |
- acpar.hotdrop = false; | |
- acpar.net = state->net; | |
- acpar.in = state->in; | |
- acpar.out = state->out; | |
- acpar.family = NFPROTO_IPV4; | |
- acpar.hooknum = hook; | |
- | |
IP_NF_ASSERT(table->valid_hooks & (1 << hook)); | |
local_bh_disable(); | |
- addend = xt_write_recseq_begin(); | |
private = table->private; | |
cpu = smp_processor_id(); | |
/* | |
@@ -332,6 +367,23 @@ ipt_do_table(struct sk_buff *skb, | |
*/ | |
smp_read_barrier_depends(); | |
table_base = private->entries; | |
+ | |
+ e = get_entry(table_base, private->hook_entry[hook]); | |
+ if (ipt_handle_default_rule(e, &verdict)) { | |
+ struct xt_counters *counter; | |
+ | |
+ counter = xt_get_this_cpu_counter(&e->counters); | |
+ ADD_COUNTER(*counter, skb->len, 1); | |
+ local_bh_enable(); | |
+ return verdict; | |
+ } | |
+ | |
+ stackidx = 0; | |
+ ip = ip_hdr(skb); | |
+ indev = state->in ? state->in->name : nulldevname; | |
+ outdev = state->out ? state->out->name : nulldevname; | |
+ | |
+ addend = xt_write_recseq_begin(); | |
jumpstack = (struct ipt_entry **)private->jumpstack[cpu]; | |
/* Switch to alternate jumpstack if we're being invoked via TEE. | |
@@ -344,7 +396,20 @@ ipt_do_table(struct sk_buff *skb, | |
if (static_key_false(&xt_tee_enabled)) | |
jumpstack += private->stacksize * __this_cpu_read(nf_skb_duplicated); | |
- e = get_entry(table_base, private->hook_entry[hook]); | |
+ /* We handle fragments by dealing with the first fragment as | |
+ * if it was a normal packet. All other fragments are treated | |
+ * normally, except that they will NEVER match rules that ask | |
+ * things we don't know, ie. tcp syn flag or ports). If the | |
+ * rule is also a fragment-specific rule, non-fragments won't | |
+ * match it. */ | |
+ acpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET; | |
+ acpar.thoff = ip_hdrlen(skb); | |
+ acpar.hotdrop = false; | |
+ acpar.net = state->net; | |
+ acpar.in = state->in; | |
+ acpar.out = state->out; | |
+ acpar.family = NFPROTO_IPV4; | |
+ acpar.hooknum = hook; | |
pr_debug("Entering %s(hook %u), UF %p\n", | |
table->name, hook, | |
@@ -408,9 +473,11 @@ ipt_do_table(struct sk_buff *skb, | |
} | |
if (table_base + v != ipt_next_entry(e) && | |
!(e->ip.flags & IPT_F_GOTO)) { | |
+ if (unlikely(stackidx >= private->stacksize)) { | |
+ verdict = NF_DROP; | |
+ break; | |
+ } | |
jumpstack[stackidx++] = e; | |
- pr_debug("Pushed %p into pos %u\n", | |
- e, stackidx - 1); | |
} | |
e = get_entry(table_base, v); | |
@@ -587,6 +654,28 @@ static void cleanup_match(struct xt_entry_match *m, struct net *net) | |
module_put(par.match->me); | |
} | |
+static int | |
+check_entry(struct ipt_entry *e) | |
+{ | |
+ const struct xt_entry_target *t; | |
+ | |
+ if (!ip_checkentry(&e->ip)) | |
+ return -EINVAL; | |
+ | |
+ ip_checkdefault(&e->ip); | |
+ | |
+ if (e->target_offset + sizeof(struct xt_entry_target) > | |
+ e->next_offset) | |
+ return -EINVAL; | |
+ | |
+ t = ipt_get_target_c(e); | |
+ | |
+ if (e->target_offset + t->u.target_size > e->next_offset) | |
+ return -EINVAL; | |
+ | |
+ return 0; | |
+} | |
+ | |
static int | |
check_match(struct xt_entry_match *m, struct xt_mtchk_param *par) | |
{ | |
@@ -664,6 +753,10 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name, | |
struct xt_mtchk_param mtpar; | |
struct xt_entry_match *ematch; | |
+ ret = check_entry(e); | |
+ if (ret) | |
+ return ret; | |
+ | |
e->counters.pcnt = xt_percpu_counter_alloc(); | |
if (IS_ERR_VALUE(e->counters.pcnt)) | |
return -ENOMEM; | |
@@ -948,6 +1041,7 @@ copy_entries_to_user(unsigned int total_size, | |
const struct xt_table_info *private = table->private; | |
int ret = 0; | |
const void *loc_cpu_entry; | |
+ u8 flags; | |
counters = alloc_counters(table); | |
if (IS_ERR(counters)) | |
@@ -975,6 +1069,14 @@ copy_entries_to_user(unsigned int total_size, | |
goto free_counters; | |
} | |
+ flags = e->ip.flags & IPT_F_MASK; | |
+ if (copy_to_user(userptr + off | |
+ + offsetof(struct ipt_entry, ip.flags), | |
+ &flags, sizeof(flags)) != 0) { | |
+ ret = -EFAULT; | |
+ goto free_counters; | |
+ } | |
+ | |
for (i = sizeof(struct ipt_entry); | |
i < e->target_offset; | |
i += m->u.match_size) { | |
@@ -1468,8 +1570,10 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, | |
return -EINVAL; | |
} | |
- if (!ip_checkentry(&e->ip)) | |
- return -EINVAL; | |
+ /* For purposes of check_entry casting the compat entry is fine */ | |
+ ret = check_entry((struct ipt_entry *)e); | |
+ if (ret) | |
+ return ret; | |
ret = xt_compat_check_entry_offsets(e, e->elems, | |
e->target_offset, e->next_offset); | |
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | |
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | |
@@ -41,8 +41,8 @@ static bool ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, | |
if (ap == NULL) | |
return false; | |
- tuple->src.u3.ip = ap[0]; | |
- tuple->dst.u3.ip = ap[1]; | |
+ tuple->src.u3.ip = net_hdr_word(ap++); | |
+ tuple->dst.u3.ip = net_hdr_word(ap); | |
return true; | |
} | |
--- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c | |
+++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c | |
@@ -25,6 +25,9 @@ | |
#include <net/netfilter/nf_nat_core.h> | |
#include <net/netfilter/nf_nat_l3proto.h> | |
#include <net/netfilter/nf_nat_l4proto.h> | |
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) | |
+#include <net/netfilter/br_netfilter.h> | |
+#endif | |
static const struct nf_nat_l3proto nf_nat_l3proto_ipv4; | |
@@ -302,6 +305,32 @@ nf_nat_ipv4_fn(void *priv, struct sk_buff *skb, | |
} | |
/* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */ | |
case IP_CT_NEW: | |
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) | |
+ /* When an skb is forwarded between ports of a bridge, the | 
+ * nf_bridge will be set and nf_bridge->physoutdev is not null, | 
+ * so we can assume that it is not expecting NAT operation. | 
+ * | |
+ * when BR_HOOK is enabled, multicast packets will reach | |
+ * postrouting twice: the first time is when it is forwarded | 
+ * between ports of a bridge, the second time is when it is | 
+ * forwarded to the upstream port. | 
+ * | |
+ * It will traverse the NAT table the first time; the next | 
+ * time, it will reuse the result of the first traversal. | 
+ * Since forwarding between ports of a bridge won't hit any | 
+ * SNAT rules, this causes no NAT operation on the skb when | 
+ * it is later forwarded to the upstream port. | 
+ * | |
+ * To avoid the scenario above, accept it when it is forwarding | |
+ * between ports of a bridge for multicast. | |
+ */ | |
+ if (skb->pkt_type == PACKET_MULTICAST) { | |
+ struct nf_bridge_info *nf_bridge = | |
+ nf_bridge_info_get(skb); | |
+ if (nf_bridge && nf_bridge->physoutdev) | |
+ return NF_ACCEPT; | |
+ } | |
+#endif | |
/* Seen it before? This can happen for loopback, retrans, | |
* or local packets. | |
*/ | |
@@ -312,7 +341,7 @@ nf_nat_ipv4_fn(void *priv, struct sk_buff *skb, | |
if (ret != NF_ACCEPT) | |
return ret; | |
- if (nf_nat_initialized(ct, HOOK2MANIP(state->hook))) | |
+ if (nf_nat_initialized(ct, maniptype)) | |
break; | |
ret = nf_nat_alloc_null_binding(ct, state->hook); | |
--- a/net/ipv4/netfilter/nf_reject_ipv4.c | |
+++ b/net/ipv4/netfilter/nf_reject_ipv4.c | |
@@ -124,6 +124,8 @@ void nf_send_reset(struct net *net, struct sk_buff *oldskb, int hook) | |
/* ip_route_me_harder expects skb->dst to be set */ | |
skb_dst_set_noref(nskb, skb_dst(oldskb)); | |
+ nskb->mark = IP4_REPLY_MARK(net, oldskb->mark); | |
+ | |
skb_reserve(nskb, LL_MAX_HEADER); | |
niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_TCP, | |
ip4_dst_hoplimit(skb_dst(nskb))); | |
--- a/net/ipv4/proc.c | |
+++ b/net/ipv4/proc.c | |
@@ -539,6 +539,9 @@ static __net_initdata struct pernet_operations ip_proc_ops = { | |
int __init ip_misc_proc_init(void) | |
{ | |
+ if (IS_ENABLED(CONFIG_PROC_STRIPPED)) | |
+ return 0; | |
+ | |
return register_pernet_subsys(&ip_proc_ops); | |
} | |
--- a/net/ipv4/protocol.c | |
+++ b/net/ipv4/protocol.c | |
@@ -77,3 +77,29 @@ int inet_del_offload(const struct net_offload *prot, unsigned char protocol) | |
return ret; | |
} | |
EXPORT_SYMBOL(inet_del_offload); | |
+ | |
+int inet_update_protocol(const struct net_protocol *new_prot, | |
+ unsigned char protocol, const struct net_protocol **old_prot) | |
+{ | |
+ int ret; | |
+ | |
+ rcu_read_lock(); | |
+ *old_prot = rcu_dereference(inet_protos[protocol]); | |
+ if (!*old_prot) { | |
+ rcu_read_unlock(); | |
+ return -1; | |
+ } | |
+ rcu_read_unlock(); | |
+ | |
+ /* | |
+ * old_prot is not protected as cmpxchg is successful only if | |
+ * old_prot matches with the value in inet_protos[protocol] | |
+ */ | |
+ ret = (cmpxchg((const struct net_protocol **)&inet_protos[protocol], | |
+ *old_prot, new_prot) == *old_prot) ? 0 : -1; | |
+ | |
+ synchronize_net(); | |
+ | |
+ return ret; | |
+} | |
+EXPORT_SYMBOL(inet_update_protocol); | |
--- a/net/ipv4/raw.c | |
+++ b/net/ipv4/raw.c | |
@@ -497,11 +497,16 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) | |
int err; | |
struct ip_options_data opt_copy; | |
struct raw_frag_vec rfv; | |
+ int hdrincl; | |
err = -EMSGSIZE; | |
if (len > 0xFFFF) | |
goto out; | |
+ /* hdrincl should be READ_ONCE(inet->hdrincl) | |
+ * but READ_ONCE() doesn't work with bit fields | |
+ */ | |
+ hdrincl = inet->hdrincl; | |
/* | |
* Check the flags. | |
*/ | |
@@ -576,7 +581,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) | |
/* Linux does not mangle headers on raw sockets, | |
* so that IP options + IP_HDRINCL is non-sense. | |
*/ | |
- if (inet->hdrincl) | |
+ if (hdrincl) | |
goto done; | |
if (ipc.opt->opt.srr) { | |
if (!daddr) | |
@@ -598,9 +603,9 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) | |
flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, | |
RT_SCOPE_UNIVERSE, | |
- inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, | |
+ hdrincl ? IPPROTO_RAW : sk->sk_protocol, | |
inet_sk_flowi_flags(sk) | | |
- (inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0), | |
+ (hdrincl ? FLOWI_FLAG_KNOWN_NH : 0), | |
daddr, saddr, 0, 0); | |
if (!saddr && ipc.oif) { | |
@@ -609,7 +614,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) | |
goto done; | |
} | |
- if (!inet->hdrincl) { | |
+ if (!hdrincl) { | |
rfv.msg = msg; | |
rfv.hlen = 0; | |
@@ -634,7 +639,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) | |
goto do_confirm; | |
back_from_confirm: | |
- if (inet->hdrincl) | |
+ if (hdrincl) | |
err = raw_send_hdrinc(sk, &fl4, msg, len, | |
&rt, msg->msg_flags); | |
--- a/net/ipv4/route.c | |
+++ b/net/ipv4/route.c | |
@@ -420,6 +420,9 @@ static struct pernet_operations ip_rt_proc_ops __net_initdata = { | |
static int __init ip_rt_proc_init(void) | |
{ | |
+ if (IS_ENABLED(CONFIG_PROC_STRIPPED)) | |
+ return 0; | |
+ | |
return register_pernet_subsys(&ip_rt_proc_ops); | |
} | |
@@ -455,7 +458,7 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, | |
else if (skb) | |
pkey = &ip_hdr(skb)->daddr; | |
- n = __ipv4_neigh_lookup(dev, *(__force u32 *)pkey); | |
+ n = __ipv4_neigh_lookup(dev, net_hdr_word(pkey)); | |
if (n) | |
return n; | |
return neigh_create(&arp_tbl, pkey, dev); | |
@@ -1148,6 +1151,9 @@ static void ipv4_link_failure(struct sk_buff *skb) | |
{ | |
struct rtable *rt; | |
+ /* Forwarding packets, do not have IPCB() initialized, do so | |
+ */ | |
+ memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); | |
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); | |
rt = skb_rtable(skb); | |
--- a/net/ipv4/sysctl_net_ipv4.c | |
+++ b/net/ipv4/sysctl_net_ipv4.c | |
@@ -799,6 +799,13 @@ static struct ctl_table ipv4_table[] = { | |
.proc_handler = proc_dointvec_minmax, | |
.extra1 = &one | |
}, | |
+ { | |
+ .procname = "ip_use_legacy_tos", | |
+ .data = &sysctl_ip_use_legacy_tos, | |
+ .maxlen = sizeof(int), | |
+ .mode = 0644, | |
+ .proc_handler = proc_dointvec, | |
+ }, | |
{ } | |
}; | |
--- a/net/ipv4/tcp_input.c | |
+++ b/net/ipv4/tcp_input.c | |
@@ -3818,14 +3818,16 @@ static bool tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr | |
{ | |
const __be32 *ptr = (const __be32 *)(th + 1); | |
- if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | |
- | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) { | |
+ if (net_hdr_word(ptr) == | |
+ htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | | |
+ (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) { | |
tp->rx_opt.saw_tstamp = 1; | |
++ptr; | |
- tp->rx_opt.rcv_tsval = ntohl(*ptr); | |
+ tp->rx_opt.rcv_tsval = get_unaligned_be32(ptr); | |
++ptr; | |
- if (*ptr) | |
- tp->rx_opt.rcv_tsecr = ntohl(*ptr) - tp->tsoffset; | |
+ if (net_hdr_word(ptr)) | |
+ tp->rx_opt.rcv_tsecr = get_unaligned_be32(ptr) - | |
+ tp->tsoffset; | |
else | |
tp->rx_opt.rcv_tsecr = 0; | |
return true; | |
--- a/net/ipv4/tcp_output.c | |
+++ b/net/ipv4/tcp_output.c | |
@@ -451,48 +451,53 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, | |
u16 options = opts->options; /* mungable copy */ | |
if (unlikely(OPTION_MD5 & options)) { | |
- *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | | |
- (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG); | |
+ net_hdr_word(ptr++) = | |
+ htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | | |
+ (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG); | |
/* overload cookie hash location */ | |
opts->hash_location = (__u8 *)ptr; | |
ptr += 4; | |
} | |
if (unlikely(opts->mss)) { | |
- *ptr++ = htonl((TCPOPT_MSS << 24) | | |
- (TCPOLEN_MSS << 16) | | |
- opts->mss); | |
+ net_hdr_word(ptr++) = | |
+ htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | | |
+ opts->mss); | |
} | |
if (likely(OPTION_TS & options)) { | |
if (unlikely(OPTION_SACK_ADVERTISE & options)) { | |
- *ptr++ = htonl((TCPOPT_SACK_PERM << 24) | | |
- (TCPOLEN_SACK_PERM << 16) | | |
- (TCPOPT_TIMESTAMP << 8) | | |
- TCPOLEN_TIMESTAMP); | |
+ net_hdr_word(ptr++) = | |
+ htonl((TCPOPT_SACK_PERM << 24) | | |
+ (TCPOLEN_SACK_PERM << 16) | | |
+ (TCPOPT_TIMESTAMP << 8) | | |
+ TCPOLEN_TIMESTAMP); | |
options &= ~OPTION_SACK_ADVERTISE; | |
} else { | |
- *ptr++ = htonl((TCPOPT_NOP << 24) | | |
- (TCPOPT_NOP << 16) | | |
- (TCPOPT_TIMESTAMP << 8) | | |
- TCPOLEN_TIMESTAMP); | |
+ net_hdr_word(ptr++) = | |
+ htonl((TCPOPT_NOP << 24) | | |
+ (TCPOPT_NOP << 16) | | |
+ (TCPOPT_TIMESTAMP << 8) | | |
+ TCPOLEN_TIMESTAMP); | |
} | |
- *ptr++ = htonl(opts->tsval); | |
- *ptr++ = htonl(opts->tsecr); | |
+ net_hdr_word(ptr++) = htonl(opts->tsval); | |
+ net_hdr_word(ptr++) = htonl(opts->tsecr); | |
} | |
if (unlikely(OPTION_SACK_ADVERTISE & options)) { | |
- *ptr++ = htonl((TCPOPT_NOP << 24) | | |
- (TCPOPT_NOP << 16) | | |
- (TCPOPT_SACK_PERM << 8) | | |
- TCPOLEN_SACK_PERM); | |
+ net_hdr_word(ptr++) = | |
+ htonl((TCPOPT_NOP << 24) | | |
+ (TCPOPT_NOP << 16) | | |
+ (TCPOPT_SACK_PERM << 8) | | |
+ TCPOLEN_SACK_PERM); | |
} | |
if (unlikely(OPTION_WSCALE & options)) { | |
- *ptr++ = htonl((TCPOPT_NOP << 24) | | |
- (TCPOPT_WINDOW << 16) | | |
- (TCPOLEN_WINDOW << 8) | | |
- opts->ws); | |
+ net_hdr_word(ptr++) = | |
+ htonl((TCPOPT_NOP << 24) | | |
+ (TCPOPT_WINDOW << 16) | | |
+ (TCPOLEN_WINDOW << 8) | | |
+ opts->ws); | |
} | |
if (unlikely(opts->num_sack_blocks)) { | |
@@ -500,16 +505,17 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, | |
tp->duplicate_sack : tp->selective_acks; | |
int this_sack; | |
- *ptr++ = htonl((TCPOPT_NOP << 24) | | |
- (TCPOPT_NOP << 16) | | |
- (TCPOPT_SACK << 8) | | |
- (TCPOLEN_SACK_BASE + (opts->num_sack_blocks * | |
+ net_hdr_word(ptr++) = | |
+ htonl((TCPOPT_NOP << 24) | | |
+ (TCPOPT_NOP << 16) | | |
+ (TCPOPT_SACK << 8) | | |
+ (TCPOLEN_SACK_BASE + (opts->num_sack_blocks * | |
TCPOLEN_SACK_PERBLOCK))); | |
for (this_sack = 0; this_sack < opts->num_sack_blocks; | |
++this_sack) { | |
- *ptr++ = htonl(sp[this_sack].start_seq); | |
- *ptr++ = htonl(sp[this_sack].end_seq); | |
+ net_hdr_word(ptr++) = htonl(sp[this_sack].start_seq); | |
+ net_hdr_word(ptr++) = htonl(sp[this_sack].end_seq); | |
} | |
tp->rx_opt.dsack = 0; | |
@@ -522,13 +528,14 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, | |
if (foc->exp) { | |
len = TCPOLEN_EXP_FASTOPEN_BASE + foc->len; | |
- *ptr = htonl((TCPOPT_EXP << 24) | (len << 16) | | |
+ net_hdr_word(ptr) = | |
+ htonl((TCPOPT_EXP << 24) | (len << 16) | | |
TCPOPT_FASTOPEN_MAGIC); | |
p += TCPOLEN_EXP_FASTOPEN_BASE; | |
} else { | |
len = TCPOLEN_FASTOPEN_BASE + foc->len; | |
 *p++ = TCPOPT_FASTOPEN; | 
 *p++ = len; | 
} | |
memcpy(p, foc->val, foc->len); | |
--- a/net/ipv4/udp.c | |
+++ b/net/ipv4/udp.c | |
@@ -819,7 +819,7 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4) | |
if (is_udplite) /* UDP-Lite */ | |
csum = udplite_csum(skb); | |
- else if (sk->sk_no_check_tx) { /* UDP csum disabled */ | |
+ else if (sk->sk_no_check_tx && !skb_is_gso(skb)) { /* UDP csum off */ | |
skb->ip_summed = CHECKSUM_NONE; | |
goto send; | |
--- a/net/ipv6/addrconf.c | |
+++ b/net/ipv6/addrconf.c | |
@@ -1768,6 +1768,35 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add | |
return result; | |
} | |
+/* ipv6_dev_find() | |
+ * Find (and hold) net device that has the given address. | |
+ * Or NULL on failure. | |
+ */ | |
+struct net_device *ipv6_dev_find(struct net *net, struct in6_addr *addr, | |
+ int strict) | |
+{ | |
+ struct inet6_ifaddr *ifp; | |
+ struct net_device *dev; | |
+ | |
+ ifp = ipv6_get_ifaddr(net, addr, NULL, strict); | |
+ if (!ifp) | |
+ return NULL; | |
+ | |
+ if (!ifp->idev) { | |
+ in6_ifa_put(ifp); | |
+ return NULL; | |
+ } | |
+ | |
+ dev = ifp->idev->dev; | |
+ if (dev) | |
+ dev_hold(dev); | |
+ | |
+ in6_ifa_put(ifp); | |
+ | |
+ return dev; | |
+} | |
+EXPORT_SYMBOL(ipv6_dev_find); | |
+ | |
/* Gets referenced address, destroys ifaddr */ | |
static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed) | |
@@ -2053,6 +2082,7 @@ static int ipv6_generate_eui64(u8 *eui, struct net_device *dev) | |
case ARPHRD_IEEE1394: | |
return addrconf_ifid_ieee1394(eui, dev); | |
case ARPHRD_TUNNEL6: | |
+ case ARPHRD_RAWIP: | |
return addrconf_ifid_ip6tnl(eui, dev); | |
} | |
return -1; | |
@@ -3076,7 +3106,8 @@ static void addrconf_dev_config(struct net_device *dev) | |
(dev->type != ARPHRD_IEEE802154) && | |
(dev->type != ARPHRD_IEEE1394) && | |
(dev->type != ARPHRD_TUNNEL6) && | |
- (dev->type != ARPHRD_6LOWPAN)) { | |
+ (dev->type != ARPHRD_6LOWPAN) && | |
+ (dev->type != ARPHRD_RAWIP)) { | |
/* Alas, we support only Ethernet autoconfiguration. */ | |
return; | |
} | |
--- a/net/ipv6/datagram.c | |
+++ b/net/ipv6/datagram.c | |
@@ -429,7 +429,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) | |
ipv6_iface_scope_id(&sin->sin6_addr, | |
IP6CB(skb)->iif); | |
} else { | |
- ipv6_addr_set_v4mapped(*(__be32 *)(nh + serr->addr_offset), | |
+ ipv6_addr_set_v4mapped(net_hdr_word(nh + serr->addr_offset), | |
&sin->sin6_addr); | |
sin->sin6_scope_id = 0; | |
} | |
@@ -766,12 +766,12 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk, | |
} | |
if (fl6->flowlabel&IPV6_FLOWINFO_MASK) { | |
- if ((fl6->flowlabel^*(__be32 *)CMSG_DATA(cmsg))&~IPV6_FLOWINFO_MASK) { | |
+ if ((fl6->flowlabel^net_hdr_word(CMSG_DATA(cmsg)))&~IPV6_FLOWINFO_MASK) { | |
err = -EINVAL; | |
goto exit_f; | |
} | |
} | |
- fl6->flowlabel = IPV6_FLOWINFO_MASK & *(__be32 *)CMSG_DATA(cmsg); | |
+ fl6->flowlabel = IPV6_FLOWINFO_MASK & net_hdr_word(CMSG_DATA(cmsg)); | |
break; | |
case IPV6_2292HOPOPTS: | |
--- a/net/ipv6/esp6.c | |
+++ b/net/ipv6/esp6.c | |
@@ -168,12 +168,19 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) | |
u8 *tail; | |
__be32 *seqhi; | |
__be64 seqno; | |
+ bool nosupp_sg; | |
/* skb is pure payload to encrypt */ | |
aead = x->data; | |
alen = crypto_aead_authsize(aead); | |
ivlen = crypto_aead_ivsize(aead); | |
+ nosupp_sg = crypto_tfm_alg_flags(&aead->base) & CRYPTO_ALG_NOSUPP_SG; | |
+ if (nosupp_sg && skb_linearize(skb)) { | |
+ err = -ENOMEM; | |
+ goto error; | |
+ } | |
+ | |
tfclen = 0; | |
if (x->tfcpad) { | |
struct xfrm_dst *dst = (struct xfrm_dst *)skb_dst(skb); | |
@@ -367,6 +374,7 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) | |
__be32 *seqhi; | |
u8 *iv; | |
struct scatterlist *sg; | |
+ bool nosupp_sg; | |
if (!pskb_may_pull(skb, sizeof(*esph) + ivlen)) { | |
ret = -EINVAL; | |
@@ -378,6 +386,12 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) | |
goto out; | |
} | |
+ nosupp_sg = crypto_tfm_alg_flags(&aead->base) & CRYPTO_ALG_NOSUPP_SG; | |
+ if (nosupp_sg && skb_linearize(skb)) { | |
+ ret = -ENOMEM; | |
+ goto out; | |
+ } | |
+ | |
nfrags = skb_cow_data(skb, 0, &trailer); | |
if (nfrags < 0) { | |
ret = -EINVAL; | |
--- a/net/ipv6/exthdrs.c | |
+++ b/net/ipv6/exthdrs.c | |
@@ -573,7 +573,7 @@ static bool ipv6_hop_jumbo(struct sk_buff *skb, int optoff) | |
goto drop; | |
} | |
- pkt_len = ntohl(*(__be32 *)(nh + optoff + 2)); | |
+ pkt_len = ntohl(net_hdr_word(nh + optoff + 2)); | |
if (pkt_len <= IPV6_MAXPLEN) { | |
IP6_INC_STATS_BH(net, ipv6_skb_idev(skb), | |
IPSTATS_MIB_INHDRERRORS); | |
--- a/net/ipv6/fib6_rules.c | |
+++ b/net/ipv6/fib6_rules.c | |
@@ -84,6 +84,10 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, | |
err = -EACCES; | |
rt = net->ipv6.ip6_prohibit_entry; | |
goto discard_pkt; | |
+ case FR_ACT_POLICY_FAILED: | |
+ err = -EACCES; | |
+ rt = net->ipv6.ip6_policy_failed_entry; | |
+ goto discard_pkt; | |
} | |
table = fib6_get_table(net, rule->table); | |
--- a/net/ipv6/ip6_fib.c | |
+++ b/net/ipv6/ip6_fib.c | |
@@ -138,7 +138,7 @@ static __be32 addr_bit_set(const void *token, int fn_bit) | |
* See include/asm-generic/bitops/le.h. | |
*/ | |
return (__force __be32)(1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f)) & | |
- addr[fn_bit >> 5]; | |
+ net_hdr_word(&addr[fn_bit >> 5]); | |
} | |
static struct fib6_node *node_alloc(void) | |
--- a/net/ipv6/ip6_gre.c | |
+++ b/net/ipv6/ip6_gre.c | |
@@ -57,7 +57,6 @@ | |
#include <net/ip6_tunnel.h> | |
#include <net/gre.h> | |
- | |
static bool log_ecn_error = true; | |
module_param(log_ecn_error, bool, 0644); | |
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); | |
@@ -366,7 +365,6 @@ static void ip6gre_tunnel_uninit(struct net_device *dev) | |
dev_put(dev); | |
} | |
- | |
static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, | |
u8 type, u8 code, int offset, __be32 info) | |
{ | |
@@ -398,7 +396,7 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, | |
key = key_off ? *(__be32 *)(skb->data + key_off) : 0; | |
t = ip6gre_tunnel_lookup(skb->dev, &ipv6h->daddr, &ipv6h->saddr, | |
- key, greh->protocol); | |
+ key, greh->protocol); | |
if (!t) | |
return; | |
@@ -479,11 +477,11 @@ static int ip6gre_rcv(struct sk_buff *skb) | |
offset += 4; | |
} | |
if (flags&GRE_KEY) { | |
- key = *(__be32 *)(h + offset); | |
+ key = net_hdr_word(h + offset); | |
offset += 4; | |
} | |
if (flags&GRE_SEQ) { | |
- seqno = ntohl(*(__be32 *)(h + offset)); | |
+ seqno = ntohl(net_hdr_word(h + offset)); | |
offset += 4; | |
} | |
} | |
@@ -745,7 +743,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, | |
if (tunnel->parms.o_flags&GRE_SEQ) { | |
++tunnel->o_seqno; | |
- *ptr = htonl(tunnel->o_seqno); | |
+ net_hdr_word(ptr) = htonl(tunnel->o_seqno); | |
ptr--; | |
} | |
if (tunnel->parms.o_flags&GRE_KEY) { | |
@@ -760,7 +758,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, | |
} | |
skb_set_inner_protocol(skb, protocol); | |
- | |
+ skb->skb_iif = dev->ifindex; | |
ip6tunnel_xmit(NULL, skb, dev); | |
return 0; | |
tx_err_link_failure: | |
@@ -841,7 +839,7 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev) | |
dsfield = ipv6_get_dsfield(ipv6h); | |
if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) | |
- fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK); | |
+ fl6.flowlabel |= net_hdr_word(ipv6h) & IPV6_TCLASS_MASK; | |
if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL) | |
fl6.flowlabel |= ip6_flowlabel(ipv6h); | |
if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) | |
@@ -1305,7 +1303,6 @@ static void ip6gre_fb_tunnel_init(struct net_device *dev) | |
dev_hold(dev); | |
} | |
- | |
static struct inet6_protocol ip6gre_protocol __read_mostly = { | |
.handler = ip6gre_rcv, | |
.err_handler = ip6gre_err, | |
@@ -1361,7 +1358,6 @@ static int __net_init ip6gre_init_net(struct net *net) | |
*/ | |
ign->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL; | |
- | |
ip6gre_fb_tunnel_init(ign->fb_tunnel_dev); | |
ign->fb_tunnel_dev->rtnl_link_ops = &ip6gre_link_ops; | |
@@ -1438,7 +1434,6 @@ out: | |
return ip6gre_tunnel_validate(tb, data); | |
} | |
- | |
static void ip6gre_netlink_parms(struct nlattr *data[], | |
struct __ip6_tnl_parm *parms) | |
{ | |
@@ -1515,7 +1510,7 @@ static void ip6gre_tap_setup(struct net_device *dev) | |
dev->netdev_ops = &ip6gre_tap_netdev_ops; | |
dev->destructor = ip6gre_dev_free; | |
- | |
+ dev->priv_flags |= IFF_GRE_V6_TAP; | |
dev->features |= NETIF_F_NETNS_LOCAL; | |
} | |
--- a/net/ipv6/ip6_offload.c | |
+++ b/net/ipv6/ip6_offload.c | |
@@ -222,7 +222,7 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, | |
continue; | |
iph2 = (struct ipv6hdr *)(p->data + off); | |
- first_word = *(__be32 *)iph ^ *(__be32 *)iph2; | |
+ first_word = net_hdr_word(iph) ^ net_hdr_word(iph2); | |
/* All fields must match except length and Traffic Class. | |
* XXX skbs on the gro_list have all been parsed and pulled | |
--- a/net/ipv6/ip6_output.c | |
+++ b/net/ipv6/ip6_output.c | |
@@ -1353,11 +1353,12 @@ emsgsize: | |
*/ | |
cork->length += length; | |
- if (((length > mtu) || | |
- (skb && skb_is_gso(skb))) && | |
+ if ((skb && skb_is_gso(skb)) || | |
+ (((length + (skb ? skb->len : headersize)) > mtu) && | |
+ (skb_queue_len(queue) <= 1) && | |
(sk->sk_protocol == IPPROTO_UDP) && | |
- (rt->dst.dev->features & NETIF_F_UFO) && | |
- (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk)) { | |
+ (rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) && | |
+ (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk))) { | |
err = ip6_ufo_append_data(sk, queue, getfrag, from, length, | |
hh_len, fragheaderlen, exthdrlen, | |
transhdrlen, mtu, flags, fl6); | |
--- a/net/ipv6/ip6_tunnel.c | |
+++ b/net/ipv6/ip6_tunnel.c | |
@@ -16,6 +16,8 @@ | |
* as published by the Free Software Foundation; either version | |
* 2 of the License, or (at your option) any later version. | |
* | |
+ * Changes: | |
+ * Steven Barth <cyrus@openwrt.org>: MAP-E FMR support | |
*/ | |
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | |
@@ -71,11 +73,9 @@ static bool log_ecn_error = true; | |
module_param(log_ecn_error, bool, 0644); | |
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); | |
-static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2) | |
+static u32 HASH(const struct in6_addr *addr) | |
{ | |
- u32 hash = ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2); | |
- | |
- return hash_32(hash, HASH_SIZE_SHIFT); | |
+ return hash_32(ipv6_addr_hash(addr), HASH_SIZE_SHIFT); | |
} | |
static int ip6_tnl_dev_init(struct net_device *dev); | |
@@ -122,6 +122,24 @@ static struct net_device_stats *ip6_get_stats(struct net_device *dev) | |
return &dev->stats; | |
} | |
+/* | |
+ * Update offload stats | |
+ */ | |
+void ip6_update_offload_stats(struct net_device *dev, void *ptr) | |
+{ | |
+ struct pcpu_sw_netstats *tstats = per_cpu_ptr(dev->tstats, 0); | |
+ const struct pcpu_sw_netstats *offload_stats = | |
+ (struct pcpu_sw_netstats *)ptr; | |
+ | |
+ u64_stats_update_begin(&tstats->syncp); | |
+ tstats->tx_packets += offload_stats->tx_packets; | |
+ tstats->tx_bytes += offload_stats->tx_bytes; | |
+ tstats->rx_packets += offload_stats->rx_packets; | |
+ tstats->rx_bytes += offload_stats->rx_bytes; | |
+ u64_stats_update_end(&tstats->syncp); | |
+} | |
+EXPORT_SYMBOL(ip6_update_offload_stats); | |
+ | |
/* | |
* Locking : hash tables are protected by RCU and RTNL | |
*/ | |
@@ -230,20 +248,29 @@ EXPORT_SYMBOL_GPL(ip6_tnl_dst_init); | |
static struct ip6_tnl * | |
ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_addr *local) | |
{ | |
- unsigned int hash = HASH(remote, local); | |
+ unsigned int hash = HASH(local); | |
struct ip6_tnl *t; | |
struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); | |
struct in6_addr any; | |
+ struct __ip6_tnl_fmr *fmr; | |
for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) { | |
- if (ipv6_addr_equal(local, &t->parms.laddr) && | |
- ipv6_addr_equal(remote, &t->parms.raddr) && | |
- (t->dev->flags & IFF_UP)) | |
+ if (!ipv6_addr_equal(local, &t->parms.laddr) || | |
+ !(t->dev->flags & IFF_UP)) | |
+ continue; | |
+ | |
+ if (ipv6_addr_equal(remote, &t->parms.raddr)) | |
return t; | |
+ | |
+ for (fmr = t->parms.fmrs; fmr; fmr = fmr->next) { | |
+ if (ipv6_prefix_equal(remote, &fmr->ip6_prefix, | |
+ fmr->ip6_prefix_len)) | |
+ return t; | |
+ } | |
} | |
memset(&any, 0, sizeof(any)); | |
- hash = HASH(&any, local); | |
+ hash = HASH(local); | |
for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) { | |
if (ipv6_addr_equal(local, &t->parms.laddr) && | |
ipv6_addr_any(&t->parms.raddr) && | |
@@ -251,7 +278,7 @@ ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_ | |
return t; | |
} | |
- hash = HASH(remote, &any); | |
+ hash = HASH(&any); | |
for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) { | |
if (ipv6_addr_equal(remote, &t->parms.raddr) && | |
ipv6_addr_any(&t->parms.laddr) && | |
@@ -287,7 +314,7 @@ ip6_tnl_bucket(struct ip6_tnl_net *ip6n, const struct __ip6_tnl_parm *p) | |
if (!ipv6_addr_any(remote) || !ipv6_addr_any(local)) { | |
prio = 1; | |
- h = HASH(remote, local); | |
+ h = HASH(local); | |
} | |
return &ip6n->tnls[prio][h]; | |
} | |
@@ -460,6 +487,12 @@ ip6_tnl_dev_uninit(struct net_device *dev) | |
struct net *net = t->net; | |
struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); | |
+ while (t->parms.fmrs) { | |
+ struct __ip6_tnl_fmr *next = t->parms.fmrs->next; | |
+ kfree(t->parms.fmrs); | |
+ t->parms.fmrs = next; | |
+ } | |
+ | |
if (dev == ip6n->fb_tnl_dev) | |
RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL); | |
else | |
@@ -856,6 +889,127 @@ int ip6_tnl_rcv_ctl(struct ip6_tnl *t, | |
} | |
EXPORT_SYMBOL_GPL(ip6_tnl_rcv_ctl); | |
+ | |
+/** | |
+ * ip4ip6_fmr_calc - calculate target / source IPv6-address based on FMR | |
+ * @dest: destination IPv6 address buffer | |
+ * @iph: IPv4 header of the packet being translated | |
+ * @end: pointer one past the last valid byte of the packet data | |
+ * @fmr: MAP forwarding mapping rule (FMR) to apply | |
+ * @xmit: true when calculating for the xmit path, false for the rcv path | |
+ * @draft03: use the draft-03 MAP IPv6 address format | |
+ **/ | |
+static void ip4ip6_fmr_calc(struct in6_addr *dest, | |
+ const struct iphdr *iph, const uint8_t *end, | |
+ const struct __ip6_tnl_fmr *fmr, bool xmit, bool draft03) | |
+{ | |
+ int psidlen = fmr->ea_len - (32 - fmr->ip4_prefix_len); | |
+ u8 *portp = NULL; | |
+ bool use_dest_addr; | |
+ const struct iphdr *dsth = iph; | |
+ | |
+ if ((u8*)dsth >= end) | |
+ return; | |
+ | |
+ /* find significant IP header */ | |
+ if (iph->protocol == IPPROTO_ICMP) { | |
+ struct icmphdr *ih = (struct icmphdr*)(((u8*)dsth) + dsth->ihl * 4); | |
+ if (ih && ((u8*)&ih[1]) <= end && ( | |
+ ih->type == ICMP_DEST_UNREACH || | |
+ ih->type == ICMP_SOURCE_QUENCH || | |
+ ih->type == ICMP_TIME_EXCEEDED || | |
+ ih->type == ICMP_PARAMETERPROB || | |
+ ih->type == ICMP_REDIRECT)) | |
+ dsth = (const struct iphdr*)&ih[1]; | |
+ } | |
+ | |
+ /* in xmit-path use dest port by default and source port only if | |
+ this is an ICMP reply to something else; vice versa in rcv-path */ | |
+ use_dest_addr = (xmit && dsth == iph) || (!xmit && dsth != iph); | |
+ | |
+ /* get dst port */ | |
+ if (((u8 *)&dsth[1]) <= end && ( | |
+ dsth->protocol == IPPROTO_UDP || | |
+ dsth->protocol == IPPROTO_TCP || | |
+ dsth->protocol == IPPROTO_SCTP || | |
+ dsth->protocol == IPPROTO_DCCP)) { | |
+ /* for UDP, TCP, SCTP and DCCP source and dest port | |
+ follow IPv4 header directly */ | |
+ portp = ((u8*)dsth) + dsth->ihl * 4; | |
+ | |
+ if (use_dest_addr) | |
+ portp += sizeof(u16); | |
+ } else if (iph->protocol == IPPROTO_ICMP) { | |
+ struct icmphdr *ih = (struct icmphdr*)(((u8*)dsth) + dsth->ihl * 4); | |
+ | |
+ /* use icmp identifier as port */ | |
+ if (((u8 *)ih) <= end && ( | |
+ (use_dest_addr && ( | |
+ ih->type == ICMP_ECHOREPLY || | |
+ ih->type == ICMP_TIMESTAMPREPLY || | |
+ ih->type == ICMP_INFO_REPLY || | |
+ ih->type == ICMP_ADDRESSREPLY)) || | |
+ (!use_dest_addr && ( | |
+ ih->type == ICMP_ECHO || | |
+ ih->type == ICMP_TIMESTAMP || | |
+ ih->type == ICMP_INFO_REQUEST || | |
+ ih->type == ICMP_ADDRESS) | |
+ ))) | |
+ portp = (u8*)&ih->un.echo.id; | |
+ } | |
+ | |
+ if ((portp && &portp[2] <= end) || psidlen == 0) { | |
+ int frombyte = fmr->ip6_prefix_len / 8; | |
+ int fromrem = fmr->ip6_prefix_len % 8; | |
+ int bytes = sizeof(struct in6_addr) - frombyte; | |
+ const u32 *addr = (use_dest_addr) ? &dsth->daddr : &dsth->saddr; | |
+ u64 eabits = ((u64)ntohl(*addr)) << (32 + fmr->ip4_prefix_len); | |
+ u64 t = 0; | |
+ | |
+ /* extract PSID from port and add it to eabits */ | |
+ u16 psidbits = 0; | |
+ if (psidlen > 0) { | |
+ psidbits = ((u16)portp[0]) << 8 | ((u16)portp[1]); | |
+ psidbits >>= 16 - psidlen - fmr->offset; | |
+ psidbits = (u16)(psidbits << (16 - psidlen)); | |
+ eabits |= ((u64)psidbits) << (48 - (fmr->ea_len - psidlen)); | |
+ } | |
+ | |
+ /* rewrite destination address */ | |
+ *dest = fmr->ip6_prefix; | |
+ memcpy(&dest->s6_addr[10], addr, sizeof(*addr)); | |
+ dest->s6_addr16[7] = htons(psidbits >> (16 - psidlen)); | |
+ | |
+ if (bytes > sizeof(u64)) | |
+ bytes = sizeof(u64); | |
+ | |
+ /* insert eabits */ | |
+ memcpy(&t, &dest->s6_addr[frombyte], bytes); | |
+ t = be64_to_cpu(t) & ~(((((u64)1) << fmr->ea_len) - 1) | |
+ << (64 - fmr->ea_len - fromrem)); | |
+ t = cpu_to_be64(t | (eabits >> fromrem)); | |
+ memcpy(&dest->s6_addr[frombyte], &t, bytes); | |
+ if (draft03) { | |
+ /** | |
+ * Draft03 IPv6 address format | |
+ * +--+---+---+---+---+---+---+---+---+ | |
+ * |PL| 8 16 24 32 40 48 56 | | |
+ * +--+---+---+---+---+---+---+---+---+ | |
+ * |64| u | IPv4 address |PSID |0 | | |
+ * +--+---+---+---+---+---+---+---+---+ | |
+ * Final specification IPv6 address format | |
+ * +--+---+---+---+---+---+---+---+---+ | |
+ * |PL| 8 16 24 32 40 48 56 | | |
+ * +--+---+---+---+---+---+---+---+---+ | |
+ * |64| 0 | IPv4 address |PSID | | |
+ * +--+---+---+---+---+---+---+---+---+ | |
+ * We need to move the last six bytes one byte forward | |
+ */ | |
+ memmove(&dest->s6_addr[9], &dest->s6_addr[10], 6); | |
+ dest->s6_addr[15] = 0; | |
+ } | |
+ } | |
+} | |
+ | |
+ | |
/** | |
* ip6_tnl_rcv - decapsulate IPv6 packet and retransmit it locally | |
* @skb: received socket buffer | |
@@ -901,6 +1055,28 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol, | |
skb_reset_network_header(skb); | |
skb->protocol = htons(protocol); | |
memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); | |
+ if (protocol == ETH_P_IP && | |
+ !ipv6_addr_equal(&ipv6h->saddr, &t->parms.raddr)) { | |
+ /* Packet didn't come from BR, so lookup FMR */ | |
+ struct __ip6_tnl_fmr *fmr; | |
+ struct in6_addr expected = t->parms.raddr; | |
+ for (fmr = t->parms.fmrs; fmr; fmr = fmr->next) | |
+ if (ipv6_prefix_equal(&ipv6h->saddr, | |
+ &fmr->ip6_prefix, fmr->ip6_prefix_len)) | |
+ break; | |
+ | |
+ /* Check that IPv6 matches IPv4 source to prevent spoofing */ | |
+ if (fmr) | |
+ ip4ip6_fmr_calc(&expected, ip_hdr(skb), | |
+ skb_tail_pointer(skb), | |
+ fmr, false, | |
+ t->parms.draft03); | |
+ | |
+ if (!ipv6_addr_equal(&ipv6h->saddr, &expected)) { | |
+ rcu_read_unlock(); | |
+ goto discard; | |
+ } | |
+ } | |
__skb_tunnel_rx(skb, t->dev, t->net); | |
@@ -924,6 +1100,8 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol, | |
tstats->rx_bytes += skb->len; | |
u64_stats_update_end(&tstats->syncp); | |
+ /* Reset the skb_iif to Tunnels interface index */ | |
+ skb->skb_iif = t->dev->ifindex; | |
netif_rx(skb); | |
rcu_read_unlock(); | |
@@ -1173,12 +1351,15 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, | |
skb_push(skb, sizeof(struct ipv6hdr)); | |
skb_reset_network_header(skb); | |
ipv6h = ipv6_hdr(skb); | |
- ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), | |
+ ip6_flow_hdr(ipv6h, dsfield, | |
ip6_make_flowlabel(net, skb, fl6->flowlabel, true, fl6)); | |
ipv6h->hop_limit = t->parms.hop_limit; | |
ipv6h->nexthdr = proto; | |
ipv6h->saddr = fl6->saddr; | |
ipv6h->daddr = fl6->daddr; | |
+ | |
+ /* Reset the skb_iif to Tunnels interface index */ | |
+ skb->skb_iif = dev->ifindex; | |
ip6tunnel_xmit(NULL, skb, dev); | |
return 0; | |
tx_err_link_failure: | |
@@ -1200,6 +1381,7 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) | |
__u32 mtu; | |
u8 tproto; | |
int err; | |
+ struct __ip6_tnl_fmr *fmr; | |
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); | |
@@ -1221,6 +1403,19 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) | |
if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) | |
fl6.flowi6_mark = skb->mark; | |
+ /* try to find matching FMR */ | |
+ for (fmr = t->parms.fmrs; fmr; fmr = fmr->next) { | |
+ unsigned mshift = 32 - fmr->ip4_prefix_len; | |
+ if (ntohl(fmr->ip4_prefix.s_addr) >> mshift == | |
+ ntohl(iph->daddr) >> mshift) | |
+ break; | |
+ } | |
+ | |
+ /* change dstaddr according to FMR */ | |
+ if (fmr) | |
+ ip4ip6_fmr_calc(&fl6.daddr, iph, skb_tail_pointer(skb), fmr, | |
+ true, t->parms.draft03); | |
+ | |
err = ip6_tnl_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu); | |
if (err != 0) { | |
/* XXX: send ICMP error even if DF is not set. */ | |
@@ -1269,7 +1464,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) | |
dsfield = ipv6_get_dsfield(ipv6h); | |
if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) | |
- fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK); | |
+ fl6.flowlabel |= net_hdr_word(ipv6h) & IPV6_TCLASS_MASK; | |
if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL) | |
fl6.flowlabel |= ip6_flowlabel(ipv6h); | |
if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) | |
@@ -1389,6 +1584,14 @@ ip6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p) | |
t->parms.flowinfo = p->flowinfo; | |
t->parms.link = p->link; | |
t->parms.proto = p->proto; | |
+ | |
+ while (t->parms.fmrs) { | |
+ struct __ip6_tnl_fmr *next = t->parms.fmrs->next; | |
+ kfree(t->parms.fmrs); | |
+ t->parms.fmrs = next; | |
+ } | |
+ t->parms.fmrs = p->fmrs; | |
+ | |
ip6_tnl_dst_reset(t); | |
ip6_tnl_link_config(t); | |
return 0; | |
@@ -1427,6 +1630,7 @@ ip6_tnl_parm_from_user(struct __ip6_tnl_parm *p, const struct ip6_tnl_parm *u) | |
p->flowinfo = u->flowinfo; | |
p->link = u->link; | |
p->proto = u->proto; | |
+ p->fmrs = NULL; | |
memcpy(p->name, u->name, sizeof(u->name)); | |
} | |
@@ -1608,6 +1812,7 @@ static const struct net_device_ops ip6_tnl_netdev_ops = { | |
.ndo_start_xmit = ip6_tnl_xmit, | |
.ndo_do_ioctl = ip6_tnl_ioctl, | |
.ndo_change_mtu = ip6_tnl_change_mtu, | |
+ .ndo_get_stats64 = ip_tunnel_get_stats64, | |
.ndo_get_stats = ip6_get_stats, | |
.ndo_get_iflink = ip6_tnl_get_iflink, | |
}; | |
@@ -1722,6 +1927,15 @@ static int ip6_tnl_validate(struct nlattr *tb[], struct nlattr *data[]) | |
return 0; | |
} | |
+static const struct nla_policy ip6_tnl_fmr_policy[IFLA_IPTUN_FMR_MAX + 1] = { | |
+ [IFLA_IPTUN_FMR_IP6_PREFIX] = { .len = sizeof(struct in6_addr) }, | |
+ [IFLA_IPTUN_FMR_IP4_PREFIX] = { .len = sizeof(struct in_addr) }, | |
+ [IFLA_IPTUN_FMR_IP6_PREFIX_LEN] = { .type = NLA_U8 }, | |
+ [IFLA_IPTUN_FMR_IP4_PREFIX_LEN] = { .type = NLA_U8 }, | |
+ [IFLA_IPTUN_FMR_EA_LEN] = { .type = NLA_U8 }, | |
+ [IFLA_IPTUN_FMR_OFFSET] = { .type = NLA_U8 } | |
+}; | |
+ | |
static void ip6_tnl_netlink_parms(struct nlattr *data[], | |
struct __ip6_tnl_parm *parms) | |
{ | |
@@ -1753,6 +1967,49 @@ static void ip6_tnl_netlink_parms(struct nlattr *data[], | |
if (data[IFLA_IPTUN_PROTO]) | |
parms->proto = nla_get_u8(data[IFLA_IPTUN_PROTO]); | |
+ | |
+ if (data[IFLA_IPTUN_DRAFT03]) | |
+ parms->draft03 = nla_get_u8(data[IFLA_IPTUN_DRAFT03]); | |
+ | |
+ if (data[IFLA_IPTUN_FMRS]) { | |
+ unsigned rem; | |
+ struct nlattr *fmr; | |
+ nla_for_each_nested(fmr, data[IFLA_IPTUN_FMRS], rem) { | |
+ struct nlattr *fmrd[IFLA_IPTUN_FMR_MAX + 1], *c; | |
+ struct __ip6_tnl_fmr *nfmr; | |
+ | |
+ nla_parse_nested(fmrd, IFLA_IPTUN_FMR_MAX, | |
+ fmr, ip6_tnl_fmr_policy); | |
+ | |
+ if (!(nfmr = kzalloc(sizeof(*nfmr), GFP_KERNEL))) | |
+ continue; | |
+ | |
+ nfmr->offset = 6; | |
+ | |
+ if ((c = fmrd[IFLA_IPTUN_FMR_IP6_PREFIX])) | |
+ nla_memcpy(&nfmr->ip6_prefix, fmrd[IFLA_IPTUN_FMR_IP6_PREFIX], | |
+ sizeof(nfmr->ip6_prefix)); | |
+ | |
+ if ((c = fmrd[IFLA_IPTUN_FMR_IP4_PREFIX])) | |
+ nla_memcpy(&nfmr->ip4_prefix, fmrd[IFLA_IPTUN_FMR_IP4_PREFIX], | |
+ sizeof(nfmr->ip4_prefix)); | |
+ | |
+ if ((c = fmrd[IFLA_IPTUN_FMR_IP6_PREFIX_LEN])) | |
+ nfmr->ip6_prefix_len = nla_get_u8(c); | |
+ | |
+ if ((c = fmrd[IFLA_IPTUN_FMR_IP4_PREFIX_LEN])) | |
+ nfmr->ip4_prefix_len = nla_get_u8(c); | |
+ | |
+ if ((c = fmrd[IFLA_IPTUN_FMR_EA_LEN])) | |
+ nfmr->ea_len = nla_get_u8(c); | |
+ | |
+ if ((c = fmrd[IFLA_IPTUN_FMR_OFFSET])) | |
+ nfmr->offset = nla_get_u8(c); | |
+ | |
+ nfmr->next = parms->fmrs; | |
+ parms->fmrs = nfmr; | |
+ } | |
+ } | |
} | |
static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev, | |
@@ -1805,6 +2062,12 @@ static void ip6_tnl_dellink(struct net_device *dev, struct list_head *head) | |
static size_t ip6_tnl_get_size(const struct net_device *dev) | |
{ | |
+ const struct ip6_tnl *t = netdev_priv(dev); | |
+ struct __ip6_tnl_fmr *c; | |
+ int fmrs = 0; | |
+ for (c = t->parms.fmrs; c; c = c->next) | |
+ ++fmrs; | |
+ | |
return | |
/* IFLA_IPTUN_LINK */ | |
nla_total_size(4) + | |
@@ -1822,6 +2085,24 @@ static size_t ip6_tnl_get_size(const struct net_device *dev) | |
nla_total_size(4) + | |
/* IFLA_IPTUN_PROTO */ | |
nla_total_size(1) + | |
+ /* IFLA_IPTUN_FMRS */ | |
+ nla_total_size(0) + | |
+ ( | |
+ /* nest */ | |
+ nla_total_size(0) + | |
+ /* IFLA_IPTUN_FMR_IP6_PREFIX */ | |
+ nla_total_size(sizeof(struct in6_addr)) + | |
+ /* IFLA_IPTUN_FMR_IP4_PREFIX */ | |
+ nla_total_size(sizeof(struct in_addr)) + | |
+ /* IFLA_IPTUN_FMR_EA_LEN */ | |
+ nla_total_size(1) + | |
+ /* IFLA_IPTUN_FMR_IP6_PREFIX_LEN */ | |
+ nla_total_size(1) + | |
+ /* IFLA_IPTUN_FMR_IP4_PREFIX_LEN */ | |
+ nla_total_size(1) + | |
+ /* IFLA_IPTUN_FMR_OFFSET */ | |
+ nla_total_size(1) | |
+ ) * fmrs + | |
0; | |
} | |
@@ -1838,11 +2119,9 @@ static int ip6_tnl_fill_info(struct sk_buff *skb, const struct net_device *dev) | |
nla_put_be32(skb, IFLA_IPTUN_FLOWINFO, parm->flowinfo) || | |
nla_put_u32(skb, IFLA_IPTUN_FLAGS, parm->flags) || | |
nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->proto)) | |
- goto nla_put_failure; | |
- return 0; | |
+ return -EMSGSIZE; | |
-nla_put_failure: | |
- return -EMSGSIZE; | |
+ return 0; | |
} | |
struct net *ip6_tnl_get_link_net(const struct net_device *dev) | |
@@ -1862,6 +2141,7 @@ static const struct nla_policy ip6_tnl_policy[IFLA_IPTUN_MAX + 1] = { | |
[IFLA_IPTUN_FLOWINFO] = { .type = NLA_U32 }, | |
[IFLA_IPTUN_FLAGS] = { .type = NLA_U32 }, | |
[IFLA_IPTUN_PROTO] = { .type = NLA_U8 }, | |
+ [IFLA_IPTUN_FMRS] = { .type = NLA_NESTED }, | |
}; | |
static struct rtnl_link_ops ip6_link_ops __read_mostly = { | |
--- a/net/ipv6/ip6mr.c | |
+++ b/net/ipv6/ip6mr.c | |
@@ -120,6 +120,11 @@ static int ip6mr_rtm_dumproute(struct sk_buff *skb, | |
struct netlink_callback *cb); | |
static void mroute_clean_tables(struct mr6_table *mrt, bool all); | |
static void ipmr_expire_process(unsigned long arg); | |
+static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt, | |
+ const struct in6_addr *origin, | |
+ const struct in6_addr *mcastgrp); | |
+static ip6mr_mfc_event_offload_callback_t __rcu | |
+ ip6mr_mfc_event_offload_callback; | |
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES | |
#define ip6mr_for_each_table(mrt, net) \ | |
@@ -167,6 +172,8 @@ static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp, | |
return -ENETUNREACH; | |
case FR_ACT_PROHIBIT: | |
return -EACCES; | |
+ case FR_ACT_POLICY_FAILED: | |
+ return -EACCES; | |
case FR_ACT_BLACKHOLE: | |
default: | |
return -EINVAL; | |
@@ -338,6 +345,82 @@ static void ip6mr_free_table(struct mr6_table *mrt) | |
kfree(mrt); | |
} | |
+/* ip6mr_sync_entry_update() | |
+ * Call the registered offload callback to report an update to a multicast | |
+ * route entry. The callback receives the list of destination interfaces and | |
+ * the interface count | |
+ */ | |
+static void ip6mr_sync_entry_update(struct mr6_table *mrt, | |
+ struct mfc6_cache *cache) | |
+{ | |
+ int vifi, dest_if_count = 0; | |
+ u32 dest_dev[MAXMIFS]; | |
+ struct in6_addr mc_origin, mc_group; | |
+ ip6mr_mfc_event_offload_callback_t offload_update_cb_f; | |
+ | |
+ memset(dest_dev, 0, sizeof(dest_dev)); | |
+ | |
+ read_lock(&mrt_lock); | |
+ | |
+ for (vifi = 0; vifi < cache->mfc_un.res.maxvif; vifi++) { | |
+ if (!((cache->mfc_un.res.ttls[vifi] > 0) && | |
+ (cache->mfc_un.res.ttls[vifi] < 255))) { | |
+ continue; | |
+ } | |
+ | |
+ if (dest_if_count == MAXMIFS) { | |
+ read_unlock(&mrt_lock); | |
+ return; | |
+ } | |
+ | |
+ if (!MIF_EXISTS(mrt, vifi)) { | |
+ read_unlock(&mrt_lock); | |
+ return; | |
+ } | |
+ | |
+ dest_dev[dest_if_count] = mrt->vif6_table[vifi].dev->ifindex; | |
+ dest_if_count++; | |
+ } | |
+ | |
+ memcpy(&mc_origin, &cache->mf6c_origin, sizeof(struct in6_addr)); | |
+ memcpy(&mc_group, &cache->mf6c_mcastgrp, sizeof(struct in6_addr)); | |
+ read_unlock(&mrt_lock); | |
+ | |
+ rcu_read_lock(); | |
+ offload_update_cb_f = rcu_dereference(ip6mr_mfc_event_offload_callback); | |
+ | |
+ if (!offload_update_cb_f) { | |
+ rcu_read_unlock(); | |
+ return; | |
+ } | |
+ | |
+ offload_update_cb_f(&mc_group, &mc_origin, dest_if_count, dest_dev, | |
+ IP6MR_MFC_EVENT_UPDATE); | |
+ rcu_read_unlock(); | |
+} | |
+ | |
+/* ip6mr_sync_entry_delete() | |
+ * Call the registered offload callback to inform of a multicast route entry | |
+ * delete event | |
+ */ | |
+static void ip6mr_sync_entry_delete(struct in6_addr *mc_origin, | |
+ struct in6_addr *mc_group) | |
+{ | |
+ ip6mr_mfc_event_offload_callback_t offload_update_cb_f; | |
+ | |
+ rcu_read_lock(); | |
+ offload_update_cb_f = rcu_dereference(ip6mr_mfc_event_offload_callback); | |
+ | |
+ if (!offload_update_cb_f) { | |
+ rcu_read_unlock(); | |
+ return; | |
+ } | |
+ | |
+ offload_update_cb_f(mc_group, mc_origin, 0, NULL, | |
+ IP6MR_MFC_EVENT_DELETE); | |
+ rcu_read_unlock(); | |
+} | |
+ | |
#ifdef CONFIG_PROC_FS | |
struct ipmr_mfc_iter { | |
@@ -454,7 +537,7 @@ static int ip6mr_vif_seq_show(struct seq_file *seq, void *v) | |
const char *name = vif->dev ? vif->dev->name : "none"; | |
seq_printf(seq, | |
- "%2td %-10s %8ld %7ld %8ld %7ld %05X\n", | |
+ "%2td %-10s %8llu %7llu %8llu %7llu %05X\n", | |
vif - mrt->vif6_table, | |
name, vif->bytes_in, vif->pkt_in, | |
vif->bytes_out, vif->pkt_out, | |
@@ -770,6 +853,145 @@ failure: | |
} | |
#endif | |
+/* ip6mr_register_mfc_event_offload_callback() | |
+ * Register the IPv6 multicast update callback for offload modules | |
+ */ | |
+bool ip6mr_register_mfc_event_offload_callback( | |
+ ip6mr_mfc_event_offload_callback_t mfc_offload_cb) | |
+{ | |
+ ip6mr_mfc_event_offload_callback_t offload_update_cb_f; | |
+ | |
+ rcu_read_lock(); | |
+ offload_update_cb_f = rcu_dereference(ip6mr_mfc_event_offload_callback); | |
+ | |
+ if (offload_update_cb_f) { | |
+ rcu_read_unlock(); | |
+ return false; | |
+ } | |
+ | |
+ rcu_assign_pointer(ip6mr_mfc_event_offload_callback, mfc_offload_cb); | |
+ rcu_read_unlock(); | |
+ return true; | |
+} | |
+EXPORT_SYMBOL(ip6mr_register_mfc_event_offload_callback); | |
+ | |
+/* ip6mr_unregister_mfc_event_offload_callback() | |
+ * De-register the IPv6 multicast update callback for offload modules | |
+ */ | |
+void ip6mr_unregister_mfc_event_offload_callback(void) | |
+{ | |
+ rcu_read_lock(); | |
+ rcu_assign_pointer(ip6mr_mfc_event_offload_callback, NULL); | |
+ rcu_read_unlock(); | |
+} | |
+EXPORT_SYMBOL(ip6mr_unregister_mfc_event_offload_callback); | |
+ | |
+/* ip6mr_find_mfc_entry() | |
+ * Return the destination interface list for a particular multicast flow, and | |
+ * the number of interfaces in the list | |
+ */ | |
+int ip6mr_find_mfc_entry(struct net *net, struct in6_addr *origin, | |
+ struct in6_addr *group, u32 max_dest_cnt, | |
+ u32 dest_dev[]) | |
+{ | |
+ int vifi, dest_if_count = 0; | |
+ struct mr6_table *mrt; | |
+ struct mfc6_cache *cache; | |
+ | |
+ mrt = ip6mr_get_table(net, RT6_TABLE_DFLT); | |
+ if (!mrt) | |
+ return -ENOENT; | |
+ | |
+ read_lock(&mrt_lock); | |
+ cache = ip6mr_cache_find(mrt, origin, group); | |
+ if (!cache) { | |
+ read_unlock(&mrt_lock); | |
+ return -ENOENT; | |
+ } | |
+ | |
+ for (vifi = 0; vifi < cache->mfc_un.res.maxvif; vifi++) { | |
+ if (!((cache->mfc_un.res.ttls[vifi] > 0) && | |
+ (cache->mfc_un.res.ttls[vifi] < 255))) { | |
+ continue; | |
+ } | |
+ | |
+ /* We have another valid destination interface entry. Check if | |
+ * the number of the destination interfaces for the route is | |
+ * exceeding the size of the array given to us | |
+ */ | |
+ if (dest_if_count == max_dest_cnt) { | |
+ read_unlock(&mrt_lock); | |
+ return -EINVAL; | |
+ } | |
+ | |
+ if (!MIF_EXISTS(mrt, vifi)) { | |
+ read_unlock(&mrt_lock); | |
+ return -EINVAL; | |
+ } | |
+ | |
+ dest_dev[dest_if_count] = mrt->vif6_table[vifi].dev->ifindex; | |
+ dest_if_count++; | |
+ } | |
+ read_unlock(&mrt_lock); | |
+ | |
+ return dest_if_count; | |
+} | |
+EXPORT_SYMBOL(ip6mr_find_mfc_entry); | |
+ | |
+/* ip6mr_mfc_stats_update() | |
+ * Update the MFC/VIF statistics for offloaded flows | |
+ */ | |
+int ip6mr_mfc_stats_update(struct net *net, struct in6_addr *origin, | |
+ struct in6_addr *group, u64 pkts_in, | |
+ u64 bytes_in, uint64_t pkts_out, | |
+ u64 bytes_out) | |
+{ | |
+ int vif, vifi; | |
+ struct mr6_table *mrt; | |
+ struct mfc6_cache *cache; | |
+ | |
+ mrt = ip6mr_get_table(net, RT6_TABLE_DFLT); | |
+ | |
+ if (!mrt) | |
+ return -ENOENT; | |
+ | |
+ read_lock(&mrt_lock); | |
+ cache = ip6mr_cache_find(mrt, origin, group); | |
+ if (!cache) { | |
+ read_unlock(&mrt_lock); | |
+ return -ENOENT; | |
+ } | |
+ | |
+ vif = cache->mf6c_parent; | |
+ | |
+ if (!MIF_EXISTS(mrt, vif)) { | |
+ read_unlock(&mrt_lock); | |
+ return -EINVAL; | |
+ } | |
+ | |
+ mrt->vif6_table[vif].pkt_in += pkts_in; | |
+ mrt->vif6_table[vif].bytes_in += bytes_in; | |
+ cache->mfc_un.res.pkt += pkts_out; | |
+ cache->mfc_un.res.bytes += bytes_out; | |
+ | |
+ for (vifi = cache->mfc_un.res.minvif; | |
+ vifi < cache->mfc_un.res.maxvif; vifi++) { | |
+ if ((cache->mfc_un.res.ttls[vifi] > 0) && | |
+ (cache->mfc_un.res.ttls[vifi] < 255)) { | |
+ if (!MIF_EXISTS(mrt, vifi)) { | |
+ read_unlock(&mrt_lock); | |
+ return -EINVAL; | |
+ } | |
+ mrt->vif6_table[vifi].pkt_out += pkts_out; | |
+ mrt->vif6_table[vifi].bytes_out += bytes_out; | |
+ } | |
+ } | |
+ | |
+ read_unlock(&mrt_lock); | |
+ return 0; | |
+} | |
+EXPORT_SYMBOL(ip6mr_mfc_stats_update); | |
+ | |
/* | |
* Delete a VIF entry | |
*/ | |
@@ -1302,6 +1524,7 @@ static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc, | |
{ | |
int line; | |
struct mfc6_cache *c, *next; | |
+ struct in6_addr mc_origin, mc_group; | |
line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr); | |
@@ -1310,12 +1533,20 @@ static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc, | |
ipv6_addr_equal(&c->mf6c_mcastgrp, | |
&mfc->mf6cc_mcastgrp.sin6_addr) && | |
(parent == -1 || parent == c->mf6c_parent)) { | |
+ memcpy(&mc_origin, &c->mf6c_origin, | |
+ sizeof(struct in6_addr)); | |
+ memcpy(&mc_group, &c->mf6c_mcastgrp, | |
+ sizeof(struct in6_addr)); | |
+ | |
write_lock_bh(&mrt_lock); | |
list_del(&c->list); | |
write_unlock_bh(&mrt_lock); | |
mr6_netlink_event(mrt, c, RTM_DELROUTE); | |
ip6mr_cache_free(c); | |
+ | |
+ /* Inform offload modules of the delete event */ | |
+ ip6mr_sync_entry_delete(&mc_origin, &mc_group); | |
return 0; | |
} | |
} | |
@@ -1486,6 +1717,9 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt, | |
c->mfc_flags |= MFC_STATIC; | |
write_unlock_bh(&mrt_lock); | |
mr6_netlink_event(mrt, c, RTM_NEWROUTE); | |
+ | |
+ /* Inform offload modules of the update event */ | |
+ ip6mr_sync_entry_update(mrt, c); | |
return 0; | |
} | |
@@ -1544,6 +1778,7 @@ static void mroute_clean_tables(struct mr6_table *mrt, bool all) | |
int i; | |
LIST_HEAD(list); | |
struct mfc6_cache *c, *next; | |
+ struct in6_addr mc_origin, mc_group; | |
/* | |
* Shut down all active vif entries | |
@@ -1562,12 +1797,19 @@ static void mroute_clean_tables(struct mr6_table *mrt, bool all) | |
list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) { | |
if (!all && (c->mfc_flags & MFC_STATIC)) | |
continue; | |
+ memcpy(&mc_origin, &c->mf6c_origin, | |
+ sizeof(struct in6_addr)); | |
+ memcpy(&mc_group, &c->mf6c_mcastgrp, | |
+ sizeof(struct in6_addr)); | |
write_lock_bh(&mrt_lock); | |
list_del(&c->list); | |
write_unlock_bh(&mrt_lock); | |
mr6_netlink_event(mrt, c, RTM_DELROUTE); | |
ip6mr_cache_free(c); | |
+ | |
+ /* Inform offload modules of the delete event */ | |
+ ip6mr_sync_entry_delete(&mc_origin, &mc_group); | |
} | |
} | |
--- a/net/ipv6/mcast_snoop.c | |
+++ b/net/ipv6/mcast_snoop.c | |
@@ -53,7 +53,7 @@ static int ipv6_mc_check_exthdrs(struct sk_buff *skb) | |
ip6h = ipv6_hdr(skb); | |
- if (ip6h->nexthdr != IPPROTO_HOPOPTS) | |
+ if (ip6h->nexthdr != IPPROTO_HOPOPTS && ip6h->nexthdr != IPPROTO_ICMPV6) | |
return -ENOMSG; | |
nexthdr = ip6h->nexthdr; | |
@@ -63,9 +63,6 @@ static int ipv6_mc_check_exthdrs(struct sk_buff *skb) | |
if (offset < 0) | |
return -EINVAL; | |
- if (nexthdr != IPPROTO_ICMPV6) | |
- return -ENOMSG; | |
- | |
skb_set_transport_header(skb, offset); | |
return 0; | |
@@ -120,6 +117,7 @@ static int ipv6_mc_check_mld_msg(struct sk_buff *skb) | |
switch (mld->mld_type) { | |
case ICMPV6_MGM_REDUCTION: | |
case ICMPV6_MGM_REPORT: | |
+ case ICMPV6_NDISC_NBR_SOLICITATION: | |
/* fall through */ | |
return 0; | |
case ICMPV6_MLD2_REPORT: | |
--- a/net/ipv6/ndisc.c | |
+++ b/net/ipv6/ndisc.c | |
@@ -594,6 +594,7 @@ void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit, | |
ndisc_send_skb(skb, daddr, saddr); | |
} | |
+EXPORT_SYMBOL(ndisc_send_ns); | |
void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr, | |
const struct in6_addr *daddr) | |
--- a/net/ipv6/netfilter/Kconfig | |
+++ b/net/ipv6/netfilter/Kconfig | |
@@ -334,5 +334,14 @@ endif # IP6_NF_NAT | |
endif # IP6_NF_IPTABLES | |
+config NF_IPV6_DUMMY_HEADER | |
+ tristate "Retain Dummy fragment header" | |
+ depends on NF_DEFRAG_IPV6 | |
+ default n | |
+ help | |
+ This option allows retaining the dummy fragment header in an IPv6 | |
+ packet. A dummy fragment header is a fragment header whose Fragment | |
+ Offset field and M bit are both 0. | |
+ | |
endmenu | |
--- a/net/ipv6/netfilter/ip6_tables.c | |
+++ b/net/ipv6/netfilter/ip6_tables.c | |
@@ -425,6 +425,10 @@ ip6t_do_table(struct sk_buff *skb, | |
} | |
if (table_base + v != ip6t_next_entry(e) && | |
!(e->ipv6.flags & IP6T_F_GOTO)) { | |
+ if (unlikely(stackidx >= private->stacksize)) { | |
+ verdict = NF_DROP; | |
+ break; | |
+ } | |
jumpstack[stackidx++] = e; | |
} | |
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c | |
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c | |
@@ -601,6 +601,23 @@ struct sk_buff *nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 use | |
hdr = ipv6_hdr(clone); | |
fhdr = (struct frag_hdr *)skb_transport_header(clone); | |
+#if IS_ENABLED(CONFIG_NF_IPV6_DUMMY_HEADER) | |
+ /* | |
+ * Revoke dummy header removal by IPv6 reassembly code. | |
+ * | |
+ * Fragment header with MF and fragment offset field as 0, is a | |
+ * dummy fragment header. | |
+ * | |
+ * The MAP-T RFC mandates that the CE add the dummy header to | |
+ * packets, carrying its identification in the header's ID field. | |
+ * This field must be preserved and delivered to the BR, which uses | |
+ * it to identify the particular CE. | |
+ */ | |
+ if (unlikely((fhdr->frag_off & htons(IP6_OFFSET | IP6_MF)) == 0)) { | |
+ goto ret_orig; | |
+ } | |
+#endif | |
+ | |
fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr, | |
skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr)); | |
if (fq == NULL) { | |
--- a/net/ipv6/netfilter/nf_log_ipv6.c | |
+++ b/net/ipv6/netfilter/nf_log_ipv6.c | |
@@ -66,9 +66,9 @@ static void dump_ipv6_packet(struct nf_log_buf *m, | |
/* Max length: 44 "LEN=65535 TC=255 HOPLIMIT=255 FLOWLBL=FFFFF " */ | |
nf_log_buf_add(m, "LEN=%Zu TC=%u HOPLIMIT=%u FLOWLBL=%u ", | |
ntohs(ih->payload_len) + sizeof(struct ipv6hdr), | |
- (ntohl(*(__be32 *)ih) & 0x0ff00000) >> 20, | |
+ (ntohl(net_hdr_word(ih)) & 0x0ff00000) >> 20, | |
ih->hop_limit, | |
- (ntohl(*(__be32 *)ih) & 0x000fffff)); | |
+ (ntohl(net_hdr_word(ih)) & 0x000fffff)); | |
fragment = 0; | |
ptr = ip6hoff + sizeof(struct ipv6hdr); | |
--- a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c | |
+++ b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c | |
@@ -21,6 +21,10 @@ | |
#include <net/ipv6.h> | |
#include <net/netfilter/ipv6/nf_nat_masquerade.h> | |
+#define MAX_WORK_COUNT 16 | |
+ | |
+static atomic_t v6_worker_count; | |
+ | |
unsigned int | |
nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range *range, | |
const struct net_device *out) | |
@@ -78,14 +82,78 @@ static struct notifier_block masq_dev_notifier = { | |
.notifier_call = masq_device_event, | |
}; | |
+struct masq_dev_work { | |
+ struct work_struct work; | |
+ struct net *net; | |
+ int ifindex; | |
+}; | |
+ | |
+static void iterate_cleanup_work(struct work_struct *work) | |
+{ | |
+ struct masq_dev_work *w; | |
+ long index; | |
+ | |
+ w = container_of(work, struct masq_dev_work, work); | |
+ | |
+ index = w->ifindex; | |
+ nf_ct_iterate_cleanup(w->net, device_cmp, (void *)index, 0, 0); | |
+ | |
+ put_net(w->net); | |
+ kfree(w); | |
+ atomic_dec(&v6_worker_count); | |
+ module_put(THIS_MODULE); | |
+} | |
+ | |
+/* ipv6 inet notifier is an atomic notifier, i.e. we cannot | |
+ * schedule. | |
+ * | |
+ * Unfortunately, nf_ct_iterate_cleanup can run for a long | |
+ * time if there are lots of conntracks and the system | |
+ * handles high softirq load, so it frequently calls cond_resched | |
+ * while iterating the conntrack table. | |
+ * | |
+ * So we defer nf_ct_iterate_cleanup walk to the system workqueue. | |
+ * | |
+ * As we can have 'a lot' of inet_events (depending on amount | |
+ * of ipv6 addresses being deleted), we also need to add an upper | |
+ * limit to the number of queued work items. | |
+ */ | |
static int masq_inet_event(struct notifier_block *this, | |
unsigned long event, void *ptr) | |
{ | |
struct inet6_ifaddr *ifa = ptr; | |
- struct netdev_notifier_info info; | |
+ const struct net_device *dev; | |
+ struct masq_dev_work *w; | |
+ struct net *net; | |
+ | |
+ if (event != NETDEV_DOWN || | |
+ atomic_read(&v6_worker_count) >= MAX_WORK_COUNT) | |
+ return NOTIFY_DONE; | |
+ | |
+ dev = ifa->idev->dev; | |
+ net = maybe_get_net(dev_net(dev)); | |
+ if (!net) | |
+ return NOTIFY_DONE; | |
- netdev_notifier_info_init(&info, ifa->idev->dev); | |
- return masq_device_event(this, event, &info); | |
+ if (!try_module_get(THIS_MODULE)) | |
+ goto err_module; | |
+ | |
+ w = kmalloc(sizeof(*w), GFP_ATOMIC); | |
+ if (w) { | |
+ atomic_inc(&v6_worker_count); | |
+ | |
+ INIT_WORK(&w->work, iterate_cleanup_work); | |
+ w->ifindex = dev->ifindex; | |
+ w->net = net; | |
+ schedule_work(&w->work); | |
+ | |
+ return NOTIFY_DONE; | |
+ } | |
+ | |
+ module_put(THIS_MODULE); | |
+ err_module: | |
+ put_net(net); | |
+ return NOTIFY_DONE; | |
} | |
static struct notifier_block masq_inet_notifier = { | |
--- a/net/ipv6/netfilter/nf_reject_ipv6.c | |
+++ b/net/ipv6/netfilter/nf_reject_ipv6.c | |
@@ -135,7 +135,7 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook) | |
struct sk_buff *nskb; | |
struct tcphdr _otcph; | |
const struct tcphdr *otcph; | |
- unsigned int otcplen, hh_len; | |
+ unsigned int otcplen; | |
const struct ipv6hdr *oip6h = ipv6_hdr(oldskb); | |
struct ipv6hdr *ip6h; | |
struct dst_entry *dst = NULL; | |
@@ -157,6 +157,17 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook) | |
fl6.daddr = oip6h->saddr; | |
fl6.fl6_sport = otcph->dest; | |
fl6.fl6_dport = otcph->source; | |
+ | |
+ /* For forwarding packet, the skb->skb_iif is the incoming device's | |
+ * ifindex, but it is 0 for local out skb, use dst->dev's ifindex | |
+ * instead. | |
+ */ | |
+ if (oldskb->skb_iif != 0) | |
+ fl6.flowi6_oif = oldskb->skb_iif; | |
+ else | |
+ fl6.flowi6_oif = l3mdev_master_ifindex(skb_dst(oldskb)->dev); | |
+ | |
+ fl6.flowi6_mark = IP6_REPLY_MARK(net, oldskb->mark); | |
security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6)); | |
dst = ip6_route_output(net, NULL, &fl6); | |
if (dst == NULL || dst->error) { | |
@@ -167,8 +178,7 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook) | |
if (IS_ERR(dst)) | |
return; | |
- hh_len = (dst->dev->hard_header_len + 15)&~15; | |
- nskb = alloc_skb(hh_len + 15 + dst->header_len + sizeof(struct ipv6hdr) | |
+ nskb = alloc_skb(LL_MAX_HEADER + sizeof(struct ipv6hdr) | |
+ sizeof(struct tcphdr) + dst->trailer_len, | |
GFP_ATOMIC); | |
@@ -180,7 +190,9 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook) | |
skb_dst_set(nskb, dst); | |
- skb_reserve(nskb, hh_len + dst->header_len); | |
+ nskb->mark = fl6.flowi6_mark; | |
+ | |
+ skb_reserve(nskb, LL_MAX_HEADER); | |
ip6h = nf_reject_ip6hdr_put(nskb, oldskb, IPPROTO_TCP, | |
ip6_dst_hoplimit(dst)); | |
nf_reject_ip6_tcphdr_put(nskb, oldskb, otcph, otcplen); | |
--- a/net/ipv6/protocol.c | |
+++ b/net/ipv6/protocol.c | |
@@ -72,3 +72,29 @@ int inet6_del_offload(const struct net_offload *prot, unsigned char protocol) | |
return ret; | |
} | |
EXPORT_SYMBOL(inet6_del_offload); | |
+ | |
+int inet6_update_protocol(const struct inet6_protocol *new_prot, | |
+ unsigned char protocol, const struct inet6_protocol **old_prot) | |
+{ | |
+ int ret; | |
+ | |
+ rcu_read_lock(); | |
+ *old_prot = rcu_dereference(inet6_protos[protocol]); | |
+ if (!*old_prot) { | |
+ rcu_read_unlock(); | |
+ return -1; | |
+ } | |
+ rcu_read_unlock(); | |
+ | |
+ /* | 
+ * old_prot needs no protection here: cmpxchg succeeds only if | 
+ * old_prot still matches the value in inet6_protos[protocol] | 
+ */ | 
+ ret = (cmpxchg((const struct inet6_protocol **)&inet6_protos[protocol], | |
+ *old_prot, new_prot) == *old_prot) ? 0 : -1; | |
+ | |
+ synchronize_net(); | |
+ | |
+ return ret; | |
+} | |
+EXPORT_SYMBOL(inet6_update_protocol); | |
--- a/net/ipv6/route.c | |
+++ b/net/ipv6/route.c | |
@@ -90,6 +90,8 @@ static int ip6_pkt_discard(struct sk_buff *skb); | |
static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb); | |
static int ip6_pkt_prohibit(struct sk_buff *skb); | |
static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb); | |
+static int ip6_pkt_policy_failed(struct sk_buff *skb); | |
+static int ip6_pkt_policy_failed_out(struct net *net, struct sock *sk, struct sk_buff *skb); | |
static void ip6_link_failure(struct sk_buff *skb); | |
static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, | |
struct sk_buff *skb, u32 mtu); | |
@@ -175,6 +177,9 @@ static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt) | |
return dst_metrics_write_ptr(rt->dst.from); | |
} | |
+/* Define route change notification chain. */ | |
+ATOMIC_NOTIFIER_HEAD(ip6route_chain); | |
+ | |
static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old) | |
{ | |
struct rt6_info *rt = (struct rt6_info *)dst; | |
@@ -297,6 +302,21 @@ static const struct rt6_info ip6_prohibit_entry_template = { | |
.rt6i_ref = ATOMIC_INIT(1), | |
}; | |
+static const struct rt6_info ip6_policy_failed_entry_template = { | |
+ .dst = { | |
+ .__refcnt = ATOMIC_INIT(1), | |
+ .__use = 1, | |
+ .obsolete = DST_OBSOLETE_FORCE_CHK, | |
+ .error = -EACCES, | |
+ .input = ip6_pkt_policy_failed, | |
+ .output = ip6_pkt_policy_failed_out, | |
+ }, | |
+ .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), | |
+ .rt6i_protocol = RTPROT_KERNEL, | |
+ .rt6i_metric = ~(u32) 0, | |
+ .rt6i_ref = ATOMIC_INIT(1), | |
+}; | |
+ | |
static const struct rt6_info ip6_blk_hole_entry_template = { | |
.dst = { | |
.__refcnt = ATOMIC_INIT(1), | |
@@ -1885,6 +1905,11 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg) | |
rt->dst.output = ip6_pkt_prohibit_out; | |
rt->dst.input = ip6_pkt_prohibit; | |
break; | |
+ case RTN_POLICY_FAILED: | |
+ rt->dst.error = -EACCES; | |
+ rt->dst.output = ip6_pkt_policy_failed_out; | |
+ rt->dst.input = ip6_pkt_policy_failed; | |
+ break; | |
case RTN_THROW: | |
case RTN_UNREACHABLE: | |
default: | |
@@ -2012,6 +2037,9 @@ int ip6_route_add(struct fib6_config *cfg) | |
goto out; | |
err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc); | |
+ if (!err) | |
+ atomic_notifier_call_chain(&ip6route_chain, | |
+ RTM_NEWROUTE, rt); | |
kfree(mxc.mx); | |
@@ -2040,6 +2068,9 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info) | |
err = fib6_del(rt, info); | |
write_unlock_bh(&table->tb6_lock); | |
+ if (!err) | |
+ atomic_notifier_call_chain(&ip6route_chain, | |
+ RTM_DELROUTE, rt); | |
out: | |
ip6_rt_put(rt); | |
return err; | |
@@ -2486,6 +2517,17 @@ static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff | |
return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES); | |
} | |
+static int ip6_pkt_policy_failed(struct sk_buff *skb) | |
+{ | |
+ return ip6_pkt_drop(skb, ICMPV6_POLICY_FAIL, IPSTATS_MIB_INNOROUTES); | |
+} | |
+ | |
+static int ip6_pkt_policy_failed_out(struct net *net, struct sock *sk, struct sk_buff *skb) | |
+{ | |
+ skb->dev = skb_dst(skb)->dev; | |
+ return ip6_pkt_drop(skb, ICMPV6_POLICY_FAIL, IPSTATS_MIB_OUTNOROUTES); | |
+} | |
+ | |
/* | |
* Allocate a dst for local (unicast / anycast) address. | |
*/ | |
@@ -2728,7 +2770,8 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, | |
if (rtm->rtm_type == RTN_UNREACHABLE || | |
rtm->rtm_type == RTN_BLACKHOLE || | |
rtm->rtm_type == RTN_PROHIBIT || | |
- rtm->rtm_type == RTN_THROW) | |
+ rtm->rtm_type == RTN_THROW || | |
+ rtm->rtm_type == RTN_POLICY_FAILED) | |
cfg->fc_flags |= RTF_REJECT; | |
if (rtm->rtm_type == RTN_LOCAL) | |
@@ -3087,6 +3130,9 @@ static int rt6_fill_node(struct net *net, | |
case -EACCES: | |
rtm->rtm_type = RTN_PROHIBIT; | |
break; | |
+ case -EPERM: | |
+ rtm->rtm_type = RTN_POLICY_FAILED; | |
+ break; | |
case -EAGAIN: | |
rtm->rtm_type = RTN_THROW; | |
break; | |
@@ -3363,6 +3409,8 @@ static int ip6_route_dev_notify(struct notifier_block *this, | |
#ifdef CONFIG_IPV6_MULTIPLE_TABLES | |
net->ipv6.ip6_prohibit_entry->dst.dev = dev; | |
net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev); | |
+ net->ipv6.ip6_policy_failed_entry->dst.dev = dev; | |
+ net->ipv6.ip6_policy_failed_entry->rt6i_idev = in6_dev_get(dev); | |
net->ipv6.ip6_blk_hole_entry->dst.dev = dev; | |
net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev); | |
#endif | |
@@ -3371,6 +3419,18 @@ static int ip6_route_dev_notify(struct notifier_block *this, | |
return NOTIFY_OK; | |
} | |
+int rt6_register_notifier(struct notifier_block *nb) | |
+{ | |
+ return atomic_notifier_chain_register(&ip6route_chain, nb); | |
+} | |
+EXPORT_SYMBOL(rt6_register_notifier); | |
+ | |
+int rt6_unregister_notifier(struct notifier_block *nb) | |
+{ | |
+ return atomic_notifier_chain_unregister(&ip6route_chain, nb); | |
+} | |
+EXPORT_SYMBOL(rt6_unregister_notifier); | |
+ | |
/* | |
* /proc | |
*/ | |
@@ -3579,6 +3639,17 @@ static int __net_init ip6_route_net_init(struct net *net) | |
net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops; | |
dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst, | |
ip6_template_metrics, true); | |
+ | |
+ net->ipv6.ip6_policy_failed_entry = | |
+ kmemdup(&ip6_policy_failed_entry_template, | |
+ sizeof(*net->ipv6.ip6_policy_failed_entry), GFP_KERNEL); | |
+ if (!net->ipv6.ip6_policy_failed_entry) | |
+ goto out_ip6_blk_hole_entry; | |
+ net->ipv6.ip6_policy_failed_entry->dst.path = | |
+ (struct dst_entry *)net->ipv6.ip6_policy_failed_entry; | |
+ net->ipv6.ip6_policy_failed_entry->dst.ops = &net->ipv6.ip6_dst_ops; | |
+ dst_init_metrics(&net->ipv6.ip6_policy_failed_entry->dst, | |
+ ip6_template_metrics, true); | |
#endif | |
net->ipv6.sysctl.flush_delay = 0; | |
@@ -3597,6 +3668,8 @@ out: | |
return ret; | |
#ifdef CONFIG_IPV6_MULTIPLE_TABLES | |
+out_ip6_blk_hole_entry: | |
+ kfree(net->ipv6.ip6_blk_hole_entry); | |
out_ip6_prohibit_entry: | |
kfree(net->ipv6.ip6_prohibit_entry); | |
out_ip6_null_entry: | |
@@ -3614,6 +3687,7 @@ static void __net_exit ip6_route_net_exit(struct net *net) | |
#ifdef CONFIG_IPV6_MULTIPLE_TABLES | |
kfree(net->ipv6.ip6_prohibit_entry); | |
kfree(net->ipv6.ip6_blk_hole_entry); | |
+ kfree(net->ipv6.ip6_policy_failed_entry); | |
#endif | |
dst_entries_destroy(&net->ipv6.ip6_dst_ops); | |
} | |
@@ -3711,6 +3785,9 @@ int __init ip6_route_init(void) | |
init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); | |
init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev; | |
init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); | |
+ init_net.ipv6.ip6_policy_failed_entry->dst.dev = init_net.loopback_dev; | |
+ init_net.ipv6.ip6_policy_failed_entry->rt6i_idev = | |
+ in6_dev_get(init_net.loopback_dev); | |
#endif | |
ret = fib6_init(); | |
if (ret) | |
--- a/net/ipv6/sit.c | |
+++ b/net/ipv6/sit.c | |
@@ -87,6 +87,21 @@ struct sit_net { | |
struct net_device *fb_tunnel_dev; | |
}; | |
+void ipip6_update_offload_stats(struct net_device *dev, void *ptr) | |
+{ | |
+ struct pcpu_sw_netstats *tstats = per_cpu_ptr(dev->tstats, 0); | |
+ const struct pcpu_sw_netstats *offload_stats = | |
+ (struct pcpu_sw_netstats *)ptr; | |
+ | |
+ u64_stats_update_begin(&tstats->syncp); | |
+ tstats->tx_packets += offload_stats->tx_packets; | |
+ tstats->tx_bytes += offload_stats->tx_bytes; | |
+ tstats->rx_packets += offload_stats->rx_packets; | |
+ tstats->rx_bytes += offload_stats->rx_bytes; | |
+ u64_stats_update_end(&tstats->syncp); | |
+} | |
+EXPORT_SYMBOL(ipip6_update_offload_stats); | |
+ | |
/* | |
* Must be invoked with rcu_read_lock | |
*/ | |
@@ -709,6 +724,8 @@ static int ipip6_rcv(struct sk_buff *skb) | |
tstats->rx_bytes += skb->len; | |
u64_stats_update_end(&tstats->syncp); | |
+ /* Reset the skb_iif to the tunnel's interface index */ | 
+ skb->skb_iif = tunnel->dev->ifindex; | |
netif_rx(skb); | |
return 0; | |
@@ -984,6 +1001,8 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, | |
skb_set_inner_ipproto(skb, IPPROTO_IPV6); | |
+ /* Reset the skb_iif to the tunnel's interface index */ | 
+ skb->skb_iif = tunnel->dev->ifindex; | |
err = iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, | |
protocol, tos, ttl, df, | |
!net_eq(tunnel->net, dev_net(dev))); | |
--- a/net/ipv6/tcp_ipv6.c | |
+++ b/net/ipv6/tcp_ipv6.c | |
@@ -39,6 +39,7 @@ | |
#include <linux/ipsec.h> | |
#include <linux/times.h> | |
#include <linux/slab.h> | |
+#include <asm/unaligned.h> | |
#include <linux/uaccess.h> | |
#include <linux/ipv6.h> | |
#include <linux/icmpv6.h> | |
@@ -781,10 +782,10 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 | |
topt = (__be32 *)(t1 + 1); | |
if (tsecr) { | |
- *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | | |
- (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); | |
- *topt++ = htonl(tsval); | |
- *topt++ = htonl(tsecr); | |
+ put_unaligned_be32((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | | |
+ (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP, topt++); | |
+ put_unaligned_be32(tsval, topt++); | |
+ put_unaligned_be32(tsecr, topt++); | |
} | |
#ifdef CONFIG_TCP_MD5SIG | |
@@ -1033,6 +1034,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * | |
newtp->af_specific = &tcp_sock_ipv6_mapped_specific; | |
#endif | |
+ newnp->ipv6_mc_list = NULL; | |
newnp->ipv6_ac_list = NULL; | |
newnp->ipv6_fl_list = NULL; | |
newnp->pktoptions = NULL; | |
@@ -1102,6 +1104,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * | |
First: no IPv4 options. | |
*/ | |
newinet->inet_opt = NULL; | |
+ newnp->ipv6_mc_list = NULL; | |
newnp->ipv6_ac_list = NULL; | |
newnp->ipv6_fl_list = NULL; | |
--- a/net/ipx/af_ipx.c | |
+++ b/net/ipx/af_ipx.c | |
@@ -1168,11 +1168,10 @@ static int ipxitf_ioctl(unsigned int cmd, void __user *arg) | |
sipx->sipx_network = ipxif->if_netnum; | |
memcpy(sipx->sipx_node, ipxif->if_node, | |
sizeof(sipx->sipx_node)); | |
- rc = -EFAULT; | |
+ rc = 0; | |
if (copy_to_user(arg, &ifr, sizeof(ifr))) | |
- break; | |
+ rc = -EFAULT; | |
ipxitf_put(ipxif); | |
- rc = 0; | |
break; | |
} | |
case SIOCAIPXITFCRT: | |
--- a/net/key/af_key.c | |
+++ b/net/key/af_key.c | |
@@ -187,30 +187,22 @@ static int pfkey_release(struct socket *sock) | |
return 0; | |
} | |
-static int pfkey_broadcast_one(struct sk_buff *skb, struct sk_buff **skb2, | |
- gfp_t allocation, struct sock *sk) | |
+static int pfkey_broadcast_one(struct sk_buff *skb, gfp_t allocation, | |
+ struct sock *sk) | |
{ | |
int err = -ENOBUFS; | |
- sock_hold(sk); | |
- if (*skb2 == NULL) { | |
- if (atomic_read(&skb->users) != 1) { | |
- *skb2 = skb_clone(skb, allocation); | |
- } else { | |
- *skb2 = skb; | |
- atomic_inc(&skb->users); | |
- } | |
- } | |
- if (*skb2 != NULL) { | |
- if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) { | |
- skb_set_owner_r(*skb2, sk); | |
- skb_queue_tail(&sk->sk_receive_queue, *skb2); | |
- sk->sk_data_ready(sk); | |
- *skb2 = NULL; | |
- err = 0; | |
- } | |
+ if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) | |
+ return err; | |
+ | |
+ skb = skb_clone(skb, allocation); | |
+ | |
+ if (skb) { | |
+ skb_set_owner_r(skb, sk); | |
+ skb_queue_tail(&sk->sk_receive_queue, skb); | |
+ sk->sk_data_ready(sk); | |
+ err = 0; | |
} | |
- sock_put(sk); | |
return err; | |
} | |
@@ -225,7 +217,6 @@ static int pfkey_broadcast(struct sk_buff *skb, | |
{ | |
struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); | |
struct sock *sk; | |
- struct sk_buff *skb2 = NULL; | |
int err = -ESRCH; | |
/* XXX Do we need something like netlink_overrun? I think | |
@@ -244,7 +235,7 @@ static int pfkey_broadcast(struct sk_buff *skb, | |
* socket. | |
*/ | |
if (pfk->promisc) | |
- pfkey_broadcast_one(skb, &skb2, GFP_ATOMIC, sk); | |
+ pfkey_broadcast_one(skb, GFP_ATOMIC, sk); | |
/* the exact target will be processed later */ | |
if (sk == one_sk) | |
@@ -259,7 +250,7 @@ static int pfkey_broadcast(struct sk_buff *skb, | |
continue; | |
} | |
- err2 = pfkey_broadcast_one(skb, &skb2, GFP_ATOMIC, sk); | |
+ err2 = pfkey_broadcast_one(skb, GFP_ATOMIC, sk); | |
/* Error is cleared after successful sending to at least one | |
* registered KM */ | |
@@ -269,9 +260,8 @@ static int pfkey_broadcast(struct sk_buff *skb, | |
rcu_read_unlock(); | |
if (one_sk != NULL) | |
- err = pfkey_broadcast_one(skb, &skb2, GFP_KERNEL, one_sk); | |
+ err = pfkey_broadcast_one(skb, GFP_KERNEL, one_sk); | |
- kfree_skb(skb2); | |
kfree_skb(skb); | |
return err; | |
} | |
--- a/net/l2tp/l2tp_core.c | |
+++ b/net/l2tp/l2tp_core.c | |
@@ -365,6 +365,30 @@ struct l2tp_tunnel *l2tp_tunnel_find_nth(struct net *net, int nth) | |
} | |
EXPORT_SYMBOL_GPL(l2tp_tunnel_find_nth); | |
+void l2tp_stats_update(struct l2tp_tunnel *tunnel, | |
+ struct l2tp_session *session, | |
+ struct l2tp_stats *stats) | |
+{ | |
+ atomic_long_add(atomic_long_read(&stats->rx_packets), | |
+ &tunnel->stats.rx_packets); | |
+ atomic_long_add(atomic_long_read(&stats->rx_bytes), | |
+ &tunnel->stats.rx_bytes); | |
+ atomic_long_add(atomic_long_read(&stats->tx_packets), | |
+ &tunnel->stats.tx_packets); | |
+ atomic_long_add(atomic_long_read(&stats->tx_bytes), | |
+ &tunnel->stats.tx_bytes); | |
+ | |
+ atomic_long_add(atomic_long_read(&stats->rx_packets), | |
+ &session->stats.rx_packets); | |
+ atomic_long_add(atomic_long_read(&stats->rx_bytes), | |
+ &session->stats.rx_bytes); | |
+ atomic_long_add(atomic_long_read(&stats->tx_packets), | |
+ &session->stats.tx_packets); | |
+ atomic_long_add(atomic_long_read(&stats->tx_bytes), | |
+ &session->stats.tx_bytes); | |
+} | |
+EXPORT_SYMBOL_GPL(l2tp_stats_update); | |
+ | |
/***************************************************************************** | |
* Receive data handling | |
*****************************************************************************/ | |
@@ -1198,7 +1222,6 @@ static void l2tp_tunnel_destruct(struct sock *sk) | |
l2tp_info(tunnel, L2TP_MSG_CONTROL, "%s: closing...\n", tunnel->name); | |
- | |
/* Disable udp encapsulation */ | |
switch (tunnel->encap) { | |
case L2TP_ENCAPTYPE_UDP: | |
--- a/net/l2tp/l2tp_core.h | |
+++ b/net/l2tp/l2tp_core.h | |
@@ -247,6 +247,8 @@ struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth); | |
struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname); | |
struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id); | |
struct l2tp_tunnel *l2tp_tunnel_find_nth(struct net *net, int nth); | |
+void l2tp_stats_update(struct l2tp_tunnel *tunnel, struct l2tp_session *session, | |
+ struct l2tp_stats *stats); | |
int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, | |
u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, | |
--- a/net/l2tp/l2tp_eth.c | |
+++ b/net/l2tp/l2tp_eth.c | |
@@ -95,7 +95,11 @@ static int l2tp_eth_dev_xmit(struct sk_buff *skb, struct net_device *dev) | |
struct l2tp_eth *priv = netdev_priv(dev); | |
struct l2tp_session *session = priv->session; | |
unsigned int len = skb->len; | |
- int ret = l2tp_xmit_skb(session, skb, session->hdr_len); | |
+ int ret; | |
+ | |
+ skb->skb_iif = dev->ifindex; | |
+ | |
+ ret = l2tp_xmit_skb(session, skb, session->hdr_len); | |
if (likely(ret == NET_XMIT_SUCCESS)) { | |
atomic_long_add(len, &priv->tx_bytes); | |
@@ -133,6 +137,7 @@ static void l2tp_eth_dev_setup(struct net_device *dev) | |
{ | |
ether_setup(dev); | |
dev->priv_flags &= ~IFF_TX_SKB_SHARING; | |
+ dev->priv_flags |= IFF_PPP_L2TPV3; | |
dev->features |= NETIF_F_LLTX; | |
dev->netdev_ops = &l2tp_eth_netdev_ops; | |
dev->destructor = free_netdev; | |
@@ -166,6 +171,8 @@ static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb, | |
skb_dst_drop(skb); | |
nf_reset(skb); | |
+ skb->skb_iif = dev->ifindex; | |
+ | |
if (dev_forward_skb(dev, skb) == NET_RX_SUCCESS) { | |
atomic_long_inc(&priv->rx_packets); | |
atomic_long_add(data_len, &priv->rx_bytes); | |
--- a/net/l2tp/l2tp_ppp.c | |
+++ b/net/l2tp/l2tp_ppp.c | |
@@ -98,6 +98,7 @@ | |
#include <net/udp.h> | |
#include <net/xfrm.h> | |
#include <net/inet_common.h> | |
+#include <linux/if_pppox.h> | |
#include <asm/byteorder.h> | |
#include <linux/atomic.h> | |
@@ -131,9 +132,16 @@ struct pppol2tp_session { | |
}; | |
static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb); | |
- | |
-static const struct ppp_channel_ops pppol2tp_chan_ops = { | |
- .start_xmit = pppol2tp_xmit, | |
+static int pppol2tp_get_channel_protocol(struct ppp_channel *); | |
+static int pppol2tp_get_channel_protocol_ver(struct ppp_channel *); | |
+static void pppol2tp_hold_chan(struct ppp_channel *); | |
+static void pppol2tp_release_chan(struct ppp_channel *); | |
+static const struct pppol2tp_channel_ops pppol2tp_chan_ops = { | |
+ .ops.start_xmit = pppol2tp_xmit, | |
+ .ops.get_channel_protocol = pppol2tp_get_channel_protocol, | |
+ .ops.get_channel_protocol_ver = pppol2tp_get_channel_protocol_ver, | |
+ .ops.hold = pppol2tp_hold_chan, | |
+ .ops.release = pppol2tp_release_chan, | |
}; | |
static const struct proto_ops pppol2tp_ops; | |
@@ -251,6 +259,7 @@ static void pppol2tp_recv(struct l2tp_session *session, struct sk_buff *skb, int | |
nf_reset(skb); | |
po = pppox_sk(sk); | |
+ skb->skb_iif = ppp_dev_index(&po->chan); | |
ppp_input(&po->chan, skb); | |
} else { | |
l2tp_dbg(session, PPPOL2TP_MSG_DATA, | |
@@ -368,6 +377,126 @@ error: | |
return error; | |
} | |
+/* pppol2tp_hold_chan() */ | |
+static void pppol2tp_hold_chan(struct ppp_channel *chan) | |
+{ | |
+ struct sock *sk = (struct sock *)chan->private; | |
+ | |
+ sock_hold(sk); | |
+} | |
+ | |
+/* pppol2tp_release_chan() */ | |
+static void pppol2tp_release_chan(struct ppp_channel *chan) | |
+{ | |
+ struct sock *sk = (struct sock *)chan->private; | |
+ | |
+ sock_put(sk); | |
+} | |
+ | |
+/* pppol2tp_get_channel_protocol() | |
+ * Return the protocol type of the L2TP over PPP protocol | |
+ */ | |
+static int pppol2tp_get_channel_protocol(struct ppp_channel *chan) | |
+{ | |
+ return PX_PROTO_OL2TP; | |
+} | |
+ | |
+/* pppol2tp_get_channel_protocol_ver() | |
+ * Return the protocol version of the L2TP over PPP protocol | |
+ */ | |
+static int pppol2tp_get_channel_protocol_ver(struct ppp_channel *chan) | |
+{ | |
+ struct sock *sk; | |
+ struct l2tp_session *session; | |
+ struct l2tp_tunnel *tunnel; | |
+ struct pppol2tp_session *ps; | |
+ int version = 0; | |
+ | |
+ if (chan && chan->private) | |
+ sk = (struct sock *)chan->private; | |
+ else | |
+ return -1; | |
+ | |
+ /* Get session and tunnel contexts from the socket */ | |
+ session = pppol2tp_sock_to_session(sk); | |
+ if (!session) | |
+ return -1; | |
+ | |
+ ps = l2tp_session_priv(session); | |
+ if (!ps->tunnel_sock) { | |
+ sock_put(sk); | |
+ return -1; | |
+ } | |
+ | |
+ tunnel = l2tp_sock_to_tunnel(ps->tunnel_sock); | |
+ if (!tunnel) { | |
+ sock_put(sk); | |
+ return -1; | |
+ } | |
+ | |
+ version = tunnel->version; | |
+ | |
+ sock_put(ps->tunnel_sock); | |
+ sock_put(sk); | |
+ | |
+ return version; | |
+} | |
+ | |
+/* pppol2tp_get_addressing() */ | |
+static int pppol2tp_get_addressing(struct ppp_channel *chan, | |
+ struct pppol2tp_common_addr *addr) | |
+{ | |
+ struct sock *sk = (struct sock *)chan->private; | |
+ struct sock *sk_tun; | |
+ struct l2tp_session *session; | |
+ struct l2tp_tunnel *tunnel; | |
+ struct pppol2tp_session *ps; | |
+ struct inet_sock *isk = NULL; | |
+ int err = -ENXIO; | |
+ | |
+ /* Get session and tunnel contexts from the socket */ | |
+ session = pppol2tp_sock_to_session(sk); | |
+ if (!session) | |
+ return err; | |
+ | |
+ ps = l2tp_session_priv(session); | |
+ sk_tun = ps->tunnel_sock; | |
+ if (!sk_tun) { | |
+ sock_put(sk); | |
+ return err; | |
+ } | |
+ | |
+ tunnel = l2tp_sock_to_tunnel(sk_tun); | |
+ if (!tunnel) { | |
+ sock_put(sk_tun); | |
+ sock_put(sk); | |
+ return err; | |
+ } | |
+ isk = inet_sk(ps->tunnel_sock); | |
+ | |
+ addr->local_tunnel_id = tunnel->tunnel_id; | |
+ addr->remote_tunnel_id = tunnel->peer_tunnel_id; | |
+ addr->local_session_id = session->session_id; | |
+ addr->remote_session_id = session->peer_session_id; | |
+ | |
+ addr->local_addr.sin_port = isk->inet_sport; | |
+ addr->remote_addr.sin_port = isk->inet_dport; | |
+ addr->local_addr.sin_addr.s_addr = isk->inet_saddr; | |
+ addr->remote_addr.sin_addr.s_addr = isk->inet_daddr; | |
+ | |
+ sock_put(sk_tun); | |
+ sock_put(sk); | |
+ return 0; | |
+} | |
+ | |
+/* pppol2tp_channel_addressing_get() */ | |
+int pppol2tp_channel_addressing_get(struct ppp_channel *chan, | |
+ struct pppol2tp_common_addr *addr) | |
+{ | |
+ return pppol2tp_get_addressing(chan, addr); | |
+} | |
+EXPORT_SYMBOL(pppol2tp_channel_addressing_get); | |
+ | |
/* Transmit function called by generic PPP driver. Sends PPP frame | |
* over PPPoL2TP socket. | |
* | |
@@ -421,6 +550,10 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb) | |
__skb_push(skb, sizeof(ppph)); | |
skb->data[0] = ppph[0]; | |
skb->data[1] = ppph[1]; | |
+ /* Record the PPP device's ifindex as the incoming interface */ | 
+ if ((skb->protocol == htons(ETH_P_IP)) || | |
+ (skb->protocol == htons(ETH_P_IPV6))) | |
+ skb->skb_iif = ppp_dev_index(chan); | |
local_bh_disable(); | |
l2tp_xmit_skb(session, skb, session->hdr_len); | |
@@ -779,7 +912,7 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, | |
po->chan.hdrlen = PPPOL2TP_L2TP_HDR_SIZE_NOSEQ; | |
po->chan.private = sk; | |
- po->chan.ops = &pppol2tp_chan_ops; | |
+ po->chan.ops = &pppol2tp_chan_ops.ops; | |
po->chan.mtu = session->mtu; | |
error = ppp_register_net_channel(sock_net(sk), &po->chan); | |
--- a/net/mac80211/mlme.c | |
+++ b/net/mac80211/mlme.c | |
@@ -156,10 +156,10 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, | |
memcpy(&sta_ht_cap, &sband->ht_cap, sizeof(sta_ht_cap)); | |
ieee80211_apply_htcap_overrides(sdata, &sta_ht_cap); | |
+ memset(chandef, 0, sizeof(struct cfg80211_chan_def)); | |
chandef->chan = channel; | |
chandef->width = NL80211_CHAN_WIDTH_20_NOHT; | |
chandef->center_freq1 = channel->center_freq; | |
- chandef->center_freq2 = 0; | |
if (!ht_cap || !ht_oper || !sta_ht_cap.ht_supported) { | |
ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT; | |
--- a/net/mac80211/status.c | |
+++ b/net/mac80211/status.c | |
@@ -248,6 +248,8 @@ static int ieee80211_tx_radiotap_len(struct ieee80211_tx_info *info) | |
/* IEEE80211_RADIOTAP_RATE rate */ | |
if (info->status.rates[0].idx >= 0 && | |
!(info->status.rates[0].flags & (IEEE80211_TX_RC_MCS | | |
+ RATE_INFO_FLAGS_DMG | | |
+ RATE_INFO_FLAGS_EDMG | | |
IEEE80211_TX_RC_VHT_MCS))) | |
len += 2; | |
@@ -299,6 +301,8 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local, | |
/* IEEE80211_RADIOTAP_RATE */ | |
if (info->status.rates[0].idx >= 0 && | |
!(info->status.rates[0].flags & (IEEE80211_TX_RC_MCS | | |
+ RATE_INFO_FLAGS_DMG | | |
+ RATE_INFO_FLAGS_EDMG | | |
IEEE80211_TX_RC_VHT_MCS))) { | |
u16 rate; | |
--- a/net/netfilter/Kconfig | |
+++ b/net/netfilter/Kconfig | |
@@ -10,7 +10,7 @@ config NETFILTER_INGRESS | |
infrastructure. | |
config NETFILTER_NETLINK | |
- tristate | |
+ tristate "Netfilter NFNETLINK interface" | |
config NETFILTER_NETLINK_ACCT | |
tristate "Netfilter NFACCT over NFNETLINK interface" | |
@@ -114,6 +114,18 @@ config NF_CONNTRACK_EVENTS | |
If unsure, say `N'. | |
+config NF_CONNTRACK_RTCACHE | |
+ tristate "Cache route entries in conntrack objects" | |
+ depends on NETFILTER_ADVANCED | |
+ depends on NF_CONNTRACK | |
+ help | |
+ If this option is enabled, the connection tracking code will | |
+ cache routing information for each connection that is being | |
+ forwarded, at a cost of 32 bytes per conntrack object. | |
+ | |
+ To compile it as a module, choose M here. If unsure, say N. | |
+ The module will be called nf_conntrack_rtcache. | |
+ | |
config NF_CONNTRACK_TIMEOUT | |
bool 'Connection tracking timeout' | |
depends on NETFILTER_ADVANCED | |
@@ -124,6 +136,21 @@ config NF_CONNTRACK_TIMEOUT | |
If unsure, say `N'. | |
+config NF_CONNTRACK_DSCPREMARK_EXT | |
+ bool 'Connection tracking extension for dscp remark target' | |
+ depends on NETFILTER_ADVANCED | |
+ help | |
+ This option enables support for connection tracking extension | |
+ for dscp remark. | |
+ | |
+config NF_CONNTRACK_CHAIN_EVENTS | |
+ bool "Register multiple callbacks to ct events" | |
+ depends on NF_CONNTRACK_EVENTS | |
+ help | |
+ Support multiple registrations. | |
+ | |
+ If unsure, say `N'. | |
+ | |
config NF_CONNTRACK_TIMESTAMP | |
bool 'Connection tracking timestamping' | |
depends on NETFILTER_ADVANCED | |
@@ -206,7 +233,6 @@ config NF_CONNTRACK_FTP | |
config NF_CONNTRACK_H323 | |
tristate "H.323 protocol support" | |
- depends on IPV6 || IPV6=n | |
depends on NETFILTER_ADVANCED | |
help | |
H.323 is a VoIP signalling protocol from ITU-T. As one of the most | |
@@ -420,6 +446,15 @@ config NF_NAT_TFTP | |
depends on NF_CONNTRACK && NF_NAT | |
default NF_NAT && NF_CONNTRACK_TFTP | |
+config NF_NAT_TRY_NEXT_RULE | |
+ tristate | |
+ depends on NF_CONNTRACK && NF_NAT | |
+ default n | |
+ help | 
+ If this option is enabled, iptables will move on to the | 
+ next rule in the chain when no unique tuple can be found for | 
+ translation from the currently matched rule. | 
+ | |
config NF_NAT_REDIRECT | |
tristate "IPv4/IPv6 redirect support" | |
depends on NF_NAT | |
@@ -918,7 +953,6 @@ config NETFILTER_XT_TARGET_SECMARK | |
config NETFILTER_XT_TARGET_TCPMSS | |
tristate '"TCPMSS" target support' | |
- depends on IPV6 || IPV6=n | |
default m if NETFILTER_ADVANCED=n | |
---help--- | |
This option adds a `TCPMSS' target, which allows you to alter the | |
@@ -1170,6 +1204,13 @@ config NETFILTER_XT_MATCH_IPCOMP | |
To compile it as a module, choose M here. If unsure, say N. | |
+config NETFILTER_XT_MATCH_ID | |
+ tristate '"id" match support' | |
+ depends on NETFILTER_ADVANCED | |
+ ---help--- | |
+ This option adds an `id' dummy-match, which allows you to put | 
+ numeric IDs into your iptables ruleset. | |
+ | |
config NETFILTER_XT_MATCH_IPRANGE | |
tristate '"iprange" address range match support' | |
depends on NETFILTER_ADVANCED | |
--- a/net/netfilter/Makefile | |
+++ b/net/netfilter/Makefile | |
@@ -5,6 +5,7 @@ nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMEOUT) += nf_conntrack_timeout.o | |
nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMESTAMP) += nf_conntrack_timestamp.o | |
nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o | |
nf_conntrack-$(CONFIG_NF_CONNTRACK_LABELS) += nf_conntrack_labels.o | |
+nf_conntrack-$(CONFIG_NF_CONNTRACK_DSCPREMARK_EXT) += nf_conntrack_dscpremark_ext.o | |
obj-$(CONFIG_NETFILTER) = netfilter.o | |
@@ -16,6 +17,9 @@ obj-$(CONFIG_NETFILTER_NETLINK_LOG) += nfnetlink_log.o | |
# connection tracking | |
obj-$(CONFIG_NF_CONNTRACK) += nf_conntrack.o | |
+# optional conntrack route cache extension | |
+obj-$(CONFIG_NF_CONNTRACK_RTCACHE) += nf_conntrack_rtcache.o | |
+ | |
# SCTP protocol connection tracking | |
obj-$(CONFIG_NF_CT_PROTO_DCCP) += nf_conntrack_proto_dccp.o | |
obj-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o | |
@@ -141,6 +145,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) += xt_esp.o | |
obj-$(CONFIG_NETFILTER_XT_MATCH_HASHLIMIT) += xt_hashlimit.o | |
obj-$(CONFIG_NETFILTER_XT_MATCH_HELPER) += xt_helper.o | |
obj-$(CONFIG_NETFILTER_XT_MATCH_HL) += xt_hl.o | |
+obj-$(CONFIG_NETFILTER_XT_MATCH_ID) += xt_id.o | |
obj-$(CONFIG_NETFILTER_XT_MATCH_IPCOMP) += xt_ipcomp.o | |
obj-$(CONFIG_NETFILTER_XT_MATCH_IPRANGE) += xt_iprange.o | |
obj-$(CONFIG_NETFILTER_XT_MATCH_IPVS) += xt_ipvs.o | |
--- a/net/netfilter/ipvs/ip_vs_ctl.c | |
+++ b/net/netfilter/ipvs/ip_vs_ctl.c | |
@@ -2804,7 +2804,7 @@ static struct genl_family ip_vs_genl_family = { | |
.hdrsize = 0, | |
.name = IPVS_GENL_NAME, | |
.version = IPVS_GENL_VERSION, | |
- .maxattr = IPVS_CMD_MAX, | |
+ .maxattr = IPVS_CMD_ATTR_MAX, | |
.netnsok = true, /* Make ipvsadm to work on netns */ | |
}; | |
--- a/net/netfilter/nf_conntrack_core.c | |
+++ b/net/netfilter/nf_conntrack_core.c | |
@@ -47,6 +47,7 @@ | |
#include <net/netfilter/nf_conntrack_zones.h> | |
#include <net/netfilter/nf_conntrack_timestamp.h> | |
#include <net/netfilter/nf_conntrack_timeout.h> | |
+#include <net/netfilter/nf_conntrack_dscpremark_ext.h> | |
#include <net/netfilter/nf_conntrack_labels.h> | |
#include <net/netfilter/nf_conntrack_synproxy.h> | |
#include <net/netfilter/nf_nat.h> | |
@@ -237,7 +238,7 @@ EXPORT_SYMBOL_GPL(nf_ct_invert_tuple); | |
static void | |
clean_from_lists(struct nf_conn *ct) | |
{ | |
- pr_debug("clean_from_lists(%p)\n", ct); | |
+ pr_debug("clean_from_lists(%pK)\n", ct); | |
hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode); | |
hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode); | |
@@ -330,7 +331,7 @@ destroy_conntrack(struct nf_conntrack *nfct) | |
struct net *net = nf_ct_net(ct); | |
struct nf_conntrack_l4proto *l4proto; | |
- pr_debug("destroy_conntrack(%p)\n", ct); | |
+ pr_debug("destroy_conntrack(%pK)\n", ct); | |
NF_CT_ASSERT(atomic_read(&nfct->use) == 0); | |
NF_CT_ASSERT(!timer_pending(&ct->timeout)); | |
@@ -361,7 +362,7 @@ destroy_conntrack(struct nf_conntrack *nfct) | |
if (ct->master) | |
nf_ct_put(ct->master); | |
- pr_debug("destroy_conntrack: returning ct=%p to slab\n", ct); | |
+ pr_debug("destroy_conntrack: returning ct=%pK to slab\n", ct); | |
nf_conntrack_free(ct); | |
} | |
@@ -629,7 +630,7 @@ __nf_conntrack_confirm(struct sk_buff *skb) | |
* confirmed us. | |
*/ | |
NF_CT_ASSERT(!nf_ct_is_confirmed(ct)); | |
- pr_debug("Confirming conntrack %p\n", ct); | |
+ pr_debug("Confirming conntrack %pK\n", ct); | |
/* We have to check the DYING flag after unlink to prevent | |
* a race against nf_ct_get_next_corpse() possibly called from | |
* user context, else we insert an already 'dead' hash, blocking | |
@@ -961,6 +962,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, | |
nf_ct_acct_ext_add(ct, GFP_ATOMIC); | |
nf_ct_tstamp_ext_add(ct, GFP_ATOMIC); | |
nf_ct_labels_ext_add(ct); | |
+ nf_ct_dscpremark_ext_add(ct, GFP_ATOMIC); | |
ecache = tmpl ? nf_ct_ecache_find(tmpl) : NULL; | |
nf_ct_ecache_ext_add(ct, ecache ? ecache->ctmask : 0, | |
@@ -972,7 +974,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, | |
spin_lock(&nf_conntrack_expect_lock); | |
exp = nf_ct_find_expectation(net, zone, tuple); | |
if (exp) { | |
- pr_debug("conntrack: expectation arrives ct=%p exp=%p\n", | |
+ pr_debug("conntrack: expectation arrives ct=%pK exp=%pK\n", | |
ct, exp); | |
/* Welcome, Mr. Bond. We've been expecting you... */ | |
__set_bit(IPS_EXPECTED_BIT, &ct->status); | |
@@ -1063,14 +1065,14 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl, | |
} else { | |
/* Once we've had two way comms, always ESTABLISHED. */ | |
if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { | |
- pr_debug("nf_conntrack_in: normal packet for %p\n", ct); | |
+ pr_debug("nf_conntrack_in:normal packet for %pK\n", ct); | |
*ctinfo = IP_CT_ESTABLISHED; | |
} else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) { | |
- pr_debug("nf_conntrack_in: related packet for %p\n", | |
+ pr_debug("nf_conntrack_in: related packet for %pK\n", | |
ct); | |
*ctinfo = IP_CT_RELATED; | |
} else { | |
- pr_debug("nf_conntrack_in: new packet for %p\n", ct); | |
+ pr_debug("nf_conntrack_in: new packet for %pK\n", ct); | |
*ctinfo = IP_CT_NEW; | |
} | |
*set_reply = 0; | |
@@ -1212,7 +1214,7 @@ void nf_conntrack_alter_reply(struct nf_conn *ct, | |
/* Should be unconfirmed, so not in hash table yet */ | |
NF_CT_ASSERT(!nf_ct_is_confirmed(ct)); | |
- pr_debug("Altering reply tuple of %p to ", ct); | |
+ pr_debug("Altering reply tuple of %pK to ", ct); | |
nf_ct_dump_tuple(newreply); | |
ct->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply; | |
@@ -1394,6 +1396,7 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data), | |
} | |
spin_unlock(lockp); | |
local_bh_enable(); | |
+ cond_resched(); | |
} | |
for_each_possible_cpu(cpu) { | |
@@ -1406,6 +1409,7 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data), | |
set_bit(IPS_DYING_BIT, &ct->status); | |
} | |
spin_unlock_bh(&pcpu->lock); | |
+ cond_resched(); | |
} | |
return NULL; | |
found: | |
@@ -1422,6 +1426,8 @@ void nf_ct_iterate_cleanup(struct net *net, | |
struct nf_conn *ct; | |
unsigned int bucket = 0; | |
+ might_sleep(); | |
+ | |
while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) { | |
/* Time to push up daises... */ | |
if (del_timer(&ct->timeout)) | |
@@ -1430,6 +1436,7 @@ void nf_ct_iterate_cleanup(struct net *net, | |
/* ... else the timer will get him soon. */ | |
nf_ct_put(ct); | |
+ cond_resched(); | |
} | |
} | |
EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup); | |
@@ -1478,6 +1485,7 @@ void nf_conntrack_cleanup_end(void) | |
nf_conntrack_proto_fini(); | |
nf_conntrack_seqadj_fini(); | |
nf_conntrack_labels_fini(); | |
+ nf_conntrack_dscpremark_ext_fini(); | |
nf_conntrack_helper_fini(); | |
nf_conntrack_timeout_fini(); | |
nf_conntrack_ecache_fini(); | |
@@ -1666,6 +1674,10 @@ int nf_conntrack_init_start(void) | |
NF_CONNTRACK_VERSION, nf_conntrack_htable_size, | |
nf_conntrack_max); | |
+ ret = nf_conntrack_dscpremark_ext_init(); | |
+ if (ret < 0) | |
+ goto err_dscpremark_ext; | |
+ | |
ret = nf_conntrack_expect_init(); | |
if (ret < 0) | |
goto err_expect; | |
@@ -1738,6 +1750,8 @@ err_tstamp: | |
err_acct: | |
nf_conntrack_expect_fini(); | |
err_expect: | |
+ nf_conntrack_dscpremark_ext_fini(); | |
+err_dscpremark_ext: | |
return ret; | |
} | |
@@ -1817,6 +1831,10 @@ int nf_conntrack_init_net(struct net *net) | |
ret = nf_conntrack_proto_pernet_init(net); | |
if (ret < 0) | |
goto err_proto; | |
+ | |
+#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS | |
+ ATOMIC_INIT_NOTIFIER_HEAD(&net->ct.nf_conntrack_chain); | |
+#endif | |
return 0; | |
err_proto: | |
new file mode 100644 | |
--- /dev/null | |
+++ b/net/netfilter/nf_conntrack_dscpremark_ext.c | |
@@ -0,0 +1,92 @@ | |
+/* | |
+ ************************************************************************** | |
+ * Copyright (c) 2014-2015, The Linux Foundation. All rights reserved. | |
+ * Permission to use, copy, modify, and/or distribute this software for | |
+ * any purpose with or without fee is hereby granted, provided that the | |
+ * above copyright notice and this permission notice appear in all copies. | |
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | |
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | |
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | |
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | |
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | |
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT | |
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | |
+ ************************************************************************** | |
+ */ | |
+ | |
+/* DSCP remark handling conntrack extension registration. */ | |
+ | |
+#include <linux/netfilter.h> | |
+#include <linux/slab.h> | |
+#include <linux/kernel.h> | |
+#include <linux/moduleparam.h> | |
+#include <linux/export.h> | |
+ | |
+#include <net/netfilter/nf_conntrack.h> | |
+#include <net/netfilter/nf_conntrack_extend.h> | |
+#include <net/netfilter/nf_conntrack_dscpremark_ext.h> | |
+ | |
+/* DSCP remark conntrack extension type declaration */ | |
+static struct nf_ct_ext_type dscpremark_extend __read_mostly = { | |
+ .len = sizeof(struct nf_ct_dscpremark_ext), | |
+ .align = __alignof__(struct nf_ct_dscpremark_ext), | |
+ .id = NF_CT_EXT_DSCPREMARK, | |
+}; | |
+ | |
+/* nf_conntrack_dscpremark_ext_init() | |
+ * Initializes the DSCP remark conntrack extension. | |
+ */ | |
+int nf_conntrack_dscpremark_ext_init(void) | |
+{ | |
+ int ret; | |
+ | |
+ ret = nf_ct_extend_register(&dscpremark_extend); | |
+ if (ret < 0) { | |
+ pr_warn("nf_conntrack_dscpremark: Unable to register extension\n"); | |
+ return ret; | |
+ } | |
+ | |
+ return 0; | |
+} | |
+ | |
+/* nf_conntrack_dscpremark_ext_set_dscp_rule_valid() | |
+ * Set DSCP rule validity flag in the extension | |
+ */ | |
+int nf_conntrack_dscpremark_ext_set_dscp_rule_valid(struct nf_conn *ct) | |
+{ | |
+ struct nf_ct_dscpremark_ext *ncde; | |
+ | |
+ ncde = nf_ct_dscpremark_ext_find(ct); | |
+ if (!ncde) | |
+ return -1; | |
+ | |
+ ncde->rule_flags = NF_CT_DSCPREMARK_EXT_DSCP_RULE_VALID; | |
+ return 0; | |
+} | |
+EXPORT_SYMBOL(nf_conntrack_dscpremark_ext_set_dscp_rule_valid); | |
+ | |
+/* nf_conntrack_dscpremark_ext_get_dscp_rule_validity() | |
+ * Check if the DSCP rule flag is valid from the extension | |
+ */ | |
+int nf_conntrack_dscpremark_ext_get_dscp_rule_validity(struct nf_conn *ct) | |
+{ | |
+ struct nf_ct_dscpremark_ext *ncde; | |
+ | |
+ ncde = nf_ct_dscpremark_ext_find(ct); | |
+ if (!ncde) | |
+ return NF_CT_DSCPREMARK_EXT_RULE_NOT_VALID; | |
+ | |
+ if (ncde->rule_flags & NF_CT_DSCPREMARK_EXT_DSCP_RULE_VALID) | |
+ return NF_CT_DSCPREMARK_EXT_RULE_VALID; | |
+ | |
+ return NF_CT_DSCPREMARK_EXT_RULE_NOT_VALID; | |
+} | |
+EXPORT_SYMBOL(nf_conntrack_dscpremark_ext_get_dscp_rule_validity); | |
+ | |
+/* nf_conntrack_dscpremark_ext_fini() | |
+ * De-initializes the DSCP remark conntrack extension. | |
+ */ | |
+void nf_conntrack_dscpremark_ext_fini(void) | |
+{ | |
+ nf_ct_extend_unregister(&dscpremark_extend); | |
+} | |
--- a/net/netfilter/nf_conntrack_ecache.c | |
+++ b/net/netfilter/nf_conntrack_ecache.c | |
@@ -18,6 +18,9 @@ | |
#include <linux/stddef.h> | |
#include <linux/err.h> | |
#include <linux/percpu.h> | |
+#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS | |
+#include <linux/notifier.h> | |
+#endif | |
#include <linux/kernel.h> | |
#include <linux/netdevice.h> | |
#include <linux/slab.h> | |
@@ -115,6 +118,52 @@ static void ecache_work(struct work_struct *work) | |
/* deliver cached events and clear cache entry - must be called with locally | |
* disabled softirqs */ | |
+#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS | |
+void nf_ct_deliver_cached_events(struct nf_conn *ct) | 
+{ | 
+	unsigned long events, missed; | 
+	struct nf_conntrack_ecache *e; | 
+	struct nf_ct_event item; | 
+	struct net *net = nf_ct_net(ct); | 
+	int ret; | 
+ | 
+	e = nf_ct_ecache_find(ct); | 
+	if (!e) | 
+		return; | 
+ | 
+	events = xchg(&e->cache, 0); | 
+ | 
+	if (!nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct) || !events) | 
+		return; | 
+ | 
+	/* | 
+	 * We make a copy of the missed event cache without taking | 
+	 * the lock, thus we may send missed events twice. However, | 
+	 * this does not harm and it happens very rarely. | 
+	 */ | 
+	missed = e->missed; | 
+ | 
+	if (!((events | missed) & e->ctmask)) | 
+		return; | 
+ | 
+	item.ct = ct; | 
+	item.portid = 0; | 
+	item.report = 0; | 
+ | 
+	ret = atomic_notifier_call_chain(&net->ct.nf_conntrack_chain, | 
+					 events | missed, &item); | 
+ | 
+	if (likely(ret >= 0 && !missed)) | 
+		return; | 
+ | 
+	spin_lock_bh(&ct->lock); | 
+	if (ret < 0) | 
+		e->missed |= events; | 
+	else | 
+		e->missed &= ~missed; | 
+	spin_unlock_bh(&ct->lock); | 
+} | 
+#else | |
void nf_ct_deliver_cached_events(struct nf_conn *ct) | |
{ | |
struct net *net = nf_ct_net(ct); | |
@@ -165,8 +214,15 @@ void nf_ct_deliver_cached_events(struct nf_conn *ct) | |
out_unlock: | |
rcu_read_unlock(); | |
} | |
+#endif | |
EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events); | |
+#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS | |
+int nf_conntrack_register_notifier(struct net *net, struct notifier_block *nb) | |
+{ | |
+ return atomic_notifier_chain_register(&net->ct.nf_conntrack_chain, nb); | |
+} | |
+#else | |
int nf_conntrack_register_notifier(struct net *net, | |
struct nf_ct_event_notifier *new) | |
{ | |
@@ -187,8 +243,16 @@ out_unlock: | |
mutex_unlock(&nf_ct_ecache_mutex); | |
return ret; | |
} | |
+#endif | |
EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier); | |
+#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS | |
+int nf_conntrack_unregister_notifier(struct net *net, struct notifier_block *nb) | |
+{ | |
+ return atomic_notifier_chain_unregister(&net->ct.nf_conntrack_chain, | |
+ nb); | |
+} | |
+#else | |
void nf_conntrack_unregister_notifier(struct net *net, | |
struct nf_ct_event_notifier *new) | |
{ | |
@@ -201,6 +265,7 @@ void nf_conntrack_unregister_notifier(struct net *net, | |
RCU_INIT_POINTER(net->ct.nf_conntrack_event_cb, NULL); | |
mutex_unlock(&nf_ct_ecache_mutex); | |
} | |
+#endif | |
EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier); | |
int nf_ct_expect_register_notifier(struct net *net, | |
--- a/net/netfilter/nf_conntrack_netlink.c | |
+++ b/net/netfilter/nf_conntrack_netlink.c | |
@@ -28,6 +28,9 @@ | |
#include <linux/netlink.h> | |
#include <linux/spinlock.h> | |
#include <linux/interrupt.h> | |
+#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS | |
+#include <linux/notifier.h> | |
+#endif | |
#include <linux/slab.h> | |
#include <linux/netfilter.h> | |
@@ -629,19 +632,27 @@ ctnetlink_nlmsg_size(const struct nf_conn *ct) | |
} | |
#ifdef CONFIG_NF_CONNTRACK_EVENTS | |
+#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS | |
+static int ctnetlink_conntrack_event(struct notifier_block *this, | |
+ unsigned long events, void *ptr) | |
+#else | |
static int | |
ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) | |
+#endif | |
{ | |
const struct nf_conntrack_zone *zone; | |
struct net *net; | |
struct nlmsghdr *nlh; | |
struct nfgenmsg *nfmsg; | |
struct nlattr *nest_parms; | |
- struct nf_conn *ct = item->ct; | |
struct sk_buff *skb; | |
unsigned int type; | |
unsigned int flags = 0, group; | |
int err; | |
+#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS | |
+ struct nf_ct_event *item = (struct nf_ct_event *)ptr; | |
+#endif | |
+ struct nf_conn *ct = item->ct; | |
/* ignore our fake conntrack entry */ | |
if (nf_ct_is_untracked(ct)) | |
@@ -3258,9 +3269,15 @@ ctnetlink_stat_exp_cpu(struct sock *ctnl, struct sk_buff *skb, | |
} | |
#ifdef CONFIG_NF_CONNTRACK_EVENTS | |
+#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS | |
+static struct notifier_block ctnl_notifier = { | |
+ .notifier_call = ctnetlink_conntrack_event, | |
+}; | |
+#else | |
static struct nf_ct_event_notifier ctnl_notifier = { | |
.fcn = ctnetlink_conntrack_event, | |
}; | |
+#endif | |
static struct nf_exp_event_notifier ctnl_notifier_exp = { | |
.fcn = ctnetlink_expect_event, | |
--- a/net/netfilter/nf_conntrack_proto_gre.c | |
+++ b/net/netfilter/nf_conntrack_proto_gre.c | |
@@ -1,4 +1,6 @@ | |
/* | |
+ * Copyright (c) 2018, The Linux Foundation. All rights reserved. | |
+ * | |
* ip_conntrack_proto_gre.c - Version 3.0 | |
* | |
* Connection tracking protocol helper module for GRE. | |
@@ -393,17 +395,62 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 __read_mostly = { | |
.init_net = gre_init_net, | |
}; | |
+static struct nf_conntrack_l4proto nf_conntrack_l4proto_gre6 __read_mostly = { | |
+ .l3proto = AF_INET6, | |
+ .l4proto = IPPROTO_GRE, | |
+ .name = "gre", | |
+ .pkt_to_tuple = gre_pkt_to_tuple, | |
+ .invert_tuple = gre_invert_tuple, | |
+ .print_tuple = gre_print_tuple, | |
+ .print_conntrack = gre_print_conntrack, | |
+ .get_timeouts = gre_get_timeouts, | |
+ .packet = gre_packet, | |
+ .new = gre_new, | |
+ .destroy = gre_destroy, | |
+ .me = THIS_MODULE, | |
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK) | |
+ .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, | |
+ .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, | |
+ .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, | |
+ .nla_policy = nf_ct_port_nla_policy, | |
+#endif | |
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) | |
+ .ctnl_timeout = { | |
+ .nlattr_to_obj = gre_timeout_nlattr_to_obj, | |
+ .obj_to_nlattr = gre_timeout_obj_to_nlattr, | |
+ .nlattr_max = CTA_TIMEOUT_GRE_MAX, | |
+ .obj_size = sizeof(unsigned int) * GRE_CT_MAX, | |
+ .nla_policy = gre_timeout_nla_policy, | |
+ }, | |
+#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ | |
+ .net_id = &proto_gre_net_id, | |
+ .init_net = gre_init_net, | |
+}; | |
+ | |
static int proto_gre_net_init(struct net *net) | |
{ | |
int ret = 0; | |
ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_gre4); | |
- if (ret < 0) | |
+ if (ret < 0) { | |
pr_err("nf_conntrack_gre4: pernet registration failed.\n"); | |
+ goto out; | |
+ } | |
+ | |
+ ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_gre6); | |
+ if (ret < 0) { | |
+ pr_err("nf_conntrack_gre6: pernet registration failed.\n"); | |
+ goto cleanup_gre4; | |
+ } | |
+ return 0; | |
+cleanup_gre4: | |
+ nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_gre4); | |
+out: | |
return ret; | |
} | |
static void proto_gre_net_exit(struct net *net) | |
{ | |
+ nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_gre6); | |
nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_gre4); | |
nf_ct_gre_keymap_flush(net); | |
} | |
@@ -427,7 +474,13 @@ static int __init nf_ct_proto_gre_init(void) | |
if (ret < 0) | |
goto out_gre4; | |
+ ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_gre6); | |
+ if (ret < 0) | |
+ goto out_gre6; | |
+ | |
return 0; | |
+out_gre6: | |
+ nf_ct_l4proto_unregister(&nf_conntrack_l4proto_gre4); | |
out_gre4: | |
unregister_pernet_subsys(&proto_gre_net_ops); | |
out_pernet: | |
@@ -436,6 +489,7 @@ out_pernet: | |
static void __exit nf_ct_proto_gre_fini(void) | |
{ | |
+ nf_ct_l4proto_unregister(&nf_conntrack_l4proto_gre6); | |
nf_ct_l4proto_unregister(&nf_conntrack_l4proto_gre4); | |
unregister_pernet_subsys(&proto_gre_net_ops); | |
} | |
--- a/net/netfilter/nf_conntrack_proto_tcp.c | |
+++ b/net/netfilter/nf_conntrack_proto_tcp.c | |
@@ -33,10 +33,15 @@ | |
#include <net/netfilter/ipv4/nf_conntrack_ipv4.h> | |
#include <net/netfilter/ipv6/nf_conntrack_ipv6.h> | |
+/* Do not check the TCP window for incoming packets */ | |
+int nf_ct_tcp_no_window_check __read_mostly = 1; | |
+EXPORT_SYMBOL_GPL(nf_ct_tcp_no_window_check); | |
+ | |
/* "Be conservative in what you do, | |
be liberal in what you accept from others." | |
If it's non-zero, we mark only out of window RST segments as INVALID. */ | |
-static int nf_ct_tcp_be_liberal __read_mostly = 0; | |
+int nf_ct_tcp_be_liberal __read_mostly; | |
+EXPORT_SYMBOL_GPL(nf_ct_tcp_be_liberal); | |
/* If it is set to zero, we disable picking up already established | |
connections. */ | |
@@ -453,7 +458,7 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff, | |
/* Fast path for timestamp-only option */ | |
if (length == TCPOLEN_TSTAMP_ALIGNED | |
- && *(__be32 *)ptr == htonl((TCPOPT_NOP << 24) | |
+ && net_hdr_word(ptr) == htonl((TCPOPT_NOP << 24) | |
| (TCPOPT_NOP << 16) | |
| (TCPOPT_TIMESTAMP << 8) | |
| TCPOLEN_TIMESTAMP)) | |
@@ -515,6 +520,9 @@ static bool tcp_in_window(const struct nf_conn *ct, | |
s32 receiver_offset; | |
bool res, in_recv_win; | |
+ if (nf_ct_tcp_no_window_check) | |
+ return true; | |
+ | |
/* | |
* Get the required data from the packet. | |
*/ | |
@@ -1481,6 +1489,13 @@ static struct ctl_table tcp_sysctl_table[] = { | |
.mode = 0644, | |
.proc_handler = proc_dointvec, | |
}, | |
+ { | |
+ .procname = "nf_conntrack_tcp_no_window_check", | |
+ .data = &nf_ct_tcp_no_window_check, | |
+ .maxlen = sizeof(unsigned int), | |
+ .mode = 0644, | |
+ .proc_handler = proc_dointvec, | |
+ }, | |
{ } | |
}; | |
new file mode 100644 | |
--- /dev/null | |
+++ b/net/netfilter/nf_conntrack_rtcache.c | |
@@ -0,0 +1,416 @@ | |
+/* route cache for netfilter. | |
+ * | |
+ * (C) 2014 Red Hat GmbH | |
+ * | |
+ * This program is free software; you can redistribute it and/or modify | |
+ * it under the terms of the GNU General Public License version 2 as | |
+ * published by the Free Software Foundation. | |
+ */ | |
+ | |
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | |
+ | |
+#include <linux/types.h> | |
+#include <linux/netfilter.h> | |
+#include <linux/skbuff.h> | |
+#include <linux/stddef.h> | |
+#include <linux/kernel.h> | |
+#include <linux/netdevice.h> | |
+#include <linux/export.h> | |
+#include <linux/module.h> | |
+ | |
+#include <net/dst.h> | |
+ | |
+#include <net/netfilter/nf_conntrack.h> | |
+#include <net/netfilter/nf_conntrack_core.h> | |
+#include <net/netfilter/nf_conntrack_extend.h> | |
+#include <net/netfilter/nf_conntrack_rtcache.h> | |
+ | |
+#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6) | |
+#include <net/ip6_fib.h> | |
+#endif | |
+ | |
+static void __nf_conn_rtcache_destroy(struct nf_conn_rtcache *rtc, | |
+ enum ip_conntrack_dir dir) | |
+{ | |
+ struct dst_entry *dst = rtc->cached_dst[dir].dst; | |
+ | |
+ dst_release(dst); | |
+} | |
+ | |
+static void nf_conn_rtcache_destroy(struct nf_conn *ct) | |
+{ | |
+ struct nf_conn_rtcache *rtc = nf_ct_rtcache_find(ct); | |
+ | |
+ if (!rtc) | |
+ return; | |
+ | |
+ __nf_conn_rtcache_destroy(rtc, IP_CT_DIR_ORIGINAL); | |
+ __nf_conn_rtcache_destroy(rtc, IP_CT_DIR_REPLY); | |
+} | |
+ | |
+static void nf_ct_rtcache_ext_add(struct nf_conn *ct) | |
+{ | |
+ struct nf_conn_rtcache *rtc; | |
+ | |
+ rtc = nf_ct_ext_add(ct, NF_CT_EXT_RTCACHE, GFP_ATOMIC); | |
+ if (rtc) { | |
+ rtc->cached_dst[IP_CT_DIR_ORIGINAL].iif = -1; | |
+ rtc->cached_dst[IP_CT_DIR_ORIGINAL].dst = NULL; | |
+ rtc->cached_dst[IP_CT_DIR_REPLY].iif = -1; | |
+ rtc->cached_dst[IP_CT_DIR_REPLY].dst = NULL; | |
+ } | |
+} | |
+ | |
+static struct nf_conn_rtcache *nf_ct_rtcache_find_usable(struct nf_conn *ct) | |
+{ | |
+ if (nf_ct_is_untracked(ct)) | |
+ return NULL; | |
+ return nf_ct_rtcache_find(ct); | |
+} | |
+ | |
+static struct dst_entry * | |
+nf_conn_rtcache_dst_get(const struct nf_conn_rtcache *rtc, | |
+ enum ip_conntrack_dir dir) | |
+{ | |
+ return rtc->cached_dst[dir].dst; | |
+} | |
+ | |
+static u32 nf_rtcache_get_cookie(int pf, const struct dst_entry *dst) | |
+{ | |
+#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6) | |
+ if (pf == NFPROTO_IPV6) { | |
+ const struct rt6_info *rt = (const struct rt6_info *)dst; | |
+ | |
+ if (rt->rt6i_node) | |
+ return (u32)rt->rt6i_node->fn_sernum; | |
+ } | |
+#endif | |
+ return 0; | |
+} | |
+ | |
+static void nf_conn_rtcache_dst_set(int pf, | |
+ struct nf_conn_rtcache *rtc, | |
+ struct dst_entry *dst, | |
+ enum ip_conntrack_dir dir, int iif) | |
+{ | |
+ if (rtc->cached_dst[dir].iif != iif) | |
+ rtc->cached_dst[dir].iif = iif; | |
+ | |
+ if (rtc->cached_dst[dir].dst != dst) { | |
+ struct dst_entry *old; | |
+ | |
+ dst_hold(dst); | |
+ | |
+ old = xchg(&rtc->cached_dst[dir].dst, dst); | |
+ dst_release(old); | |
+ | |
+#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6) | |
+ if (pf == NFPROTO_IPV6) | |
+ rtc->cached_dst[dir].cookie = | |
+ nf_rtcache_get_cookie(pf, dst); | |
+#endif | |
+ } | |
+} | |
+ | |
+static void nf_conn_rtcache_dst_obsolete(struct nf_conn_rtcache *rtc, | |
+ enum ip_conntrack_dir dir) | |
+{ | |
+ struct dst_entry *old; | |
+ | |
+ pr_debug("Invalidate iif %d for dir %d on cache %p\n", | |
+ rtc->cached_dst[dir].iif, dir, rtc); | |
+ | |
+ old = xchg(&rtc->cached_dst[dir].dst, NULL); | |
+ dst_release(old); | |
+ rtc->cached_dst[dir].iif = -1; | |
+} | |
+ | |
+static unsigned int nf_rtcache_in(u_int8_t pf, | |
+ struct sk_buff *skb, | |
+ const struct nf_hook_state *state) | |
+{ | |
+ struct nf_conn_rtcache *rtc; | |
+ enum ip_conntrack_info ctinfo; | |
+ enum ip_conntrack_dir dir; | |
+ struct dst_entry *dst; | |
+ struct nf_conn *ct; | |
+ int iif; | |
+ u32 cookie; | |
+ | |
+ if (skb_dst(skb) || skb->sk) | |
+ return NF_ACCEPT; | |
+ | |
+ ct = nf_ct_get(skb, &ctinfo); | |
+ if (!ct) | |
+ return NF_ACCEPT; | |
+ | |
+ rtc = nf_ct_rtcache_find_usable(ct); | |
+ if (!rtc) | |
+ return NF_ACCEPT; | |
+ | |
+ /* if iif changes, don't use cache and let ip stack | |
+ * do route lookup. | |
+ * | |
+ * If rp_filter is enabled it might toss skb, so | |
+ * we don't want to avoid these checks. | |
+ */ | |
+ dir = CTINFO2DIR(ctinfo); | |
+ iif = nf_conn_rtcache_iif_get(rtc, dir); | |
+ if (state->in->ifindex != iif) { | |
+ pr_debug("ct %p, iif %d, cached iif %d, skip cached entry\n", | |
+ ct, iif, state->in->ifindex); | |
+ return NF_ACCEPT; | |
+ } | |
+ dst = nf_conn_rtcache_dst_get(rtc, dir); | |
+ if (dst == NULL) | |
+ return NF_ACCEPT; | |
+ | |
+ cookie = nf_rtcache_get_cookie(pf, dst); | |
+ | |
+ dst = dst_check(dst, cookie); | |
+ pr_debug("obtained dst %p for skb %p, cookie %d\n", dst, skb, cookie); | |
+ if (likely(dst)) | |
+ skb_dst_set_noref(skb, dst); | |
+ else | |
+ nf_conn_rtcache_dst_obsolete(rtc, dir); | |
+ | |
+ return NF_ACCEPT; | |
+} | |
+ | |
+static unsigned int nf_rtcache_forward(u_int8_t pf, | |
+ struct sk_buff *skb, | |
+ const struct nf_hook_state *state) | |
+{ | |
+ struct nf_conn_rtcache *rtc; | |
+ enum ip_conntrack_info ctinfo; | |
+ enum ip_conntrack_dir dir; | |
+ struct nf_conn *ct; | |
+ struct dst_entry *dst = skb_dst(skb); | |
+ int iif; | |
+ | |
+ ct = nf_ct_get(skb, &ctinfo); | |
+ if (!ct) | |
+ return NF_ACCEPT; | |
+ | |
+ if (dst && dst_xfrm(dst)) | |
+ return NF_ACCEPT; | |
+ | |
+ if (dst && (dst->flags & DST_FAKE_RTABLE)) | |
+ return NF_ACCEPT; | |
+ | |
+ if (!nf_ct_is_confirmed(ct)) { | |
+ if (nf_ct_rtcache_find(ct)) | |
+ return NF_ACCEPT; | |
+ nf_ct_rtcache_ext_add(ct); | |
+ return NF_ACCEPT; | |
+ } | |
+ | |
+ rtc = nf_ct_rtcache_find_usable(ct); | |
+ if (!rtc) | |
+ return NF_ACCEPT; | |
+ | |
+ dir = CTINFO2DIR(ctinfo); | |
+ iif = nf_conn_rtcache_iif_get(rtc, dir); | |
+ pr_debug("ct %p, skb %p, dir %d, iif %d, cached iif %d\n", | |
+ ct, skb, dir, iif, state->in->ifindex); | |
+ if (likely(state->in->ifindex == iif)) | |
+ return NF_ACCEPT; | |
+ | |
+ nf_conn_rtcache_dst_set(pf, rtc, skb_dst(skb), dir, state->in->ifindex); | |
+ return NF_ACCEPT; | |
+} | |
+ | |
+static unsigned int nf_rtcache_in4(void *priv, | |
+ struct sk_buff *skb, | |
+ const struct nf_hook_state *state) | |
+{ | |
+ return nf_rtcache_in(NFPROTO_IPV4, skb, state); | |
+} | |
+ | |
+static unsigned int nf_rtcache_forward4(void *priv, | |
+ struct sk_buff *skb, | |
+ const struct nf_hook_state *state) | |
+{ | |
+ return nf_rtcache_forward(NFPROTO_IPV4, skb, state); | |
+} | |
+ | |
+#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6) | |
+static unsigned int nf_rtcache_in6(void *priv, | |
+ struct sk_buff *skb, | |
+ const struct nf_hook_state *state) | |
+{ | |
+ return nf_rtcache_in(NFPROTO_IPV6, skb, state); | |
+} | |
+ | |
+static unsigned int nf_rtcache_forward6(void *priv, | |
+ struct sk_buff *skb, | |
+ const struct nf_hook_state *state) | |
+{ | |
+ return nf_rtcache_forward(NFPROTO_IPV6, skb, state); | |
+} | |
+#endif | |
+ | |
+static int nf_rtcache_dst_remove(struct nf_conn *ct, void *data) | |
+{ | |
+ struct nf_conn_rtcache *rtc = nf_ct_rtcache_find(ct); | |
+ struct net_device *dev = data; | |
+ | |
+ if (!rtc) | |
+ return 0; | |
+ | |
+ if (dev->ifindex == rtc->cached_dst[IP_CT_DIR_ORIGINAL].iif || | |
+ dev->ifindex == rtc->cached_dst[IP_CT_DIR_REPLY].iif) { | |
+ nf_conn_rtcache_dst_obsolete(rtc, IP_CT_DIR_ORIGINAL); | |
+ nf_conn_rtcache_dst_obsolete(rtc, IP_CT_DIR_REPLY); | |
+ } | |
+ | |
+ return 0; | |
+} | |
+ | |
+static int nf_rtcache_netdev_event(struct notifier_block *this, | |
+ unsigned long event, void *ptr) | |
+{ | |
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr); | |
+ struct net *net = dev_net(dev); | |
+ | |
+ if (event == NETDEV_DOWN) | |
+ nf_ct_iterate_cleanup(net, nf_rtcache_dst_remove, dev, 0, 0); | |
+ | |
+ return NOTIFY_DONE; | |
+} | |
+ | |
+static struct notifier_block nf_rtcache_notifier = { | |
+ .notifier_call = nf_rtcache_netdev_event, | |
+}; | |
+ | |
+static struct nf_hook_ops rtcache_ops[] = { | |
+ { | |
+ .hook = nf_rtcache_in4, | |
+ .pf = NFPROTO_IPV4, | |
+ .hooknum = NF_INET_PRE_ROUTING, | |
+ .priority = NF_IP_PRI_LAST, | |
+ }, | |
+ { | |
+ .hook = nf_rtcache_forward4, | |
+ .pf = NFPROTO_IPV4, | |
+ .hooknum = NF_INET_FORWARD, | |
+ .priority = NF_IP_PRI_LAST, | |
+ }, | |
+#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6) | |
+ { | |
+ .hook = nf_rtcache_in6, | |
+ .pf = NFPROTO_IPV6, | |
+ .hooknum = NF_INET_PRE_ROUTING, | |
+ .priority = NF_IP_PRI_LAST, | |
+ }, | |
+ { | |
+ .hook = nf_rtcache_forward6, | |
+ .pf = NFPROTO_IPV6, | |
+ .hooknum = NF_INET_FORWARD, | |
+ .priority = NF_IP_PRI_LAST, | |
+ }, | |
+#endif | |
+}; | |
+ | |
+static struct nf_ct_ext_type rtcache_extend __read_mostly = { | |
+ .len = sizeof(struct nf_conn_rtcache), | |
+ .align = __alignof__(struct nf_conn_rtcache), | |
+ .id = NF_CT_EXT_RTCACHE, | |
+ .destroy = nf_conn_rtcache_destroy, | |
+}; | |
+ | |
+static int __init nf_conntrack_rtcache_init(void) | |
+{ | |
+ int ret = nf_ct_extend_register(&rtcache_extend); | |
+ | |
+ if (ret < 0) { | |
+ pr_err("nf_conntrack_rtcache: Unable to register extension\n"); | |
+ return ret; | |
+ } | |
+ | |
+ ret = nf_register_hooks(rtcache_ops, ARRAY_SIZE(rtcache_ops)); | |
+ if (ret < 0) { | |
+ nf_ct_extend_unregister(&rtcache_extend); | |
+ return ret; | |
+ } | |
+ | |
+ ret = register_netdevice_notifier(&nf_rtcache_notifier); | |
+ if (ret) { | |
+ nf_unregister_hooks(rtcache_ops, ARRAY_SIZE(rtcache_ops)); | |
+ nf_ct_extend_unregister(&rtcache_extend); | |
+ } | |
+ | |
+ return ret; | |
+} | |
+ | |
+static int nf_rtcache_ext_remove(struct nf_conn *ct, void *data) | |
+{ | |
+ struct nf_conn_rtcache *rtc = nf_ct_rtcache_find(ct); | |
+ | |
+ return rtc != NULL; | |
+} | |
+ | |
+static bool __exit nf_conntrack_rtcache_wait_for_dying(struct net *net) | |
+{ | |
+ bool wait = false; | |
+ int cpu; | |
+ | |
+ for_each_possible_cpu(cpu) { | |
+ struct nf_conntrack_tuple_hash *h; | |
+ struct hlist_nulls_node *n; | |
+ struct nf_conn *ct; | |
+ struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu); | |
+ | |
+ rcu_read_lock(); | |
+ spin_lock_bh(&pcpu->lock); | |
+ | |
+ hlist_nulls_for_each_entry(h, n, &pcpu->dying, hnnode) { | |
+ ct = nf_ct_tuplehash_to_ctrack(h); | |
+ if (nf_ct_rtcache_find(ct) != NULL) { | |
+ wait = true; | |
+ break; | |
+ } | |
+ } | |
+ spin_unlock_bh(&pcpu->lock); | |
+ rcu_read_unlock(); | |
+ } | |
+ | |
+ return wait; | |
+} | |
+ | |
+static void __exit nf_conntrack_rtcache_fini(void) | |
+{ | |
+ struct net *net; | |
+ int count = 0; | |
+ | |
+ /* remove hooks so no new connections get rtcache extension */ | |
+ nf_unregister_hooks(rtcache_ops, ARRAY_SIZE(rtcache_ops)); | |
+ | |
+ synchronize_net(); | |
+ | |
+ unregister_netdevice_notifier(&nf_rtcache_notifier); | |
+ | |
+ rtnl_lock(); | |
+ | |
+ /* zap all conntracks with rtcache extension */ | |
+ for_each_net(net) | |
+ nf_ct_iterate_cleanup(net, nf_rtcache_ext_remove, NULL, 0, 0); | |
+ | |
+ for_each_net(net) { | |
+ /* .. and make sure they're gone from dying list, too */ | |
+ while (nf_conntrack_rtcache_wait_for_dying(net)) { | |
+ msleep(200); | |
+ WARN_ONCE(++count > 25, "Waiting for all rtcache conntracks to go away\n"); | |
+ } | |
+ } | |
+ | |
+ rtnl_unlock(); | |
+ synchronize_net(); | |
+ nf_ct_extend_unregister(&rtcache_extend); | |
+} | |
+module_init(nf_conntrack_rtcache_init); | |
+module_exit(nf_conntrack_rtcache_fini); | |
+ | |
+MODULE_LICENSE("GPL"); | |
+MODULE_AUTHOR("Florian Westphal <fw@strlen.de>"); | |
+MODULE_DESCRIPTION("Conntrack route cache extension"); | |
--- a/net/netfilter/nf_conntrack_standalone.c | |
+++ b/net/netfilter/nf_conntrack_standalone.c | |
@@ -17,6 +17,7 @@ | |
#include <linux/percpu.h> | |
#include <linux/netdevice.h> | |
#include <linux/security.h> | |
+#include <linux/inet.h> | |
#include <net/net_namespace.h> | |
#ifdef CONFIG_SYSCTL | |
#include <linux/sysctl.h> | |
@@ -288,10 +289,66 @@ static int ct_open(struct inode *inode, struct file *file) | |
sizeof(struct ct_iter_state)); | |
} | |
+struct kill_request { | |
+ u16 family; | |
+ union nf_inet_addr addr; | |
+}; | |
+ | |
+static int kill_matching(struct nf_conn *i, void *data) | |
+{ | |
+ struct kill_request *kr = data; | |
+ struct nf_conntrack_tuple *t1 = &i->tuplehash[IP_CT_DIR_ORIGINAL].tuple; | |
+ struct nf_conntrack_tuple *t2 = &i->tuplehash[IP_CT_DIR_REPLY].tuple; | |
+ | |
+ if (!kr->family) | |
+ return 1; | |
+ | |
+ if (t1->src.l3num != kr->family) | |
+ return 0; | |
+ | |
+ return (nf_inet_addr_cmp(&kr->addr, &t1->src.u3) || | |
+ nf_inet_addr_cmp(&kr->addr, &t1->dst.u3) || | |
+ nf_inet_addr_cmp(&kr->addr, &t2->src.u3) || | |
+ nf_inet_addr_cmp(&kr->addr, &t2->dst.u3)); | |
+} | |
+ | |
+static ssize_t ct_file_write(struct file *file, const char __user *buf, | |
+ size_t count, loff_t *ppos) | |
+{ | |
+ struct seq_file *seq = file->private_data; | |
+ struct net *net = seq_file_net(seq); | |
+ struct kill_request kr = { }; | |
+ char req[INET6_ADDRSTRLEN] = { }; | |
+ | |
+ if (count == 0) | |
+ return 0; | |
+ | |
+ if (count >= INET6_ADDRSTRLEN) | |
+ count = INET6_ADDRSTRLEN - 1; | |
+ | |
+ if (copy_from_user(req, buf, count)) | |
+ return -EFAULT; | |
+ | |
+ if (strnchr(req, count, ':')) { | |
+ kr.family = AF_INET6; | |
+ if (!in6_pton(req, count, (void *)&kr.addr, '\n', NULL)) | |
+ return -EINVAL; | |
+ } else if (strnchr(req, count, '.')) { | |
+ kr.family = AF_INET; | |
+ if (!in4_pton(req, count, (void *)&kr.addr, '\n', NULL)) | |
+ return -EINVAL; | |
+ } | |
+ | |
+ nf_ct_iterate_cleanup(net, kill_matching, &kr, 0, 0); | |
+ | |
+ return count; | |
+} | |
+ | |
static const struct file_operations ct_file_ops = { | |
.owner = THIS_MODULE, | |
.open = ct_open, | |
.read = seq_read, | |
+ .write = ct_file_write, | |
.llseek = seq_lseek, | |
.release = seq_release_net, | |
}; | |
@@ -393,7 +450,7 @@ static int nf_conntrack_standalone_init_proc(struct net *net) | |
{ | |
struct proc_dir_entry *pde; | |
- pde = proc_create("nf_conntrack", 0440, net->proc_net, &ct_file_ops); | |
+ pde = proc_create("nf_conntrack", 0660, net->proc_net, &ct_file_ops); | |
if (!pde) | |
goto out_nf_conntrack; | |
--- a/net/netfilter/nf_nat_core.c | |
+++ b/net/netfilter/nf_nat_core.c | |
@@ -90,6 +90,9 @@ int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int family) | |
struct dst_entry *dst; | |
int err; | |
+ if (skb->dev && !dev_net(skb->dev)->xfrm.policy_count[XFRM_POLICY_OUT]) | |
+ return 0; | |
+ | |
err = xfrm_decode_session(skb, &fl, family); | |
if (err < 0) | |
return err; | |
@@ -404,6 +407,13 @@ nf_nat_setup_info(struct nf_conn *ct, | |
get_unique_tuple(&new_tuple, &curr_tuple, range, ct, maniptype); | |
+#if IS_ENABLED(CONFIG_NF_NAT_TRY_NEXT_RULE) | |
+ if (curr_tuple.src.u.all != 0 && curr_tuple.dst.u.all != 0 && | |
+ new_tuple.src.u.all != 0 && new_tuple.dst.u.all != 0 && | |
+ nf_nat_used_tuple(&new_tuple, ct)) | |
+ return XT_CONTINUE; | |
+#endif | |
+ | |
if (!nf_ct_tuple_equal(&new_tuple, &curr_tuple)) { | |
struct nf_conntrack_tuple reply; | |
--- a/net/netfilter/nf_nat_ftp.c | |
+++ b/net/netfilter/nf_nat_ftp.c | |
@@ -24,8 +24,39 @@ MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>"); | |
MODULE_DESCRIPTION("ftp NAT helper"); | |
MODULE_ALIAS("ip_nat_ftp"); | |
+static ushort psid = 0; | |
+module_param(psid, ushort, 0644); | |
+MODULE_PARM_DESC(psid, "MAP_E devices's psid"); | |
+ | |
+static uint psid_len = 0; | |
+module_param(psid_len, uint, 0644); | |
+MODULE_PARM_DESC(psid_len, "MAP_E devices's psid length"); | |
+ | |
+static uint offset = 0; | |
+module_param(offset, uint, 0644); | |
+MODULE_PARM_DESC(offset, "MAP_E devices's psid offset"); | |
+ | |
/* FIXME: Time out? --RR */ | |
+/** | |
+ * nf_nat_port_valid_check - check whether the port falls within the PSID range | |
+ * @skb: the packet to be translated | |
+ * @port: the port to be checked | |
+ **/ | |
+static int nf_nat_port_valid_check(struct sk_buff *skb, u16 port) | |
+{ | |
+ if (psid == 0 || psid_len == 0 || offset == 0) | |
+ return 1; | |
+ | |
+ if ((psid_len + offset) > 16) | |
+ return 1; | |
+ | |
+ if ((((port >> (16 - psid_len - offset)) & ((1 << psid_len) - 1))) == psid) | |
+ return 1; | |
+ | |
+ return 0; | |
+} | |
+ | |
static int nf_nat_ftp_fmt_cmd(struct nf_conn *ct, enum nf_ct_ftp_type type, | |
char *buffer, size_t buflen, | |
union nf_inet_addr *addr, u16 port) | |
@@ -65,7 +96,7 @@ static unsigned int nf_nat_ftp(struct sk_buff *skb, | |
struct nf_conntrack_expect *exp) | |
{ | |
union nf_inet_addr newaddr; | |
- u_int16_t port; | |
+ u16 port; | |
int dir = CTINFO2DIR(ctinfo); | |
struct nf_conn *ct = exp->master; | |
char buffer[sizeof("|1||65535|") + INET6_ADDRSTRLEN]; | |
@@ -82,10 +113,16 @@ static unsigned int nf_nat_ftp(struct sk_buff *skb, | |
* this one. */ | |
exp->expectfn = nf_nat_follow_master; | |
- /* Try to get same port: if not, try to change it. */ | |
+ /* In the case of MAP-E, the FTP ALG source port number must use its own | |
+ * PSID. Otherwise the returned packets from ftp server will use other | |
+ * than its own IPv6 address. | |
+ * so let the check hook to validate the port*/ | |
for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) { | |
int ret; | |
+ if (!nf_nat_port_valid_check(skb, port)) | |
+ continue; | |
+ | |
exp->tuple.dst.u.tcp.port = htons(port); | |
ret = nf_ct_expect_related(exp); | |
if (ret == 0) | |
--- a/net/netfilter/nfnetlink.c | |
+++ b/net/netfilter/nfnetlink.c | |
@@ -326,10 +326,12 @@ replay: | |
nlh = nlmsg_hdr(skb); | |
err = 0; | |
- if (nlmsg_len(nlh) < sizeof(struct nfgenmsg) || | |
- skb->len < nlh->nlmsg_len) { | |
- err = -EINVAL; | |
- goto ack; | |
+ if (nlh->nlmsg_len < NLMSG_HDRLEN || | |
+ skb->len < nlh->nlmsg_len || | |
+ nlmsg_len(nlh) < sizeof(struct nfgenmsg)) { | |
+ nfnl_err_reset(&err_list); | |
+ status |= NFNL_BATCH_FAILURE; | |
+ goto done; | |
} | |
/* Only requests are handled by the kernel */ | |
--- a/net/netfilter/nft_hash.c | |
+++ b/net/netfilter/nft_hash.c | |
@@ -192,7 +192,7 @@ static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, | |
u8 genmask = nft_genmask_cur(read_pnet(&set->pnet)); | |
int err; | |
- err = rhashtable_walk_init(&priv->ht, &hti); | |
+ err = rhashtable_walk_init(&priv->ht, &hti, GFP_KERNEL); | |
iter->err = err; | |
if (err) | |
return; | |
@@ -248,7 +248,7 @@ static void nft_hash_gc(struct work_struct *work) | |
priv = container_of(work, struct nft_hash, gc_work.work); | |
set = nft_set_container_of(priv); | |
- err = rhashtable_walk_init(&priv->ht, &hti); | |
+ err = rhashtable_walk_init(&priv->ht, &hti, GFP_KERNEL); | |
if (err) | |
goto schedule; | |
--- a/net/netfilter/x_tables.c | |
+++ b/net/netfilter/x_tables.c | |
@@ -548,7 +548,7 @@ void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, | |
m->u.user.match_size = msize; | |
strlcpy(name, match->name, sizeof(name)); | |
module_put(match->me); | |
- strncpy(m->u.user.name, name, sizeof(m->u.user.name)); | |
+ strlcpy(m->u.user.name, name, sizeof(m->u.user.name)); | |
*size += off; | |
*dstptr += msize; | |
@@ -856,7 +856,7 @@ void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr, | |
t->u.user.target_size = tsize; | |
strlcpy(name, target->name, sizeof(name)); | |
module_put(target->me); | |
- strncpy(t->u.user.name, name, sizeof(t->u.user.name)); | |
+ strlcpy(t->u.user.name, name, sizeof(t->u.user.name)); | |
*size += off; | |
*dstptr += tsize; | |
--- a/net/netfilter/xt_DSCP.c | |
+++ b/net/netfilter/xt_DSCP.c | |
@@ -18,6 +18,7 @@ | |
#include <linux/netfilter/x_tables.h> | |
#include <linux/netfilter/xt_DSCP.h> | |
+#include <net/netfilter/nf_conntrack_dscpremark_ext.h> | |
MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); | |
MODULE_DESCRIPTION("Xtables: DSCP/TOS field modification"); | |
@@ -32,6 +33,10 @@ dscp_tg(struct sk_buff *skb, const struct xt_action_param *par) | |
{ | |
const struct xt_DSCP_info *dinfo = par->targinfo; | |
u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT; | |
+#ifdef CONFIG_NF_CONNTRACK_DSCPREMARK_EXT | |
+ struct nf_conn *ct; | |
+ enum ip_conntrack_info ctinfo; | |
+#endif | |
if (dscp != dinfo->dscp) { | |
if (!skb_make_writable(skb, sizeof(struct iphdr))) | |
@@ -41,6 +46,13 @@ dscp_tg(struct sk_buff *skb, const struct xt_action_param *par) | |
(__force __u8)(~XT_DSCP_MASK), | |
dinfo->dscp << XT_DSCP_SHIFT); | |
+#ifdef CONFIG_NF_CONNTRACK_DSCPREMARK_EXT | |
+ ct = nf_ct_get(skb, &ctinfo); | |
+ if (!ct) | |
+ return XT_CONTINUE; | |
+ | |
+ nf_conntrack_dscpremark_ext_set_dscp_rule_valid(ct); | |
+#endif | |
} | |
return XT_CONTINUE; | |
} | |
@@ -50,7 +62,10 @@ dscp_tg6(struct sk_buff *skb, const struct xt_action_param *par) | |
{ | |
const struct xt_DSCP_info *dinfo = par->targinfo; | |
u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT; | |
- | |
+#ifdef CONFIG_NF_CONNTRACK_DSCPREMARK_EXT | |
+ struct nf_conn *ct; | |
+ enum ip_conntrack_info ctinfo; | |
+#endif | |
if (dscp != dinfo->dscp) { | |
if (!skb_make_writable(skb, sizeof(struct ipv6hdr))) | |
return NF_DROP; | |
@@ -58,6 +73,14 @@ dscp_tg6(struct sk_buff *skb, const struct xt_action_param *par) | |
ipv6_change_dsfield(ipv6_hdr(skb), | |
(__force __u8)(~XT_DSCP_MASK), | |
dinfo->dscp << XT_DSCP_SHIFT); | |
+ | |
+#ifdef CONFIG_NF_CONNTRACK_DSCPREMARK_EXT | |
+ ct = nf_ct_get(skb, &ctinfo); | |
+ if (!ct) | |
+ return XT_CONTINUE; | |
+ | |
+ nf_conntrack_dscpremark_ext_set_dscp_rule_valid(ct); | |
+#endif | |
} | |
return XT_CONTINUE; | |
} | |
new file mode 100644 | |
--- /dev/null | |
+++ b/net/netfilter/xt_id.c | |
@@ -0,0 +1,45 @@ | |
+/* | |
+ * Implements a dummy match to allow attaching IDs to rules | |
+ * | |
+ * 2014-08-01 Jo-Philipp Wich <jow@openwrt.org> | |
+ */ | |
+ | |
+#include <linux/module.h> | |
+#include <linux/skbuff.h> | |
+#include <linux/netfilter/x_tables.h> | |
+#include <linux/netfilter/xt_id.h> | |
+ | |
+MODULE_AUTHOR("Jo-Philipp Wich <jow@openwrt.org>"); | |
+MODULE_DESCRIPTION("Xtables: No-op match which can be tagged with a 32bit ID"); | |
+MODULE_LICENSE("GPL"); | |
+MODULE_ALIAS("ipt_id"); | |
+MODULE_ALIAS("ip6t_id"); | |
+ | |
+static bool | |
+id_mt(const struct sk_buff *skb, struct xt_action_param *par) | |
+{ | |
+ /* We always match */ | |
+ return true; | |
+} | |
+ | |
+static struct xt_match id_mt_reg __read_mostly = { | |
+ .name = "id", | |
+ .revision = 0, | |
+ .family = NFPROTO_UNSPEC, | |
+ .match = id_mt, | |
+ .matchsize = sizeof(struct xt_id_info), | |
+ .me = THIS_MODULE, | |
+}; | |
+ | |
+static int __init id_mt_init(void) | |
+{ | |
+ return xt_register_match(&id_mt_reg); | |
+} | |
+ | |
+static void __exit id_mt_exit(void) | |
+{ | |
+ xt_unregister_match(&id_mt_reg); | |
+} | |
+ | |
+module_init(id_mt_init); | |
+module_exit(id_mt_exit); | |
--- a/net/netfilter/xt_socket.c | |
+++ b/net/netfilter/xt_socket.c | |
@@ -229,7 +229,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, | |
transparent = xt_socket_sk_is_transparent(sk); | |
if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard && | |
- transparent) | |
+ transparent && sk_fullsock(sk)) | |
pskb->mark = sk->sk_mark; | |
if (sk != skb->sk) | |
@@ -404,7 +404,7 @@ socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par) | |
transparent = xt_socket_sk_is_transparent(sk); | |
if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard && | |
- transparent) | |
+ transparent && sk_fullsock(sk)) | |
pskb->mark = sk->sk_mark; | |
if (sk != skb->sk) | |
--- a/net/netlabel/netlabel_unlabeled.c | |
+++ b/net/netlabel/netlabel_unlabeled.c | |
@@ -787,7 +787,8 @@ static int netlbl_unlabel_addrinfo_get(struct genl_info *info, | |
{ | |
u32 addr_len; | |
- if (info->attrs[NLBL_UNLABEL_A_IPV4ADDR]) { | |
+ if (info->attrs[NLBL_UNLABEL_A_IPV4ADDR] && | |
+ info->attrs[NLBL_UNLABEL_A_IPV4MASK]) { | |
addr_len = nla_len(info->attrs[NLBL_UNLABEL_A_IPV4ADDR]); | |
if (addr_len != sizeof(struct in_addr) && | |
addr_len != nla_len(info->attrs[NLBL_UNLABEL_A_IPV4MASK])) | |
--- a/net/netlink/Kconfig | |
+++ b/net/netlink/Kconfig | |
@@ -4,6 +4,7 @@ | |
config NETLINK_DIAG | |
tristate "NETLINK: socket monitoring interface" | |
+ select SOCK_DIAG | |
default n | |
---help--- | |
Support for NETLINK socket monitoring interface used by the ss tool. | |
--- a/net/netlink/af_netlink.c | |
+++ b/net/netlink/af_netlink.c | |
@@ -1187,24 +1187,7 @@ void netlink_detachskb(struct sock *sk, struct sk_buff *skb) | |
static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation) | |
{ | |
- int delta; | |
- | |
WARN_ON(skb->sk != NULL); | |
- delta = skb->end - skb->tail; | |
- if (is_vmalloc_addr(skb->head) || delta * 2 < skb->truesize) | |
- return skb; | |
- | |
- if (skb_shared(skb)) { | |
- struct sk_buff *nskb = skb_clone(skb, allocation); | |
- if (!nskb) | |
- return skb; | |
- consume_skb(skb); | |
- skb = nskb; | |
- } | |
- | |
- if (!pskb_expand_head(skb, 0, -delta, allocation)) | |
- skb->truesize -= delta; | |
- | |
return skb; | |
} | |
@@ -2362,7 +2345,8 @@ static int netlink_walk_start(struct nl_seq_iter *iter) | |
{ | |
int err; | |
- err = rhashtable_walk_init(&nl_table[iter->link].hash, &iter->hti); | |
+ err = rhashtable_walk_init(&nl_table[iter->link].hash, | |
+ &iter->hti, GFP_KERNEL); | |
if (err) { | |
iter->link = MAX_LINKS; | |
return err; | |
--- a/net/netlink/genetlink.c | |
+++ b/net/netlink/genetlink.c | |
@@ -993,7 +993,7 @@ static struct genl_multicast_group genl_ctrl_groups[] = { | |
static int genl_bind(struct net *net, int group) | |
{ | |
- int i, err = -ENOENT; | |
+ int i, err = 0; | |
down_read(&cb_lock); | |
for (i = 0; i < GENL_FAM_TAB_SIZE; i++) { | |
--- a/net/nfc/hci/core.c | |
+++ b/net/nfc/hci/core.c | |
@@ -209,6 +209,11 @@ void nfc_hci_cmd_received(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd, | |
} | |
create_info = (struct hci_create_pipe_resp *)skb->data; | |
+ if (create_info->pipe >= NFC_HCI_MAX_PIPES) { | |
+ status = NFC_HCI_ANY_E_NOK; | |
+ goto exit; | |
+ } | |
+ | |
/* Save the new created pipe and bind with local gate, | |
* the description for skb->data[3] is destination gate id | |
* but since we received this cmd from host controller, we | |
@@ -232,6 +237,11 @@ void nfc_hci_cmd_received(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd, | |
} | |
delete_info = (struct hci_delete_pipe_noti *)skb->data; | |
+ if (delete_info->pipe >= NFC_HCI_MAX_PIPES) { | |
+ status = NFC_HCI_ANY_E_NOK; | |
+ goto exit; | |
+ } | |
+ | |
hdev->pipes[delete_info->pipe].gate = NFC_HCI_INVALID_GATE; | |
hdev->pipes[delete_info->pipe].dest_host = NFC_HCI_INVALID_HOST; | |
break; | |
--- a/net/openvswitch/datapath.c | |
+++ b/net/openvswitch/datapath.c | |
@@ -61,6 +61,8 @@ | |
int ovs_net_id __read_mostly; | |
EXPORT_SYMBOL_GPL(ovs_net_id); | |
+static struct ovs_accel_callback *ovs_accel_cb; | |
+ | |
static struct genl_family dp_packet_genl_family; | |
static struct genl_family dp_flow_genl_family; | |
static struct genl_family dp_datapath_genl_family; | |
@@ -252,6 +254,126 @@ void ovs_dp_detach_port(struct vport *p) | |
ovs_vport_del(p); | |
} | |
+/* Notify datapath add event to acceleration callback */ | |
+static void ovs_dp_add_notify(struct datapath *dp, struct vport *vp) | |
+{ | |
+ struct ovs_accel_callback *ovs_cb; | |
+ | |
+ rcu_read_lock(); | |
+ ovs_cb = rcu_dereference(ovs_accel_cb); | |
+ if (ovs_cb && ovs_cb->ovs_accel_dp_add) | |
+ ovs_cb->ovs_accel_dp_add((void *)dp, vp->dev); | |
+ rcu_read_unlock(); | |
+} | |
+ | |
+/* Notify datapath delete event to acceleration callback */ | |
+static void ovs_dp_del_notify(struct datapath *dp, struct vport *vp) | |
+{ | |
+ struct ovs_accel_callback *ovs_cb; | |
+ | |
+ rcu_read_lock(); | |
+ ovs_cb = rcu_dereference(ovs_accel_cb); | |
+ if (ovs_cb && ovs_cb->ovs_accel_dp_del) | |
+ ovs_cb->ovs_accel_dp_del((void *)dp, vp->dev); | |
+ rcu_read_unlock(); | |
+} | |
+ | |
+/* Notify datapath port add event to acceleration callback */ | |
+static void ovs_dp_port_add_notify(struct datapath *dp, struct vport *vp, | |
+ struct nlattr **a) | |
+{ | |
+ struct ovs_accel_callback *ovs_cb; | |
+ const char *master = NULL; | |
+ | |
+ if (a[OVS_VPORT_ATTR_MASTER]) | |
+ master = nla_data(a[OVS_VPORT_ATTR_MASTER]); | |
+ | |
+ rcu_read_lock(); | |
+ ovs_cb = rcu_dereference(ovs_accel_cb); | |
+ if (ovs_cb && ovs_cb->ovs_accel_dp_port_add) | |
+ ovs_cb->ovs_accel_dp_port_add((void *)dp, (void *)vp, | |
+ vp->port_no, vp->ops->type, | |
+ master, vp->dev); | |
+ rcu_read_unlock(); | |
+} | |
+ | |
+/* Notify datapath port delete event to acceleration callback */ | |
+static void ovs_dp_port_del_notify(struct datapath *dp, struct vport *vp) | |
+{ | |
+ struct ovs_accel_callback *ovs_cb; | |
+ | |
+ rcu_read_lock(); | |
+ ovs_cb = rcu_dereference(ovs_accel_cb); | |
+ if (ovs_cb && ovs_cb->ovs_accel_dp_port_del) | |
+ ovs_cb->ovs_accel_dp_port_del((void *)dp, (void *)vp, vp->dev); | |
+ rcu_read_unlock(); | |
+} | |
+ | |
+/* Notify datapath flow add event to acceleration callback */ | |
+static void ovs_dp_flow_add_notify(struct datapath *dp, struct sw_flow *sf) | |
+{ | |
+ struct ovs_accel_callback *ovs_cb; | |
+ | |
+ rcu_read_lock(); | |
+ ovs_cb = rcu_dereference(ovs_accel_cb); | |
+ if (ovs_cb && ovs_cb->ovs_accel_dp_flow_add) | |
+ ovs_cb->ovs_accel_dp_flow_add((void *)dp, sf); | |
+ rcu_read_unlock(); | |
+} | |
+ | |
+/* Notify datapath flow delete event to acceleration callback */ | |
+static void ovs_dp_flow_del_notify(struct datapath *dp, struct sw_flow *sf) | |
+{ | |
+ struct ovs_accel_callback *ovs_cb; | |
+ | |
+ rcu_read_lock(); | |
+ ovs_cb = rcu_dereference(ovs_accel_cb); | |
+ if (ovs_cb && ovs_cb->ovs_accel_dp_flow_del) | |
+ ovs_cb->ovs_accel_dp_flow_del((void *)dp, sf); | |
+ rcu_read_unlock(); | |
+} | |
+ | |
+/* Notify datapath flow table flush event to acceleration callback */ | |
+static void ovs_dp_flow_tbl_flush_notify(struct datapath *dp) | |
+{ | |
+ struct ovs_accel_callback *ovs_cb; | |
+ | |
+ rcu_read_lock(); | |
+ ovs_cb = rcu_dereference(ovs_accel_cb); | |
+ if (ovs_cb && ovs_cb->ovs_accel_dp_flow_tbl_flush) | |
+ ovs_cb->ovs_accel_dp_flow_tbl_flush((void *)dp); | |
+ rcu_read_unlock(); | |
+} | |
+ | |
+/* Notify datapath flow set/change event to acceleration callback */ | |
+static void ovs_dp_flow_set_notify(struct datapath *dp, struct sw_flow *sf, | |
+ struct sw_flow_actions *new_sfa) | |
+{ | |
+ struct ovs_accel_callback *ovs_cb; | |
+ | |
+ rcu_read_lock(); | |
+ ovs_cb = rcu_dereference(ovs_accel_cb); | |
+ if (ovs_cb && ovs_cb->ovs_accel_dp_flow_set) | |
+ ovs_cb->ovs_accel_dp_flow_set((void *)dp, sf, new_sfa); | |
+ rcu_read_unlock(); | |
+} | |
+ | |
+/* Forward datapath packet to acceleration callback | |
+ * Must be called with rcu_read_lock. | |
+ */ | |
+static void ovs_dp_pkt_process_notify(struct datapath *dp, struct sk_buff *skb, | |
+ struct sw_flow_key *key, struct sw_flow *sf, | |
+ struct sw_flow_actions *sfa) | |
+{ | |
+ struct ovs_accel_callback *ovs_cb; | |
+ | |
+ WARN_ON(!rcu_read_lock_held()); | |
+ | |
+ ovs_cb = rcu_dereference(ovs_accel_cb); | |
+ if (ovs_cb && ovs_cb->ovs_accel_dp_pkt_process) | |
+ ovs_cb->ovs_accel_dp_pkt_process((void *)dp, skb, key, sf, sfa); | |
+} | |
+ | |
/* Must be called with rcu_read_lock. */ | |
void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key) | |
{ | |
@@ -265,6 +387,8 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key) | |
stats = this_cpu_ptr(dp->stats_percpu); | |
+ ovs_dp_pkt_process_notify(dp, skb, key, NULL, NULL); | |
+ | |
/* Look up flow. */ | |
flow = ovs_flow_tbl_lookup_stats(&dp->table, key, &n_mask_hit); | |
if (unlikely(!flow)) { | |
@@ -286,6 +410,7 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key) | |
ovs_flow_stats_update(flow, key->tp.flags, skb); | |
sf_acts = rcu_dereference(flow->sf_acts); | |
+ ovs_dp_pkt_process_notify(dp, skb, key, flow, sf_acts); | |
ovs_execute_actions(dp, skb, sf_acts, key); | |
stats_counter = &stats->n_hit; | |
@@ -992,6 +1117,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) | |
goto err_unlock_ovs; | |
} | |
+ ovs_dp_flow_add_notify(dp, new_flow); | |
if (unlikely(reply)) { | |
error = ovs_flow_cmd_fill_info(new_flow, | |
ovs_header->dp_ifindex, | |
@@ -1156,6 +1282,7 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) | |
if (likely(acts)) { | |
old_acts = ovsl_dereference(flow->sf_acts); | |
rcu_assign_pointer(flow->sf_acts, acts); | |
+ ovs_dp_flow_set_notify(dp, flow, old_acts); | |
if (unlikely(reply)) { | |
error = ovs_flow_cmd_fill_info(flow, | |
@@ -1292,6 +1419,7 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) | |
if (unlikely(!a[OVS_FLOW_ATTR_KEY] && !ufid_present)) { | |
err = ovs_flow_tbl_flush(&dp->table); | |
+ ovs_dp_flow_tbl_flush_notify(dp); | |
goto unlock; | |
} | |
@@ -1304,6 +1432,7 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) | |
goto unlock; | |
} | |
+ ovs_dp_flow_del_notify(dp, flow); | |
ovs_flow_tbl_remove(&dp->table, flow); | |
ovs_unlock(); | |
@@ -1606,6 +1735,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) | |
ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id); | |
list_add_tail_rcu(&dp->list_node, &ovs_net->dps); | |
+ ovs_dp_add_notify(dp, vport); | |
ovs_unlock(); | |
ovs_notify(&dp_datapath_genl_family, reply, info); | |
@@ -1642,6 +1772,7 @@ static void __dp_destroy(struct datapath *dp) | |
list_del_rcu(&dp->list_node); | |
+ ovs_dp_del_notify(dp, ovs_vport_ovsl(dp, OVSP_LOCAL)); | |
/* OVSP_LOCAL is datapath internal port. We need to make sure that | |
* all ports in datapath are destroyed first before freeing datapath. | |
*/ | |
@@ -1975,6 +2106,7 @@ restart: | |
goto exit_unlock_free; | |
} | |
+ ovs_dp_port_add_notify(dp, vport, a); | |
err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, | |
info->snd_seq, 0, OVS_VPORT_CMD_NEW); | |
BUG_ON(err < 0); | |
@@ -2063,6 +2195,7 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info) | |
goto exit_unlock_free; | |
} | |
+ ovs_dp_port_del_notify(vport->dp, vport); | |
err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, | |
info->snd_seq, 0, OVS_VPORT_CMD_DEL); | |
BUG_ON(err < 0); | |
@@ -2297,6 +2430,163 @@ static struct pernet_operations ovs_net_ops = { | |
.size = sizeof(struct ovs_net), | |
}; | |
+/* Register OVS datapath accelerator */ | |
+int ovs_register_accelerator(struct ovs_accel_callback *oac) | |
+{ | |
+ ovs_lock(); | |
+ | |
+ if (unlikely(rcu_access_pointer(ovs_accel_cb))) { | |
+ ovs_unlock(); | |
+ return -EEXIST; | |
+ } | |
+ | |
+ rcu_assign_pointer(ovs_accel_cb, oac); | |
+ ovs_unlock(); | |
+ return 0; | |
+} | |
+EXPORT_SYMBOL(ovs_register_accelerator); | |
+ | |
+/* Unregister OVS datapath accelerator */ | |
+void ovs_unregister_accelerator(struct ovs_accel_callback *oac) | |
+{ | |
+ ovs_lock(); | |
+ rcu_assign_pointer(ovs_accel_cb, NULL); | |
+ ovs_unlock(); | |
+} | |
+EXPORT_SYMBOL(ovs_unregister_accelerator); | |
+ | |
+/* Find datapath flow rule using the key */ | |
+struct sw_flow *ovs_accel_flow_find(void *dp_inst, struct sw_flow_key *key) | |
+{ | |
+ struct datapath *dp = dp_inst; | |
+ struct sw_flow *flow; | |
+ | |
+ rcu_read_lock(); | |
+ flow = ovs_flow_tbl_lookup(&dp->table, key); | |
+ rcu_read_unlock(); | |
+ | |
+ return flow; | |
+} | |
+EXPORT_SYMBOL(ovs_accel_flow_find); | |
+ | |
+/* Update flow rule statistics */ | |
+int ovs_accel_flow_stats_update(void *dp_inst, void *out_vport, | |
+ struct sw_flow_key *key, int pkts, int bytes) | |
+{ | |
+ struct datapath *dp = dp_inst; | |
+ struct flow_stats *stats; | |
+ struct sw_flow *flow; | |
+ struct dp_stats_percpu *dp_stats; | |
+ int node = numa_node_id(); | |
+ u64 *stats_counter; | |
+ u32 n_mask_hit; | |
+ | |
+ rcu_read_lock(); | |
+ flow = ovs_flow_tbl_lookup_stats(&dp->table, key, &n_mask_hit); | |
+ if (!flow) { | |
+ rcu_read_unlock(); | |
+ return -EINVAL; | |
+ } | |
+ | |
+	/* Update node-specific statistics; if no memory is allocated | |
+	 * for this node, update node 0's statistics instead. | |
+ */ | |
+ stats = rcu_dereference(flow->stats[node]); | |
+ if (unlikely(!stats)) | |
+ stats = rcu_dereference(flow->stats[0]); | |
+ | |
+ rcu_read_unlock(); | |
+ | |
+ spin_lock(&stats->lock); | |
+ stats->used = jiffies; | |
+ stats->packet_count += pkts; | |
+ stats->byte_count += bytes; | |
+ | |
+ /* Update datapath statistics, only hit count should be updated here, | |
+ * miss count is taken care by datapath. | |
+ * n_mask_hit and stats_counter are updated per packet, whereas | |
+ * stats_counter will match the number of packets processed in datapath | |
+ * n_mask_hit is updated number of packets times the total masks that | |
+ * are processed. Datapath flows are now accelerated and this API is | |
+	 * called to update flow statistics, datapath statistics should use | |
+ * number of packets. | |
+ */ | |
+ dp_stats = this_cpu_ptr(dp->stats_percpu); | |
+ stats_counter = &dp_stats->n_hit; | |
+ | |
+ u64_stats_update_begin(&dp_stats->syncp); | |
+ (*stats_counter) += pkts; | |
+ dp_stats->n_mask_hit += n_mask_hit * pkts; | |
+ u64_stats_update_end(&dp_stats->syncp); | |
+ | |
+ spin_unlock(&stats->lock); | |
+ return 0; | |
+} | |
+EXPORT_SYMBOL(ovs_accel_flow_stats_update); | |
+ | |
+/* Find netdev using vport number */ | |
+struct net_device *ovs_accel_dev_find(void *dp_inst, int vport_no) | |
+{ | |
+ struct datapath *dp = dp_inst; | |
+ struct net_device *dev; | |
+ struct vport *vport; | |
+ | |
+ rcu_read_lock(); | |
+ | |
+ vport = ovs_vport_rcu(dp, vport_no); | |
+ if (!vport) { | |
+ rcu_read_unlock(); | |
+ return NULL; | |
+ } | |
+ | |
+ dev = vport->dev; | |
+ rcu_read_unlock(); | |
+ return dev; | |
+} | |
+EXPORT_SYMBOL(ovs_accel_dev_find); | |
+ | |
+/* Find egress interface using key and skb */ | |
+struct net_device *ovs_accel_egress_dev_find(void *dp_inst, | |
+ struct sw_flow_key *key, | |
+ struct sk_buff *skb) | |
+{ | |
+ struct datapath *dp = dp_inst; | |
+ struct sw_flow *flow; | |
+ struct sw_flow_actions *sf_acts; | |
+ struct net_device *dev; | |
+ const struct nlattr *a; | |
+ int rem; | |
+ | |
+ rcu_read_lock(); | |
+ flow = ovs_accel_flow_find(dp_inst, key); | |
+ if (unlikely(!flow)) | |
+ goto done; | |
+ | |
+ sf_acts = rcu_dereference(flow->sf_acts); | |
+ for (a = sf_acts->actions, rem = sf_acts->actions_len; rem > 0; | |
+ a = nla_next(a, &rem)) { | |
+ struct vport *vport; | |
+ int port_no; | |
+ | |
+ switch (nla_type(a)) { | |
+ case OVS_ACTION_ATTR_OUTPUT: | |
+ port_no = nla_get_u32(a); | |
+ vport = ovs_vport_ovsl_rcu(dp, port_no); | |
+ if (!vport) { | |
+ goto done; | |
+ } | |
+ | |
+ dev = vport->dev; | |
+ rcu_read_unlock(); | |
+ return dev; | |
+ } | |
+ } | |
+done: | |
+ rcu_read_unlock(); | |
+ return NULL; | |
+} | |
+EXPORT_SYMBOL(ovs_accel_egress_dev_find); | |
+ | |
static int __init dp_init(void) | |
{ | |
int err; | |
--- a/net/openvswitch/datapath.h | |
+++ b/net/openvswitch/datapath.h | |
@@ -138,6 +138,37 @@ struct ovs_net { | |
bool xt_label; | |
}; | |
+/** | |
+ * struct ovs_accel_callback - OVS acceleration callbacks | |
+ * @ovs_accel_dp_add - new data path is created | |
+ * @ovs_accel_dp_del - data path is deleted | |
+ * @ovs_accel_dp_port_add - new port is added into data path | |
+ * @ovs_accel_dp_port_del - port is deleted from data path | |
+ * @ovs_accel_dp_flow_add - new flow rule is added in data path | |
+ * @ovs_accel_dp_flow_del - flow rule is deleted from data path | |
+ * @ovs_accel_dp_flow_set - existing flow rule is modified in data path | |
+ * @ovs_accel_dp_flow_tbl_flush - flow table is flushed in data path | |
+ * @ovs_accel_dp_pkt_process - Process data path packet | |
+ */ | |
+struct ovs_accel_callback { | |
+ void (*ovs_accel_dp_add)(void *dp, struct net_device *dev); | |
+ void (*ovs_accel_dp_del)(void *dp, struct net_device *dev); | |
+ void (*ovs_accel_dp_port_add)(void *dp, void *vp, | |
+ int vp_num, enum ovs_vport_type vp_type, | |
+ const char *master, struct net_device *dev); | |
+ void (*ovs_accel_dp_port_del)(void *dp, void *vp, | |
+ struct net_device *dev); | |
+ void (*ovs_accel_dp_flow_add)(void *dp, struct sw_flow *sf); | |
+ void (*ovs_accel_dp_flow_del)(void *dp, struct sw_flow *sf); | |
+ void (*ovs_accel_dp_flow_set)(void *dp, struct sw_flow *sf, | |
+ struct sw_flow_actions *sfa); | |
+ void (*ovs_accel_dp_flow_tbl_flush)(void *dp); | |
+ void (*ovs_accel_dp_pkt_process)(void *dp, struct sk_buff *skb, | |
+ struct sw_flow_key *key, | |
+ struct sw_flow *sf, | |
+ struct sw_flow_actions *sfa); | |
+}; | |
+ | |
extern int ovs_net_id; | |
void ovs_lock(void); | |
void ovs_unlock(void); | |
@@ -204,6 +235,16 @@ void ovs_dp_notify_wq(struct work_struct *work); | |
int action_fifos_init(void); | |
void action_fifos_exit(void); | |
+int ovs_register_accelerator(struct ovs_accel_callback *oac); | |
+void ovs_unregister_accelerator(struct ovs_accel_callback *oac); | |
+int ovs_accel_flow_stats_update(void *dp, void *out_vport, | |
+ struct sw_flow_key *sf, int pkts, int bytes); | |
+struct sw_flow *ovs_accel_flow_find(void *dp, struct sw_flow_key *sfk); | |
+struct net_device *ovs_accel_dev_find(void *dp, int vport_no); | |
+struct net_device *ovs_accel_egress_dev_find(void *dp_inst, | |
+ struct sw_flow_key *key, | |
+ struct sk_buff *skb); | |
+ | |
/* 'KEY' must not have any bits set outside of the 'MASK' */ | |
#define OVS_MASKED(OLD, KEY, MASK) ((KEY) | ((OLD) & ~(MASK))) | |
#define OVS_SET_MASKED(OLD, KEY, MASK) ((OLD) = OVS_MASKED(OLD, KEY, MASK)) | |
--- a/net/packet/Kconfig | |
+++ b/net/packet/Kconfig | |
@@ -18,6 +18,7 @@ config PACKET | |
config PACKET_DIAG | |
tristate "Packet: sockets monitoring interface" | |
depends on PACKET | |
+ select SOCK_DIAG | |
default n | |
---help--- | |
Support for PF_PACKET sockets monitoring interface used by the ss tool. | |
--- a/net/packet/af_packet.c | |
+++ b/net/packet/af_packet.c | |
@@ -1776,6 +1776,7 @@ static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, | |
{ | |
struct sock *sk; | |
struct sockaddr_pkt *spkt; | |
+ struct packet_sock *po; | |
/* | |
* When we registered the protocol we saved the socket in the data | |
@@ -1783,6 +1784,7 @@ static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, | |
*/ | |
sk = pt->af_packet_priv; | |
+ po = pkt_sk(sk); | |
/* | |
* Yank back the headers [hope the device set this | |
@@ -1795,7 +1797,7 @@ static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, | |
* so that this procedure is noop. | |
*/ | |
- if (skb->pkt_type == PACKET_LOOPBACK) | |
+ if (!(po->pkt_type & (1 << skb->pkt_type))) | |
goto out; | |
if (!net_eq(dev_net(dev), sock_net(sk))) | |
@@ -1998,12 +2000,12 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, | |
int skb_len = skb->len; | |
unsigned int snaplen, res; | |
- if (skb->pkt_type == PACKET_LOOPBACK) | |
- goto drop; | |
- | |
sk = pt->af_packet_priv; | |
po = pkt_sk(sk); | |
+ if (!(po->pkt_type & (1 << skb->pkt_type))) | |
+ goto drop; | |
+ | |
if (!net_eq(dev_net(dev), sock_net(sk))) | |
goto drop; | |
@@ -2123,12 +2125,12 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, | |
BUILD_BUG_ON(TPACKET_ALIGN(sizeof(*h.h2)) != 32); | |
BUILD_BUG_ON(TPACKET_ALIGN(sizeof(*h.h3)) != 48); | |
- if (skb->pkt_type == PACKET_LOOPBACK) | |
- goto drop; | |
- | |
sk = pt->af_packet_priv; | |
po = pkt_sk(sk); | |
+ if (!(po->pkt_type & (1 << skb->pkt_type))) | |
+ goto drop; | |
+ | |
if (!net_eq(dev_net(dev), sock_net(sk))) | |
goto drop; | |
@@ -3115,6 +3117,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, | |
mutex_init(&po->pg_vec_lock); | |
po->rollover = NULL; | |
po->prot_hook.func = packet_rcv; | |
+ po->pkt_type = PACKET_MASK_ANY & ~(1 << PACKET_LOOPBACK); | |
if (sock->type == SOCK_PACKET) | |
po->prot_hook.func = packet_rcv_spkt; | |
@@ -3737,6 +3740,16 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv | |
po->xmit = val ? packet_direct_xmit : dev_queue_xmit; | |
return 0; | |
} | |
+ case PACKET_RECV_TYPE: | |
+ { | |
+ unsigned int val; | |
+ if (optlen != sizeof(val)) | |
+ return -EINVAL; | |
+ if (copy_from_user(&val, optval, sizeof(val))) | |
+ return -EFAULT; | |
+ po->pkt_type = val & ~BIT(PACKET_LOOPBACK); | |
+ return 0; | |
+ } | |
default: | |
return -ENOPROTOOPT; | |
} | |
@@ -3789,6 +3802,13 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, | |
case PACKET_VNET_HDR: | |
val = po->has_vnet_hdr; | |
break; | |
+ case PACKET_RECV_TYPE: | |
+ if (len > sizeof(unsigned int)) | |
+ len = sizeof(unsigned int); | |
+ val = po->pkt_type; | |
+ | |
+ data = &val; | |
+ break; | |
case PACKET_VERSION: | |
val = po->tp_version; | |
break; | |
--- a/net/packet/internal.h | |
+++ b/net/packet/internal.h | |
@@ -129,6 +129,7 @@ struct packet_sock { | |
struct net_device __rcu *cached_dev; | |
int (*xmit)(struct sk_buff *skb); | |
struct packet_type prot_hook ____cacheline_aligned_in_smp; | |
+ unsigned int pkt_type; | |
}; | |
static struct packet_sock *pkt_sk(struct sock *sk) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment