summary | refs | log | tree | commit | diff
path: root/release/src/linux/linux/net
diff options
context:
space:
mode:
Diffstat (limited to 'release/src/linux/linux/net')
-rw-r--r-- release/src/linux/linux/net/core/skbuff.c | 16
-rw-r--r-- release/src/linux/linux/net/ipv4/arp.c | 24
-rw-r--r-- release/src/linux/linux/net/ipv4/igmp.c | 5
-rw-r--r-- release/src/linux/linux/net/ipv4/netfilter/Config.in | 63
-rw-r--r-- release/src/linux/linux/net/ipv4/netfilter/Makefile | 41
-rw-r--r-- release/src/linux/linux/net/ipv4/netfilter/arp_tables.c | 13
-rw-r--r-- release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_core.c | 185
-rw-r--r-- release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_h323.c | 34
-rw-r--r-- release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_pptp.c | 150
-rw-r--r-- [-rwxr-xr-x] release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_proto_esp.c | 0
-rw-r--r-- release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_proto_gre.c | 55
-rw-r--r-- release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_proto_tcp.c | 8
-rw-r--r-- release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_proto_udp.c | 30
-rw-r--r-- release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_standalone.c | 550
-rw-r--r-- release/src/linux/linux/net/ipv4/netfilter/ip_nat_core.c | 7
-rw-r--r-- release/src/linux/linux/net/ipv4/netfilter/ip_nat_h323.c | 12
-rw-r--r-- release/src/linux/linux/net/ipv4/netfilter/ip_nat_helper.c | 38
-rw-r--r-- release/src/linux/linux/net/ipv4/netfilter/ip_nat_pptp.c | 265
-rw-r--r-- [-rwxr-xr-x] release/src/linux/linux/net/ipv4/netfilter/ip_nat_proto_esp.c | 0
-rw-r--r-- release/src/linux/linux/net/ipv4/netfilter/ip_nat_proto_gre.c | 21
-rw-r--r-- release/src/linux/linux/net/ipv4/netfilter/ip_nat_proto_udp.c | 3
-rw-r--r-- release/src/linux/linux/net/ipv4/netfilter/ip_tables.c | 33
-rw-r--r-- release/src/linux/linux/net/ipv4/netfilter/ipt_BCOUNT.c | 63
-rw-r--r-- release/src/linux/linux/net/ipv4/netfilter/ipt_CLASSIFY.c | 82
-rw-r--r-- release/src/linux/linux/net/ipv4/netfilter/ipt_CONNMARK.c | 128
-rw-r--r-- release/src/linux/linux/net/ipv4/netfilter/ipt_IMQ.c | 78
-rw-r--r-- release/src/linux/linux/net/ipv4/netfilter/ipt_MACSAVE.c | 65
-rw-r--r-- release/src/linux/linux/net/ipv4/netfilter/ipt_ROUTE.c | 422
-rw-r--r-- release/src/linux/linux/net/ipv4/netfilter/ipt_TRIGGER.c | 14
-rw-r--r-- release/src/linux/linux/net/ipv4/netfilter/ipt_TTL.c | 110
-rw-r--r-- release/src/linux/linux/net/ipv4/netfilter/ipt_account.c | 942
-rw-r--r-- release/src/linux/linux/net/ipv4/netfilter/ipt_bcount.c | 59
-rw-r--r-- release/src/linux/linux/net/ipv4/netfilter/ipt_condition.c | 256
-rw-r--r-- release/src/linux/linux/net/ipv4/netfilter/ipt_connlimit.c | 222
-rw-r--r-- release/src/linux/linux/net/ipv4/netfilter/ipt_connmark.c | 83
-rw-r--r-- release/src/linux/linux/net/ipv4/netfilter/ipt_exp.c | 57
-rw-r--r-- release/src/linux/linux/net/ipv4/netfilter/ipt_geoip.c | 272
-rw-r--r-- release/src/linux/linux/net/ipv4/netfilter/ipt_ipp2p.c | 868
-rw-r--r-- release/src/linux/linux/net/ipv4/netfilter/ipt_iprange.c | 101
-rw-r--r-- release/src/linux/linux/net/ipv4/netfilter/ipt_layer7.c | 570
-rw-r--r-- release/src/linux/linux/net/ipv4/netfilter/ipt_mac.c | 6
-rw-r--r-- release/src/linux/linux/net/ipv4/netfilter/ipt_macsave.c | 62
-rw-r--r-- release/src/linux/linux/net/ipv4/netfilter/ipt_mport.c | 4
-rw-r--r-- release/src/linux/linux/net/ipv4/netfilter/ipt_quota.c | 88
-rw-r--r-- release/src/linux/linux/net/ipv4/netfilter/ipt_recent.c | 998
-rw-r--r-- release/src/linux/linux/net/ipv4/netfilter/ipt_string.c | 218
-rw-r--r-- release/src/linux/linux/net/ipv4/netfilter/ipt_time.c | 46
-rw-r--r-- release/src/linux/linux/net/ipv4/netfilter/ipt_u32.c | 211
-rw-r--r-- release/src/linux/linux/net/ipv4/netfilter/ipt_web.c | 246
-rw-r--r-- release/src/linux/linux/net/ipv4/netfilter/regexp/regexp.c | 1195
-rw-r--r-- release/src/linux/linux/net/ipv4/netfilter/regexp/regexp.h | 40
-rw-r--r-- release/src/linux/linux/net/ipv4/netfilter/regexp/regmagic.h | 5
-rw-r--r-- release/src/linux/linux/net/ipv4/netfilter/regexp/regsub.c | 95
-rw-r--r-- release/src/linux/linux/net/ipv4/netfilter/tomato_ct.c | 181
-rw-r--r-- release/src/linux/linux/net/ipv4/route.c | 10
-rw-r--r-- release/src/linux/linux/net/ipv4/sysctl_net_ipv4.c | 12
-rw-r--r-- release/src/linux/linux/net/ipv4/tcp_input.c | 311
-rw-r--r-- release/src/linux/linux/net/ipv4/tcp_minisocks.c | 3
-rw-r--r-- release/src/linux/linux/net/ipv4/tcp_output.c | 20
-rw-r--r-- release/src/linux/linux/net/ipv6/netfilter/Config.in | 4
-rw-r--r-- release/src/linux/linux/net/ipv6/netfilter/Makefile | 3
-rw-r--r-- release/src/linux/linux/net/ipv6/netfilter/ip6_tables.c | 6
-rw-r--r-- release/src/linux/linux/net/ipv6/netfilter/ip6t_IMQ.c | 78
-rw-r--r-- release/src/linux/linux/net/ipv6/netfilter/ip6t_ROUTE.c | 308
-rw-r--r-- release/src/linux/linux/net/ipv6/netfilter/ip6t_condition.c | 254
-rw-r--r-- release/src/linux/linux/net/sched/Config.in | 3
-rw-r--r-- release/src/linux/linux/net/sched/Makefile | 1
-rw-r--r-- release/src/linux/linux/net/sched/sch_api.c | 3
-rw-r--r-- release/src/linux/linux/net/sched/sch_esfq.c | 652
-rw-r--r-- release/src/linux/linux/net/sched/sch_fifo.c | 15
-rw-r--r-- release/src/linux/linux/net/sched/sch_generic.c | 13
-rw-r--r-- release/src/linux/linux/net/sched/sch_hfsc.c | 1817
-rw-r--r-- release/src/linux/linux/net/sched/sch_htb.c | 255
-rw-r--r-- release/src/linux/linux/net/sched/sch_ingress.c | 4
-rw-r--r-- release/src/linux/linux/net/sched/sch_sfq.c | 8
-rw-r--r-- release/src/linux/linux/net/socket.c | 3
76 files changed, 12035 insertions, 1088 deletions
diff --git a/release/src/linux/linux/net/core/skbuff.c b/release/src/linux/linux/net/core/skbuff.c
index 57d19374..32476545 100644
--- a/release/src/linux/linux/net/core/skbuff.c
+++ b/release/src/linux/linux/net/core/skbuff.c
@@ -201,6 +201,10 @@ struct sk_buff *alloc_skb(unsigned int size,int gfp_mask)
/* Set up other state */
skb->len = 0;
skb->cloned = 0;
+#if defined(CONFIG_IMQ) || defined (CONFIG_IMQ_MODULE)
+ skb->imq_flags = 0;
+ skb->nf_info = NULL;
+#endif
skb->data_len = 0;
atomic_set(&skb->users, 1);
@@ -248,6 +252,10 @@ static inline void skb_headerinit(void *p, kmem_cache_t *cache,
#ifdef CONFIG_NET_SCHED
skb->tc_index = 0;
#endif
+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
+ skb->imq_flags = 0;
+ skb->nf_info = NULL;
+#endif
}
static void skb_drop_fraglist(struct sk_buff *skb)
@@ -397,6 +405,10 @@ struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask)
#ifdef CONFIG_NET_SCHED
C(tc_index);
#endif
+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
+ C(imq_flags);
+ C(nf_info);
+#endif
atomic_inc(&(skb_shinfo(skb)->dataref));
skb->cloned = 1;
@@ -440,6 +452,10 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
#ifdef CONFIG_NET_SCHED
new->tc_index = old->tc_index;
#endif
+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
+ new->imq_flags=old->imq_flags;
+ new->nf_info=old->nf_info;
+#endif
}
/**
diff --git a/release/src/linux/linux/net/ipv4/arp.c b/release/src/linux/linux/net/ipv4/arp.c
index aecd020a..e458f7d5 100644
--- a/release/src/linux/linux/net/ipv4/arp.c
+++ b/release/src/linux/linux/net/ipv4/arp.c
@@ -171,8 +171,8 @@ struct neigh_table arp_tbl = {
id: "arp_cache",
parms: {
tbl: &arp_tbl,
- /*zhijian 2006-10-23 modify to solve arp entry timeout problem(cdrouter3.3 scaling module)*/
- #if 0
+/*zhijian 2006-10-23 modify to solve arp entry timeout problem(cdrouter3.3 scaling module)*/
+#if 0
base_reachable_time: 30 * HZ,
retrans_time: 1 * HZ,
gc_staletime: 60 * HZ,
@@ -181,16 +181,16 @@ struct neigh_table arp_tbl = {
queue_len: 3,
ucast_probes: 3,
mcast_probes: 3,
- #else
- base_reachable_time: 60 * HZ,
- retrans_time: 5 * HZ,
- gc_staletime: 120 * HZ,
- reachable_time: 60 * HZ,
- delay_probe_time: 10 * HZ,
- queue_len: 3,
- ucast_probes: 6,
- mcast_probes: 6,
- #endif
+#else
+ base_reachable_time: 60 * HZ,
+ retrans_time: 5 * HZ,
+ gc_staletime: 120 * HZ,
+ reachable_time: 60 * HZ,
+ delay_probe_time: 10 * HZ,
+ queue_len: 3,
+ ucast_probes: 6,
+ mcast_probes: 6,
+#endif
anycast_delay: 1 * HZ,
proxy_delay: (8 * HZ) / 10,
proxy_qlen: 64,
diff --git a/release/src/linux/linux/net/ipv4/igmp.c b/release/src/linux/linux/net/ipv4/igmp.c
index 3f718f2a..c53d8feb 100644
--- a/release/src/linux/linux/net/ipv4/igmp.c
+++ b/release/src/linux/linux/net/ipv4/igmp.c
@@ -677,8 +677,9 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr)
count++;
}
err = -ENOBUFS;
- //if (iml == NULL || count >= sysctl_igmp_max_memberships)
- if (iml == NULL || count > sysctl_igmp_max_memberships)// modify for cdrouter v3.3 item 300(cdrouter_mcast_100) bug
+ // if (iml == NULL || count >= sysctl_igmp_max_memberships)
+ // 43011: modify for cdrouter v3.3 item 300(cdrouter_mcast_100) bug
+ if (iml == NULL || count > sysctl_igmp_max_memberships)
goto done;
memcpy(&iml->multi, imr, sizeof(*imr));
iml->next = sk->protinfo.af_inet.mc_list;
diff --git a/release/src/linux/linux/net/ipv4/netfilter/Config.in b/release/src/linux/linux/net/ipv4/netfilter/Config.in
index 7662305e..b1f7f985 100644
--- a/release/src/linux/linux/net/ipv4/netfilter/Config.in
+++ b/release/src/linux/linux/net/ipv4/netfilter/Config.in
@@ -7,16 +7,17 @@ comment ' IP: Netfilter Configuration'
tristate 'Connection tracking (required for masq/NAT)' CONFIG_IP_NF_CONNTRACK
if [ "$CONFIG_IP_NF_CONNTRACK" != "n" ]; then
dep_tristate ' FTP protocol support' CONFIG_IP_NF_FTP $CONFIG_IP_NF_CONNTRACK
- dep_tristate ' TFTP protocol support' CONFIG_IP_NF_TFTP $CONFIG_IP_NF_CONNTRACK
+ bool ' Connection mark tracking support' CONFIG_IP_NF_CONNTRACK_MARK
dep_tristate ' H.323 (netmeeting) support' CONFIG_IP_NF_H323 $CONFIG_IP_NF_CONNTRACK
+ dep_tristate ' TFTP protocol support' CONFIG_IP_NF_TFTP $CONFIG_IP_NF_CONNTRACK
dep_tristate ' IRC protocol support' CONFIG_IP_NF_IRC $CONFIG_IP_NF_CONNTRACK
dep_tristate ' CuSeeMe protocol support' CONFIG_IP_NF_CUSEEME $CONFIG_IP_NF_CONNTRACK
dep_tristate ' Quake III protocol support' CONFIG_IP_NF_QUAKE3 $CONFIG_IP_NF_CONNTRACK
dep_tristate ' RTSP protocol support' CONFIG_IP_NF_RTSP $CONFIG_IP_NF_CONNTRACK
dep_tristate ' MMS protocol support' CONFIG_IP_NF_MMS $CONFIG_IP_NF_CONNTRACK
- dep_tristate ' SIP protocol support' CONFIG_IP_NF_SIP $CONFIG_IP_NF_CONNTRACK
dep_tristate ' GRE protocol support' CONFIG_IP_NF_CT_PROTO_GRE $CONFIG_IP_NF_CONNTRACK
dep_tristate ' PPTP protocol support' CONFIG_IP_NF_PPTP $CONFIG_IP_NF_CT_PROTO_GRE
+ dep_tristate ' SIP protocol support' CONFIG_IP_NF_SIP $CONFIG_IP_NF_CONNTRACK
dep_tristate ' ESP protocol support' CONFIG_IP_NF_CT_PROTO_ESP $CONFIG_IP_NF_CONNTRACK
fi
@@ -27,6 +28,10 @@ tristate 'IP tables support (required for filtering/masq/NAT)' CONFIG_IP_NF_IPTA
if [ "$CONFIG_IP_NF_IPTABLES" != "n" ]; then
# The simple matches.
dep_tristate ' limit match support' CONFIG_IP_NF_MATCH_LIMIT $CONFIG_IP_NF_IPTABLES
+ dep_tristate ' IPP2P match support' CONFIG_IP_NF_MATCH_IPP2P $CONFIG_IP_NF_IPTABLES
+ dep_tristate ' geoip match support' CONFIG_IP_NF_MATCH_GEOIP $CONFIG_IP_NF_IPTABLES
+ dep_tristate ' quota match support' CONFIG_IP_NF_MATCH_QUOTA $CONFIG_IP_NF_IPTABLES
+ dep_tristate ' IP range match support' CONFIG_IP_NF_MATCH_IPRANGE $CONFIG_IP_NF_IPTABLES
dep_tristate ' IP address pool support' CONFIG_IP_NF_POOL $CONFIG_IP_NF_IPTABLES
if [ "$CONFIG_IP_NF_POOL" = "y" -o "$CONFIG_IP_NF_POOL" = "m" ]; then
@@ -39,6 +44,12 @@ if [ "$CONFIG_IP_NF_IPTABLES" != "n" ]; then
dep_tristate ' Multiple port match support' CONFIG_IP_NF_MATCH_MULTIPORT $CONFIG_IP_NF_IPTABLES
dep_tristate ' Multiple port with ranges match support' CONFIG_IP_NF_MATCH_MPORT $CONFIG_IP_NF_IPTABLES
dep_tristate ' TOS match support' CONFIG_IP_NF_MATCH_TOS $CONFIG_IP_NF_IPTABLES
+ dep_tristate ' recent match support' CONFIG_IP_NF_MATCH_RECENT $CONFIG_IP_NF_IPTABLES
+ dep_tristate ' account match support' CONFIG_IP_NF_MATCH_ACCOUNT $CONFIG_IP_NF_IPTABLES $CONFIG_PROC_FS
+ if [ "$CONFIG_IP_NF_MATCH_ACCOUNT" != "n" ]; then
+ bool ' account debugging output' CONFIG_IP_NF_MATCH_ACCOUNT_DEBUG
+ fi
+ dep_tristate ' condition match support' CONFIG_IP_NF_MATCH_CONDITION $CONFIG_IP_NF_IPTABLES
dep_tristate ' TIME match support (EXPERIMENTAL)' CONFIG_IP_NF_MATCH_TIME $CONFIG_IP_NF_IPTABLES
dep_tristate ' ECN match support' CONFIG_IP_NF_MATCH_ECN $CONFIG_IP_NF_IPTABLES
@@ -46,19 +57,36 @@ if [ "$CONFIG_IP_NF_IPTABLES" != "n" ]; then
dep_tristate ' AH/ESP match support' CONFIG_IP_NF_MATCH_AH_ESP $CONFIG_IP_NF_IPTABLES
dep_tristate ' LENGTH match support' CONFIG_IP_NF_MATCH_LENGTH $CONFIG_IP_NF_IPTABLES
+ dep_tristate ' U32 match support' CONFIG_IP_NF_MATCH_U32 $CONFIG_IP_NF_U32
dep_tristate ' TTL match support' CONFIG_IP_NF_MATCH_TTL $CONFIG_IP_NF_IPTABLES
dep_tristate ' tcpmss match support' CONFIG_IP_NF_MATCH_TCPMSS $CONFIG_IP_NF_IPTABLES
+
if [ "$CONFIG_IP_NF_CONNTRACK" != "n" ]; then
dep_tristate ' Helper match support' CONFIG_IP_NF_MATCH_HELPER $CONFIG_IP_NF_IPTABLES
fi
if [ "$CONFIG_IP_NF_CONNTRACK" != "n" ]; then
dep_tristate ' Connection state match support' CONFIG_IP_NF_MATCH_STATE $CONFIG_IP_NF_CONNTRACK $CONFIG_IP_NF_IPTABLES
+ dep_tristate ' Connections/IP limit match support' CONFIG_IP_NF_MATCH_CONNLIMIT $CONFIG_IP_NF_IPTABLES
+ if [ "$CONFIG_IP_NF_CONNTRACK_MARK" != "n" ]; then
+ dep_tristate ' Connection mark match support' CONFIG_IP_NF_MATCH_CONNMARK $CONFIG_IP_NF_IPTABLES
+ fi
dep_tristate ' Connection tracking match support' CONFIG_IP_NF_MATCH_CONNTRACK $CONFIG_IP_NF_CONNTRACK $CONFIG_IP_NF_IPTABLES
fi
if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
dep_tristate ' Unclean match support (EXPERIMENTAL)' CONFIG_IP_NF_MATCH_UNCLEAN $CONFIG_IP_NF_IPTABLES
+ dep_tristate ' String match support (EXPERIMENTAL)' CONFIG_IP_NF_MATCH_STRING $CONFIG_IP_NF_IPTABLES
dep_tristate ' Webstr match support (EXPERIMENTAL)' CONFIG_IP_NF_MATCH_WEBSTR $CONFIG_IP_NF_IPTABLES
dep_tristate ' Owner match support (EXPERIMENTAL)' CONFIG_IP_NF_MATCH_OWNER $CONFIG_IP_NF_IPTABLES
+ dep_tristate ' Layer 7 match support (EXPERIMENTAL)' CONFIG_IP_NF_MATCH_LAYER7 $CONFIG_IP_NF_CONNTRACK
+ dep_mbool ' Layer 7 debugging output (EXPERIMENTAL)' CONFIG_IP_NF_MATCH_LAYER7_DEBUG $CONFIG_IP_NF_MATCH_LAYER7
+
+ dep_tristate ' web match' CONFIG_IP_NF_MATCH_WEB $CONFIG_IP_NF_IPTABLES
+ dep_tristate ' BCOUNT target' CONFIG_IP_NF_TARGET_BCOUNT $CONFIG_IP_NF_IPTABLES
+ dep_tristate ' bcount match' CONFIG_IP_NF_MATCH_BCOUNT $CONFIG_IP_NF_TARGET_BCOUNT
+ dep_tristate ' MACSAVE target' CONFIG_IP_NF_TARGET_MACSAVE $CONFIG_IP_NF_IPTABLES
+ dep_tristate ' macsave match' CONFIG_IP_NF_MATCH_MACSAVE $CONFIG_IP_NF_TARGET_MACSAVE
+ dep_tristate ' exp match (experimental rig - do not use)' CONFIG_IP_NF_MATCH_EXP $CONFIG_IP_NF_IPTABLES
+
fi
# The targets
dep_tristate ' Packet filtering' CONFIG_IP_NF_FILTER $CONFIG_IP_NF_IPTABLES
@@ -75,8 +103,6 @@ if [ "$CONFIG_IP_NF_IPTABLES" != "n" ]; then
define_bool CONFIG_IP_NF_NAT_NEEDED y
dep_tristate ' MASQUERADE target support' CONFIG_IP_NF_TARGET_MASQUERADE $CONFIG_IP_NF_NAT
dep_tristate ' REDIRECT target support' CONFIG_IP_NF_TARGET_REDIRECT $CONFIG_IP_NF_NAT
- dep_tristate ' Automatic port forwarding (autofw) target support' CONFIG_IP_NF_AUTOFW $CONFIG_IP_NF_NAT
- dep_tristate ' TRIGGER target support (port-trigger)' CONFIG_IP_NF_TARGET_TRIGGER $CONFIG_IP_NF_NAT
if [ "$CONFIG_IP_NF_H323" = "m" ]; then
define_tristate CONFIG_IP_NF_NAT_H323 m
else
@@ -84,6 +110,8 @@ if [ "$CONFIG_IP_NF_IPTABLES" != "n" ]; then
define_tristate CONFIG_IP_NF_NAT_H323 $CONFIG_IP_NF_NAT
fi
fi
+ dep_tristate ' Automatic port forwarding (autofw) target support' CONFIG_IP_NF_AUTOFW $CONFIG_IP_NF_NAT
+ dep_tristate ' TRIGGER target support (port-trigger)' CONFIG_IP_NF_TARGET_TRIGGER $CONFIG_IP_NF_NAT
if [ "$CONFIG_IP_NF_PPTP" = "m" ]; then
define_tristate CONFIG_IP_NF_NAT_PPTP m
else
@@ -109,13 +137,20 @@ if [ "$CONFIG_IP_NF_IPTABLES" != "n" ]; then
if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
dep_tristate ' Basic SNMP-ALG support (EXPERIMENTAL)' CONFIG_IP_NF_NAT_SNMP_BASIC $CONFIG_IP_NF_NAT
fi
+ if [ "$CONFIG_IP_NF_RTSP" = "m" ]; then
+ define_tristate CONFIG_IP_NF_NAT_RTSP m
+ else
+ if [ "$CONFIG_IP_NF_RTSP" = "y" ]; then
+ define_tristate CONFIG_IP_NF_NAT_RTSP $CONFIG_IP_NF_NAT
+ fi
+ fi
if [ "$CONFIG_IP_NF_IRC" = "m" ]; then
define_tristate CONFIG_IP_NF_NAT_IRC m
else
if [ "$CONFIG_IP_NF_IRC" = "y" ]; then
define_tristate CONFIG_IP_NF_NAT_IRC $CONFIG_IP_NF_NAT
fi
- fi
+ fi
if [ "$CONFIG_IP_NF_CUSEEME" = "m" ]; then
define_tristate CONFIG_IP_NF_NAT_CUSEEME m
else
@@ -153,13 +188,6 @@ if [ "$CONFIG_IP_NF_IPTABLES" != "n" ]; then
define_tristate CONFIG_IP_NF_NAT_TFTP $CONFIG_IP_NF_NAT
fi
fi
- if [ "$CONFIG_IP_NF_RTSP" = "m" ]; then
- define_tristate CONFIG_IP_NF_NAT_RTSP m
- else
- if [ "$CONFIG_IP_NF_RTSP" = "y" ]; then
- define_tristate CONFIG_IP_NF_NAT_RTSP $CONFIG_IP_NF_NAT
- fi
- fi
if [ "$CONFIG_IP_NF_CT_PROTO_ESP" = "m" ]; then
define_tristate CONFIG_IP_NF_NAT_PROTO_ESP m
else
@@ -178,8 +206,15 @@ if [ "$CONFIG_IP_NF_IPTABLES" != "n" ]; then
dep_tristate ' DSCP target support' CONFIG_IP_NF_TARGET_DSCP $CONFIG_IP_NF_MANGLE
dep_tristate ' MARK target support' CONFIG_IP_NF_TARGET_MARK $CONFIG_IP_NF_MANGLE
+ dep_tristate ' ROUTE target support' CONFIG_IP_NF_TARGET_ROUTE $CONFIG_IP_NF_MANGLE
+ dep_tristate ' CLASSIFY target support (EXPERIMENTAL)' CONFIG_IP_NF_TARGET_CLASSIFY $CONFIG_IP_NF_FILTER
+ dep_tristate ' IMQ target support' CONFIG_IP_NF_TARGET_IMQ $CONFIG_IP_NF_MANGLE
fi
dep_tristate ' LOG target support' CONFIG_IP_NF_TARGET_LOG $CONFIG_IP_NF_IPTABLES
+ if [ "$CONFIG_IP_NF_CONNTRACK_MARK" != "n" ]; then
+ dep_tristate ' CONNMARK target support' CONFIG_IP_NF_TARGET_CONNMARK $CONFIG_IP_NF_IPTABLES
+ fi
+ dep_tristate ' TTL target support' CONFIG_IP_NF_TARGET_TTL $CONFIG_IP_NF_IPTABLES
dep_tristate ' ULOG target support' CONFIG_IP_NF_TARGET_ULOG $CONFIG_IP_NF_IPTABLES
dep_tristate ' TCPMSS target support' CONFIG_IP_NF_TARGET_TCPMSS $CONFIG_IP_NF_IPTABLES
fi
@@ -189,6 +224,10 @@ if [ "$CONFIG_IP_NF_ARPTABLES" != "n" ]; then
dep_tristate ' ARP packet filtering' CONFIG_IP_NF_ARPFILTER $CONFIG_IP_NF_ARPTABLES
fi
+if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
+ tristate 'tomato_ct' CONFIG_IP_NF_TOMATOCT
+fi
+
# Backwards compatibility modules: only if you don't build in the others.
if [ "$CONFIG_IP_NF_CONNTRACK" != "y" ]; then
if [ "$CONFIG_IP_NF_IPTABLES" != "y" ]; then
diff --git a/release/src/linux/linux/net/ipv4/netfilter/Makefile b/release/src/linux/linux/net/ipv4/netfilter/Makefile
index abf55469..80de56f3 100644
--- a/release/src/linux/linux/net/ipv4/netfilter/Makefile
+++ b/release/src/linux/linux/net/ipv4/netfilter/Makefile
@@ -33,10 +33,10 @@ obj-$(CONFIG_IP_NF_CONNTRACK) += ip_conntrack.o
# H.323 support
obj-$(CONFIG_IP_NF_H323) += ip_conntrack_h323.o
-obj-$(CONFIG_IP_NF_NAT_H323) += ip_nat_h323.o
-ifdef CONFIG_IP_NF_NAT_H323
+ifdef CONFIG_IP_NF_H323
export-objs += ip_conntrack_h323.o
endif
+obj-$(CONFIG_IP_NF_NAT_H323) += ip_nat_h323.o
# connection tracking protocol helpers
@@ -83,10 +83,14 @@ obj-$(CONFIG_IP_NF_IRC) += ip_conntrack_irc.o
ifdef CONFIG_IP_NF_NAT_IRC
export-objs += ip_conntrack_irc.o
endif
+
+# rtsp protocol support
obj-$(CONFIG_IP_NF_RTSP) += ip_conntrack_rtsp.o
ifdef CONFIG_IP_NF_NAT_RTSP
export-objs += ip_conntrack_rtsp.o
endif
+obj-$(CONFIG_IP_NF_NAT_RTSP) += ip_nat_rtsp.o
+
# NAT helpers
obj-$(CONFIG_IP_NF_NAT_CUSEEME) += ip_nat_cuseeme.o
obj-$(CONFIG_IP_NF_NAT_PPTP) += ip_nat_pptp.o
@@ -95,7 +99,6 @@ obj-$(CONFIG_IP_NF_NAT_TFTP) += ip_nat_tftp.o
obj-$(CONFIG_IP_NF_NAT_FTP) += ip_nat_ftp.o
obj-$(CONFIG_IP_NF_NAT_IRC) += ip_nat_irc.o
obj-$(CONFIG_IP_NF_NAT_QUAKE3) += ip_nat_quake3.o
-obj-$(CONFIG_IP_NF_NAT_RTSP) += ip_nat_rtsp.o
obj-$(CONFIG_IP_NF_NAT_MMS) += ip_nat_mms.o
# generic IP tables
@@ -109,48 +112,64 @@ obj-$(CONFIG_IP_NF_NAT) += iptable_nat.o
# matches
obj-$(CONFIG_IP_NF_MATCH_HELPER) += ipt_helper.o
obj-$(CONFIG_IP_NF_MATCH_LIMIT) += ipt_limit.o
+obj-$(CONFIG_IP_NF_MATCH_IPP2P) += ipt_ipp2p.o
+obj-$(CONFIG_IP_NF_MATCH_GEOIP) += ipt_geoip.o
+obj-$(CONFIG_IP_NF_MATCH_QUOTA) += ipt_quota.o
+obj-$(CONFIG_IP_NF_MATCH_IPRANGE) += ipt_iprange.o
obj-$(CONFIG_IP_NF_MATCH_MARK) += ipt_mark.o
obj-$(CONFIG_IP_NF_POOL) += ipt_pool.o ip_pool.o
obj-$(CONFIG_IP_NF_MATCH_MAC) += ipt_mac.o
-
obj-$(CONFIG_IP_NF_MATCH_PKTTYPE) += ipt_pkttype.o
obj-$(CONFIG_IP_NF_MATCH_MULTIPORT) += ipt_multiport.o
-
obj-$(CONFIG_IP_NF_MATCH_MPORT) += ipt_mport.o
-
obj-$(CONFIG_IP_NF_MATCH_OWNER) += ipt_owner.o
obj-$(CONFIG_IP_NF_MATCH_TOS) += ipt_tos.o
-
+obj-$(CONFIG_IP_NF_MATCH_RECENT) += ipt_recent.o
+obj-$(CONFIG_IP_NF_MATCH_ACCOUNT) += ipt_account.o
+obj-$(CONFIG_IP_NF_MATCH_CONDITION) += ipt_condition.o
obj-$(CONFIG_IP_NF_MATCH_TIME) += ipt_time.o
-
obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o
obj-$(CONFIG_IP_NF_MATCH_DSCP) += ipt_dscp.o
obj-$(CONFIG_IP_NF_MATCH_AH_ESP) += ipt_ah.o ipt_esp.o
-
obj-$(CONFIG_IP_NF_MATCH_LENGTH) += ipt_length.o
-
+obj-$(CONFIG_IP_NF_MATCH_U32) += ipt_u32.o
obj-$(CONFIG_IP_NF_MATCH_TTL) += ipt_ttl.o
obj-$(CONFIG_IP_NF_MATCH_STATE) += ipt_state.o
+obj-$(CONFIG_IP_NF_MATCH_CONNLIMIT) += ipt_connlimit.o
+obj-$(CONFIG_IP_NF_MATCH_CONNMARK) += ipt_connmark.o
obj-$(CONFIG_IP_NF_MATCH_CONNTRACK) += ipt_conntrack.o
obj-$(CONFIG_IP_NF_MATCH_UNCLEAN) += ipt_unclean.o
+obj-$(CONFIG_IP_NF_MATCH_STRING) += ipt_string.o
obj-$(CONFIG_IP_NF_MATCH_WEBSTR) += ipt_webstr.o
obj-$(CONFIG_IP_NF_MATCH_TCPMSS) += ipt_tcpmss.o
+obj-$(CONFIG_IP_NF_MATCH_LAYER7) += ipt_layer7.o
+obj-$(CONFIG_IP_NF_MATCH_WEB) += ipt_web.o
+obj-$(CONFIG_IP_NF_MATCH_MACSAVE) += ipt_macsave.o
+obj-$(CONFIG_IP_NF_MATCH_EXP) += ipt_exp.o
+obj-$(CONFIG_IP_NF_MATCH_BCOUNT) += ipt_bcount.o
# targets
obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o
obj-$(CONFIG_IP_NF_TARGET_MIRROR) += ipt_MIRROR.o
+obj-$(CONFIG_IP_NF_TARGET_CLASSIFY) += ipt_CLASSIFY.o
obj-$(CONFIG_IP_NF_TARGET_TOS) += ipt_TOS.o
obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o
obj-$(CONFIG_IP_NF_TARGET_DSCP) += ipt_DSCP.o
obj-$(CONFIG_IP_NF_TARGET_MARK) += ipt_MARK.o
+obj-$(CONFIG_IP_NF_TARGET_IMQ) += ipt_IMQ.o
obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o
obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o
+obj-$(CONFIG_IP_NF_TARGET_ROUTE) += ipt_ROUTE.o
obj-$(CONFIG_IP_NF_NAT_SNMP_BASIC) += ip_nat_snmp_basic.o
obj-$(CONFIG_IP_NF_TARGET_LOG) += ipt_LOG.o
+obj-$(CONFIG_IP_NF_TARGET_CONNMARK) += ipt_CONNMARK.o
+obj-$(CONFIG_IP_NF_TARGET_TTL) += ipt_TTL.o
obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o
obj-$(CONFIG_IP_NF_TARGET_TCPMSS) += ipt_TCPMSS.o
obj-$(CONFIG_IP_NF_AUTOFW) += ip_autofw.o
obj-$(CONFIG_IP_NF_TARGET_TRIGGER) += ipt_TRIGGER.o
+obj-$(CONFIG_IP_NF_TARGET_MACSAVE) += ipt_MACSAVE.o
+obj-$(CONFIG_IP_NF_TARGET_BCOUNT) += ipt_BCOUNT.o
# generic ARP tables
obj-$(CONFIG_IP_NF_ARPTABLES) += arp_tables.o
@@ -164,6 +183,8 @@ obj-$(CONFIG_IP_NF_COMPAT_IPFWADM) += ipfwadm.o
obj-$(CONFIG_IP_NF_QUEUE) += ip_queue.o
+obj-$(CONFIG_IP_NF_TOMATOCT) += tomato_ct.o
+
include $(TOPDIR)/Rules.make
ip_conntrack.o: $(ip_conntrack-objs)
diff --git a/release/src/linux/linux/net/ipv4/netfilter/arp_tables.c b/release/src/linux/linux/net/ipv4/netfilter/arp_tables.c
index aa1c034a..757fc2ab 100644
--- a/release/src/linux/linux/net/ipv4/netfilter/arp_tables.c
+++ b/release/src/linux/linux/net/ipv4/netfilter/arp_tables.c
@@ -986,13 +986,12 @@ static int do_add_counters(void *user, unsigned int len)
goto free;
write_lock_bh(&t->lock);
- /*************************************
- * modify by tanghui @ 2006-10-11
- * for a RACE CONDITION in the "do_add_counters()" function
- *************************************/
- //if (t->private->number != paddc->num_counters) {
- if (t->private->number != tmp.num_counters) {
- /*************************************/
+
+#if 0 // removed 1.11 forward bug test
+ // 43011 (09?): checkme: modify by tanghui @ 2006-10-11 for a RACE CONDITION in the "do_add_counters()" function
+ // if (t->private->number != tmp.num_counters) {
+#endif
+ if (t->private->number != paddc->num_counters) {
ret = -EINVAL;
goto unlock_up_free;
}
diff --git a/release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_core.c b/release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_core.c
index 9c6f040f..324951ee 100644
--- a/release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_core.c
@@ -34,7 +34,12 @@
/* For ERR_PTR(). Yeah, I know... --RR */
#include <linux/fs.h>
-#include <linux/netdevice.h>
+#define TEST_JHASH // test jhash from 2.4.33 -- zzz
+
+#ifdef TEST_JHASH
+#include <linux/jhash.h>
+#include <linux/random.h>
+#endif
/* This rwlock protects the main hash table, protocol/helper/expected
registrations, conntrack timers*/
@@ -71,7 +76,7 @@ static kmem_cache_t *ip_conntrack_cachep;
int sysctl_ip_conntrack_tcp_timeouts[10] = {
30 MINS, /* TCP_CONNTRACK_NONE, */
- 5 DAYS, /* TCP_CONNTRACK_ESTABLISHED, */
+ 4 HOURS, /* TCP_CONNTRACK_ESTABLISHED, */ // was 5 days zzz
2 MINS, /* TCP_CONNTRACK_SYN_SENT, */
60 SECS, /* TCP_CONNTRACK_SYN_RECV, */
2 MINS, /* TCP_CONNTRACK_FIN_WAIT, */
@@ -128,9 +133,20 @@ ip_conntrack_put(struct ip_conntrack *ct)
nf_conntrack_put(&ct->infos[0]);
}
+#ifdef TEST_JHASH
+static int ip_conntrack_hash_rnd_initted;
+static unsigned int ip_conntrack_hash_rnd;
+#endif
+
static inline u_int32_t
hash_conntrack(const struct ip_conntrack_tuple *tuple)
{
+#ifdef TEST_JHASH
+ return (jhash_3words(tuple->src.ip,
+ (tuple->dst.ip ^ tuple->dst.protonum),
+ (tuple->src.u.all | (tuple->dst.u.all << 16)),
+ ip_conntrack_hash_rnd) % ip_conntrack_htable_size);
+#else
/* ntohl because more differences in low bits. */
/* To ensure that halves of the same connection don't hash
clash, we add the source per-proto again. */
@@ -139,6 +155,7 @@ hash_conntrack(const struct ip_conntrack_tuple *tuple)
+ tuple->dst.protonum)
+ ntohs(tuple->src.u.all))
% ip_conntrack_htable_size;
+#endif
}
inline int
@@ -314,9 +331,6 @@ clean_from_lists(struct ip_conntrack *ct)
{
DEBUGP("clean_from_lists(%p)\n", ct);
MUST_BE_WRITE_LOCKED(&ip_conntrack_lock);
- /* Remove from both hash lists: must not NULL out next ptrs,
- otherwise we'll look unconfirmed. Fortunately, LIST_DELETE
- doesn't do this. --RR */
LIST_DELETE(&ip_conntrack_hash
[hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple)],
&ct->tuplehash[IP_CT_DIR_ORIGINAL]);
@@ -359,6 +373,14 @@ destroy_conntrack(struct nf_conntrack *nfct)
list_del(&ct->master->expected_list);
kfree(ct->master);
}
+
+ #if defined(CONFIG_IP_NF_MATCH_LAYER7) || defined(CONFIG_IP_NF_MATCH_LAYER7_MODULE)
+ if(ct->layer7.app_proto)
+ kfree(ct->layer7.app_proto);
+ if(ct->layer7.app_data)
+ kfree(ct->layer7.app_data);
+ #endif
+
WRITE_UNLOCK(&ip_conntrack_lock);
DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct);
@@ -489,6 +511,7 @@ __ip_conntrack_confirm(struct nf_ct_info *nfct)
ct->timeout.expires += jiffies;
add_timer(&ct->timeout);
atomic_inc(&ct->ct_general.use);
+ set_bit(IPS_CONFIRMED_BIT, &ct->status);
WRITE_UNLOCK(&ip_conntrack_lock);
return NF_ACCEPT;
}
@@ -606,7 +629,7 @@ icmp_error_track(struct sk_buff *skb,
connection. Too bad: we're in trouble anyway. */
static inline int unreplied(const struct ip_conntrack_tuple_hash *i)
{
- return !(i->ctrack->status & IPS_ASSURED);
+ return !(test_bit(IPS_ASSURED_BIT, &i->ctrack->status));
}
static int early_drop(struct list_head *chain)
@@ -632,31 +655,6 @@ static int early_drop(struct list_head *chain)
return dropped;
}
-/******************lzh add ***************************************
-* DESCRIPTION:delete seleted ip conntrack from conntrack_hash list
-* INPUT : ip_conntrack_tuple_hash h
-* OUTPUT: NULL
-* AUTHOR: linzhihong
-* DATE : 2006.7.20
-*****************************************************************/
-void del_selected_conntrack(struct ip_conntrack_tuple_hash *h)
-{
- DEBUGP("hahaha enter %s\n", __FUNCTION__);
- if(h)
- {
- #if 1
- ip_ct_refresh(h->ctrack, 1*HZ);
- #else
- if(del_timer(&h->ctrack->timeout))
- {
- death_by_timeout((unsigned long)h->ctrack);
- }
- //ip_conntrack_put(h->ctrack);
- #endif
- }
-}
-/**************************** lzh end ******************************/
-
static inline int helper_cmp(const struct ip_conntrack_helper *i,
const struct ip_conntrack_tuple *rtuple)
{
@@ -670,41 +668,6 @@ struct ip_conntrack_helper *ip_ct_find_helper(const struct ip_conntrack_tuple *t
tuple);
}
-#define RESERVE_CONNTRACK_FOR_ROUTER
-#ifdef RESERVE_CONNTRACK_FOR_ROUTER
-#define RESERVE_CONNTRACK_NUM 20
-/*
- Check if the packet is for Router AP(LAN side only), or generate from
- Router itself(Both sides).
- */
-static int cmp_local_ip(u_int32_t dst, u_int32_t src)
-{
-#define IF_LAN_NAME "br0"
-
- int ret = -1;
- struct in_device *in_dev;
- struct net_device *dev;
- struct in_ifaddr **ifap = NULL;
- struct in_ifaddr *ifa = NULL;
-
- for(dev = dev_base; dev != NULL; dev = dev->next){
- if((in_dev=__in_dev_get(dev)) != NULL){
- for(ifap=&in_dev->ifa_list; (ifa=*ifap) != NULL; ifap=&ifa->ifa_next){
- if((ifa->ifa_address == dst && !strcmp(IF_LAN_NAME, ifa->ifa_label)) || ifa->ifa_address == src){
- /*match*/
- ret = 0;
- break;
- }
- }
- }
- }
-
- return ret;
-
-#undef IF_LAN_NAME
-}
-#endif
-
/* Allocate a new conntrack: we return -ENOMEM if classification
failed due to stress. Otherwise it really is unclassifiable. */
static struct ip_conntrack_tuple_hash *
@@ -719,9 +682,15 @@ init_conntrack(const struct ip_conntrack_tuple *tuple,
int i;
static unsigned int drop_next = 0;
+#ifdef TEST_JHASH
+ if (!ip_conntrack_hash_rnd_initted) {
+ get_random_bytes(&ip_conntrack_hash_rnd, 4);
+ ip_conntrack_hash_rnd_initted = 1;
+ }
+#endif
+
hash = hash_conntrack(tuple);
- #ifndef RESERVE_CONNTRACK_FOR_ROUTER
if (ip_conntrack_max &&
atomic_read(&ip_conntrack_count) >= ip_conntrack_max) {
/* Try dropping from random chain, or else from the
@@ -738,32 +707,6 @@ init_conntrack(const struct ip_conntrack_tuple *tuple,
return ERR_PTR(-ENOMEM);
}
}
- #else
-#define IPV4_BROADCAST_ADDR 0x000000FF
-#define IPV4_MULTICAST_ADDR 0xE0000000
- if (ip_conntrack_max &&
- (ip_conntrack_max - atomic_read(&ip_conntrack_count)) <= RESERVE_CONNTRACK_NUM){
- if((atomic_read(&ip_conntrack_count) < ip_conntrack_max) &&
- (((tuple->dst).ip & IPV4_BROADCAST_ADDR == IPV4_BROADCAST_ADDR) || ((tuple->dst).ip & IPV4_BROADCAST_ADDR == IPV4_MULTICAST_ADDR) || !cmp_local_ip((tuple->dst).ip, (tuple->src).ip))){
- //packet for router(LAN side only) or packet from router, let it go thru
- }
- else{
- /* Try dropping from random chain, or else from the
- chain about to put into (in case they're trying to
- bomb one hash chain). */
- unsigned int next = (drop_next++)%ip_conntrack_htable_size;
-
- if (!early_drop(&ip_conntrack_hash[next])
- && !early_drop(&ip_conntrack_hash[hash])) {
- if (net_ratelimit())
- printk(KERN_WARNING
- "ip_conntrack: table full, dropping"
- " packet.\n");
- return ERR_PTR(-ENOMEM);
- }
- }
- }
- #endif
if (!invert_tuple(&repl_tuple, tuple, protocol)) {
DEBUGP("Can't invert tuple.\n");
@@ -829,9 +772,12 @@ init_conntrack(const struct ip_conntrack_tuple *tuple,
conntrack, expected);
/* Welcome, Mr. Bond. We've been expecting you... */
IP_NF_ASSERT(master_ct(conntrack));
- conntrack->status = IPS_EXPECTED;
+ __set_bit(IPS_EXPECTED_BIT, &conntrack->status);
conntrack->master = expected;
expected->sibling = conntrack;
+#if CONFIG_IP_NF_CONNTRACK_MARK
+ conntrack->mark = expected->expectant->mark;
+#endif
LIST_DELETE(&ip_conntrack_expect_list, expected);
INIT_LIST_HEAD(&expected->list);
expected->expectant->expecting--;
@@ -878,11 +824,11 @@ resolve_normal_ct(struct sk_buff *skb,
*set_reply = 1;
} else {
/* Once we've had two way comms, always ESTABLISHED. */
- if (h->ctrack->status & IPS_SEEN_REPLY) {
+ if (test_bit(IPS_SEEN_REPLY_BIT, &h->ctrack->status)) {
DEBUGP("ip_conntrack_in: normal packet for %p\n",
h->ctrack);
*ctinfo = IP_CT_ESTABLISHED;
- } else if (h->ctrack->status & IPS_EXPECTED) {
+ } else if (test_bit(IPS_EXPECTED_BIT, &h->ctrack->status)) {
DEBUGP("ip_conntrack_in: related packet for %p\n",
h->ctrack);
*ctinfo = IP_CT_RELATED;
@@ -1056,16 +1002,15 @@ int ip_conntrack_expect_related(struct ip_conntrack *related_to,
}
if (old) {
- /************************* lzh add ******************************************
- * fix sip alg CDROUTE test fail
- * 2007/3/16
- ***************************************************************************/
- if (old->help.exp_sip_info.nated && (old->help.exp_sip_info.type == CONN_RTP))
- {
- DEBUGP("%s: found old exp and nated, rtp port=%d\n", __FUNCTION__,ntohs(old->tuple.dst.u.udp.port));
- related_to->help.ct_sip_info.rtpport = ntohs(old->tuple.dst.u.udp.port);
+#if 0 // removed 1.11 forward bug test
+ if (1) { // 43011 (09?): checkme
+ // lzh add, fix sip alg CDROUTE test fail, 2007/3/16
+ if (old->help.exp_sip_info.nated && (old->help.exp_sip_info.type == CONN_RTP)) {
+ DEBUGP("%s: found old exp and nated, rtp port=%d\n", __FUNCTION__,ntohs(old->tuple.dst.u.udp.port));
+ related_to->help.ct_sip_info.rtpport = ntohs(old->tuple.dst.u.udp.port);
+ }
}
- /************************ lzh end ******************************************/
+#endif
WRITE_UNLOCK(&ip_conntrack_lock);
return -EEXIST;
}
@@ -1530,6 +1475,7 @@ int __init ip_conntrack_init(void)
unsigned int i;
int ret;
+#if 0
/* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB
* machine has 256 buckets. >= 1GB machines have 8192 buckets. */
if (hashsize) {
@@ -1544,6 +1490,33 @@ int __init ip_conntrack_init(void)
ip_conntrack_htable_size = 16;
}
ip_conntrack_max = 8 * ip_conntrack_htable_size;
+#else
+/*
+
+ sizeof(list_head) = 8
+ x 4096 = 32K
+
+ sizeof(ip_conntrack) = 368
+ x 2048 = 736K
+
+*/
+
+#ifdef TEST_JHASH
+/*
+ if (hashsize) ip_conntrack_htable_size = hashsize;
+ else ip_conntrack_htable_size = 4096;
+ ip_conntrack_max = 2048;
+*/
+ if (hashsize) ip_conntrack_htable_size = hashsize;
+ else ip_conntrack_htable_size = 8092;
+ ip_conntrack_max = 4096;
+#else
+ if (hashsize) ip_conntrack_htable_size = hashsize;
+ else ip_conntrack_htable_size = 4099;
+ ip_conntrack_max = 2048;
+#endif
+
+#endif
printk("ip_conntrack version %s (%u buckets, %d max)"
" - %d bytes per conntrack\n", IP_CONNTRACK_VERSION,
@@ -1605,3 +1578,5 @@ err_unreg_sockopt:
return -ENOMEM;
}
+
+
diff --git a/release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_h323.c b/release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_h323.c
index cb0b1da5..c6172945 100644
--- a/release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_h323.c
+++ b/release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_h323.c
@@ -104,30 +104,26 @@ static int h245_help(const struct iphdr *iph, size_t len,
exp->seq = ntohl(tcph->seq) + i;
- *((u_int32_t *)data) = ct->tuplehash[!dir].tuple.dst.ip; //!!! Netmeeting fix
-
- {
- unsigned int chksum;
-
- chksum = csum_partial((char *)tcph + tcph->doff*4,
- datalen, 0);
-
- tcph->check = 0;
- tcph->check = tcp_v4_check(tcph, tcplen, iph->saddr, iph->daddr,
- csum_partial((char *)tcph, tcph->doff*4, chksum));
-
- }
-
+ // 43011 (09?): checkme
+ if (1) {
+ unsigned int chksum;
+
+ *((u_int32_t *)data) = ct->tuplehash[!dir].tuple.dst.ip; //!!! Netmeeting fix
+ chksum = csum_partial((char *)tcph + tcph->doff*4, datalen, 0);
+ tcph->check = 0;
+ tcph->check = tcp_v4_check(tcph, tcplen, iph->saddr, iph->daddr,
+ csum_partial((char *)tcph, tcph->doff*4, chksum));
+ }
exp->tuple = ((struct ip_conntrack_tuple)
{ { ct->tuplehash[!dir].tuple.src.ip,
{ 0 } },
{ data_ip,
- { data_port },
+ { .tcp = { data_port } },
IPPROTO_UDP }});
exp->mask = ((struct ip_conntrack_tuple)
{ { 0xFFFFFFFF, { 0 } },
- { 0xFFFFFFFF, { 0xFFFF }, 0xFFFF }});
+ { 0xFFFFFFFF, { .tcp = { 0xFFFF } }, 0xFFFF }});
exp->expectfn = NULL;
@@ -252,11 +248,11 @@ static int h225_help(const struct iphdr *iph, size_t len,
{ { ct->tuplehash[!dir].tuple.src.ip,
{ 0 } },
{ data_ip,
- { data_port },
+ { .tcp = { data_port } },
IPPROTO_TCP }});
exp->mask = ((struct ip_conntrack_tuple)
{ { 0xFFFFFFFF, { 0 } },
- { 0xFFFFFFFF, { 0xFFFF }, 0xFFFF }});
+ { 0xFFFFFFFF, { .tcp = { 0xFFFF } }, 0xFFFF }});
exp->expectfn = h225_expect;
@@ -317,9 +313,7 @@ static void __exit fini(void)
ip_conntrack_helper_unregister(&h225);
}
-#ifdef CONFIG_IP_NF_NAT_NEEDED
EXPORT_SYMBOL(ip_h323_lock);
-#endif
module_init(init);
module_exit(fini);
diff --git a/release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_pptp.c b/release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_pptp.c
index 17cf2b7b..a0b41051 100644
--- a/release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_pptp.c
+++ b/release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_pptp.c
@@ -1,5 +1,5 @@
/*
- * ip_conntrack_pptp.c - Version 1.11
+ * ip_conntrack_pptp.c - Version 1.9
*
* Connection tracking support for PPTP (Point to Point Tunneling Protocol).
* PPTP is a protocol for creating virtual private networks.
@@ -9,7 +9,7 @@
* GRE is defined in RFC 1701 and RFC 1702. Documentation of
* PPTP can be found in RFC 2637
*
- * (C) 2000-2002 by Harald Welte <laforge@gnumonks.org>,
+ * (C) 2000-2003 by Harald Welte <laforge@gnumonks.org>
*
* Development of this code funded by Astaro AG (http://www.astaro.com/)
*
@@ -21,6 +21,18 @@
* TODO: - finish support for multiple calls within one session
* (needs expect reservations in newnat)
* - testing of incoming PPTP calls
+ *
+ * Changes:
+ * 2002-02-05 - Version 1.3
+ * - Call ip_conntrack_unexpect_related() from
+ * pptp_timeout_related() to destroy expectations in case
+ * CALL_DISCONNECT_NOTIFY or tcp fin packet was seen
+ * (Philip Craig <philipc@snapgear.com>)
+ * - Add Version information at module loadtime
+ * 2002-02-10 - Version 1.6
+ * - move to C99 style initializers
+ * - remove second expectation if first arrives
+ *
*/
#include <linux/config.h>
@@ -35,13 +47,21 @@
#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h>
#include <linux/netfilter_ipv4/ip_conntrack_pptp.h>
+#define IP_CT_PPTP_VERSION "1.9"
+
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
MODULE_DESCRIPTION("Netfilter connection tracking helper module for PPTP");
DECLARE_LOCK(ip_pptp_lock);
+#if 0
+#include "ip_conntrack_pptp_priv.h"
+#define DEBUGP(format, args...) printk(KERN_DEBUG __FILE__ ":" __FUNCTION__ \
+ ": " format, ## args)
+#else
#define DEBUGP(format, args...)
+#endif
#define SECS *HZ
#define MINS * 60 SECS
@@ -53,8 +73,8 @@ DECLARE_LOCK(ip_pptp_lock);
static int pptp_expectfn(struct ip_conntrack *ct)
{
- struct ip_conntrack_expect *exp, *other_exp;
struct ip_conntrack *master;
+ struct ip_conntrack_expect *exp;
DEBUGP("increasing timeouts\n");
/* increase timeout of GRE data channel conntrack entry */
@@ -67,6 +87,12 @@ static int pptp_expectfn(struct ip_conntrack *ct)
return 0;
}
+ exp = ct->master;
+ if (!exp) {
+ DEBUGP("no expectation!!\n");
+ return 0;
+ }
+
DEBUGP("completing tuples with ct info\n");
/* we can do this, since we're unconfirmed */
if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.gre.key ==
@@ -83,6 +109,26 @@ static int pptp_expectfn(struct ip_conntrack *ct)
ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.gre.key =
htonl(master->help.ct_pptp_info.pac_call_id);
}
+
+ /* delete other expectation */
+ if (exp->expected_list.next != &exp->expected_list) {
+ struct ip_conntrack_expect *other_exp;
+ struct list_head *cur_item, *next;
+
+ for (cur_item = master->sibling_list.next;
+ cur_item != &master->sibling_list; cur_item = next) {
+ next = cur_item->next;
+ other_exp = list_entry(cur_item,
+ struct ip_conntrack_expect,
+ expected_list);
+ /* remove only if occurred at same sequence number */
+ if (other_exp != exp && other_exp->seq == exp->seq) {
+ DEBUGP("unexpecting other direction\n");
+ ip_ct_gre_keymap_destroy(other_exp);
+ ip_conntrack_unexpect_related(other_exp);
+ }
+ }
+ }
return 0;
}
@@ -90,15 +136,21 @@ static int pptp_expectfn(struct ip_conntrack *ct)
/* timeout GRE data connections */
static int pptp_timeout_related(struct ip_conntrack *ct)
{
- struct list_head *cur_item;
+ struct list_head *cur_item, *next;
struct ip_conntrack_expect *exp;
- list_for_each(cur_item, &ct->sibling_list) {
+ /* FIXME: do we have to lock something ? */
+ for (cur_item = ct->sibling_list.next;
+ cur_item != &ct->sibling_list; cur_item = next) {
+ next = cur_item->next;
exp = list_entry(cur_item, struct ip_conntrack_expect,
expected_list);
- if (!exp->sibling)
+ ip_ct_gre_keymap_destroy(exp);
+ if (!exp->sibling) {
+ ip_conntrack_unexpect_related(exp);
continue;
+ }
DEBUGP("setting timeout of conntrack %p to 0\n",
exp->sibling);
@@ -110,7 +162,7 @@ static int pptp_timeout_related(struct ip_conntrack *ct)
return 0;
}
-/* expect GRE connection in PNS->PAC direction */
+/* expect GRE connections (PNS->PAC and PAC->PNS direction) */
static inline int
exp_gre(struct ip_conntrack *master,
u_int32_t seq,
@@ -121,7 +173,7 @@ exp_gre(struct ip_conntrack *master,
struct ip_conntrack_tuple inv_tuple;
memset(&exp, 0, sizeof(exp));
- /* tuple in original direction, PAC->PNS */
+ /* tuple in original direction, PNS->PAC */
exp.tuple.src.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip;
exp.tuple.src.u.gre.key = htonl(ntohs(peer_callid));
exp.tuple.dst.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip;
@@ -149,11 +201,43 @@ exp_gre(struct ip_conntrack *master,
DUMP_TUPLE_RAW(&exp.tuple);
/* Add GRE keymap entries */
+ if (ip_ct_gre_keymap_add(&exp, &exp.tuple, 0) != 0)
+ return 1;
+
+ invert_tuplepr(&inv_tuple, &exp.tuple);
+ if (ip_ct_gre_keymap_add(&exp, &inv_tuple, 1) != 0) {
+ ip_ct_gre_keymap_destroy(&exp);
+ return 1;
+ }
+
+ if (ip_conntrack_expect_related(master, &exp) != 0) {
+ ip_ct_gre_keymap_destroy(&exp);
+ DEBUGP("cannot expect_related()\n");
+ return 1;
+ }
+
+ /* tuple in reply direction, PAC->PNS */
+ exp.tuple.src.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip;
+ exp.tuple.src.u.gre.key = htonl(ntohs(callid));
+ exp.tuple.dst.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip;
+ exp.tuple.dst.u.gre.key = htonl(ntohs(peer_callid));
+
+ DEBUGP("calling expect_related ");
+ DUMP_TUPLE_RAW(&exp.tuple);
+
+ /* Add GRE keymap entries */
ip_ct_gre_keymap_add(&exp, &exp.tuple, 0);
invert_tuplepr(&inv_tuple, &exp.tuple);
ip_ct_gre_keymap_add(&exp, &inv_tuple, 1);
+ /* FIXME: cannot handle error correctly, since we need to free
+ * the above keymap :( */
- ip_conntrack_expect_related(master, &exp);
+ if (ip_conntrack_expect_related(master, &exp) != 0) {
+ /* free the second pair of keymaps */
+ ip_ct_gre_keymap_destroy(&exp);
+ DEBUGP("cannot expect_related():\n");
+ return 1;
+ }
return 0;
}
@@ -240,7 +324,8 @@ pptp_inbound_pkt(struct tcphdr *tcph,
info->cstate = PPTP_CALL_OUT_CONF;
seq = ntohl(tcph->seq) + ((void *)pcid - (void *)pptph);
- exp_gre(ct, seq, *cid, *pcid);
+ if (exp_gre(ct, seq, *cid, *pcid) != 0)
+ printk("ip_conntrack_pptp: error during exp_gre\n");
break;
case PPTP_IN_CALL_REQUEST:
@@ -282,7 +367,8 @@ pptp_inbound_pkt(struct tcphdr *tcph,
/* we expect a GRE connection from PAC to PNS */
seq = ntohl(tcph->seq) + ((void *)pcid - (void *)pptph);
- exp_gre(ct, seq, *cid, *pcid);
+ if (exp_gre(ct, seq, *cid, *pcid) != 0)
+ printk("ip_conntrack_pptp: error during exp_gre\n");
break;
@@ -294,7 +380,6 @@ pptp_inbound_pkt(struct tcphdr *tcph,
/* untrack this call id, unexpect GRE packets */
pptp_timeout_related(ct);
- /* NEWNAT: look up exp for call id and unexpct_related */
break;
case PPTP_WAN_ERROR_NOTIFY:
@@ -446,7 +531,8 @@ conntrack_pptp_help(const struct iphdr *iph, size_t len,
if (tcp_v4_check(tcph, tcplen, iph->saddr, iph->daddr,
csum_partial((char *) tcph, tcplen, 0))) {
printk(KERN_NOTICE __FILE__ ": bad csum\n");
-// return NF_ACCEPT;
+ /* W2K PPTP server sends TCP packets with wrong checksum :(( */
+ //return NF_ACCEPT;
}
if (tcph->fin || tcph->rst) {
@@ -456,8 +542,6 @@ conntrack_pptp_help(const struct iphdr *iph, size_t len,
/* untrack this call id, unexpect GRE packets */
pptp_timeout_related(ct);
- /* no need to call unexpect_related since master conn
- * dies anyway */
}
@@ -482,6 +566,8 @@ conntrack_pptp_help(const struct iphdr *iph, size_t len,
LOCK_BH(&ip_pptp_lock);
+ /* FIXME: We just blindly assume that the control connection is always
+ * established from PNS->PAC. However, RFC makes no guarantee */
if (dir == IP_CT_DIR_ORIGINAL)
/* client -> server (PNS -> PAC) */
ret = pptp_outbound_pkt(tcph, pptph, datalen, ct, ctinfo);
@@ -497,13 +583,31 @@ conntrack_pptp_help(const struct iphdr *iph, size_t len,
/* control protocol helper */
static struct ip_conntrack_helper pptp = {
- { NULL, NULL },
- "pptp", IP_CT_HELPER_F_REUSE_EXPECT, THIS_MODULE, 2, 0,
- { { 0, { tcp: { port: __constant_htons(PPTP_CONTROL_PORT) } } },
- { 0, { 0 }, IPPROTO_TCP } },
- { { 0, { tcp: { port: 0xffff } } },
- { 0, { 0 }, 0xffff } },
- conntrack_pptp_help };
+ .list = { NULL, NULL },
+ .name = "pptp",
+ .flags = IP_CT_HELPER_F_REUSE_EXPECT,
+ .me = THIS_MODULE,
+ .max_expected = 2,
+ .timeout = 0,
+ .tuple = { .src = { .ip = 0,
+ .u = { .tcp = { .port =
+ __constant_htons(PPTP_CONTROL_PORT) } }
+ },
+ .dst = { .ip = 0,
+ .u = { .all = 0 },
+ .protonum = IPPROTO_TCP
+ }
+ },
+ .mask = { .src = { .ip = 0,
+ .u = { .tcp = { .port = 0xffff } }
+ },
+ .dst = { .ip = 0,
+ .u = { .all = 0 },
+ .protonum = 0xffff
+ }
+ },
+ .help = conntrack_pptp_help
+};
/* ip_conntrack_pptp initialization */
static int __init init(void)
@@ -517,12 +621,14 @@ static int __init init(void)
return -EIO;
}
+ printk("ip_conntrack_pptp version %s loaded\n", IP_CT_PPTP_VERSION);
return 0;
}
static void __exit fini(void)
{
ip_conntrack_helper_unregister(&pptp);
+ printk("ip_conntrack_pptp version %s unloaded\n", IP_CT_PPTP_VERSION);
}
module_init(init);
diff --git a/release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_proto_esp.c b/release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_proto_esp.c
index bcb8d892..bcb8d892 100755..100644
--- a/release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_proto_esp.c
+++ b/release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_proto_esp.c
diff --git a/release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_proto_gre.c b/release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_proto_gre.c
index 4ff06dcb..55b3ecea 100644
--- a/release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_proto_gre.c
+++ b/release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_proto_gre.c
@@ -1,5 +1,5 @@
/*
- * ip_conntrack_proto_gre.c - Version 1.11
+ * ip_conntrack_proto_gre.c - Version 1.2
*
* Connection tracking protocol helper module for GRE.
*
@@ -17,7 +17,7 @@
*
* Documentation about PPTP can be found in RFC 2637
*
- * (C) 2000-2002 by Harald Welte <laforge@gnumonks.org>
+ * (C) 2000-2003 by Harald Welte <laforge@gnumonks.org>
*
* Development of this code funded by Astaro AG (http://www.astaro.com/)
*
@@ -54,8 +54,18 @@ MODULE_DESCRIPTION("netfilter connection tracking protocol helper for GRE");
#define GRE_TIMEOUT (30*HZ)
#define GRE_STREAM_TIMEOUT (180*HZ)
+#if 0
+#define DEBUGP(format, args...) printk(KERN_DEBUG __FILE__ ":" __FUNCTION__ \
+ ": " format, ## args)
+#define DUMP_TUPLE_GRE(x) printk("%u.%u.%u.%u:0x%x -> %u.%u.%u.%u:0x%x:%u:0x%x\n", \
+ NIPQUAD((x)->src.ip), ntohl((x)->src.u.gre.key), \
+ NIPQUAD((x)->dst.ip), ntohl((x)->dst.u.gre.key), \
+ (x)->dst.u.gre.version, \
+ ntohs((x)->dst.u.gre.protocol))
+#else
#define DEBUGP(x, args...)
#define DUMP_TUPLE_GRE(x)
+#endif
/* GRE KEYMAP HANDLING FUNCTIONS */
static LIST_HEAD(gre_keymap_list);
@@ -103,7 +113,6 @@ int ip_ct_gre_keymap_add(struct ip_conntrack_expect *exp,
memset(km, 0, sizeof(*km));
memcpy(&km->tuple, t, sizeof(*t));
- km->master = exp;
if (!reply)
exp->proto.gre.keymap_orig = km;
@@ -132,6 +141,26 @@ void ip_ct_gre_keymap_change(struct ip_ct_gre_keymap *km,
WRITE_UNLOCK(&ip_ct_gre_lock);
}
+/* destroy the keymap entries associated with specified expect */
+void ip_ct_gre_keymap_destroy(struct ip_conntrack_expect *exp)
+{
+ DEBUGP("entering for exp %p\n", exp);
+ WRITE_LOCK(&ip_ct_gre_lock);
+ if (exp->proto.gre.keymap_orig) {
+ DEBUGP("removing %p from list\n", exp->proto.gre.keymap_orig);
+ list_del(&exp->proto.gre.keymap_orig->list);
+ kfree(exp->proto.gre.keymap_orig);
+ exp->proto.gre.keymap_orig = NULL;
+ }
+ if (exp->proto.gre.keymap_reply) {
+ DEBUGP("removing %p from list\n", exp->proto.gre.keymap_reply);
+ list_del(&exp->proto.gre.keymap_reply->list);
+ kfree(exp->proto.gre.keymap_reply);
+ exp->proto.gre.keymap_reply = NULL;
+ }
+ WRITE_UNLOCK(&ip_ct_gre_lock);
+}
+
/* PUBLIC CONNTRACK PROTO HELPER FUNCTIONS */
@@ -186,6 +215,10 @@ static int gre_pkt_to_tuple(const void *datah, size_t datalen,
srckey = gre_keymap_lookup(tuple);
+#if 0
+ DEBUGP("found src key %x for tuple ", ntohl(srckey));
+ DUMP_TUPLE_GRE(tuple);
+#endif
tuple->src.u.gre.key = srckey;
return 1;
@@ -256,18 +289,7 @@ static void gre_destroy(struct ip_conntrack *ct)
return;
}
- WRITE_LOCK(&ip_ct_gre_lock);
- if (master->proto.gre.keymap_orig) {
- DEBUGP("removing %p from list\n", master->proto.gre.keymap_orig);
- list_del(&master->proto.gre.keymap_orig->list);
- kfree(master->proto.gre.keymap_orig);
- }
- if (master->proto.gre.keymap_reply) {
- DEBUGP("removing %p from list\n", master->proto.gre.keymap_reply);
- list_del(&master->proto.gre.keymap_reply->list);
- kfree(master->proto.gre.keymap_reply);
- }
- WRITE_UNLOCK(&ip_ct_gre_lock);
+ ip_ct_gre_keymap_destroy(master);
}
/* protocol helper struct */
@@ -304,7 +326,7 @@ static void __exit fini(void)
/* delete all keymap entries */
WRITE_LOCK(&ip_ct_gre_lock);
list_for_each_safe(pos, n, &gre_keymap_list) {
- DEBUGP("deleting keymap %p\n", pos);
+ DEBUGP("deleting keymap %p at module unload time\n", pos);
list_del(pos);
kfree(pos);
}
@@ -315,6 +337,7 @@ static void __exit fini(void)
EXPORT_SYMBOL(ip_ct_gre_keymap_add);
EXPORT_SYMBOL(ip_ct_gre_keymap_change);
+EXPORT_SYMBOL(ip_ct_gre_keymap_destroy);
module_init(init);
module_exit(fini);
diff --git a/release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
index 02f20742..ebb9b493 100644
--- a/release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
+++ b/release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
@@ -173,7 +173,7 @@ static int tcp_packet(struct ip_conntrack *conntrack,
have an established connection: this is a fairly common
problem case, so we can delete the conntrack
immediately. --RR */
- if (!(conntrack->status & IPS_SEEN_REPLY) && tcph->rst) {
+ if (!test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status) && tcph->rst) {
WRITE_UNLOCK(&tcp_lock);
if (del_timer(&conntrack->timeout))
conntrack->timeout.function((unsigned long)conntrack);
@@ -211,12 +211,6 @@ static int tcp_new(struct ip_conntrack *conntrack,
return 0;
}
- if (tcph->syn && tcph->ack)
- {
- DEBUGP("ip_conntrack_tcp: invalid new deleting.\n");
- return 0;
- }
-
conntrack->proto.tcp.state = newconntrack;
return 1;
}
diff --git a/release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_proto_udp.c b/release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_proto_udp.c
index 49ac62c7..af660a27 100644
--- a/release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_proto_udp.c
+++ b/release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_proto_udp.c
@@ -7,25 +7,13 @@
#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
#include <linux/netfilter_ipv4/ip_conntrack_udp.h>
-unsigned long ip_ct_udp_isakmp_timeout = (300*HZ);
-
static int udp_pkt_to_tuple(const void *datah, size_t datalen,
struct ip_conntrack_tuple *tuple)
{
const struct udphdr *hdr = datah;
- struct isakmp_hdr *isakmp_h = (void *)hdr + 8;
tuple->src.u.udp.port = hdr->source;
tuple->dst.u.udp.port = hdr->dest;
- if(ntohs(hdr->source) == 500 && ntohs(hdr->dest) == 500)
- {
- if(NULL == isakmp_h)
- tuple->dst.u.udp.init_cookie = 0;
- else
- tuple->dst.u.udp.init_cookie = (unsigned int)(isakmp_h->init_cookie[0]);
- }
- else
- tuple->dst.u.udp.init_cookie = 0;
return 1;
}
@@ -35,7 +23,6 @@ static int udp_invert_tuple(struct ip_conntrack_tuple *tuple,
{
tuple->src.u.udp.port = orig->dst.u.udp.port;
tuple->dst.u.udp.port = orig->src.u.udp.port;
- tuple->dst.u.udp.init_cookie = orig->dst.u.udp.init_cookie;
return 1;
}
@@ -60,21 +47,16 @@ static int udp_packet(struct ip_conntrack *conntrack,
struct iphdr *iph, size_t len,
enum ip_conntrack_info conntrackinfo)
{
- u_int16_t *portptr;
- portptr = &conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port;
/* If we've seen traffic both ways, this is some kind of UDP
stream. Extend timeout. */
- if (conntrack->status & IPS_SEEN_REPLY)
- {
- if(ntohs(*portptr) == 500)
- ip_ct_refresh(conntrack, ip_ct_udp_isakmp_timeout);
- else
- ip_ct_refresh(conntrack, sysctl_ip_conntrack_udp_timeouts[UDP_STREAM_TIMEOUT]);
+ if (test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) {
+ ip_ct_refresh(conntrack,
+ sysctl_ip_conntrack_udp_timeouts[UDP_STREAM_TIMEOUT]);
/* Also, more likely to be important, and not a probe */
set_bit(IPS_ASSURED_BIT, &conntrack->status);
- }
- else
- ip_ct_refresh(conntrack, sysctl_ip_conntrack_udp_timeouts[UDP_TIMEOUT]);
+ } else
+ ip_ct_refresh(conntrack,
+ sysctl_ip_conntrack_udp_timeouts[UDP_TIMEOUT]);
return NF_ACCEPT;
}
diff --git a/release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_standalone.c b/release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_standalone.c
index c7e31931..78c3062c 100644
--- a/release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_standalone.c
+++ b/release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_standalone.c
@@ -32,503 +32,6 @@
struct module *ip_conntrack_module = THIS_MODULE;
MODULE_LICENSE("GPL");
-#define CLEAR_IP_CONNTRACK
-#define DEL_IP_CONNTRACK_ENTRY 1
-#ifdef DEL_IP_CONNTRACK_ENTRY
-/*
- *
- *This part of code add for delete an entry in ip_conntrack table.
- *
- */
-
-
-#define DEL_LIST_PATH "/tmp/.del_ip_conntrack"
-#define printkerrline() printk("del_ip_conntrack error : %s %s %d\n", __FILE__, __func__, __LINE__)
-
-struct del_list
-{
- unsigned short proto;
- unsigned int begin_port;
- unsigned int end_port;
- unsigned int ip;
- struct del_list *next;
-};
-
-void free_del_list(struct del_list *head);
-void print_del_list(struct del_list *head);
-static struct del_list * malloc_new_node(const char *buf, struct del_list * head);
-struct del_list * init_del_list(const char *buf, size_t size);
-static int read_del_file(char * buf, unsigned int size, char *path);
-static int del_match_method(const struct ip_conntrack_tuple_hash *pConn, const struct del_list * pList);
-static int del_conntrack_check(const struct ip_conntrack_tuple_hash *pConn, const struct del_list * head);
-void pf_del_ip_conntrack(void);
-static int proc_read_del_ip_conntrack(char *page, char **start, off_t off, int count, int *eof, void *context);
-static int proc_write_del_ip_conntrack(struct file *file, const char *buffer, unsigned long count, void *data);
-static int end_proc_read(const char *p, char *page, off_t off, int count, char **start, int *eof);
-
-void pf_del_ip_conntrack(void)
-{
-#define MAX_BUF_SIZE 1024*2
- int i;
- char buf[MAX_BUF_SIZE];
- struct del_list * del_head = NULL;
- struct list_head *head, *temp_head;
- struct ip_conntrack_tuple_hash *tuple_hash;
-
- //printk("pf_del_ip_conntrack---------------------------------------1\n");
- memset(buf, 0, MAX_BUF_SIZE);
-
- if(read_del_file(buf, MAX_BUF_SIZE, DEL_LIST_PATH) == -1)
- {
- goto final_return;
- }
-
- buf[MAX_BUF_SIZE - 1] = '\0';
- del_head = init_del_list(buf, MAX_BUF_SIZE - 1);
- //print_del_list(del_head);
- READ_LOCK(&ip_conntrack_lock);
- for (i = 0; i < ip_conntrack_htable_size; i++)
- {
- head = &ip_conntrack_hash[i];
- temp_head = head;
- while(1)
- {
- temp_head = temp_head->next;
- if(temp_head == head)
- {
- head = NULL;
- temp_head = NULL;
- break;
- }
- tuple_hash = (struct ip_conntrack_tuple_hash *)temp_head;
- if(del_conntrack_check(tuple_hash, del_head) == 1)
- {
- del_selected_conntrack(tuple_hash);
- }
- }
- }
- READ_UNLOCK(&ip_conntrack_lock);
- free_del_list(del_head);
-
-final_return:
-
- //printk("pf_del_ip_conntrack---------------------------------------2\n");
- return;
-#undef MAX_BUF_SIZE
-}
-
-static int del_conntrack_check(const struct ip_conntrack_tuple_hash *pConn, const struct del_list * head)
-{
- int ret;
- const struct del_list * p;
-
- ret = 0;
-
- if(pConn == NULL || head == NULL)
- {
- ret = -1;
- goto final_return;
- }
-
- for(p = head; p; p = p->next)
- {
- if(del_match_method(pConn, p) == 1)
- {
- //Match,jump out
- ret = 1;
- break;
- }
- }
-
-final_return:
- return ret;
-}
-
-static int del_match_method(const struct ip_conntrack_tuple_hash *pConn, const struct del_list * pList)
-{
- int ret;
- typedef enum
- {
- TCP_PROTO = 0x06,
- UDP_PROTO = 0x11,
- }proto_type;
- proto_type pt[2] = {TCP_PROTO, UDP_PROTO};
-
- ret = 0;
- //Check tcp and udp only
- if(pConn->tuple.dst.protonum == TCP_PROTO || pConn->tuple.dst.protonum == UDP_PROTO)
- {
- //Check proto match
- if((pList->proto == 3) ||
- ((pList->proto == 0 || pList->proto == 1) && (pConn->tuple.dst.protonum == pt[pList->proto])))
- {
- //Chcek ip address match
- if(pConn->ctrack->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip == pList->ip)
- {
- //Check port match
- unsigned int tport;
- if(pConn->tuple.dst.protonum == TCP_PROTO)
- {
- //TCP
- tport = pConn->ctrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.tcp.port;
- }
- else
- {
- //UDP
- tport = pConn->ctrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.udp.port;
- }
- tport = htons(tport);
- if(tport >= pList->begin_port && tport <= pList->end_port)
- {
- ret = 1;
- }
- }
- }
- }
- return ret;
-}
-
-static int read_del_file(char * buf, unsigned int size, char *path)
-{
- int retval, orgfsuid, orgfsgid;
- mm_segment_t orgfs;
- struct file *srcf;
-
- // Save uid and gid used for filesystem access.
- // Set user and group to 0 (root)
- orgfsuid = current->fsuid;
- orgfsgid = current->fsgid;
- current->fsuid=current->fsgid = 0;
- orgfs = get_fs();
- set_fs(KERNEL_DS);
-
- if(path && *path)
- {
- srcf = filp_open(path, O_RDONLY, 0);
- if(IS_ERR(srcf))
- {
- printkerrline();
- retval = -1;
- goto final_return;
- }
- else
- {
- if(srcf->f_op && srcf->f_op->read)
- {
- memset(buf, 0x00, size);
- retval=srcf->f_op->read(srcf, buf, size, &srcf->f_pos);
- if(retval < 0)
- {
- printkerrline();
- retval = -1;
- goto final_return;
- }
- else
- {
- //Success,go!
- retval = 0;
- goto final_return;
- }
- }
- else
- {
- printkerrline();
- retval = -1;
- goto final_return;
- }
- }
- }
- else
- {
- printkerrline();
- retval = -1;
- goto final_return;
- }
-
-final_return:
- if(!IS_ERR(srcf))
- {
- retval=filp_close(srcf,NULL);
- if(retval)
- {
- printkerrline();
- retval = -1;
- }
- }
- set_fs(orgfs);
- current->fsuid = orgfsuid;
- current->fsgid = orgfsgid;
-
- return retval;
-}
-
-struct del_list * init_del_list(const char *buf, size_t size)
-{
-#define LINE_FEED "\n"
-#define TMP_BUF_SIZE 100
- const char *begin, *end;
- char tmpbuf[TMP_BUF_SIZE];
- struct del_list * head = NULL, *tmp_p;
-
- if(buf == NULL || size <= 0 || buf[size] != '\0')
- {
- head = NULL;
- goto final_return;
- }
-
- for(begin = end = buf; begin && (begin - buf < size); begin = end + strlen(LINE_FEED))
- {
- end = strstr(begin, LINE_FEED);
- if(end)
- {
- if((end - begin) > (TMP_BUF_SIZE - 1))
- {
- //Too large,go on
- continue;
- }
- else
- {
- memcpy(tmpbuf, begin, end - begin);
- tmpbuf[end - begin] = '\0';
- //printk("obtain string : %s\n", tmpbuf);
- if((tmp_p = malloc_new_node(tmpbuf, head)) == NULL)
- {
- //Invalid format or malloc fail,go on
- continue;
- }
- else
- {
- head = tmp_p;
- }
- }
- }
- else
- {
- //printk("Last string : %s\n", begin);
- if((tmp_p = malloc_new_node(begin, head)) == NULL)
- {
- //Invalid format or malloc fail,jump out
- break;
- }
- else
- {
- head = tmp_p;
- }
- }
- }
-
-final_return:
- return head;
-
-#undef TMP_BUF_SIZE
-#undef LINE_FEED
-}
-
-static struct del_list * malloc_new_node(const char *buf, struct del_list * head)
-{
-#define SSCANF_MATCH_NUM 7
- int i, j, k, c1, c2, c3, c4;
- struct del_list *p = NULL;
-
- if(sscanf(buf, "%d %d.%d.%d.%d %d-%d", &i, &c4, &c3, &c2, &c1, &j, &k) != SSCANF_MATCH_NUM)
- {
- p = NULL;
- goto final_return;
- }
- else
- {
- if(p = (struct del_list *)kmalloc(sizeof(struct del_list), GFP_ATOMIC))
- {
- p->proto = i;
- #if 0
- //Big endian
- ((char *)&(p->ip))[0] = (char)c1;
- ((char *)&(p->ip))[1] = (char)c2;
- ((char *)&(p->ip))[2] = (char)c3;
- ((char *)&(p->ip))[3] = (char)c4;
- #else
- //Little endian
- ((char *)&(p->ip))[3] = (char)c1;
- ((char *)&(p->ip))[2] = (char)c2;
- ((char *)&(p->ip))[1] = (char)c3;
- ((char *)&(p->ip))[0] = (char)c4;
- #endif
- p->begin_port = j;
- p->end_port = k;
- p->next = head;
- }
- else
- {
- p = NULL;
- goto final_return;
- }
- }
-
-final_return:
- return p;
-#undef SSCANF_MATCH_NUM
-}
-
-void print_del_list(struct del_list *head)
-{
- int i;
- struct del_list *tmp_p;
-
- for(i = 1, tmp_p = head; tmp_p; tmp_p = tmp_p->next, i++)
- {
- printk("Node(%d): proto=%d | ip=%0x | port=[%d-%d]\n", i, tmp_p->proto, tmp_p->ip, tmp_p->begin_port, tmp_p->end_port);
- }
-}
-
-void free_del_list(struct del_list *head)
-{
- int i;
- struct del_list *tmp_p;
-
- if(head == NULL)
- {
- goto final_return;
- }
- for(i = 1, tmp_p = head; head; head = tmp_p, i++)
- {
- tmp_p = head->next;
- //printk("Free@Node(%d):proto=%d | ip=%0x | port=[%d-%d]\n", i, head->proto, head->ip, head->begin_port, head->end_port);
- kfree(head);
- }
-
-final_return:
- return;
-}
-
-static int proc_read_del_ip_conntrack(char *page, char **start, off_t off, int count, int *eof, void *context)
-{
- char *p;
-
- p = page;
- p += sprintf(page, "%s\n", "use echo \"1(0)\" to enable or disbable");
- return end_proc_read(p, page, off, count, start, eof);
-}
-
-static int proc_write_del_ip_conntrack(struct file *file, const char *buffer, unsigned long count, void *data)
-{
- unsigned char tmp[2];
-
- if(buffer)
- {
- memset(tmp, 0, sizeof(tmp));
- copy_from_user(tmp, buffer, count);
- tmp[1] = 0x00;
- switch(*tmp)
- {
- case '0':
- //Do something here
- break;
-
- case '1':
- pf_del_ip_conntrack();
- break;
-
- default:
- printk("<1>invalid args\n");
- }
- return count;
- }
- return 0;
-}
-
-static int end_proc_read(const char *p, char *page, off_t off, int count, char **start, int *eof)
-{
- int len = p - page;
-
- if(len < off + count)
- {
- *eof = 1;
- }
-
- *start = page + off;
- len -= off;
- if(len > count)
- {
- len = count;
- }
-
- if(len < 0)
- {
- len = 0;
- }
-
- return len;
-}
-
-#endif
-
-#ifdef CLEAR_IP_CONNTRACK
-void clear_ip_conntrack(void)
-{
- int i;
- struct list_head *head, *temp_head;
- struct ip_conntrack_tuple_hash *tuple_hash;
-
- printk("warning : %s %d\n", __func__, __LINE__);
-
- READ_LOCK(&ip_conntrack_lock);
- for (i = 0; i < ip_conntrack_htable_size; i++)
- {
- head = &ip_conntrack_hash[i];
- temp_head = head;
- while(1)
- {
- temp_head = temp_head->next;
- if(temp_head == head)
- {
- head = NULL;
- temp_head = NULL;
- break;
- }
- tuple_hash = (struct ip_conntrack_tuple_hash *)temp_head;
- del_selected_conntrack(tuple_hash);
- }
- }
- READ_UNLOCK(&ip_conntrack_lock);
-}
-
-static int proc_read_clear_ip_conntrack(char *page, char **start, off_t off, int count, int *eof, void *context)
-{
- char *p;
-
- p = page;
- p += sprintf(page, "%s\n", "use echo \"1(0)\" to enable or disbable");
- return end_proc_read(p, page, off, count, start, eof);
-}
-
-static int proc_write_clear_ip_conntrack(struct file *file, const char *buffer, unsigned long count, void *data)
-{
- unsigned char tmp[2];
-
- if(buffer)
- {
- memset(tmp, 0, sizeof(tmp));
- copy_from_user(tmp, buffer, count);
- tmp[1] = 0x00;
- switch(*tmp)
- {
- case '0':
- //Do something here
- break;
-
- case '1':
- clear_ip_conntrack();
- break;
-
- default:
- printk("<1>invalid args\n");
- }
- return count;
- }
- return 0;
-}
-#endif
-
static int kill_proto(const struct ip_conntrack *i, void *data)
{
return (i->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum ==
@@ -575,7 +78,7 @@ print_expect(char *buffer, const struct ip_conntrack_expect *expect)
}
static unsigned int
-print_conntrack(char *buffer, const struct ip_conntrack *conntrack)
+print_conntrack(char *buffer, struct ip_conntrack *conntrack)
{
unsigned int len;
struct ip_conntrack_protocol *proto
@@ -593,15 +96,38 @@ print_conntrack(char *buffer, const struct ip_conntrack *conntrack)
len += print_tuple(buffer + len,
&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
proto);
- if (!(conntrack->status & IPS_SEEN_REPLY))
+ if (!(test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)))
len += sprintf(buffer + len, "[UNREPLIED] ");
len += print_tuple(buffer + len,
&conntrack->tuplehash[IP_CT_DIR_REPLY].tuple,
proto);
- if (conntrack->status & IPS_ASSURED)
+ if (test_bit(IPS_ASSURED_BIT, &conntrack->status))
len += sprintf(buffer + len, "[ASSURED] ");
len += sprintf(buffer + len, "use=%u ",
atomic_read(&conntrack->ct_general.use));
+#if defined(CONFIG_IP_NF_CONNTRACK_MARK)
+ len += sprintf(buffer + len, "mark=%ld ", conntrack->mark);
+#endif
+#if defined(CONFIG_IP_NF_MATCH_LAYER7) || defined(CONFIG_IP_NF_MATCH_LAYER7_MODULE)
+ if(conntrack->layer7.app_proto)
+ len += sprintf(buffer + len, "l7proto=%s ",
+ conntrack->layer7.app_proto);
+#endif
+#if defined(CONFIG_IP_NF_TARGET_MACSAVE) || defined(CONFIG_IP_NF_TARGET_MACSAVE_MODULE)
+ if ((*((u32 *)conntrack->macsave) != 0) || (*((u16*)(conntrack->macsave + 4)) != 0)) {
+ len += sprintf(buffer + len, "macsave=%02X:%02X:%02X:%02X:%02X:%02X ",
+ conntrack->macsave[0], conntrack->macsave[1], conntrack->macsave[2],
+ conntrack->macsave[3], conntrack->macsave[4], conntrack->macsave[5]);
+ }
+#endif
+#if defined(CONFIG_IP_NF_TARGET_BCOUNT) || defined(CONFIG_IP_NF_TARGET_BCOUNT_MODULE)
+#if 0
+ if (conntrack->bcount != 0) {
+// len += sprintf(buffer + len, "bcount=0x%lx ", conntrack->bcount);
+ len += sprintf(buffer + len, "bcount=%ldK ", conntrack->bcount / 1024);
+ }
+#endif
+#endif
len += sprintf(buffer + len, "\n");
return len;
@@ -748,30 +274,6 @@ static int init_or_cleanup(int init)
if (ret < 0)
goto cleanup_nothing;
-#ifdef DEL_IP_CONNTRACK_ENTRY
- proc = proc_net_create("del_ip_conntrack", S_IFREG | S_IRUGO | S_IWUSR, proc_read_del_ip_conntrack);
- if(proc)
- {
- proc->write_proc = proc_write_del_ip_conntrack;
- proc->owner = THIS_MODULE;
- }
- else
- {
- //Maybe we can just let it go!
- }
-#endif
-#ifdef CLEAR_IP_CONNTRACK
- proc = proc_net_create("clear_ip_conntrack", S_IFREG | S_IRUGO | S_IWUSR, proc_read_clear_ip_conntrack);
- if(proc)
- {
- proc->write_proc = proc_write_clear_ip_conntrack;
- proc->owner = THIS_MODULE;
- }
- else
- {
- //Maybe we can just let it go!
- }
-#endif
proc = proc_net_create("ip_conntrack",0,list_conntracks);
if (!proc) goto cleanup_init;
proc->owner = THIS_MODULE;
diff --git a/release/src/linux/linux/net/ipv4/netfilter/ip_nat_core.c b/release/src/linux/linux/net/ipv4/netfilter/ip_nat_core.c
index f64ddabf..de6b4925 100644
--- a/release/src/linux/linux/net/ipv4/netfilter/ip_nat_core.c
+++ b/release/src/linux/linux/net/ipv4/netfilter/ip_nat_core.c
@@ -763,12 +763,11 @@ do_bindings(struct ip_conntrack *ct,
/* if this expectation is already established, skip */
if (exp->sibling)
{
- //lzh add 2007/3/16 for fix sip alg CDROUTE test
- exp = NULL;
- //lzh end
+#if 0 // removed 1.11 forward bug test
+ exp = NULL; // lzh add 2007/3/16 for fix sip alg CDROUTE test
+#endif
continue;
}
-
if (exp_for_packet(exp, pskb)) {
DEBUGP("calling nat helper (exp=%p) for packet\n",
diff --git a/release/src/linux/linux/net/ipv4/netfilter/ip_nat_h323.c b/release/src/linux/linux/net/ipv4/netfilter/ip_nat_h323.c
index 2c080cf3..bcf886e8 100644
--- a/release/src/linux/linux/net/ipv4/netfilter/ip_nat_h323.c
+++ b/release/src/linux/linux/net/ipv4/netfilter/ip_nat_h323.c
@@ -129,7 +129,7 @@ h225_nat_expected(struct sk_buff **pskb,
mr.range[0].flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
mr.range[0].min = mr.range[0].max
= ((union ip_conntrack_manip_proto)
- { port });
+ { .tcp = { port } });
}
ret = ip_nat_setup_info(ct, &mr, hooknum);
@@ -184,12 +184,14 @@ static int h323_signal_address_fixup(struct ip_conntrack *ct,
if (!between(info->seq[i] + 6, ntohl(tcph->seq),
ntohl(tcph->seq) + datalen)) {
/* Partial retransmisison. It's a cracker being funky. */
+#if 0 // ... or a miss id? zzz
if (net_ratelimit()) {
printk("H.323_NAT: partial packet %u/6 in %u/%u\n",
info->seq[i],
ntohl(tcph->seq),
ntohl(tcph->seq) + datalen);
}
+#endif
return 0;
}
@@ -252,18 +254,18 @@ static int h323_data_fixup(struct ip_ct_h225_expect *info,
DEBUGP("h323_data_fixup: offset %u + 6 in %u\n", info->offset, tcplen);
DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
-
- memset(&newtuple, 0, sizeof(newtuple));
if (!between(expect->seq + 6, ntohl(tcph->seq),
ntohl(tcph->seq) + tcplen - tcph->doff * 4)) {
/* Partial retransmisison. It's a cracker being funky. */
+#if 1 // also caused by bad id?
if (net_ratelimit()) {
printk("H.323_NAT: partial packet %u/6 in %u/%u\n",
expect->seq,
ntohl(tcph->seq),
ntohl(tcph->seq) + tcplen - tcph->doff * 4);
}
+#endif
return 0;
}
@@ -392,9 +394,9 @@ static struct ip_nat_helper h225 =
"H.225", /* name */
IP_NAT_HELPER_F_ALWAYS, /* flags */
THIS_MODULE, /* module */
- { { 0, { __constant_htons(H225_PORT) } }, /* tuple */
+ { { 0, { .tcp = { __constant_htons(H225_PORT) } } }, /* tuple */
{ 0, { 0 }, IPPROTO_TCP } },
- { { 0, { 0xFFFF } }, /* mask */
+ { { 0, { .tcp = { 0xFFFF } } }, /* mask */
{ 0, { 0 }, 0xFFFF } },
h225_nat_help, /* helper */
h225_nat_expected /* expectfn */
diff --git a/release/src/linux/linux/net/ipv4/netfilter/ip_nat_helper.c b/release/src/linux/linux/net/ipv4/netfilter/ip_nat_helper.c
index ffde5133..e7987430 100644
--- a/release/src/linux/linux/net/ipv4/netfilter/ip_nat_helper.c
+++ b/release/src/linux/linux/net/ipv4/netfilter/ip_nat_helper.c
@@ -79,7 +79,6 @@ ip_nat_resize_packet(struct sk_buff **skb,
iph = (*skb)->nh.iph;
if (iph->protocol == IPPROTO_TCP) {
struct tcphdr *tcph = (void *)iph + iph->ihl*4;
- void *data = (void *)tcph + tcph->doff*4;
DEBUGP("ip_nat_resize_packet: Seq_offset before: ");
DUMP_OFFSET(this_way);
@@ -354,54 +353,49 @@ sack_adjust(struct tcphdr *tcph,
}
-/* TCP SACK sequence number adjustment, return 0 if sack found and adjusted */
-static inline int
+/* TCP SACK sequence number adjustment. */
+static inline void
ip_nat_sack_adjust(struct sk_buff *skb,
struct ip_conntrack *ct,
enum ip_conntrack_info ctinfo)
{
- struct iphdr *iph;
struct tcphdr *tcph;
- unsigned char *ptr;
- int length, dir, sack_adjusted = 0;
+ unsigned char *ptr, *optend;
+ unsigned int dir;
- iph = skb->nh.iph;
- tcph = (void *)iph + iph->ihl*4;
- length = (tcph->doff*4)-sizeof(struct tcphdr);
+ tcph = (void *)skb->nh.iph + skb->nh.iph->ihl*4;
+ optend = (unsigned char *)tcph + tcph->doff*4;
ptr = (unsigned char *)(tcph+1);
dir = CTINFO2DIR(ctinfo);
- while (length > 0) {
- int opcode = *ptr++;
+ while (ptr < optend) {
+ int opcode = ptr[0];
int opsize;
switch (opcode) {
case TCPOPT_EOL:
- return !sack_adjusted;
+ return;
case TCPOPT_NOP:
- length--;
+ ptr++;
continue;
default:
- opsize = *ptr++;
- if (opsize > length) /* no partial opts */
- return !sack_adjusted;
+ opsize = ptr[1];
+ /* no partial opts */
+ if (ptr + opsize > optend || opsize < 2)
+ return;
if (opcode == TCPOPT_SACK) {
/* found SACK */
if((opsize >= (TCPOLEN_SACK_BASE
+TCPOLEN_SACK_PERBLOCK)) &&
!((opsize - TCPOLEN_SACK_BASE)
% TCPOLEN_SACK_PERBLOCK))
- sack_adjust(tcph, ptr-2,
+ sack_adjust(tcph, ptr,
&ct->nat.info.seq[!dir]);
-
- sack_adjusted = 1;
}
- ptr += opsize-2;
- length -= opsize;
+ ptr += opsize;
}
}
- return !sack_adjusted;
}
/* TCP sequence number adjustment */
diff --git a/release/src/linux/linux/net/ipv4/netfilter/ip_nat_pptp.c b/release/src/linux/linux/net/ipv4/netfilter/ip_nat_pptp.c
index 71077933..358a4579 100644
--- a/release/src/linux/linux/net/ipv4/netfilter/ip_nat_pptp.c
+++ b/release/src/linux/linux/net/ipv4/netfilter/ip_nat_pptp.c
@@ -1,5 +1,5 @@
/*
- * ip_nat_pptp.c - Version 1.11
+ * ip_nat_pptp.c - Version 1.5
*
* NAT support for PPTP (Point to Point Tunneling Protocol).
* PPTP is a a protocol for creating virtual private networks.
@@ -9,7 +9,7 @@
* GRE is defined in RFC 1701 and RFC 1702. Documentation of
* PPTP can be found in RFC 2637
*
- * (C) 2000-2002 by Harald Welte <laforge@gnumonks.org>
+ * (C) 2000-2003 by Harald Welte <laforge@gnumonks.org>
*
* Development of this code funded by Astaro AG (http://www.astaro.com/)
*
@@ -17,7 +17,18 @@
* (needs netfilter newnat code)
* - NAT to a unique tuple, not to TCP source port
* (needs netfilter tuple reservation)
- * - Support other NAT scenarios than SNAT of PNS
+ *
+ * Changes:
+ * 2002-02-10 - Version 1.3
+ * - Use ip_nat_mangle_tcp_packet() because of cloned skb's
+ * in local connections (Philip Craig <philipc@snapgear.com>)
+ * - add checks for magicCookie and pptp version
+ * - make argument list of pptp_{out,in}bound_packet() shorter
+ * - move to C99 style initializers
+ * - print version number at module loadtime
+ * 2003-09-22 - Version 1.5
+ * - use SNATed tcp sourceport as callid, since we get called before
+ * TCP header is mangled (Philip Craig <philipc@snapgear.com>)
*
*/
@@ -34,6 +45,8 @@
#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h>
#include <linux/netfilter_ipv4/ip_conntrack_pptp.h>
+#define IP_NAT_PPTP_VERSION "1.5"
+
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
MODULE_DESCRIPTION("Netfilter NAT helper module for PPTP");
@@ -57,7 +70,7 @@ pptp_nat_expected(struct sk_buff **pskb,
struct ip_nat_multi_range mr;
struct ip_ct_pptp_master *ct_pptp_info;
struct ip_nat_pptp *nat_pptp_info;
- u_int32_t newsrcip, newdstip, newcid;
+ u_int32_t newip, newcid;
int ret;
IP_NF_ASSERT(info);
@@ -72,7 +85,7 @@ pptp_nat_expected(struct sk_buff **pskb,
/* need to alter GRE tuple because conntrack expectfn() used 'wrong'
* (unmanipulated) values */
- if (hooknum == NF_IP_PRE_ROUTING) {
+ if (HOOK2MANIP(hooknum) == IP_NAT_MANIP_DST) {
DEBUGP("completing tuples with NAT info \n");
/* we can do this, since we're unconfirmed */
if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.gre.key ==
@@ -80,68 +93,43 @@ pptp_nat_expected(struct sk_buff **pskb,
/* assume PNS->PAC */
ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.gre.key =
htonl(nat_pptp_info->pns_call_id);
-// ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u.gre.key =
-// htonl(nat_pptp_info->pac_call_id);
ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.gre.key =
htonl(nat_pptp_info->pns_call_id);
+ newip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip;
+ newcid = htonl(nat_pptp_info->pac_call_id);
} else {
/* assume PAC->PNS */
- DEBUGP("WRONG DIRECTION\n");
ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.gre.key =
htonl(nat_pptp_info->pac_call_id);
ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.gre.key =
htonl(nat_pptp_info->pac_call_id);
- }
- }
-
- if (HOOK2MANIP(hooknum) == IP_NAT_MANIP_DST)
- {
- if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.gre.key == htonl(ct_pptp_info->pac_call_id))
- {
- /* assume PNS->PAC */
- newdstip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip;
- newcid = htonl(nat_pptp_info->pac_call_id);
- }
- else
- {
- /* assume PAC->PNS */
- newdstip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip;
+ newip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip;
newcid = htonl(nat_pptp_info->pns_call_id);
}
- mr.rangesize = 1;
- mr.range[0].flags = IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED;
- mr.range[0].min_ip = mr.range[0].max_ip = newdstip;
- mr.range[0].min = mr.range[0].max =
- ((union ip_conntrack_manip_proto ) { newcid });
- DEBUGP("change dest ip to %u.%u.%u.%u\n",
- NIPQUAD(newdstip));
- DEBUGP("change dest key to 0x%x\n", ntohl(newcid));
- ret = ip_nat_setup_info(ct, &mr, hooknum);
} else {
- if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.gre.key == htonl(ct_pptp_info->pac_call_id))
- {
- newsrcip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip;
+ if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.gre.key ==
+ htonl(ct_pptp_info->pac_call_id)) {
+ /* assume PNS->PAC */
+ newip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip;
newcid = htonl(ct_pptp_info->pns_call_id);
}
- else
- {
+ else {
/* assume PAC->PNS */
- newsrcip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip;
+ newip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip;
newcid = htonl(ct_pptp_info->pac_call_id);
}
-
- mr.rangesize = 1;
- mr.range[0].flags = IP_NAT_RANGE_MAP_IPS
- |IP_NAT_RANGE_PROTO_SPECIFIED;
- mr.range[0].min_ip = mr.range[0].max_ip = newsrcip;
- mr.range[0].min = mr.range[0].max =
- ((union ip_conntrack_manip_proto ) { newcid });
- DEBUGP("change src ip to %u.%u.%u.%u\n",
- NIPQUAD(newsrcip));
- DEBUGP("change 'src' key to 0x%x\n", ntohl(newcid));
- ret = ip_nat_setup_info(ct, &mr, hooknum);
}
+ mr.rangesize = 1;
+ mr.range[0].flags = IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED;
+ mr.range[0].min_ip = mr.range[0].max_ip = newip;
+ mr.range[0].min = mr.range[0].max =
+ ((union ip_conntrack_manip_proto ) { newcid });
+ DEBUGP("change ip to %u.%u.%u.%u\n",
+ NIPQUAD(newip));
+ DEBUGP("change key to 0x%x\n", ntohl(newcid));
+ ret = ip_nat_setup_info(ct, &mr, hooknum);
+
UNLOCK_BH(&ip_pptp_lock);
return ret;
@@ -150,13 +138,17 @@ pptp_nat_expected(struct sk_buff **pskb,
/* outbound packets == from PNS to PAC */
static inline unsigned int
-pptp_outbound_pkt(struct tcphdr *tcph, struct pptp_pkt_hdr *pptph,
- size_t datalen,
+pptp_outbound_pkt(struct sk_buff **pskb,
struct ip_conntrack *ct,
enum ip_conntrack_info ctinfo,
struct ip_conntrack_expect *exp)
{
+ struct iphdr *iph = (*pskb)->nh.iph;
+ struct tcphdr *tcph = (void *) iph + iph->ihl*4;
+ struct pptp_pkt_hdr *pptph = (struct pptp_pkt_hdr *)
+ ((void *)tcph + tcph->doff*4);
+
struct PptpControlHeader *ctlh;
union pptp_ctrl_union pptpReq;
struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info;
@@ -164,6 +156,7 @@ pptp_outbound_pkt(struct tcphdr *tcph, struct pptp_pkt_hdr *pptph,
u_int16_t msg, *cid = NULL, new_callid;
+ /* FIXME: size checks !!! */
ctlh = (struct PptpControlHeader *) ((void *) pptph + sizeof(*pptph));
pptpReq.rawreq = (void *) ((void *) ctlh + sizeof(*ctlh));
@@ -172,11 +165,18 @@ pptp_outbound_pkt(struct tcphdr *tcph, struct pptp_pkt_hdr *pptph,
switch (msg = ntohs(ctlh->messageType)) {
case PPTP_OUT_CALL_REQUEST:
cid = &pptpReq.ocreq->callID;
+ /* FIXME: ideally we would want to reserve a call ID
+ * here. current netfilter NAT core is not able to do
+ * this :( For now we use TCP source port. This breaks
+ * multiple calls within one control session */
/* save original call ID in nat_info */
nat_pptp_info->pns_call_id = ct_pptp_info->pns_call_id;
- new_callid = tcph->source;
+ /* don't use tcph->source since we are at a DSTmanip
+ * hook (e.g. PREROUTING) and pkt is not mangled yet */
+ new_callid = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port;
+
/* save new call ID in ct info */
ct_pptp_info->pns_call_id = ntohs(new_callid);
break;
@@ -186,10 +186,6 @@ pptp_outbound_pkt(struct tcphdr *tcph, struct pptp_pkt_hdr *pptph,
case PPTP_CALL_CLEAR_REQUEST:
cid = &pptpReq.clrreq->callID;
break;
- case PPTP_CALL_DISCONNECT_NOTIFY:
- cid = &pptpReq.disc->callID;
- break;
-
default:
DEBUGP("unknown outbound packet 0x%04x:%s\n", msg,
(msg <= PPTP_MSG_MAX)? strMName[msg]:strMName[0]);
@@ -204,11 +200,6 @@ pptp_outbound_pkt(struct tcphdr *tcph, struct pptp_pkt_hdr *pptph,
case PPTP_ECHO_REQUEST:
case PPTP_ECHO_REPLY:
/* no need to alter packet */
- DEBUGP("outbound control message %s\n", strMName[msg]);
- DEBUGP("ct->pac_call_id = %d\n", ct_pptp_info->pac_call_id);
- DEBUGP("ct->pns_call_id = %d\n", ct_pptp_info->pns_call_id);
- DEBUGP("nat->pac_call_id = %d\n", nat_pptp_info->pac_call_id);
- DEBUGP("nat->pns_call_id = %d\n", nat_pptp_info->pns_call_id);
return NF_ACCEPT;
}
@@ -216,27 +207,27 @@ pptp_outbound_pkt(struct tcphdr *tcph, struct pptp_pkt_hdr *pptph,
DEBUGP("altering call id from 0x%04x to 0x%04x\n",
ntohs(*cid), ntohs(new_callid));
+
/* mangle packet */
- tcph->check = ip_nat_cheat_check(*cid^0xFFFF,
- new_callid, tcph->check);
- *cid = new_callid;
-
- DEBUGP("outbound control message %s\n", strMName[msg]);
- DEBUGP("ct->pac_call_id = %d\n", ct_pptp_info->pac_call_id);
- DEBUGP("ct->pns_call_id = %d\n", ct_pptp_info->pns_call_id);
- DEBUGP("nat->pac_call_id = %d\n", nat_pptp_info->pac_call_id);
- DEBUGP("nat->pns_call_id = %d\n", nat_pptp_info->pns_call_id);
+ ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, (void *)cid - (void *)pptph,
+ sizeof(new_callid), (char *)&new_callid,
+ sizeof(new_callid));
+
return NF_ACCEPT;
}
/* inbound packets == from PAC to PNS */
static inline unsigned int
-pptp_inbound_pkt(struct tcphdr *tcph, struct pptp_pkt_hdr *pptph,
- size_t datalen,
+pptp_inbound_pkt(struct sk_buff **pskb,
struct ip_conntrack *ct,
enum ip_conntrack_info ctinfo,
struct ip_conntrack_expect *oldexp)
{
+ struct iphdr *iph = (*pskb)->nh.iph;
+ struct tcphdr *tcph = (void *) iph + iph->ihl*4;
+ struct pptp_pkt_hdr *pptph = (struct pptp_pkt_hdr *)
+ ((void *)tcph + tcph->doff*4);
+
struct PptpControlHeader *ctlh;
union pptp_ctrl_union pptpReq;
struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info;
@@ -245,8 +236,10 @@ pptp_inbound_pkt(struct tcphdr *tcph, struct pptp_pkt_hdr *pptph,
u_int16_t msg, new_cid = 0, new_pcid, *pcid = NULL, *cid = NULL;
u_int32_t old_dst_ip;
- struct ip_conntrack_tuple t;
+ struct ip_conntrack_tuple t, inv_t;
+ struct ip_conntrack_tuple *orig_t, *reply_t;
+ /* FIXME: size checks !!! */
ctlh = (struct PptpControlHeader *) ((void *) pptph + sizeof(*pptph));
pptpReq.rawreq = (void *) ((void *) ctlh + sizeof(*ctlh));
@@ -262,23 +255,30 @@ pptp_inbound_pkt(struct tcphdr *tcph, struct pptp_pkt_hdr *pptph,
}
old_dst_ip = oldexp->tuple.dst.ip;
t = oldexp->tuple;
+ invert_tuplepr(&inv_t, &t);
/* save original PAC call ID in nat_info */
nat_pptp_info->pac_call_id = ct_pptp_info->pac_call_id;
- /* store new callID in ct_info, so conntrack works */
- //ct_pptp_info->pac_call_id = ntohs(tcph->source);
- //new_cid = htons(ct_pptp_info->pac_call_id);
-
/* alter expectation */
- if (t.dst.ip == ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip) {
+ orig_t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
+ reply_t = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
+ if (t.src.ip == orig_t->src.ip && t.dst.ip == orig_t->dst.ip) {
/* expectation for PNS->PAC direction */
- t.dst.u.gre.key = htonl(ct_pptp_info->pac_call_id);
t.src.u.gre.key = htonl(nat_pptp_info->pns_call_id);
+ t.dst.u.gre.key = htonl(ct_pptp_info->pac_call_id);
+ inv_t.src.ip = reply_t->src.ip;
+ inv_t.dst.ip = reply_t->dst.ip;
+ inv_t.src.u.gre.key = htonl(nat_pptp_info->pac_call_id);
+ inv_t.dst.u.gre.key = htonl(ct_pptp_info->pns_call_id);
} else {
/* expectation for PAC->PNS direction */
- t.dst.ip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip;
- DEBUGP("EXPECTATION IN WRONG DIRECTION!!!\n");
+ t.src.u.gre.key = htonl(nat_pptp_info->pac_call_id);
+ t.dst.u.gre.key = htonl(ct_pptp_info->pns_call_id);
+ inv_t.src.ip = orig_t->src.ip;
+ inv_t.dst.ip = orig_t->dst.ip;
+ inv_t.src.u.gre.key = htonl(nat_pptp_info->pns_call_id);
+ inv_t.dst.u.gre.key = htonl(ct_pptp_info->pac_call_id);
}
if (!ip_conntrack_change_expect(oldexp, &t)) {
@@ -287,13 +287,7 @@ pptp_inbound_pkt(struct tcphdr *tcph, struct pptp_pkt_hdr *pptph,
DEBUGP("can't change expect\n");
}
ip_ct_gre_keymap_change(oldexp->proto.gre.keymap_orig, &t);
- /* reply keymap */
- t.src.ip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip;
- t.dst.ip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip;
- t.src.u.gre.key = htonl(nat_pptp_info->pac_call_id);
- t.dst.u.gre.key = htonl(ct_pptp_info->pns_call_id);
- ip_ct_gre_keymap_change(oldexp->proto.gre.keymap_reply, &t);
-
+ ip_ct_gre_keymap_change(oldexp->proto.gre.keymap_reply, &inv_t);
break;
case PPTP_IN_CALL_CONNECT:
pcid = &pptpReq.iccon->peersCallID;
@@ -323,9 +317,10 @@ pptp_inbound_pkt(struct tcphdr *tcph, struct pptp_pkt_hdr *pptph,
case PPTP_WAN_ERROR_NOTIFY:
pcid = &pptpReq.wanerr->peersCallID;
break;
- case PPTP_SET_LINK_INFO:
- pcid = &pptpReq.setlink->peersCallID;
+ case PPTP_CALL_DISCONNECT_NOTIFY:
+ pcid = &pptpReq.disc->callID;
break;
+
default:
DEBUGP("unknown inbound packet %s\n",
(msg <= PPTP_MSG_MAX)? strMName[msg]:strMName[0]);
@@ -334,14 +329,10 @@ pptp_inbound_pkt(struct tcphdr *tcph, struct pptp_pkt_hdr *pptph,
case PPTP_START_SESSION_REQUEST:
case PPTP_START_SESSION_REPLY:
case PPTP_STOP_SESSION_REQUEST:
+ case PPTP_STOP_SESSION_REPLY:
case PPTP_ECHO_REQUEST:
case PPTP_ECHO_REPLY:
/* no need to alter packet */
- DEBUGP("inbound control message %s\n", strMName[msg]);
- DEBUGP("ct->pac_call_id = %d\n", ct_pptp_info->pac_call_id);
- DEBUGP("ct->pns_call_id = %d\n", ct_pptp_info->pns_call_id);
- DEBUGP("nat->pac_call_id = %d\n", nat_pptp_info->pac_call_id);
- DEBUGP("nat->pns_call_id = %d\n", nat_pptp_info->pns_call_id);
return NF_ACCEPT;
}
@@ -349,25 +340,21 @@ pptp_inbound_pkt(struct tcphdr *tcph, struct pptp_pkt_hdr *pptph,
IP_NF_ASSERT(pcid);
DEBUGP("altering peer call id from 0x%04x to 0x%04x\n",
ntohs(*pcid), ntohs(new_pcid));
- tcph->check = ip_nat_cheat_check(*pcid^0xFFFF,
- new_pcid, tcph->check);
- *pcid = new_pcid;
+ ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, (void *)pcid - (void *)pptph,
+ sizeof(new_pcid), (char *)&new_pcid,
+ sizeof(new_pcid));
if (new_cid) {
IP_NF_ASSERT(cid);
DEBUGP("altering call id from 0x%04x to 0x%04x\n",
ntohs(*cid), ntohs(new_cid));
- tcph->check = ip_nat_cheat_check(*cid^0xFFFF,
- new_cid, tcph->check);
- *cid = new_cid;
+ ip_nat_mangle_tcp_packet(pskb, ct, ctinfo,
+ (void *)cid - (void *)pptph,
+ sizeof(new_cid), (char *)&new_cid,
+ sizeof(new_cid));
}
/* great, at least we don't need to resize packets */
- DEBUGP("inbound control message %s\n", strMName[msg]);
- DEBUGP("ct->pac_call_id = %d\n", ct_pptp_info->pac_call_id);
- DEBUGP("ct->pns_call_id = %d\n", ct_pptp_info->pns_call_id);
- DEBUGP("nat->pac_call_id = %d\n", nat_pptp_info->pac_call_id);
- DEBUGP("nat->pns_call_id = %d\n", nat_pptp_info->pns_call_id);
return NF_ACCEPT;
}
@@ -387,12 +374,13 @@ static unsigned int tcp_help(struct ip_conntrack *ct,
DEBUGP("entering\n");
- /* Only mangle things once: original direction in POST_ROUTING
- and reply direction on PRE_ROUTING. */
+ /* Only mangle things once: DST for original direction
+ and SRC for reply direction. */
dir = CTINFO2DIR(ctinfo);
- if (!((HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC && dir == IP_CT_DIR_ORIGINAL)
- || (HOOK2MANIP(hooknum) == IP_NAT_MANIP_DST && dir == IP_CT_DIR_REPLY)))
- {
+ if (!((HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC
+ && dir == IP_CT_DIR_ORIGINAL)
+ || (HOOK2MANIP(hooknum) == IP_NAT_MANIP_DST
+ && dir == IP_CT_DIR_REPLY))) {
DEBUGP("Not touching dir %s at hook %s\n",
dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY",
hooknum == NF_IP_POST_ROUTING ? "POSTROUTING"
@@ -409,13 +397,11 @@ static unsigned int tcp_help(struct ip_conntrack *ct,
return NF_ACCEPT;
}
-
pptph = (struct pptp_pkt_hdr *) ((void *)tcph + tcph->doff*4);
/* if it's not a control message, we can't handle it */
if (ntohs(pptph->packetType) != PPTP_PACKET_CONTROL ||
- ntohl(pptph->magicCookie) != PPTP_MAGIC_COOKIE)
- {
+ ntohl(pptph->magicCookie) != PPTP_MAGIC_COOKIE) {
DEBUGP("not a pptp control packet\n");
return NF_ACCEPT;
}
@@ -424,10 +410,10 @@ static unsigned int tcp_help(struct ip_conntrack *ct,
if (dir == IP_CT_DIR_ORIGINAL) {
/* reuqests sent by client to server (PNS->PAC) */
- pptp_outbound_pkt(tcph, pptph, datalen, ct, ctinfo, exp);
+ pptp_outbound_pkt(pskb, ct, ctinfo, exp);
} else {
/* response from the server to the client (PAC->PNS) */
- pptp_inbound_pkt(tcph, pptph, datalen, ct, ctinfo, exp);
+ pptp_inbound_pkt(pskb, ct, ctinfo, exp);
}
UNLOCK_BH(&ip_pptp_lock);
@@ -437,29 +423,52 @@ static unsigned int tcp_help(struct ip_conntrack *ct,
/* nat helper struct for control connection */
static struct ip_nat_helper pptp_tcp_helper = {
- { NULL, NULL },
- "pptp", IP_NAT_HELPER_F_ALWAYS, THIS_MODULE,
- { { 0, { tcp: { port: __constant_htons(PPTP_CONTROL_PORT) } } },
- { 0, { 0 }, IPPROTO_TCP } },
- { { 0, { tcp: { port: 0xFFFF } } },
- { 0, { 0 }, 0xFFFF } },
- tcp_help, pptp_nat_expected };
+ .list = { NULL, NULL },
+ .name = "pptp",
+ .flags = IP_NAT_HELPER_F_ALWAYS,
+ .me = THIS_MODULE,
+ .tuple = { .src = { .ip = 0,
+ .u = { .tcp = { .port =
+ __constant_htons(PPTP_CONTROL_PORT) }
+ }
+ },
+ .dst = { .ip = 0,
+ .u = { .all = 0 },
+ .protonum = IPPROTO_TCP
+ }
+ },
+
+ .mask = { .src = { .ip = 0,
+ .u = { .tcp = { .port = 0xFFFF } }
+ },
+ .dst = { .ip = 0,
+ .u = { .all = 0 },
+ .protonum = 0xFFFF
+ }
+ },
+ .help = tcp_help,
+ .expect = pptp_nat_expected
+};
static int __init init(void)
{
- DEBUGP("init_module\n" );
-
- if (ip_nat_helper_register(&pptp_tcp_helper))
+ DEBUGP("%s: registering NAT helper\n", __FILE__);
+ if (ip_nat_helper_register(&pptp_tcp_helper)) {
+ printk(KERN_ERR "Unable to register NAT application helper "
+ "for pptp\n");
return -EIO;
+ }
- return 0;
+ printk("ip_nat_pptp version %s loaded\n", IP_NAT_PPTP_VERSION);
+ return 0;
}
static void __exit fini(void)
{
DEBUGP("cleanup_module\n" );
- ip_nat_helper_unregister(&pptp_tcp_helper);
+ ip_nat_helper_unregister(&pptp_tcp_helper);
+ printk("ip_nat_pptp version %s unloaded\n", IP_NAT_PPTP_VERSION);
}
module_init(init);
diff --git a/release/src/linux/linux/net/ipv4/netfilter/ip_nat_proto_esp.c b/release/src/linux/linux/net/ipv4/netfilter/ip_nat_proto_esp.c
index a985539e..a985539e 100755..100644
--- a/release/src/linux/linux/net/ipv4/netfilter/ip_nat_proto_esp.c
+++ b/release/src/linux/linux/net/ipv4/netfilter/ip_nat_proto_esp.c
diff --git a/release/src/linux/linux/net/ipv4/netfilter/ip_nat_proto_gre.c b/release/src/linux/linux/net/ipv4/netfilter/ip_nat_proto_gre.c
index 25aa1786..9be95857 100644
--- a/release/src/linux/linux/net/ipv4/netfilter/ip_nat_proto_gre.c
+++ b/release/src/linux/linux/net/ipv4/netfilter/ip_nat_proto_gre.c
@@ -1,5 +1,5 @@
/*
- * ip_nat_proto_gre.c - Version 1.11
+ * ip_nat_proto_gre.c - Version 1.2
*
* NAT protocol helper module for GRE.
*
@@ -17,7 +17,7 @@
*
* Documentation about PPTP can be found in RFC 2637
*
- * (C) 2000-2002 by Harald Welte <laforge@gnumonks.org>
+ * (C) 2000-2003 by Harald Welte <laforge@gnumonks.org>
*
* Development of this code funded by Astaro AG (http://www.astaro.com/)
*
@@ -35,7 +35,12 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE");
+#if 0
+#define DEBUGP(format, args...) printk(KERN_DEBUG __FILE__ ":" __FUNCTION__ \
+ ": " format, ## args)
+#else
#define DEBUGP(x, args...)
+#endif
/* is key in given range between min and max */
static int
@@ -44,8 +49,15 @@ gre_in_range(const struct ip_conntrack_tuple *tuple,
const union ip_conntrack_manip_proto *min,
const union ip_conntrack_manip_proto *max)
{
- return ntohl(tuple->src.u.gre.key) >= ntohl(min->gre.key)
- && ntohl(tuple->src.u.gre.key) <= ntohl(max->gre.key);
+ u_int32_t key;
+
+ if (maniptype == IP_NAT_MANIP_SRC)
+ key = tuple->src.u.gre.key;
+ else
+ key = tuple->dst.u.gre.key;
+
+ return ntohl(key) >= ntohl(min->gre.key)
+ && ntohl(key) <= ntohl(max->gre.key);
}
/* generate unique tuple ... */
@@ -122,6 +134,7 @@ gre_manip_pkt(struct iphdr *iph, size_t len,
break;
}
if (greh->csum) {
+ /* FIXME: Never tested this code... */
*(gre_csum(greh)) =
ip_nat_cheat_check(~*(gre_key(greh)),
manip->u.gre.key,
diff --git a/release/src/linux/linux/net/ipv4/netfilter/ip_nat_proto_udp.c b/release/src/linux/linux/net/ipv4/netfilter/ip_nat_proto_udp.c
index 05aefcd4..622aee05 100644
--- a/release/src/linux/linux/net/ipv4/netfilter/ip_nat_proto_udp.c
+++ b/release/src/linux/linux/net/ipv4/netfilter/ip_nat_proto_udp.c
@@ -40,9 +40,6 @@ udp_unique_tuple(struct ip_conntrack_tuple *tuple,
else
portptr = &tuple->dst.u.udp.port;
- if(ntohs(*portptr) == 500)
- return 0;//must not be "return 1"
-
/* If no range specified... */
if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) {
/* If it's dst rewrite, can't change port */
diff --git a/release/src/linux/linux/net/ipv4/netfilter/ip_tables.c b/release/src/linux/linux/net/ipv4/netfilter/ip_tables.c
index 99438ca0..2e3004db 100644
--- a/release/src/linux/linux/net/ipv4/netfilter/ip_tables.c
+++ b/release/src/linux/linux/net/ipv4/netfilter/ip_tables.c
@@ -332,7 +332,7 @@ ipt_do_table(struct sk_buff **pskb,
continue;
}
if (table_base + v
- != (void *)e + e->next_offset) {
+ != (void *)e + e->next_offset && !(e->ip.flags & IPT_F_GOTO)) {
/* Save old back ptr in next entry */
struct ipt_entry *next
= (void *)e + e->next_offset;
@@ -374,6 +374,12 @@ ipt_do_table(struct sk_buff **pskb,
if (verdict == IPT_CONTINUE)
e = (void *)e + e->next_offset;
+ else if (verdict == IPT_RETURN) { // added -- zzz
+ e = back;
+ back = get_entry(table_base,
+ back->comefrom);
+ continue;
+ }
else
/* Verdict */
break;
@@ -1169,13 +1175,11 @@ do_add_counters(void *user, unsigned int len)
goto free;
write_lock_bh(&t->lock);
- /*************************************
- * modify by tanghui @ 2006-10-11
- * for a RACE CONDITION in the "do_add_counters()" function
- *************************************/
- //if (t->private->number != paddc->num_counters) {
- if (t->private->number != tmp.num_counters) {
- /*************************************/
+
+#if 0 // removed 1.11 forward bug test
+ // if (t->private->number != tmp.num_counters) { // 43011: modify by tanghui @ 2006-10-11 for a RACE CONDITION in the "do_add_counters()" function
+#endif
+ if (t->private->number != paddc->num_counters) {
ret = -EINVAL;
goto unlock_up_free;
}
@@ -1676,7 +1680,7 @@ static struct ipt_match icmp_matchstruct
= { { NULL, NULL }, "icmp", &icmp_match, &icmp_checkentry, NULL };
#ifdef CONFIG_PROC_FS
-static inline int print_name(const char *i,
+static int print_name(const char *i,
off_t start_offset, char *buffer, int length,
off_t *pos, unsigned int *count)
{
@@ -1694,6 +1698,15 @@ static inline int print_name(const char *i,
return 0;
}
+/*
+ * LIST_FIND callback used by ipt_get_targets().
+ *
+ * Returns 0 without printing anything for ipt_standard_target and
+ * ipt_error_target; every other target is forwarded, together with the
+ * unchanged output-window arguments, to print_name().
+ */
+static inline int print_target(const struct ipt_target *t,
+	off_t start_offset, char *buffer, int length,
+	off_t *pos, unsigned int *count)
+{
+	const int is_hidden = (t == &ipt_standard_target) ||
+			      (t == &ipt_error_target);
+
+	return is_hidden ? 0
+			 : print_name((char *)t, start_offset, buffer,
+				      length, pos, count);
+}
+
static int ipt_get_tables(char *buffer, char **start, off_t offset, int length)
{
off_t pos = 0;
@@ -1720,7 +1733,7 @@ static int ipt_get_targets(char *buffer, char **start, off_t offset, int length)
if (down_interruptible(&ipt_mutex) != 0)
return 0;
- LIST_FIND(&ipt_target, print_name, void *,
+ LIST_FIND(&ipt_target, print_target, struct ipt_target *,
offset, buffer, length, &pos, &count);
up(&ipt_mutex);
diff --git a/release/src/linux/linux/net/ipv4/netfilter/ipt_BCOUNT.c b/release/src/linux/linux/net/ipv4/netfilter/ipt_BCOUNT.c
new file mode 100644
index 00000000..b40e7e2e
--- /dev/null
+++ b/release/src/linux/linux/net/ipv4/netfilter/ipt_BCOUNT.c
@@ -0,0 +1,63 @@
+/*
+
+ BCOUNT target
+ Copyright (C) 2006 Jonathan Zarate
+
+ Licensed under GNU GPL v2 or later.
+
+*/
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/if_ether.h>
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <linux/netfilter_ipv4/ipt_BCOUNT.h>
+
+// #define DEBUG_BCOUNT
+
+/*
+ * BCOUNT target hook.
+ *
+ * Looks up the conntrack entry attached to *pskb and, if there is one,
+ * adds the packet length to its bcount field.  The counter is clamped so
+ * that it never exceeds 0x0FFFFFFF.  hooknum, in, out, targinfo and
+ * userinfo are not used.
+ *
+ * Always returns IPT_CONTINUE.
+ */
+static unsigned int target(struct sk_buff **pskb, unsigned int hooknum,
+	const struct net_device *in, const struct net_device *out,
+	const void *targinfo, void *userinfo)
+{
+	struct ip_conntrack *ct;
+	enum ip_conntrack_info ctinfo;
+
+	ct = ip_conntrack_get(*pskb, &ctinfo);
+	if (ct) {
+		ct->bcount += (*pskb)->len;
+		if (ct->bcount >= 0x0FFFFFFF)
+			ct->bcount = 0x0FFFFFFF;
+#ifdef DEBUG_BCOUNT
+		/*
+		 * Use printk(): there is no printf() in kernel code, so the
+		 * debug build could not link.  The length is printed with %x
+		 * on the assumption that sk_buff.len is an unsigned int, as
+		 * in mainline 2.4 -- TODO confirm against this tree's
+		 * skbuff.h.  bcount keeps its original %lx conversion.
+		 */
+		if (net_ratelimit())
+			printk(KERN_DEBUG "BCOUNT %x %lx\n",
+			       (*pskb)->len, ct->bcount);
+#endif
+	}
+	return IPT_CONTINUE;
+}
+
+/*
+ * Rule validation for BCOUNT: only the size of the target data is checked.
+ * Returns 1 when targinfosize equals
+ * IPT_ALIGN(sizeof(struct ipt_BCOUNT_target)) and 0 otherwise; tablename,
+ * e, targinfo and hook_mask are ignored.
+ */
+static int checkentry(const char *tablename, const struct ipt_entry *e, void *targinfo,
+	unsigned int targinfosize, unsigned int hook_mask)
+{
+	if (targinfosize != IPT_ALIGN(sizeof(struct ipt_BCOUNT_target)))
+		return 0;
+
+	return 1;
+}
+
+/*
+ * Registration record passed to ipt_register_target() and
+ * ipt_unregister_target() by init()/fini().  The initializer is positional:
+ * an unset { NULL, NULL } pair, the string "BCOUNT", the target and
+ * checkentry functions of this file, a NULL slot and THIS_MODULE.
+ * NOTE(review): presumably these map to list, name, target, checkentry,
+ * destroy and owner in struct ipt_target -- confirm against ip_tables.h.
+ */
+static struct ipt_target BCOUNT_target
+= { { NULL, NULL }, "BCOUNT", target, checkentry, NULL, THIS_MODULE };
+
+/*
+ * Module entry point (see module_init below): registers BCOUNT_target and
+ * hands the result of ipt_register_target() back unchanged.
+ */
+static int __init init(void)
+{
+	int status = ipt_register_target(&BCOUNT_target);
+
+	return status;
+}
+
+/* Module exit point (see module_exit below): unregisters BCOUNT_target. */
+static void __exit fini(void)
+{
+ ipt_unregister_target(&BCOUNT_target);
+}
+
+module_init(init);
+module_exit(fini);
+
+
+MODULE_AUTHOR("Jonathan Zarate");
+MODULE_DESCRIPTION("BCOUNT target");
+MODULE_LICENSE("GPL");
diff --git a/release/src/linux/linux/net/ipv4/netfilter/ipt_CLASSIFY.c b/release/src/linux/linux/net/ipv4/netfilter/ipt_CLASSIFY.c
new file mode 100644
index 00000000..f7320721
--- /dev/null
+++ b/release/src/linux/linux/net/ipv4/netfilter/ipt_CLASSIFY.c
@@ -0,0 +1,82 @@
+/*
+ * This is a module which is used for setting the skb->priority field
+ * of an skb for qdisc classification.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <net/checksum.h>
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_CLASSIFY.h>
+
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("iptables qdisc classification target module");
+
+/*
+ * CLASSIFY target: store the priority configured in the rule in
+ * skb->priority.  NFC_ALTERED is only flagged when the value actually
+ * changes.  Always returns IPT_CONTINUE.
+ */
+static unsigned int
+target(struct sk_buff **pskb,
+       unsigned int hooknum,
+       const struct net_device *in,
+       const struct net_device *out,
+       const void *targinfo,
+       void *userinfo)
+{
+	const struct ipt_classify_target_info *clinfo = targinfo;
+	struct sk_buff *skb = *pskb;
+
+	/* Nothing to do when the packet already carries this priority. */
+	if (skb->priority == clinfo->priority)
+		return IPT_CONTINUE;
+
+	skb->priority = clinfo->priority;
+	skb->nfcache |= NFC_ALTERED;
+
+	return IPT_CONTINUE;
+}
+
+/*
+ * Rule validation for CLASSIFY.  A rule is accepted (return 1) only when
+ *  - the target data has the size of struct ipt_classify_target_info,
+ *  - the rule is reachable from the POST_ROUTING hook only, and
+ *  - the rule lives in the "mangle" table.
+ * Otherwise a message is logged and 0 is returned.
+ */
+static int
+checkentry(const char *tablename,
+	   const struct ipt_entry *e,
+	   void *targinfo,
+	   unsigned int targinfosize,
+	   unsigned int hook_mask)
+{
+	if (targinfosize != IPT_ALIGN(sizeof(struct ipt_classify_target_info))){
+		/* The expected size is a size_t, so print it with %Zu (as the
+		 * other targets in this directory do) instead of %u. */
+		printk(KERN_ERR "CLASSIFY: invalid size (%u != %Zu).\n",
+		       targinfosize,
+		       IPT_ALIGN(sizeof(struct ipt_classify_target_info)));
+		return 0;
+	}
+
+	if (hook_mask & ~(1 << NF_IP_POST_ROUTING)) {
+		printk(KERN_ERR "CLASSIFY: only valid in POST_ROUTING.\n");
+		return 0;
+	}
+
+	if (strcmp(tablename, "mangle") != 0) {
+		printk(KERN_WARNING "CLASSIFY: can only be called from "
+				    "\"mangle\" table, not \"%s\".\n",
+				    tablename);
+		return 0;
+	}
+
+	return 1;
+}
+
+/* iptables registration record for the "CLASSIFY" target. */
+static struct ipt_target ipt_classify_reg
+= { { NULL, NULL }, "CLASSIFY", target, checkentry, NULL, THIS_MODULE };
+
+/*
+ * Module load: register the target.  The result of ipt_register_target()
+ * is returned unchanged so that the real failure reason reaches the caller
+ * instead of being replaced by a blanket -EINVAL.
+ */
+static int __init init(void)
+{
+	return ipt_register_target(&ipt_classify_reg);
+}
+
+/* Module unload: remove the target again. */
+static void __exit fini(void)
+{
+	ipt_unregister_target(&ipt_classify_reg);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/release/src/linux/linux/net/ipv4/netfilter/ipt_CONNMARK.c b/release/src/linux/linux/net/ipv4/netfilter/ipt_CONNMARK.c
new file mode 100644
index 00000000..a6038378
--- /dev/null
+++ b/release/src/linux/linux/net/ipv4/netfilter/ipt_CONNMARK.c
@@ -0,0 +1,128 @@
+/* This kernel module is used to modify the connection mark values, or
+ * to optionally restore the skb nfmark from the connection mark
+ *
+ * Copyright (C) 2002,2004 MARA Systems AB <http://www.marasystems.com>
+ * by Henrik Nordstrom <hno@marasystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <net/checksum.h>
+
+/* Module metadata.  The e-mail address matches the one in the copyright
+ * header above; this file implements a target, not a match. */
+MODULE_AUTHOR("Henrik Nordstrom <hno@marasystems.com>");
+MODULE_DESCRIPTION("IP tables CONNMARK target module");
+MODULE_LICENSE("GPL");
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_CONNMARK.h>
+#include <linux/netfilter_ipv4/ip_conntrack.h>
+
+/*
+ * CONNMARK target.  Depending on markinfo->mode:
+ *  - SET:        replace the masked bits of ct->mark with markinfo->mark
+ *  - SET_RETURN: set ct->mark to markinfo->mark, set the packet's nfmark to
+ *                that value ANDed with the mask, and return IPT_RETURN
+ *  - SAVE:       copy the masked bits of the packet's nfmark into ct->mark
+ *  - RESTORE:    copy the masked bits of ct->mark into the packet's nfmark
+ * Packets without a conntrack entry are passed on untouched.
+ *
+ * Returns IPT_RETURN for SET_RETURN, IPT_CONTINUE in every other case.
+ */
+static unsigned int
+target(struct sk_buff **pskb,
+       unsigned int hooknum,
+       const struct net_device *in,
+       const struct net_device *out,
+       const void *targinfo,
+       void *userinfo)
+{
+	const struct ipt_connmark_target_info *markinfo = targinfo;
+	struct sk_buff *skb = *pskb;
+	enum ip_conntrack_info ctinfo;
+	struct ip_conntrack *ct;
+	unsigned long newmark;
+	unsigned long nfmark;
+	unsigned long diff;
+
+	ct = ip_conntrack_get(skb, &ctinfo);
+	if (!ct)
+		return IPT_CONTINUE;
+
+	switch (markinfo->mode) {
+	case IPT_CONNMARK_SET:
+		newmark = (ct->mark & ~markinfo->mask) | markinfo->mark;
+		/* only write when the value really changes */
+		if (ct->mark != newmark)
+			ct->mark = newmark;
+		break;
+
+	case IPT_CONNMARK_SET_RETURN:
+		/* Set connmark and nfmark, apply mask to nfmark, do IPT_RETURN */
+		newmark = ct->mark = markinfo->mark;
+		newmark &= markinfo->mask;
+		nfmark = skb->nfmark;
+		if (nfmark != newmark) {
+			skb->nfmark = newmark;
+			skb->nfcache |= NFC_ALTERED;
+		}
+		return IPT_RETURN;
+
+	case IPT_CONNMARK_SAVE:
+		newmark = (ct->mark & ~markinfo->mask) |
+			  (skb->nfmark & markinfo->mask);
+		if (newmark != ct->mark)
+			ct->mark = newmark;
+		break;
+
+	case IPT_CONNMARK_RESTORE:
+		nfmark = skb->nfmark;
+		/* masked bits in which packet mark and connection mark differ */
+		diff = (ct->mark ^ nfmark) & markinfo->mask;
+		if (diff != 0) {
+			skb->nfmark = nfmark ^ diff;
+			skb->nfcache |= NFC_ALTERED;
+		}
+		break;
+	}
+
+	return IPT_CONTINUE;
+}
+
+/*
+ * Rule validation for CONNMARK: the target data must have the size of
+ * struct ipt_connmark_target_info, and the RESTORE mode is only allowed in
+ * the "mangle" table.  Returns 1 to accept the rule, 0 to reject it.
+ */
+static int
+checkentry(const char *tablename,
+	   const struct ipt_entry *e,
+	   void *targinfo,
+	   unsigned int targinfosize,
+	   unsigned int hook_mask)
+{
+	const struct ipt_connmark_target_info *info = targinfo;
+
+	if (targinfosize != IPT_ALIGN(sizeof(struct ipt_connmark_target_info))) {
+		printk(KERN_WARNING "CONNMARK: targinfosize %u != %Zu\n",
+		       targinfosize,
+		       IPT_ALIGN(sizeof(struct ipt_connmark_target_info)));
+		return 0;
+	}
+
+	if (info->mode == IPT_CONNMARK_RESTORE &&
+	    strcmp(tablename, "mangle") != 0) {
+		printk(KERN_WARNING "CONNMARK: restore can only be called from \"mangle\" table, not \"%s\"\n", tablename);
+		return 0;
+	}
+
+	return 1;
+}
+
+/* iptables registration record for the "CONNMARK" target. */
+static struct ipt_target ipt_connmark_reg = {
+	.name		= "CONNMARK",
+	.target		= target,
+	.checkentry	= checkentry,
+	.me		= THIS_MODULE,
+};
+
+/* Module load: register the target; hands back ipt_register_target()'s result. */
+static int __init init(void)
+{
+	return ipt_register_target(&ipt_connmark_reg);
+}
+
+/* Module unload: remove the target again. */
+static void __exit fini(void)
+{
+	ipt_unregister_target(&ipt_connmark_reg);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/release/src/linux/linux/net/ipv4/netfilter/ipt_IMQ.c b/release/src/linux/linux/net/ipv4/netfilter/ipt_IMQ.c
new file mode 100644
index 00000000..2ba068b3
--- /dev/null
+++ b/release/src/linux/linux/net/ipv4/netfilter/ipt_IMQ.c
@@ -0,0 +1,78 @@
+/* This target marks packets to be enqueued to an imq device */
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_IMQ.h>
+#include <linux/imq.h>
+
+/*
+ * IMQ target: store the device number from the rule, together with the
+ * IMQ_F_ENQUEUE flag, in skb->imq_flags and flag the packet as altered.
+ * Always returns IPT_CONTINUE.
+ */
+static unsigned int imq_target(struct sk_buff **pskb,
+			       unsigned int hooknum,
+			       const struct net_device *in,
+			       const struct net_device *out,
+			       const void *targinfo,
+			       void *userinfo)
+{
+	const struct ipt_imq_info *mr = targinfo;
+	struct sk_buff *skb = *pskb;
+
+	skb->imq_flags = mr->todev | IMQ_F_ENQUEUE;
+	skb->nfcache |= NFC_ALTERED;
+
+	return IPT_CONTINUE;
+}
+
+/*
+ * Rule validation for IMQ.  A rule is accepted (return 1) only when the
+ * target data has the size of struct ipt_imq_info, the rule lives in the
+ * "mangle" table and the requested device number is in range.  Otherwise a
+ * warning is logged and 0 is returned.
+ */
+static int imq_checkentry(const char *tablename,
+			  const struct ipt_entry *e,
+			  void *targinfo,
+			  unsigned int targinfosize,
+			  unsigned int hook_mask)
+{
+	struct ipt_imq_info *mr;
+
+	if (targinfosize != IPT_ALIGN(sizeof(struct ipt_imq_info))) {
+		printk(KERN_WARNING "IMQ: invalid targinfosize\n");
+		return 0;
+	}
+	mr = (struct ipt_imq_info*)targinfo;
+
+	if (strcmp(tablename, "mangle") != 0) {
+		printk(KERN_WARNING
+		       "IMQ: IMQ can only be called from \"mangle\" table, not \"%s\"\n",
+		       tablename);
+		return 0;
+	}
+
+	/*
+	 * Device numbers start at 0, so the highest valid one is
+	 * IMQ_MAX_DEVS - 1; the former "> IMQ_MAX_DEVS" test let one
+	 * out-of-range value through.
+	 * NOTE(review): assumes IMQ_MAX_DEVS is a device count (as in
+	 * <linux/imq.h> of the IMQ patch) - confirm against that header.
+	 */
+	if (mr->todev >= IMQ_MAX_DEVS) {
+		printk(KERN_WARNING
+		       "IMQ: invalid device specified, highest is %u\n",
+		       IMQ_MAX_DEVS - 1);
+		return 0;
+	}
+
+	return 1;
+}
+
+/* iptables registration record for the "IMQ" target. */
+static struct ipt_target ipt_imq_reg = {
+	{ NULL, NULL},
+	"IMQ",
+	imq_target,
+	imq_checkentry,
+	NULL,
+	THIS_MODULE
+};
+
+/*
+ * Module load: register the target.  The result of ipt_register_target()
+ * is returned unchanged so that the real failure reason reaches the caller
+ * instead of being replaced by a blanket -EINVAL.
+ */
+static int __init init(void)
+{
+	return ipt_register_target(&ipt_imq_reg);
+}
+
+/* Module unload: remove the target again. */
+static void __exit fini(void)
+{
+	ipt_unregister_target(&ipt_imq_reg);
+}
+
+module_init(init);
+module_exit(fini);
+MODULE_LICENSE("GPL");
diff --git a/release/src/linux/linux/net/ipv4/netfilter/ipt_MACSAVE.c b/release/src/linux/linux/net/ipv4/netfilter/ipt_MACSAVE.c
new file mode 100644
index 00000000..62677279
--- /dev/null
+++ b/release/src/linux/linux/net/ipv4/netfilter/ipt_MACSAVE.c
@@ -0,0 +1,65 @@
+/*
+
+ MACSAVE target
+ Copyright (C) 2006 Jonathan Zarate
+
+ Licensed under GNU GPL v2 or later.
+
+*/
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/if_ether.h>
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <linux/netfilter_ipv4/ipt_MACSAVE.h>
+
+/*
+ * MACSAVE target: copy the Ethernet source address of the packet into the
+ * macsave field of its conntrack entry.  Nothing is copied when the MAC
+ * header does not lie completely between skb->head and skb->data, or when
+ * the packet has no conntrack entry.  Always returns IPT_CONTINUE.
+ */
+static unsigned int target(struct sk_buff **pskb, unsigned int hooknum,
+	const struct net_device *in, const struct net_device *out,
+	const void *targinfo, void *userinfo)
+{
+	struct sk_buff *skb = *pskb;
+	enum ip_conntrack_info ctinfo;
+	struct ip_conntrack *ct;
+
+	/* No complete Ethernet header in front of the payload: skip. */
+	if (skb->mac.raw < skb->head || (skb->mac.raw + ETH_HLEN) > skb->data)
+		return IPT_CONTINUE;
+
+	ct = ip_conntrack_get(skb, &ctinfo);
+	if (ct)
+		memcpy(ct->macsave, skb->mac.ethernet->h_source, sizeof(ct->macsave));
+
+	return IPT_CONTINUE;
+}
+
+/*
+ * Rule validation for MACSAVE: the target data must have the size of
+ * struct ipt_MACSAVE_target_info and the rule may only be reachable from
+ * the PREROUTING, FORWARD and INPUT hooks.  Returns 1 to accept, 0 to reject.
+ */
+static int checkentry(const char *tablename, const struct ipt_entry *e, void *targinfo,
+	unsigned int targinfosize, unsigned int hook_mask)
+{
+	const unsigned int allowed_hooks = (1 << NF_IP_PRE_ROUTING) |
+					   (1 << NF_IP_FORWARD) |
+					   (1 << NF_IP_LOCAL_IN);
+
+	if (targinfosize != IPT_ALIGN(sizeof(struct ipt_MACSAVE_target_info))) {
+		printk(KERN_ERR "MACSAVE: Invalid data size\n");
+		return 0;
+	}
+
+	if ((hook_mask & ~allowed_hooks) != 0) {
+		printk(KERN_ERR "MACSAVE: Valid only in PREROUTING, FORWARD and INPUT\n");
+		return 0;
+	}
+
+	return 1;
+}
+
+/* iptables registration record for the "MACSAVE" target. */
+static struct ipt_target macsave_target = {
+	.name		= "MACSAVE",
+	.target		= target,
+	.checkentry	= checkentry,
+	.me		= THIS_MODULE,
+};
+
+/* Module load: register the target; hands back ipt_register_target()'s result. */
+static int __init init(void)
+{
+	return ipt_register_target(&macsave_target);
+}
+
+/* Module unload: remove the target again. */
+static void __exit fini(void)
+{
+	ipt_unregister_target(&macsave_target);
+}
+
+module_init(init);
+module_exit(fini);
+MODULE_LICENSE("GPL");
diff --git a/release/src/linux/linux/net/ipv4/netfilter/ipt_ROUTE.c b/release/src/linux/linux/net/ipv4/netfilter/ipt_ROUTE.c
new file mode 100644
index 00000000..b97d7792
--- /dev/null
+++ b/release/src/linux/linux/net/ipv4/netfilter/ipt_ROUTE.c
@@ -0,0 +1,422 @@
+/*
+ * This implements the ROUTE target, which enables you to setup unusual
+ * routes not supported by the standard kernel routing table.
+ *
+ * Copyright (C) 2002 Cedric de Launois <delaunois@info.ucl.ac.be>
+ *
+ * v 1.11 2004/11/23
+ *
+ * This software is distributed under GNU GPL v2, 1991
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <linux/netfilter_ipv4/ipt_ROUTE.h>
+#include <linux/netdevice.h>
+#include <linux/route.h>
+#include <net/ip.h>
+#include <net/route.h>
+#include <net/icmp.h>
+#include <net/checksum.h>
+
+/* Debug output switch: change "#if 0" to "#if 1" to map DEBUGP() onto
+ * printk(); as shipped, DEBUGP() expands to nothing. */
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+
+/* Try to route the packet according to the routing keys specified in
+ * route_info. Keys are :
+ *  - ifindex :
+ *      0 if no oif preferred,
+ *      otherwise set to the index of the desired oif
+ *  - route_info->gw :
+ *      0 if no gateway specified,
+ *      otherwise set to the next host to which the pkt must be routed
+ * On success, skb->dev is the output device to which the packet must
+ * be sent and skb->dst is not NULL.
+ *
+ * Note that the packet's old route is dropped as soon as the lookup
+ * succeeds, so skb->dst is NULL when 0 is returned.
+ *
+ * RETURN: -1 if an error occurred (skb->dst is left untouched)
+ *          1 if the packet was successfully routed to the
+ *            desired destination
+ *          0 if the kernel routing table could not route the packet
+ *            according to the keys specified
+ */
+static int route(struct sk_buff *skb,
+		 unsigned int ifindex,
+		 const struct ipt_route_target_info *route_info)
+{
+	int err;
+	struct rtable *rt;
+	struct iphdr *iph = skb->nh.iph;
+	struct rt_key key = {
+		.dst = iph->daddr,
+		.src = 0,
+		.oif = ifindex,
+		.tos = RT_TOS(iph->tos)
+	};
+
+	/* The destination address may be overloaded by the target */
+	if (route_info->gw)
+		key.dst = route_info->gw;
+
+	/* Trying to route the packet using the standard routing table. */
+	if ((err = ip_route_output_key(&rt, &key))) {
+		if (net_ratelimit())
+			DEBUGP("ipt_ROUTE: couldn't route pkt (err: %i)",err);
+		return -1;
+	}
+
+	/* Drop old route. */
+	dst_release(skb->dst);
+	skb->dst = NULL;
+
+	/* Success if no oif specified or if the oif corresponds to the
+	 * one desired.  The reference obtained by the lookup is handed
+	 * over to the skb. */
+	if (!ifindex || rt->u.dst.dev->ifindex == ifindex) {
+		skb->dst = &rt->u.dst;
+		skb->dev = skb->dst->dev;
+		return 1;
+	}
+
+	/* The interface selected by the routing table is not the one
+	 * specified by the user. This may happen because the dst address
+	 * is one of our own addresses.
+	 */
+	if (net_ratelimit())
+		DEBUGP("ipt_ROUTE: failed to route as desired gw=%u.%u.%u.%u oif=%i (got oif=%i)\n",
+		       NIPQUAD(route_info->gw), ifindex, rt->u.dst.dev->ifindex);
+
+	/* The looked-up route is not attached to the skb on this path, so
+	 * give back the reference taken by ip_route_output_key(); without
+	 * this every mismatch leaked one routing cache entry reference. */
+	ip_rt_put(rt);
+
+	return 0;
+}
+
+
+/* Modelled on ip_finish_output2
+ * PRE : skb->dev is set to the device we are leaving by
+ *       skb->dst is not NULL
+ * POST: the packet is sent with the link layer header pushed
+ *       the packet is destroyed
+ */
+static void ip_direct_send(struct sk_buff *skb)
+{
+	struct dst_entry *dst = skb->dst;
+	struct hh_cache *hh = dst->hh;
+
+	if (hh) {
+		/* Cached hardware header available: copy it in front of the
+		 * payload under the cache lock, then hand the packet over. */
+		read_lock_bh(&hh->hh_lock);
+		memcpy(skb->data - 16, hh->hh_data, 16);
+		read_unlock_bh(&hh->hh_lock);
+		skb_push(skb, hh->hh_len);
+		hh->hh_output(skb);
+		return;
+	}
+
+	if (dst->neighbour) {
+		dst->neighbour->output(skb);
+		return;
+	}
+
+	/* Neither a cached header nor a neighbour entry: give up. */
+	if (net_ratelimit())
+		DEBUGP(KERN_DEBUG "ipt_ROUTE: no hdr & no neighbour cache!\n");
+	kfree_skb(skb);
+}
+
+
+/* Hand the packet straight to dev_queue_xmit().
+ *
+ * PRE : skb->dev is set to the device we are leaving by
+ * POST: - the packet is directly sent to the skb->dev device, without
+ *         pushing the link layer header.
+ *       - the packet is destroyed
+ *
+ * Returns whatever dev_queue_xmit() returns.
+ */
+static inline int dev_direct_send(struct sk_buff *skb)
+{
+ return dev_queue_xmit(skb);
+}
+
+
+/*
+ * Send the packet out through the interface named in route_info->oif.
+ *
+ * The routing table is tried first (route()).  If it picks another
+ * interface, the packet is pushed directly to the requested device, which
+ * is only allowed for ARPHRD_TUNNEL and ARPHRD_IPGRE devices.
+ *
+ * The device reference taken by dev_get_by_name() is released on every
+ * path out of this function.
+ *
+ * Returns NF_STOLEN when the packet has been sent, IPT_CONTINUE when
+ * IPT_ROUTE_CONTINUE is set and the packet was routed as requested, and
+ * NF_DROP otherwise.
+ */
+static unsigned int route_oif(const struct ipt_route_target_info *route_info,
+			      struct sk_buff *skb)
+{
+	unsigned int ifindex = 0;
+	struct net_device *dev_out = NULL;
+
+	/* The user set the interface name to use.
+	 * Getting the current interface index.
+	 */
+	if ((dev_out = dev_get_by_name(route_info->oif))) {
+		ifindex = dev_out->ifindex;
+	} else {
+		/* Unknown interface name : packet dropped */
+		if (net_ratelimit())
+			DEBUGP("ipt_ROUTE: oif interface %s not found\n", route_info->oif);
+		return NF_DROP;
+	}
+
+	/* Trying the standard way of routing packets */
+	switch (route(skb, ifindex, route_info)) {
+	case 1:
+		dev_put(dev_out);
+		if (route_info->flags & IPT_ROUTE_CONTINUE)
+			return IPT_CONTINUE;
+
+		ip_direct_send(skb);
+		return NF_STOLEN;
+
+	case 0:
+		/* Failed to send to oif. Trying the hard way */
+		if (route_info->flags & IPT_ROUTE_CONTINUE) {
+			/* Release the device before bailing out; this path
+			 * used to leak the dev_get_by_name() reference. */
+			dev_put(dev_out);
+			return NF_DROP;
+		}
+
+		if (net_ratelimit())
+			DEBUGP("ipt_ROUTE: forcing the use of %i\n",
+			       ifindex);
+
+		/* We have to force the use of an interface.
+		 * This interface must be a tunnel interface since
+		 * otherwise we can't guess the hw address for
+		 * the packet. For a tunnel interface, no hw address
+		 * is needed.
+		 */
+		if ((dev_out->type != ARPHRD_TUNNEL)
+		    && (dev_out->type != ARPHRD_IPGRE)) {
+			if (net_ratelimit())
+				DEBUGP("ipt_ROUTE: can't guess the hw addr !\n");
+			dev_put(dev_out);
+			return NF_DROP;
+		}
+
+		/* Send the packet. This will also free skb
+		 * Do not go through the POST_ROUTING hook because
+		 * skb->dst is not set and because it will probably
+		 * get confused by the destination IP address.
+		 */
+		skb->dev = dev_out;
+		dev_direct_send(skb);
+		dev_put(dev_out);
+		return NF_STOLEN;
+
+	default:
+		/* Unexpected error */
+		dev_put(dev_out);
+		return NF_DROP;
+	}
+}
+
+
+/*
+ * Re-inject the packet as if it had been received on the interface named
+ * in route_info->iif: the packet's device is replaced, its route is
+ * dropped and it is passed to netif_rx().
+ *
+ * Returns NF_DROP when the interface does not exist, NF_STOLEN otherwise.
+ */
+static unsigned int route_iif(const struct ipt_route_target_info *route_info,
+			      struct sk_buff *skb)
+{
+	struct net_device *dev_in = dev_get_by_name(route_info->iif);
+
+	/* Getting the current interface index. */
+	if (dev_in == NULL) {
+		if (net_ratelimit())
+			DEBUGP("ipt_ROUTE: iif interface %s not found\n", route_info->iif);
+		return NF_DROP;
+	}
+
+	skb->dev = dev_in;
+
+	/* forget the route attached so far */
+	dst_release(skb->dst);
+	skb->dst = NULL;
+
+	netif_rx(skb);
+	dev_put(dev_in);
+
+	return NF_STOLEN;
+}
+
+
+/*
+ * Route the packet via the gateway in route_info->gw, without any
+ * preferred output interface.
+ *
+ * Returns NF_DROP when route() does not report success, IPT_CONTINUE when
+ * IPT_ROUTE_CONTINUE is set, and NF_STOLEN after the packet has been sent.
+ */
+static unsigned int route_gw(const struct ipt_route_target_info *route_info,
+			     struct sk_buff *skb)
+{
+	int routed = route(skb, 0, route_info);
+
+	if (routed != 1)
+		return NF_DROP;
+
+	if (!(route_info->flags & IPT_ROUTE_CONTINUE)) {
+		ip_direct_send(skb);
+		return NF_STOLEN;
+	}
+
+	return IPT_CONTINUE;
+}
+
+/* To detect and deter routed packet loopback when using the --tee option,
+ * we take a page out of the raw.patch book: on the copied skb, we set up
+ * a fake ->nfct entry, pointing to the local &route_tee_track. We skip
+ * routing packets when we see they already have that ->nfct.
+ *
+ * NOTE(review): ipt_route_target() in this file attaches the marker, but no
+ * test for it is visible there - confirm where (or whether) already-routed
+ * packets are actually skipped.
+ */
+
+static struct ip_conntrack route_tee_track;
+
+/*
+ * ROUTE target entry point.
+ *
+ *  1. At PREROUTING and INPUT the TTL is handled here: packets with a TTL
+ *     of 1 or less are dropped (with an ICMP time-exceeded message when a
+ *     route back through the receiving device exists), all others get
+ *     their TTL decremented.
+ *  2. With IPT_ROUTE_TEE a copy of the packet is made and the copy is
+ *     routed; the original always continues (IPT_CONTINUE).
+ *  3. Unless IPT_ROUTE_CONTINUE is set, the packet's conntrack reference
+ *     is replaced by the fake route_tee_track entry.
+ *  4. The packet is dispatched to route_oif(), route_iif() or route_gw(),
+ *     depending on which parameter the rule carries.
+ *
+ * Returns the verdict of the dispatch helper, or IPT_CONTINUE in TEE mode
+ * and when the rule carries no parameter.
+ */
+static unsigned int ipt_route_target(struct sk_buff **pskb,
+				     unsigned int hooknum,
+				     const struct net_device *in,
+				     const struct net_device *out,
+				     const void *targinfo,
+				     void *userinfo)
+{
+	const struct ipt_route_target_info *route_info = targinfo;
+	struct sk_buff *skb = *pskb;
+	unsigned int res;
+
+	/* If we are at PREROUTING or INPUT hook
+	 * the TTL isn't decreased by the IP stack
+	 */
+	if (hooknum == NF_IP_PRE_ROUTING ||
+	    hooknum == NF_IP_LOCAL_IN) {
+
+		struct iphdr *iph = skb->nh.iph;
+
+		if (iph->ttl <= 1) {
+			struct rtable *rt;
+
+			if (ip_route_output(&rt, iph->saddr, iph->daddr,
+					    RT_TOS(iph->tos) | RTO_CONN,
+					    0)) {
+				return NF_DROP;
+			}
+
+			if (skb->dev == rt->u.dst.dev) {
+				/* Drop old route. */
+				dst_release(skb->dst);
+				skb->dst = &rt->u.dst;
+
+				/* this will traverse normal stack, and
+				 * thus call conntrack on the icmp packet */
+				icmp_send(skb, ICMP_TIME_EXCEEDED,
+					  ICMP_EXC_TTL, 0);
+			} else {
+				/* The route is not attached to the skb, so
+				 * give the lookup reference back instead of
+				 * leaking it. */
+				ip_rt_put(rt);
+			}
+
+			return NF_DROP;
+		}
+
+		/*
+		 * If we are at INPUT the checksum must be recalculated since
+		 * the length could change as the result of a defragmentation.
+		 * -- Rickard Molin
+		 */
+		if(hooknum == NF_IP_LOCAL_IN) {
+			iph->ttl = iph->ttl - 1;
+			iph->check = 0;
+			iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
+		} else {
+			ip_decrease_ttl(iph);
+		}
+	}
+
+	if ((route_info->flags & IPT_ROUTE_TEE)) {
+		/*
+		 * Copy the *pskb, and route the copy. Will later return
+		 * IPT_CONTINUE for the original skb, which should continue
+		 * on its way as if nothing happened. The copy should be
+		 * independently delivered to the ROUTE --gw.
+		 */
+		skb = skb_copy(*pskb, GFP_ATOMIC);
+		if (!skb) {
+			if (net_ratelimit())
+				DEBUGP(KERN_DEBUG "ipt_ROUTE: copy failed!\n");
+			return IPT_CONTINUE;
+		}
+	}
+
+	/* Tell conntrack to forget this packet since it may get confused
+	 * when a packet is leaving with dst address == our address.
+	 * Good idea ? Dunno. Need advice.
+	 *
+	 * NEW: mark the skb with our &route_tee_track, so we avoid looping
+	 * on any already routed packet.
+	 */
+	if (!(route_info->flags & IPT_ROUTE_CONTINUE)) {
+		nf_conntrack_put(skb->nfct);
+		skb->nfct = &route_tee_track.infos[IP_CT_NEW];
+		nf_conntrack_get(skb->nfct);
+		skb->nfcache = 0;
+#ifdef CONFIG_NETFILTER_DEBUG
+		skb->nf_debug = 0;
+#endif
+	}
+
+	if (route_info->oif[0]) {
+		res = route_oif(route_info, skb);
+	} else if (route_info->iif[0]) {
+		res = route_iif(route_info, skb);
+	} else if (route_info->gw) {
+		res = route_gw(route_info, skb);
+	} else {
+		if (net_ratelimit())
+			DEBUGP(KERN_DEBUG "ipt_ROUTE: no parameter !\n");
+		res = IPT_CONTINUE;
+	}
+
+	if ((route_info->flags & IPT_ROUTE_TEE)) {
+		/* In TEE mode skb is our private copy.  The verdict we return
+		 * applies to the original packet, so nobody else will ever
+		 * free the copy: do it here unless a helper consumed it. */
+		if (res != NF_STOLEN)
+			kfree_skb(skb);
+		res = IPT_CONTINUE;
+	}
+
+	return res;
+}
+
+
+/*
+ * Rule validation for ROUTE: the rule must live in the "mangle" table, may
+ * only be reachable from the five standard IPv4 hooks, and its target data
+ * must have the size of struct ipt_route_target_info.
+ * Returns 1 to accept the rule, 0 to reject it.
+ */
+static int ipt_route_checkentry(const char *tablename,
+				const struct ipt_entry *e,
+				void *targinfo,
+				unsigned int targinfosize,
+				unsigned int hook_mask)
+{
+	const unsigned int allowed_hooks = (1 << NF_IP_PRE_ROUTING)
+					 | (1 << NF_IP_LOCAL_IN)
+					 | (1 << NF_IP_FORWARD)
+					 | (1 << NF_IP_LOCAL_OUT)
+					 | (1 << NF_IP_POST_ROUTING);
+
+	if (strcmp(tablename, "mangle")) {
+		printk("ipt_ROUTE: bad table `%s', use the `mangle' table.\n",
+		       tablename);
+		return 0;
+	}
+
+	if ((hook_mask & ~allowed_hooks) != 0) {
+		printk("ipt_ROUTE: bad hook\n");
+		return 0;
+	}
+
+	if (targinfosize != IPT_ALIGN(sizeof(struct ipt_route_target_info))) {
+		printk(KERN_WARNING "ipt_ROUTE: targinfosize %u != %Zu\n",
+		       targinfosize,
+		       IPT_ALIGN(sizeof(struct ipt_route_target_info)));
+		return 0;
+	}
+
+	return 1;
+}
+
+
+/* iptables registration record for the "ROUTE" target. */
+static struct ipt_target ipt_route_reg
+= { { NULL, NULL }, "ROUTE", ipt_route_target, ipt_route_checkentry, NULL,
+    THIS_MODULE };
+
+
+/*
+ * Module load: prepare the fake conntrack entry used to mark routed
+ * packets, then register the target.  The result of ipt_register_target()
+ * is returned unchanged so that the real failure reason reaches the caller
+ * instead of being replaced by a blanket -EINVAL.
+ */
+static int __init init(void)
+{
+	/* Set up fake conntrack (stolen from raw.patch):
+	    - to never be deleted, not in any hashes */
+	atomic_set(&route_tee_track.ct_general.use, 1);
+	/*  - and make it look like a confirmed connection */
+	set_bit(IPS_CONFIRMED_BIT, &route_tee_track.status);
+	/*  - and prepare the ctinfo field for REJECT/NAT. */
+	route_tee_track.infos[IP_CT_NEW].master =
+	route_tee_track.infos[IP_CT_RELATED].master =
+	route_tee_track.infos[IP_CT_RELATED + IP_CT_IS_REPLY].master =
+		&route_tee_track.ct_general;
+	/* Initialize fake conntrack so that NAT will skip it */
+	route_tee_track.nat.info.initialized |=
+		(1 << IP_NAT_MANIP_SRC) | (1 << IP_NAT_MANIP_DST);
+
+	return ipt_register_target(&ipt_route_reg);
+}
+
+
+/* Module unload: remove the target again. */
+static void __exit fini(void)
+{
+	ipt_unregister_target(&ipt_route_reg);
+}
+
+module_init(init);
+module_exit(fini);
+MODULE_LICENSE("GPL");
diff --git a/release/src/linux/linux/net/ipv4/netfilter/ipt_TRIGGER.c b/release/src/linux/linux/net/ipv4/netfilter/ipt_TRIGGER.c
index 99e7dfe7..07103fa5 100644
--- a/release/src/linux/linux/net/ipv4/netfilter/ipt_TRIGGER.c
+++ b/release/src/linux/linux/net/ipv4/netfilter/ipt_TRIGGER.c
@@ -62,7 +62,8 @@ LIST_HEAD(trigger_list);
static void trigger_refresh(struct ipt_trigger *trig, unsigned long extra_jiffies)
{
- DEBUGP("%s: \n", __FUNCTION__);
+ DEBUGP("%s: mport=%u-%u\n", __FUNCTION__, trig->ports.mport[0], trig->ports.mport[1]);
+
IP_NF_ASSERT(trig);
WRITE_LOCK(&ip_conntrack_lock);
@@ -77,7 +78,8 @@ static void trigger_refresh(struct ipt_trigger *trig, unsigned long extra_jiffie
static void __del_trigger(struct ipt_trigger *trig)
{
- DEBUGP("%s: \n", __FUNCTION__);
+ DEBUGP("%s: mport=%u-%u\n", __FUNCTION__, trig->ports.mport[0], trig->ports.mport[1]);
+
IP_NF_ASSERT(trig);
MUST_BE_WRITE_LOCKED(&ip_conntrack_lock);
@@ -90,7 +92,9 @@ static void trigger_timeout(unsigned long ul_trig)
{
struct ipt_trigger *trig= (void *) ul_trig;
- DEBUGP("trigger list %p timed out\n", trig);
+// DEBUGP("trigger list %p timed out\n", trig);
+ DEBUGP("%s: mport=%u-%u\n", __FUNCTION__, trig->ports.mport[0], trig->ports.mport[1]);
+
WRITE_LOCK(&ip_conntrack_lock);
__del_trigger(trig);
WRITE_UNLOCK(&ip_conntrack_lock);
@@ -250,7 +254,7 @@ trigger_dnat(struct sk_buff **pskb,
IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW));
DEBUGP("%s: got ", __FUNCTION__);
- DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+ DUMP_TUPLE_RAW(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
/* Alter the destination of imcoming packet. */
newrange = ((struct ip_nat_multi_range)
@@ -310,7 +314,7 @@ trigger_check(const char *tablename,
DEBUGP("trigger_check: size %u.\n", targinfosize);
return 0;
}
- if (hook_mask & ~((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_FORWARD))) {
+ if (hook_mask & ~((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_FORWARD) | (1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_POST_ROUTING))) {
DEBUGP("trigger_check: bad hooks %x.\n", hook_mask);
return 0;
}
diff --git a/release/src/linux/linux/net/ipv4/netfilter/ipt_TTL.c b/release/src/linux/linux/net/ipv4/netfilter/ipt_TTL.c
new file mode 100644
index 00000000..2f0a4e7a
--- /dev/null
+++ b/release/src/linux/linux/net/ipv4/netfilter/ipt_TTL.c
@@ -0,0 +1,110 @@
+/* TTL modification target for IP tables
+ * (C) 2000 by Harald Welte <laforge@gnumonks.org>
+ *
+ * Version: 1.8
+ *
+ * This software is distributed under the terms of GNU GPL
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <net/checksum.h>
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_TTL.h>
+
+MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
+MODULE_DESCRIPTION("IP tables TTL modification module");
+MODULE_LICENSE("GPL");
+
+/*
+ * TTL target: set, increment or decrement the TTL of the IP header as
+ * requested by the rule.  Incrementing stops at 255, decrementing at 0.
+ * When the TTL changes, the header checksum is updated from the old and
+ * new value and the packet is flagged NFC_ALTERED.
+ * Always returns IPT_CONTINUE.
+ */
+static unsigned int ipt_ttl_target(struct sk_buff **pskb, unsigned int hooknum,
+	const struct net_device *in, const struct net_device *out,
+	const void *targinfo, void *userinfo)
+{
+	struct iphdr *iph = (*pskb)->nh.iph;
+	const struct ipt_TTL_info *info = targinfo;
+	u_int16_t diffs[2];
+	int new_ttl = iph->ttl;		/* unknown modes leave the TTL alone */
+
+	switch (info->mode) {
+	case IPT_TTL_SET:
+		new_ttl = info->ttl;
+		break;
+	case IPT_TTL_INC:
+		new_ttl = iph->ttl + info->ttl;
+		if (new_ttl > 255)
+			new_ttl = 255;
+		break;
+	case IPT_TTL_DEC:
+		new_ttl = iph->ttl - info->ttl;
+		if (new_ttl < 0)
+			new_ttl = 0;
+		break;
+	default:
+		break;
+	}
+
+	/* Unchanged TTL: nothing to rewrite. */
+	if (new_ttl == iph->ttl)
+		return IPT_CONTINUE;
+
+	/* diffs[0]: complement of the old TTL word, diffs[1]: new TTL word */
+	diffs[0] = htons(((unsigned)iph->ttl) << 8) ^ 0xFFFF;
+	iph->ttl = new_ttl;
+	diffs[1] = htons(((unsigned)iph->ttl) << 8);
+	iph->check = csum_fold(csum_partial((char *)diffs,
+					    sizeof(diffs),
+					    iph->check^0xFFFF));
+	(*pskb)->nfcache |= NFC_ALTERED;
+
+	return IPT_CONTINUE;
+}
+
+/*
+ * Rule validation for TTL.  A rule is accepted (return 1) only when
+ *  - the target data has the size of struct ipt_TTL_info,
+ *  - the rule lives in the "mangle" table,
+ *  - the mode does not exceed IPT_TTL_MAXMODE, and
+ *  - the value is non-zero for every mode other than IPT_TTL_SET.
+ * Otherwise a warning is logged and 0 is returned.
+ */
+static int ipt_ttl_checkentry(const char *tablename,
+		const struct ipt_entry *e,
+		void *targinfo,
+		unsigned int targinfosize,
+		unsigned int hook_mask)
+{
+	const struct ipt_TTL_info *info = targinfo;
+
+	if (targinfosize != IPT_ALIGN(sizeof(struct ipt_TTL_info))) {
+		printk(KERN_WARNING "TTL: targinfosize %u != %Zu\n",
+				targinfosize,
+				IPT_ALIGN(sizeof(struct ipt_TTL_info)));
+		return 0;
+	}
+
+	if (strcmp(tablename, "mangle") != 0) {
+		printk(KERN_WARNING "TTL: can only be called from \"mangle\" table, not \"%s\"\n", tablename);
+		return 0;
+	}
+
+	if (info->mode > IPT_TTL_MAXMODE) {
+		printk(KERN_WARNING "TTL: invalid or unknown Mode %u\n",
+			info->mode);
+		return 0;
+	}
+
+	if (info->ttl == 0 && info->mode != IPT_TTL_SET) {
+		printk(KERN_WARNING "TTL: increment/decrement doesn't make sense with value 0\n");
+		return 0;
+	}
+
+	return 1;
+}
+
+/* iptables registration record for the "TTL" target. */
+static struct ipt_target ipt_TTL = {
+	.name		= "TTL",
+	.target		= ipt_ttl_target,
+	.checkentry	= ipt_ttl_checkentry,
+	.me		= THIS_MODULE,
+};
+
+/* Module load: register the target; hands back ipt_register_target()'s result. */
+static int __init init(void)
+{
+	return ipt_register_target(&ipt_TTL);
+}
+
+/* Module unload: remove the target again. */
+static void __exit fini(void)
+{
+	ipt_unregister_target(&ipt_TTL);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/release/src/linux/linux/net/ipv4/netfilter/ipt_account.c b/release/src/linux/linux/net/ipv4/netfilter/ipt_account.c
new file mode 100644
index 00000000..7fd34562
--- /dev/null
+++ b/release/src/linux/linux/net/ipv4/netfilter/ipt_account.c
@@ -0,0 +1,942 @@
+/*
+ * accounting match (ipt_account.c)
+ * (C) 2003,2004 by Piotr Gasidlo (quaker@barbara.eu.org)
+ *
+ * Version: 0.1.7
+ *
+ * This software is distributed under the terms of GNU GPL
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/proc_fs.h>
+#include <linux/spinlock.h>
+#include <linux/vmalloc.h>
+#include <linux/interrupt.h>
+#include <linux/ctype.h>
+
+#include <linux/seq_file.h>
+
+#include <asm/uaccess.h>
+
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_account.h>
+
+/*
+ * Debug logging helper: dprintk() forwards to printk() when
+ * CONFIG_IP_NF_MATCH_ACCOUNT_DEBUG is defined and expands to nothing
+ * otherwise, so its arguments are not evaluated in non-debug builds.
+ */
+#if defined(CONFIG_IP_NF_MATCH_ACCOUNT_DEBUG)
+ #define dprintk(format,args...) printk(format,##args)
+#else
+ #define dprintk(format,args...)
+#endif
+
+/* banner printed once by init(); it already carries its KERN_INFO log level */
+static char version[] =
+KERN_INFO IPT_ACCOUNT_NAME " " IPT_ACCOUNT_VERSION " : Piotr Gasid³o <quaker@barbara.eu.org>, http://www.barbara.eu.org/~quaker/ipt_account/\n";
+
+/* access rights for files created in /proc/net/ipt_account/ */
+static int permissions = 0644;
+/* prefix length limiting how large a single table may be (validated to 0..32 in init()) */
+static int netmask = 16;
+
+/* module information */
+MODULE_AUTHOR("Piotr Gasidlo <quaker@barbara.eu.org>");
+MODULE_DESCRIPTION("Traffic accounting modules");
+MODULE_LICENSE("GPL");
+/* both variables above can be overridden as integer module parameters */
+MODULE_PARM(permissions,"i");
+MODULE_PARM_DESC(permissions,"permissions on /proc/net/ipt_account/* files");
+MODULE_PARM(netmask, "i");
+MODULE_PARM_DESC(netmask, "maximum *save* size of one list (netmask)");
+
+/*
+ * Full set of statistics counters for one direction of one address.
+ * The member order is significant: account_seq_write() fills five
+ * consecutive counters by stepping a u_int64_t pointer forward from b_all
+ * or from p_all.
+ */
+struct t_ipt_account_stat {
+ u_int64_t b_all, b_tcp, b_udp, b_icmp, b_other; /* byte counters for all/tcp/udp/icmp/other traffic */
+ u_int64_t p_all, p_tcp, p_udp, p_icmp, p_other; /* packet counters for all/tcp/udp/icmp/other traffic */
+};
+
+/* structure with statistics counters, used when table is created with --ashort switch */
+struct t_ipt_account_stat_short {
+ u_int64_t b_all; /* byte counters for all traffic */
+ u_int64_t p_all; /* packet counters for all traffic */
+};
+
+/*
+ * Structure holding to/from statistics for a single ip.
+ * time must stay the last member: account_seq_write() copies everything
+ * that precedes it when a row is restored.
+ */
+struct t_ipt_account_ip_list {
+ struct t_ipt_account_stat src;
+ struct t_ipt_account_stat dest;
+ unsigned long time; /* time when this record was last updated (stored from jiffies in match()) */
+
+};
+
+/* same as above, for tables with --ashort switch; time must stay the last member here too */
+struct t_ipt_account_ip_list_short {
+ struct t_ipt_account_stat_short src;
+ struct t_ipt_account_stat_short dest;
+ unsigned long time; /* time when this record was last updated (stored from jiffies in match()) */
+};
+
+/*
+ * Structure describing a single table.
+ * Tables form a singly linked list rooted at account_tables; the list links
+ * and use_count are changed under account_lock, the per-address rows under
+ * ip_list_lock.
+ */
+struct t_ipt_account_table {
+ char name[IPT_ACCOUNT_NAME_LEN]; /* table name ( = filename in /proc/net/ipt_account/) */
+ union { /* table with statistics for each ip in network/netmask; which member is valid is selected by shortlisting */
+ struct t_ipt_account_ip_list *l;
+ struct t_ipt_account_ip_list_short *s;
+ } ip_list;
+ u_int32_t network; /* network/netmask covered by table (host byte order, compared against ntohl() values in match()) */
+ u_int32_t netmask;
+ u_int32_t count; /* number of rows in ip_list, computed as ~netmask + 1 in checkentry() */
+ /* NOTE(review): 1-bit plain-int bit-field; if the compiler treats it as signed it holds 0 or -1 — confirm comparisons with info->shortlisting behave as intended */
+ int shortlisting:1; /* show only total columns of counters */
+ int use_count; /* rules counter - counting number of rules using this table */
+ struct t_ipt_account_table *next;
+ spinlock_t ip_list_lock;
+ struct proc_dir_entry *status_file;
+};
+
+/* we must use spinlocks to avoid parallel modifications of table list */
+static spinlock_t account_lock = SPIN_LOCK_UNLOCKED;
+
+/* the /proc/net/ipt_account directory, created in init() */
+static struct proc_dir_entry *proc_net_ipt_account = NULL;
+
+/* root pointer holding list of the tables */
+static struct t_ipt_account_table *account_tables = NULL;
+
+/*
+ * Convert a dotted-quad ASCII address at the start of buffer to an IPv4
+ * value with the first octet in the most significant byte.
+ *
+ * buffer: text to parse; parsing stops at the first character after the
+ *         fourth octet
+ * ip:     receives the address; set to 0 when parsing fails
+ *
+ * Returns the number of characters consumed, or 0 when buffer does not
+ * start with four '.'-separated decimal octets in the range 0..255.
+ *
+ * Every octet must contain at least one digit and is range-checked while it
+ * is accumulated, so long digit runs cannot overflow. The last octet is
+ * stored even when the address is followed directly by the terminating NUL.
+ */
+int atoip(char *buffer, u_int32_t *ip) {
+	char *bufferptr = buffer;
+	u_int32_t value = 0;
+	unsigned int part;
+	int octet;
+
+	/* zero ip */
+	*ip = 0;
+
+	for (octet = 0; octet < 4; octet++) {
+		/* each octet starts with a digit */
+		if (!isdigit(*bufferptr))
+			return 0;
+
+		for (part = 0; isdigit(*bufferptr); bufferptr++) {
+			part = part * 10 + (*bufferptr - '0');
+			if (part > 255)
+				return 0;
+		}
+		value = (value << 8) | part;
+
+		/* the first three octets must be followed by a dot */
+		if (octet < 3) {
+			if (*bufferptr != '.')
+				return 0;
+			bufferptr++;
+		}
+	}
+
+	*ip = value;
+	return (int)(bufferptr - buffer);
+}
+
+/*
+ * Convert the run of decimal digits at the start of buffer to a 64-bit
+ * unsigned integer stored in *i (0 when there are no digits).
+ * Returns the number of digits consumed.
+ */
+int atoi64(char *buffer, u_int64_t *i) {
+	char *cursor;
+	u_int64_t value = 0;
+
+	for (cursor = buffer; isdigit(*cursor); cursor++)
+		value = value * 10 + (*cursor - '0');
+
+	*i = value;
+	return (cursor - buffer);
+}
+
+/*
+ * seq_file start callback: begin iterating over the table's rows at *pos.
+ *
+ * Takes table->ip_list_lock on every path; it is released again in
+ * account_seq_stop().
+ *
+ * Returns a kmalloc()ed unsigned int holding the current row index, NULL
+ * when *pos is past the last row, or ERR_PTR(-ENOMEM) when the index
+ * cannot be allocated.
+ */
+static void *account_seq_start(struct seq_file *s, loff_t *pos)
+{
+	struct proc_dir_entry *pde = s->private;
+	struct t_ipt_account_table *table = pde->data;
+	unsigned int *bucket;
+
+	/*
+	 * Allocate before taking the lock: a GFP_KERNEL allocation may sleep,
+	 * which is not allowed while a spinlock is held with bottom halves
+	 * disabled.
+	 */
+	bucket = kmalloc(sizeof(unsigned int), GFP_KERNEL);
+
+	spin_lock_bh(&table->ip_list_lock);
+	if (*pos >= table->count) {
+		/* end of table is reported even if the allocation failed */
+		kfree(bucket);
+		return NULL;
+	}
+	if (!bucket)
+		return ERR_PTR(-ENOMEM);
+
+	*bucket = *pos;
+	return bucket;
+}
+
+/*
+ * seq_file next callback: advance the row index stored in v and mirror it
+ * into *pos. Returns v again while rows remain; once the index reaches
+ * table->count the index is freed and NULL is returned.
+ */
+static void *account_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+	struct proc_dir_entry *entry = s->private;
+	struct t_ipt_account_table *tbl = entry->data;
+	unsigned int *index = v;
+
+	*index += 1;
+	*pos = *index;
+	if (*pos < tbl->count)
+		return index;
+
+	kfree(v);
+	return NULL;
+}
+
+/*
+ * seq_file stop callback: free the row index (if any) and release the
+ * table lock taken in account_seq_start().
+ *
+ * v is whatever the last start/next call returned: a live index, NULL at
+ * the end of the table, or the ERR_PTR() that account_seq_start() returns
+ * on allocation failure. Only a live index may be passed to kfree().
+ */
+static void account_seq_stop(struct seq_file *s, void *v)
+{
+	struct proc_dir_entry *pde = s->private;
+	struct t_ipt_account_table *table = pde->data;
+
+	if (v && !IS_ERR(v))
+		kfree(v);
+	spin_unlock_bh(&table->ip_list_lock);
+}
+
+/*
+ * Parse the " = v1 [v2 ...]" tail that follows a token name: one white
+ * space character, '=', then nvalues numbers, each preceded by exactly one
+ * white space character. The numbers are stored in dst[0..nvalues-1].
+ * buffer is the start of the line and is only used for debug offsets.
+ * On success *cursor is moved past the last number and 1 is returned; on a
+ * syntax error 0 is returned.
+ */
+static int account_parse_values(const char *buffer, char **cursor,
+		u_int64_t *dst, int nvalues)
+{
+	char *ptr = *cursor;
+	int len, i;
+
+	if (!isspace(*ptr)) {
+		dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() expected space (%ti).\n", ptr - buffer);
+		return 0; /* expected space */
+	}
+	ptr += 1;
+	if (*ptr != '=') {
+		dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() expected equal (%ti).\n", ptr - buffer);
+		return 0; /* expected equal */
+	}
+	ptr += 1;
+	for (i = 0; i < nvalues; i++) {
+		if (!isspace(*ptr)) {
+			dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() expected space (%ti).\n", ptr - buffer);
+			return 0; /* expected space */
+		}
+		ptr += 1;
+		if (!(len = atoi64(ptr, &dst[i]))) {
+			dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() expected int64 (%ti).\n", ptr - buffer);
+			return 0; /* expected int64 */
+		}
+		dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() got %llu (%ti).\n", dst[i], ptr - buffer);
+		ptr += len;
+	}
+	*cursor = ptr;
+	return 1;
+}
+
+/*
+ * Parse all "token = values" groups that follow the address on an "ip = ..."
+ * line. Each of bytes_src/bytes_dest/packets_src/packets_dest points at the
+ * first of nvalues consecutive counters to fill for that token; "time"
+ * tokens are parsed and ignored.
+ * Returns 1 when the rest of the line parsed cleanly, 0 on a syntax error.
+ */
+static int account_parse_counters(const char *buffer, char *bufferptr,
+		u_int64_t *bytes_src, u_int64_t *bytes_dest,
+		u_int64_t *packets_src, u_int64_t *packets_dest,
+		int nvalues)
+{
+	u_int64_t dummy;
+	u_int64_t *p;
+	int n;
+
+	while (*bufferptr) {
+		if (!isspace(*bufferptr)) {
+			dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() expected space (%ti).\n", bufferptr - buffer);
+			return 0; /* expected space */
+		}
+		bufferptr += 1;
+		n = nvalues;
+		/* strncmp() stops at the terminating NUL, so it never reads past the parsed text */
+		if (!strncmp(bufferptr, "bytes_src", 9)) {
+			dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() got bytes_src (%ti).\n", bufferptr - buffer);
+			p = bytes_src;
+			bufferptr += 9;
+		} else if (!strncmp(bufferptr, "bytes_dest", 10)) {
+			dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() got bytes_dest (%ti).\n", bufferptr - buffer);
+			p = bytes_dest;
+			bufferptr += 10;
+		} else if (!strncmp(bufferptr, "packets_src", 11)) {
+			dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() got packets_src (%ti).\n", bufferptr - buffer);
+			p = packets_src;
+			bufferptr += 11;
+		} else if (!strncmp(bufferptr, "packets_dest", 12)) {
+			dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() got packets_dest (%ti).\n", bufferptr - buffer);
+			p = packets_dest;
+			bufferptr += 12;
+		} else if (!strncmp(bufferptr, "time", 4)) {
+			/* time hack, ignore time tokens: the single value is parsed into a scratch variable */
+			dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() got time (%ti).\n", bufferptr - buffer);
+			p = &dummy;
+			n = 1;
+			bufferptr += 4;
+		} else {
+			dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() expected token (%ti).\n", bufferptr - buffer);
+			return 0; /* expected token */
+		}
+		if (!account_parse_values(buffer, &bufferptr, p, n))
+			return 0;
+	}
+	return 1;
+}
+
+/*
+ * Write handler for a table's /proc file.
+ *
+ * Two commands are understood:
+ *   "reset"          - zero every row of the table
+ *   "ip = a.b.c.d …" - replace the counters of one row with the values
+ *                      given on the line (same layout as the read side);
+ *                      the row's time stamp is left untouched
+ *
+ * At most 1024 bytes are looked at and the last byte copied is replaced by
+ * the string terminator (normally that byte is the trailing newline).
+ *
+ * Returns the number of bytes consumed, also when the line could not be
+ * parsed (the line is then ignored), 0 for an empty write, or -EFAULT when
+ * the user buffer cannot be read.
+ */
+static int account_seq_write(struct file *file, const char *ubuffer,
+		size_t ulength, loff_t *pos)
+{
+	struct proc_dir_entry *pde = ((struct seq_file *)file->private_data)->private;
+	struct t_ipt_account_table *table = pde->data;
+	char buffer[1024], *bufferptr;
+	int length;
+
+	u_int32_t ip;
+	int len;
+	struct t_ipt_account_ip_list l;
+	struct t_ipt_account_ip_list_short s;
+
+	dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() entered.\n");
+	dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() ulength = %zi.\n", ulength);
+
+	/* an empty write has nothing to parse; this also keeps buffer[length - 1] below inside the buffer */
+	if (ulength == 0)
+		return 0;
+
+	length = (ulength > sizeof(buffer)) ? (int)sizeof(buffer) : (int)ulength;
+	if (copy_from_user(buffer, ubuffer, length))
+		return -EFAULT;
+	buffer[length - 1] = 0;
+	bufferptr = buffer;
+
+	dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() buffer = \'%s\' length = %i.\n", buffer, length);
+
+	/* reset table counters */
+	if (!strncmp(buffer, "reset", 5)) {
+		dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() got \"reset\".\n");
+		spin_lock_bh(&table->ip_list_lock);
+		if (!table->shortlisting)
+			memset(table->ip_list.l, 0, sizeof(struct t_ipt_account_ip_list) * table->count);
+		else
+			memset(table->ip_list.s, 0, sizeof(struct t_ipt_account_ip_list_short) * table->count);
+		spin_unlock_bh(&table->ip_list_lock);
+		return length;
+	}
+
+	/* anything that is not an "ip" line is silently accepted */
+	if (strncmp(buffer, "ip", 2)) {
+		dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() left.\n");
+		return length;
+	}
+
+	dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() got \"ip\".\n");
+	bufferptr += 2;
+	if (!isspace(*bufferptr)) {
+		dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() expected space (%ti).\n", bufferptr - buffer);
+		return length; /* expected space */
+	}
+	bufferptr += 1;
+	if (*bufferptr != '=') {
+		dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() expected equal (%ti).\n", bufferptr - buffer);
+		return length; /* expected equal */
+	}
+	bufferptr += 1;
+	if (!isspace(*bufferptr)) {
+		dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() expected space (%ti).\n", bufferptr - buffer);
+		return length; /* expected space */
+	}
+	bufferptr += 1;
+	if (!(len = atoip(bufferptr, &ip))) {
+		dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() expected ip (%ti).\n", bufferptr - buffer);
+		return length; /* expected ip */
+	}
+	bufferptr += len;
+	/* the row index is ip - network, so it must also be below the row count */
+	if ((ip & table->netmask) != table->network || (ip - table->network) >= table->count) {
+		dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() expected ip [%u.%u.%u.%u] from table's network/netmask [%u.%u.%u.%u/%u.%u.%u.%u].\n", HIPQUAD(ip), HIPQUAD(table->network), HIPQUAD(table->netmask));
+		return length; /* expected ip from table's network/netmask */
+	}
+
+	if (!table->shortlisting) {
+		memset(&l, 0, sizeof(struct t_ipt_account_ip_list));
+		if (!account_parse_counters(buffer, bufferptr,
+				&l.src.b_all, &l.dest.b_all,
+				&l.src.p_all, &l.dest.p_all, 5))
+			return length;
+		dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() updating row.\n");
+		spin_lock_bh(&table->ip_list_lock);
+		/*
+		 * update counters, do not overwrite time field: copy exactly the
+		 * bytes that precede it (sizeof - sizeof(long) would also cover
+		 * the field when the structure has tail padding)
+		 */
+		memcpy(&table->ip_list.l[ip - table->network], &l, (char *)&l.time - (char *)&l);
+		spin_unlock_bh(&table->ip_list_lock);
+	} else {
+		memset(&s, 0, sizeof(struct t_ipt_account_ip_list_short));
+		if (!account_parse_counters(buffer, bufferptr,
+				&s.src.b_all, &s.dest.b_all,
+				&s.src.p_all, &s.dest.p_all, 1))
+			return length;
+		dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() updating row.\n");
+		spin_lock_bh(&table->ip_list_lock);
+		/* update counters, do not overwrite time field */
+		memcpy(&table->ip_list.s[ip - table->network], &s, (char *)&s.time - (char *)&s);
+		spin_unlock_bh(&table->ip_list_lock);
+	}
+
+	dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() left.\n");
+	return length;
+}
+
+
+/*
+ * seq_file show callback: print one row of the table.
+ * v points at the row index; the printed address is table->network plus
+ * that index, and "time" is the number of seconds since the row's time
+ * stamp. Long tables print five counters per group, short tables one.
+ * Always returns 0.
+ */
+static int account_seq_show(struct seq_file *s, void *v)
+{
+	struct proc_dir_entry *pde = s->private;
+	struct t_ipt_account_table *table = pde->data;
+	unsigned int index = *(unsigned int *)v;
+	u_int32_t address = table->network + index;
+	struct timespec last;
+
+	if (table->shortlisting) {
+		struct t_ipt_account_ip_list_short *row = &table->ip_list.s[index];
+
+		jiffies_to_timespec(jiffies - row->time, &last);
+		seq_printf(s,
+			"ip = %u.%u.%u.%u bytes_src = %llu packets_src = %llu bytes_dest = %llu packets_dest = %llu time = %lu\n",
+			HIPQUAD(address),
+			row->src.b_all,
+			row->src.p_all,
+			row->dest.b_all,
+			row->dest.p_all,
+			last.tv_sec
+		);
+	} else {
+		struct t_ipt_account_ip_list *row = &table->ip_list.l[index];
+
+		jiffies_to_timespec(jiffies - row->time, &last);
+		seq_printf(s,
+			"ip = %u.%u.%u.%u bytes_src = %llu %llu %llu %llu %llu packets_src = %llu %llu %llu %llu %llu bytes_dest = %llu %llu %llu %llu %llu packets_dest = %llu %llu %llu %llu %llu time = %lu\n",
+			HIPQUAD(address),
+			row->src.b_all, row->src.b_tcp, row->src.b_udp,
+			row->src.b_icmp, row->src.b_other,
+			row->src.p_all, row->src.p_tcp, row->src.p_udp,
+			row->src.p_icmp, row->src.p_other,
+			row->dest.b_all, row->dest.b_tcp, row->dest.b_udp,
+			row->dest.b_icmp, row->dest.b_other,
+			row->dest.p_all, row->dest.p_tcp, row->dest.p_udp,
+			row->dest.p_icmp, row->dest.p_other,
+			last.tv_sec
+		);
+	}
+	return 0;
+}
+
+/* iterator callbacks used when a table's /proc file is read */
+static struct seq_operations account_seq_ops = {
+ .start = account_seq_start,
+ .next = account_seq_next,
+ .stop = account_seq_stop,
+ .show = account_seq_show
+};
+
+/*
+ * Open handler for a table's /proc file: set up the seq_file iterator and
+ * remember the file's proc entry in its private pointer so the iterator
+ * callbacks can find the table. Returns seq_open()'s result.
+ */
+static int account_seq_open(struct inode *inode, struct file *file)
+{
+	struct seq_file *seq;
+	int err;
+
+	err = seq_open(file, &account_seq_ops);
+	if (err)
+		return err;
+
+	seq = file->private_data;
+	seq->private = PDE(inode);
+	return err;
+}
+
+/* file operations installed on every table's /proc file: seq_file based reads plus the command parser for writes */
+static struct file_operations account_file_ops = {
+ .owner = THIS_MODULE,
+ .open = account_seq_open,
+ .read = seq_read,
+ .write = account_seq_write,
+ .llseek = seq_lseek,
+ .release = seq_release
+};
+
+/*
+ * Raw accounting: add one packet and its length (skb->len) to the "all"
+ * counters of *stat and to the pair of counters selected by the packet's
+ * IP protocol (tcp, udp, icmp, or other for everything else).
+ */
+static inline void do_account(struct t_ipt_account_stat *stat, const struct sk_buff *skb) {
+	u_int64_t *proto_bytes;
+	u_int64_t *proto_packets;
+
+	stat->b_all += skb->len;
+	stat->p_all++;
+
+	/* pick the per-protocol counter pair */
+	switch (skb->nh.iph->protocol) {
+	case IPPROTO_TCP:
+		proto_bytes = &stat->b_tcp;
+		proto_packets = &stat->p_tcp;
+		break;
+	case IPPROTO_UDP:
+		proto_bytes = &stat->b_udp;
+		proto_packets = &stat->p_udp;
+		break;
+	case IPPROTO_ICMP:
+		proto_bytes = &stat->b_icmp;
+		proto_packets = &stat->p_icmp;
+		break;
+	default:
+		proto_bytes = &stat->b_other;
+		proto_packets = &stat->p_other;
+		break;
+	}
+
+	*proto_bytes += skb->len;
+	(*proto_packets)++;
+}
+
+/* Short-table accounting: count one more packet and add its length (skb->len) to the byte total. */
+static inline void do_account_short(struct t_ipt_account_stat_short *stat, const struct sk_buff *skb) {
+	stat->p_all += 1;
+	stat->b_all += skb->len;
+}
+
+/*
+ * Charge one packet to row `index` of a table and stamp the row with `now`.
+ * is_src selects the source (non-zero) or destination (zero) counters.
+ * The caller holds table->ip_list_lock.
+ */
+static inline void account_update_row(struct t_ipt_account_table *table,
+		u_int32_t index, int is_src,
+		const struct sk_buff *skb, unsigned long now)
+{
+	if (!table->shortlisting) {
+		struct t_ipt_account_ip_list *row = &table->ip_list.l[index];
+
+		do_account(is_src ? &row->src : &row->dest, skb);
+		row->time = now;
+	} else {
+		struct t_ipt_account_ip_list_short *row = &table->ip_list.s[index];
+
+		do_account_short(is_src ? &row->src : &row->dest, skb);
+		row->time = now;
+	}
+}
+
+/*
+ * Match callback: account the packet in the table named by the rule.
+ *
+ * The packet's source and destination addresses are each checked against
+ * the table's network/netmask. For every address that belongs to the table
+ * the row of that address is updated and, unless the table covers a single
+ * host (netmask 255.255.255.255), row 0 is updated as well so that it holds
+ * the totals for the whole table.
+ *
+ * Returns 1 when at least one of the two addresses was accounted, 0
+ * otherwise (including when no table with the rule's name exists).
+ */
+static int match(const struct sk_buff *skb,
+		const struct net_device *in,
+		const struct net_device *out,
+		const void *matchinfo,
+		int offset,
+		const void *hdr,
+		u_int16_t datalen,
+		int *hotdrop)
+{
+
+	const struct t_ipt_account_info *info = (struct t_ipt_account_info*)matchinfo;
+	struct t_ipt_account_table *table;
+	int ret;
+	unsigned long now;
+
+	u_int32_t address;
+
+	dprintk(KERN_INFO IPT_ACCOUNT_NAME ": match() entered.\n");
+	dprintk(KERN_INFO IPT_ACCOUNT_NAME ": match() match name = %s.\n", info->name);
+
+	/* find the right table */
+	spin_lock_bh(&account_lock);
+	for (table = account_tables; table; table = table->next)
+		if (!strncmp(table->name, info->name, IPT_ACCOUNT_NAME_LEN))
+			break;
+	spin_unlock_bh(&account_lock);
+
+	if (table == NULL) {
+		/* oops, no table with that name */
+		dprintk(KERN_INFO IPT_ACCOUNT_NAME ": match() table %s not found. Leaving.\n", info->name);
+		return 0;
+	}
+
+	dprintk(KERN_INFO IPT_ACCOUNT_NAME ": match() table found %s\n", table->name);
+
+	/* lock table while updating statistics */
+	spin_lock_bh(&table->ip_list_lock);
+
+	/* default: no match */
+	ret = 0;
+
+	/* get current time */
+	now = jiffies;
+
+	dprintk(KERN_INFO IPT_ACCOUNT_NAME ": match() got packet src = %u.%u.%u.%u, dst = %u.%u.%u.%u, proto = %u.\n", NIPQUAD(skb->nh.iph->saddr), NIPQUAD(skb->nh.iph->daddr), skb->nh.iph->protocol);
+
+	/* check whether traffic from source ip address is being accounted by this table */
+	address = ntohl(skb->nh.iph->saddr);
+	if (address && ((u_int32_t)(address & table->netmask) == (u_int32_t)table->network)) {
+		/* yes, account this packet */
+		dprintk(KERN_INFO "ipt_account: match() accounting packet src = %u.%u.%u.%u, proto = %u.\n", HIPQUAD(address), skb->nh.iph->protocol);
+		/* update counters of this host */
+		account_update_row(table, address - table->network, 1, skb, now);
+		/* update also counters for all hosts in this table (network address) */
+		if (table->netmask != INADDR_BROADCAST)
+			account_update_row(table, 0, 1, skb, now);
+		/* yes, it's a match */
+		ret = 1;
+	}
+
+	/*
+	 * do the same thing with destination ip address; going through the
+	 * shared helper makes the row-0 time stamp use the row layout that
+	 * matches the table type
+	 */
+	address = ntohl(skb->nh.iph->daddr);
+	if (address && ((u_int32_t)(address & table->netmask) == (u_int32_t)table->network)) {
+		dprintk(KERN_INFO IPT_ACCOUNT_NAME ": match() accounting packet dst = %u.%u.%u.%u, proto = %u.\n", HIPQUAD(address), skb->nh.iph->protocol);
+		account_update_row(table, address - table->network, 0, skb, now);
+		if (table->netmask != INADDR_BROADCAST)
+			account_update_row(table, 0, 0, skb, now);
+		ret = 1;
+	}
+	spin_unlock_bh(&table->ip_list_lock);
+
+	dprintk(KERN_INFO IPT_ACCOUNT_NAME ": match() left.\n");
+
+	return ret;
+}
+
+/*
+ * Rule-insertion callback: attach the rule to the table it names, creating
+ * the table (and its /proc/net/ipt_account/<name> file) when it does not
+ * exist yet.
+ *
+ * An existing table is reused only when its network, netmask and
+ * shortlisting setting equal the rule's; its use count is then incremented.
+ * A new table is refused when its netmask covers more addresses than the
+ * "netmask" module parameter allows.
+ *
+ * Returns 1 when the rule is accepted, 0 otherwise.
+ */
+static int checkentry(const char *tablename,
+		const struct ipt_ip *ip,
+		void *matchinfo,
+		unsigned int matchinfosize,
+		unsigned int hook_mask)
+{
+	const struct t_ipt_account_info *info = matchinfo;
+	struct t_ipt_account_table *table, *find_table, *last_table;
+	struct t_ipt_account_table **link;
+	u_int32_t min_netmask;
+	size_t entry_size, list_size;
+	void *list;
+	int ret = 0;
+
+	dprintk(KERN_INFO IPT_ACCOUNT_NAME ": checkentry() entered.\n");
+
+	if (matchinfosize != IPT_ALIGN(sizeof(struct t_ipt_account_info))) return 0;
+	if (!info->name || !info->name[0]) return 0;
+
+	/* find whether table with this name already exists */
+	spin_lock_bh(&account_lock);
+	for (find_table = account_tables; find_table; find_table = find_table->next)
+		if (!strncmp(info->name, find_table->name, IPT_ACCOUNT_NAME_LEN))
+			break;
+	if (find_table != NULL) {
+		dprintk(KERN_INFO IPT_ACCOUNT_NAME ": checkentry() table %s found.\n", info->name);
+		/* if table exists, check whether table network/netmask equals rule network/netmask */
+		if (find_table->network != info->network || find_table->netmask != info->netmask || find_table->shortlisting != info->shortlisting) {
+			spin_unlock_bh(&account_lock);
+			printk(KERN_INFO IPT_ACCOUNT_NAME ": checkentry() wrong parameters (not equals existing table parameters).\n");
+			ret = 0;
+			goto out;
+		}
+		/* increment table use count */
+		find_table->use_count++;
+		spin_unlock_bh(&account_lock);
+		dprintk(KERN_INFO IPT_ACCOUNT_NAME ": checkentry() incrementing use count.\n");
+		ret = 1;
+		goto out;
+	}
+	spin_unlock_bh(&account_lock);
+
+	/*
+	 * check netmask first, before allocating memory: the smallest mask
+	 * allowed is the one with "netmask" leading one bits (the parameter is
+	 * validated to 0..32 in init(); 0 is special-cased because shifting a
+	 * 32-bit value by 32 is undefined)
+	 */
+	min_netmask = netmask ? (0xFFFFFFFFU << (32 - netmask)) : 0;
+	if (info->netmask < min_netmask) {
+		printk(KERN_INFO IPT_ACCOUNT_NAME ": checkentry() too big netmask.\n");
+		ret = 0;
+		goto out;
+	}
+
+	/* table doesn't exist - create new */
+	dprintk(KERN_INFO IPT_ACCOUNT_NAME ": checkentry() allocating %zu for new table %s.\n", sizeof(struct t_ipt_account_table), info->name);
+	table = vmalloc(sizeof(struct t_ipt_account_table));
+	if (table == NULL) {
+		printk(KERN_INFO IPT_ACCOUNT_NAME ": checkentry() failed to allocate %zu for new table %s.\n", sizeof(struct t_ipt_account_table), info->name);
+		ret = 0; /* was -ENOMEM */
+		goto out;
+	}
+
+	/* setup table parameters */
+	table->ip_list_lock = SPIN_LOCK_UNLOCKED;
+	table->next = NULL;
+	table->use_count = 1;
+	table->network = info->network;
+	table->netmask = info->netmask;
+	table->shortlisting = info->shortlisting;
+	table->count = (~table->netmask) + 1;
+	strncpy(table->name,info->name,IPT_ACCOUNT_NAME_LEN);
+	table->name[IPT_ACCOUNT_NAME_LEN - 1] = '\0';
+
+	/* refuse row counts that wrapped to zero or whose byte size does not fit in size_t */
+	entry_size = !table->shortlisting ? sizeof(struct t_ipt_account_ip_list) : sizeof(struct t_ipt_account_ip_list_short);
+	if (table->count == 0 || table->count > ((size_t)-1) / entry_size) {
+		printk(KERN_INFO IPT_ACCOUNT_NAME ": checkentry() too big netmask.\n");
+		ret = 0;
+		goto failure_table;
+	}
+	list_size = entry_size * table->count;
+
+	/* allocate and clear memory for table->ip_list */
+	dprintk(KERN_INFO IPT_ACCOUNT_NAME ": checkentry() allocating %zu for ip_list.\n", list_size);
+	list = vmalloc(list_size);
+	if (list == NULL) {
+		dprintk(KERN_INFO IPT_ACCOUNT_NAME ": checkentry() failed to allocate %zu for ip_list.\n", list_size);
+		ret = 0; /* was -ENOMEM */
+		goto failure_table;
+	}
+	memset(list, 0, list_size);
+	if (!table->shortlisting)
+		table->ip_list.l = list;
+	else
+		table->ip_list.s = list;
+
+	/*
+	 * put table into chain; the list is searched again because the lock was
+	 * dropped while allocating and another rule may have created a table
+	 * with the same name in the meantime
+	 */
+	spin_lock_bh(&account_lock);
+	last_table = NULL;
+	for (find_table = account_tables; find_table; find_table = find_table->next) {
+		if (!strncmp(info->name, find_table->name, IPT_ACCOUNT_NAME_LEN))
+			break;
+		last_table = find_table;
+	}
+	if (find_table != NULL) {
+		dprintk(KERN_INFO IPT_ACCOUNT_NAME ": checkentry() table %s found.\n", info->name);
+		if (find_table->network != info->network || find_table->netmask != info->netmask || find_table->shortlisting != info->shortlisting) {
+			spin_unlock_bh(&account_lock);
+			printk(KERN_INFO IPT_ACCOUNT_NAME ": checkentry() wrong network/netmask.\n");
+			ret = 0;
+			goto failure_ip_list;
+		}
+		find_table->use_count++;
+		spin_unlock_bh(&account_lock);
+		dprintk(KERN_INFO IPT_ACCOUNT_NAME ": checkentry() incrementing use count.\n");
+		/* the rule is accepted on the existing table; our own copy is released below */
+		ret = 1;
+		goto failure_ip_list;
+	}
+	if (!last_table)
+		account_tables = table;
+	else
+		last_table->next = table;
+	spin_unlock_bh(&account_lock);
+
+	/* create procfs status file */
+	table->status_file = create_proc_entry(table->name, permissions, proc_net_ipt_account);
+	if (table->status_file == NULL) {
+		ret = 0; /* was -ENOMEM */
+		goto failure_unlink;
+	}
+	table->status_file->owner = THIS_MODULE;
+	table->status_file->data = table;
+	/* make the data pointer visible before the file operations are installed */
+	wmb();
+	table->status_file->proc_fops = &account_file_ops;
+
+	dprintk(KERN_INFO IPT_ACCOUNT_NAME ": checkentry() left.\n");
+	/* everything went just fine */
+	return 1;
+
+	/* do cleanup in case of failure */
+failure_unlink:
+	/* remove table from list */
+	dprintk(KERN_INFO IPT_ACCOUNT_NAME ": checkentry() removing table.\n");
+	spin_lock_bh(&account_lock);
+	for (link = &account_tables; *link; link = &(*link)->next)
+		if (*link == table)
+			break;
+	if (*link == NULL) {
+		/* no longer on the list: it is not ours to free any more */
+		dprintk(KERN_INFO IPT_ACCOUNT_NAME ": checkentry() table already destroyed. Leaving.\n");
+		spin_unlock_bh(&account_lock);
+		return 0; /* was -ENOMEM */
+	}
+	*link = table->next;
+	spin_unlock_bh(&account_lock);
+failure_ip_list:
+	/* free memory allocated for statistics table */
+	if (!table->shortlisting)
+		vfree(table->ip_list.l);
+	else
+		vfree(table->ip_list.s);
+failure_table:
+	/* free table */
+	vfree(table);
+out:
+	dprintk(KERN_INFO IPT_ACCOUNT_NAME ": checkentry() left. Table not created.\n");
+	/* ret is 1 when the rule was attached to an already existing table, 0 on failure */
+	return ret;
+}
+
+/*
+ * Rule-removal callback: drop one reference to the table named by the rule
+ * and, when that was the last reference, unlink the table, remove its /proc
+ * file and free its memory.
+ */
+static void destroy(void *matchinfo,
+		unsigned int matchinfosize)
+{
+	const struct t_ipt_account_info *info = matchinfo;
+	struct t_ipt_account_table *table, *prev;
+
+	dprintk(KERN_INFO IPT_ACCOUNT_NAME ": destory() entered.\n");
+
+	if (matchinfosize != IPT_ALIGN(sizeof(struct t_ipt_account_info))) return;
+
+	spin_lock_bh(&account_lock);
+
+	if (account_tables == NULL) {
+		dprintk(KERN_INFO IPT_ACCOUNT_NAME ": destory() no tables found. Leaving.\n");
+		spin_unlock_bh(&account_lock);
+		return;
+	}
+
+	/* search for table, remembering its predecessor for unlinking */
+	prev = NULL;
+	for (table = account_tables; table != NULL; table = table->next) {
+		if (strncmp(info->name, table->name, IPT_ACCOUNT_NAME_LEN) == 0)
+			break;
+		prev = table;
+	}
+	if (table == NULL) {
+		dprintk(KERN_INFO IPT_ACCOUNT_NAME ": destory() no table %s not found. Leaving.\n", info->name);
+		spin_unlock_bh(&account_lock);
+		return;
+	}
+
+	/* decrement table use-count */
+	dprintk(KERN_INFO IPT_ACCOUNT_NAME ": destory() decrementing use count.\n");
+	table->use_count -= 1;
+	if (table->use_count != 0) {
+		dprintk(KERN_INFO IPT_ACCOUNT_NAME ": destory() table still in use. Leaving.\n");
+		spin_unlock_bh(&account_lock);
+		return;
+	}
+
+	/* remove table if use-count is zero */
+	dprintk(KERN_INFO IPT_ACCOUNT_NAME ": destory() table %s not used. Removing.\n", table->name);
+
+	/* unlink table */
+	if (prev == NULL)
+		account_tables = table->next;
+	else
+		prev->next = table->next;
+	spin_unlock_bh(&account_lock);
+
+	/* wait while table is still in use: taking and dropping the row lock lets a holder finish first */
+	spin_lock_bh(&table->ip_list_lock);
+	spin_unlock_bh(&table->ip_list_lock);
+
+	/* remove proc entries */
+	remove_proc_entry(table->name, proc_net_ipt_account);
+
+	/* remove table */
+	if (table->shortlisting)
+		vfree(table->ip_list.s);
+	else
+		vfree(table->ip_list.l);
+	vfree(table);
+
+	dprintk(KERN_INFO IPT_ACCOUNT_NAME ": destory() left.\n");
+}
+
+/* registration record for the "account" match, wiring up the three callbacks defined above */
+static struct ipt_match account_match = {
+ .name = "account",
+ .match = &match,
+ .checkentry = &checkentry,
+ .destroy = &destroy,
+ .me = THIS_MODULE
+};
+
+/*
+ * Module load: print the banner, validate the "netmask" parameter, create
+ * the /proc/net/ipt_account directory and register the match.
+ * Returns 0 on success, -EINVAL for a netmask outside 0..32, -ENOMEM when
+ * the directory cannot be created, or the error returned by
+ * ipt_register_match() (the directory is removed again in that case).
+ */
+static int __init init(void)
+{
+	int err;
+
+	dprintk(KERN_INFO IPT_ACCOUNT_NAME ": __init() entered.\n");
+	printk(version);
+
+	if (netmask < 0 || netmask > 32) {
+		/* check params */
+		printk(KERN_INFO "account: Wrong netmask given by netmask parameter (%i). Valid is 32 to 0.\n", netmask);
+		err = -EINVAL;
+	} else if ((proc_net_ipt_account = proc_mkdir("ipt_account", proc_net)) == NULL) {
+		/* could not create /proc/net/ipt_account directory */
+		printk(KERN_INFO IPT_ACCOUNT_NAME ": checkentry() failed to create procfs entry.\n");
+		err = -ENOMEM;
+	} else {
+		proc_net_ipt_account->owner = THIS_MODULE;
+
+		err = ipt_register_match(&account_match);
+		if (err != 0) {
+			printk(KERN_INFO IPT_ACCOUNT_NAME ": checkentry() failed to register match.\n");
+			remove_proc_entry("ipt_account", proc_net);
+		}
+	}
+
+	dprintk(KERN_INFO IPT_ACCOUNT_NAME ": __init() left.\n");
+	return err;
+}
+
+/* Module unload: unregister the match first, then remove the /proc directory created in init(). */
+static void __exit fini(void)
+{
+ dprintk(KERN_INFO IPT_ACCOUNT_NAME ": __exit() entered.\n");
+
+ ipt_unregister_match(&account_match);
+ /* remove /proc/net/ipt_account/ directory */
+ remove_proc_entry("ipt_account", proc_net);
+
+ dprintk(KERN_INFO IPT_ACCOUNT_NAME ": __exit() left.\n");
+}
+
+/* hook init()/fini() up as the module's entry and exit points */
+module_init(init);
+module_exit(fini);
+
diff --git a/release/src/linux/linux/net/ipv4/netfilter/ipt_bcount.c b/release/src/linux/linux/net/ipv4/netfilter/ipt_bcount.c
new file mode 100644
index 00000000..63f93a14
--- /dev/null
+++ b/release/src/linux/linux/net/ipv4/netfilter/ipt_bcount.c
@@ -0,0 +1,59 @@
+/*
+
+ bcount match (experimental)
+ Copyright (C) 2006 Jonathan Zarate
+
+ Licensed under GNU GPL v2 or later.
+
+*/
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <linux/netfilter_ipv4/ipt_bcount.h>
+
+/* Debug logging hook: swap the two definitions to route LOG() to printk. */
+// #define LOG printk
+#define LOG(...) do { } while (0)
+
+
+/*
+ * Match on the connection's byte counter: true when ct->bcount lies within
+ * [info->min, info->max], with the result flipped by info->invert.
+ * A packet without a conntrack entry yields !info->invert.
+ */
+static int match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out,
+	const void *matchinfo, int offset, const void *hdr, u_int16_t datalen, int *hotdrop)
+{
+	const struct ipt_bcount_match *info = matchinfo;
+	enum ip_conntrack_info ctinfo;
+	struct ip_conntrack *ct = ip_conntrack_get((struct sk_buff *)skb, &ctinfo);
+	int in_range;
+
+	if (ct == NULL)
+		return !info->invert;
+
+	in_range = (ct->bcount >= info->min) && (ct->bcount <= info->max);
+	return in_range ^ info->invert;
+}
+
+/* Accept the rule only when the match data has the expected (aligned) size. */
+static int checkentry(const char *tablename, const struct ipt_ip *ip, void *matchinfo,
+	unsigned int matchsize, unsigned int hook_mask)
+{
+	if (matchsize != IPT_ALIGN(sizeof(struct ipt_bcount_match)))
+		return 0;
+
+	return 1;
+}
+
+
+/* Registration record for the "bcount" match; it has no destroy hook. */
+static struct ipt_match bcount_match = {
+	.name		= "bcount",
+	.match		= &match,
+	.checkentry	= &checkentry,
+	.destroy	= NULL,
+	.me		= THIS_MODULE,
+};
+
+/* Module entry point: registers the bcount match and returns the registration result. */
+static int __init init(void)
+{
+	int rc;
+
+	LOG(KERN_INFO "ipt_bcount <" __DATE__ " " __TIME__ "> loaded\n");
+	rc = ipt_register_match(&bcount_match);
+	return rc;
+}
+
+/* Module exit point: removes the bcount match registered by init(). */
+static void __exit fini(void)
+{
+	ipt_unregister_match(&bcount_match);
+}
+
+module_init(init);
+module_exit(fini);
+
+
+MODULE_AUTHOR("Jonathan Zarate");
+MODULE_DESCRIPTION("bcount match");
+MODULE_LICENSE("GPL");
diff --git a/release/src/linux/linux/net/ipv4/netfilter/ipt_condition.c b/release/src/linux/linux/net/ipv4/netfilter/ipt_condition.c
new file mode 100644
index 00000000..c8ee72d5
--- /dev/null
+++ b/release/src/linux/linux/net/ipv4/netfilter/ipt_condition.c
@@ -0,0 +1,256 @@
+/*-------------------------------------------*\
+| Netfilter Condition Module |
+| |
+| Description: This module allows firewall |
+| rules to match using condition variables |
+| stored in /proc files. |
+| |
+| Author: Stephane Ouellette 2002-10-22 |
+| <ouellettes@videotron.ca> |
+| |
+| History: |
+| 2003-02-10 Second version with improved |
+| locking and simplified code. |
+| |
+| This software is distributed under the |
+| terms of the GNU GPL. |
+\*-------------------------------------------*/
+
+#include<linux/module.h>
+#include<linux/proc_fs.h>
+#include<linux/spinlock.h>
+#include<linux/string.h>
+#include<asm/atomic.h>
+#include<asm/uaccess.h>
+#include<linux/netfilter_ipv4/ip_tables.h>
+#include<linux/netfilter_ipv4/ipt_condition.h>
+
+
+#ifndef CONFIG_PROC_FS
+#error "Proc file system support is required for this module"
+#endif
+
+
+MODULE_AUTHOR("Stephane Ouellette <ouellettes@videotron.ca>");
+MODULE_DESCRIPTION("Allows rules to match against condition variables");
+MODULE_LICENSE("GPL");
+
+
+/* One named condition, exposed as a file under the module's proc directory. */
+struct condition_variable {
+ struct condition_variable *next; /* next entry in the list rooted at "head" */
+ struct proc_dir_entry *status_proc; /* proc file; its name doubles as the lookup key */
+ atomic_t refcount; /* taken in checkentry(), dropped in destroy() */
+ int enabled; /* TRUE == 1, FALSE == 0 */
+};
+
+
+/* Guards the condition-variable list rooted at "head"; set up by rwlock_init() in init(). */
+static rwlock_t list_lock;
+/* Singly linked list of condition variables; checkentry() pushes new ones at the front. */
+static struct condition_variable *head = NULL;
+/* Directory entry returned by proc_mkdir("ipt_condition", proc_net) in init(). */
+static struct proc_dir_entry *proc_net_condition = NULL;
+
+
+/*
+ * proc read handler: emits "1\n" or "0\n" according to the variable's
+ * enabled flag on the first read (offset 0) and signals EOF afterwards.
+ */
+static int
+ipt_condition_read_info(char *buffer, char **start, off_t offset,
+			int length, int *eof, void *data)
+{
+	const struct condition_variable *cond = data;
+
+	if (offset != 0) {
+		*eof = 1;
+		return 0;
+	}
+
+	*start = buffer;
+	buffer[0] = cond->enabled ? '1' : '0';
+	buffer[1] = '\n';
+	return 2;
+}
+
+
+/*
+ * proc write handler: sets the variable's enabled flag from the first
+ * character written ('0' clears it, '1' sets it, anything else is ignored).
+ *
+ * "buffer" is supplied by the writer, so the character is fetched with
+ * get_user() instead of being dereferenced directly.
+ *
+ * Returns the number of bytes consumed (always "length"), or -EFAULT when
+ * the first byte cannot be read.
+ */
+static int
+ipt_condition_write_info(struct file *file, const char *buffer,
+			 unsigned long length, void *data)
+{
+	struct condition_variable *var =
+	    (struct condition_variable *) data;
+	char first;
+
+	if (length) {
+		if (get_user(first, buffer))
+			return -EFAULT;
+
+		/* Match only on the first character */
+		switch (first) {
+		case '0':
+			var->enabled = 0;
+			break;
+		case '1':
+			var->enabled = 1;
+			break;
+		default:
+			break;
+		}
+	}
+
+	return (int) length;
+}
+
+
+/*
+ * Packet match: looks up the condition variable named in the rule and
+ * returns its enabled flag XOR info->invert. An unknown name counts as
+ * disabled (0).
+ */
+static int
+match(const struct sk_buff *skb, const struct net_device *in,
+      const struct net_device *out, const void *matchinfo, int offset,
+      const void *hdr, u_int16_t datalen, int *hotdrop)
+{
+	const struct condition_info *info = matchinfo;
+	struct condition_variable *cur;
+	int status = 0;
+
+	read_lock(&list_lock);
+
+	cur = head;
+	while (cur != NULL && strcmp(info->name, cur->status_proc->name) != 0)
+		cur = cur->next;
+
+	if (cur != NULL)
+		status = cur->enabled;
+
+	read_unlock(&list_lock);
+
+	return status ^ info->invert;
+}
+
+
+
+/*
+ * Take a reference on the condition variable called "name" if it is already
+ * on the list. Returns 1 when found, 0 otherwise. A read lock is sufficient
+ * because the list itself is not changed.
+ */
+static int
+condition_get_existing(const char *name)
+{
+	struct condition_variable *var;
+	int found = 0;
+
+	read_lock(&list_lock);
+
+	for (var = head; var; var = var->next) {
+		if (strcmp(name, var->status_proc->name) == 0) {
+			atomic_inc(&var->refcount);
+			found = 1;
+			break;
+		}
+	}
+
+	read_unlock(&list_lock);
+	return found;
+}
+
+
+/*
+ * Rule-insertion hook: binds the rule to the condition variable named in
+ * info->name, creating the variable and its proc file on first use.
+ *
+ * Returns 1 when the rule is acceptable and 0 otherwise. ip_tables treats
+ * any non-zero result as success, so failures must return 0 rather than a
+ * negative errno.
+ */
+static int
+checkentry(const char *tablename, const struct ipt_ip *ip,
+	   void *matchinfo, unsigned int matchsize, unsigned int hook_mask)
+{
+	struct condition_info *info = (struct condition_info *) matchinfo;
+	struct condition_variable *newvar;
+
+	if (matchsize != IPT_ALIGN(sizeof(struct condition_info)))
+		return 0;
+
+	/* The first step is to check if the condition variable already exists. */
+	if (condition_get_existing(info->name))
+		return 1;
+
+	/* At this point, we need to allocate a new condition variable */
+	newvar = kmalloc(sizeof(struct condition_variable), GFP_KERNEL);
+
+	if (!newvar)
+		return 0;
+
+	/* Create the condition variable's proc file entry */
+	newvar->status_proc = create_proc_entry(info->name, 0644, proc_net_condition);
+
+	if (!newvar->status_proc) {
+		/*
+		 * There are two possibilities:
+		 * 1- Another condition variable with the same name has been created, which is valid.
+		 * 2- There was a memory allocation error.
+		 */
+		kfree(newvar);
+		return condition_get_existing(info->name);
+	}
+
+	atomic_set(&newvar->refcount, 1);
+	newvar->enabled = 0;
+	newvar->status_proc->owner = THIS_MODULE;
+	newvar->status_proc->data = newvar;
+	/* make the data pointer visible before the handlers that use it */
+	wmb();
+	newvar->status_proc->read_proc = ipt_condition_read_info;
+	newvar->status_proc->write_proc = ipt_condition_write_info;
+
+	write_lock(&list_lock);
+
+	newvar->next = head;
+	head = newvar;
+
+	write_unlock(&list_lock);
+
+	return 1;
+}
+
+
+/*
+ * Rule-removal hook: drops one reference on the rule's condition variable;
+ * when the last reference goes, unlinks the variable, removes its proc file
+ * and frees it.
+ */
+static void
+destroy(void *matchinfo, unsigned int matchsize)
+{
+	struct condition_info *info = (struct condition_info *) matchinfo;
+	struct condition_variable *cur, *before = NULL;
+
+	if (matchsize != IPT_ALIGN(sizeof(struct condition_info)))
+		return;
+
+	write_lock(&list_lock);
+
+	/* walk to the entry with the rule's name, remembering its predecessor */
+	cur = head;
+	while (cur && strcmp(info->name, cur->status_proc->name) != 0) {
+		before = cur;
+		cur = cur->next;
+	}
+
+	if (!cur || !atomic_dec_and_test(&cur->refcount)) {
+		write_unlock(&list_lock);
+		return;
+	}
+
+	/* last user gone: unlink under the lock, release outside of it */
+	if (before)
+		before->next = cur->next;
+	else
+		head = cur->next;
+
+	write_unlock(&list_lock);
+	remove_proc_entry(cur->status_proc->name, proc_net_condition);
+	kfree(cur);
+}
+
+
+/* Registration record that hooks the "condition" match into ip_tables. */
+static struct ipt_match condition_match = {
+	.me		= THIS_MODULE,
+	.name		= "condition",
+	.match		= match,
+	.checkentry	= checkentry,
+	.destroy	= destroy,
+};
+
+
+/*
+ * Module entry point: initialises the list lock, creates the
+ * "ipt_condition" proc directory and registers the match.
+ * Returns -EACCES when the directory cannot be created, otherwise the
+ * result of ipt_register_match().
+ */
+static int __init
+init(void)
+{
+	int errorcode;
+
+	rwlock_init(&list_lock);
+	proc_net_condition = proc_mkdir("ipt_condition", proc_net);
+
+	if (!proc_net_condition)
+		return -EACCES;
+
+	errorcode = ipt_register_match(&condition_match);
+	if (errorcode)
+		remove_proc_entry("ipt_condition", proc_net);
+
+	return errorcode;
+}
+
+
+/* Module exit point: unregisters the match, then removes the proc directory. */
+static void __exit
+fini(void)
+{
+	ipt_unregister_match(&condition_match);
+	remove_proc_entry("ipt_condition", proc_net);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/release/src/linux/linux/net/ipv4/netfilter/ipt_connlimit.c b/release/src/linux/linux/net/ipv4/netfilter/ipt_connlimit.c
new file mode 100644
index 00000000..abf8efff
--- /dev/null
+++ b/release/src/linux/linux/net/ipv4/netfilter/ipt_connlimit.c
@@ -0,0 +1,222 @@
+/*
+ * netfilter module to limit the number of parallel tcp
+ * connections per IP address.
+ * (c) 2000 Gerd Knorr <kraxel@bytesex.org>
+ * Nov 2002: Martin Bene <martin.bene@icomedias.com>:
+ * only ignore TIME_WAIT or gone connections
+ *
+ * based on ...
+ *
+ * Kernel module to match connection tracking information.
+ * GPL (C) 1999 Rusty Russell (rusty@rustcorp.com.au).
+ */
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/list.h>
+#include <linux/version.h>
+#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <linux/netfilter_ipv4/ip_conntrack_core.h>
+#include <linux/netfilter_ipv4/ip_conntrack_tcp.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_connlimit.h>
+
+#define DEBUG 0
+
+MODULE_LICENSE("GPL");
+
+/* One remembered connection: the tuples of all connections we care about are saved in these. */
+struct ipt_connlimit_conn
+{
+ struct list_head list; /* links the entry into one iphash[] bucket */
+ struct ip_conntrack_tuple tuple; /* copy of the connection's tuplehash[0] tuple */
+};
+
+/* Per-rule private state, allocated in check() and released in destroy(). */
+struct ipt_connlimit_data {
+ spinlock_t lock; /* held by count_them() while it walks and edits the buckets */
+ struct list_head iphash[256]; /* buckets indexed by ipt_iphash(addr & mask) */
+};
+
+/* Fold the four bytes of addr together with XOR to get a bucket index in 0..255. */
+static inline unsigned ipt_iphash(const unsigned addr)
+{
+	unsigned folded = addr;
+
+	folded ^= addr >> 8;
+	folded ^= addr >> 16;
+	folded ^= addr >> 24;
+	return folded & 0xff;
+}
+
+/*
+ * Count the saved connections in addr's hash bucket whose source address
+ * matches addr under mask, pruning entries whose conntrack is gone or in
+ * TIME_WAIT, and remember the connection "ct" if it is not already saved.
+ *
+ * Runs with data->lock held; the lock is released on every exit path.
+ *
+ * Returns the number of matching connections (including a newly saved
+ * one), or -1 when memory for the new entry cannot be allocated.
+ */
+static int count_them(struct ipt_connlimit_data *data,
+		      u_int32_t addr, u_int32_t mask,
+		      struct ip_conntrack *ct)
+{
+#if DEBUG
+	const static char *tcp[] = { "none", "established", "syn_sent", "syn_recv",
+				     "fin_wait", "time_wait", "close", "close_wait",
+				     "last_ack", "listen" };
+#endif
+	int addit = 1, matches = 0;
+	struct ip_conntrack_tuple tuple;
+	struct ip_conntrack_tuple_hash *found;
+	struct ipt_connlimit_conn *conn;
+	struct list_head *hash,*lh;
+
+	spin_lock_bh(&data->lock);
+	tuple = ct->tuplehash[0].tuple;
+	hash = &data->iphash[ipt_iphash(addr & mask)];
+
+	/* check the saved connections */
+	for (lh = hash->next; lh != hash; lh = lh->next) {
+		conn = list_entry(lh,struct ipt_connlimit_conn,list);
+		found = ip_conntrack_find_get(&conn->tuple,ct);
+		if (found != NULL &&
+		    0 == memcmp(&conn->tuple,&tuple,sizeof(tuple)) &&
+		    found->ctrack->proto.tcp.state != TCP_CONNTRACK_TIME_WAIT) {
+			/* Just to be sure we have it only once in the list.
+			   We shouldn't see tuples twice unless someone hooks this
+			   into a table without "-p tcp --syn" */
+			addit = 0;
+		}
+#if DEBUG
+		printk("ipt_connlimit [%d]: src=%u.%u.%u.%u:%d dst=%u.%u.%u.%u:%d %s\n",
+		       ipt_iphash(addr & mask),
+		       NIPQUAD(conn->tuple.src.ip), ntohs(conn->tuple.src.u.tcp.port),
+		       NIPQUAD(conn->tuple.dst.ip), ntohs(conn->tuple.dst.u.tcp.port),
+		       (NULL != found) ? tcp[found->ctrack->proto.tcp.state] : "gone");
+#endif
+		if (NULL == found) {
+			/* this one is gone; step back first so the loop's
+			   lh->next still lands on the following entry */
+			lh = lh->prev;
+			list_del(lh->next);
+			kfree(conn);
+			continue;
+		}
+		if (found->ctrack->proto.tcp.state == TCP_CONNTRACK_TIME_WAIT) {
+			/* we don't care about connections which are
+			   closed already -> ditch it */
+			lh = lh->prev;
+			list_del(lh->next);
+			kfree(conn);
+			nf_conntrack_put(&found->ctrack->infos[0]);
+			continue;
+		}
+		if ((addr & mask) == (conn->tuple.src.ip & mask)) {
+			/* same source IP address -> be counted! */
+			matches++;
+		}
+		nf_conntrack_put(&found->ctrack->infos[0]);
+	}
+	if (addit) {
+		/* save the new connection in our list */
+#if DEBUG
+		printk("ipt_connlimit [%d]: src=%u.%u.%u.%u:%d dst=%u.%u.%u.%u:%d new\n",
+		       ipt_iphash(addr & mask),
+		       NIPQUAD(tuple.src.ip), ntohs(tuple.src.u.tcp.port),
+		       NIPQUAD(tuple.dst.ip), ntohs(tuple.dst.u.tcp.port));
+#endif
+		conn = kmalloc(sizeof(*conn),GFP_ATOMIC);
+		if (NULL == conn) {
+			/* never return with the lock held and BHs disabled */
+			spin_unlock_bh(&data->lock);
+			return -1;
+		}
+		memset(conn,0,sizeof(*conn));
+		INIT_LIST_HEAD(&conn->list);
+		conn->tuple = tuple;
+		list_add(&conn->list,hash);
+		matches++;
+	}
+	spin_unlock_bh(&data->lock);
+	return matches;
+}
+
+/*
+ * Packet match: counts the connections from the packet's source network
+ * and compares the count with info->limit. Without the inverse flag the
+ * rule matches when the count exceeds the limit; with it, when the count
+ * is at or below the limit. Packets without conntrack state, or a failed
+ * allocation in count_them(), cause a hotdrop.
+ */
+static int
+match(const struct sk_buff *skb,
+      const struct net_device *in,
+      const struct net_device *out,
+      const void *matchinfo,
+      int offset,
+      const void *hdr,
+      u_int16_t datalen,
+      int *hotdrop)
+{
+	const struct ipt_connlimit_info *info = matchinfo;
+	enum ip_conntrack_info ctinfo;
+	struct ip_conntrack *ct;
+	int connections, verdict;
+
+	ct = ip_conntrack_get((struct sk_buff *)skb, &ctinfo);
+	if (ct == NULL) {
+		printk("ipt_connlimit: Oops: invalid ct state ?\n");
+		*hotdrop = 1;
+		return 0;
+	}
+
+	connections = count_them(info->data,skb->nh.iph->saddr,info->mask,ct);
+	if (connections == -1) {
+		printk("ipt_connlimit: Hmm, kmalloc failed :-(\n");
+		*hotdrop = 1; /* let's free some memory :-) */
+		return 0;
+	}
+
+	if (info->inverse)
+		verdict = (connections <= info->limit);
+	else
+		verdict = (connections > info->limit);
+#if DEBUG
+	printk("ipt_connlimit: src=%u.%u.%u.%u mask=%u.%u.%u.%u "
+	       "connections=%d limit=%d match=%s\n",
+	       NIPQUAD(skb->nh.iph->saddr), NIPQUAD(info->mask),
+	       connections, info->limit, verdict ? "yes" : "no");
+#endif
+
+	return verdict;
+}
+
+/*
+ * Rule-insertion hook: validates the match data size, insists on TCP and
+ * allocates the rule's private connection table.
+ *
+ * Returns 1 when the rule is acceptable, 0 otherwise (including when the
+ * private data cannot be allocated).
+ */
+static int check(const char *tablename,
+		 const struct ipt_ip *ip,
+		 void *matchinfo,
+		 unsigned int matchsize,
+		 unsigned int hook_mask)
+{
+	struct ipt_connlimit_info *info = matchinfo;
+	int i;
+
+	/* verify size */
+	if (matchsize != IPT_ALIGN(sizeof(struct ipt_connlimit_info)))
+		return 0;
+
+	/* refuse anything but tcp */
+	if (ip->proto != IPPROTO_TCP)
+		return 0;
+
+	/* init private data */
+	info->data = kmalloc(sizeof(struct ipt_connlimit_data),GFP_KERNEL);
+	if (info->data == NULL)
+		return 0;	/* reject the rule instead of dereferencing NULL */
+	spin_lock_init(&(info->data->lock));
+	for (i = 0; i < 256; i++)
+		INIT_LIST_HEAD(&(info->data->iphash[i]));
+
+	return 1;
+}
+
+/* Rule-removal hook: frees every saved connection in all 256 buckets, then the table itself. */
+static void destroy(void *matchinfo, unsigned int matchinfosize)
+{
+	struct ipt_connlimit_info *info = matchinfo;
+	int i;
+
+	/* cleanup */
+	for (i = 0; i < 256; i++) {
+		struct list_head *bucket = &(info->data->iphash[i]);
+
+		while (bucket->next != bucket) {
+			struct list_head *first = bucket->next;
+			struct ipt_connlimit_conn *conn =
+				list_entry(first,struct ipt_connlimit_conn,list);
+
+			list_del(first);
+			kfree(conn);
+		}
+	}
+	kfree(info->data);
+}
+
+/* Registration record that hooks the "connlimit" match into ip_tables. */
+static struct ipt_match connlimit_match = {
+	.name		= "connlimit",
+	.match		= &match,
+	.checkentry	= &check,
+	.destroy	= &destroy,
+	.me		= THIS_MODULE,
+};
+
+/* Module entry point: registers the connlimit match and returns the registration result. */
+static int __init init(void)
+{
+	int rc = ipt_register_match(&connlimit_match);
+
+	return rc;
+}
+
+/* Module exit point: removes the connlimit match registered by init(). */
+static void __exit fini(void)
+{
+	ipt_unregister_match(&connlimit_match);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/release/src/linux/linux/net/ipv4/netfilter/ipt_connmark.c b/release/src/linux/linux/net/ipv4/netfilter/ipt_connmark.c
new file mode 100644
index 00000000..d795a339
--- /dev/null
+++ b/release/src/linux/linux/net/ipv4/netfilter/ipt_connmark.c
@@ -0,0 +1,83 @@
+/* This kernel module matches connection mark values set by the
+ * CONNMARK target
+ *
+ * Copyright (C) 2002,2004 MARA Systems AB <http://www.marasystems.com>
+ * by Henrik Nordstrom <hno@marasystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+
+MODULE_AUTHOR("Henrik Nordstrom <hno@marasytems.com>");
+MODULE_DESCRIPTION("IP tables connmark match module");
+MODULE_LICENSE("GPL");
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_connmark.h>
+#include <linux/netfilter_ipv4/ip_conntrack.h>
+
+/*
+ * Packet match: compares the connection mark, masked with info->mask,
+ * against info->mark and flips the result with info->invert.
+ * Packets without a conntrack entry never match.
+ */
+static int
+match(const struct sk_buff *skb,
+      const struct net_device *in,
+      const struct net_device *out,
+      const void *matchinfo,
+      int offset,
+      const void *hdr,
+      u_int16_t datalen,
+      int *hotdrop)
+{
+	const struct ipt_connmark_info *info = matchinfo;
+	enum ip_conntrack_info ctinfo;
+	struct ip_conntrack *ct;
+	int same_mark;
+
+	ct = ip_conntrack_get((struct sk_buff *)skb, &ctinfo);
+	if (ct == NULL)
+		return 0;
+
+	same_mark = ((ct->mark & info->mask) == info->mark);
+	return same_mark ^ info->invert;
+}
+
+/* Accept the rule only when the match data has the expected (aligned) size. */
+static int
+checkentry(const char *tablename,
+	   const struct ipt_ip *ip,
+	   void *matchinfo,
+	   unsigned int matchsize,
+	   unsigned int hook_mask)
+{
+	return matchsize == IPT_ALIGN(sizeof(struct ipt_connmark_info));
+}
+
+/* Registration record for the "connmark" match; it has no destroy hook. */
+static struct ipt_match connmark_match = {
+	.me		= THIS_MODULE,
+	.name		= "connmark",
+	.match		= match,
+	.checkentry	= checkentry,
+};
+
+/* Module entry point: registers the connmark match and returns the registration result. */
+static int __init init(void)
+{
+	int rc = ipt_register_match(&connmark_match);
+
+	return rc;
+}
+
+/* Module exit point: removes the connmark match registered by init(). */
+static void __exit fini(void)
+{
+	ipt_unregister_match(&connmark_match);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/release/src/linux/linux/net/ipv4/netfilter/ipt_exp.c b/release/src/linux/linux/net/ipv4/netfilter/ipt_exp.c
new file mode 100644
index 00000000..1b682b9c
--- /dev/null
+++ b/release/src/linux/linux/net/ipv4/netfilter/ipt_exp.c
@@ -0,0 +1,57 @@
+/*
+
+ Experimental Netfilter Crap
+ Copyright (C) 2006 Jonathan Zarate
+
+*/
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/file.h>
+#include <net/sock.h>
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_exp.h>
+#include "../../bridge/br_private.h"
+
+
+/*
+ * Experimental match: when the MAC header pointer lies inside the buffer
+ * (at or after skb->head, with ETH_HLEN bytes ending at or before
+ * skb->data) it logs the source and destination MAC addresses and matches;
+ * otherwise it logs the raw pointers and does not match.
+ */
+static int match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out,
+	const void *matchinfo, int offset, const void *hdr, u_int16_t datalen, int *hotdrop)
+{
+	/* guard: no usable ethernet header in front of the payload */
+	if ((skb->mac.raw < skb->head) || ((skb->mac.raw + ETH_HLEN) > skb->data)) {
+		printk(KERN_INFO "exp mac=%p head=%p in=%p\n", skb->mac.raw, skb->head, in);
+		return 0;
+	}
+
+	printk(KERN_INFO "exp src=%02X:%02X:%02X:%02X:%02X:%02X dst=%02X:%02X:%02X:%02X:%02X:%02X\n",
+		skb->mac.ethernet->h_source[0], skb->mac.ethernet->h_source[1],
+		skb->mac.ethernet->h_source[2], skb->mac.ethernet->h_source[3],
+		skb->mac.ethernet->h_source[4], skb->mac.ethernet->h_source[5],
+		skb->mac.ethernet->h_dest[0], skb->mac.ethernet->h_dest[1],
+		skb->mac.ethernet->h_dest[2], skb->mac.ethernet->h_dest[3],
+		skb->mac.ethernet->h_dest[4], skb->mac.ethernet->h_dest[5]);
+	return 1;
+}
+
+/* Accept the rule only when the match data has the expected (aligned) size. */
+static int checkentry(const char *tablename, const struct ipt_ip *ip, void *matchinfo,
+	unsigned int matchsize, unsigned int hook_mask)
+{
+	if (matchsize != IPT_ALIGN(sizeof(struct ipt_exp_info)))
+		return 0;
+
+	return 1;
+}
+
+/* Registration record for the "exp" match; it has no destroy hook. */
+static struct ipt_match exp_match = {
+	.name		= "exp",
+	.match		= &match,
+	.checkentry	= &checkentry,
+	.destroy	= NULL,
+	.me		= THIS_MODULE,
+};
+
+/* Module entry point: logs the build timestamp, then registers the exp match. */
+static int __init init(void)
+{
+	int rc;
+
+	printk(KERN_INFO "exp init " __DATE__ " " __TIME__ "\n");
+	rc = ipt_register_match(&exp_match);
+	return rc;
+}
+
+/* Module exit point: logs the unload, then removes the exp match. */
+static void __exit fini(void)
+{
+	printk(KERN_INFO "exp fini\n");
+	ipt_unregister_match(&exp_match);
+}
+
+module_init(init);
+module_exit(fini);
+MODULE_LICENSE("GPL");
diff --git a/release/src/linux/linux/net/ipv4/netfilter/ipt_geoip.c b/release/src/linux/linux/net/ipv4/netfilter/ipt_geoip.c
new file mode 100644
index 00000000..fbd1a95c
--- /dev/null
+++ b/release/src/linux/linux/net/ipv4/netfilter/ipt_geoip.c
@@ -0,0 +1,272 @@
+/* netfilter's kernel module for the geoip match
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Copyright (c) 2004 Cookinglinux
+ */
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <asm/uaccess.h>
+#include <asm/atomic.h>
+
+#include <linux/netfilter_ipv4/ipt_geoip.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Samuel Jean, Nicolas Bouliane");
+MODULE_DESCRIPTION("iptables/netfilter's geoip match");
+
+/* Head of the doubly linked list of loaded country nodes (see add_node()/remove_node()).
+ * NOTE(review): not declared static, so the symbol "head" has external linkage -- confirm nothing else relies on it. */
+struct geoip_info *head = NULL;
+/* Taken by add_node(), remove_node(), find_node() and match() around list and subnet accesses. */
+static spinlock_t geoip_lock = SPIN_LOCK_UNLOCKED;
+
+/*
+ * Copy a country record and its subnet array from userspace and push the
+ * new node onto the front of the global list with a reference count of 1.
+ *
+ * "memcpy" is the userspace pointer to the struct geoip_info to load.
+ *
+ * Returns the new kernel-side node, or NULL when an allocation or a
+ * userspace copy fails; nothing stays allocated on failure.
+ */
+static struct geoip_info *add_node(struct geoip_info *memcpy)
+{
+	struct geoip_info *p;
+	struct geoip_subnet *s;
+
+	p = (struct geoip_info *)kmalloc(sizeof(struct geoip_info), GFP_KERNEL);
+	if (p == NULL)
+		return NULL;
+
+	if (copy_from_user(p, memcpy, sizeof(struct geoip_info)) != 0)
+		goto free_node;
+
+	/* p->subnets still holds the userspace address copied in above */
+	s = (struct geoip_subnet *)kmalloc(p->count * sizeof(struct geoip_subnet), GFP_KERNEL);
+	if (s == NULL)
+		goto free_node;
+
+	if (copy_from_user(s, p->subnets, p->count * sizeof(struct geoip_subnet)) != 0)
+		goto free_subnets;
+
+	spin_lock_bh(&geoip_lock);
+
+	p->subnets = s;
+	p->ref = 1;
+	p->next = head;
+	p->prev = NULL;
+	if (p->next) p->next->prev = p;
+	head = p;
+
+	spin_unlock_bh(&geoip_lock);
+	return p;
+
+free_subnets:
+	kfree(s);
+free_node:
+	kfree(p);
+	return NULL;
+}
+
+/* Unlink node p from the global list and free it together with its subnet array. */
+static void remove_node(struct geoip_info *p)
+{
+	spin_lock_bh(&geoip_lock);
+
+	/* detach from the successor, if any */
+	if (p->next)
+		p->next->prev = p->prev;
+
+	/* detach from the predecessor, or move the list head when p was first */
+	if (p->prev)
+		p->prev->next = p->next;
+	else
+		head = p->next;
+
+	/* the node is unlinked now, so its memory can go */
+	kfree(p->subnets);
+	kfree(p);
+
+	spin_unlock_bh(&geoip_lock);
+}
+
+/*
+ * Look up the loaded node for country code cc.
+ * Returns the node, or NULL when that country is not on the list.
+ */
+static struct geoip_info *find_node(u_int16_t cc)
+{
+	struct geoip_info *p;
+
+	spin_lock_bh(&geoip_lock);
+
+	/* read the list head only once the lock is held */
+	for (p = head; p != NULL; p = p->next) {
+		if (p->cc == cc)
+			break;
+	}
+
+	spin_unlock_bh(&geoip_lock);
+	return p;
+}
+
+/*
+ * Packet match: takes the source or destination address (per
+ * IPT_GEOIP_SRC) in host byte order and checks it against the subnet
+ * ranges of every country attached to the rule. IPT_GEOIP_INV inverts
+ * the verdict.
+ *
+ * NOTE(review): both range comparisons are strict, so an address equal to
+ * a subnet's begin or end does not match -- confirm the bounds are meant
+ * to be exclusive.
+ */
+static int match(const struct sk_buff *skb,
+		 const struct net_device *in,
+		 const struct net_device *out,
+		 const void *matchinfo,
+		 int offset,
+		 const void *hdr,
+		 u_int16_t datalen,
+		 int *hotdrop)
+{
+	const struct ipt_geoip_info *info = matchinfo;
+	const struct geoip_info *node;
+	const struct iphdr *iph = skb->nh.iph;
+	u_int32_t ip, j;
+	u_int8_t i;
+	int hit = 0;
+
+	ip = (info->flags & IPT_GEOIP_SRC) ? ntohl(iph->saddr) : ntohl(iph->daddr);
+
+	spin_lock_bh(&geoip_lock);
+	for (i = 0; i < info->count && !hit; i++) {
+		node = info->mem[i];
+		if (node == NULL) {
+			printk(KERN_ERR "ipt_geoip: what the hell ?? '%c%c' isn't loaded into memory... skip it!\n",
+			       COUNTRY(info->cc[i]));
+			continue;
+		}
+
+		for (j = 0; j < node->count; j++) {
+			if ((ip > node->subnets[j].begin) && (ip < node->subnets[j].end)) {
+				hit = 1;
+				break;
+			}
+		}
+	}
+	spin_unlock_bh(&geoip_lock);
+
+	if (info->flags & IPT_GEOIP_INV)
+		return !hit;
+	return hit;
+}
+
+/*
+ * Rule-insertion hook for the geoip match.
+ *
+ * Checks the match data size; if info->refcount is already set it only
+ * bumps that counter and accepts. Otherwise each country in info->cc[] is
+ * resolved to a kernel node (found on the list and referenced, or loaded
+ * from userspace via add_node()), info->mem[i] is overwritten with that
+ * node, and a fresh refcount initialised to 1 is attached to the rule.
+ *
+ * Returns 1 when the rule is accepted, 0 otherwise.
+ *
+ * NOTE(review): info->refcount is allocated with sizeof(u_int8_t) and
+ * initialised as a single byte, yet it is incremented through an
+ * (atomic_t *) cast -- the two views of the counter look inconsistent;
+ * confirm and unify the type together with geoip_destroy().
+ * NOTE(review): the "return 0" paths inside and after the loop leave the
+ * references already taken on earlier countries in place -- looks like a
+ * leak; confirm.
+ */
+static int geoip_checkentry(const char *tablename,
+ const struct ipt_ip *ip,
+ void *matchinfo,
+ unsigned int matchsize,
+ unsigned int hook_mask)
+{
+ struct ipt_geoip_info *info = matchinfo;
+ struct geoip_info *node;
+ u_int8_t i;
+
+ /* FIXME: Call a function to free userspace allocated memory.
+ * As Martin J. said; this match might eat lot of memory
+ * if commited with iptables-restore --noflush
+ void (*gfree)(struct geoip_info *oldmem);
+ gfree = info->fini;
+ */
+
+ if (matchsize != IPT_ALIGN(sizeof(struct ipt_geoip_info))) {
+ printk(KERN_ERR "ipt_geoip: matchsize differ, you may have forgotten to recompile me\n");
+ return 0;
+ }
+
+ /* If info->refcount isn't NULL, then
+ * it means that checkentry() already
+ * initialized this entry. Increase a
+ * refcount to prevent destroy() of
+ * this entry. */
+ if (info->refcount != NULL) {
+ atomic_inc((atomic_t *)info->refcount);
+ return 1;
+ }
+
+
+ for (i = 0; i < info->count; i++) {
+
+ if ((node = find_node(info->cc[i])) != NULL)
+ atomic_inc((atomic_t *)&node->ref); //increase the reference
+ else
+ if ((node = add_node(info->mem[i])) == NULL) {
+ printk(KERN_ERR
+ "ipt_geoip: unable to load '%c%c' into memory\n",
+ COUNTRY(info->cc[i]));
+ return 0;
+ }
+
+ /* Free userspace allocated memory for that country.
+ * FIXME: It's a bit odd to call this function everytime
+ * we process a country. Would be nice to call
+ * it once after all countries've been processed.
+ * - SJ
+ * *not implemented for now*
+ gfree(info->mem[i]);
+ */
+
+ /* Overwrite the now-useless pointer info->mem[i] with
+ * a pointer to the node's kernelspace structure.
+ * This avoids searching for a node in the match() and
+ * destroy() functions.
+ */
+ info->mem[i] = node;
+ }
+
+ /* We allocate some memory and give info->refcount a pointer
+ * to this memory. This prevents checkentry() from increasing a refcount
+ * different from the one used by destroy().
+ * For explanation, see http://www.mail-archive.com/netfilter-devel@lists.samba.org/msg00625.html
+ */
+ info->refcount = kmalloc(sizeof(u_int8_t), GFP_KERNEL);
+ if (info->refcount == NULL) {
+ printk(KERN_ERR "ipt_geoip: failed to allocate `refcount' memory\n");
+ return 0;
+ }
+ *(info->refcount) = 1;
+
+ return 1;
+}
+
+/*
+ * Rule-removal hook for the geoip match.
+ *
+ * Decrements the rule's refcount and returns early while it is still
+ * non-zero. Once it reaches zero the counter is freed, one reference is
+ * dropped on every country node in info->mem[], and nodes whose ref falls
+ * below 1 are removed with remove_node().
+ *
+ * NOTE(review): the counter is decremented through an (atomic_t *) cast
+ * but tested by reading a single byte (*info->refcount) -- the two views
+ * look inconsistent; confirm together with geoip_checkentry().
+ */
+static void geoip_destroy(void *matchinfo, unsigned int matchsize)
+{
+ struct ipt_geoip_info *info = matchinfo;
+ struct geoip_info *node; /* this keeps the code sexy */
+ u_int8_t i;
+
+ /* Decrease the previously increased refcount in checkentry()
+ * If it's equal to 1, we know this entry is just moving
+ * but not removed. We simply return to avoid useless destroy()
+ * processing.
+ */
+ atomic_dec((atomic_t *)info->refcount);
+ if (*info->refcount)
+ return;
+
+ /* Don't leak my memory, you idiot.
+ * Bug found with nfsim.. the netfilter's best
+ * friend. --peejix */
+ kfree(info->refcount);
+
+ /* This entry has been removed from the table so
+ * decrease the refcount of all countries it is
+ * using.
+ */
+
+ for (i = 0; i < info->count; i++)
+ if ((node = info->mem[i]) != NULL) {
+ atomic_dec((atomic_t *)&node->ref);
+
+ /* Free up some memory if that node isn't used
+ * anymore. */
+ if (node->ref < 1)
+ remove_node(node);
+ }
+ else
+ /* Something strange happened. There's no memory allocated for this
+ * country. Please send this bug to the mailing list. */
+ printk(KERN_ERR
+ "ipt_geoip: What happened peejix ? What happened acidmen ?\n"
+ "ipt_geoip: please report this bug to the maintainers\n");
+ return;
+}
+
+/* Registration record that hooks the "geoip" match into ip_tables. */
+static struct ipt_match geoip_match = {
+	.name		= "geoip",
+	.match		= &match,
+	.checkentry	= &geoip_checkentry,
+	.destroy	= &geoip_destroy,
+	.me		= THIS_MODULE,
+};
+
+/* Module entry point: registers the geoip match and returns the registration result. */
+static int __init init(void)
+{
+	int rc = ipt_register_match(&geoip_match);
+
+	return rc;
+}
+
+/* Module exit point: removes the geoip match registered by init(). */
+static void __exit fini(void)
+{
+	ipt_unregister_match(&geoip_match);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/release/src/linux/linux/net/ipv4/netfilter/ipt_ipp2p.c b/release/src/linux/linux/net/ipv4/netfilter/ipt_ipp2p.c
new file mode 100644
index 00000000..c36b2005
--- /dev/null
+++ b/release/src/linux/linux/net/ipv4/netfilter/ipt_ipp2p.c
@@ -0,0 +1,868 @@
+#if defined(MODVERSIONS)
+#include <linux/modversions.h>
+#endif
+#include <linux/module.h>
+#include <linux/version.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_ipp2p.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+
+/*
+ * Read an 8/16/32-bit value stored O bytes past pointer X.
+ * The value is taken as-is from memory: no byte-order conversion and no
+ * unaligned-access handling is performed.  Both arguments are
+ * parenthesized so that arbitrary expressions can be passed, and the
+ * access is const-qualified because these macros are only used for reads.
+ */
+#define get_u8(X,O)  (*(const __u8 *)((X) + (O)))
+#define get_u16(X,O) (*(const __u16 *)((X) + (O)))
+#define get_u32(X,O) (*(const __u32 *)((X) + (O)))
+
+MODULE_AUTHOR("Eicke Friedrich/Klaus Degner <ipp2p@ipp2p.org>");
+MODULE_DESCRIPTION("An extension to iptables to identify P2P traffic.");
+MODULE_LICENSE("GPL");
+
+
+/*
+ * Search for UDP eDonkey/eMule/Kad commands.
+ *
+ * @haystack:   start of the transport header; the first 8 bytes are
+ *              skipped before the command bytes are inspected
+ * @packet_len: length counted from @haystack, i.e. including those 8 bytes
+ *              (the "size" values in the comments below exclude them)
+ *
+ * Returns (IPP2P_EDK * 100) + a command-specific code on a hit, else 0.
+ * The caller must guarantee at least 10 readable bytes.
+ */
+int
+udp_search_edk (unsigned char *haystack, int packet_len)
+{
+	const unsigned char *t = haystack + 8;
+
+	switch (t[0]) {
+	case 0xe3:	/*edonkey*/
+		switch (t[1]) {
+		case 0x96:	/* client -> server status request */
+			if (packet_len == 14)
+				return ((IPP2P_EDK * 100) + 50);
+			break;
+		case 0x97:	/* server -> client status request */
+			if (packet_len == 42)
+				return ((IPP2P_EDK * 100) + 51);
+			break;
+		case 0xa2:	/* server description request: e3 a2 ff f0 .. | size == 6 */
+			if (packet_len == 14 && get_u16(t, 2) == __constant_htons(0xfff0))
+				return ((IPP2P_EDK * 100) + 52);
+			break;
+		/* server description response (e3 a3 ff f0 .. | size > 40 &&
+		 * size < 200) is currently disabled; it used to return
+		 * (IPP2P_EDK * 100) + 53 for case 0xa3. */
+		case 0x9a:
+			if (packet_len == 26)
+				return ((IPP2P_EDK * 100) + 54);
+			break;
+		case 0x92:
+			if (packet_len == 18)
+				return ((IPP2P_EDK * 100) + 55);
+			break;
+		}
+		break;
+	case 0xe4:
+		switch (t[1]) {
+		case 0x20:	/* e4 20 .. | size == 43 */
+			if (packet_len == 43 && t[2] != 0x00 && t[34] != 0x00)
+				return ((IPP2P_EDK * 100) + 60);
+			break;
+		case 0x00:	/* e4 00 .. 00 | size == 35 ? */
+			if (packet_len == 35 && t[26] == 0x00)
+				return ((IPP2P_EDK * 100) + 61);
+			break;
+		case 0x10:	/* e4 10 .. 00 | size == 35 ? */
+			if (packet_len == 35 && t[26] == 0x00)
+				return ((IPP2P_EDK * 100) + 62);
+			break;
+		case 0x18:	/* e4 18 .. 00 | size == 35 ? */
+			if (packet_len == 35 && t[26] == 0x00)
+				return ((IPP2P_EDK * 100) + 63);
+			break;
+		case 0x52:	/* e4 52 .. | size = 44 */
+			if (packet_len == 44)
+				return ((IPP2P_EDK * 100) + 64);
+			break;
+		case 0x58:	/* e4 58 .. | size == 6 */
+			if (packet_len == 14)
+				return ((IPP2P_EDK * 100) + 65);
+			break;
+		case 0x59:	/* e4 59 .. | size == 2 */
+			if (packet_len == 10)
+				return ((IPP2P_EDK * 100) + 66);
+			break;
+		case 0x28:	/* e4 28 .. | packet_len == 52,77,102,127... */
+			/* The lower bound is required: without it a negative
+			 * multiple of 25 (e.g. packet_len == 27) also leaves a
+			 * zero remainder and was wrongly accepted. */
+			if (packet_len >= 52 && ((packet_len - 52) % 25) == 0)
+				return ((IPP2P_EDK * 100) + 67);
+			break;
+		case 0x50:	/* e4 50 xx xx | size == 4 */
+			if (packet_len == 12)
+				return ((IPP2P_EDK * 100) + 68);
+			break;
+		case 0x40:	/* e4 40 xx xx | size == 48 */
+			if (packet_len == 56)
+				return ((IPP2P_EDK * 100) + 69);
+			break;
+		}
+		break;
+	} /* end of switch (t[0]) */
+	return 0;
+}/*udp_search_edk*/
+
+
+/*
+ * Search for UDP Gnutella commands.
+ *
+ * @haystack:   start of the transport header; the first 8 bytes are skipped
+ * @packet_len: length counted from @haystack, including those 8 bytes
+ *
+ * Returns (IPP2P_GNU * 100) + 51 for a "GND" prefix, + 52 for a
+ * "GNUTELLA " prefix, else 0.  Each prefix is only compared when the
+ * packet is long enough to contain it, so no bytes beyond the packet are
+ * read.
+ */
+int
+udp_search_gnu (unsigned char *haystack, int packet_len)
+{
+	const unsigned char *t = haystack + 8;
+	const int data_len = packet_len - 8;
+
+	if (data_len >= 3 && memcmp(t, "GND", 3) == 0)
+		return ((IPP2P_GNU * 100) + 51);
+	if (data_len >= 9 && memcmp(t, "GNUTELLA ", 9) == 0)
+		return ((IPP2P_GNU * 100) + 52);
+	return 0;
+}/*udp_search_gnu*/
+
+
+/*
+ * Search for UDP KaZaA commands.
+ *
+ * A hit requires the packet to end in the six bytes "KaZaA" followed by
+ * 0x00.  Returns IPP2P_KAZAA * 100 + 50 on a hit, else 0.
+ * The caller must pass a @packet_len of at least 6.
+ */
+int
+udp_search_kazaa (unsigned char *haystack, int packet_len)
+{
+	const unsigned char *tail = haystack + (packet_len - 6);
+
+	/* last byte must be the terminating zero */
+	if (haystack[packet_len - 1] != 0x00)
+		return 0;
+
+	if (memcmp(tail, "KaZaA", 5) != 0)
+		return 0;
+
+	return (IPP2P_KAZAA * 100 +50);
+}/*udp_search_kazaa*/
+
+/*
+ * Search for UDP DirectConnect commands.
+ *
+ * @haystack:   start of the transport header; the first 8 bytes are skipped
+ * @packet_len: length counted from @haystack, including those 8 bytes
+ *
+ * A command starts with '$' (0x24) and the packet ends with '|' (0x7c).
+ * Returns (IPP2P_DC * 100) + 60 for "$SR ", + 61 for "$Ping ", else 0.
+ *
+ * The keyword is compared starting one byte after the '$'.  The previous
+ * code compared it against the '$' byte itself, so it could never match.
+ * The TCP counterpart search_all_dc() already skips the '$' this way.
+ */
+int
+udp_search_directconnect (unsigned char *haystack, int packet_len)
+{
+	const unsigned char *keyword = haystack + 9;	/* byte after '$' */
+
+	/* need at least the '$' and the closing '|' behind the 8 skipped bytes */
+	if (packet_len < 10)
+		return 0;
+	if (haystack[8] != 0x24 || haystack[packet_len - 1] != 0x7c)
+		return 0;
+
+	if (packet_len >= 12 && memcmp(keyword, "SR ", 3) == 0)
+		return ((IPP2P_DC * 100) + 60);
+	if (packet_len >= 14 && memcmp(keyword, "Ping ", 5) == 0)
+		return ((IPP2P_DC * 100) + 61);
+	return 0;
+}/*udp_search_directconnect*/
+
+
+
+/*
+ * Search for UDP BitTorrent commands.
+ *
+ * @haystack:   start of the transport header (offsets below count from it)
+ * @packet_len: length counted from @haystack
+ *
+ * Fixed-size packets are recognised by length plus magic words; one
+ * variable-size pattern and the bitcomet "d1:?d2:id20:" prefix are tried
+ * afterwards.  Returns IPP2P_BIT * 100 + a pattern code, else 0.
+ */
+int
+udp_search_bit (unsigned char *haystack, int packet_len)
+{
+	switch (packet_len) {
+	case 24:
+		/* ^ 00 00 04 17 27 10 19 80 */
+		if (get_u32(haystack, 8) == __constant_htonl(0x00000417) &&
+		    get_u32(haystack, 12) == __constant_htonl(0x27101980))
+			return (IPP2P_BIT * 100 + 50);
+		break;
+	case 44:
+		if (get_u32(haystack, 16) == __constant_htonl(0x00000400)) {
+			if (get_u32(haystack, 36) == __constant_htonl(0x00000104))
+				return (IPP2P_BIT * 100 + 51);
+			return (IPP2P_BIT * 100 + 61);
+		}
+		break;
+	case 65:
+		if (get_u32(haystack, 16) == __constant_htonl(0x00000404)) {
+			if (get_u32(haystack, 36) == __constant_htonl(0x00000104))
+				return (IPP2P_BIT * 100 + 52);
+			return (IPP2P_BIT * 100 + 62);
+		}
+		break;
+	case 67:
+		if (get_u32(haystack, 16) == __constant_htonl(0x00000406)) {
+			if (get_u32(haystack, 36) == __constant_htonl(0x00000104))
+				return (IPP2P_BIT * 100 + 53);
+			return (IPP2P_BIT * 100 + 63);
+		}
+		break;
+	case 211:
+		if (get_u32(haystack, 8) == __constant_htonl(0x00000405))
+			return (IPP2P_BIT * 100 + 54);
+		break;
+	case 29:
+		if (get_u32(haystack, 8) == __constant_htonl(0x00000401))
+			return (IPP2P_BIT * 100 + 55);
+		break;
+	case 52:
+		if (get_u32(haystack, 8) == __constant_htonl(0x00000827) &&
+		    get_u32(haystack, 12) == __constant_htonl(0x37502950))
+			return (IPP2P_BIT * 100 + 80);
+		break;
+	default:
+		/* this packet does not have a constant size */
+		if (packet_len >= 40 &&
+		    get_u32(haystack, 16) == __constant_htonl(0x00000402) &&
+		    get_u32(haystack, 36) == __constant_htonl(0x00000104))
+			return (IPP2P_BIT * 100 + 56);
+		break;
+	}
+
+	/* some extra-bitcomet rules:
+	 * "d1:" [a|r] "d2:id20:"
+	 */
+	if (packet_len > 30 && memcmp(haystack + 8, "d1:", 3) == 0 &&
+	    (haystack[11] == 'a' || haystack[11] == 'r') &&
+	    memcmp(haystack + 12, "d2:id20:", 8) == 0)
+		return (IPP2P_BIT * 100 + 57);
+
+#if 0
+	/* bitlord rules */
+	/* packetlen must be bigger than 40 */
+	/* first 4 bytes are zero */
+	if (packet_len > 40 && get_u32(haystack, 8) == 0x00000000)
+	{
+		/* first rule: 00 00 00 00 01 00 00 xx xx xx xx 00 00 00 00*/
+		if (get_u32(haystack, 12) == 0x00000000 &&
+		    get_u32(haystack, 16) == 0x00010000 &&
+		    get_u32(haystack, 24) == 0x00000000 )
+			return (IPP2P_BIT * 100 + 71);
+
+		/* 00 01 00 00 0d 00 00 xx xx xx xx 00 00 00 00*/
+		if (get_u32(haystack, 12) == 0x00000001 &&
+		    get_u32(haystack, 16) == 0x000d0000 &&
+		    get_u32(haystack, 24) == 0x00000000 )
+			return (IPP2P_BIT * 100 + 71);
+	}
+#endif
+
+	return 0;
+}/*udp_search_bit*/
+
+
+
+/*
+ * Search for Ares commands in a TCP payload.
+ *
+ * @payload: first byte of the TCP payload
+ * @plen:    payload length in bytes (the caller guarantees more than 5)
+ *
+ * A candidate has payload[1] == 0 and payload[0] == plen - 3; the command
+ * byte is payload[2].  Returns (IPP2P_ARES * 100) + 1 on a hit, else 0.
+ */
+//#define IPP2P_DEBUG_ARES
+int
+search_ares (const unsigned char *payload, const u16 plen)
+{
+	if (payload[1] == 0 && (plen - payload[0]) == 3) {
+		if (payload[2] == 0x5a) {
+			/* ares connect */
+			if (plen == 6 && payload[5] == 0x05)
+				return ((IPP2P_ARES * 100) + 1);
+		} else if (payload[2] == 0x09) {
+			/* ares search, min 3 chars --> 14 bytes
+			 * lets define a search can be up to 30 chars --> max 34 bytes
+			 */
+			if (plen >= 14 && plen <= 34)
+				return ((IPP2P_ARES * 100) + 1);
+		}
+#ifdef IPP2P_DEBUG_ARES
+		else {
+			printk(KERN_DEBUG "Unknown Ares command %x recognized, len: %u \n", (unsigned int) payload[2],plen);
+		}
+#endif /* IPP2P_DEBUG_ARES */
+	}
+
+#if 0
+	/* found connect packet: 03 00 5a 04 03 05 */
+	/* new version ares 1.8: 03 00 5a xx xx 05 */
+	if ((plen) == 6){ /* possible connect command*/
+		if ((payload[0] == 0x03) && (payload[1] == 0x00) && (payload[2] == 0x5a) && (payload[5] == 0x05))
+			return ((IPP2P_ARES * 100) + 1);
+	}
+	if ((plen) == 60){ /* possible download command*/
+		if ((payload[59] == 0x0a) && (payload[58] == 0x0a)){
+			if (memcmp(t, "PUSH SHA1:", 10) == 0) /* found download command */
+				return ((IPP2P_ARES * 100) + 2);
+		}
+	}
+#endif
+
+	return 0;
+} /*search_ares*/
+
+/*
+ * Search for SoulSeek commands in a TCP payload.
+ *
+ * @payload: first byte of the TCP payload
+ * @plen:    payload length in bytes
+ *
+ * Two families are recognised:
+ *  - messages whose first 32-bit word equals plen - 4, classified by the
+ *    32-bit command code that follows (codes 1..8);
+ *  - a "special client command" that starts with 0x14 and carries an
+ *    embedded string (code 9).
+ * Returns (IPP2P_SOUL * 100) + code on a hit, else 0.
+ *
+ * Fixes over the previous version:
+ *  - a stray ';' after the last condition made code 9 match every packet
+ *    that merely passed the length test;
+ *  - the embedded string length is now bounded without adding to it, so a
+ *    huge value can no longer wrap around and point outside the payload;
+ *  - payloads shorter than the 8 bytes inspected up front are rejected.
+ */
+int
+search_soul (const unsigned char *payload, const u16 plen)
+{
+//#define IPP2P_DEBUG_SOUL
+	/* every pattern below looks at bytes 0..7 */
+	if (plen < 8)
+		return 0;
+
+	/* match: xx xx xx xx | xx = sizeof(payload) - 4 */
+	if (get_u32(payload, 0) == (plen - 4)){
+		const __u32 m=get_u32(payload, 4);
+		/* match 00 yy yy 00, yy can be everything */
+		if ( get_u8(payload, 4) == 0x00 && get_u8(payload, 7) == 0x00 )
+		{
+#ifdef IPP2P_DEBUG_SOUL
+			printk(KERN_DEBUG "0: Soulseek command 0x%x recognized\n",get_u32(payload, 4));
+#endif /* IPP2P_DEBUG_SOUL */
+			return ((IPP2P_SOUL * 100) + 1);
+		}
+
+		/* next match: 01 yy 00 00 | yy can be everything */
+		if ( get_u8(payload, 4) == 0x01 && get_u16(payload, 6) == 0x0000 )
+		{
+#ifdef IPP2P_DEBUG_SOUL
+			printk(KERN_DEBUG "1: Soulseek command 0x%x recognized\n",get_u16(payload, 4));
+#endif /* IPP2P_DEBUG_SOUL */
+			return ((IPP2P_SOUL * 100) + 2);
+		}
+
+		/* other soulseek commandos are: 1-5,7,9,13-18,22,23,26,28,35-37,40-46,50,51,60,62-69,91,92,1001 */
+		/* try to do this in an intelligent way */
+		/* get all small commandos */
+		switch(m)
+		{
+		case 7:
+		case 9:
+		case 22:
+		case 23:
+		case 26:
+		case 28:
+		case 50:
+		case 51:
+		case 60:
+		case 91:
+		case 92:
+		case 1001:
+#ifdef IPP2P_DEBUG_SOUL
+			printk(KERN_DEBUG "2: Soulseek command 0x%x recognized\n",get_u16(payload, 4));
+#endif /* IPP2P_DEBUG_SOUL */
+			return ((IPP2P_SOUL * 100) + 3);
+		}
+
+		if (m > 0 && m < 6 )
+		{
+#ifdef IPP2P_DEBUG_SOUL
+			printk(KERN_DEBUG "3: Soulseek command 0x%x recognized\n",get_u16(payload, 4));
+#endif /* IPP2P_DEBUG_SOUL */
+			return ((IPP2P_SOUL * 100) + 4);
+		}
+		if (m > 12 && m < 19 )
+		{
+#ifdef IPP2P_DEBUG_SOUL
+			printk(KERN_DEBUG "4: Soulseek command 0x%x recognized\n",get_u16(payload, 4));
+#endif /* IPP2P_DEBUG_SOUL */
+			return ((IPP2P_SOUL * 100) + 5);
+		}
+
+		if (m > 34 && m < 38 )
+		{
+#ifdef IPP2P_DEBUG_SOUL
+			printk(KERN_DEBUG "5: Soulseek command 0x%x recognized\n",get_u16(payload, 4));
+#endif /* IPP2P_DEBUG_SOUL */
+			return ((IPP2P_SOUL * 100) + 6);
+		}
+
+		if (m > 39 && m < 47 )
+		{
+#ifdef IPP2P_DEBUG_SOUL
+			printk(KERN_DEBUG "6: Soulseek command 0x%x recognized\n",get_u16(payload, 4));
+#endif /* IPP2P_DEBUG_SOUL */
+			return ((IPP2P_SOUL * 100) + 7);
+		}
+
+		if (m > 61 && m < 70 )
+		{
+#ifdef IPP2P_DEBUG_SOUL
+			printk(KERN_DEBUG "7: Soulseek command 0x%x recognized\n",get_u16(payload, 4));
+#endif /* IPP2P_DEBUG_SOUL */
+			return ((IPP2P_SOUL * 100) + 8);
+		}
+
+#ifdef IPP2P_DEBUG_SOUL
+		printk(KERN_DEBUG "unknown SOULSEEK command: 0x%x, first 16 bit: 0x%x, first 8 bit: 0x%x ,soulseek ???\n",get_u32(payload, 4),get_u16(payload, 4) >> 16,get_u8(payload, 4) >> 24);
+#endif /* IPP2P_DEBUG_SOUL */
+	}
+
+	/* match 14 00 00 00 01 yy 00 00 00 STRING(YY) 01 00 00 00 00 46|50 00 00 00 00 */
+	/* without size at the beginning !!! */
+	if ( plen >= 19 && get_u32(payload, 0) == 0x14 && get_u8(payload, 4) == 0x01 )
+	{
+		const __u32 y=get_u32(payload, 5);
+		/* we need 19 chars + string; compare against the remaining
+		 * room instead of computing y + 19, which could wrap */
+		if ( y <= (__u32)(plen - 19) )
+		{
+			const unsigned char *w=payload+9+y;
+			if (get_u32(w, 0) == 0x01 &&
+			    ( get_u16(w, 4) == 0x4600 || get_u16(w, 4) == 0x5000) &&
+			    get_u32(w, 6) == 0x00)
+			{
+#ifdef IPP2P_DEBUG_SOUL
+				printk(KERN_DEBUG "Soulssek special client command recognized\n");
+#endif /* IPP2P_DEBUG_SOUL */
+				return ((IPP2P_SOUL * 100) + 9);
+			}
+		}
+	}
+	return 0;
+}
+
+
+/*
+ * Search for WinMX commands in a TCP payload.
+ *
+ * @payload: first byte of the TCP payload
+ * @plen:    payload length in bytes (the caller guarantees more than 2)
+ *
+ * Returns (IPP2P_WINMX * 100) + 1 for a bare "SEND", + 2 for a bare
+ * "GET", + 3 for a SEND/GET line containing two space-quote (0x20 0x22)
+ * sequences, + 4 for the fixed 149-byte packet starting with '8',
+ * else 0.
+ */
+int
+search_winmx (const unsigned char *payload, const u16 plen)
+{
+//#define IPP2P_DEBUG_WINMX
+	const int is_send = (memcmp(payload, "SEND", 4) == 0);
+	const int is_get = (memcmp(payload, "GET", 3) == 0);
+
+	if (plen == 4 && is_send)
+		return ((IPP2P_WINMX * 100) + 1);
+	if (plen == 3 && is_get)
+		return ((IPP2P_WINMX * 100) + 2);
+	//if (packet_len < (head_len + 10)) return 0;
+	if (plen < 10)
+		return 0;
+
+	if (is_send || is_get) {
+		const u16 last = plen - 2;
+		u16 pos;
+		u8 quoted = 0;
+
+		for (pos = 4; pos < last; pos++) {
+			if (payload[pos] != 0x20 || payload[pos + 1] != 0x22)
+				continue;
+			pos++;	/* step over the quote as well */
+			quoted++;
+			if (quoted >= 2)
+				return ((IPP2P_WINMX * 100) + 3);
+		}
+	}
+
+	if (plen == 149 && payload[0] == '8') {
+#ifdef IPP2P_DEBUG_WINMX
+		printk(KERN_INFO "maybe WinMX\n");
+#endif
+		/* An older, stricter variant additionally compared
+		 * get_u32(payload,33) == __constant_htonl(0x71182b1a), get_u32(payload,37) == __constant_htonl(0x05050000),
+		 * get_u32(payload,133) == __constant_htonl(0x31097edf) and get_u32(payload,145) == __constant_htonl(0xdcb8f792);
+		 * it is disabled. */
+		if (get_u32(payload,17) == 0 &&
+		    get_u32(payload,21) == 0 &&
+		    get_u32(payload,25) == 0 &&
+		    get_u16(payload,39) == 0 &&
+		    get_u16(payload,135) == __constant_htons(0x7edf) &&
+		    get_u16(payload,147) == __constant_htons(0xf792)) {
+#ifdef IPP2P_DEBUG_WINMX
+			printk(KERN_INFO "got WinMX\n");
+#endif
+			return ((IPP2P_WINMX * 100) + 4);
+		}
+	}
+	return 0;
+} /*search_winmx*/
+
+
+/*
+ * Search for appleJuice commands: the payload must start with "ajprot"
+ * followed by CR LF.  Returns IPP2P_APPLE * 100 on a hit, else 0.
+ */
+int
+search_apple (const unsigned char *payload, const u16 plen)
+{
+	if (plen <= 7)
+		return 0;
+	if (payload[6] != 0x0d || payload[7] != 0x0a)
+		return 0;
+	if (memcmp(payload, "ajprot", 6) != 0)
+		return 0;
+
+	return (IPP2P_APPLE * 100);
+}
+
+
+/*
+ * Search for BitTorrent commands in a TCP payload.
+ *
+ * @payload: first byte of the TCP payload
+ * @plen:    payload length in bytes
+ *
+ * Returns IPP2P_BIT * 100 for the 0x13 "BitTorrent protocol" handshake,
+ * + 1 / + 2 for tracker scrape / announce requests, + 3 for a 17-byte
+ * block request packet, else 0.
+ *
+ * The tracker patterns are only compared when the payload is long enough
+ * to hold them (22 resp. 24 bytes); previously up to 3 bytes past a
+ * 21-byte payload could be read.
+ */
+int
+search_bittorrent (const unsigned char *payload, const u16 plen)
+{
+	if (plen > 20)
+	{
+		/* test for match 0x13+"BitTorrent protocol" */
+		if (payload[0] == 0x13 &&
+		    memcmp(payload+1, "BitTorrent protocol", 19) == 0)
+			return (IPP2P_BIT * 100);
+
+		/* get tracker commandos, all starts with GET /
+		 * then it can follow: scrape| announce
+		 * and then ?info_hash=
+		 */
+		if (memcmp(payload,"GET /",5) == 0)
+		{
+			/* message scrape */
+			if ( plen >= 22 && memcmp(payload+5,"scrape?info_hash=",17)==0 )
+				return (IPP2P_BIT * 100 + 1);
+			/* message announce */
+			if ( plen >= 24 && memcmp(payload+5,"announce?info_hash=",19)==0 )
+				return (IPP2P_BIT * 100 + 2);
+		}
+	}
+	else
+	{
+		/* bitcomet encryptes the first packet, so we have to detect another
+		 * one later in the flow */
+		/* first try failed, too many missdetections */
+		//if ( size == 5 && get_u32(t,0) == __constant_htonl(1) && t[4] < 3) return (IPP2P_BIT * 100 + 3);
+
+		/* second try: block request packets */
+		if ( plen == 17 &&
+		     get_u32(payload,0) == __constant_htonl(0x0d) &&
+		     payload[4] == 0x06 &&
+		     get_u32(payload,13) == __constant_htonl(0x4000) )
+			return (IPP2P_BIT * 100 + 3);
+	}
+
+	return 0;
+}
+
+
+
+/*
+ * Check for the Kazaa get command: a payload ending in CR LF that starts
+ * with "GET /.hash=".  Returns IPP2P_DATA_KAZAA * 100 on a hit, else 0.
+ * No length check is done here; the caller must supply a payload that is
+ * long enough for the 11-byte comparison.
+ */
+int
+search_kazaa (const unsigned char *payload, const u16 plen)
+
+{
+	if (payload[plen-2] != 0x0d)
+		return 0;
+	if (payload[plen-1] != 0x0a)
+		return 0;
+	if (memcmp(payload, "GET /.hash=", 11) != 0)
+		return 0;
+
+	return (IPP2P_DATA_KAZAA * 100);
+}
+
+
+/*
+ * Check for the gnutella get command: a payload ending in CR LF that
+ * starts with "GET /get/" (code 1) or "GET /uri-res/" (code 2).
+ * Returns (IPP2P_DATA_GNU * 100) + code on a hit, else 0.
+ * No length check is done here; the caller must supply a payload that is
+ * long enough for the comparisons.
+ */
+int
+search_gnu (const unsigned char *payload, const u16 plen)
+{
+	if (payload[plen-2] != 0x0d || payload[plen-1] != 0x0a)
+		return 0;
+
+	if (memcmp(payload, "GET /get/", 9) == 0)
+		return ((IPP2P_DATA_GNU * 100) + 1);
+	if (memcmp(payload, "GET /uri-res/", 13) == 0)
+		return ((IPP2P_DATA_GNU * 100) + 2);
+
+	return 0;
+}
+
+
+/*
+ * Check for gnutella get commands and other typical data.
+ *
+ * @payload: first byte of the TCP payload
+ * @plen:    payload length in bytes
+ *
+ * The payload must end in CR LF.  Returns (IPP2P_GNU * 100) + 1 for
+ * "GNUTELLA CONNECT/", + 2 for "GNUTELLA/", + 3 for a "GET /get/" or
+ * "GET /uri-res/" request that carries an "X-Gnutella-" or "X-Queue:"
+ * header line, else 0.
+ *
+ * Fixes over the previous version:
+ *  - every prefix is only compared when the payload can contain it;
+ *  - the scan limit plen - 22 was computed in u16 and wrapped to a huge
+ *    value for payloads shorter than 22 bytes, so the loop ran far beyond
+ *    the packet;
+ *  - header lines are preceded by CR LF (0x0d 0x0a); the old code looked
+ *    for LF CR, which does not occur in front of a header name.
+ */
+int
+search_all_gnu (const unsigned char *payload, const u16 plen)
+{
+	u16 c;
+
+	if (plen < 2 || payload[plen-2] != 0x0d || payload[plen-1] != 0x0a)
+		return 0;
+
+	if (plen >= 17 && memcmp(payload, "GNUTELLA CONNECT/", 17) == 0)
+		return ((IPP2P_GNU * 100) + 1);
+	if (plen >= 9 && memcmp(payload, "GNUTELLA/", 9) == 0)
+		return ((IPP2P_GNU * 100) + 2);
+
+	if (!((plen >= 9 && memcmp(payload, "GET /get/", 9) == 0) ||
+	      (plen >= 13 && memcmp(payload, "GET /uri-res/", 13) == 0)))
+		return 0;
+
+	/* nothing to scan unless the payload exceeds the 22-byte margin */
+	if (plen <= 22)
+		return 0;
+
+	for (c = 8; c < plen - 22; c++) {
+		if (payload[c] != 0x0d || payload[c+1] != 0x0a)
+			continue;
+		if (memcmp(&payload[c+2], "X-Gnutella-", 11) == 0 ||
+		    memcmp(&payload[c+2], "X-Queue:", 8) == 0)
+			return ((IPP2P_GNU * 100) + 3);
+	}
+	return 0;
+}
+
+
+/*
+ * Check for KaZaA download commands and other typical data.
+ *
+ * @payload: first byte of the TCP payload
+ * @plen:    payload length in bytes
+ *
+ * The payload must end in CR LF.  Returns (IPP2P_KAZAA * 100) + 1 for a
+ * "GIVE " command, + 2 for a "GET /" request that carries an
+ * "X-Kazaa-Username: " or "User-Agent: PeerEnabler/" header line, else 0.
+ *
+ * Fixes over the previous version:
+ *  - the scan limit plen - 22 was computed in u16 and wrapped to a huge
+ *    value for payloads shorter than 22 bytes;
+ *  - the 24-byte "User-Agent" comparison could read up to 3 bytes past
+ *    the payload and is now bounded separately;
+ *  - header lines are preceded by CR LF (0x0d 0x0a); the old code looked
+ *    for LF CR, which does not occur in front of a header name.
+ */
+int
+search_all_kazaa (const unsigned char *payload, const u16 plen)
+{
+	u16 c;
+
+	if (plen < 2 || payload[plen-2] != 0x0d || payload[plen-1] != 0x0a)
+		return 0;
+
+	if (plen >= 5 && memcmp(payload, "GIVE ", 5) == 0)
+		return ((IPP2P_KAZAA * 100) + 1);
+
+	/* the header scan needs a "GET /" request longer than the margin */
+	if (plen <= 22 || memcmp(payload, "GET /", 5) != 0)
+		return 0;
+
+	for (c = 8; c < plen - 22; c++) {
+		if (payload[c] != 0x0d || payload[c+1] != 0x0a)
+			continue;
+		if (memcmp(&payload[c+2], "X-Kazaa-Username: ", 18) == 0)
+			return ((IPP2P_KAZAA * 100) + 2);
+		/* 2 bytes CR LF + 24 bytes of header text must still fit */
+		if (c + 26 <= plen &&
+		    memcmp(&payload[c+2], "User-Agent: PeerEnabler/", 24) == 0)
+			return ((IPP2P_KAZAA * 100) + 2);
+	}
+	return 0;
+}
+
+/*
+ * Fast check for the edonkey file segment transfer command: byte 0 must
+ * be 0xe3 and byte 5 must be 0x47.  Returns IPP2P_DATA_EDK * 100 on a
+ * hit, else 0.  @plen is not consulted.
+ */
+int
+search_edk (const unsigned char *payload, const u16 plen)
+{
+	if (payload[0] == 0xe3 && payload[5] == 0x47)
+		return (IPP2P_DATA_EDK * 100);
+
+	return 0;
+}
+
+
+
+/*
+ * Intensive but slower search for some edonkey packets including a size
+ * check: byte 0 must be 0xe3 and the 16-bit value at offset 1 must equal
+ * plen - 5.  Returns (IPP2P_EDK * 100) + 1 for opcode 0x01 (hello),
+ * + 9 for opcode 0x4c (hello answer), else 0.
+ */
+int
+search_all_edk (const unsigned char *payload, const u16 plen)
+{
+	u16 announced_len;
+
+	if (payload[0] != 0xe3)
+		return 0;
+
+	announced_len = get_u16(payload, 1);
+	if (announced_len != (plen - 5))
+		return 0;
+
+	if (payload[5] == 0x01)		/*Client: hello or Server:hello*/
+		return ((IPP2P_EDK * 100) + 1);
+	if (payload[5] == 0x4c)		/*Client: Hello-Answer*/
+		return ((IPP2P_EDK * 100) + 9);
+
+	return 0;
+}
+
+
+/*
+ * Fast check for the Direct Connect send command: the payload must start
+ * with '$' (0x24) followed by "Send|".  Returns IPP2P_DATA_DC * 100 on a
+ * hit, else 0.  @plen is not consulted.
+ */
+int
+search_dc (const unsigned char *payload, const u16 plen)
+{
+	if (payload[0] == 0x24 && memcmp(&payload[1], "Send|", 5) == 0)
+		return (IPP2P_DATA_DC * 100);
+
+	return 0;
+}
+
+
+/*
+ * Intensive but slower check for all direct connect packets.
+ *
+ * @payload: first byte of the TCP payload
+ * @plen:    payload length in bytes
+ *
+ * The payload must start with '$' (0x24) and end with '|' (0x7c).
+ * Returns (IPP2P_DC * 100) + 1 for "$Lock ", + 38 for "$MyNick ", else 0.
+ * Each keyword is only compared when the payload is long enough to hold
+ * it, so the 7-byte "MyNick " test no longer reads past short payloads.
+ */
+int
+search_all_dc (const unsigned char *payload, const u16 plen)
+{
+	const unsigned char *t;
+
+	if (plen == 0 || payload[0] != 0x24 || payload[plen-1] != 0x7c)
+		return 0;
+
+	t = &payload[1];
+	/* Client-Hub-Protocol */
+	if (plen >= 6 && memcmp(t, "Lock ", 5) == 0)
+		return ((IPP2P_DC * 100) + 1);
+	/* Client-Client-Protocol, some are already recognized by client-hub (like lock) */
+	if (plen >= 8 && memcmp(t, "MyNick ", 7) == 0)
+		return ((IPP2P_DC * 100) + 38);
+
+	return 0;
+}
+
+/*
+ * Check for mute: the payload must have one of five known sizes and
+ * start with "PublicKey: ".  Returns (IPP2P_MUTE * 100) + 0 on a hit,
+ * else 0.
+ */
+int
+search_mute (const unsigned char *payload, const u16 plen)
+{
+	switch (plen) {
+	case 209:
+	case 345:
+	case 473:
+	case 609:
+	case 1121:
+		break;
+	default:
+		return 0;
+	}
+
+	/* An additional test for a trailing "\x0aEndPublicKey\x0a" marker
+	 * (last 14 bytes) was left disabled by the authors. */
+	if (memcmp(payload,"PublicKey: ",11) != 0)
+		return 0;
+
+	return ((IPP2P_MUTE * 100) + 0);
+}
+
+
+/*
+ * Check for xdcc: an IRC "PRIVMSG " line of 21..199 bytes ending in CR LF
+ * that contains ":xdcc send #" from offset 10 onwards.
+ * Returns (IPP2P_XDCC * 100) + 0 on a hit, else 0.
+ */
+int
+search_xdcc (const unsigned char *payload, const u16 plen)
+{
+	u16 pos;
+	u16 last;
+
+	/* search in small packets only */
+	if (plen <= 20 || plen >= 200)
+		return 0;
+	if (payload[plen-1] != 0x0a || payload[plen-2] != 0x0d)
+		return 0;
+	if (memcmp(payload,"PRIVMSG ",8) != 0)
+		return 0;
+
+	/* it seems to be an irc private message, check for the xdcc command */
+	last = plen - 13;
+	for (pos = 10; pos < last; pos++) {
+		if (payload[pos] == ':' &&
+		    memcmp(&payload[pos+1],"xdcc send #",11) == 0)
+			return ((IPP2P_XDCC * 100) + 0);
+	}
+	return 0;
+}
+
+/*
+ * Search for waste: the payload must start with the 9 bytes "GET.sha1:".
+ * Returns (IPP2P_WASTE * 100) + 0 on a hit, else 0.
+ *
+ * The length test now matches the 9 bytes that are compared; it used to
+ * accept 8-byte payloads and read one byte past them.
+ */
+int search_waste(const unsigned char *payload, const u16 plen)
+{
+	if ( plen >= 9 && memcmp(payload,"GET.sha1:",9) == 0)
+		return ((IPP2P_WASTE * 100) + 0);
+
+	return 0;
+}
+
+
+/*
+ * TCP payload matchers.  match() walks this table in order until the
+ * entry whose command is 0.  An entry is tried when the rule's cmd mask
+ * contains all bits of .command or all bits of .short_hand, and the
+ * payload is longer than .packet_len bytes.
+ */
+static struct {
+	int command;		/* protocol flag selecting this matcher */
+	__u8 short_hand;	/* for functions included in short hands */
+	int packet_len;		/* payload must be longer than this */
+	int (*function_name) (const unsigned char *, const u16);
+} matchlist[] = {
+	{ .command = IPP2P_EDK,   .short_hand = SHORT_HAND_IPP2P, .packet_len = 20,  .function_name = &search_all_edk },
+//	{IPP2P_DATA_KAZAA,SHORT_HAND_DATA,200, &search_kazaa},
+//	{IPP2P_DATA_EDK,SHORT_HAND_DATA,60, &search_edk},
+//	{IPP2P_DATA_DC,SHORT_HAND_DATA,26, &search_dc},
+	{ .command = IPP2P_DC,    .short_hand = SHORT_HAND_IPP2P, .packet_len = 5,   .function_name = &search_all_dc },
+//	{IPP2P_DATA_GNU,SHORT_HAND_DATA,40, &search_gnu},
+	{ .command = IPP2P_GNU,   .short_hand = SHORT_HAND_IPP2P, .packet_len = 5,   .function_name = &search_all_gnu },
+	{ .command = IPP2P_KAZAA, .short_hand = SHORT_HAND_IPP2P, .packet_len = 5,   .function_name = &search_all_kazaa },
+	{ .command = IPP2P_BIT,   .short_hand = SHORT_HAND_IPP2P, .packet_len = 20,  .function_name = &search_bittorrent },
+	{ .command = IPP2P_APPLE, .short_hand = SHORT_HAND_IPP2P, .packet_len = 5,   .function_name = &search_apple },
+	{ .command = IPP2P_SOUL,  .short_hand = SHORT_HAND_IPP2P, .packet_len = 5,   .function_name = &search_soul },
+	{ .command = IPP2P_WINMX, .short_hand = SHORT_HAND_IPP2P, .packet_len = 2,   .function_name = &search_winmx },
+	{ .command = IPP2P_ARES,  .short_hand = SHORT_HAND_IPP2P, .packet_len = 5,   .function_name = &search_ares },
+	{ .command = IPP2P_MUTE,  .short_hand = SHORT_HAND_NONE,  .packet_len = 200, .function_name = &search_mute },
+	{ .command = IPP2P_WASTE, .short_hand = SHORT_HAND_NONE,  .packet_len = 5,   .function_name = &search_waste },
+	{ .command = IPP2P_XDCC,  .short_hand = SHORT_HAND_NONE,  .packet_len = 5,   .function_name = &search_xdcc },
+	{ 0, 0, 0, NULL }	/* terminator */
+};
+
+
+/*
+ * UDP matchers.  match() walks this table in order until the entry whose
+ * command is 0.  An entry is tried when the rule's cmd mask contains all
+ * bits of .command or all bits of .short_hand, and the datagram (counted
+ * from the start of the transport header) is longer than .packet_len.
+ */
+static struct {
+	int command;		/* protocol flag selecting this matcher */
+	__u8 short_hand;	/* for functions included in short hands */
+	int packet_len;		/* datagram must be longer than this */
+	int (*function_name) (unsigned char *, int);
+} udp_list[] = {
+	{ .command = IPP2P_KAZAA, .short_hand = SHORT_HAND_IPP2P, .packet_len = 14, .function_name = &udp_search_kazaa },
+	{ .command = IPP2P_BIT,   .short_hand = SHORT_HAND_IPP2P, .packet_len = 23, .function_name = &udp_search_bit },
+	{ .command = IPP2P_GNU,   .short_hand = SHORT_HAND_IPP2P, .packet_len = 11, .function_name = &udp_search_gnu },
+	{ .command = IPP2P_EDK,   .short_hand = SHORT_HAND_IPP2P, .packet_len = 9,  .function_name = &udp_search_edk },
+	{ .command = IPP2P_DC,    .short_hand = SHORT_HAND_IPP2P, .packet_len = 12, .function_name = &udp_search_directconnect },
+	{ 0, 0, 0, NULL }	/* terminator */
+};
+
+
+/*
+ * iptables match callback for "ipp2p".
+ *
+ * Fragments (offset != 0) and non-linear skbs are never matched.  For TCP,
+ * segments with FIN, SYN or RST set are ignored and the payload behind
+ * the TCP header is handed to every enabled entry of matchlist[].  For
+ * UDP the datagram, starting at the UDP header, is handed to every
+ * enabled entry of udp_list[].
+ *
+ * Returns the non-zero code of the first matcher that hits, else 0.
+ *
+ * The transport header is only dereferenced after checking that the IP
+ * payload is large enough to contain it; previously the TCP flags and
+ * data offset were read from truncated packets as well.
+ */
+static int
+match(const struct sk_buff *skb,
+      const struct net_device *in,
+      const struct net_device *out,
+      const void *matchinfo,
+      int offset,
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
+      const void *hdr,
+      u_int16_t datalen,
+#endif
+
+      int *hotdrop)
+{
+	const struct ipt_p2p_info *info = matchinfo;
+	unsigned char *haystack;
+	struct iphdr *ip = skb->nh.iph;
+	int p2p_result = 0, i;
+	int hlen = ntohs(ip->tot_len)-(ip->ihl*4);	/*hlen = packet-data length*/
+
+	/*must not be a fragment*/
+	if (offset) {
+		if (info->debug) printk("IPP2P.match: offset found %i \n",offset);
+		return 0;
+	}
+
+	/*make sure that skb is linear*/
+	if(skb_is_nonlinear(skb)){
+		if (info->debug) printk("IPP2P.match: nonlinear skb found\n");
+		return 0;
+	}
+
+	haystack = (unsigned char *)ip + (ip->ihl*4);	/*haystack = packet data*/
+
+	switch (ip->protocol){
+	case IPPROTO_TCP:		/*what to do with a TCP packet*/
+	{
+		struct tcphdr *tcph = (struct tcphdr *)haystack;
+
+		/* a truncated TCP header must not be inspected */
+		if (hlen < (int)sizeof(struct tcphdr))
+			return 0;
+
+		if (tcph->fin) return 0;	/*if FIN bit is set bail out*/
+		if (tcph->syn) return 0;	/*if SYN bit is set bail out*/
+		if (tcph->rst) return 0;	/*if RST bit is set bail out*/
+
+		haystack += tcph->doff * 4;	/*get TCP-Header-Size*/
+		hlen -= tcph->doff * 4;
+		for (i = 0; matchlist[i].command; i++) {
+			if (!(((info->cmd & matchlist[i].command) == matchlist[i].command) ||
+			      ((info->cmd & matchlist[i].short_hand) == matchlist[i].short_hand)))
+				continue;
+			if (hlen <= matchlist[i].packet_len)
+				continue;
+
+			p2p_result = matchlist[i].function_name(haystack, hlen);
+			if (p2p_result)
+			{
+				if (info->debug) printk("IPP2P.debug:TCP-match: %i from: %u.%u.%u.%u:%i to: %u.%u.%u.%u:%i Length: %i\n",
+					p2p_result, NIPQUAD(ip->saddr),ntohs(tcph->source), NIPQUAD(ip->daddr),ntohs(tcph->dest),hlen);
+				return p2p_result;
+			}
+		}
+		return p2p_result;
+	}
+
+	case IPPROTO_UDP:		/*what to do with an UDP packet*/
+	{
+		struct udphdr *udph = (struct udphdr *)haystack;
+
+		/* a truncated UDP header must not be inspected */
+		if (hlen < (int)sizeof(struct udphdr))
+			return 0;
+
+		for (i = 0; udp_list[i].command; i++) {
+			if (!(((info->cmd & udp_list[i].command) == udp_list[i].command) ||
+			      ((info->cmd & udp_list[i].short_hand) == udp_list[i].short_hand)))
+				continue;
+			if (hlen <= udp_list[i].packet_len)
+				continue;
+
+			p2p_result = udp_list[i].function_name(haystack, hlen);
+			if (p2p_result){
+				if (info->debug) printk("IPP2P.debug:UDP-match: %i from: %u.%u.%u.%u:%i to: %u.%u.%u.%u:%i Length: %i\n",
+					p2p_result, NIPQUAD(ip->saddr),ntohs(udph->source), NIPQUAD(ip->daddr),ntohs(udph->dest),hlen);
+				return p2p_result;
+			}
+		}
+		return p2p_result;
+	}
+
+	default: return 0;
+	}
+}
+
+
+
+/*
+ * Validate a new "ipp2p" rule.
+ *
+ * Returns 1 when the rule is acceptable, 0 to reject it.  The rule is
+ * rejected when the match data handed in does not have the size of
+ * struct ipt_p2p_info, because match() reads its fields unconditionally.
+ * The iprange match in this tree validates its size the same way.
+ */
+static int
+checkentry(const char *tablename,
+	   const struct ipt_ip *ip,
+	   void *matchinfo,
+	   unsigned int matchsize,
+	   unsigned int hook_mask)
+{
+	/* verify size */
+	if (matchsize != IPT_ALIGN(sizeof(struct ipt_p2p_info)))
+		return 0;
+
+	/* Must specify -p tcp */
+/*	if (ip->proto != IPPROTO_TCP || (ip->invflags & IPT_INV_PROTO)) {
+ *		printk("ipp2p: Only works on TCP packets, use -p tcp\n");
+ *		return 0;
+ *	}*/
+	return 1;
+}
+
+
+
+
+/*
+ * Registration record for the "ipp2p" match.  Kernels before 2.6.0 get a
+ * positional initializer (list, name, match, checkentry, destroy, module),
+ * newer kernels get named fields.
+ */
+static struct ipt_match ipp2p_match = {
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
+	{ NULL, NULL },
+	"ipp2p",
+	&match,
+	&checkentry,
+	NULL,
+	THIS_MODULE
+#else
+	.name		= "ipp2p",
+	.match		= &match,
+	.checkentry	= &checkentry,
+	.me		= THIS_MODULE,
+#endif
+};
+
+
+/* Module load hook: announce the version, then register the match. */
+static int __init init(void)
+{
+	int ret;
+
+	printk(KERN_INFO "IPP2P v%s loading\n", IPP2P_VERSION);
+	ret = ipt_register_match(&ipp2p_match);
+	return ret;
+}
+
+/* Module unload hook: unregister the match, then log the unload. */
+static void __exit fini(void)
+{
+	ipt_unregister_match(&ipp2p_match);
+	printk(KERN_INFO "IPP2P v%s unloaded\n", IPP2P_VERSION);
+}
+
+module_init(init);
+module_exit(fini);
+
+
diff --git a/release/src/linux/linux/net/ipv4/netfilter/ipt_iprange.c b/release/src/linux/linux/net/ipv4/netfilter/ipt_iprange.c
new file mode 100644
index 00000000..38902524
--- /dev/null
+++ b/release/src/linux/linux/net/ipv4/netfilter/ipt_iprange.c
@@ -0,0 +1,101 @@
+/*
+ * iptables module to match IP address ranges
+ * (c) 2003 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ *
+ * Released under the terms of GNU GPLv2.
+ *
+ */
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_iprange.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
+MODULE_DESCRIPTION("iptables arbitrary IP range match module");
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+/*
+ * iptables match callback for "iprange".
+ *
+ * For each enabled direction (IPRANGE_SRC / IPRANGE_DST) the packet
+ * address is compared, in host byte order, against the inclusive
+ * [min_ip, max_ip] range; the matching *_INV flag inverts that test.
+ * Returns 1 when every enabled test passes, else 0.
+ */
+static int
+match(const struct sk_buff *skb,
+      const struct net_device *in,
+      const struct net_device *out,
+      const void *matchinfo,
+      int offset,
+      const void *hdr,
+      u_int16_t datalen,
+      int *hotdrop)
+{
+	const struct ipt_iprange_info *info = matchinfo;
+	const struct iphdr *iph = skb->nh.iph;
+	u_int32_t addr;
+	int outside;
+
+	if (info->flags & IPRANGE_SRC) {
+		addr = ntohl(iph->saddr);
+		outside = (addr < ntohl(info->src.min_ip)) ||
+			  (addr > ntohl(info->src.max_ip));
+		if (outside ^ !!(info->flags & IPRANGE_SRC_INV)) {
+			DEBUGP("src IP %u.%u.%u.%u NOT in range %s"
+			       "%u.%u.%u.%u-%u.%u.%u.%u\n",
+				NIPQUAD(iph->saddr),
+				info->flags & IPRANGE_SRC_INV ? "(INV) " : "",
+				NIPQUAD(info->src.min_ip),
+				NIPQUAD(info->src.max_ip));
+			return 0;
+		}
+	}
+
+	if (info->flags & IPRANGE_DST) {
+		addr = ntohl(iph->daddr);
+		outside = (addr < ntohl(info->dst.min_ip)) ||
+			  (addr > ntohl(info->dst.max_ip));
+		if (outside ^ !!(info->flags & IPRANGE_DST_INV)) {
+			DEBUGP("dst IP %u.%u.%u.%u NOT in range %s"
+			       "%u.%u.%u.%u-%u.%u.%u.%u\n",
+				NIPQUAD(iph->daddr),
+				info->flags & IPRANGE_DST_INV ? "(INV) " : "",
+				NIPQUAD(info->dst.min_ip),
+				NIPQUAD(info->dst.max_ip));
+			return 0;
+		}
+	}
+
+	return 1;
+}
+
+/*
+ * Validate a new "iprange" rule: accept it (return 1) only when the
+ * match data has exactly the aligned size of struct ipt_iprange_info,
+ * otherwise reject it (return 0).
+ */
+static int check(const char *tablename,
+		 const struct ipt_ip *ip,
+		 void *matchinfo,
+		 unsigned int matchsize,
+		 unsigned int hook_mask)
+{
+	const unsigned int expected = IPT_ALIGN(sizeof(struct ipt_iprange_info));
+
+	return (matchsize == expected) ? 1 : 0;
+}
+
+/*
+ * Registration record for the "iprange" match.  The list links and the
+ * destroy hook are left out and therefore zero/NULL, as before.
+ */
+static struct ipt_match iprange_match = {
+	.name		= "iprange",
+	.match		= match,
+	.checkentry	= check,
+	.me		= THIS_MODULE,
+};
+
+/* Module load hook: register the iprange match with ip_tables. */
+static int __init init(void)
+{
+	int ret;
+
+	ret = ipt_register_match(&iprange_match);
+	return ret;
+}
+
+/* Module unload hook: withdraw the iprange match again. */
+static void __exit fini(void)
+{
+	ipt_unregister_match(&iprange_match);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/release/src/linux/linux/net/ipv4/netfilter/ipt_layer7.c b/release/src/linux/linux/net/ipv4/netfilter/ipt_layer7.c
new file mode 100644
index 00000000..567e3847
--- /dev/null
+++ b/release/src/linux/linux/net/ipv4/netfilter/ipt_layer7.c
@@ -0,0 +1,570 @@
+/*
+ Kernel module to match application layer (OSI layer 7)
+ data in connections.
+
+ http://l7-filter.sf.net
+
+ By Matthew Strait and Ethan Sommer, 2003-2005.
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License
+ as published by the Free Software Foundation; either version
+ 2 of the License, or (at your option) any later version.
+ http://www.gnu.org/licenses/gpl.txt
+
+ Based on ipt_string.c (C) 2000 Emmanuel Roger <winfield@freegates.be>
+ and cls_layer7.c (C) 2003 Matthew Strait, Ethan Sommer, Justin Levandoski
+*/
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <linux/proc_fs.h>
+#include <linux/ctype.h>
+#include <linux/errno.h>
+#include <net/ip.h>
+#include <net/tcp.h>
+#include <linux/netfilter_ipv4/lockhelp.h>
+#include <asm/uaccess.h>
+
+#include "regexp/regexp.c"
+
+#include <linux/netfilter_ipv4/ipt_layer7.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+
+MODULE_AUTHOR("Matthew Strait <quadong@users.sf.net>, Ethan Sommer <sommere@users.sf.net>");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("iptables application layer match module");
+
+static int maxdatalen = 2048; // this is the default
+MODULE_PARM(maxdatalen,"i");
+MODULE_PARM_DESC(maxdatalen,"maximum bytes of data looked at by l7-filter");
+
+#if defined(CONFIG_IP_NF_MATCH_LAYER7_DEBUG)
+ #define DPRINTK(format,args...) printk(format,##args)
+#else
+ #define DPRINTK(format,args...)
+#endif
+
+#define TOTAL_PACKETS master_conntrack->layer7.numpackets
+
+/* Number of packets whose data we look at.
+This can be modified through /proc/net/layer7_numpackets */
+static int num_packets = 10;
+
+/* Singly linked cache of compiled regexps, keyed by the regex source text.
+ first_pattern_cache is the list head; NULL means the cache is empty. */
+static struct pattern_cache {
+ char * regex_string; /* regex source text; compared with strcmp() on lookup */
+ regexp * pattern; /* value returned by regcomp(); NULL if compilation failed */
+ struct pattern_cache * next; /* next node, NULL at the tail */
+} * first_pattern_cache = NULL;
+
+/* I'm new to locking. Here are my assumptions:
+
+- No one will write to /proc/net/layer7_numpackets over and over very fast;
+ if they did, nothing awful would happen.
+
+- This code will never be processing the same packet twice at the same time,
+ because iptables rules are traversed in order.
+
+- It doesn't matter if two packets from different connections are in here at
+ the same time, because they don't share any data.
+
+- It _does_ matter if two packets from the same connection are here at the same
+ time. In this case, we have to protect the conntracks and the list of
+ compiled patterns.
+*/
+DECLARE_RWLOCK(ct_lock);
+DECLARE_LOCK(list_lock);
+
+#if CONFIG_IP_NF_MATCH_LAYER7_DEBUG
+/* Converts an unfriendly string into a friendly one: printable 7-bit
+   characters are kept, whitespace becomes ' ' and everything else '.'.
+
+   Returns a kmalloc()ed, NUL-terminated copy that the caller must kfree(),
+   or NULL if the allocation failed. */
+static char * friendly_print(unsigned char * s)
+{
+	/* measure once so the loop condition does not rescan the string */
+	int len = strlen(s);
+	char * f = kmalloc(len + 1, GFP_ATOMIC);
+	int i;
+
+	if(!f) {
+		if (net_ratelimit())
+			printk(KERN_ERR "layer7: out of memory in friendly_print, bailing.\n");
+		return NULL;
+	}
+
+	for(i = 0; i < len; i++){
+		if(isprint(s[i]) && s[i] < 128)
+			f[i] = s[i];
+		else if(isspace(s[i]))
+			f[i] = ' ';
+		else
+			f[i] = '.';
+	}
+	f[i] = '\0';
+	return f;
+}
+
+/* Maps a nibble value 0..15 to its lowercase hex digit.  Any other value
+   is reported (rate-limited) and yields '\0'. */
+static char dec2hex(int i)
+{
+	if (i >= 0 && i <= 9)
+		return (char)('0' + i);
+
+	if (i >= 10 && i <= 15)
+		return (char)('a' + (i - 10));
+
+	if (net_ratelimit())
+		printk("Problem in dec2hex\n");
+	return '\0';
+}
+
+/* Renders s as lowercase hex, two digits plus a space per input byte.
+
+   Returns a kmalloc()ed, NUL-terminated string that the caller must
+   kfree(), or NULL if the allocation failed. */
+static char * hex_print(unsigned char * s)
+{
+	/* measure once so the loop condition does not rescan the string */
+	int len = strlen(s);
+	char * g = kmalloc(len*3 + 1, GFP_ATOMIC);
+	int i;
+
+	if(!g) {
+		if (net_ratelimit())
+			printk(KERN_ERR "layer7: out of memory in hex_print, bailing.\n");
+		return NULL;
+	}
+
+	for(i = 0; i < len; i++) {
+		g[i*3    ] = dec2hex(s[i]/16);
+		g[i*3 + 1] = dec2hex(s[i]%16);
+		g[i*3 + 2] = ' ';
+	}
+	g[i*3] = '\0';
+
+	return g;
+}
+#endif // DEBUG
+
+/* Use instead of regcomp. As we expect to be seeing the same regexps over and
+over again, it makes sense to cache the results.
+
+regex_string: regex source text; also the cache key.
+protocol:     protocol name, used only in the error message.
+
+Returns the compiled pattern, or NULL if memory ran out or the regex failed
+to compile. A failed compilation is cached as NULL so it is not retried.
+No locking is done here; the caller is expected to serialise access to the
+list. */
+static regexp * compile_and_cache(char * regex_string, char * protocol)
+{
+	struct pattern_cache * node = first_pattern_cache;
+	struct pattern_cache * last_pattern_cache = first_pattern_cache;
+	struct pattern_cache * tmp;
+	unsigned int len;
+
+	while (node != NULL) {
+		if (!strcmp(node->regex_string, regex_string))
+			return node->pattern;
+
+		last_pattern_cache = node; /* points at the last non-NULL node */
+		node = node->next;
+	}
+
+	/* If we reach the end of the list, then we have not yet cached
+	   the pattern for this regex. Let's do that now.
+	   Be paranoid about running out of memory to avoid list corruption. */
+	tmp = kmalloc(sizeof(struct pattern_cache), GFP_ATOMIC);
+
+	if(!tmp) {
+		if (net_ratelimit())
+			printk(KERN_ERR "layer7: out of memory in compile_and_cache, bailing.\n");
+		return NULL;
+	}
+
+	tmp->regex_string = kmalloc(strlen(regex_string) + 1, GFP_ATOMIC);
+	tmp->pattern = NULL;
+	tmp->next = NULL;
+
+	if(!tmp->regex_string) {
+		if (net_ratelimit())
+			printk(KERN_ERR "layer7: out of memory in compile_and_cache, bailing.\n");
+		kfree(tmp);
+		return NULL;
+	}
+
+	/* compile the regex. regcomp() hands back the pattern it built, so no
+	   placeholder is allocated for tmp->pattern: a placeholder would be
+	   overwritten here and leaked. */
+	len = strlen(regex_string);
+	DPRINTK("About to compile this: \"%s\"\n", regex_string);
+	tmp->pattern = regcomp(regex_string, &len);
+	if ( !tmp->pattern ) {
+		if (net_ratelimit())
+			printk(KERN_ERR "layer7: Error compiling regexp \"%s\" (%s)\n", regex_string, protocol);
+		/* pattern is now cached as NULL, so we won't try again. */
+	}
+
+	strcpy(tmp->regex_string, regex_string);
+
+	/* Link the node in only once it is completely filled in. */
+	if(first_pattern_cache == NULL) /* list is empty */
+		first_pattern_cache = tmp; /* make node the beginning */
+	else
+		last_pattern_cache->next = tmp; /* attach node to the end */
+
+	return tmp->pattern;
+}
+
+/* Returns 1 only for packets that have an IP header and carry TCP, UDP or
+   ICMP; 0 for everything else. */
+static int can_handle(const struct sk_buff *skb)
+{
+	if(!skb->nh.iph) /* not IP */
+		return 0;
+
+	switch (skb->nh.iph->protocol) {
+	case IPPROTO_TCP:
+	case IPPROTO_UDP:
+	case IPPROTO_ICMP:
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+/* Returns the offset into skb->data at which the application data starts:
+   the IP header length plus the transport header length. */
+static int app_data_offset(const struct sk_buff *skb)
+{
+	/* In case we are ported somewhere (ebtables?) where skb->nh.iph
+	   isn't set, this can be gotten from 4*(skb->data[0] & 0x0f) as well. */
+	const int ip_hl = 4*skb->nh.iph->ihl;
+	int l4_hl;
+
+	switch (skb->nh.iph->protocol) {
+	case IPPROTO_TCP:
+		/* 12 == offset into TCP header for the header length field.
+		   Can't get this with skb->h.th->doff because the tcphdr
+		   struct doesn't get set when routing (this is confirmed to be
+		   true in Netfilter as well as QoS.) */
+		l4_hl = 4*(skb->data[ip_hl + 12] >> 4);
+		break;
+	case IPPROTO_UDP:
+		l4_hl = 8; /* UDP header is always 8 bytes */
+		break;
+	case IPPROTO_ICMP:
+		l4_hl = 8; /* ICMP header is 8 bytes */
+		break;
+	default:
+		if (net_ratelimit())
+			printk(KERN_ERR "layer7: tried to handle unknown protocol!\n");
+		l4_hl = 8; /* something reasonable */
+		break;
+	}
+
+	return ip_hl + l4_hl;
+}
+
+/* Handles whether there's a match when we aren't appending data anymore
+   (the connection is already classified, or too many packets were seen).
+
+   Frees the buffered application data of the master conntrack. If the
+   master has a protocol name, a child conntrack without one gets a copy,
+   and the result is whether that name equals info->protocol. Otherwise the
+   master is labelled "unknown" and the result is 0.
+
+   Returns 1 when a protocol-name allocation fails. ctinfo and master_ctinfo
+   are not used.
+
+   NOTE(review): master_conntrack->layer7.app_proto is tested outside
+   ct_lock before the "unknown" branch takes the lock; confirm no other CPU
+   can set it in between. */
+static int match_no_append(struct ip_conntrack * conntrack, struct ip_conntrack * master_conntrack,
+			enum ip_conntrack_info ctinfo, enum ip_conntrack_info master_ctinfo,
+			struct ipt_layer7_info * info)
+{
+	/* If we're in here, throw the app data away */
+	WRITE_LOCK(&ct_lock);
+	if(master_conntrack->layer7.app_data != NULL) {
+
+	#ifdef CONFIG_IP_NF_MATCH_LAYER7_DEBUG
+		if(!master_conntrack->layer7.app_proto) {
+			char * f = friendly_print(master_conntrack->layer7.app_data);
+			char * g = hex_print(master_conntrack->layer7.app_data);
+
+			/* Both helpers return NULL when out of memory, and
+			   strlen(NULL) would oops, so only print what we got. */
+			if(f)
+				DPRINTK("\nl7-filter gave up after %d bytes (%d packets):\n%s\n",
+					(int)strlen(f),
+					TOTAL_PACKETS, f);
+			kfree(f);
+			if(g)
+				DPRINTK("In hex: %s\n", g);
+			kfree(g);
+		}
+	#endif
+
+		kfree(master_conntrack->layer7.app_data);
+		master_conntrack->layer7.app_data = NULL; /* don't free again */
+	}
+	WRITE_UNLOCK(&ct_lock);
+
+	if(master_conntrack->layer7.app_proto){
+		/* Here child connections set their .app_proto (for /proc/net/ip_conntrack) */
+		WRITE_LOCK(&ct_lock);
+		if(!conntrack->layer7.app_proto) {
+			conntrack->layer7.app_proto = kmalloc(strlen(master_conntrack->layer7.app_proto)+1, GFP_ATOMIC);
+			if(!conntrack->layer7.app_proto){
+				if (net_ratelimit())
+					printk(KERN_ERR "layer7: out of memory in match_no_append, bailing.\n");
+				WRITE_UNLOCK(&ct_lock);
+				return 1;
+			}
+			strcpy(conntrack->layer7.app_proto, master_conntrack->layer7.app_proto);
+		}
+		WRITE_UNLOCK(&ct_lock);
+
+		return (!strcmp(master_conntrack->layer7.app_proto, info->protocol));
+	}
+	else {
+		/* If not classified, set to "unknown" to distinguish from
+		   connections that are still being tested. */
+		WRITE_LOCK(&ct_lock);
+		master_conntrack->layer7.app_proto = kmalloc(strlen("unknown")+1, GFP_ATOMIC);
+		if(!master_conntrack->layer7.app_proto){
+			if (net_ratelimit())
+				printk(KERN_ERR "layer7: out of memory in match_no_append, bailing.\n");
+			WRITE_UNLOCK(&ct_lock);
+			return 1;
+		}
+		strcpy(master_conntrack->layer7.app_proto, "unknown");
+		WRITE_UNLOCK(&ct_lock);
+		return 0;
+	}
+}
+
+/* Appends new application data to the buffer kept on the master conntrack.
+   NUL bytes are dropped and ASCII letters are lower-cased (our regex lib
+   doesn't do case insensitivity). Copying stops when the input is used up
+   or the buffer, sized by maxdatalen, has only room for the terminator.
+   Returns the number of bytes added. */
+static int add_data(struct ip_conntrack * master_conntrack,
+			char * app_data, int appdatalen)
+{
+	const int start = master_conntrack->layer7.app_data_len;
+	const int room = maxdatalen - start - 1;
+	int end = start;
+	int src;
+
+	for(src = 0; src < room && src < appdatalen; src++) {
+		if(app_data[src] == '\0')
+			continue;
+
+		/* the kernel version of tolower mungs 'upper ascii' */
+		master_conntrack->layer7.app_data[end] =
+			isascii(app_data[src])? tolower(app_data[src]) : app_data[src];
+		end++;
+	}
+
+	master_conntrack->layer7.app_data[end] = '\0';
+	master_conntrack->layer7.app_data_len = end;
+
+	return end - start;
+}
+
+/* Match callback registered for the "layer7" match.
+
+ Buffers the application data of a connection's early packets on the master
+ conntrack and runs the rule's compiled regexp against it. Once the
+ connection is classified, or more than num_packets packets were counted,
+ the decision is delegated to match_no_append().
+
+ Returns true on match and false otherwise, XORed with info->invert;
+ returns plain info->invert whenever the packet cannot be examined.
+
+ NOTE(review): the other match callbacks in this tree take two further
+ parameters (hdr, datalen) before hotdrop; this one omits them and never
+ uses hotdrop. Confirm against the ipt_match prototype. */
+static int match(/* const */struct sk_buff *skb, const struct net_device *in,
+ const struct net_device *out, const void *matchinfo,
+ int offset, int *hotdrop)
+{
+ struct ipt_layer7_info * info = (struct ipt_layer7_info *)matchinfo;
+ enum ip_conntrack_info master_ctinfo, ctinfo;
+ struct ip_conntrack *master_conntrack, *conntrack;
+ unsigned char * app_data;
+ unsigned int pattern_result, appdatalen;
+ regexp * comppattern;
+
+ if(!can_handle(skb)){
+ DPRINTK("layer7: This is some protocol I can't handle.\n");
+ return info->invert;
+ }
+
+ /* Treat the parent and all its children together as one connection,
+ except for the purpose of setting conntrack->layer7.app_proto in the
+ actual connection. This makes /proc/net/ip_conntrack somewhat more
+ satisfying. */
+ if(!(conntrack = ip_conntrack_get((struct sk_buff *)skb, &ctinfo)) ||
+ !(master_conntrack = ip_conntrack_get((struct sk_buff *)skb, &master_ctinfo))) {
+ DPRINTK("layer7: packet is not from a known connection, giving up.\n");
+ return info->invert;
+ }
+
+ /* Try to get a master conntrack (and its master etc) for FTP, etc. */
+ while (master_ct(master_conntrack) != NULL)
+ master_conntrack = master_ct(master_conntrack);
+
+ /* Count the packet only once, even if several layer7 rules see it. */
+ if(!skb->cb[0]){
+ WRITE_LOCK(&ct_lock);
+ master_conntrack->layer7.numpackets++;/*starts at 0 via memset*/
+ WRITE_UNLOCK(&ct_lock);
+ }
+
+ /* if we've classified it or seen too many packets */
+ if(TOTAL_PACKETS > num_packets ||
+ master_conntrack->layer7.app_proto) {
+
+ pattern_result = match_no_append(conntrack, master_conntrack, ctinfo, master_ctinfo, info);
+
+ /* skb->cb[0] == seen. Avoid doing things twice if there are two l7
+ rules. I'm not sure that using cb for this purpose is correct, although
+ it says "put your private variables there". But it doesn't look like it
+ is being used for anything else in the skbs that make it here. How can
+ I write to cb without making the compiler angry? */
+ skb->cb[0] = 1; /* marking it seen here is probably irrelevant, but consistent */
+
+ return (pattern_result ^ info->invert);
+ }
+
+ if(skb_is_nonlinear(skb)){
+ if(skb_linearize(skb, GFP_ATOMIC) != 0){
+ if (net_ratelimit())
+ printk(KERN_ERR "layer7: failed to linearize packet, bailing.\n");
+ return info->invert;
+ }
+ }
+
+ /* now that the skb is linearized, it's safe to set these. */
+ app_data = skb->data + app_data_offset(skb);
+ /* NOTE(review): nothing visible here checks that the computed offset
+ lies inside the packet; if it does not, this unsigned subtraction wraps
+ around. Confirm that short or truncated headers cannot reach this point. */
+ appdatalen = skb->tail - app_data;
+
+ LOCK_BH(&list_lock);
+ /* the return value gets checked later, when we're ready to use it */
+ comppattern = compile_and_cache(info->pattern, info->protocol);
+ UNLOCK_BH(&list_lock);
+
+ /* On the first packet of a connection, allocate space for app data */
+ WRITE_LOCK(&ct_lock);
+ if(TOTAL_PACKETS == 1 && !skb->cb[0] && !master_conntrack->layer7.app_data) {
+ master_conntrack->layer7.app_data = kmalloc(maxdatalen, GFP_ATOMIC);
+ if(!master_conntrack->layer7.app_data){
+ if (net_ratelimit())
+ printk(KERN_ERR "layer7: out of memory in match, bailing.\n");
+ WRITE_UNLOCK(&ct_lock);
+ return info->invert;
+ }
+
+ master_conntrack->layer7.app_data[0] = '\0';
+ }
+ WRITE_UNLOCK(&ct_lock);
+
+ /* Can be here, but unallocated, if numpackets is increased near
+ the beginning of a connection */
+ if(master_conntrack->layer7.app_data == NULL)
+ return (info->invert); /* unmatched */
+
+ /* Append this packet's payload once per packet (cb[0] marks "seen"). */
+ if(!skb->cb[0]){
+ int newbytes;
+ WRITE_LOCK(&ct_lock);
+ newbytes = add_data(master_conntrack, app_data, appdatalen);
+ WRITE_UNLOCK(&ct_lock);
+
+ if(newbytes == 0) { /* didn't add any data */
+ skb->cb[0] = 1;
+ /* Didn't match before, not going to match now */
+ return info->invert;
+ }
+ }
+
+ /* If looking for "unknown", then never match. "Unknown" means that
+ we've given up; we're still trying with these packets. */
+ if(!strcmp(info->protocol, "unknown")) {
+ pattern_result = 0;
+ /* If the regexp failed to compile, don't bother running it */
+ } else if(comppattern && regexec(comppattern, master_conntrack->layer7.app_data)) {
+ DPRINTK("layer7: regexec positive: %s!\n", info->protocol);
+ pattern_result = 1;
+ } else pattern_result = 0;
+
+ /* On a hit, record the protocol name on the master conntrack. */
+ if(pattern_result) {
+ WRITE_LOCK(&ct_lock);
+ master_conntrack->layer7.app_proto = kmalloc(strlen(info->protocol)+1, GFP_ATOMIC);
+ if(!master_conntrack->layer7.app_proto){
+ if (net_ratelimit())
+ printk(KERN_ERR "layer7: out of memory in match, bailing.\n");
+ WRITE_UNLOCK(&ct_lock);
+ return (pattern_result ^ info->invert);
+ }
+ strcpy(master_conntrack->layer7.app_proto, info->protocol);
+ WRITE_UNLOCK(&ct_lock);
+ }
+
+ /* mark the packet seen */
+ skb->cb[0] = 1;
+
+ return (pattern_result ^ info->invert);
+}
+
+/* Rule-insertion check for the layer7 match: accept (1) only a match blob
+   of exactly the aligned size of struct ipt_layer7_info, else reject (0). */
+static int checkentry(const char *tablename, const struct ipt_ip *ip,
+		void *matchinfo, unsigned int matchsize, unsigned int hook_mask)
+{
+	return (matchsize == IPT_ALIGN(sizeof(struct ipt_layer7_info))) ? 1 : 0;
+}
+
+/* Registration record for the "layer7" match; members not named here stay
+   zero-initialised. */
+static struct ipt_match layer7_match = {
+	.me		= THIS_MODULE,
+	.name		= "layer7",
+	.checkentry	= &checkentry,
+	.match		= &match,
+};
+
+/* Parses the leading run of decimal digits in s and returns its value;
+   returns 0 when s does not start with a digit. No sign, whitespace or
+   overflow handling. (Originally taken from drivers/video/modedb.c.) */
+static int my_atoi(const char *s)
+{
+	int val = 0;
+
+	while (*s >= '0' && *s <= '9') {
+		val = 10*val + (*s - '0');
+		s++;
+	}
+
+	return val;
+}
+
+/* Writes num_packets to userland as two decimal digits and a newline.
+   Always reports 3 bytes and sets *eof; start, off, count and data are
+   not used. */
+static int layer7_read_proc(char* page, char ** start, off_t off, int count,
+			int* eof, void * data)
+{
+	char *out = page;
+
+	if(num_packets > 99 && net_ratelimit())
+		printk(KERN_ERR "layer7: NOT REACHED. num_packets too big\n");
+
+	*out++ = num_packets/10 + '0';
+	*out++ = num_packets%10 + '0';
+	*out++ = '\n';
+	*out = '\0';
+
+	*eof=1;
+
+	return 3;
+}
+
+/* Reads num_packets in from userland.
+
+   Only the first few bytes of the write are parsed (num_packets is clamped
+   to 1..99 anyway). The bytes go into a small on-stack buffer that is always
+   NUL-terminated before my_atoi() walks it, so parsing cannot run past the
+   data that was actually copied.
+
+   Returns count on success (the whole write is consumed) or -EFAULT if the
+   user buffer cannot be read, in which case num_packets is unchanged. */
+static int layer7_write_proc(struct file* file, const char* buffer,
+			unsigned long count, void *data)
+{
+	/* 9 digits at most, so my_atoi() cannot overflow an int */
+	char digits[10];
+	unsigned long len = count;
+	int value;
+
+	if(len > sizeof(digits) - 1)
+		len = sizeof(digits) - 1;
+
+	if(copy_from_user(digits, buffer, len))
+		return -EFAULT;
+	digits[len] = '\0';
+
+	value = my_atoi(digits);
+
+	/* This has an arbitrary limit to make the math easier. I'm lazy.
+	   But anyway, 99 is a LOT! If you want more, you're doing it wrong! */
+	if(value > 99) {
+		printk(KERN_WARNING "layer7: num_packets can't be > 99.\n");
+		value = 99;
+	} else if(value < 1) {
+		printk(KERN_WARNING "layer7: num_packets can't be < 1.\n");
+		value = 1;
+	}
+
+	/* publish only the clamped value */
+	num_packets = value;
+
+	return count;
+}
+
+/* Registers /proc/net/layer7_numpackets and hooks up its read and write
+   handlers. If the entry cannot be created the module carries on without
+   the tunable; num_packets keeps its default. */
+static void layer7_init_proc(void)
+{
+	struct proc_dir_entry* entry;
+
+	entry = create_proc_entry("layer7_numpackets", 0644, proc_net);
+	if(!entry) {
+		/* create_proc_entry() returns NULL on failure; don't dereference it */
+		printk(KERN_WARNING "layer7: can't create /proc/net/layer7_numpackets.\n");
+		return;
+	}
+
+	entry->read_proc = layer7_read_proc;
+	entry->write_proc = layer7_write_proc;
+}
+
+/* Removes the "layer7_numpackets" entry from proc_net (the counterpart of
+ layer7_init_proc). */
+static void layer7_cleanup_proc(void)
+{
+ remove_proc_entry("layer7_numpackets", proc_net);
+}
+
+/* Module load hook: clamps the maxdatalen module parameter to 1..65536,
+   creates the proc tunable and registers the layer7 match.
+
+   Returns the result of ipt_register_match(). If registration fails the
+   proc entry is removed again, so no handler is left pointing into a
+   module that did not load. */
+static int __init init(void)
+{
+	int ret;
+
+	if(maxdatalen < 1) {
+		printk(KERN_WARNING "layer7: maxdatalen can't be < 1, using 1\n");
+		maxdatalen = 1;
+	}
+	/* This is not a hard limit. It's just here to prevent people from
+	   bringing their slow machines to a grinding halt. */
+	else if(maxdatalen > 65536) {
+		printk(KERN_WARNING "layer7: maxdatalen can't be > 65536, using 65536\n");
+		maxdatalen = 65536;
+	}
+
+	layer7_init_proc();
+
+	ret = ipt_register_match(&layer7_match);
+	if(ret)
+		layer7_cleanup_proc();
+
+	return ret;
+}
+
+/* Module unload hook: removes the proc entry first, then unregisters the
+ layer7 match. */
+static void __exit fini(void)
+{
+ layer7_cleanup_proc();
+ ipt_unregister_match(&layer7_match);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/release/src/linux/linux/net/ipv4/netfilter/ipt_mac.c b/release/src/linux/linux/net/ipv4/netfilter/ipt_mac.c
index b320e29b..d0475155 100644
--- a/release/src/linux/linux/net/ipv4/netfilter/ipt_mac.c
+++ b/release/src/linux/linux/net/ipv4/netfilter/ipt_mac.c
@@ -19,7 +19,8 @@ match(const struct sk_buff *skb,
const struct ipt_mac_info *info = matchinfo;
/* Is mac pointer valid? */
- return (skb->mac.raw >= skb->head
+ return (in != NULL // added for OUTPUT experiment -- zzz
+ && skb->mac.raw >= skb->head
&& (skb->mac.raw + ETH_HLEN) <= skb->data
/* If so, compare... */
&& ((memcmp(skb->mac.ethernet->h_source, info->srcaddr, ETH_ALEN)
@@ -33,6 +34,7 @@ ipt_mac_checkentry(const char *tablename,
unsigned int matchsize,
unsigned int hook_mask)
{
+#if 0 // removed for OUTPUT experiment --jz
/* FORWARD isn't always valid, but it's nice to be able to do --RR */
if (hook_mask
& ~((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_IN)
@@ -40,7 +42,7 @@ ipt_mac_checkentry(const char *tablename,
printk("ipt_mac: only valid for PRE_ROUTING, LOCAL_IN or FORWARD.\n");
return 0;
}
-
+#endif
if (matchsize != IPT_ALIGN(sizeof(struct ipt_mac_info)))
return 0;
diff --git a/release/src/linux/linux/net/ipv4/netfilter/ipt_macsave.c b/release/src/linux/linux/net/ipv4/netfilter/ipt_macsave.c
new file mode 100644
index 00000000..25fa26a4
--- /dev/null
+++ b/release/src/linux/linux/net/ipv4/netfilter/ipt_macsave.c
@@ -0,0 +1,62 @@
+/*
+
+ macsave match
+ Copyright (C) 2006 Jonathan Zarate
+
+ Licensed under GNU GPL v2 or later.
+
+*/
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <linux/netfilter_ipv4/ipt_macsave.h>
+
+#define DEBUG 1
+
+#ifdef DEBUG
+#define DLOG printk
+#else
+/* No trailing ';' here: the caller supplies it, so that
+   "if (x) DLOG(...); else ..." still parses. */
+#define DLOG(...) do { } while (0)
+#endif
+
+
+/* macsave match callback: compares the MAC saved on the packet's conntrack
+   with the one configured in the rule. Returns the comparison result XORed
+   with info->invert, or just info->invert when the packet has no conntrack. */
+static int match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out,
+	const void *matchinfo, int offset, const void *hdr, u_int16_t datalen, int *hotdrop)
+{
+	const struct ipt_macsave_match_info *info = matchinfo;
+	enum ip_conntrack_info ctinfo;
+	struct ip_conntrack *ct;
+	int same_mac;
+
+	/* note about cast: ip_conntrack_get() will not modify skb */
+	ct = ip_conntrack_get((struct sk_buff *)skb, &ctinfo);
+	if (!ct)
+		return info->invert;
+
+	same_mac = (memcmp(ct->macsave, info->mac, sizeof(ct->macsave)) == 0);
+	return same_mac ^ info->invert;
+}
+
+/* Rule-insertion check for macsave: accept (1) only a match blob of exactly
+   the aligned size of struct ipt_macsave_match_info, else reject (0). */
+static int checkentry(const char *tablename, const struct ipt_ip *ip, void *matchinfo,
+	unsigned int matchsize, unsigned int hook_mask)
+{
+	if (matchsize != IPT_ALIGN(sizeof(struct ipt_macsave_match_info)))
+		return 0;
+
+	return 1;
+}
+
+
+/* Registration record for the "macsave" match. */
+static struct ipt_match macsave_match = {
+	.list		= { NULL, NULL },
+	.name		= "macsave",
+	.match		= &match,
+	.checkentry	= &checkentry,
+	.destroy	= NULL,
+	.me		= THIS_MODULE
+};
+
+/* Module load hook: logs a build-stamped banner through DLOG, then
+   registers the macsave match and passes on the result. */
+static int __init init(void)
+{
+	int rc;
+
+	DLOG(KERN_INFO "macsave match init " __DATE__ " " __TIME__ "\n");
+
+	rc = ipt_register_match(&macsave_match);
+	return rc;
+}
+
+/* Module unload hook: unregisters the macsave match. */
+static void __exit fini(void)
+{
+ ipt_unregister_match(&macsave_match);
+}
+
+module_init(init);
+module_exit(fini);
+MODULE_LICENSE("GPL");
diff --git a/release/src/linux/linux/net/ipv4/netfilter/ipt_mport.c b/release/src/linux/linux/net/ipv4/netfilter/ipt_mport.c
index ca99b764..836d3f2f 100644
--- a/release/src/linux/linux/net/ipv4/netfilter/ipt_mport.c
+++ b/release/src/linux/linux/net/ipv4/netfilter/ipt_mport.c
@@ -10,7 +10,11 @@
MODULE_LICENSE("GPL");
+#if 0
+#define duprintf(format, args...) printk(format , ## args)
+#else
#define duprintf(format, args...)
+#endif
/* Returns 1 if the port is matched by the test, 0 otherwise. */
static inline int
diff --git a/release/src/linux/linux/net/ipv4/netfilter/ipt_quota.c b/release/src/linux/linux/net/ipv4/netfilter/ipt_quota.c
new file mode 100644
index 00000000..d7ab39cf
--- /dev/null
+++ b/release/src/linux/linux/net/ipv4/netfilter/ipt_quota.c
@@ -0,0 +1,88 @@
+/*
+ * netfilter module to enforce network quotas
+ *
+ * Sam Johnston <samj@samj.net>
+ *
+ * 30/01/05: Fixed on SMP --Pablo Neira <pablo@eurodev.net>
+ */
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_quota.h>
+
+MODULE_LICENSE("GPL");
+
+static spinlock_t quota_lock = SPIN_LOCK_UNLOCKED;
+
+/*
+ * quota match callback.  Under quota_lock, charges datalen against the
+ * remaining quota kept in the rule's master info.  Returns 1 while the
+ * packet still fits; otherwise zeroes the quota and returns 0.
+ */
+static int
+match(const struct sk_buff *skb,
+      const struct net_device *in,
+      const struct net_device *out,
+      const void *matchinfo,
+      int offset, const void *hdr, u_int16_t datalen, int *hotdrop)
+{
+	struct ipt_quota_info *q =
+		((struct ipt_quota_info *) matchinfo)->master;
+
+	spin_lock_bh(&quota_lock);
+
+	if (q->quota < datalen) {
+		/* so we do not allow even small packets from now on */
+		q->quota = 0;
+
+#ifdef DEBUG_IPT_QUOTA
+		printk("IPT Quota Failed: %llu datlen %d \n", q->quota, datalen);
+#endif
+
+		spin_unlock_bh(&quota_lock);
+		return 0;
+	}
+
+	/* we can afford this one */
+	q->quota -= datalen;
+	spin_unlock_bh(&quota_lock);
+
+#ifdef DEBUG_IPT_QUOTA
+	printk("IPT Quota OK: %llu datlen %d \n", q->quota, datalen);
+#endif
+	return 1;
+}
+
+/*
+ * Rule-insertion check for quota.  Rejects (0) a match blob of the wrong
+ * size; otherwise points the info's master at itself and accepts (1).
+ */
+static int
+checkentry(const char *tablename,
+	   const struct ipt_ip *ip,
+	   void *matchinfo, unsigned int matchsize, unsigned int hook_mask)
+{
+	/* TODO: spinlocks? sanity checks? */
+	struct ipt_quota_info *q;
+
+	if (matchsize != IPT_ALIGN(sizeof (struct ipt_quota_info)))
+		return 0;
+
+	/* For SMP, we only want to use one set of counters. */
+	q = (struct ipt_quota_info *) matchinfo;
+	q->master = q;
+
+	return 1;
+}
+
+/* Registration record for the "quota" match. */
+static struct ipt_match quota_match = {
+	.list		= { NULL, NULL },
+	.name		= "quota",
+	.match		= &match,
+	.checkentry	= &checkentry,
+	.destroy	= NULL,
+	.me		= THIS_MODULE
+};
+
+/* Module load hook: registers the quota match and passes on the result. */
+static int __init
+init(void)
+{
+	int rc = ipt_register_match(&quota_match);
+
+	return rc;
+}
+
+/* Module unload hook: unregisters the quota match. */
+static void __exit
+fini(void)
+{
+ ipt_unregister_match(&quota_match);
+}
+
+module_init(init);
+module_exit(fini);
+
diff --git a/release/src/linux/linux/net/ipv4/netfilter/ipt_recent.c b/release/src/linux/linux/net/ipv4/netfilter/ipt_recent.c
new file mode 100644
index 00000000..808ae78f
--- /dev/null
+++ b/release/src/linux/linux/net/ipv4/netfilter/ipt_recent.c
@@ -0,0 +1,998 @@
+/* Kernel module to check if the source address has been seen recently. */
+/* Copyright 2002-2003, Stephen Frost */
+/* Author: Stephen Frost <sfrost@snowman.net> */
+/* Project Page: http://snowman.net/projects/ipt_recent/ */
+/* This software is distributed under the terms of the GPL, Version 2 */
+/* This copyright does not cover user programs that use kernel services
+ * by normal system calls. */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/proc_fs.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <asm/uaccess.h>
+#include <linux/ctype.h>
+#include <linux/ip.h>
+#include <linux/vmalloc.h>
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_recent.h>
+
+#undef DEBUG
+#define HASH_LOG 9
+
+/* Defaults, these can be overridden on the module command-line. */
+static int ip_list_tot = 100;
+static int ip_pkt_list_tot = 20;
+static int ip_list_hash_size = 0;
+static int ip_list_perms = 0644;
+#ifdef DEBUG
+static int debug = 1;
+#endif
+
+static char version[] =
+KERN_INFO RECENT_NAME " " RECENT_VER ": Stephen Frost <sfrost@snowman.net>. http://snowman.net/projects/ipt_recent/\n";
+
+MODULE_AUTHOR("Stephen Frost <sfrost@snowman.net>");
+MODULE_DESCRIPTION("IP tables recently seen matching module " RECENT_VER);
+MODULE_LICENSE("GPL");
+MODULE_PARM(ip_list_tot,"i");
+MODULE_PARM(ip_pkt_list_tot,"i");
+MODULE_PARM(ip_list_hash_size,"i");
+MODULE_PARM(ip_list_perms,"i");
+#ifdef DEBUG
+MODULE_PARM(debug,"i");
+MODULE_PARM_DESC(debug,"debugging level, defaults to 1");
+#endif
+MODULE_PARM_DESC(ip_list_tot,"number of IPs to remember per list");
+MODULE_PARM_DESC(ip_pkt_list_tot,"number of packets per IP to remember");
+MODULE_PARM_DESC(ip_list_hash_size,"size of hash table used to look up IPs");
+MODULE_PARM_DESC(ip_list_perms,"permissions on /proc/net/ipt_recent/* files");
+
+/* Structure of our list of recently seen addresses.
+ * One element per remembered address; ip_recent_ctrl's "clear" command
+ * zeroes every field of every element.
+ */
+struct recent_ip_list {
+ u_int32_t addr; /* 0 marks an unused slot (skipped by the /proc dump) */
+ u_int8_t ttl;
+ u_int32_t last_seen;
+ u_int32_t *last_pkts; /* array of ip_pkt_list_tot entries */
+ u_int32_t oldest_pkt;
+ u_int32_t hash_entry; /* presumably an index into the hash table -- TODO confirm */
+ u_int32_t time_pos; /* presumably an index into time_info -- TODO confirm */
+};
+
+/* Per-slot time bookkeeping; "clear" resets entry N to position N, time 0. */
+struct time_info_list {
+ u_int32_t position;
+ u_int32_t time;
+};
+
+/* Structure of our linked list of tables of recent lists. */
+struct recent_ip_tables {
+ char name[IPT_RECENT_NAME_LEN];
+ int count;
+ int time_pos;
+ struct recent_ip_list *table; /* ip_list_tot entries */
+ struct recent_ip_tables *next;
+ spinlock_t list_lock; /* taken (bh-safe) around accesses to table in the /proc handlers */
+ int *hash_table; /* ip_list_hash_size entries; "clear" resets each to -1 */
+ struct time_info_list *time_info; /* ip_list_tot entries */
+#ifdef CONFIG_PROC_FS
+ struct proc_dir_entry *status_proc;
+#endif /* CONFIG_PROC_FS */
+};
+
+/* Our current list of addresses we have recently seen.
+ * Only added to on a --set, and only updated on --set || --update
+ */
+static struct recent_ip_tables *r_tables = NULL;
+
+/* We protect r_tables with this spinlock so two processors are not modifying
+ * the list at the same time.
+ */
+static spinlock_t recent_lock = SPIN_LOCK_UNLOCKED;
+
+/* Our /proc/net/ipt_recent entry */
+static struct proc_dir_entry *proc_net_ipt_recent = NULL;
+
+/* Function declaration for later. */
+static int
+match(const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ const void *matchinfo,
+ int offset,
+ const void *hdr,
+ u_int16_t datalen,
+ int *hotdrop);
+
+/* Hashes an address into a table of table_size slots: the address is
+ * XOR-folded in HASH_LOG-bit steps and the result masked with
+ * (table_size - 1).
+ */
+int hash_func(unsigned int addr, int table_size)
+{
+	unsigned int remaining = addr;
+	int folded = 0;
+
+	/* fold at least once, then keep going while bits remain */
+	for(;;) {
+		folded ^= remaining;
+		remaining >>= HASH_LOG;
+		if(!remaining) break;
+	}
+
+	folded &= (table_size - 1);
+
+#ifdef DEBUG
+	if(debug) printk(KERN_INFO RECENT_NAME ": %d = hash_func(%u,%d)\n",
+			 folded,
+			 addr,
+			 table_size);
+#endif
+
+	return(folded);
+}
+
+#ifdef CONFIG_PROC_FS
+/* This is the function which produces the output for our /proc output
+ * interface which lists each IP address, the last seen time and the
+ * other recent times the address was seen.
+ *
+ * data is the struct recent_ip_tables to dump. One text line is formatted
+ * per non-empty slot; offset/length select the window handed back through
+ * *start, and the return value is the number of bytes in that window.
+ * *eof is never set here.
+ *
+ * NOTE(review): each line is sprintf()ed into buffer before the
+ * "pos > offset + length" check and nothing visible here bounds len by the
+ * size of buffer -- confirm the buffer is large enough for a line holding
+ * ip_pkt_list_tot timestamps.
+ */
+
+static int ip_recent_get_info(char *buffer, char **start, off_t offset, int length, int *eof, void *data)
+{
+ int len = 0, count, last_len = 0, pkt_count;
+ off_t pos = 0;
+ off_t begin = 0;
+ struct recent_ip_tables *curr_table;
+
+ curr_table = (struct recent_ip_tables*) data;
+
+ spin_lock_bh(&curr_table->list_lock);
+ for(count = 0; count < ip_list_tot; count++) {
+ /* addr == 0 marks an unused slot */
+ if(!curr_table->table[count].addr) continue;
+ last_len = len;
+ len += sprintf(buffer+len,"src=%u.%u.%u.%u ",NIPQUAD(curr_table->table[count].addr));
+ len += sprintf(buffer+len,"ttl: %u ",curr_table->table[count].ttl);
+ len += sprintf(buffer+len,"last_seen: %u ",curr_table->table[count].last_seen);
+ len += sprintf(buffer+len,"oldest_pkt: %u ",curr_table->table[count].oldest_pkt);
+ len += sprintf(buffer+len,"last_pkts: %u",curr_table->table[count].last_pkts[0]);
+ /* remaining timestamps, stopping at the first zero entry */
+ for(pkt_count = 1; pkt_count < ip_pkt_list_tot; pkt_count++) {
+ if(!curr_table->table[count].last_pkts[pkt_count]) break;
+ len += sprintf(buffer+len,", %u",curr_table->table[count].last_pkts[pkt_count]);
+ }
+ len += sprintf(buffer+len,"\n");
+ pos = begin + len;
+ /* everything so far lies before the requested offset: discard it */
+ if(pos < offset) { len = 0; begin = pos; }
+ /* ran past the requested window: drop the last line and stop */
+ if(pos > offset + length) { len = last_len; break; }
+ }
+
+ *start = buffer + (offset - begin);
+ len -= (offset - begin);
+ if(len > length) len = length;
+
+ spin_unlock_bh(&curr_table->list_lock);
+ return len;
+}
+
+/* ip_recent_ctrl provides an interface for users to modify the table
+ * directly.  This allows adding entries, removing entries, and
+ * flushing the entire table.
+ * This is done by opening up the appropriate table for writing and
+ * sending one of:
+ * xx.xx.xx.xx   -- Add entry to table with current time
+ * +xx.xx.xx.xx  -- Add entry to table with current time
+ * -xx.xx.xx.xx  -- Remove entry from table
+ * clear         -- Flush table, remove all entries
+ *
+ * data is the struct recent_ip_tables being written to.  At most the first
+ * 20 bytes of input are looked at.
+ *
+ * Returns the number of input bytes consumed, -EFAULT if the user buffer
+ * cannot be read, or -ENOMEM if a temporary allocation fails.
+ *
+ * NOTE(review): the fake sk_buff and iphdr handed to match() come from
+ * kmalloc() and only saddr, daddr and ttl are filled in -- confirm match()
+ * reads nothing else from them.
+ */
+static int ip_recent_ctrl(struct file *file, const char *input, unsigned long size, void *data)
+{
+	static const u_int32_t max[4] = { 0xffffffff, 0xffffff, 0xffff, 0xff };
+	u_int32_t val;
+	int base, used = 0;
+	char c, *cp;
+	union iaddr {
+		uint8_t bytes[4];
+		uint32_t word;
+	} res;
+	uint8_t *pp = res.bytes;
+	int digit;
+
+	/* 20 input bytes plus a NUL that is always present, so the parser
+	 * below can never walk off the end of the buffer */
+	char buffer[21];
+	int len, check_set = 0, count;
+	u_int32_t addr = 0;
+	struct sk_buff *skb;
+	struct ipt_recent_info *info;
+	struct recent_ip_tables *curr_table;
+
+	curr_table = (struct recent_ip_tables*) data;
+
+	if(size > 20) len = 20; else len = size;
+
+	if(copy_from_user(buffer,input,len)) return -EFAULT;
+
+	buffer[len] = '\0';
+
+#ifdef DEBUG
+	if(debug) printk(KERN_INFO RECENT_NAME ": ip_recent_ctrl len: %d, input: `%.20s'\n",len,buffer);
+#endif
+
+	cp = buffer;
+	while(isspace(*cp)) { cp++; used++; if(used >= len-5) return used; }
+
+	/* Check if we are asked to flush the entire table */
+	if(!memcmp(cp,"clear",5)) {
+		used += 5;
+		spin_lock_bh(&curr_table->list_lock);
+		curr_table->time_pos = 0;
+		for(count = 0; count < ip_list_hash_size; count++) {
+			curr_table->hash_table[count] = -1;
+		}
+		for(count = 0; count < ip_list_tot; count++) {
+			curr_table->table[count].last_seen = 0;
+			curr_table->table[count].addr = 0;
+			curr_table->table[count].ttl = 0;
+			memset(curr_table->table[count].last_pkts,0,ip_pkt_list_tot*sizeof(u_int32_t));
+			curr_table->table[count].oldest_pkt = 0;
+			curr_table->table[count].time_pos = 0;
+			curr_table->time_info[count].position = count;
+			curr_table->time_info[count].time = 0;
+		}
+		spin_unlock_bh(&curr_table->list_lock);
+		return used;
+	}
+
+	check_set = IPT_RECENT_SET;
+	switch(*cp) {
+		case '+': check_set = IPT_RECENT_SET; cp++; used++; break;
+		case '-': check_set = IPT_RECENT_REMOVE; cp++; used++; break;
+		default: if(!isdigit(*cp)) return (used+1); break;
+	}
+
+#ifdef DEBUG
+	if(debug) printk(KERN_INFO RECENT_NAME ": ip_recent_ctrl cp: `%c', check_set: %d\n",*cp,check_set);
+#endif
+	/* Get addr (effectively inet_aton()) */
+	/* Shamelessly stolen from libc, a function in the kernel for doing
+	 * this would, of course, be greatly preferred, but our options appear
+	 * to be rather limited, so we will just do it ourselves here.
+	 */
+	res.word = 0;
+
+	c = *cp;
+	for(;;) {
+		if(!isdigit(c)) return used;
+		val = 0; base = 10; digit = 0;
+		if(c == '0') {
+			c = *++cp;
+			if(c == 'x' || c == 'X') base = 16, c = *++cp;
+			else { base = 8; digit = 1; }
+		}
+		for(;;) {
+			if(isascii(c) && isdigit(c)) {
+				/* '8' and '9' are not octal digits; '0' is */
+				if(base == 8 && (c == '8' || c == '9')) return used;
+				val = (val * base) + (c - '0');
+				c = *++cp;
+				digit = 1;
+			} else if(base == 16 && isascii(c) && isxdigit(c)) {
+				val = (val << 4) | (c + 10 - (islower(c) ? 'a' : 'A'));
+				c = *++cp;
+				digit = 1;
+			} else break;
+		}
+		if(c == '.') {
+			if(pp > res.bytes + 2 || val > 0xff) return used;
+			*pp++ = val;
+			c = *++cp;
+		} else break;
+	}
+	used = cp - buffer;
+	if(c != '\0' && (!isascii(c) || !isspace(c))) return used;
+	if(c == '\n') used++;
+	if(!digit) return used;
+
+	if(val > max[pp - res.bytes]) return used;
+	addr = res.word | htonl(val);
+
+	if(!addr && check_set == IPT_RECENT_SET) return used;
+
+#ifdef DEBUG
+	if(debug) printk(KERN_INFO RECENT_NAME ": ip_recent_ctrl c: %c, addr: %u used: %d\n",c,addr,used);
+#endif
+
+	/* Set up and just call match */
+	info = kmalloc(sizeof(struct ipt_recent_info),GFP_KERNEL);
+	if(!info) { return -ENOMEM; }
+	info->seconds = 0;
+	info->hit_count = 0;
+	info->check_set = check_set;
+	info->invert = 0;
+	info->side = IPT_RECENT_SOURCE;
+	strncpy(info->name,curr_table->name,IPT_RECENT_NAME_LEN);
+	info->name[IPT_RECENT_NAME_LEN-1] = '\0';
+
+	skb = kmalloc(sizeof(struct sk_buff),GFP_KERNEL);
+	if (!skb) {
+		used = -ENOMEM;
+		goto out_free_info;
+	}
+	skb->nh.iph = kmalloc(sizeof(struct iphdr),GFP_KERNEL);
+	if (!skb->nh.iph) {
+		used = -ENOMEM;
+		goto out_free_skb;
+	}
+
+	skb->nh.iph->saddr = addr;
+	skb->nh.iph->daddr = 0;
+	/* Clear ttl since we have no way of knowing it */
+	skb->nh.iph->ttl = 0;
+	match(skb,NULL,NULL,info,0,NULL,sizeof(struct ipt_recent_info),NULL);
+
+	kfree(skb->nh.iph);
+out_free_skb:
+	kfree(skb);
+out_free_info:
+	kfree(info);
+
+#ifdef DEBUG
+	if(debug) printk(KERN_INFO RECENT_NAME ": Leaving ip_recent_ctrl addr: %u used: %d\n",addr,used);
+#endif
+	return used;
+}
+
+#endif /* CONFIG_PROC_FS */
+
+/* 'match' is our primary function, called by the kernel whenever a rule is
+ * hit with our module as an option to it.
+ * What this function does depends on what was specifically asked of it by
+ * the user:
+ * --set -- Add or update last seen time of the source address of the packet
+ *   -- matchinfo->check_set == IPT_RECENT_SET
+ * --rcheck -- Just check if the source address is in the list
+ *   -- matchinfo->check_set == IPT_RECENT_CHECK
+ * --update -- If the source address is in the list, update last_seen
+ *   -- matchinfo->check_set == IPT_RECENT_UPDATE
+ * --remove -- If the source address is in the list, remove it
+ *   -- matchinfo->check_set == IPT_RECENT_REMOVE
+ * --seconds -- Option to --rcheck/--update, only match if last_seen within seconds
+ *   -- matchinfo->seconds
+ * --hitcount -- Option to --rcheck/--update, only match if seen hitcount times
+ *   -- matchinfo->hit_count
+ * --seconds and --hitcount can be combined
+ *
+ * Parameters actually used:
+ *   skb       -- skb->nh.iph supplies ttl and saddr/daddr.
+ *   out       -- when non-NULL the ttl is incremented by one before comparing.
+ *   matchinfo -- the rule's struct ipt_recent_info.
+ * in, offset, hdr, datalen and hotdrop are not referenced.
+ *
+ * Returns info->invert when the rule does not match (including "no table
+ * with that name" and "address is zero") and !info->invert when it does.
+ *
+ * Locking: recent_lock is held only while looking up the table by name;
+ * curr_table->list_lock is held for all work on the table itself.
+ */
+static int
+match(const struct sk_buff *skb,
+      const struct net_device *in,
+      const struct net_device *out,
+      const void *matchinfo,
+      int offset,
+      const void *hdr,
+      u_int16_t datalen,
+      int *hotdrop)
+{
+	int pkt_count, hits_found, ans;
+	unsigned long now;
+	const struct ipt_recent_info *info = matchinfo;
+	u_int32_t addr = 0, time_temp;
+	u_int8_t ttl = skb->nh.iph->ttl;
+	int *hash_table;
+	int orig_hash_result, hash_result, temp, location = 0, time_loc, end_collision_chain = -1;
+	struct time_info_list *time_info;
+	struct recent_ip_tables *curr_table;
+	struct recent_ip_tables *last_table;
+	struct recent_ip_list *r_list;
+
+#ifdef DEBUG
+	if(debug) printk(KERN_INFO RECENT_NAME ": match() called\n");
+#endif
+
+	/* Default is false ^ info->invert */
+	ans = info->invert;
+
+#ifdef DEBUG
+	if(debug) printk(KERN_INFO RECENT_NAME ": match(): name = '%s'\n",info->name);
+#endif
+
+	/* If out != NULL then routing has been done and the TTL was changed.
+	 * Undo that internally so we compare against what came in before routing. */
+	if(out) ttl++;
+
+	/* Find the right table */
+	spin_lock_bh(&recent_lock);
+	curr_table = r_tables;
+	while( (last_table = curr_table) && strncmp(info->name,curr_table->name,IPT_RECENT_NAME_LEN) && (curr_table = curr_table->next) );
+
+#ifdef DEBUG
+	if(debug) printk(KERN_INFO RECENT_NAME ": match(): table found('%s')\n",info->name);
+#endif
+
+	spin_unlock_bh(&recent_lock);
+
+	/* Table with this name not found, match impossible */
+	if(!curr_table) { return ans; }
+
+	/* Make sure no one is changing the list while we work with it */
+	spin_lock_bh(&curr_table->list_lock);
+
+	r_list = curr_table->table;
+	if(info->side == IPT_RECENT_DEST) addr = skb->nh.iph->daddr; else addr = skb->nh.iph->saddr;
+
+	/* addr == 0 marks an unused slot in the table, so it can never be stored */
+	if(!addr) {
+#ifdef DEBUG
+		if(debug) printk(KERN_INFO RECENT_NAME ": match() address (%u) invalid, leaving.\n",addr);
+#endif
+		spin_unlock_bh(&curr_table->list_lock);
+		return ans;
+	}
+
+#ifdef DEBUG
+	if(debug) printk(KERN_INFO RECENT_NAME ": match(): checking table, addr: %u, ttl: %u, orig_ttl: %u\n",addr,ttl,skb->nh.iph->ttl);
+#endif
+
+	/* Get jiffies now in case they changed while we were waiting for a lock */
+	now = jiffies;
+	hash_table = curr_table->hash_table;
+	time_info = curr_table->time_info;
+
+	orig_hash_result = hash_result = hash_func(addr,ip_list_hash_size);
+	/* Hash entry at this result used */
+	/* Check for TTL match if requested.  If TTL is zero then a match would never
+	 * happen, so match regardless of existing TTL in that case.  Zero means the
+	 * entry was added via the /proc interface anyway, so we will just use the
+	 * first TTL we get for that IP address. */
+	if(info->check_set & IPT_RECENT_TTL) {
+		while(hash_table[hash_result] != -1 && !(r_list[hash_table[hash_result]].addr == addr &&
+			(!r_list[hash_table[hash_result]].ttl || r_list[hash_table[hash_result]].ttl == ttl))) {
+			/* Collision in hash table */
+			hash_result = (hash_result + 1) % ip_list_hash_size;
+		}
+	} else {
+		while(hash_table[hash_result] != -1 && r_list[hash_table[hash_result]].addr != addr) {
+			/* Collision in hash table */
+			hash_result = (hash_result + 1) % ip_list_hash_size;
+		}
+	}
+
+	if(hash_table[hash_result] == -1 && !(info->check_set & IPT_RECENT_SET)) {
+		/* IP not in list and not asked to SET */
+		spin_unlock_bh(&curr_table->list_lock);
+		return ans;
+	}
+
+	/* Check if we need to handle the collision, do not need to on REMOVE */
+	if(orig_hash_result != hash_result && !(info->check_set & IPT_RECENT_REMOVE)) {
+#ifdef DEBUG
+		if(debug) printk(KERN_INFO RECENT_NAME ": match(): Collision in hash table. (or: %d,hr: %d,oa: %u,ha: %u)\n",
+				 orig_hash_result,
+				 hash_result,
+				 r_list[hash_table[orig_hash_result]].addr,
+				 addr);
+#endif
+
+		/* We had a collision.
+		 * orig_hash_result is where we started, hash_result is where we ended up.
+		 * So, swap them because we are likely to see the same guy again sooner */
+#ifdef DEBUG
+		if(debug) {
+			printk(KERN_INFO RECENT_NAME ": match(): Collision; hash_table[orig_hash_result] = %d\n",hash_table[orig_hash_result]);
+			printk(KERN_INFO RECENT_NAME ": match(): Collision; r_list[hash_table[orig_hash_result]].hash_entry = %d\n",
+			       r_list[hash_table[orig_hash_result]].hash_entry);
+		}
+#endif
+
+		r_list[hash_table[orig_hash_result]].hash_entry = hash_result;
+
+		temp = hash_table[orig_hash_result];
+#ifdef DEBUG
+		if(debug) printk(KERN_INFO RECENT_NAME ": match(): Collision; hash_table[hash_result] = %d\n",hash_table[hash_result]);
+#endif
+		hash_table[orig_hash_result] = hash_table[hash_result];
+		hash_table[hash_result] = temp;
+		temp = hash_result;
+		hash_result = orig_hash_result;
+		orig_hash_result = temp;
+		time_info[r_list[hash_table[orig_hash_result]].time_pos].position = hash_table[orig_hash_result];
+		if(hash_table[hash_result] != -1) {
+			r_list[hash_table[hash_result]].hash_entry = hash_result;
+			time_info[r_list[hash_table[hash_result]].time_pos].position = hash_table[hash_result];
+		}
+
+#ifdef DEBUG
+		if(debug) printk(KERN_INFO RECENT_NAME ": match(): Collision handled.\n");
+#endif
+	}
+
+	if(hash_table[hash_result] == -1) {
+#ifdef DEBUG
+		if(debug) printk(KERN_INFO RECENT_NAME ": match(): New table entry. (hr: %d,ha: %u)\n",
+				 hash_result, addr);
+#endif
+
+		/* New item found and IPT_RECENT_SET, so we need to add it.
+		 * The slot at the current time position is recycled for it. */
+		location = time_info[curr_table->time_pos].position;
+		/* Only invalidate the recycled slot's old bucket if that bucket
+		 * still refers to this slot.  Never-used slots are zero-filled
+		 * (hash_entry == 0) and removed slots keep a stale hash_entry, so
+		 * clearing unconditionally could wipe a bucket that now belongs
+		 * to a different, live entry. */
+		if(hash_table[r_list[location].hash_entry] == location)
+			hash_table[r_list[location].hash_entry] = -1;
+		hash_table[hash_result] = location;
+		memset(r_list[location].last_pkts,0,ip_pkt_list_tot*sizeof(u_int32_t));
+		r_list[location].time_pos = curr_table->time_pos;
+		r_list[location].addr = addr;
+		r_list[location].ttl = ttl;
+		r_list[location].last_seen = now;
+		/* Next write index into last_pkts[]; wraps straight back to 0 when
+		 * only a single packet timestamp is kept per entry. */
+		r_list[location].oldest_pkt = (ip_pkt_list_tot > 1) ? 1 : 0;
+		r_list[location].last_pkts[0] = now;
+		r_list[location].hash_entry = hash_result;
+		time_info[curr_table->time_pos].time = r_list[location].last_seen;
+		curr_table->time_pos = (curr_table->time_pos + 1) % ip_list_tot;
+
+		ans = !info->invert;
+	} else {
+#ifdef DEBUG
+		if(debug) printk(KERN_INFO RECENT_NAME ": match(): Existing table entry. (hr: %d,ha: %u)\n",
+				 hash_result,
+				 addr);
+#endif
+
+		/* Existing item found */
+		location = hash_table[hash_result];
+		/* We have a match on address, now to make sure it meets all requirements for a
+		 * full match. */
+		if(info->check_set & IPT_RECENT_CHECK || info->check_set & IPT_RECENT_UPDATE) {
+			if(!info->seconds && !info->hit_count) ans = !info->invert; else ans = info->invert;
+			if(info->seconds && !info->hit_count) {
+				if(time_before_eq(now,r_list[location].last_seen+info->seconds*HZ)) ans = !info->invert; else ans = info->invert;
+			}
+			if(info->seconds && info->hit_count) {
+				for(pkt_count = 0, hits_found = 0; pkt_count < ip_pkt_list_tot; pkt_count++) {
+					if(time_before_eq(now,r_list[location].last_pkts[pkt_count]+info->seconds*HZ)) hits_found++;
+				}
+				if(hits_found >= info->hit_count) ans = !info->invert; else ans = info->invert;
+			}
+			if(info->hit_count && !info->seconds) {
+				for(pkt_count = 0, hits_found = 0; pkt_count < ip_pkt_list_tot; pkt_count++) {
+					if(r_list[location].last_pkts[pkt_count] == 0) break;
+					hits_found++;
+				}
+				if(hits_found >= info->hit_count) ans = !info->invert; else ans = info->invert;
+			}
+		}
+#ifdef DEBUG
+		if(debug) {
+			if(ans)
+				printk(KERN_INFO RECENT_NAME ": match(): match addr: %u\n",addr);
+			else
+				printk(KERN_INFO RECENT_NAME ": match(): no match addr: %u\n",addr);
+		}
+#endif
+
+		/* If and only if we have been asked to SET, or to UPDATE (on match) do we add the
+		 * current timestamp to the last_seen. */
+		if((info->check_set & IPT_RECENT_SET && (ans = !info->invert)) || (info->check_set & IPT_RECENT_UPDATE && ans)) {
+#ifdef DEBUG
+			if(debug) printk(KERN_INFO RECENT_NAME ": match(): SET or UPDATE; updating time info.\n");
+#endif
+			/* Have to update our time info: stamp the entry with 'now' and
+			 * swap it forward past every neighbour with a smaller time,
+			 * stopping at the current insertion position. */
+			time_loc = r_list[location].time_pos;
+			time_info[time_loc].time = now;
+			time_info[time_loc].position = location;
+			while((time_info[(time_loc+1) % ip_list_tot].time < time_info[time_loc].time) && ((time_loc+1) % ip_list_tot) != curr_table->time_pos) {
+				time_temp = time_info[time_loc].time;
+				time_info[time_loc].time = time_info[(time_loc+1)%ip_list_tot].time;
+				time_info[(time_loc+1)%ip_list_tot].time = time_temp;
+				time_temp = time_info[time_loc].position;
+				time_info[time_loc].position = time_info[(time_loc+1)%ip_list_tot].position;
+				time_info[(time_loc+1)%ip_list_tot].position = time_temp;
+				r_list[time_info[time_loc].position].time_pos = time_loc;
+				r_list[time_info[(time_loc+1)%ip_list_tot].position].time_pos = (time_loc+1)%ip_list_tot;
+				time_loc = (time_loc+1) % ip_list_tot;
+			}
+			r_list[location].time_pos = time_loc;
+			r_list[location].ttl = ttl;
+			r_list[location].last_pkts[r_list[location].oldest_pkt] = now;
+			/* Advance the ring index.  Written as a plain expression: the
+			 * former "x = ++x % n" modified x twice without a sequence
+			 * point, which is undefined behaviour in C. */
+			r_list[location].oldest_pkt = (r_list[location].oldest_pkt + 1) % ip_pkt_list_tot;
+			r_list[location].last_seen = now;
+		}
+		/* If we have been asked to remove the entry from the list, just set it to 0 */
+		if(info->check_set & IPT_RECENT_REMOVE) {
+#ifdef DEBUG
+			if(debug) printk(KERN_INFO RECENT_NAME ": match(): REMOVE; clearing entry (or: %d, hr: %d).\n",orig_hash_result,hash_result);
+#endif
+			/* Check if this is part of a collision chain.  The index must
+			 * wrap like every other probe of hash_table[]; a bare increment
+			 * would read past the end of the array once the chain crosses
+			 * the last bucket. */
+			while(hash_table[(orig_hash_result+1) % ip_list_hash_size] != -1) {
+				orig_hash_result = (orig_hash_result + 1) % ip_list_hash_size;
+				if(hash_func(r_list[hash_table[orig_hash_result]].addr,ip_list_hash_size) == hash_result) {
+					/* Found collision chain, how deep does this rabbit hole go? */
+#ifdef DEBUG
+					if(debug) printk(KERN_INFO RECENT_NAME ": match(): REMOVE; found collision chain.\n");
+#endif
+					end_collision_chain = orig_hash_result;
+				}
+			}
+			if(end_collision_chain != -1) {
+#ifdef DEBUG
+				if(debug) printk(KERN_INFO RECENT_NAME ": match(): REMOVE; part of collision chain, moving to end.\n");
+#endif
+				/* Part of a collision chain, swap it with the end of the chain
+				 * before removing. */
+				r_list[hash_table[end_collision_chain]].hash_entry = hash_result;
+				temp = hash_table[end_collision_chain];
+				hash_table[end_collision_chain] = hash_table[hash_result];
+				hash_table[hash_result] = temp;
+				time_info[r_list[hash_table[hash_result]].time_pos].position = hash_table[hash_result];
+				hash_result = end_collision_chain;
+				r_list[hash_table[hash_result]].hash_entry = hash_result;
+				time_info[r_list[hash_table[hash_result]].time_pos].position = hash_table[hash_result];
+			}
+			location = hash_table[hash_result];
+			hash_table[r_list[location].hash_entry] = -1;
+			/* Give the slot time 0 and re-run the same neighbour-swap pass
+			 * used for SET/UPDATE above. */
+			time_loc = r_list[location].time_pos;
+			time_info[time_loc].time = 0;
+			time_info[time_loc].position = location;
+			while((time_info[(time_loc+1) % ip_list_tot].time < time_info[time_loc].time) && ((time_loc+1) % ip_list_tot) != curr_table->time_pos) {
+				time_temp = time_info[time_loc].time;
+				time_info[time_loc].time = time_info[(time_loc+1)%ip_list_tot].time;
+				time_info[(time_loc+1)%ip_list_tot].time = time_temp;
+				time_temp = time_info[time_loc].position;
+				time_info[time_loc].position = time_info[(time_loc+1)%ip_list_tot].position;
+				time_info[(time_loc+1)%ip_list_tot].position = time_temp;
+				r_list[time_info[time_loc].position].time_pos = time_loc;
+				r_list[time_info[(time_loc+1)%ip_list_tot].position].time_pos = (time_loc+1)%ip_list_tot;
+				time_loc = (time_loc+1) % ip_list_tot;
+			}
+			r_list[location].time_pos = time_loc;
+			r_list[location].last_seen = 0;
+			r_list[location].addr = 0;
+			r_list[location].ttl = 0;
+			memset(r_list[location].last_pkts,0,ip_pkt_list_tot*sizeof(u_int32_t));
+			r_list[location].oldest_pkt = 0;
+			ans = !info->invert;
+		}
+		spin_unlock_bh(&curr_table->list_lock);
+		return ans;
+	}
+
+	spin_unlock_bh(&curr_table->list_lock);
+#ifdef DEBUG
+	if(debug) printk(KERN_INFO RECENT_NAME ": match() left.\n");
+#endif
+	return ans;
+}
+
+/* This function is to verify that the rule given during the userspace iptables
+ * command is correct.
+ * If the command is valid then we check if the table name referred to by the
+ * rule exists, if not it is created.
+ */
+static int
+checkentry(const char *tablename,
+	   const struct ipt_ip *ip,
+	   void *matchinfo,
+	   unsigned int matchsize,
+	   unsigned int hook_mask)
+{
+	/* Validates a rule's ipt_recent_info and makes sure a table named
+	 * info->name exists, creating it (and its /proc entry) when it does not.
+	 *
+	 * Returns 1 when the rule is accepted and 0 when it is rejected.
+	 * Resource failures are reported as 0 as well: this function already
+	 * uses 0 for "reject", and a negative errno is non-zero, so it would be
+	 * indistinguishable from acceptance for a caller that tests the result
+	 * as a boolean.
+	 *
+	 * tablename, ip and hook_mask are not referenced.
+	 */
+	int flag = 0, c;
+	u_int32_t *hold;
+	const struct ipt_recent_info *info = matchinfo;
+	struct recent_ip_tables *curr_table, *find_table, *last_table;
+
+#ifdef DEBUG
+	if(debug) printk(KERN_INFO RECENT_NAME ": checkentry() entered.\n");
+#endif
+
+	if (matchsize != IPT_ALIGN(sizeof(struct ipt_recent_info))) return 0;
+
+	/* seconds and hit_count only valid for CHECK/UPDATE */
+	if(info->check_set & IPT_RECENT_SET) { flag++; if(info->seconds || info->hit_count) return 0; }
+	if(info->check_set & IPT_RECENT_REMOVE) { flag++; if(info->seconds || info->hit_count) return 0; }
+	if(info->check_set & IPT_RECENT_CHECK) flag++;
+	if(info->check_set & IPT_RECENT_UPDATE) flag++;
+
+	/* One and only one of these should ever be set */
+	if(flag != 1) return 0;
+
+	/* Name must be set to something */
+	if(!info->name || !info->name[0]) return 0;
+
+	/* Things look good, create a list for this if it does not exist */
+	/* Lock the linked list while we play with it */
+	spin_lock_bh(&recent_lock);
+
+	/* Look for an entry with this name already created */
+	/* Finds the end of the list and the entry before the end if current name does not exist */
+	find_table = r_tables;
+	while( (last_table = find_table) && strncmp(info->name,find_table->name,IPT_RECENT_NAME_LEN) && (find_table = find_table->next) );
+
+	/* If a table already exists just increment the count on that table and return */
+	if(find_table) {
+#ifdef DEBUG
+		if(debug) printk(KERN_INFO RECENT_NAME ": checkentry: table found (%s), incrementing count.\n",info->name);
+#endif
+		find_table->count++;
+		spin_unlock_bh(&recent_lock);
+		return 1;
+	}
+
+	/* The allocations below are done without recent_lock held; the list
+	 * is searched again under the lock before the new table is linked in. */
+	spin_unlock_bh(&recent_lock);
+
+	/* Table with this name not found */
+	/* Allocate memory for new linked list item */
+
+#ifdef DEBUG
+	if(debug) {
+		printk(KERN_INFO RECENT_NAME ": checkentry: no table found (%s)\n",info->name);
+		printk(KERN_INFO RECENT_NAME ": checkentry: Allocationg %d for link-list entry.\n",sizeof(struct recent_ip_tables));
+	}
+#endif
+
+	curr_table = vmalloc(sizeof(struct recent_ip_tables));
+	if(curr_table == NULL) return 0;
+
+	curr_table->list_lock = SPIN_LOCK_UNLOCKED;
+	curr_table->next = NULL;
+	curr_table->count = 1;
+	curr_table->time_pos = 0;
+	strncpy(curr_table->name,info->name,IPT_RECENT_NAME_LEN);
+	curr_table->name[IPT_RECENT_NAME_LEN-1] = '\0';
+
+	/* Allocate memory for this table and the list of packets in each entry. */
+#ifdef DEBUG
+	if(debug) printk(KERN_INFO RECENT_NAME ": checkentry: Allocating %d for table (%s).\n",
+			 sizeof(struct recent_ip_list)*ip_list_tot,
+			 info->name);
+#endif
+
+	curr_table->table = vmalloc(sizeof(struct recent_ip_list)*ip_list_tot);
+	if(curr_table->table == NULL) { vfree(curr_table); return 0; }
+	memset(curr_table->table,0,sizeof(struct recent_ip_list)*ip_list_tot);
+#ifdef DEBUG
+	if(debug) printk(KERN_INFO RECENT_NAME ": checkentry: Allocating %d for pkt_list.\n",
+			 sizeof(u_int32_t)*ip_pkt_list_tot*ip_list_tot);
+#endif
+
+	/* One contiguous block holds every entry's last_pkts[] array; each
+	 * entry gets an ip_pkt_list_tot-sized slice of it below. */
+	hold = vmalloc(sizeof(u_int32_t)*ip_pkt_list_tot*ip_list_tot);
+#ifdef DEBUG
+	if(debug) printk(KERN_INFO RECENT_NAME ": checkentry: After pkt_list allocation.\n");
+#endif
+	if(hold == NULL) {
+		printk(KERN_INFO RECENT_NAME ": checkentry: unable to allocate for pkt_list.\n");
+		vfree(curr_table->table);
+		vfree(curr_table);
+		return 0;
+	}
+	for(c = 0; c < ip_list_tot; c++) {
+		curr_table->table[c].last_pkts = hold + c*ip_pkt_list_tot;
+	}
+
+	/* Allocate memory for the hash table */
+#ifdef DEBUG
+	if(debug) printk(KERN_INFO RECENT_NAME ": checkentry: Allocating %d for hash_table.\n",
+			 sizeof(int)*ip_list_hash_size);
+#endif
+
+	curr_table->hash_table = vmalloc(sizeof(int)*ip_list_hash_size);
+	if(!curr_table->hash_table) {
+		printk(KERN_INFO RECENT_NAME ": checkentry: unable to allocate for hash_table.\n");
+		vfree(hold);
+		vfree(curr_table->table);
+		vfree(curr_table);
+		return 0;
+	}
+
+	/* -1 marks an empty bucket */
+	for(c = 0; c < ip_list_hash_size; c++) {
+		curr_table->hash_table[c] = -1;
+	}
+
+	/* Allocate memory for the time info */
+#ifdef DEBUG
+	if(debug) printk(KERN_INFO RECENT_NAME ": checkentry: Allocating %d for time_info.\n",
+			 sizeof(struct time_info_list)*ip_list_tot);
+#endif
+
+	curr_table->time_info = vmalloc(sizeof(struct time_info_list)*ip_list_tot);
+	if(!curr_table->time_info) {
+		printk(KERN_INFO RECENT_NAME ": checkentry: unable to allocate for time_info.\n");
+		vfree(curr_table->hash_table);
+		vfree(hold);
+		vfree(curr_table->table);
+		vfree(curr_table);
+		return 0;
+	}
+	for(c = 0; c < ip_list_tot; c++) {
+		curr_table->time_info[c].position = c;
+		curr_table->time_info[c].time = 0;
+	}
+
+	/* Put the new table in place */
+	spin_lock_bh(&recent_lock);
+	find_table = r_tables;
+	while( (last_table = find_table) && strncmp(info->name,find_table->name,IPT_RECENT_NAME_LEN) && (find_table = find_table->next) );
+
+	/* If a table already exists just increment the count on that table and return */
+	if(find_table) {
+		find_table->count++;
+		spin_unlock_bh(&recent_lock);
+#ifdef DEBUG
+		if(debug) printk(KERN_INFO RECENT_NAME ": checkentry: table found (%s), created by other process.\n",info->name);
+#endif
+		/* Someone else created the table while we were allocating; drop ours */
+		vfree(curr_table->time_info);
+		vfree(curr_table->hash_table);
+		vfree(hold);
+		vfree(curr_table->table);
+		vfree(curr_table);
+		return 1;
+	}
+	if(!last_table) r_tables = curr_table; else last_table->next = curr_table;
+
+	spin_unlock_bh(&recent_lock);
+
+#ifdef CONFIG_PROC_FS
+	/* Create our proc 'status' entry. */
+	curr_table->status_proc = create_proc_entry(curr_table->name, ip_list_perms, proc_net_ipt_recent);
+	if (!curr_table->status_proc) {
+		printk(KERN_INFO RECENT_NAME ": checkentry: unable to allocate for /proc entry.\n");
+		/* Destroy the created table */
+		spin_lock_bh(&recent_lock);
+		last_table = NULL;
+		curr_table = r_tables;
+		if(!curr_table) {
+#ifdef DEBUG
+			if(debug) printk(KERN_INFO RECENT_NAME ": checkentry() create_proc failed, no tables.\n");
+#endif
+			spin_unlock_bh(&recent_lock);
+			return 0;
+		}
+		while( strncmp(info->name,curr_table->name,IPT_RECENT_NAME_LEN) && (last_table = curr_table) && (curr_table = curr_table->next) );
+		if(!curr_table) {
+#ifdef DEBUG
+			if(debug) printk(KERN_INFO RECENT_NAME ": checkentry() create_proc failed, table already destroyed.\n");
+#endif
+			spin_unlock_bh(&recent_lock);
+			return 0;
+		}
+		if(last_table) last_table->next = curr_table->next; else r_tables = curr_table->next;
+		spin_unlock_bh(&recent_lock);
+		vfree(curr_table->time_info);
+		vfree(curr_table->hash_table);
+		vfree(hold);
+		vfree(curr_table->table);
+		vfree(curr_table);
+		return 0;
+	}
+
+	curr_table->status_proc->owner = THIS_MODULE;
+	curr_table->status_proc->data = curr_table;
+	/* Write barrier between storing ->data and installing the handlers */
+	wmb();
+	curr_table->status_proc->read_proc = ip_recent_get_info;
+	curr_table->status_proc->write_proc = ip_recent_ctrl;
+#endif /* CONFIG_PROC_FS */
+
+#ifdef DEBUG
+	if(debug) printk(KERN_INFO RECENT_NAME ": checkentry() left.\n");
+#endif
+
+	return 1;
+}
+
+/* This function is called in the event that a rule matching this module is
+ * removed.
+ * When this happens we need to check if there are no other rules matching
+ * the table given. If that is the case then we remove the table and clean
+ * up its memory.
+ */
+static void
+destroy(void *matchinfo, unsigned int matchsize)
+{
+	/* Drops one rule reference from the table named info->name.  When the
+	 * reference count reaches zero the table is unlinked from r_tables and
+	 * its /proc entry and memory are released. */
+	const struct ipt_recent_info *info = matchinfo;
+	struct recent_ip_tables *victim;
+	struct recent_ip_tables *prev = NULL;
+
+#ifdef DEBUG
+	if(debug) printk(KERN_INFO RECENT_NAME ": destroy() entered.\n");
+#endif
+
+	if(matchsize != IPT_ALIGN(sizeof(struct ipt_recent_info)))
+		return;
+
+	/* The table list may only be walked or edited under recent_lock */
+	spin_lock_bh(&recent_lock);
+
+	victim = r_tables;
+	if(victim == NULL) {
+#ifdef DEBUG
+		if(debug) printk(KERN_INFO RECENT_NAME ": destroy() No tables found, leaving.\n");
+#endif
+		goto out_unlock;
+	}
+
+	/* Walk to the table with this name, remembering its predecessor */
+	while(victim != NULL && strncmp(info->name,victim->name,IPT_RECENT_NAME_LEN) != 0) {
+		prev = victim;
+		victim = victim->next;
+	}
+
+	/* Nothing by that name: nothing to do */
+	if(victim == NULL) {
+#ifdef DEBUG
+		if(debug) printk(KERN_INFO RECENT_NAME ": destroy() table not found, leaving.\n");
+#endif
+		goto out_unlock;
+	}
+
+	/* Other rules still reference the table, so it stays */
+	if(--victim->count != 0) {
+#ifdef DEBUG
+		if(debug) printk(KERN_INFO RECENT_NAME ": destroy() table found, non-zero count, leaving.\n");
+#endif
+		goto out_unlock;
+	}
+
+#ifdef DEBUG
+	if(debug) printk(KERN_INFO RECENT_NAME ": destroy() table found, zero count, removing.\n");
+#endif
+
+	/* Last reference gone: unlink the table from the list */
+	if(prev != NULL)
+		prev->next = victim->next;
+	else
+		r_tables = victim->next;
+
+	spin_unlock_bh(&recent_lock);
+
+	/* Take and release the table's own lock once so that anyone still
+	 * working on it after the unlink has finished before we free it */
+	spin_lock_bh(&victim->list_lock);
+	spin_unlock_bh(&victim->list_lock);
+
+#ifdef CONFIG_PROC_FS
+	if(victim->status_proc) remove_proc_entry(victim->name,proc_net_ipt_recent);
+#endif /* CONFIG_PROC_FS */
+	/* table[0].last_pkts is the start of the single block shared by all
+	 * entries' packet lists */
+	vfree(victim->table[0].last_pkts);
+	vfree(victim->table);
+	vfree(victim->hash_table);
+	vfree(victim->time_info);
+	vfree(victim);
+
+#ifdef DEBUG
+	if(debug) printk(KERN_INFO RECENT_NAME ": destroy() left.\n");
+#endif
+
+	return;
+
+out_unlock:
+	spin_unlock_bh(&recent_lock);
+}
+
+/* This is the structure we pass to ipt_register to register our
+ * module with iptables.
+ */
+static struct ipt_match recent_match = {
+ .name = "recent", /* match name as registered with iptables */
+ .match = &match, /* per-packet evaluation */
+ .checkentry = &checkentry, /* rule validation; creates the named table if needed */
+ .destroy = &destroy, /* rule removal; frees the table on last reference */
+ .me = THIS_MODULE
+};
+
+/* Kernel module initialization. */
+static int __init init(void)
+{
+	/* Creates the /proc/net "ipt_recent" directory, settles on a hash
+	 * table size and registers the match.
+	 *
+	 * Returns 0 on success, -ENOMEM if the proc directory cannot be
+	 * created, or the error from ipt_register_match().  The proc directory
+	 * is removed again when registration fails, so a failed load leaves
+	 * nothing behind.
+	 */
+	int count;
+	int err;
+
+	printk(version);
+	proc_net_ipt_recent = proc_mkdir("ipt_recent",proc_net);
+	if(!proc_net_ipt_recent) return -ENOMEM;
+
+	/* The hash table must have more buckets than there are list entries */
+	if(ip_list_hash_size && ip_list_hash_size <= ip_list_tot) {
+		printk(KERN_WARNING RECENT_NAME ": ip_list_hash_size too small, resetting to default.\n");
+		ip_list_hash_size = 0;
+	}
+
+	/* Default size: the smallest power of two (at least 4) that is
+	 * >= 3 * ip_list_tot */
+	if(!ip_list_hash_size) {
+		ip_list_hash_size = ip_list_tot*3;
+		count = 2*2;
+		while(ip_list_hash_size > count) count = count*2;
+		ip_list_hash_size = count;
+	}
+
+#ifdef DEBUG
+	if(debug) printk(KERN_INFO RECENT_NAME ": ip_list_hash_size: %d\n",ip_list_hash_size);
+#endif
+
+	err = ipt_register_match(&recent_match);
+	if(err)
+		remove_proc_entry("ipt_recent",proc_net);
+
+	return err;
+}
+
+/* Kernel module destruction. */
+static void __exit fini(void)
+{
+ /* Unregister the match first, then remove the proc directory that
+  * init() created. */
+ ipt_unregister_match(&recent_match);
+
+ remove_proc_entry("ipt_recent",proc_net);
+}
+
+/* Register our module with the kernel: init() is the module_init handler
+ * and fini() is the module_exit handler. */
+module_init(init);
+module_exit(fini);
diff --git a/release/src/linux/linux/net/ipv4/netfilter/ipt_string.c b/release/src/linux/linux/net/ipv4/netfilter/ipt_string.c
new file mode 100644
index 00000000..a18b89d0
--- /dev/null
+++ b/release/src/linux/linux/net/ipv4/netfilter/ipt_string.c
@@ -0,0 +1,218 @@
+/* Kernel module to match a string into a packet.
+ *
+ * Copyright (C) 2000 Emmanuel Roger <winfield@freegates.be>
+ *
+ * ChangeLog
+ * 19.02.2002: Gianni Tedesco <gianni@ecsc.co.uk>
+ * Fixed SMP re-entrancy problem using per-cpu data areas
+ * for the skip/shift tables.
+ * 02.05.2001: Gianni Tedesco <gianni@ecsc.co.uk>
+ * Fixed kernel panic, due to overrunning boyer moore string
+ * tables. Also slightly tweaked heuristic for deciding what
+ * search algo to use.
+ * 27.01.2001: Gianni Tedesco <gianni@ecsc.co.uk>
+ * Implemented Boyer Moore Sublinear search algorithm
+ * alongside the existing linear search based on memcmp().
+ * Also a quick check to decide which method to use on a per
+ * packet basis.
+ */
+
+#include <linux/smp.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/file.h>
+#include <linux/errno.h>
+#include <net/sock.h>
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_string.h>
+
+MODULE_LICENSE("GPL");
+
+/* Scratch tables used by search_sublinear().  init() allocates each array
+ * with BM_MAX_HLEN ints. */
+struct string_per_cpu {
+ int *skip; /* indexed by a byte value */
+ int *shift; /* indexed by a count of matched bytes */
+ int *len; /* indexed by an offset into the needle */
+};
+
+/* Array of smp_num_cpus entries, indexed with smp_processor_id(); NULL
+ * until init() has allocated it. */
+struct string_per_cpu *bm_string_data=NULL;
+
+/* Boyer Moore Sublinear string search - VERY FAST */
+char *search_sublinear (char *needle, char *haystack, int needle_len, int haystack_len)
+{
+	/* Searches haystack[0 .. haystack_len-1] for the needle_len bytes at
+	 * needle, comparing right-to-left and advancing by the larger of the
+	 * skip[] and shift[] distances after each mismatch.
+	 *
+	 * Returns a pointer to the first byte of the match inside haystack,
+	 * or NULL when there is none.
+	 *
+	 * The skip/shift/len scratch arrays of the current CPU
+	 * (bm_string_data[smp_processor_id()]) are overwritten on every call.
+	 */
+	int M1, right_end, sk, sh;
+	int ended, j, i;
+	int cpu;
+
+	int *skip, *shift, *len;
+
+	/* use data suitable for this CPU */
+	cpu = smp_processor_id();
+	shift = bm_string_data[cpu].shift;
+	skip = bm_string_data[cpu].skip;
+	len = bm_string_data[cpu].len;
+
+	/* Setup skip/shift tables */
+	M1 = right_end = needle_len-1;
+	for (i = 0; i < BM_MAX_HLEN; i++) skip[i] = needle_len;
+	/* Index by the byte's unsigned value: a plain char may be signed, and
+	 * a byte >= 0x80 would then become a negative array index.  Walk all
+	 * needle_len bytes rather than stopping at the first NUL, since the
+	 * needle's length is given explicitly.
+	 * NOTE(review): assumes BM_MAX_HLEN >= 256 -- confirm in ipt_string.h. */
+	for (i = 0; i < needle_len; i++)
+		skip[(unsigned char)needle[i]] = M1 - i;
+
+	/* len[i]: how many trailing bytes of the needle equal the bytes that
+	 * end i positions earlier.  j is bounded so that needle[M1 - i - j]
+	 * never indexes before the start of the needle. */
+	for (i = 1; i < needle_len; i++) {
+		for (j = 0; j < needle_len - i && needle[M1 - j] == needle[M1 - i - j]; j++);
+		len[i] = j;
+	}
+
+	shift[0] = 1;
+	for (i = 1; i < needle_len; i++) shift[i] = needle_len;
+	for (i = M1; i > 0; i--) shift[len[i]] = i;
+	ended = 0;
+
+	for (i = 0; i < needle_len; i++) {
+		if (len[i] == M1 - i) ended = i;
+		if (ended) shift[i] = ended;
+	}
+
+	/* Do the search*/
+	while (right_end < haystack_len)
+	{
+		for (i = 0; i < needle_len && haystack[right_end - i] == needle[M1 - i]; i++);
+		if (i == needle_len) {
+			return haystack+(right_end - M1);
+		}
+
+		/* same unsigned indexing as when the table was built */
+		sk = skip[(unsigned char)haystack[right_end - i]];
+		sh = shift[i];
+		right_end = max(right_end - i + sk, right_end + sh);
+	}
+
+	return NULL;
+}
+
+/* Linear string search based on memcmp() */
+char *search_linear (char *needle, char *haystack, int needle_len, int haystack_len)
+{
+	/* Tries memcmp() at every offset of haystack at which needle_len bytes
+	 * still fit.
+	 *
+	 * Returns a pointer to the first match inside haystack, or NULL when
+	 * there is none.  A needle longer than the haystack, or a negative
+	 * needle_len, cannot match and yields NULL without forming a pointer
+	 * before the start of haystack or handing memcmp() a negative length.
+	 */
+	char *last;
+	char *t;
+
+	if (needle_len < 0 || needle_len > haystack_len)
+		return NULL;
+
+	/* last offset at which the whole needle still fits */
+	last = haystack + (haystack_len - needle_len);
+
+	for (t = haystack; t <= last; t++) {
+		if (memcmp(t, needle, needle_len) == 0)
+			return t;
+	}
+
+	return NULL;
+}
+
+
+static int
+match(const struct sk_buff *skb,
+      const struct net_device *in,
+      const struct net_device *out,
+      const void *matchinfo,
+      int offset,
+      const void *hdr,
+      u_int16_t datalen,
+      int *hotdrop)
+{
+	/* Searches everything after the IP header for info->string
+	 * (info->len bytes) and returns "found" XOR info->invert.
+	 *
+	 * Only skb->nh.iph and matchinfo are used; in, out, offset, hdr,
+	 * datalen and hotdrop are not referenced.
+	 *
+	 * NOTE(review): the haystack length comes from the header's tot_len
+	 * field, not from the amount of data actually present in the skb --
+	 * confirm the two always agree at the hooks this match is used on.
+	 */
+	const struct ipt_string_info *info = matchinfo;
+	struct iphdr *ip = skb->nh.iph;
+	int hlen, nlen;
+	char *needle, *haystack;
+	proc_ipt_search search=search_linear;
+
+	if ( !ip ) return 0;
+
+	/* get lengths, and validate them */
+	nlen=info->len;
+	hlen=ntohs(ip->tot_len)-(ip->ihl*4);
+	/* A payload shorter than the needle cannot contain it.  That is a
+	 * "not found" result and must honour the invert flag exactly like
+	 * the search result at the bottom does. */
+	if ( nlen > hlen ) return info->invert;
+
+	needle=(char *)&info->string;
+	haystack=(char *)ip+(ip->ihl*4);
+
+	/* The sublinear search comes in to its own
+	 * on the larger packets */
+	if ( (hlen>IPT_STRING_HAYSTACK_THRESH) &&
+	     (nlen>IPT_STRING_NEEDLE_THRESH) ) {
+		if ( hlen < BM_MAX_HLEN ) {
+			search=search_sublinear;
+		}else{
+			/* too big for the sublinear tables: stay with the
+			 * linear search and say so, rate limited */
+			if (net_ratelimit())
+				printk(KERN_INFO "ipt_string: Packet too big "
+					"to attempt sublinear string search "
+					"(%d bytes)\n", hlen );
+		}
+	}
+
+	return ((search(needle, haystack, nlen, hlen)!=NULL) ^ info->invert);
+}
+
+static int
+checkentry(const char *tablename,
+	   const struct ipt_ip *ip,
+	   void *matchinfo,
+	   unsigned int matchsize,
+	   unsigned int hook_mask)
+{
+	/* A rule is accepted (1) exactly when its match data has the size of
+	 * an aligned struct ipt_string_info; otherwise it is rejected (0).
+	 * No other field is inspected.
+	 * NOTE(review): info->len is not range-checked here although match()
+	 * uses it as the needle length -- confirm whether a bound is needed. */
+	const unsigned int expected = IPT_ALIGN(sizeof(struct ipt_string_info));
+
+	return (matchsize == expected) ? 1 : 0;
+}
+
+void string_freeup_data(void)
+{
+	/* Frees every per-CPU skip/shift/len table and then the
+	 * bm_string_data array itself.  The global pointer is cleared
+	 * afterwards, so calling this again (for example from fini() after a
+	 * failed init()) does nothing instead of freeing the same memory
+	 * twice.  Safe to call when nothing was allocated. */
+	int c;
+
+	if ( !bm_string_data )
+		return;
+
+	for(c=0; c<smp_num_cpus; c++) {
+		if ( bm_string_data[c].shift ) kfree(bm_string_data[c].shift);
+		if ( bm_string_data[c].skip ) kfree(bm_string_data[c].skip);
+		if ( bm_string_data[c].len ) kfree(bm_string_data[c].len);
+	}
+	kfree(bm_string_data);
+	bm_string_data = NULL;
+}
+
+/* Registration record for the "string" match: it supplies match() and
+ * checkentry() and leaves the slot after checkentry NULL. */
+static struct ipt_match string_match
+= { { NULL, NULL }, "string", &match, &checkentry, NULL, THIS_MODULE };
+
+static int __init init(void)
+{
+	/* Allocates one set of skip/shift/len tables per CPU and registers
+	 * the match.
+	 *
+	 * Returns 0 on success, -ENOMEM when an allocation fails, or the
+	 * error from ipt_register_match().  On every failure path all memory
+	 * allocated here is released and bm_string_data is reset to NULL, so
+	 * a failed load is reported as a failure and leaves nothing behind.
+	 */
+	int c;
+	int err;
+	size_t tlen;
+	size_t alen;
+
+	tlen=sizeof(struct string_per_cpu)*smp_num_cpus;
+	alen=sizeof(int)*BM_MAX_HLEN;
+
+	/* allocate array of structures */
+	if ( !(bm_string_data=kmalloc(tlen,GFP_KERNEL)) ) {
+		return -ENOMEM;
+	}
+
+	/* zero it so a partial allocation can be cleaned up safely */
+	memset(bm_string_data, 0, tlen);
+
+	/* allocate our skip/shift tables */
+	for(c=0; c<smp_num_cpus; c++) {
+		if ( !(bm_string_data[c].shift=kmalloc(alen, GFP_KERNEL)) )
+			goto alloc_fail;
+		if ( !(bm_string_data[c].skip=kmalloc(alen, GFP_KERNEL)) )
+			goto alloc_fail;
+		if ( !(bm_string_data[c].len=kmalloc(alen, GFP_KERNEL)) )
+			goto alloc_fail;
+	}
+
+	err = ipt_register_match(&string_match);
+	if ( err ) {
+		/* registration failed: do not leak the tables */
+		string_freeup_data();
+		bm_string_data = NULL;
+	}
+	return err;
+
+alloc_fail:
+	string_freeup_data();
+	bm_string_data = NULL;
+	return -ENOMEM;
+}
+
+static void __exit fini(void)
+{
+ /* Unregister the match before freeing the tables that
+  * search_sublinear() reads through bm_string_data. */
+ ipt_unregister_match(&string_match);
+ string_freeup_data();
+}
+
+/* init() is the module_init handler, fini() the module_exit handler. */
+module_init(init);
+module_exit(fini);
diff --git a/release/src/linux/linux/net/ipv4/netfilter/ipt_time.c b/release/src/linux/linux/net/ipv4/netfilter/ipt_time.c
index 216098fc..d3484bd9 100644
--- a/release/src/linux/linux/net/ipv4/netfilter/ipt_time.c
+++ b/release/src/linux/linux/net/ipv4/netfilter/ipt_time.c
@@ -10,6 +10,7 @@
2001-26-09 Fabrice MARIE <fabrice@netfilter.org> : force the match to be in LOCAL_IN or PRE_ROUTING only.
2001-30-11 Fabrice : added the possibility to use the match in FORWARD/OUTPUT with a little hack,
added Nguyen Dang Phuoc Dong <dongnd@tlnet.com.vn> patch to support timezones.
+ 2004-05-02 Fabrice : added support for date matching, from an idea of Fabien COELHO.
*/
#include <linux/module.h>
@@ -19,7 +20,7 @@
#include <linux/time.h>
MODULE_AUTHOR("Fabrice MARIE <fabrice@netfilter.org>");
-MODULE_DESCRIPTION("Match arrival timestamp");
+MODULE_DESCRIPTION("Match arrival timestamp/date");
MODULE_LICENSE("GPL");
struct tm
@@ -53,7 +54,8 @@ match(const struct sk_buff *skb,
{
const struct ipt_time_info *info = matchinfo; /* match info for rule */
struct tm currenttime; /* time human readable */
- unsigned int packet_time;
+ u_int8_t days_of_week[7] = {64, 32, 16, 8, 4, 2, 1};
+ u_int16_t packet_time;
struct timeval kerneltimeval;
time_t packet_local_time;
@@ -66,22 +68,21 @@ match(const struct sk_buff *skb,
else
packet_local_time = skb->stamp.tv_sec;
+ /* First we make sure we are in the date start-stop boundaries */
+ if ((packet_local_time < info->date_start) || (packet_local_time > info->date_stop))
+ return 0; /* We are outside the date boundaries */
+
/* Transform the timestamp of the packet, in a human readable form */
localtime(&packet_local_time, &currenttime);
/* check if we match this timestamp, we start by the days... */
- if (!((1 << currenttime.tm_wday) & info->days_match))
+ if ((days_of_week[currenttime.tm_wday] & info->days_match) != days_of_week[currenttime.tm_wday])
return 0; /* the day doesn't match */
/* ... check the time now */
- packet_time = (currenttime.tm_hour * 60 * 60) + (currenttime.tm_min * 60) + currenttime.tm_sec;
- if (info->time_start < info->time_stop) {
- if ((packet_time < info->time_start) || (packet_time > info->time_stop))
- return 0;
- } else {
- if ((packet_time < info->time_start) && (packet_time > info->time_stop))
- return 0;
- }
+ packet_time = (currenttime.tm_hour * 60) + currenttime.tm_min;
+ if ((packet_time < info->time_start) || (packet_time > info->time_stop))
+ return 0;
/* here we match ! */
return 1;
@@ -96,24 +97,25 @@ checkentry(const char *tablename,
{
struct ipt_time_info *info = matchinfo; /* match info for rule */
- /* First, check that we are in the correct hook */
- /* PRE_ROUTING, LOCAL_IN or FROWARD */
+ /* First, check that we are in the correct hooks */
if (hook_mask
& ~((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_IN) | (1 << NF_IP_FORWARD) | (1 << NF_IP_LOCAL_OUT)))
{
printk("ipt_time: error, only valid for PRE_ROUTING, LOCAL_IN, FORWARD and OUTPUT)\n");
return 0;
}
-
- /* always use kerneltime */
+ /* we use the kerneltime if we are in forward or output */
info->kerneltime = 1;
+ if (hook_mask & ~((1 << NF_IP_FORWARD) | (1 << NF_IP_LOCAL_OUT)))
+ /* we use the skb time */
+ info->kerneltime = 0;
/* Check the size */
- if (matchsize < IPT_ALIGN(sizeof(struct ipt_time_info)))
+ if (matchsize != IPT_ALIGN(sizeof(struct ipt_time_info)))
return 0;
/* Now check the coherence of the data ... */
- if ((info->time_start > 86399) || /* 24*60*60-1 = 86399*/
- (info->time_stop > 86399))
+ if ((info->time_start > 1439) || /* 23*60+59 = 1439*/
+ (info->time_stop > 1439))
{
printk(KERN_WARNING "ipt_time: invalid argument\n");
return 0;
@@ -122,8 +124,12 @@ checkentry(const char *tablename,
return 1;
}
-static struct ipt_match time_match
-= { { NULL, NULL }, "time", &match, &checkentry, NULL, THIS_MODULE };
+static struct ipt_match time_match = {
+ .name = "time",
+ .match = match,
+ .checkentry = checkentry,
+ .me = THIS_MODULE,
+};
static int __init init(void)
{
diff --git a/release/src/linux/linux/net/ipv4/netfilter/ipt_u32.c b/release/src/linux/linux/net/ipv4/netfilter/ipt_u32.c
new file mode 100644
index 00000000..0c749563
--- /dev/null
+++ b/release/src/linux/linux/net/ipv4/netfilter/ipt_u32.c
@@ -0,0 +1,211 @@
+/* Kernel module to match u32 packet content. */
+
+/*
+U32 tests whether quantities of up to 4 bytes extracted from a packet
+have specified values. The specification of what to extract is general
+enough to find data at given offsets from tcp headers or payloads.
+
+ --u32 tests
+ The argument amounts to a program in a small language described below.
+ tests := location = value | tests && location = value
+ value := range | value , range
+ range := number | number : number
+ a single number, n, is interpreted the same as n:n
+ n:m is interpreted as the range of numbers >=n and <=m
+ location := number | location operator number
+ operator := & | << | >> | @
+
+ The operators &, <<, >>, && mean the same as in c. The = is really a set
+ membership operator and the value syntax describes a set. The @ operator
+ is what allows moving to the next header and is described further below.
+
+ *** Until I can find out how to avoid it, there are some artificial limits
+ on the size of the tests:
+ - no more than 10 ='s (and 9 &&'s) in the u32 argument
+ - no more than 10 ranges (and 9 commas) per value
+ - no more than 10 numbers (and 9 operators) per location
+
+ To describe the meaning of location, imagine the following machine that
+ interprets it. There are three registers:
+ A is of type char*, initially the address of the IP header
+ B and C are unsigned 32 bit integers, initially zero
+
+ The instructions are:
+ number B = number;
+ C = (*(A+B)<<24)+(*(A+B+1)<<16)+(*(A+B+2)<<8)+*(A+B+3)
+ &number C = C&number
+ <<number C = C<<number
+ >>number C = C>>number
+ @number A = A+C; then do the instruction number
+ Any access of memory outside [skb->head,skb->end] causes the match to fail.
+ Otherwise the result of the computation is the final value of C.
+
+ Whitespace is allowed but not required in the tests.
+ However the characters that do occur there are likely to require
+ shell quoting, so it's a good idea to enclose the arguments in quotes.
+
+Example:
+ match IP packets with total length >= 256
+ The IP header contains a total length field in bytes 2-3.
+ --u32 "0&0xFFFF=0x100:0xFFFF"
+ read bytes 0-3
+ AND that with FFFF (giving bytes 2-3),
+ and test whether that's in the range [0x100:0xFFFF]
+
+Example: (more realistic, hence more complicated)
+ match icmp packets with icmp type 0
+ First test that it's an icmp packet, true iff byte 9 (protocol) = 1
+ --u32 "6&0xFF=1 && ...
+ read bytes 6-9, use & to throw away bytes 6-8 and compare the result to 1
+ Next test that it's not a fragment.
+ (If so it might be part of such a packet but we can't always tell.)
+ n.b. This test is generally needed if you want to match anything
+ beyond the IP header.
+ The last 6 bits of byte 6 and all of byte 7 are 0 iff this is a complete
+ packet (not a fragment). Alternatively, you can allow first fragments
+ by only testing the last 5 bits of byte 6.
+ ... 4&0x3FFF=0 && ...
+ Last test: the first byte past the IP header (the type) is 0
+ This is where we have to use the @syntax. The length of the IP header
+ (IHL) in 32 bit words is stored in the right half of byte 0 of the
+ IP header itself.
+ ... 0>>22&0x3C@0>>24=0"
+ The first 0 means read bytes 0-3,
+ >>22 means shift that 22 bits to the right. Shifting 24 bits would give
+ the first byte, so only 22 bits is four times that plus a few more bits.
+ &3C then eliminates the two extra bits on the right and the first four
+ bits of the first byte.
+ For instance, if IHL=5 then the IP header is 20 (4 x 5) bytes long.
+ In this case bytes 0-1 are (in binary) xxxx0101 yyzzzzzz,
+ >>22 gives the 10 bit value xxxx0101yy and &3C gives 010100.
+ @ means to use this number as a new offset into the packet, and read
+ four bytes starting from there. This is the first 4 bytes of the icmp
+ payload, of which byte 0 is the icmp type. Therefore we simply shift
+ the value 24 to the right to throw out all but the first byte and compare
+ the result with 0.
+
+Example:
+ the 32-bit word at tcp payload bytes 8-11 is any of 1, 2, 5 or 8
+ First we test that the packet is a tcp packet (similar to icmp).
+ --u32 "6&0xFF=6 && ...
+ Next, test that it's not a fragment (same as above).
+ ... 0>>22&0x3C@12>>26&0x3C@8=1,2,5,8"
+ 0>>22&3C as above computes the number of bytes in the IP header.
+ @ makes this the new offset into the packet, which is the start of the
+ tcp header. The length of the tcp header (again in 32 bit words) is
+ the left half of byte 12 of the tcp header. The 12>>26&3C
+ computes this length in bytes (similar to the IP header before).
+ @ makes this the new offset, which is the start of the tcp payload.
+ Finally 8 reads bytes 8-11 of the payload and = checks whether the
+ result is any of 1, 2, 5 or 8
+*/
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+
+#include <linux/netfilter_ipv4/ipt_u32.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+
+/* #include <asm-i386/timex.h> for timing */
+
+MODULE_AUTHOR("Don Cohen <don@isis.cs3-inc.com>");
+MODULE_DESCRIPTION("IP tables u32 matching module");
+MODULE_LICENSE("GPL");
+
+/*
+ * Fetch the big-endian 32-bit word stored at head[off + pos].
+ *
+ * Returns 1 and stores the word in *val when all four bytes lie inside the
+ * buflen-byte buffer starting at head, 0 otherwise.  The check is done on
+ * offsets only, so no out-of-range pointer is ever formed and a large
+ * packet-supplied offset cannot wrap around the address space.
+ */
+static inline int
+u32_fetch(const unsigned char *head, unsigned long buflen,
+	  unsigned long off, u_int32_t pos, u_int32_t *val)
+{
+	const unsigned char *p;
+
+	if (off > buflen || pos > buflen - off || buflen - off - pos < 4)
+		return 0;
+	p = head + off + pos;
+	/* widen before shifting: an unsigned char promoted to int must not
+	 * be shifted into the sign bit */
+	*val = ((u_int32_t)p[0] << 24) | ((u_int32_t)p[1] << 16) |
+	       ((u_int32_t)p[2] << 8) | (u_int32_t)p[3];
+	return 1;
+}
+
+/*
+ * Match callback for the "u32" match.
+ *
+ * Runs every test of the rule (matchinfo is a struct ipt_u32).  A test
+ * starts at the IP header, reads a 32-bit big-endian word at the first
+ * location number and then applies the remaining location elements in
+ * order: & (mask), << and >> (shift) or @ (advance the base by the current
+ * value and read a new word).  The test passes when the final value falls
+ * inside one of its min..max ranges.
+ *
+ * Returns 1 when all tests pass, 0 as soon as one fails or any read would
+ * touch a byte outside [skb->head, skb->end).  Shift counts of 32 or more
+ * yield 0 instead of invoking undefined behaviour.
+ *
+ * The element counts (ntests, nnums, nvalues) are used as found in
+ * matchinfo; they are not range-checked here.
+ */
+static int
+match(const struct sk_buff *skb,
+      const struct net_device *in,
+      const struct net_device *out,
+      const void *matchinfo,
+      int offset,
+      const void *hdr,
+      u_int16_t datalen,
+      int *hotdrop)
+{
+	const struct ipt_u32 *data = matchinfo;
+	const unsigned char *head = skb->head;
+	unsigned long buflen = skb->end - skb->head;
+	unsigned long origoff = (const unsigned char *)skb->nh.iph - head;
+	unsigned long off;
+	int testind, i;
+	int nnums, nvals;
+	u_int32_t val;
+
+	for (testind = 0; testind < data->ntests; testind++) {
+		off = origoff;	/* reset for each test */
+		if (!u32_fetch(head, buflen, off,
+			       data->tests[testind].location[0].number, &val))
+			return 0;
+		nnums = data->tests[testind].nnums;
+		for (i = 1; i < nnums; i++) {
+			u_int32_t number = data->tests[testind].location[i].number;
+
+			switch (data->tests[testind].location[i].nextop) {
+			case IPT_U32_AND:
+				val = val & number;
+				break;
+			case IPT_U32_LEFTSH:
+				val = (number < 32) ? (val << number) : 0;
+				break;
+			case IPT_U32_RIGHTSH:
+				val = (number < 32) ? (val >> number) : 0;
+				break;
+			case IPT_U32_AT:
+				/* off <= buflen holds after any successful
+				 * fetch, so the subtraction cannot wrap */
+				if (val > buflen - off)
+					return 0;
+				off += val;
+				if (!u32_fetch(head, buflen, off, number, &val))
+					return 0;
+				break;
+			}
+		}
+		nvals = data->tests[testind].nvalues;
+		for (i = 0; i < nvals; i++) {
+			if ((data->tests[testind].value[i].min <= val) &&
+			    (val <= data->tests[testind].value[i].max))
+				break;
+		}
+		if (i >= nvals)
+			return 0;	/* value is in none of the ranges */
+	}
+	return 1;
+}
+
+/* Number of elements of a true array (not usable on pointers). */
+#define U32_NELEM(a) (sizeof(a) / sizeof((a)[0]))
+
+/*
+ * Rule validation hook for the "u32" match.
+ *
+ * Accepts the rule (returns 1) only when the userspace blob has exactly the
+ * aligned size of struct ipt_u32 and none of its element counts exceeds the
+ * array it indexes.  match() walks tests[], location[] and value[] using
+ * those counts without further checks, so an oversized count coming from
+ * userspace would make it read beyond the rule data.
+ * Returns 0 to reject the rule.
+ */
+static int
+checkentry(const char *tablename,
+	   const struct ipt_ip *ip,
+	   void *matchinfo,
+	   unsigned int matchsize,
+	   unsigned int hook_mask)
+{
+	const struct ipt_u32 *data = matchinfo;
+	unsigned int t;
+
+	if (matchsize != IPT_ALIGN(sizeof(struct ipt_u32)))
+		return 0;
+
+	if (data->ntests > U32_NELEM(data->tests))
+		return 0;
+	for (t = 0; t < data->ntests; t++) {
+		if (data->tests[t].nnums > U32_NELEM(data->tests[t].location))
+			return 0;
+		if (data->tests[t].nvalues > U32_NELEM(data->tests[t].value))
+			return 0;
+	}
+	return 1;
+}
+
+/* Registration record for the "u32" match; members not named here
+ * (list linkage, destroy hook) are left zero/NULL. */
+static struct ipt_match u32_match = {
+	.name		= "u32",
+	.match		= &match,
+	.checkentry	= &checkentry,
+	.me		= THIS_MODULE,
+};
+
+/* Module load: register the match and pass the registration status on. */
+static int __init init(void)
+{
+	int status = ipt_register_match(&u32_match);
+
+	return status;
+}
+
+/* Module unload: withdraw the match again. */
+static void __exit fini(void)
+{
+	ipt_unregister_match(&u32_match);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/release/src/linux/linux/net/ipv4/netfilter/ipt_web.c b/release/src/linux/linux/net/ipv4/netfilter/ipt_web.c
new file mode 100644
index 00000000..c32a860a
--- /dev/null
+++ b/release/src/linux/linux/net/ipv4/netfilter/ipt_web.c
@@ -0,0 +1,246 @@
+/*
+
+ web (experimental)
+ HTTP client request match
+ Copyright (C) 2006 Jonathan Zarate
+
+ Licensed under GNU GPL v2 or later.
+
+*/
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_web.h>
+
+MODULE_AUTHOR("Jonathan Zarate");
+MODULE_DESCRIPTION("HTTP client request match (experimental)");
+MODULE_LICENSE("GPL");
+
+
+// #define LOG printk
+/* Debug logging is compiled out.  The expansion deliberately carries no
+ * trailing semicolon, so that "LOG(...);" is exactly one statement and stays
+ * safe inside an unbraced if/else. */
+#define LOG(...) do { } while (0)
+
+
+/*
+ * Test the byte range [data, tail) against a list of patterns.
+ *
+ * text holds NUL-terminated patterns packed back to back; an empty pattern
+ * (a second NUL) ends the list.  A pattern takes one of four forms:
+ *	text	the range contains "text"
+ *	^text	the range begins with "text"
+ *	text$	the range ends with "text"
+ *	^text$	the range is exactly "text"
+ * Blanks at either end of the range are ignored.  Comparison is bytewise
+ * (memcmp) and therefore case-sensitive.
+ *
+ * Returns 1 as soon as one pattern matches, 0 when none does.
+ */
+static int find(const char *data, const char *tail, const char *text)
+{
+	int n, o;
+	int dlen;
+	const char *p, *e;
+
+	while ((data < tail) && (*data == ' ')) ++data;
+	while ((tail > data) && (*(tail - 1) == ' ')) --tail;
+
+	dlen = tail - data;
+
+	while (*text) {
+		n = o = strlen(text);
+		if (*text == '^') {
+			--n;
+			if (*(text + n) == '$') {
+				// exact
+				--n;
+				if ((dlen == n) && (memcmp(data, text + 1, n) == 0)) {
+					LOG(KERN_INFO "matched %s\n", text);
+					return 1;
+				}
+			}
+			else {
+				// begins with
+				if ((dlen >= n) && (memcmp(data, text + 1, n) == 0)) {
+					LOG(KERN_INFO "matched %s\n", text);
+					return 1;
+				}
+			}
+		}
+		else if (*(text + n - 1) == '$') {
+			// ends with
+			--n;
+			// the length test keeps tail - n inside [data, tail); without
+			// it a pattern longer than the range is compared against the
+			// bytes in front of data
+			if ((dlen >= n) && (memcmp(tail - n, text, n) == 0)) {
+				LOG(KERN_INFO "matched %s\n", text);
+				return 1;
+			}
+		}
+		else if (dlen >= n) {
+			// contains
+			p = data;
+			e = tail - n;
+			while (p <= e) {
+				if (memcmp(p, text, n) == 0) {
+					LOG(KERN_INFO "matched %s\n", text);
+					return 1;
+				}
+				++p;
+			}
+		}
+
+		// step over this pattern and its terminating NUL
+		text += o + 1;
+	}
+	return 0;
+}
+
+/*
+ * Find the first carriage return in [data, tail).
+ *
+ * Returns a pointer to it, or NULL when the range holds fewer than min
+ * bytes or contains no '\r' at all.
+ */
+static inline const char *findend(const char *data, const char *tail, int min)
+{
+	const char *cur;
+
+	if ((int)(tail - data) < min)
+		return NULL;
+
+	for (cur = data; cur < tail; ++cur) {
+		if (*cur == '\r')
+			return cur;
+	}
+	return NULL;
+}
+
+/*
+ * Match callback for the "web" match: recognise an HTTP request at the start
+ * of a TCP payload and compare part of it with the rule's patterns.
+ *
+ * hdr/datalen describe the transport header and everything after it;
+ * matchinfo is a struct ipt_web_info.  Only the first 1024 payload bytes are
+ * looked at.  A packet counts as a request when the payload starts with
+ * "GET " or "POST" and its first line ends in " HTTP/x.y\r".
+ *
+ * What is compared depends on info->mode:
+ *	IPT_WEB_HTTP	nothing, any request matches
+ *	IPT_WEB_HORE	the request URI, and failing that the Host line
+ *	IPT_WEB_PATH	the request URI up to (not including) '?'
+ *	IPT_WEB_QUERY	the request URI after '?'
+ *	IPT_WEB_RURI	the whole request URI
+ *	anything else	the value of the "Host: " line
+ *
+ * Returns the comparison result XOR info->invert.  Whenever the packet is
+ * not a usable request (fragment, not TCP, truncated header, no request
+ * line, no Host line where one is needed) the result is info->invert, i.e.
+ * "the patterns did not match".
+ */
+static int match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out,
+	const void *matchinfo, int offset, const void *hdr, u_int16_t datalen, int *hotdrop)
+{
+	const struct ipt_web_info *info = matchinfo;
+	const struct tcphdr *tcph;
+	const char *data;
+	const char *tail;
+	const char *p, *q;
+	int doff, dlen;
+
+	// later fragments carry no transport header
+	if (offset != 0) return info->invert;
+
+	// hdr is read as a TCP header below, so make sure it is one and that
+	// the fixed part of it is actually there
+	if (skb->nh.iph->protocol != IPPROTO_TCP) return info->invert;
+	if (datalen < sizeof(struct tcphdr)) return info->invert;
+
+	tcph = hdr;
+	doff = (tcph->doff * 4);
+	if (doff < (int)sizeof(struct tcphdr)) return info->invert;
+	data = (const char *)tcph + doff;
+	dlen = datalen - doff;
+
+	// POST / HTTP/1.0$$$$
+	// GET / HTTP/1.0$$$$
+	// 1234567890123456789
+	if (dlen < 18) return info->invert;
+
+	// "GET " or "POST"; compared bytewise because the payload is not
+	// guaranteed to be aligned for a 32-bit load
+	if ((memcmp(data, "GET ", 4) != 0) && (memcmp(data, "POST", 4) != 0)) {
+		return info->invert;
+	}
+
+	tail = data + dlen;
+	if (dlen > 1024) {
+		dlen = 1024;
+		tail = data + 1024;
+	}
+
+	// POST / HTTP/1.0$$$$
+	// GET / HTTP/1.0$$$$	-- minimum
+	// 0123456789012345678
+	//      9876543210
+	// p = the request line's \r; the 9 bytes in front of it are " HTTP/x.y"
+	if (((p = findend(data + 14, tail, 18)) == NULL) || (memcmp(p - 9, " HTTP/", 6) != 0))
+		return info->invert;
+
+	switch (info->mode) {
+	case IPT_WEB_HTTP:
+		return !info->invert;
+	case IPT_WEB_HORE:
+		// entire request line, else host line
+		if (find(data + 4, p - 9, info->text)) return !info->invert;
+		break;
+	case IPT_WEB_PATH:
+		// left side of '?' or entire line
+		q = data += 4;
+		p -= 9;
+		while ((q < p) && (*q != '?')) ++q;
+		return find(data, q, info->text) ^ info->invert;
+	case IPT_WEB_QUERY:
+		// right side of '?' or none
+		q = data + 4;
+		p -= 9;
+		while ((q < p) && (*q != '?')) ++q;
+		if (q >= p) return info->invert;
+		return find(q + 1, p, info->text) ^ info->invert;
+	case IPT_WEB_RURI:
+		// entire request line
+		return find(data + 4, p - 9, info->text) ^ info->invert;
+	default:
+		// shutup compiler
+		break;
+	}
+
+	// else, IPT_WEB_HOST (also reached from IPT_WEB_HORE): walk the
+	// following lines until one starts with "Host: "
+
+	data = p + 2;	// skip previous \r\n
+	while ((p = findend(data, tail, 8)) != NULL) {	// p = current line's \r
+		if (memcmp(data, "Host: ", 6) == 0)
+			return find(data + 6, p, info->text) ^ info->invert;
+		data = p + 2;
+	}
+
+	// no Host line: nothing matched, so the result is the invert flag
+	// like on every other miss (a hard 0 would defeat inverted rules)
+	return info->invert;
+}
+
+/*
+ * Rule validation hook for the "web" match: the rule is accepted (1) only
+ * when the userspace blob has exactly the aligned size of
+ * struct ipt_web_info, otherwise it is rejected (0).
+ */
+static int checkentry(const char *tablename, const struct ipt_ip *ip, void *matchinfo,
+	unsigned int matchsize, unsigned int hook_mask)
+{
+	if (matchsize != IPT_ALIGN(sizeof(struct ipt_web_info)))
+		return 0;
+	return 1;
+}
+
+
+/* Registration record for the "web" match; members not named here
+ * (list linkage, destroy hook) are left zero/NULL. */
+static struct ipt_match web_match = {
+	.name		= "web",
+	.match		= &match,
+	.checkentry	= &checkentry,
+	.me		= THIS_MODULE,
+};
+
+/* Module load: register the match and pass the registration status on. */
+static int __init init(void)
+{
+	int status;
+
+//	LOG(KERN_INFO "ipt_web <" __DATE__ " " __TIME__ "> loaded\n");
+	status = ipt_register_match(&web_match);
+	return status;
+}
+
+/* Module unload: withdraw the match again. */
+static void __exit fini(void)
+{
+	ipt_unregister_match(&web_match);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/release/src/linux/linux/net/ipv4/netfilter/regexp/regexp.c b/release/src/linux/linux/net/ipv4/netfilter/regexp/regexp.c
new file mode 100644
index 00000000..31ef35b9
--- /dev/null
+++ b/release/src/linux/linux/net/ipv4/netfilter/regexp/regexp.c
@@ -0,0 +1,1195 @@
+/*
+ * regcomp and regexec -- regsub and regerror are elsewhere
+ * @(#)regexp.c 1.3 of 18 April 87
+ *
+ * Copyright (c) 1986 by University of Toronto.
+ * Written by Henry Spencer. Not derived from licensed software.
+ *
+ * Permission is granted to anyone to use this software for any
+ * purpose on any computer system, and to redistribute it freely,
+ * subject to the following restrictions:
+ *
+ * 1. The author is not responsible for the consequences of use of
+ * this software, no matter how awful, even if they arise
+ * from defects in it.
+ *
+ * 2. The origin of this software must not be misrepresented, either
+ * by explicit claim or by omission.
+ *
+ * 3. Altered versions must be plainly marked as such, and must not
+ * be misrepresented as being the original software.
+ *
+ * Beware that some of this code is subtly aware of the way operator
+ * precedence is structured in regular expressions. Serious changes in
+ * regular-expression syntax might require a total rethink.
+ *
+ * This code was modified by Ethan Sommer to work within the kernel
+ * (it now uses kmalloc etc..)
+ *
+ * Modified slightly by Matthew Strait to use more modern C.
+ */
+
+#include "regexp.h"
+#include "regmagic.h"
+
+/* added by ethan and matt. Lets it work in both kernel and user space.
+(So iptables can use it, for instance.) Yea, it goes both ways... */
+#if __KERNEL__
+ #define malloc(foo) kmalloc(foo,GFP_ATOMIC)
+#else
+ #define printk(format,args...) printf(format,##args)
+#endif
+
+void regerror(char * s) /* report a regexp error: prints s behind a "Regexp: " prefix */
+{
+ printk("<3>Regexp: %s\n", s);
+ /* Returns normally after printing; the FAIL macro does the bailing out. */
+}
+
+/*
+ * The "internal use only" fields in regexp.h are present to pass info from
+ * compile to execute that permits the execute phase to run lots faster on
+ * simple cases. They are:
+ *
+ * regstart char that must begin a match; '\0' if none obvious
+ * reganch is the match anchored (at beginning-of-line only)?
+ * regmust string (pointer into program) that match must include, or NULL
+ * regmlen length of regmust string
+ *
+ * Regstart and reganch permit very fast decisions on suitable starting points
+ * for a match, cutting down the work a lot. Regmust permits fast rejection
+ * of lines that cannot possibly match. The regmust tests are costly enough
+ * that regcomp() supplies a regmust only if the r.e. contains something
+ * potentially expensive (at present, the only such thing detected is * or +
+ * at the start of the r.e., which can involve a lot of backup). Regmlen is
+ * supplied because the test in regexec() needs it and regcomp() is computing
+ * it anyway.
+ */
+
+/*
+ * Structure for regexp "program". This is essentially a linear encoding
+ * of a nondeterministic finite-state machine (aka syntax charts or
+ * "railroad normal form" in parsing technology). Each node is an opcode
+ * plus a "next" pointer, possibly plus an operand. "Next" pointers of
+ * all nodes except BRANCH implement concatenation; a "next" pointer with
+ * a BRANCH on both ends of it is connecting two alternatives. (Here we
+ * have one of the subtle syntax dependencies: an individual BRANCH (as
+ * opposed to a collection of them) is never concatenated with anything
+ * because of operator precedence.) The operand of some types of node is
+ * a literal string; for others, it is a node leading into a sub-FSM. In
+ * particular, the operand of a BRANCH node is the first node of the branch.
+ * (NB this is *not* a tree structure: the tail of the branch connects
+ * to the thing following the set of BRANCHes.) The opcodes are:
+ */
+
+/* definition number opnd? meaning */
+#define END 0 /* no End of program. */
+#define BOL 1 /* no Match "" at beginning of line. */
+#define EOL 2 /* no Match "" at end of line. */
+#define ANY 3 /* no Match any one character. */
+#define ANYOF 4 /* str Match any character in this string. */
+#define ANYBUT 5 /* str Match any character not in this string. */
+#define BRANCH 6 /* node Match this alternative, or the next... */
+#define BACK 7 /* no Match "", "next" ptr points backward. */
+#define EXACTLY 8 /* str Match this string. */
+#define NOTHING 9 /* no Match empty string. */
+#define STAR 10 /* node Match this (simple) thing 0 or more times. */
+#define PLUS 11 /* node Match this (simple) thing 1 or more times. */
+#define OPEN 20 /* no Mark this point in input as start of #n. */
+ /* OPEN+1 is number 1, etc. */
+#define CLOSE 30 /* no Analogous to OPEN. */
+
+/*
+ * Opcode notes:
+ *
+ * BRANCH The set of branches constituting a single choice are hooked
+ * together with their "next" pointers, since precedence prevents
+ * anything being concatenated to any individual branch. The
+ * "next" pointer of the last BRANCH in a choice points to the
+ * thing following the whole choice. This is also where the
+ * final "next" pointer of each individual branch points; each
+ * branch starts with the operand node of a BRANCH node.
+ *
+ * BACK Normal "next" pointers all implicitly point forward; BACK
+ * exists to make loop structures possible.
+ *
+ * STAR,PLUS '?', and complex '*' and '+', are implemented as circular
+ * BRANCH structures using BACK. Simple cases (one character
+ * per match) are implemented with STAR and PLUS for speed
+ * and to minimize recursive plunges.
+ *
+ * OPEN,CLOSE ...are numbered at compile time.
+ */
+
+/*
+ * A node is one char of opcode followed by two chars of "next" pointer.
+ * "Next" pointers are stored as two 8-bit pieces, high order first. The
+ * value is a positive offset from the opcode of the node containing it.
+ * An operand, if any, simply follows the node. (Note that much of the
+ * code generation knows about this implicit relationship.)
+ *
+ * Using two bytes for the "next" pointer is vast overkill for most things,
+ * but allows patterns to get big without disasters.
+ */
+#define OP(p) (*(p))
+#define NEXT(p) (((*((p)+1)&0377)<<8) + (*((p)+2)&0377))
+#define OPERAND(p) ((p) + 3)
+
+/*
+ * See regmagic.h for one further detail of program structure.
+ */
+
+
+/*
+ * Utility definitions.
+ */
+#ifndef CHARBITS
+#define UCHARAT(p) ((int)*(unsigned char *)(p))
+#else
+#define UCHARAT(p) ((int)*(p)&CHARBITS)
+#endif
+
+#define FAIL(m) { regerror(m); return(NULL); }
+#define ISMULT(c) ((c) == '*' || (c) == '+' || (c) == '?')
+#define META "^$.[()|?+*\\"
+
+/*
+ * Flags to be passed up and down.
+ */
+#define HASWIDTH 01 /* Known never to match null string. */
+#define SIMPLE 02 /* Simple enough to be STAR/PLUS operand. */
+#define SPSTART 04 /* Starts with * or +. */
+#define WORST 0 /* Worst case. */
+
+/*
+ * Global work variables for regcomp().
+ */
+static char *regparse; /* Input-scan pointer. */
+static int regnpar; /* () count. */
+static char regdummy;
+static char *regcode; /* Code-emit pointer; &regdummy = don't. */
+static long regsize; /* Code size. */
+
+/*
+ * Forward declarations for regcomp()'s friends.
+ */
+#ifndef STATIC
+#define STATIC static
+#endif
+STATIC char *reg(int paren,int *flagp);
+STATIC char *regbranch(int *flagp);
+STATIC char *regpiece(int *flagp);
+STATIC char *regatom(int *flagp);
+STATIC char *regnode(char op);
+STATIC char *regnext(char *p);
+STATIC void regc(char b);
+STATIC void reginsert(char op, char *opnd);
+STATIC void regtail(char *p, char *val);
+STATIC void regoptail(char *p, char *val);
+
+
+/*
+ * Local strcspn(): length of the leading part of s1 that contains none of
+ * the characters of s2.
+ */
+__kernel_size_t my_strcspn(const char *s1,const char *s2)
+{
+	const char *subject;
+	const char *reject;
+	int span = 0;
+
+	for (subject = s1; *subject != '\0'; subject++) {
+		for (reject = s2; *reject != '\0'; reject++) {
+			if (*reject == *subject)
+				return(span);
+		}
+		span++;
+	}
+	return(span);
+}
+
+/*
+ - regcomp - compile a regular expression into internal code
+ *
+ * We can't allocate space until we know how big the compiled form will be,
+ * but we can't compile it (and thus know how big it is) until we've got a
+ * place to put the code. So we cheat: we compile it twice, once with code
+ * generation turned off and size counting turned on, and once "for real".
+ * This also means that we don't allocate space until we are sure that the
+ * thing really will compile successfully, and we never have to move the
+ * code and thus invalidate pointers into it. (Note that it has to be in
+ * one piece because free() must be able to free it all.)
+ *
+ * Beware that the optimization-preparation code in here knows about some
+ * of the structure of the compiled regexp.
+ *
+ * exp is the pattern text; *patternsize receives the size in bytes of the
+ * block allocated for the compiled form.  Returns that block, or NULL
+ * after reporting the problem through regerror().  Nothing stays allocated
+ * on a NULL return.  The compiler state lives in file-scope variables
+ * (regparse, regnpar, regcode, regsize), so concurrent calls are not
+ * safe unless the caller serialises them.
+ */
+regexp *
+regcomp(char *exp,int *patternsize)
+{
+	regexp *r;
+	char *scan;
+	char *longest;
+	int len;
+	int flags;
+
+	if (exp == NULL)
+		FAIL("NULL argument");
+
+	/* First pass: determine size, legality. */
+	regparse = exp;
+	regnpar = 1;
+	regsize = 0L;
+	regcode = &regdummy;
+	regc(MAGIC);
+	if (reg(0, &flags) == NULL)
+		return(NULL);
+
+	/* Small enough for pointer-storage convention? */
+	if (regsize >= 32767L)		/* Probably could be 65535L. */
+		FAIL("regexp too big");
+
+	/* Allocate space. */
+	*patternsize=sizeof(regexp) + (unsigned)regsize;
+	r = (regexp *)malloc(sizeof(regexp) + (unsigned)regsize);
+	if (r == NULL)
+		FAIL("out of space");
+
+	/* Second pass: emit code. */
+	regparse = exp;
+	regnpar = 1;
+	regcode = r->program;
+	regc(MAGIC);
+	if (reg(0, &flags) == NULL) {
+		/* Should not happen after a clean first pass, but if it
+		 * does the half-built program must not be leaked. */
+#if __KERNEL__
+		kfree(r);
+#else
+		free(r);
+#endif
+		return(NULL);
+	}
+
+	/* Dig out information for optimizations. */
+	r->regstart = '\0';	/* Worst-case defaults. */
+	r->reganch = 0;
+	r->regmust = NULL;
+	r->regmlen = 0;
+	scan = r->program+1;			/* First BRANCH. */
+	if (OP(regnext(scan)) == END) {		/* Only one top-level choice. */
+		scan = OPERAND(scan);
+
+		/* Starting-point info. */
+		if (OP(scan) == EXACTLY)
+			r->regstart = *OPERAND(scan);
+		else if (OP(scan) == BOL)
+			r->reganch++;
+
+		/*
+		 * If there's something expensive in the r.e., find the
+		 * longest literal string that must appear and make it the
+		 * regmust.  Resolve ties in favor of later strings, since
+		 * the regstart check works with the beginning of the r.e.
+		 * and avoiding duplication strengthens checking.  Not a
+		 * strong reason, but sufficient in the absence of others.
+		 */
+		if (flags&SPSTART) {
+			longest = NULL;
+			len = 0;
+			for (; scan != NULL; scan = regnext(scan))
+				if (OP(scan) == EXACTLY && strlen(OPERAND(scan)) >= len) {
+					longest = OPERAND(scan);
+					len = strlen(OPERAND(scan));
+				}
+			r->regmust = longest;
+			r->regmlen = len;
+		}
+	}
+
+	return(r);
+}
+
+/*
+ - reg - regular expression, i.e. main body or parenthesized thing
+ *
+ * Caller must absorb opening parenthesis.
+ *
+ * Combining parenthesis handling with the base level of regular expression
+ * is a trifle forced, but the need to tie the tails of the branches to what
+ * follows makes it hard to avoid.
+ *
+ * paren is non-zero when a parenthesized group is being compiled and zero
+ * for the top level (the "Parenthesized?" note in the parameter list
+ * refers to paren).  *flagp receives HASWIDTH unless one of the branches
+ * lacks it, plus SPSTART when any branch reported it.
+ * Returns the first node emitted (the OPEN node for a group, otherwise the
+ * first BRANCH), or NULL on error.
+ */
+static char *
+reg(int paren, int *flagp /* Parenthesized? */ )
+{
+ register char *ret;
+ register char *br;
+ register char *ender;
+ register int parno = 0; /* 0 makes gcc happy */
+ int flags;
+
+ *flagp = HASWIDTH; /* Tentatively. */
+
+ /* Make an OPEN node, if parenthesized. */
+ if (paren) {
+ if (regnpar >= NSUBEXP)
+ FAIL("too many ()");
+ parno = regnpar;
+ regnpar++;
+ ret = regnode(OPEN+parno);
+ } else
+ ret = NULL;
+
+ /* Pick up the branches, linking them together. */
+ br = regbranch(&flags);
+ if (br == NULL)
+ return(NULL);
+ if (ret != NULL)
+ regtail(ret, br); /* OPEN -> first. */
+ else
+ ret = br;
+ if (!(flags&HASWIDTH))
+ *flagp &= ~HASWIDTH;
+ *flagp |= flags&SPSTART;
+ while (*regparse == '|') {
+ regparse++;
+ br = regbranch(&flags);
+ if (br == NULL)
+ return(NULL);
+ regtail(ret, br); /* BRANCH -> BRANCH. */
+ if (!(flags&HASWIDTH))
+ *flagp &= ~HASWIDTH;
+ *flagp |= flags&SPSTART;
+ }
+
+ /* Make a closing node, and hook it on the end. */
+ ender = regnode((paren) ? CLOSE+parno : END);
+ regtail(ret, ender);
+
+ /* Hook the tails of the branches to the closing node. */
+ for (br = ret; br != NULL; br = regnext(br))
+ regoptail(br, ender);
+
+ /* Check for proper termination. */
+ if (paren && *regparse++ != ')') {
+ FAIL("unmatched ()");
+ } else if (!paren && *regparse != '\0') {
+ if (*regparse == ')') {
+ FAIL("unmatched ()");
+ } else
+ FAIL("junk on end"); /* "Can't happen". */
+ /* NOTREACHED */
+ }
+
+ return(ret);
+}
+
+/*
+ - regbranch - one alternative of an | operator
+ *
+ * Implements the concatenation operator: emits a BRANCH node and chains
+ * every following piece onto the one before it, up to the next '|', ')'
+ * or the end of the pattern.  An alternative without any piece gets a
+ * NOTHING node.  *flagp collects HASWIDTH from all pieces and SPSTART from
+ * the first one.  Returns the BRANCH node, or NULL when a piece fails.
+ */
+static char *
+regbranch(int *flagp)
+{
+	char *head;
+	char *prev = NULL;
+	char *piece;
+	int pflags;
+
+	*flagp = WORST;		/* Tentatively. */
+	head = regnode(BRANCH);
+
+	for (;;) {
+		char c = *regparse;
+
+		if (c == '\0' || c == '|' || c == ')')
+			break;
+
+		piece = regpiece(&pflags);
+		if (piece == NULL)
+			return(NULL);
+		*flagp |= pflags & HASWIDTH;
+		if (prev != NULL)
+			regtail(prev, piece);
+		else			/* First piece. */
+			*flagp |= pflags & SPSTART;
+		prev = piece;
+	}
+
+	if (prev == NULL)		/* No piece at all. */
+		(void) regnode(NOTHING);
+
+	return(head);
+}
+
+/*
+ - regpiece - something followed by possible [*+?]
+ *
+ * Note that the branching code sequences used for ? and the general cases
+ * of * and + are somewhat optimized: they use the same NOTHING node as
+ * both the endmarker for their branch list and the body of the last branch.
+ * It might seem that this node could be dispensed with entirely, but the
+ * endmarker role is not redundant.
+ *
+ * Compiles one atom through regatom() and, when it is followed by '*',
+ * '+' or '?', wraps it in the matching repetition construct.  Without
+ * such an operator the atom's flags are handed on in *flagp unchanged;
+ * with one, *flagp becomes WORST|HASWIDTH for '+' and WORST|SPSTART for
+ * '*' and '?'.  Returns the first node of the piece, or NULL on error
+ * (a * or + operand that could be empty, or a second operator directly
+ * behind the first).
+ */
+static char *
+regpiece(int *flagp)
+{
+ register char *ret;
+ register char op;
+ register char *next;
+ int flags;
+
+ ret = regatom(&flags);
+ if (ret == NULL)
+ return(NULL);
+
+ op = *regparse;
+ if (!ISMULT(op)) {
+ *flagp = flags;
+ return(ret);
+ }
+
+ if (!(flags&HASWIDTH) && op != '?')
+ FAIL("*+ operand could be empty");
+ *flagp = (op != '+') ? (WORST|SPSTART) : (WORST|HASWIDTH);
+
+ if (op == '*' && (flags&SIMPLE))
+ reginsert(STAR, ret);
+ else if (op == '*') {
+ /* Emit x* as (x&|), where & means "self". */
+ reginsert(BRANCH, ret); /* Either x */
+ regoptail(ret, regnode(BACK)); /* and loop */
+ regoptail(ret, ret); /* back */
+ regtail(ret, regnode(BRANCH)); /* or */
+ regtail(ret, regnode(NOTHING)); /* null. */
+ } else if (op == '+' && (flags&SIMPLE))
+ reginsert(PLUS, ret);
+ else if (op == '+') {
+ /* Emit x+ as x(&|), where & means "self". */
+ next = regnode(BRANCH); /* Either */
+ regtail(ret, next);
+ regtail(regnode(BACK), ret); /* loop back */
+ regtail(next, regnode(BRANCH)); /* or */
+ regtail(ret, regnode(NOTHING)); /* null. */
+ } else if (op == '?') {
+ /* Emit x? as (x|) */
+ reginsert(BRANCH, ret); /* Either x */
+ regtail(ret, regnode(BRANCH)); /* or */
+ next = regnode(NOTHING); /* null. */
+ regtail(ret, next);
+ regoptail(ret, next);
+ }
+ regparse++;
+ if (ISMULT(*regparse))
+ FAIL("nested *?+");
+
+ return(ret);
+}
+
+/*
+ - regatom - the lowest level
+ *
+ * Optimization: gobbles an entire sequence of ordinary characters so that
+ * it can turn them into a single node, which is smaller to store and
+ * faster to run. Backslashed characters are exceptions, each becoming a
+ * separate node; the code is simpler that way and it's not worth fixing.
+ *
+ * Consumes one atom at regparse: ^, $, ., a [...] class, a parenthesized
+ * group, a backslash-escaped character, or a run of ordinary characters.
+ * *flagp starts out as WORST and gains HASWIDTH and/or SIMPLE as set by
+ * the case taken; a group passes up HASWIDTH and SPSTART from reg().
+ * Returns the node emitted, or NULL on error.
+ */
+static char *
+regatom(int *flagp)
+{
+ register char *ret;
+ int flags;
+
+ *flagp = WORST; /* Tentatively. */
+
+ switch (*regparse++) {
+ case '^':
+ ret = regnode(BOL);
+ break;
+ case '$':
+ ret = regnode(EOL);
+ break;
+ case '.':
+ ret = regnode(ANY);
+ *flagp |= HASWIDTH|SIMPLE;
+ break;
+ case '[': {
+ register int class;
+ register int classend;
+
+ if (*regparse == '^') { /* Complement of range. */
+ ret = regnode(ANYBUT);
+ regparse++;
+ } else
+ ret = regnode(ANYOF);
+ if (*regparse == ']' || *regparse == '-')
+ regc(*regparse++);
+ while (*regparse != '\0' && *regparse != ']') {
+ if (*regparse == '-') {
+ regparse++;
+ if (*regparse == ']' || *regparse == '\0')
+ regc('-');
+ else {
+ class = UCHARAT(regparse-2)+1;
+ classend = UCHARAT(regparse);
+ if (class > classend+1)
+ FAIL("invalid [] range");
+ for (; class <= classend; class++)
+ regc(class);
+ regparse++;
+ }
+ } else
+ regc(*regparse++);
+ }
+ regc('\0');
+ if (*regparse != ']')
+ FAIL("unmatched []");
+ regparse++;
+ *flagp |= HASWIDTH|SIMPLE;
+ }
+ break;
+ case '(':
+ ret = reg(1, &flags);
+ if (ret == NULL)
+ return(NULL);
+ *flagp |= flags&(HASWIDTH|SPSTART);
+ break;
+ case '\0':
+ case '|':
+ case ')':
+ FAIL("internal urp"); /* Supposed to be caught earlier. */
+ break;
+ case '?':
+ case '+':
+ case '*':
+ FAIL("?+* follows nothing");
+ break;
+ case '\\':
+ if (*regparse == '\0')
+ FAIL("trailing \\");
+ ret = regnode(EXACTLY);
+ regc(*regparse++);
+ regc('\0');
+ *flagp |= HASWIDTH|SIMPLE;
+ break;
+ default: {
+ register int len;
+ register char ender;
+
+ regparse--;
+ len = my_strcspn((const char *)regparse, (const char *)META);
+ if (len <= 0)
+ FAIL("internal disaster");
+ ender = *(regparse+len);
+ if (len > 1 && ISMULT(ender))
+ len--; /* Back off clear of ?+* operand. */
+ *flagp |= HASWIDTH;
+ if (len == 1)
+ *flagp |= SIMPLE;
+ ret = regnode(EXACTLY);
+ while (len > 0) {
+ regc(*regparse++);
+ len--;
+ }
+ regc('\0');
+ }
+ break;
+ }
+
+ return(ret);
+}
+
+/*
+ - regnode - emit a node
+ *
+ * Writes a three-byte node header (opcode plus a zeroed two-byte "next"
+ * field) at regcode and advances regcode past it. While regcode points
+ * at regdummy nothing is written; regsize is bumped by 3 instead.
+ * Returns the location of the node (or regdummy in the sizing case).
+ */
+static char *			/* Location. */
+regnode(char op)
+{
+	char *const node = regcode;
+
+	if (node == &regdummy) {
+		/* Sizing only: account for the header, emit nothing. */
+		regsize += 3;
+		return node;
+	}
+
+	node[0] = op;
+	node[1] = '\0';		/* Null "next" pointer. */
+	node[2] = '\0';
+	regcode = node + 3;
+
+	return node;
+}
+
+/*
+ - regc - emit (if appropriate) a byte of code
+ *
+ * Stores b at regcode and advances it, unless regcode points at
+ * regdummy, in which case only regsize is incremented.
+ */
+static void
+regc(char b)
+{
+	if (regcode == &regdummy) {
+		regsize++;
+		return;
+	}
+	*regcode++ = b;
+}
+
+/*
+ - reginsert - insert an operator in front of already-emitted operand
+ *
+ * Means relocating the operand: everything from opnd up to regcode is
+ * moved three bytes higher, and a fresh node header (op, zero "next")
+ * is written where the operand used to start. While regcode points at
+ * regdummy only regsize is adjusted.
+ */
+static void
+reginsert(char op, char* opnd)
+{
+	char *from;
+	char *to;
+
+	if (regcode == &regdummy) {
+		regsize += 3;
+		return;
+	}
+
+	/* Copy from the top down so the overlapping regions stay intact. */
+	from = regcode;
+	to = regcode + 3;
+	while (from > opnd)
+		*--to = *--from;
+	regcode += 3;
+
+	/* Op node, where operand used to be. */
+	opnd[0] = op;
+	opnd[1] = '\0';
+	opnd[2] = '\0';
+}
+
+/*
+ - regtail - set the next-pointer at the end of a node chain
+ *
+ * Follows regnext() from p to the last node of the chain and stores the
+ * distance to val in that node's two-byte "next" field, high byte
+ * first. For a BACK node the distance is measured backwards. Does
+ * nothing when p is regdummy.
+ */
+static void
+regtail(char *p, char *val)
+{
+	char *last;
+	char *following;
+	int distance;
+
+	if (p == &regdummy)
+		return;
+
+	/* Walk to the node whose "next" is still unset. */
+	last = p;
+	while ((following = regnext(last)) != NULL)
+		last = following;
+
+	if (OP(last) == BACK)
+		distance = last - val;
+	else
+		distance = val - last;
+
+	last[1] = (distance >> 8) & 0377;
+	last[2] = distance & 0377;
+}
+
+/*
+ - regoptail - regtail on operand of first argument; nop if operandless
+ *
+ * Only BRANCH nodes are treated as having an operand; a NULL or
+ * regdummy p is ignored as well.
+ */
+static void
+regoptail(char *p, char *val)
+{
+	/* "Operandless" and "op != BRANCH" are synonymous in practice. */
+	if (p != NULL && p != &regdummy && OP(p) == BRANCH)
+		regtail(OPERAND(p), val);
+}
+
+/*
+ * regexec and friends
+ */
+
+/*
+ * Global work variables for regexec().
+ *
+ * These are file-level statics that regtry() and the matching routines
+ * write, so two regexec() calls running at the same time would
+ * overwrite each other's state.
+ */
+static char *reginput; /* String-input pointer. */
+static char *regbol; /* Beginning of input, for ^ check. */
+static char **regstartp; /* Pointer to startp array. */
+static char **regendp; /* Ditto for endp. */
+
+/*
+ * Forwards.
+ */
+STATIC int regtry(regexp *prog, char *string);
+STATIC int regmatch(char *prog);
+STATIC int regrepeat(char *p);
+
+#ifdef DEBUG
+int regnarrate = 0; /* Nonzero makes regmatch() trace nodes with fprintf(stderr). */
+void regdump();
+STATIC char *regprop(char *op);
+#endif
+
+/*
+ - regexec - match a regexp against a string
+ *
+ * Returns 1 if prog matches somewhere in string, 0 otherwise (including
+ * on a NULL argument or a program whose first byte is not MAGIC, both
+ * of which are logged with printk). On success regtry() has filled in
+ * prog->startp[] and prog->endp[].
+ */
+int
+regexec(regexp *prog, char *string)
+{
+	char *pos;
+
+	/* Be paranoid... */
+	if (prog == NULL || string == NULL) {
+		printk("<3>Regexp: NULL parameter\n");
+		return 0;
+	}
+
+	/* Check validity of program. */
+	if (UCHARAT(prog->program) != MAGIC) {
+		printk("<3>Regexp: corrupted program\n");
+		return 0;
+	}
+
+	/* If there is a "must appear" string, bail out early when absent. */
+	if (prog->regmust != NULL) {
+		pos = strchr(string, prog->regmust[0]);
+		while (pos != NULL &&
+		       strncmp(pos, prog->regmust, prog->regmlen) != 0)
+			pos = strchr(pos + 1, prog->regmust[0]);
+		if (pos == NULL)	/* Not present. */
+			return 0;
+	}
+
+	/* Mark beginning of line for ^ . */
+	regbol = string;
+
+	/* Simplest case: anchored match need be tried only once. */
+	if (prog->reganch)
+		return regtry(prog, string);
+
+	/* Unanchored, but we know what char it must start with. */
+	if (prog->regstart != '\0') {
+		for (pos = strchr(string, prog->regstart); pos != NULL;
+		     pos = strchr(pos + 1, prog->regstart)) {
+			if (regtry(prog, pos))
+				return 1;
+		}
+		return 0;
+	}
+
+	/* General case: try every position, including the terminator. */
+	pos = string;
+	for (;;) {
+		if (regtry(prog, pos))
+			return 1;
+		if (*pos++ == '\0')
+			break;
+	}
+
+	/* Failure. */
+	return 0;
+}
+
+/*
+ - regtry - try match at specific point
+ *
+ * Points the matcher's work variables at string and at prog's capture
+ * arrays, clears all NSUBEXP capture slots, and runs regmatch() on the
+ * node after the magic byte. On success slot 0 is set to span from
+ * string to the final input position.
+ */
+static int			/* 0 failure, 1 success */
+regtry(regexp *prog, char *string)
+{
+	int slot;
+
+	reginput = string;
+	regstartp = prog->startp;
+	regendp = prog->endp;
+
+	for (slot = 0; slot < NSUBEXP; slot++) {
+		prog->startp[slot] = NULL;
+		prog->endp[slot] = NULL;
+	}
+
+	if (!regmatch(prog->program + 1))
+		return 0;
+
+	prog->startp[0] = string;
+	prog->endp[0] = reginput;
+	return 1;
+}
+
+/*
+ - regmatch - main matching routine
+ *
+ * Conceptually the strategy is simple: check to see whether the current
+ * node matches, call self recursively to see whether the rest matches,
+ * and then act accordingly. In practice we make some effort to avoid
+ * recursion, in particular by going through "ordinary" nodes (that don't
+ * need to know whether the rest of the match failed) by a loop instead of
+ * by recursion.
+ *
+ * Reads and advances the file-level reginput cursor as nodes match.
+ * OPEN+n and CLOSE+n record the cursor in regstartp[n] and regendp[n]
+ * only after the remainder of the program has matched. BRANCH restores
+ * reginput before trying the next alternative; STAR and PLUS recompute
+ * it for each shorter repetition count. Returns 1 on reaching END and 0
+ * on any mismatch, unknown opcode, or a node chain that ends without END.
+ */
+static int /* 0 failure, 1 success */
+regmatch(char *prog)
+{
+ register char *scan = prog; /* Current node. */
+ char *next; /* Next node. */
+
+#ifdef DEBUG
+ if (scan != NULL && regnarrate)
+ fprintf(stderr, "%s(\n", regprop(scan));
+#endif
+ while (scan != NULL) {
+#ifdef DEBUG
+ if (regnarrate)
+ fprintf(stderr, "%s...\n", regprop(scan));
+#endif
+ next = regnext(scan);
+
+ switch (OP(scan)) {
+ case BOL:
+ if (reginput != regbol)
+ return(0);
+ break;
+ case EOL:
+ if (*reginput != '\0')
+ return(0);
+ break;
+ case ANY:
+ if (*reginput == '\0')
+ return(0);
+ reginput++;
+ break;
+ case EXACTLY: {
+ register int len;
+ register char *opnd;
+
+ opnd = OPERAND(scan);
+ /* Inline the first character, for speed. */
+ if (*opnd != *reginput)
+ return(0);
+ len = strlen(opnd);
+ if (len > 1 && strncmp(opnd, reginput, len) != 0)
+ return(0);
+ reginput += len;
+ }
+ break;
+ case ANYOF:
+ if (*reginput == '\0' || strchr(OPERAND(scan), *reginput) == NULL)
+ return(0);
+ reginput++;
+ break;
+ case ANYBUT:
+ if (*reginput == '\0' || strchr(OPERAND(scan), *reginput) != NULL)
+ return(0);
+ reginput++;
+ break;
+ case NOTHING:
+ case BACK:
+ break;
+ case OPEN+1:
+ case OPEN+2:
+ case OPEN+3:
+ case OPEN+4:
+ case OPEN+5:
+ case OPEN+6:
+ case OPEN+7:
+ case OPEN+8:
+ case OPEN+9: {
+ register int no;
+ register char *save;
+
+ no = OP(scan) - OPEN;
+ save = reginput; /* Where this group would start. */
+
+ if (regmatch(next)) {
+ /*
+ * Don't set startp if some later
+ * invocation of the same parentheses
+ * already has.
+ */
+ if (regstartp[no] == NULL)
+ regstartp[no] = save;
+ return(1);
+ } else
+ return(0);
+ }
+ break;
+ case CLOSE+1:
+ case CLOSE+2:
+ case CLOSE+3:
+ case CLOSE+4:
+ case CLOSE+5:
+ case CLOSE+6:
+ case CLOSE+7:
+ case CLOSE+8:
+ case CLOSE+9:
+ {
+ register int no;
+ register char *save;
+
+ no = OP(scan) - CLOSE;
+ save = reginput; /* Where this group would end. */
+
+ if (regmatch(next)) {
+ /*
+ * Don't set endp if some later
+ * invocation of the same parentheses
+ * already has.
+ */
+ if (regendp[no] == NULL)
+ regendp[no] = save;
+ return(1);
+ } else
+ return(0);
+ }
+ break;
+ case BRANCH: {
+ register char *save;
+
+ if (OP(next) != BRANCH) /* No choice. */
+ next = OPERAND(scan); /* Avoid recursion. */
+ else {
+ do {
+ save = reginput;
+ if (regmatch(OPERAND(scan)))
+ return(1);
+ reginput = save; /* Alternative failed; rewind the input. */
+ scan = regnext(scan);
+ } while (scan != NULL && OP(scan) == BRANCH);
+ return(0);
+ /* NOTREACHED */
+ }
+ }
+ break;
+ case STAR:
+ case PLUS: {
+ register char nextch;
+ register int no;
+ register char *save;
+ register int min;
+
+ /*
+ * Lookahead to avoid useless match attempts
+ * when we know what character comes next.
+ */
+ nextch = '\0';
+ if (OP(next) == EXACTLY)
+ nextch = *OPERAND(next);
+ min = (OP(scan) == STAR) ? 0 : 1;
+ save = reginput;
+ no = regrepeat(OPERAND(scan)); /* Longest run first; regrepeat() advances reginput. */
+ while (no >= min) {
+ /* If it could work, try it. */
+ if (nextch == '\0' || *reginput == nextch)
+ if (regmatch(next))
+ return(1);
+ /* Couldn't or didn't -- back up. */
+ no--;
+ reginput = save + no; /* Retry with one repetition fewer. */
+ }
+ return(0);
+ }
+ break;
+ case END:
+ return(1); /* Success! */
+ break;
+ default:
+ printk("<3>Regexp: memory corruption\n");
+ return(0);
+ break;
+ }
+
+ scan = next;
+ }
+
+ /*
+ * We get here only if there's trouble -- normally "case END" is
+ * the terminating point.
+ */
+ printk("<3>Regexp: corrupted pointers\n");
+ return(0);
+}
+
+/*
+ - regrepeat - repeatedly match something simple, report how many
+ *
+ * Starting at reginput, counts how many consecutive characters satisfy
+ * the single-character node p (ANY, EXACTLY, ANYOF or ANYBUT), leaves
+ * reginput just past them and returns the count. Any other opcode is
+ * logged and treated as zero matches.
+ */
+static int
+regrepeat(char *p)
+{
+	char *cursor = reginput;
+	char *operand = OPERAND(p);
+	int matched = 0;
+
+	switch (OP(p)) {
+	case ANY:
+		/* Everything up to the terminator qualifies. */
+		matched = strlen(cursor);
+		cursor += matched;
+		break;
+	case EXACTLY:
+		for (; *operand == *cursor; cursor++)
+			matched++;
+		break;
+	case ANYOF:
+		for (; *cursor != '\0' && strchr(operand, *cursor) != NULL; cursor++)
+			matched++;
+		break;
+	case ANYBUT:
+		for (; *cursor != '\0' && strchr(operand, *cursor) == NULL; cursor++)
+			matched++;
+		break;
+	default:		/* Oh dear. Called inappropriately. */
+		printk("<3>Regexp: internal foulup\n");
+		matched = 0;	/* Best compromise. */
+		break;
+	}
+	reginput = cursor;
+
+	return matched;
+}
+
+/*
+ - regnext - dig the "next" pointer out of a node
+ *
+ * Returns NULL for regdummy or when the stored offset is zero;
+ * otherwise the node that lies NEXT(p) bytes after p, or before p when
+ * p is a BACK node.
+ */
+static char*
+regnext(char *p)
+{
+	int distance;
+
+	if (p == &regdummy)
+		return NULL;
+
+	distance = NEXT(p);
+	if (distance == 0)
+		return NULL;
+
+	return (OP(p) == BACK) ? (p - distance) : (p + distance);
+}
+
+#ifdef DEBUG
+
+STATIC char *regprop();
+
+/*
+ - regdump - dump a regexp onto stdout in vaguely comprehensible form
+ *
+ * Compiled only under DEBUG. Walks the program from the byte after the
+ * magic number, printing each node's offset, its regprop() name and the
+ * offset of its "next" node, followed by the literal operand of ANYOF,
+ * ANYBUT and EXACTLY nodes. Stops after the END node, then prints the
+ * regstart, reganch and regmust header fields when they are set.
+ *
+ * NOTE(review): this uses printf()/putchar() while the rest of the
+ * file logs with printk(); confirm these are available wherever DEBUG
+ * builds are made.
+ */
+void
+regdump(regexp *r)
+{
+ register char *s;
+ register char op = EXACTLY; /* Arbitrary non-END op. */
+ register char *next;
+ /* extern char *strchr(); */
+
+
+ s = r->program + 1;
+ while (op != END) { /* While that wasn't END last time... */
+ op = OP(s);
+ printf("%2d%s", s-r->program, regprop(s)); /* Where, what. */
+ next = regnext(s);
+ if (next == NULL) /* Next ptr. */
+ printf("(0)");
+ else
+ printf("(%d)", (s-r->program)+(next-s));
+ s += 3; /* Step over the three-byte node header. */
+ if (op == ANYOF || op == ANYBUT || op == EXACTLY) {
+ /* Literal string, where present. */
+ while (*s != '\0') {
+ putchar(*s);
+ s++;
+ }
+ s++; /* Skip the operand's terminating NUL. */
+ }
+ putchar('\n');
+ }
+
+ /* Header fields of interest. */
+ if (r->regstart != '\0')
+ printf("start `%c' ", r->regstart);
+ if (r->reganch)
+ printf("anchored ");
+ if (r->regmust != NULL)
+ printf("must have \"%s\"", r->regmust);
+ printf("\n");
+}
+
+/*
+ - regprop - printable representation of opcode
+ *
+ * Compiled only under DEBUG. Builds ":" followed by the opcode's name
+ * (with the group number appended for OPEN and CLOSE) in a static
+ * buffer and returns that buffer, so the result is overwritten by the
+ * next call. An unrecognised opcode is logged and yields just ":".
+ */
+static char *
+regprop(char *op)
+{
+#define BUFLEN 50
+	/*
+	 * Name to append, or NULL when buf already holds the full text.
+	 * Must start out NULL: the default case below does not assign it.
+	 */
+	register char *p = NULL;
+	static char buf[BUFLEN];
+
+	strcpy(buf, ":");
+
+	switch (OP(op)) {
+	case BOL:
+		p = "BOL";
+		break;
+	case EOL:
+		p = "EOL";
+		break;
+	case ANY:
+		p = "ANY";
+		break;
+	case ANYOF:
+		p = "ANYOF";
+		break;
+	case ANYBUT:
+		p = "ANYBUT";
+		break;
+	case BRANCH:
+		p = "BRANCH";
+		break;
+	case EXACTLY:
+		p = "EXACTLY";
+		break;
+	case NOTHING:
+		p = "NOTHING";
+		break;
+	case BACK:
+		p = "BACK";
+		break;
+	case END:
+		p = "END";
+		break;
+	case OPEN+1:
+	case OPEN+2:
+	case OPEN+3:
+	case OPEN+4:
+	case OPEN+5:
+	case OPEN+6:
+	case OPEN+7:
+	case OPEN+8:
+	case OPEN+9:
+		snprintf(buf+strlen(buf),BUFLEN-strlen(buf), "OPEN%d", OP(op)-OPEN);
+		p = NULL;
+		break;
+	case CLOSE+1:
+	case CLOSE+2:
+	case CLOSE+3:
+	case CLOSE+4:
+	case CLOSE+5:
+	case CLOSE+6:
+	case CLOSE+7:
+	case CLOSE+8:
+	case CLOSE+9:
+		snprintf(buf+strlen(buf),BUFLEN-strlen(buf), "CLOSE%d", OP(op)-CLOSE);
+		p = NULL;
+		break;
+	case STAR:
+		p = "STAR";
+		break;
+	case PLUS:
+		p = "PLUS";
+		break;
+	default:
+		printk("<3>Regexp: corrupted opcode\n");
+		break;
+	}
+	if (p != NULL) {
+		/* strncat() adds a NUL after the copied bytes; leave room for it. */
+		strncat(buf, p, BUFLEN - strlen(buf) - 1);
+	}
+	return(buf);
+}
+#endif
+
+
diff --git a/release/src/linux/linux/net/ipv4/netfilter/regexp/regexp.h b/release/src/linux/linux/net/ipv4/netfilter/regexp/regexp.h
new file mode 100644
index 00000000..fda9a7c4
--- /dev/null
+++ b/release/src/linux/linux/net/ipv4/netfilter/regexp/regexp.h
@@ -0,0 +1,40 @@
+/*
+ * Definitions etc. for regexp(3) routines.
+ *
+ * Caveat: this is V8 regexp(3) [actually, a reimplementation thereof],
+ * not the System V one.
+ */
+
+#ifndef REGEXP_H
+#define REGEXP_H
+
+/*
+http://www.opensource.apple.com/darwinsource/10.3/expect-1/expect/expect.h ,
+which contains a version of this library, says:
+
+ *
+ * NSUBEXP must be at least 10, and no greater than 117 or the parser
+ * will not work properly.
+ *
+
+However, it looks rather like this library is limited to 10. If you think
+otherwise, let us know.
+*/
+
+#define NSUBEXP 10
+typedef struct regexp { /* Compiled pattern plus the capture slots that a successful regexec() fills in. */
+ char *startp[NSUBEXP]; /* Start of each captured span; slot 0 is the whole match. */
+ char *endp[NSUBEXP]; /* End of each captured span; slot 0 is the whole match. */
+ char regstart; /* Internal use only. */
+ char reganch; /* Internal use only. */
+ char *regmust; /* Internal use only. */
+ int regmlen; /* Internal use only. */
+ char program[1]; /* Unwarranted chumminess with compiler. */
+} regexp;
+
+regexp * regcomp(char *exp, int *patternsize);
+int regexec(regexp *prog, char *string);
+void regsub(regexp *prog, char *source, char *dest);
+void regerror(char *s);
+
+#endif
diff --git a/release/src/linux/linux/net/ipv4/netfilter/regexp/regmagic.h b/release/src/linux/linux/net/ipv4/netfilter/regexp/regmagic.h
new file mode 100644
index 00000000..5acf4478
--- /dev/null
+++ b/release/src/linux/linux/net/ipv4/netfilter/regexp/regmagic.h
@@ -0,0 +1,5 @@
+/*
+ * The first byte of the regexp internal "program" is actually this magic
+ * number; the start node begins in the second byte.
+ */
+#define MAGIC 0234
diff --git a/release/src/linux/linux/net/ipv4/netfilter/regexp/regsub.c b/release/src/linux/linux/net/ipv4/netfilter/regexp/regsub.c
new file mode 100644
index 00000000..339631f0
--- /dev/null
+++ b/release/src/linux/linux/net/ipv4/netfilter/regexp/regsub.c
@@ -0,0 +1,95 @@
+/*
+ * regsub
+ * @(#)regsub.c 1.3 of 2 April 86
+ *
+ * Copyright (c) 1986 by University of Toronto.
+ * Written by Henry Spencer. Not derived from licensed software.
+ *
+ * Permission is granted to anyone to use this software for any
+ * purpose on any computer system, and to redistribute it freely,
+ * subject to the following restrictions:
+ *
+ * 1. The author is not responsible for the consequences of use of
+ * this software, no matter how awful, even if they arise
+ * from defects in it.
+ *
+ * 2. The origin of this software must not be misrepresented, either
+ * by explicit claim or by omission.
+ *
+ * 3. Altered versions must be plainly marked as such, and must not
+ * be misrepresented as being the original software.
+ *
+ *
+ * This code was modified by Ethan Sommer to work within the kernel
+ * (it now uses kmalloc etc..)
+ *
+ */
+#include "regexp.h"
+#include "regmagic.h"
+#include <linux/string.h>
+
+
+#ifndef CHARBITS
+#define UCHARAT(p) ((int)*(unsigned char *)(p))
+#else
+#define UCHARAT(p) ((int)*(p)&CHARBITS)
+#endif
+
+#if 0
+//void regerror(char * s)
+//{
+// printk("regexp(3): %s", s);
+// /* NOTREACHED */
+//}
+#endif
+
+/*
+ - regsub - perform substitutions after a regexp match
+ *
+ * Copies source to dest. An unescaped & is replaced by the whole match
+ * (capture slot 0) and a backslash followed by a digit by that numbered
+ * capture; a backslash followed by a backslash or & yields that
+ * character literally. Captures whose start or end pointer is NULL
+ * expand to nothing.
+ *
+ * dest is written with no length limit, so the caller must provide a
+ * buffer large enough for the expanded text. Problems are reported via
+ * regerror() and the function then returns early without writing the
+ * terminating NUL to dest.
+ */
+void
+regsub(regexp * prog, char * source, char * dest)
+{
+ register char *src;
+ register char *dst;
+ register char c;
+ register int no;
+ register int len;
+
+ /* Not necessary and gcc doesn't like it -MLS */
+ /*extern char *strncpy();*/
+
+ if (prog == NULL || source == NULL || dest == NULL) {
+ regerror("NULL parm to regsub");
+ return;
+ }
+ if (UCHARAT(prog->program) != MAGIC) {
+ regerror("damaged regexp fed to regsub");
+ return;
+ }
+
+ src = source;
+ dst = dest;
+ while ((c = *src++) != '\0') {
+ if (c == '&')
+ no = 0;
+ else if (c == '\\' && '0' <= *src && *src <= '9')
+ no = *src++ - '0';
+ else
+ no = -1; /* Not a substitution marker. */
+
+ if (no < 0) { /* Ordinary character. */
+ if (c == '\\' && (*src == '\\' || *src == '&'))
+ c = *src++;
+ *dst++ = c;
+ } else if (prog->startp[no] != NULL && prog->endp[no] != NULL) {
+ len = prog->endp[no] - prog->startp[no];
+ (void) strncpy(dst, prog->startp[no], len);
+ dst += len;
+ if (len != 0 && *(dst-1) == '\0') { /* strncpy hit NUL. */
+ regerror("damaged match string");
+ return;
+ }
+ }
+ }
+ *dst++ = '\0';
+}
diff --git a/release/src/linux/linux/net/ipv4/netfilter/tomato_ct.c b/release/src/linux/linux/net/ipv4/netfilter/tomato_ct.c
new file mode 100644
index 00000000..a84cab09
--- /dev/null
+++ b/release/src/linux/linux/net/ipv4/netfilter/tomato_ct.c
@@ -0,0 +1,181 @@
+/*
+
+ tomato_ct.c
+ Copyright (C) 2006 Jonathan Zarate
+
+ Licensed under GNU GPL v2.
+
+*/
+#include <linux/module.h>
+#include <linux/proc_fs.h>
+#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <linux/netfilter_ipv4/ip_conntrack_core.h>
+#include <asm/uaccess.h>
+
+// #define TEST_HASHDIST
+
+
+#ifdef TEST_HASHDIST
+/*
+ * hashdist_read - /proc read handler reporting conntrack hash chain lengths.
+ *
+ * Builds one "<bucket>\t<chain length>\n" line per bucket of
+ * ip_conntrack_hash in a temporary buffer while holding
+ * ip_conntrack_lock for reading, then copies the window selected by
+ * offset/length into buffer.
+ *
+ * Returns the number of bytes placed in buffer (0 with *eof set once
+ * offset is at or past the end of the text), or -ENOMEM when the
+ * temporary buffer cannot be allocated.
+ */
+static int hashdist_read(char *buffer, char **start, off_t offset, int length, int *eof, void *data)
+{
+	struct list_head *h;
+	struct list_head *e;
+	int i;
+	int n;
+	int count;
+	char *buf;
+	int max;
+
+	/*
+	 * Budget one "12345\t12345\n" line per bucket.
+	 * NOTE(review): this assumes the bucket index and chain length
+	 * each fit in five digits -- confirm for the configured table size.
+	 */
+	max = ip_conntrack_htable_size * sizeof("12345\t12345\n");
+	buf = kmalloc(max + 1, GFP_KERNEL);
+	if (buf == NULL)
+		return -ENOMEM;	/* Report the failure instead of an empty file. */
+
+	n = 0;
+	/* Stop filling while a full budgeted line of slack still remains. */
+	max -= sizeof("12345\t12345\n");
+
+	READ_LOCK(&ip_conntrack_lock);
+
+	for (i = 0; i < ip_conntrack_htable_size; ++i) {
+		/* Count the entries chained off this bucket's list head. */
+		count = 0;
+		h = &ip_conntrack_hash[i];
+		for (e = h->next; e != h; e = e->next)
+			++count;
+
+		n += sprintf(buf + n, "%d\t%d\n", i, count);
+		if (n > max) {
+			printk("hashdist: %d > %d\n", n, max);
+			break;
+		}
+	}
+
+	READ_UNLOCK(&ip_conntrack_lock);
+
+	if (offset < n) {
+		/* Hand back at most length bytes starting at offset. */
+		n = n - offset;
+		if (n > length) {
+			n = length;
+			*eof = 0;
+		}
+		else {
+			*eof = 1;
+		}
+		memcpy(buffer, buf + offset, n);
+		*start = buffer;
+	}
+	else {
+		n = 0;
+		*eof = 1;
+	}
+
+	kfree(buf);
+	return n;
+}
+#endif
+
+
+/*
+ * interate_all - call func(conntrack, data) for every hash table entry.
+ *
+ * Walks each bucket of ip_conntrack_hash with ip_conntrack_lock held
+ * for writing, so func runs under that lock.
+ */
+static void interate_all(void (*func)(struct ip_conntrack *, unsigned long), unsigned long data)
+{
+	struct list_head *head;
+	struct list_head *pos;
+	int bucket;
+
+	WRITE_LOCK(&ip_conntrack_lock);
+	for (bucket = 0; bucket < ip_conntrack_htable_size; ++bucket) {
+		head = &ip_conntrack_hash[bucket];
+		if (!head)
+			continue;
+		/* The list is circular: stop once we are back at the head. */
+		for (pos = head->next; pos != head; pos = pos->next)
+			func(((struct ip_conntrack_tuple_hash *)pos)->ctrack, data);
+	}
+	WRITE_UNLOCK(&ip_conntrack_lock);
+}
+
+/*
+ * expireearly - pull a connection's timeout forward to a given deadline.
+ * @ct:   conntrack entry to adjust
+ * @data: new absolute expiry time, in jiffies
+ *
+ * Only ever shortens a timeout: entries already due at or before @data
+ * are left alone. The timer is re-armed only when del_timer() returns
+ * nonzero, i.e. when it was still queued.
+ */
+static void expireearly(struct ip_conntrack *ct, unsigned long data)
+{
+	/*
+	 * Compare through a signed difference so the ordering stays right
+	 * when the jiffies counter wraps; a plain '>' gives the wrong
+	 * answer for values on opposite sides of the wrap.
+	 */
+	if ((long)(data - ct->timeout.expires) < 0) {
+		if (del_timer(&ct->timeout)) {
+			ct->timeout.expires = data;
+			add_timer(&ct->timeout);
+		}
+	}
+}
+
+/*
+ * expireearly_write - /proc write handler for "expire_early".
+ *
+ * Accepts a decimal number of seconds (1 to 5 bytes of input), clamps
+ * it to the range 10..86400 and shortens every tracked connection's
+ * timeout to at most that many seconds from now. Input of any other
+ * length is ignored.
+ *
+ * Returns length so the whole write is reported as consumed, or
+ * -EFAULT if the user buffer cannot be read.
+ */
+static int expireearly_write(struct file *file, const char *buffer, unsigned long length, void *data)
+{
+	char s[8];
+	unsigned long n;
+
+	if ((length > 0) && (length < 6)) {
+		/* buffer is a user-space pointer; it must not be read directly. */
+		if (copy_from_user(s, buffer, length))
+			return -EFAULT;
+		s[length] = 0;
+		n = simple_strtoul(s, NULL, 10);
+		if (n < 10) n = 10;
+		else if (n > 86400) n = 86400;
+
+		interate_all(expireearly, jiffies + (n * HZ));
+	}
+
+	return length;
+}
+
+
+/*
+ * clearmarks - interate_all() callback that resets a connection's mark.
+ * The data argument is not used.
+ */
+static void clearmarks(struct ip_conntrack *ct, unsigned long data)
+{
+	(void)data;
+	ct->mark = 0;
+}
+
+/*
+ * clearmarks_write - /proc write handler for "clear_marks".
+ *
+ * When the first byte written is '1', zeroes the mark of every tracked
+ * connection; any other input is ignored.
+ *
+ * Returns length so the whole write is reported as consumed, or
+ * -EFAULT if the user buffer cannot be read.
+ */
+static int clearmarks_write(struct file *file, const char *buffer, unsigned long length, void *data)
+{
+	char first;
+
+	if (length > 0) {
+		/* buffer is a user-space pointer; fetch the byte safely. */
+		if (get_user(first, buffer))
+			return -EFAULT;
+		if (first == '1')
+			interate_all(clearmarks, 0);
+	}
+	return length;
+}
+
+/*
+ * init - module entry point: log a build banner and create the
+ * "expire_early" and "clear_marks" entries (plus "hash_dist" when
+ * TEST_HASHDIST is defined) under proc_net. A failed
+ * create_proc_entry() just leaves that entry absent; the function
+ * always returns 0.
+ */
+static int __init init(void)
+{
+	struct proc_dir_entry *entry;
+
+	printk(__FILE__ " [" __DATE__ " " __TIME__ "]\n");
+
+#ifdef TEST_HASHDIST
+	entry = create_proc_entry("hash_dist", 0400, proc_net);
+	if (entry != NULL) {
+		entry->read_proc = hashdist_read;
+	}
+#endif
+
+	entry = create_proc_entry("expire_early", 0200, proc_net);
+	if (entry != NULL) {
+		entry->write_proc = expireearly_write;
+	}
+
+	entry = create_proc_entry("clear_marks", 0200, proc_net);
+	if (entry != NULL) {
+		entry->write_proc = clearmarks_write;
+	}
+
+	return 0;
+}
+
+/*
+ * fini - module exit point: remove the proc_net entries that init()
+ * asked for.
+ */
+static void __exit fini(void)
+{
+#ifdef TEST_HASHDIST
+	remove_proc_entry("hash_dist", proc_net);
+#endif
+
+	remove_proc_entry("expire_early", proc_net);
+
+	remove_proc_entry("clear_marks", proc_net);
+}
+
+module_init(init);
+module_exit(fini);
+
+MODULE_LICENSE("GPL");
diff --git a/release/src/linux/linux/net/ipv4/route.c b/release/src/linux/linux/net/ipv4/route.c
index dfae0871..f3cf20df 100644
--- a/release/src/linux/linux/net/ipv4/route.c
+++ b/release/src/linux/linux/net/ipv4/route.c
@@ -2465,6 +2465,7 @@ void __init ip_rt_init(void)
panic("IP: failed to allocate ip_dst_cache\n");
goal = num_physpages >> (26 - PAGE_SHIFT);
+// goal = num_physpages >> (21 - PAGE_SHIFT);
for (order = 0; (1UL << order) < goal; order++)
/* NOTHING */;
@@ -2494,9 +2495,18 @@ void __init ip_rt_init(void)
rt_hash_table[i].chain = NULL;
}
+// ip_rt_max_size = (rt_hash_mask + 1) * 2;
+// ipv4_dst_ops.gc_thresh = (ip_rt_max_size / 4);
+
ipv4_dst_ops.gc_thresh = (rt_hash_mask + 1);
ip_rt_max_size = (rt_hash_mask + 1) * 16;
+// printk("gc_thresh=%d\n", ipv4_dst_ops.gc_thresh);
+// printk("ip_rt_max_size=%d\n", ip_rt_max_size);
+// printk("rt_hash_mask=%d\n", rt_hash_mask);
+// printk("goal=%d\n", goal);
+
+
devinet_init();
ip_fib_init();
diff --git a/release/src/linux/linux/net/ipv4/sysctl_net_ipv4.c b/release/src/linux/linux/net/ipv4/sysctl_net_ipv4.c
index 1f4081a9..7fe16445 100644
--- a/release/src/linux/linux/net/ipv4/sysctl_net_ipv4.c
+++ b/release/src/linux/linux/net/ipv4/sysctl_net_ipv4.c
@@ -221,6 +221,18 @@ ctl_table ipv4_table[] = {
&sysctl_icmp_ratemask, sizeof(int), 0644, NULL, &proc_dointvec},
{NET_TCP_TW_REUSE, "tcp_tw_reuse",
&sysctl_tcp_tw_reuse, sizeof(int), 0644, NULL, &proc_dointvec},
+ {NET_TCP_VEGAS, "tcp_vegas_cong_avoid",
+ &sysctl_tcp_vegas_cong_avoid, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {NET_TCP_VEGAS_ALPHA, "tcp_vegas_alpha",
+ &sysctl_tcp_vegas_alpha, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {NET_TCP_VEGAS_BETA, "tcp_vegas_beta",
+ &sysctl_tcp_vegas_beta, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {NET_TCP_VEGAS_GAMMA, "tcp_vegas_gamma",
+ &sysctl_tcp_vegas_gamma, sizeof(int), 0644, NULL,
+ &proc_dointvec},
{0}
};
diff --git a/release/src/linux/linux/net/ipv4/tcp_input.c b/release/src/linux/linux/net/ipv4/tcp_input.c
index 8c99dd52..243e2991 100644
--- a/release/src/linux/linux/net/ipv4/tcp_input.c
+++ b/release/src/linux/linux/net/ipv4/tcp_input.c
@@ -87,6 +87,16 @@ int sysctl_tcp_stdurg = 0;
int sysctl_tcp_rfc1337 = 0;
int sysctl_tcp_max_orphans = NR_FILE;
+int sysctl_tcp_vegas_cong_avoid = 0;
+
+/* Default values of the Vegas variables, in fixed-point representation
+ * with V_PARAM_SHIFT bits to the right of the binary point.
+ */
+#define V_PARAM_SHIFT 1
+int sysctl_tcp_vegas_alpha = 1<<V_PARAM_SHIFT;
+int sysctl_tcp_vegas_beta = 3<<V_PARAM_SHIFT;
+int sysctl_tcp_vegas_gamma = 1<<V_PARAM_SHIFT;
+
#define FLAG_DATA 0x01 /* Incoming frame contained data. */
#define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */
#define FLAG_DATA_ACKED 0x04 /* This ACK acknowledged new data. */
@@ -399,6 +409,42 @@ static void tcp_event_data_recv(struct sock *sk, struct tcp_opt *tp, struct sk_b
tcp_grow_window(sk, tp, skb);
}
+/* Choose the congestion-avoidance mode for a new TCP connection.
+ * With the tcp_vegas_cong_avoid sysctl set, the connection is flagged
+ * for Vegas, its baseRTT is primed with the largest value so the first
+ * sample replaces it, and Vegas is enabled; otherwise Vegas is disabled.
+ */
+void tcp_vegas_init(struct tcp_opt *tp)
+{
+	if (!sysctl_tcp_vegas_cong_avoid) {
+		tcp_vegas_disable(tp);
+		return;
+	}
+
+	tp->vegas.do_vegas = 1;
+	tp->vegas.baseRTT = 0x7fffffff;
+	tcp_vegas_enable(tp);
+}
+
+/* Feed one RTT measurement into the Vegas filters.
+ * Two running minima are kept:
+ * o baseRTT, the smallest sample ever seen, used as the estimate of
+ *   the propagation delay;
+ * o minRTT, the smallest sample seen during the current RTT, used as
+ *   propagation delay + queuing delay (taking the minimum is meant
+ *   to hide the effect of delayed ACKs).
+ * cntRTT counts the samples taken.
+ */
+static inline void vegas_rtt_calc(struct tcp_opt *tp, __u32 rtt)
+{
+	/* Bias by one so neither filter can ever hold a zero RTT. */
+	__u32 sample = rtt + 1;
+
+	/* Long-term minimum: the propagation delay. */
+	if (sample < tp->vegas.baseRTT) {
+		tp->vegas.baseRTT = sample;
+	}
+
+	/* Minimum within the last RTT. */
+	tp->vegas.minRTT = min(tp->vegas.minRTT, sample);
+
+	tp->vegas.cntRTT++;
+}
+
/* Called to compute a smoothed rtt estimate. The data fed to this
* routine either comes from timestamps, or from segments that were
* known _not_ to have been retransmitted [see Karn/Partridge
@@ -412,6 +458,9 @@ static __inline__ void tcp_rtt_estimator(struct tcp_opt *tp, __u32 mrtt)
{
long m = mrtt; /* RTT */
+ if (tcp_vegas_enabled(tp))
+ vegas_rtt_calc(tp, mrtt);
+
/* The following amusing code comes from Jacobson's
* article in SIGCOMM '88. Note that rtt and mdev
* are scaled versions of rtt and mean deviation.
@@ -1013,7 +1062,7 @@ void tcp_enter_loss(struct sock *sk, int how)
tcp_sync_left_out(tp);
tp->reordering = min_t(unsigned int, tp->reordering, sysctl_tcp_reordering);
- tp->ca_state = TCP_CA_Loss;
+ tcp_set_ca_state(tp, TCP_CA_Loss);
tp->high_seq = tp->snd_nxt;
TCP_ECN_queue_cwr(tp);
}
@@ -1375,7 +1424,7 @@ static int tcp_try_undo_recovery(struct sock *sk, struct tcp_opt *tp)
tcp_moderate_cwnd(tp);
return 1;
}
- tp->ca_state = TCP_CA_Open;
+ tcp_set_ca_state(tp, TCP_CA_Open);
return 0;
}
@@ -1435,7 +1484,7 @@ static int tcp_try_undo_loss(struct sock *sk, struct tcp_opt *tp)
tp->retransmits = 0;
tp->undo_marker = 0;
if (!IsReno(tp))
- tp->ca_state = TCP_CA_Open;
+ tcp_set_ca_state(tp, TCP_CA_Open);
return 1;
}
return 0;
@@ -1466,7 +1515,7 @@ static void tcp_try_to_open(struct sock *sk, struct tcp_opt *tp, int flag)
state = TCP_CA_Disorder;
if (tp->ca_state != state) {
- tp->ca_state = state;
+ tcp_set_ca_state(tp, state);
tp->high_seq = tp->snd_nxt;
}
tcp_moderate_cwnd(tp);
@@ -1540,7 +1589,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
* is ACKed for CWR bit to reach receiver. */
if (tp->snd_una != tp->high_seq) {
tcp_complete_cwr(tp);
- tp->ca_state = TCP_CA_Open;
+ tcp_set_ca_state(tp, TCP_CA_Open);
}
break;
@@ -1551,7 +1600,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
* catching for all duplicate ACKs. */
IsReno(tp) || tp->snd_una != tp->high_seq) {
tp->undo_marker = 0;
- tp->ca_state = TCP_CA_Open;
+ tcp_set_ca_state(tp, TCP_CA_Open);
}
break;
@@ -1625,7 +1674,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
}
tp->snd_cwnd_cnt = 0;
- tp->ca_state = TCP_CA_Recovery;
+ tcp_set_ca_state(tp, TCP_CA_Recovery);
}
if (is_dupack || tcp_head_timedout(sk, tp))
@@ -1696,7 +1745,7 @@ tcp_ack_update_rtt(struct tcp_opt *tp, int flag, s32 seq_rtt)
/* This is Jacobson's slow start and congestion avoidance.
* SIGCOMM '88, p. 328.
*/
-static __inline__ void tcp_cong_avoid(struct tcp_opt *tp)
+static __inline__ void reno_cong_avoid(struct tcp_opt *tp)
{
if (tp->snd_cwnd <= tp->snd_ssthresh) {
/* In "safe" area, increase. */
@@ -1716,6 +1765,236 @@ static __inline__ void tcp_cong_avoid(struct tcp_opt *tp)
tp->snd_cwnd_stamp = tcp_time_stamp;
}
+/* This is based on the congestion detection/avoidance scheme described in
+ * Lawrence S. Brakmo and Larry L. Peterson.
+ * "TCP Vegas: End to end congestion avoidance on a global internet."
+ * IEEE Journal on Selected Areas in Communication, 13(8):1465--1480,
+ * October 1995. Available from:
+ * ftp://ftp.cs.arizona.edu/xkernel/Papers/jsac.ps
+ *
+ * See http://www.cs.arizona.edu/xkernel/ for their implementation.
+ * The main aspects that distinguish this implementation from the
+ * Arizona Vegas implementation are:
+ * o We do not change the loss detection or recovery mechanisms of
+ * Linux in any way. Linux already recovers from losses quite well,
+ * using fine-grained timers, NewReno, and FACK.
+ * o To avoid the performance penalty imposed by increasing cwnd
+ * only every-other RTT during slow start, we increase during
+ * every RTT during slow start, just like Reno.
+ * o Largely to allow continuous cwnd growth during slow start,
+ * we use the rate at which ACKs come back as the "actual"
+ * rate, rather than the rate at which data is sent.
+ * o To speed convergence to the right rate, we set the cwnd
+ * to achieve the right ("actual") rate when we exit slow start.
+ * o To filter out the noise caused by delayed ACKs, we use the
+ * minimum RTT sample observed during the last RTT to calculate
+ * the actual rate.
+ * o When the sender re-starts from idle, it waits until it has
+ * received ACKs for an entire flight of new data before making
+ * a cwnd adjustment decision. The original Vegas implementation
+ * assumed senders never went idle.
+ */
+/* vegas_cong_avoid - Vegas congestion-window update, called from tcp_cong_avoid().
+ * @tp: TCP state of the connection; tp->vegas holds the per-RTT bookkeeping.
+ * @ack: sequence number acknowledged by the ACK being processed.
+ * @seq_rtt: RTT sample for this ACK; it is passed on to vegas_rtt_calc().
+ *
+ * Once per RTT (when @ack moves past vegas.beg_snd_nxt) the window is
+ * re-evaluated. On every call the slow-start increment is applied, snd_cwnd
+ * is limited to snd_cwnd_clamp and then raised to at least 2, and
+ * snd_cwnd_stamp is refreshed.
+ */
+static void vegas_cong_avoid(struct tcp_opt *tp, u32 ack, u32 seq_rtt)
+{
+ /* The key players are v_beg_snd_una and v_beg_snd_nxt.
+ *
+ * These are so named because they represent the approximate values
+ * of snd_una and snd_nxt at the beginning of the current RTT. More
+ * precisely, they represent the amount of data sent during the RTT.
+ * At the end of the RTT, when we receive an ACK for v_beg_snd_nxt,
+ * we will calculate that (v_beg_snd_nxt - v_beg_snd_una) outstanding
+ * bytes of data have been ACKed during the course of the RTT, giving
+ * an "actual" rate of:
+ *
+ * (v_beg_snd_nxt - v_beg_snd_una) / (rtt duration)
+ *
+ * Unfortunately, v_beg_snd_una is not exactly equal to snd_una,
+ * because delayed ACKs can cover more than one segment, so they
+ * don't line up nicely with the boundaries of RTTs.
+ *
+ * Another unfortunate fact of life is that delayed ACKs delay the
+ * advance of the left edge of our send window, so that the number
+ * of bytes we send in an RTT is often less than our cwnd will allow.
+ * So we keep track of our cwnd separately, in v_beg_snd_cwnd.
+ */
+
+ if (after(ack, tp->vegas.beg_snd_nxt)) {
+ /* Do the Vegas once-per-RTT cwnd adjustment. */
+ u32 old_wnd, old_snd_cwnd;
+
+
+ /* Here old_wnd is essentially the window of data that was
+ * sent during the previous RTT, and has all
+ * been acknowledged in the course of the RTT that ended
+ * with the ACK we just received. Likewise, old_snd_cwnd
+ * is the cwnd during the previous RTT.
+ */
+ old_wnd = (tp->vegas.beg_snd_nxt - tp->vegas.beg_snd_una) /
+ tp->mss_cache;
+ old_snd_cwnd = tp->vegas.beg_snd_cwnd;
+
+ /* Save the extent of the current window so we can use this
+ * at the end of the next RTT.
+ */
+ tp->vegas.beg_snd_una = tp->vegas.beg_snd_nxt;
+ tp->vegas.beg_snd_nxt = tp->snd_nxt;
+ tp->vegas.beg_snd_cwnd = tp->snd_cwnd;
+
+ /* Take into account the current RTT sample too, to
+ * decrease the impact of delayed acks. This double counts
+ * this sample since we count it for the next window as well,
+ * but that's not too awful, since we're taking the min,
+ * rather than averaging.
+ */
+ vegas_rtt_calc(tp, seq_rtt);
+
+ /* We do the Vegas calculations only if we got enough RTT
+ * samples that we can be reasonably sure that we got
+ * at least one RTT sample that wasn't from a delayed ACK.
+ * If we only had 2 samples total,
+ * then that means we're getting only 1 ACK per RTT, which
+ * means they're almost certainly delayed ACKs.
+ * If we have 3 samples, we should be OK.
+ */
+
+ if (tp->vegas.cntRTT <= 2) {
+ /* We don't have enough RTT samples to do the Vegas
+ * calculation, so we'll behave like Reno.
+ */
+ if (tp->snd_cwnd > tp->snd_ssthresh)
+ tp->snd_cwnd++;
+ } else {
+ u32 rtt, target_cwnd, diff;
+
+ /* We have enough RTT samples, so, using the Vegas
+ * algorithm, we determine if we should increase or
+ * decrease cwnd, and by how much.
+ */
+
+ /* Pluck out the RTT we are using for the Vegas
+ * calculations. This is the min RTT seen during the
+ * last RTT. Taking the min filters out the effects
+ * of delayed ACKs, at the cost of noticing congestion
+ * a bit later.
+ */
+ rtt = tp->vegas.minRTT;
+
+ /* Calculate the cwnd we should have, if we weren't
+ * going too fast.
+ *
+ * This is:
+ * (actual rate in segments) * baseRTT
+ * We keep it as a fixed point number with
+ * V_PARAM_SHIFT bits to the right of the binary point.
+ *
+ * NOTE(review): rtt is the divisor here; presumably
+ * vegas_rtt_calc() never leaves minRTT at 0 - confirm.
+ */
+ target_cwnd = ((old_wnd * tp->vegas.baseRTT)
+ << V_PARAM_SHIFT) / rtt;
+
+ /* Calculate the difference between the window we had,
+ * and the window we would like to have. This quantity
+ * is the "Diff" from the Arizona Vegas papers.
+ *
+ * Again, this is a fixed point number with
+ * V_PARAM_SHIFT bits to the right of the binary
+ * point.
+ *
+ * NOTE(review): diff is unsigned, so the subtraction
+ * wraps to a huge value if target_cwnd is the larger
+ * operand (baseRTT > rtt) - confirm that cannot happen.
+ */
+ diff = (old_wnd << V_PARAM_SHIFT) - target_cwnd;
+
+ if (tp->snd_cwnd < tp->snd_ssthresh) {
+ /* Slow start. */
+ if (diff > sysctl_tcp_vegas_gamma) {
+ /* Going too fast. Time to slow down
+ * and switch to congestion avoidance.
+ */
+ /* 2 is also the floor applied to snd_cwnd at the
+ * end of this function, so the slow-start test
+ * above stops matching from here on.
+ */
+ tp->snd_ssthresh = 2;
+
+ /* Set cwnd to match the actual rate
+ * exactly:
+ * cwnd = (actual rate) * baseRTT
+ * Then we add 1 because the integer
+ * truncation robs us of full link
+ * utilization.
+ */
+ tp->snd_cwnd = min(tp->snd_cwnd,
+ (target_cwnd >>
+ V_PARAM_SHIFT)+1);
+
+ }
+ } else {
+ /* Congestion avoidance. */
+ u32 next_snd_cwnd;
+
+ /* Figure out where we would like cwnd
+ * to be.
+ */
+ if (diff > sysctl_tcp_vegas_beta) {
+ /* The old window was too fast, so
+ * we slow down.
+ */
+ next_snd_cwnd = old_snd_cwnd - 1;
+ } else if (diff < sysctl_tcp_vegas_alpha) {
+ /* We don't have enough extra packets
+ * in the network, so speed up.
+ */
+ next_snd_cwnd = old_snd_cwnd + 1;
+ } else {
+ /* Sending just as fast as we
+ * should be.
+ */
+ next_snd_cwnd = old_snd_cwnd;
+ }
+
+ /* Adjust cwnd upward or downward, toward the
+ * desired value.
+ */
+ if (next_snd_cwnd > tp->snd_cwnd)
+ tp->snd_cwnd++;
+ else if (next_snd_cwnd < tp->snd_cwnd)
+ tp->snd_cwnd--;
+ }
+ }
+
+ /* Wipe the slate clean for the next RTT. */
+ tp->vegas.cntRTT = 0;
+ tp->vegas.minRTT = 0x7fffffff;
+ }
+
+ /* The following code is executed for every ack we receive,
+ * except for the conditions the caller checks before invoking
+ * tcp_cong_avoid() (the original Vegas patch names that check
+ * should_advance_cwnd(); NOTE(review): no function of that name
+ * is visible in this part of the patch - confirm). Mainly this
+ * means that we only execute this code if the ack actually acked
+ * some data.
+ */
+
+ /* If we are in slow start, increase our cwnd in response to this ACK.
+ * (If we are not in slow start then we are in congestion avoidance,
+ * and adjust our congestion window only once per RTT. See the code
+ * above.)
+ */
+ if (tp->snd_cwnd <= tp->snd_ssthresh)
+ tp->snd_cwnd++;
+
+ /* to keep cwnd from growing without bound */
+ tp->snd_cwnd = min_t(u32, tp->snd_cwnd, tp->snd_cwnd_clamp);
+
+ /* Make sure that we are never so timid as to reduce our cwnd below
+ * 2 MSS.
+ *
+ * Going below 2 MSS would risk huge delayed ACKs from our receiver.
+ */
+ tp->snd_cwnd = max(tp->snd_cwnd, 2U);
+
+ tp->snd_cwnd_stamp = tcp_time_stamp;
+}
+
+/* Congestion-avoidance entry point: use the Vegas algorithm when it is
+ * enabled for this connection, the Reno algorithm otherwise.
+ */
+static inline void tcp_cong_avoid(struct tcp_opt *tp, u32 ack, u32 seq_rtt)
+{
+	if (!tcp_vegas_enabled(tp)) {
+		reno_cong_avoid(tp);
+		return;
+	}
+
+	vegas_cong_avoid(tp, ack, seq_rtt);
+}
+
/* Restart timer after forward progress on connection.
* RFC2988 recommends to restart timer to now+rto.
*/
@@ -1730,7 +2009,7 @@ static __inline__ void tcp_ack_packets_out(struct sock *sk, struct tcp_opt *tp)
}
/* Remove acknowledged frames from the retransmission queue. */
-static int tcp_clean_rtx_queue(struct sock *sk)
+static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
{
struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
struct sk_buff *skb;
@@ -1813,6 +2092,7 @@ static int tcp_clean_rtx_queue(struct sock *sk)
}
}
#endif
+ *seq_rtt_p = seq_rtt;
return acked;
}
@@ -1900,6 +2180,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
u32 ack_seq = TCP_SKB_CB(skb)->seq;
u32 ack = TCP_SKB_CB(skb)->ack_seq;
u32 prior_in_flight;
+ s32 seq_rtt;
int prior_packets;
/* If the ack is newer than sent or older than previous acks
@@ -1947,17 +2228,19 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
prior_in_flight = tcp_packets_in_flight(tp);
/* See if we can take anything off of the retransmit queue. */
- flag |= tcp_clean_rtx_queue(sk);
+ flag |= tcp_clean_rtx_queue(sk, &seq_rtt);
if (tcp_ack_is_dubious(tp, flag)) {
/* Advanve CWND, if state allows this. */
- if ((flag&FLAG_DATA_ACKED) && prior_in_flight >= tp->snd_cwnd &&
+ if ((flag&FLAG_DATA_ACKED) &&
+ (tcp_vegas_enabled(tp) || prior_in_flight >= tp->snd_cwnd) &&
tcp_may_raise_cwnd(tp, flag))
- tcp_cong_avoid(tp);
+ tcp_cong_avoid(tp, ack, seq_rtt);
tcp_fastretrans_alert(sk, prior_snd_una, prior_packets, flag);
} else {
- if ((flag&FLAG_DATA_ACKED) && prior_in_flight >= tp->snd_cwnd)
- tcp_cong_avoid(tp);
+ if ((flag & FLAG_DATA_ACKED) &&
+ (tcp_vegas_enabled(tp) || prior_in_flight >= tp->snd_cwnd))
+ tcp_cong_avoid(tp, ack, seq_rtt);
}
if ((flag & FLAG_FORWARD_PROGRESS) || !(flag&FLAG_NOT_DUP))
diff --git a/release/src/linux/linux/net/ipv4/tcp_minisocks.c b/release/src/linux/linux/net/ipv4/tcp_minisocks.c
index b69cc32c..6fdb7681 100644
--- a/release/src/linux/linux/net/ipv4/tcp_minisocks.c
+++ b/release/src/linux/linux/net/ipv4/tcp_minisocks.c
@@ -715,7 +715,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req,
newtp->snd_cwnd = 2;
newtp->snd_cwnd_cnt = 0;
- newtp->ca_state = TCP_CA_Open;
+ tcp_set_ca_state(newtp, TCP_CA_Open);
tcp_init_xmit_timers(newsk);
skb_queue_head_init(&newtp->out_of_order_queue);
newtp->send_head = NULL;
@@ -783,6 +783,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req,
newtp->mss_clamp = req->mss;
TCP_ECN_openreq_child(newtp, req);
+ tcp_vegas_init(newtp);
TCP_INC_STATS_BH(TcpPassiveOpens);
}
return newsk;
diff --git a/release/src/linux/linux/net/ipv4/tcp_output.c b/release/src/linux/linux/net/ipv4/tcp_output.c
index 35cbbbf7..3fd4871f 100644
--- a/release/src/linux/linux/net/ipv4/tcp_output.c
+++ b/release/src/linux/linux/net/ipv4/tcp_output.c
@@ -105,6 +105,9 @@ static void tcp_cwnd_restart(struct tcp_opt *tp)
u32 restart_cwnd = tcp_init_cwnd(tp);
u32 cwnd = tp->snd_cwnd;
+ if (tcp_is_vegas(tp))
+ tcp_vegas_enable(tp);
+
tp->snd_ssthresh = tcp_current_ssthresh(tp);
restart_cwnd = min(restart_cwnd, cwnd);
@@ -223,6 +226,19 @@ int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
tcp_header_size += (TCPOLEN_SACK_BASE_ALIGNED +
(tp->eff_sacks * TCPOLEN_SACK_PERBLOCK));
}
+
+ /*
+ * If the connection is idle and we are restarting,
+ * then we don't want to do any Vegas calculations
+ * until we get fresh RTT samples. So when we
+ * restart, we reset our Vegas state to a clean
+ * slate. After we get acks for this flight of
+ * packets, _then_ we can make Vegas calculations
+ * again.
+ */
+ if (tcp_is_vegas(tp) && tcp_packets_in_flight(tp) == 0)
+ tcp_vegas_enable(tp);
+
th = (struct tcphdr *) skb_push(skb, tcp_header_size);
skb->h.th = th;
skb_set_owner_w(skb, sk);
@@ -800,7 +816,7 @@ void tcp_simple_retransmit(struct sock *sk)
tp->snd_ssthresh = tcp_current_ssthresh(tp);
tp->prior_ssthresh = 0;
tp->undo_marker = 0;
- tp->ca_state = TCP_CA_Loss;
+ tcp_set_ca_state(tp, TCP_CA_Loss);
}
tcp_xmit_retransmit_queue(sk);
}
@@ -1181,6 +1197,7 @@ static inline void tcp_connect_init(struct sock *sk)
tp->window_clamp = dst->window;
tp->advmss = dst->advmss;
tcp_initialize_rcv_mss(sk);
+ tcp_vegas_init(tp);
tcp_select_initial_window(tcp_full_space(sk),
tp->advmss - (tp->ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
@@ -1231,6 +1248,7 @@ int tcp_connect(struct sock *sk)
TCP_SKB_CB(buff)->end_seq = tp->write_seq;
tp->snd_nxt = tp->write_seq;
tp->pushed_seq = tp->write_seq;
+ tcp_vegas_init(tp);
/* Send it off. */
TCP_SKB_CB(buff)->when = tcp_time_stamp;
diff --git a/release/src/linux/linux/net/ipv6/netfilter/Config.in b/release/src/linux/linux/net/ipv6/netfilter/Config.in
index 062ed247..5d2dac2f 100644
--- a/release/src/linux/linux/net/ipv6/netfilter/Config.in
+++ b/release/src/linux/linux/net/ipv6/netfilter/Config.in
@@ -17,6 +17,7 @@ tristate 'IP6 tables support (required for filtering/masq/NAT)' CONFIG_IP6_NF_IP
if [ "$CONFIG_IP6_NF_IPTABLES" != "n" ]; then
# The simple matches.
dep_tristate ' limit match support' CONFIG_IP6_NF_MATCH_LIMIT $CONFIG_IP6_NF_IPTABLES
+ dep_tristate ' condition match support' CONFIG_IP6_NF_MATCH_CONDITION $CONFIG_IP6_NF_IPTABLES
dep_tristate ' MAC address match support' CONFIG_IP6_NF_MATCH_MAC $CONFIG_IP6_NF_IPTABLES
dep_tristate ' Multiple port match support' CONFIG_IP6_NF_MATCH_MULTIPORT $CONFIG_IP6_NF_IPTABLES
if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
@@ -55,6 +56,9 @@ if [ "$CONFIG_IP6_NF_IPTABLES" != "n" ]; then
if [ "$CONFIG_IP6_NF_MANGLE" != "n" ]; then
# dep_tristate ' TOS target support' CONFIG_IP6_NF_TARGET_TOS $CONFIG_IP_NF_MANGLE
dep_tristate ' MARK target support' CONFIG_IP6_NF_TARGET_MARK $CONFIG_IP6_NF_MANGLE
+ dep_tristate ' ROUTE target support' CONFIG_IP6_NF_TARGET_ROUTE $CONFIG_IP6_NF_MANGLE
+
+ dep_tristate ' IMQ target support' CONFIG_IP6_NF_TARGET_IMQ $CONFIG_IP6_NF_MANGLE
fi
#dep_tristate ' LOG target support' CONFIG_IP6_NF_TARGET_LOG $CONFIG_IP6_NF_IPTABLES
fi
diff --git a/release/src/linux/linux/net/ipv6/netfilter/Makefile b/release/src/linux/linux/net/ipv6/netfilter/Makefile
index dfd36a89..2bd664f4 100644
--- a/release/src/linux/linux/net/ipv6/netfilter/Makefile
+++ b/release/src/linux/linux/net/ipv6/netfilter/Makefile
@@ -14,6 +14,7 @@ export-objs := ip6_tables.o
# Link order matters here.
obj-$(CONFIG_IP6_NF_IPTABLES) += ip6_tables.o
obj-$(CONFIG_IP6_NF_MATCH_LIMIT) += ip6t_limit.o
+obj-$(CONFIG_IP6_NF_MATCH_CONDITION) += ip6t_condition.o
obj-$(CONFIG_IP6_NF_MATCH_MARK) += ip6t_mark.o
obj-$(CONFIG_IP6_NF_MATCH_LENGTH) += ip6t_length.o
obj-$(CONFIG_IP6_NF_MATCH_MAC) += ip6t_mac.o
@@ -23,6 +24,8 @@ obj-$(CONFIG_IP6_NF_MATCH_OWNER) += ip6t_owner.o
obj-$(CONFIG_IP6_NF_FILTER) += ip6table_filter.o
obj-$(CONFIG_IP6_NF_MANGLE) += ip6table_mangle.o
obj-$(CONFIG_IP6_NF_TARGET_MARK) += ip6t_MARK.o
+obj-$(CONFIG_IP6_NF_TARGET_ROUTE) += ip6t_ROUTE.o
+obj-$(CONFIG_IP6_NF_TARGET_IMQ) += ip6t_IMQ.o
obj-$(CONFIG_IP6_NF_QUEUE) += ip6_queue.o
obj-$(CONFIG_IP6_NF_TARGET_LOG) += ip6t_LOG.o
diff --git a/release/src/linux/linux/net/ipv6/netfilter/ip6_tables.c b/release/src/linux/linux/net/ipv6/netfilter/ip6_tables.c
index b521af78..8eac7586 100644
--- a/release/src/linux/linux/net/ipv6/netfilter/ip6_tables.c
+++ b/release/src/linux/linux/net/ipv6/netfilter/ip6_tables.c
@@ -1241,13 +1241,7 @@ do_add_counters(void *user, unsigned int len)
goto free;
write_lock_bh(&t->lock);
- /*************************************
- * modify by tanghui @ 2006-10-11
- * for a RACE CONDITION in the "do_add_counters()" function
- *************************************/
if (t->private->number != paddc->num_counters) {
- if (t->private->number != tmp.num_counters) {
- /*************************************/
ret = -EINVAL;
goto unlock_up_free;
}
diff --git a/release/src/linux/linux/net/ipv6/netfilter/ip6t_IMQ.c b/release/src/linux/linux/net/ipv6/netfilter/ip6t_IMQ.c
new file mode 100644
index 00000000..760d7447
--- /dev/null
+++ b/release/src/linux/linux/net/ipv6/netfilter/ip6t_IMQ.c
@@ -0,0 +1,78 @@
+/* This target marks packets to be enqueued to an imq device */
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter_ipv6/ip6t_IMQ.h>
+#include <linux/imq.h>
+
+/* Target hook: flag the packet for enqueueing to the IMQ device selected by
+ * the rule and let rule traversal continue.
+ */
+static unsigned int imq_target(struct sk_buff **pskb,
+			       unsigned int hooknum,
+			       const struct net_device *in,
+			       const struct net_device *out,
+			       const void *targinfo,
+			       void *userinfo)
+{
+	const struct ip6t_imq_info *info = targinfo;
+	struct sk_buff *skb = *pskb;
+
+	/* Device number plus the "enqueue me" flag. */
+	skb->imq_flags = info->todev | IMQ_F_ENQUEUE;
+	skb->nfcache |= NFC_ALTERED;
+
+	return IP6T_CONTINUE;
+}
+
+/* Validate an IMQ rule when it is inserted.
+ *
+ * Returns 1 if the rule is acceptable. Returns 0, after logging a warning,
+ * if the target data has the wrong size, the rule is not in the "mangle"
+ * table, or the requested device number is larger than IMQ_MAX_DEVS.
+ */
+static int imq_checkentry(const char *tablename,
+			  const struct ip6t_entry *e,
+			  void *targinfo,
+			  unsigned int targinfosize,
+			  unsigned int hook_mask)
+{
+	const struct ip6t_imq_info *info = targinfo;
+
+	if (targinfosize != IP6T_ALIGN(sizeof(struct ip6t_imq_info))) {
+		printk(KERN_WARNING "IMQ: invalid targinfosize\n");
+		return 0;
+	}
+
+	if (strcmp(tablename, "mangle")) {
+		printk(KERN_WARNING
+		       "IMQ: IMQ can only be called from \"mangle\" table, not \"%s\"\n",
+		       tablename);
+		return 0;
+	}
+
+	/* NOTE(review): if IMQ_MAX_DEVS is a device count rather than the
+	 * highest valid index, this test accepts one value too many - confirm
+	 * against linux/imq.h.
+	 */
+	if (info->todev > IMQ_MAX_DEVS) {
+		printk(KERN_WARNING
+		       "IMQ: invalid device specified, highest is %u\n",
+		       IMQ_MAX_DEVS);
+		return 0;
+	}
+
+	return 1;
+}
+
+/* Registration record for the "IMQ" target. Members that are not named here
+ * (list linkage, destroy hook) are left zero-initialised.
+ */
+static struct ip6t_target ip6t_imq_reg = {
+	.name		= "IMQ",
+	.target		= imq_target,
+	.checkentry	= imq_checkentry,
+	.me		= THIS_MODULE
+};
+
+/* Module init: register the IMQ target; any registration failure is
+ * reported as -EINVAL.
+ */
+static int __init init(void)
+{
+	return ip6t_register_target(&ip6t_imq_reg) ? -EINVAL : 0;
+}
+
+/* Module exit: unregister the IMQ target that init() registered. */
+static void __exit fini(void)
+{
+ ip6t_unregister_target(&ip6t_imq_reg);
+}
+
+module_init(init);
+module_exit(fini);
+MODULE_LICENSE("GPL");
diff --git a/release/src/linux/linux/net/ipv6/netfilter/ip6t_ROUTE.c b/release/src/linux/linux/net/ipv6/netfilter/ip6t_ROUTE.c
new file mode 100644
index 00000000..bb6d11f8
--- /dev/null
+++ b/release/src/linux/linux/net/ipv6/netfilter/ip6t_ROUTE.c
@@ -0,0 +1,308 @@
+/*
+ * This implements the ROUTE v6 target, which enables you to setup unusual
+ * routes not supported by the standard kernel routing table.
+ *
+ * Copyright (C) 2003 Cedric de Launois <delaunois@info.ucl.ac.be>
+ *
+ * v 1.1 2004/11/23
+ *
+ * This software is distributed under GNU GPL v2, 1991
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ipv6.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter_ipv6/ip6t_ROUTE.h>
+#include <linux/netdevice.h>
+#include <net/ipv6.h>
+#include <net/ndisc.h>
+#include <net/ip6_route.h>
+#include <linux/icmpv6.h>
+
+/* Debug output is currently compiled in: DEBUGP expands to a plain printk.
+ * Change the condition to 0 to compile the messages out.
+ */
+#if 1
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+/* Expand an in6_addr into its eight 16-bit groups, each converted with
+ * ntohs(), for use as the arguments of a "%04x:%04x:..." format string.
+ */
+#define NIP6(addr) \
+ ntohs((addr).s6_addr16[0]), \
+ ntohs((addr).s6_addr16[1]), \
+ ntohs((addr).s6_addr16[2]), \
+ ntohs((addr).s6_addr16[3]), \
+ ntohs((addr).s6_addr16[4]), \
+ ntohs((addr).s6_addr16[5]), \
+ ntohs((addr).s6_addr16[6]), \
+ ntohs((addr).s6_addr16[7])
+
+/* Route the packet using the keys carried by the rule:
+ *   - ifindex: 0 when no output interface is preferred, otherwise the
+ *     index of the interface the route must leave through;
+ *   - route_info->gw: the unspecified address when no gateway was given,
+ *     otherwise the next hop the packet must be routed to.
+ *
+ * On success skb->dst points to the route that was found and skb->dev to
+ * its output device.
+ *
+ * RETURN: 1 if the packet was successfully routed as requested,
+ *         0 if the kernel routing table could not satisfy the keys
+ *           (a rate-limited message is logged in that case).
+ */
+static int
+route6(struct sk_buff *skb,
+       unsigned int ifindex,
+       const struct ip6t_route_target_info *route_info)
+{
+	struct ipv6hdr *hdr = skb->nh.ipv6h;
+	struct in6_addr *gw = (struct in6_addr*)&route_info->gw;
+	struct in6_addr *lookup_addr;
+	struct rt6_info *rt;
+
+	DEBUGP("ip6t_ROUTE: called with: ");
+	DEBUGP("DST=%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x ", NIP6(hdr->daddr));
+	DEBUGP("GATEWAY=%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x ", NIP6(*gw));
+	DEBUGP("OUT=%s\n", route_info->oif);
+
+	/* Look up the gateway when one was given, else the packet's own
+	 * destination.
+	 */
+	lookup_addr = ipv6_addr_any(gw) ? &hdr->daddr : gw;
+	rt = rt6_lookup(lookup_addr, &hdr->saddr, ifindex, 1);
+	if (rt == NULL)
+		goto failed;
+
+	DEBUGP("ip6t_ROUTE: routing gives: ");
+	DEBUGP("DST=%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x ", NIP6(rt->rt6i_dst.addr));
+	DEBUGP("GATEWAY=%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x ", NIP6(rt->rt6i_gateway));
+	DEBUGP("OUT=%s\n", rt->rt6i_dev->name);
+
+	if (ifindex && rt->rt6i_dev->ifindex!=ifindex) {
+		/* The route leaves through another device: give it back. */
+		dst_release(&rt->u.dst);
+		goto failed;
+	}
+
+	if (!rt->rt6i_nexthop) {
+		DEBUGP("ip6t_ROUTE: discovering neighbour\n");
+		rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_dst.addr);
+	}
+
+	/* Swap the packet's old route for the new one. */
+	dst_release(skb->dst);
+	skb->dst = &rt->u.dst;
+	skb->dev = rt->rt6i_dev;
+	return 1;
+
+failed:
+	if (net_ratelimit()) {
+		printk("ip6t_ROUTE: no explicit route found ");
+		if (ifindex)
+			printk("via interface %s ", route_info->oif);
+		if (!ipv6_addr_any(gw))
+			printk("via gateway %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x", NIP6(*gw));
+		printk("\n");
+	}
+	return 0;
+}
+
+
+/* Hand the packet straight to the link layer (stolen from ip6_output_finish).
+ * PRE : skb->dev is the device the packet leaves by,
+ *       skb->dst is not NULL
+ * POST: the packet has been passed to the cached-header or neighbour
+ *       output routine, or freed when neither is available
+ */
+static void ip_direct_send(struct sk_buff *skb)
+{
+	struct dst_entry *route = skb->dst;
+	struct hh_cache *cached = route->hh;
+
+	if (cached) {
+		/* Copy the cached link-layer header in front of the data. */
+		read_lock_bh(&cached->hh_lock);
+		memcpy(skb->data - 16, cached->hh_data, 16);
+		read_unlock_bh(&cached->hh_lock);
+		skb_push(skb, cached->hh_len);
+		cached->hh_output(skb);
+		return;
+	}
+
+	if (route->neighbour) {
+		route->neighbour->output(skb);
+		return;
+	}
+
+	if (net_ratelimit())
+		DEBUGP(KERN_DEBUG "ip6t_ROUTE: no hdr & no neighbour cache!\n");
+	kfree_skb(skb);
+}
+
+
+/* Route the packet through the output interface named in the rule.
+ *
+ * Returns IP6T_CONTINUE when IP6T_ROUTE_CONTINUE is set and either the
+ * interface is unknown or routing succeeded, NF_STOLEN once the packet has
+ * been handed to ip_direct_send(), and NF_DROP otherwise.
+ */
+static unsigned int
+route6_oif(const struct ip6t_route_target_info *route_info,
+	   struct sk_buff *skb)
+{
+	unsigned int ifindex;
+	struct net_device *dev_out;
+	int routed;
+
+	/* The user set the interface name to use.
+	 * Getting the current interface index.
+	 */
+	dev_out = dev_get_by_name(route_info->oif);
+	if (!dev_out) {
+		/* Unknown interface name : packet dropped */
+		if (net_ratelimit())
+			DEBUGP("ip6t_ROUTE: oif interface %s not found\n", route_info->oif);
+
+		if (route_info->flags & IP6T_ROUTE_CONTINUE)
+			return IP6T_CONTINUE;
+		return NF_DROP;
+	}
+	ifindex = dev_out->ifindex;
+
+	/* Trying the standard way of routing packets */
+	routed = route6(skb, ifindex, route_info);
+
+	/* Drop the reference taken by dev_get_by_name() on every path; it
+	 * used to be leaked whenever route6() failed.
+	 */
+	dev_put(dev_out);
+
+	if (!routed)
+		return NF_DROP;
+
+	if (route_info->flags & IP6T_ROUTE_CONTINUE)
+		return IP6T_CONTINUE;
+
+	ip_direct_send(skb);
+	return NF_STOLEN;
+}
+
+
+/* Route the packet via the gateway given in the rule, with no interface
+ * preference.
+ *
+ * Returns NF_DROP when no route is found, IP6T_CONTINUE when
+ * IP6T_ROUTE_CONTINUE is set, and NF_STOLEN once the packet has been handed
+ * to ip_direct_send().
+ */
+static unsigned int
+route6_gw(const struct ip6t_route_target_info *route_info,
+	  struct sk_buff *skb)
+{
+	if (!route6(skb, 0, route_info))
+		return NF_DROP;
+
+	if (route_info->flags & IP6T_ROUTE_CONTINUE)
+		return IP6T_CONTINUE;
+
+	ip_direct_send(skb);
+	return NF_STOLEN;
+}
+
+
+/* ROUTE target hook.
+ *
+ * Unless IP6T_ROUTE_CONTINUE is set, the hop limit is checked and decremented
+ * at the PRE_ROUTING and LOCAL_IN hooks, and with IP6T_ROUTE_TEE a copy of
+ * the packet is routed instead of the original. The packet is then routed by
+ * output interface name if one is given, else by gateway.
+ *
+ * Returns the verdict of the routing helper, or IP6T_CONTINUE for TEE rules
+ * and for rules without any routing parameter.
+ */
+static unsigned int
+ip6t_route_target(struct sk_buff **pskb,
+		  unsigned int hooknum,
+		  const struct net_device *in,
+		  const struct net_device *out,
+		  const void *targinfo,
+		  void *userinfo)
+{
+	const struct ip6t_route_target_info *route_info = targinfo;
+	struct sk_buff *skb = *pskb;
+	struct in6_addr *gw = (struct in6_addr*)&route_info->gw;
+	unsigned int res;
+
+	if (route_info->flags & IP6T_ROUTE_CONTINUE)
+		goto do_it;
+
+	/* If we are at PREROUTING or INPUT hook
+	 * the TTL isn't decreased by the IP stack
+	 */
+	if (hooknum == NF_IP6_PRE_ROUTING ||
+	    hooknum == NF_IP6_LOCAL_IN) {
+
+		struct ipv6hdr *ipv6h = skb->nh.ipv6h;
+
+		if (ipv6h->hop_limit <= 1) {
+			/* Force OUTPUT device used as source address.
+			 * The packet may not have a route attached yet at
+			 * these hooks, so only follow skb->dst when it is
+			 * set; otherwise keep the current skb->dev.
+			 */
+			if (skb->dst)
+				skb->dev = skb->dst->dev;
+
+			icmpv6_send(skb, ICMPV6_TIME_EXCEED,
+				    ICMPV6_EXC_HOPLIMIT, 0, skb->dev);
+
+			return NF_DROP;
+		}
+
+		ipv6h->hop_limit--;
+	}
+
+	if ((route_info->flags & IP6T_ROUTE_TEE)) {
+		/*
+		 * Copy the *pskb, and route the copy. Will later return
+		 * IP6T_CONTINUE for the original skb, which should continue
+		 * on its way as if nothing happened. The copy should be
+		 * independently delivered to the ROUTE --gw.
+		 */
+		skb = skb_copy(*pskb, GFP_ATOMIC);
+		if (!skb) {
+			if (net_ratelimit())
+				DEBUGP(KERN_DEBUG "ip6t_ROUTE: copy failed!\n");
+			return IP6T_CONTINUE;
+		}
+	}
+
+do_it:
+	if (route_info->oif[0]) {
+		res = route6_oif(route_info, skb);
+	} else if (!ipv6_addr_any(gw)) {
+		res = route6_gw(route_info, skb);
+	} else {
+		if (net_ratelimit())
+			DEBUGP(KERN_DEBUG "ip6t_ROUTE: no parameter !\n");
+		res = IP6T_CONTINUE;
+	}
+
+	if ((route_info->flags & IP6T_ROUTE_TEE)) {
+		/* The routing helpers only consume the packet when they
+		 * return NF_STOLEN. On any other verdict the private copy
+		 * made above is still ours and must be freed, or it leaks.
+		 * skb == *pskb when no copy was made (CONTINUE was also set).
+		 */
+		if (skb != *pskb && res != NF_STOLEN)
+			kfree_skb(skb);
+		res = IP6T_CONTINUE;
+	}
+
+	return res;
+}
+
+
+/* Validate a ROUTE rule when it is inserted.
+ *
+ * Returns 1 if the rule is in the "mangle" table and its target data has the
+ * expected size; otherwise logs a message and returns 0.
+ */
+static int
+ip6t_route_checkentry(const char *tablename,
+		      const struct ip6t_entry *e,
+		      void *targinfo,
+		      unsigned int targinfosize,
+		      unsigned int hook_mask)
+{
+	int in_mangle = (strcmp(tablename, "mangle") == 0);
+
+	if (!in_mangle) {
+		printk("ip6t_ROUTE: can only be called from \"mangle\" table.\n");
+		return 0;
+	}
+
+	if (targinfosize == IP6T_ALIGN(sizeof(struct ip6t_route_target_info)))
+		return 1;
+
+	printk(KERN_WARNING "ip6t_ROUTE: targinfosize %u != %Zu\n",
+	       targinfosize,
+	       IP6T_ALIGN(sizeof(struct ip6t_route_target_info)));
+	return 0;
+}
+
+
+/* Registration record for the "ROUTE" target: packet hook and rule
+ * validation hook. Members not named here are left zero-initialised.
+ */
+static struct ip6t_target ip6t_route_reg = {
+ .name = "ROUTE",
+ .target = ip6t_route_target,
+ .checkentry = ip6t_route_checkentry,
+ .me = THIS_MODULE
+};
+
+
+/* Module init: announce and register the ROUTE target; any registration
+ * failure is reported as -EINVAL.
+ */
+static int __init init(void)
+{
+	int err;
+
+	printk(KERN_DEBUG "registering ipv6 ROUTE target\n");
+	err = ip6t_register_target(&ip6t_route_reg);
+
+	return err ? -EINVAL : 0;
+}
+
+
+/* Module exit: unregister the ROUTE target that init() registered. */
+static void __exit fini(void)
+{
+ ip6t_unregister_target(&ip6t_route_reg);
+}
+
+module_init(init);
+module_exit(fini);
+MODULE_LICENSE("GPL");
diff --git a/release/src/linux/linux/net/ipv6/netfilter/ip6t_condition.c b/release/src/linux/linux/net/ipv6/netfilter/ip6t_condition.c
new file mode 100644
index 00000000..15d805e6
--- /dev/null
+++ b/release/src/linux/linux/net/ipv6/netfilter/ip6t_condition.c
@@ -0,0 +1,254 @@
+/*-------------------------------------------*\
+| Netfilter Condition Module for IPv6 |
+| |
+| Description: This module allows firewall |
+| rules to match using condition variables |
+| stored in /proc files. |
+| |
+| Author: Stephane Ouellette 2003-02-10 |
+| <ouellettes@videotron.ca> |
+| |
+| This software is distributed under the |
+| terms of the GNU GPL. |
+\*-------------------------------------------*/
+
+#include<linux/module.h>
+#include<linux/proc_fs.h>
+#include<linux/spinlock.h>
+#include<linux/string.h>
+#include<asm/atomic.h>
+#include<asm/uaccess.h>
+#include<linux/netfilter_ipv6/ip6_tables.h>
+#include<linux/netfilter_ipv6/ip6t_condition.h>
+
+
+#ifndef CONFIG_PROC_FS
+#error "Proc file system support is required for this module"
+#endif
+
+
+MODULE_AUTHOR("Stephane Ouellette <ouellettes@videotron.ca>");
+MODULE_DESCRIPTION("Allows rules to match against condition variables");
+MODULE_LICENSE("GPL");
+
+
+/* One named condition, kept on a singly linked list and exposed as a proc file. */
+struct condition_variable {
+ struct condition_variable *next; /* next entry in the list headed by 'head' */
+ struct proc_dir_entry *status_proc; /* proc entry; its name is the condition's name */
+ atomic_t refcount; /* set to 1 on creation, +1 per further checkentry(), -1 per destroy() */
+ int enabled; /* TRUE == 1, FALSE == 0 */
+};
+
+
+/* Protects the list headed by 'head'; initialised in init(). */
+static rwlock_t list_lock;
+/* First condition variable, or NULL when none exist. */
+static struct condition_variable *head = NULL;
+/* Directory created by init() that holds the per-condition proc files. */
+static struct proc_dir_entry *proc_net_condition = NULL;
+
+
+/* proc read handler: at offset 0 emit "1\n" or "0\n" depending on the
+ * variable's state (2 bytes); at any other offset signal end of file.
+ */
+static int
+ipt_condition_read_info(char *buffer, char **start, off_t offset,
+			int length, int *eof, void *data)
+{
+	struct condition_variable *cv = data;
+
+	if (offset != 0) {
+		*eof = 1;
+		return 0;
+	}
+
+	*start = buffer;
+	buffer[0] = cv->enabled ? '1' : '0';
+	buffer[1] = '\n';
+	return 2;
+}
+
+
+/* proc write handler: set the condition from the first byte written.
+ *
+ * '0' disables the condition, '1' enables it, any other byte leaves it
+ * unchanged. Returns @length so the whole write is reported as consumed, or
+ * -EFAULT if the first byte cannot be read from the caller's buffer.
+ */
+static int
+ipt_condition_write_info(struct file *file, const char *buffer,
+			 unsigned long length, void *data)
+{
+	struct condition_variable *var = data;
+	char first;
+
+	if (length) {
+		/* @buffer is a user-space pointer: fetch the byte with
+		 * get_user() instead of dereferencing it directly.
+		 */
+		if (get_user(first, buffer))
+			return -EFAULT;
+
+		/* Match only on the first character */
+		switch (first) {
+		case '0':
+			var->enabled = 0;
+			break;
+		case '1':
+			var->enabled = 1;
+			break;
+		default:
+			/* Anything else is ignored. */
+			break;
+		}
+	}
+
+	return (int) length;
+}
+
+
+/* Match hook: look up the condition named in the rule and return its state
+ * XOR the rule's invert flag. A name that is not on the list counts as a
+ * disabled condition.
+ */
+static int
+match(const struct sk_buff *skb, const struct net_device *in,
+      const struct net_device *out, const void *matchinfo, int offset,
+      const void *hdr, u_int16_t datalen, int *hotdrop)
+{
+	const struct condition6_info *info = matchinfo;
+	struct condition_variable *cv;
+	int status = 0;
+
+	read_lock(&list_lock);
+
+	cv = head;
+	while (cv != NULL && strcmp(info->name, cv->status_proc->name) != 0)
+		cv = cv->next;
+	if (cv != NULL)
+		status = cv->enabled;
+
+	read_unlock(&list_lock);
+
+	return status ^ info->invert;
+}
+
+
+
+/* Take a reference on the condition variable called @name, if there is one.
+ * Returns 1 when a variable was found (its refcount has been incremented),
+ * 0 otherwise. A read lock is sufficient because the list is not modified.
+ */
+static int
+condition_ref_existing(const char *name)
+{
+	struct condition_variable *var;
+	int found = 0;
+
+	read_lock(&list_lock);
+
+	for (var = head; var; var = var->next) {
+		if (strcmp(name, var->status_proc->name) == 0) {
+			atomic_inc(&var->refcount);
+			found = 1;
+			break;
+		}
+	}
+
+	read_unlock(&list_lock);
+
+	return found;
+}
+
+
+/* Validate a "condition" rule when it is inserted, creating the condition
+ * variable and its proc file if they do not exist yet.
+ *
+ * Returns 1 when the rule is acceptable and 0 when it must be rejected.
+ * Failures used to be reported as -ENOMEM, which is non-zero and therefore
+ * indistinguishable from success for a caller that tests for 0; they now
+ * return 0.
+ */
+static int
+checkentry(const char *tablename, const struct ip6t_ip6 *ip,
+	   void *matchinfo, unsigned int matchsize, unsigned int hook_mask)
+{
+	struct condition6_info *info =
+	    (struct condition6_info *) matchinfo;
+	struct condition_variable *newvar;
+
+	if (matchsize != IP6T_ALIGN(sizeof(struct condition6_info)))
+		return 0;
+
+	/* The first step is to check if the condition variable already exists. */
+	if (condition_ref_existing(info->name))
+		return 1;
+
+	/* At this point, we need to allocate a new condition variable */
+	newvar = kmalloc(sizeof(struct condition_variable), GFP_KERNEL);
+
+	if (!newvar)
+		return 0;
+
+	/* Create the condition variable's proc file entry */
+	newvar->status_proc = create_proc_entry(info->name, 0644, proc_net_condition);
+
+	if (!newvar->status_proc) {
+		/*
+		 * There are two possibilities:
+		 *  1- Another condition variable with the same name has been created, which is valid.
+		 *  2- There was a memory allocation error.
+		 */
+		kfree(newvar);
+		return condition_ref_existing(info->name);
+	}
+
+	atomic_set(&newvar->refcount, 1);
+	newvar->enabled = 0;
+	newvar->status_proc->owner = THIS_MODULE;
+	newvar->status_proc->data = newvar;
+	/* Publish ->data before the handlers that read it become reachable. */
+	wmb();
+	newvar->status_proc->read_proc = ipt_condition_read_info;
+	newvar->status_proc->write_proc = ipt_condition_write_info;
+
+	write_lock(&list_lock);
+
+	newvar->next = head;
+	head = newvar;
+
+	write_unlock(&list_lock);
+
+	return 1;
+}
+
+
+/* Rule removal hook: drop the rule's reference on its condition variable
+ * and, when that was the last reference, unlink the variable, remove its
+ * proc file and free it.
+ */
+static void
+destroy(void *matchinfo, unsigned int matchsize)
+{
+	struct condition6_info *info = matchinfo;
+	struct condition_variable *cv;
+	struct condition_variable *before = NULL;
+
+	if (matchsize != IP6T_ALIGN(sizeof(struct condition6_info)))
+		return;
+
+	write_lock(&list_lock);
+
+	/* Find the variable, remembering its predecessor for unlinking. */
+	cv = head;
+	while (cv && strcmp(info->name, cv->status_proc->name) != 0) {
+		before = cv;
+		cv = cv->next;
+	}
+
+	if (!cv || !atomic_dec_and_test(&cv->refcount)) {
+		write_unlock(&list_lock);
+		return;
+	}
+
+	if (before)
+		before->next = cv->next;
+	else
+		head = cv->next;
+
+	write_unlock(&list_lock);
+
+	remove_proc_entry(cv->status_proc->name, proc_net_condition);
+	kfree(cv);
+}
+
+
+/* Registration record for the "condition" match: per-packet hook, rule
+ * validation hook and rule removal hook.
+ */
+static struct ip6t_match condition_match = {
+ .name = "condition",
+ .match = &match,
+ .checkentry = &checkentry,
+ .destroy = &destroy,
+ .me = THIS_MODULE
+};
+
+
+/* Module init: create the "ip6t_condition" proc directory under proc_net and
+ * register the match.
+ *
+ * condition_match is a struct ip6t_match, so it is registered with
+ * ip6t_register_match(); the IPv4 ipt_register_match() used before takes a
+ * different structure type and is not declared by the headers this file
+ * includes.
+ *
+ * Returns 0 on success, -EACCES if the directory cannot be created, or the
+ * error from ip6t_register_match() (the directory is removed again).
+ */
+static int __init
+init(void)
+{
+	int errorcode;
+
+	rwlock_init(&list_lock);
+	proc_net_condition = proc_mkdir("ip6t_condition", proc_net);
+
+	if (proc_net_condition) {
+		errorcode = ip6t_register_match(&condition_match);
+
+		if (errorcode)
+			remove_proc_entry("ip6t_condition", proc_net);
+	} else
+		errorcode = -EACCES;
+
+	return errorcode;
+}
+
+
+/* Module exit: undo init() - unregister the match, then remove the proc
+ * directory.
+ */
+static void __exit
+fini(void)
+{
+	ip6t_unregister_match(&condition_match);
+	remove_proc_entry("ip6t_condition", proc_net);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/release/src/linux/linux/net/sched/Config.in b/release/src/linux/linux/net/sched/Config.in
index 8e203456..468fdf2a 100644
--- a/release/src/linux/linux/net/sched/Config.in
+++ b/release/src/linux/linux/net/sched/Config.in
@@ -5,13 +5,14 @@ tristate ' CBQ packet scheduler' CONFIG_NET_SCH_CBQ
tristate ' HTB packet scheduler' CONFIG_NET_SCH_HTB
tristate ' CSZ packet scheduler' CONFIG_NET_SCH_CSZ
#tristate ' H-PFQ packet scheduler' CONFIG_NET_SCH_HPFQ
-#tristate ' H-FSC packet scheduler' CONFIG_NET_SCH_HFCS
+tristate ' H-FSC packet scheduler' CONFIG_NET_SCH_HFSC
if [ "$CONFIG_ATM" = "y" ]; then
bool ' ATM pseudo-scheduler' CONFIG_NET_SCH_ATM
fi
tristate ' The simplest PRIO pseudoscheduler' CONFIG_NET_SCH_PRIO
tristate ' RED queue' CONFIG_NET_SCH_RED
tristate ' SFQ queue' CONFIG_NET_SCH_SFQ
+tristate ' ESFQ queue' CONFIG_NET_SCH_ESFQ
tristate ' TEQL queue' CONFIG_NET_SCH_TEQL
tristate ' TBF queue' CONFIG_NET_SCH_TBF
tristate ' GRED queue' CONFIG_NET_SCH_GRED
diff --git a/release/src/linux/linux/net/sched/Makefile b/release/src/linux/linux/net/sched/Makefile
index e48e5c3e..49cf71e7 100644
--- a/release/src/linux/linux/net/sched/Makefile
+++ b/release/src/linux/linux/net/sched/Makefile
@@ -18,6 +18,7 @@ obj-$(CONFIG_NET_SCH_HPFQ) += sch_hpfq.o
obj-$(CONFIG_NET_SCH_HFSC) += sch_hfsc.o
obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o
obj-$(CONFIG_NET_SCH_SFQ) += sch_sfq.o
+obj-$(CONFIG_NET_SCH_ESFQ) += sch_esfq.o
obj-$(CONFIG_NET_SCH_RED) += sch_red.o
obj-$(CONFIG_NET_SCH_TBF) += sch_tbf.o
obj-$(CONFIG_NET_SCH_PRIO) += sch_prio.o
diff --git a/release/src/linux/linux/net/sched/sch_api.c b/release/src/linux/linux/net/sched/sch_api.c
index a5d8945e..ae384433 100644
--- a/release/src/linux/linux/net/sched/sch_api.c
+++ b/release/src/linux/linux/net/sched/sch_api.c
@@ -1232,6 +1232,9 @@ int __init pktsched_init(void)
#ifdef CONFIG_NET_SCH_SFQ
INIT_QDISC(sfq);
#endif
+#ifdef CONFIG_NET_SCH_ESFQ
+ INIT_QDISC(esfq);
+#endif
#ifdef CONFIG_NET_SCH_TBF
INIT_QDISC(tbf);
#endif
diff --git a/release/src/linux/linux/net/sched/sch_esfq.c b/release/src/linux/linux/net/sched/sch_esfq.c
new file mode 100644
index 00000000..26640f18
--- /dev/null
+++ b/release/src/linux/linux/net/sched/sch_esfq.c
@@ -0,0 +1,652 @@
+/*
+ * net/sched/sch_esfq.c Extended Stochastic Fairness Queueing discipline.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ * Changes: Alexander Atanasov, <alex@ssi.bg>
+ * Added dynamic depth,limit,divisor,hash_kind options.
+ * Added dst and src hashes.
+ *
+ * Alexander Clouter, <alex@digriz.org.uk>
+ * Ported ESFQ to Linux 2.6.
+ *
+ * Corey Hickey, <bugfood-c@fatooh.org>
+ * Maintenance of the Linux 2.6 port.
+ * Added fwmark hash (thanks to Robert Kurjata)
+ * Added direct hashing for src, dst, and fwmark.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/bitops.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/if_ether.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/notifier.h>
+#include <linux/init.h>
+#include <net/ip.h>
+#include <linux/ipv6.h>
+#include <net/route.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/pkt_sched.h>
+
+
+/* Stochastic Fairness Queuing algorithm.
+ For more comments look at sch_sfq.c.
+ The difference is that you can change limit, depth,
+ hash table size and choose 7 hash types.
+
+ classic: same as in sch_sfq.c
+ dst: destination IP address
+ src: source IP address
+ fwmark: netfilter mark value
+ dst_direct:
+ src_direct:
+ fwmark_direct: direct hashing of the above sources
+
+ TODO:
+ make sfq_change work.
+*/
+
+#ifndef IPPROTO_SCTP
+#define IPPROTO_SCTP 132
+#endif
+#ifndef IPPROTO_DCCP
+#define IPPROTO_DCCP 33
+#endif
+
+/*
+ * Index type for slots and depth-list heads.  It must be able to hold at
+ * least 2*depth+1 distinct values, because dep[] is allocated with
+ * 1 + depth*2 entries and is indexed with this type.
+ */
+typedef unsigned int esfq_index;
+
+/*
+ * Node of a doubly linked list threaded through the dep[] array.
+ * Links are array indices rather than pointers.
+ */
+struct esfq_head
+{
+ esfq_index next;
+ esfq_index prev;
+};
+
+/*
+ * Private per-qdisc state of one ESFQ instance.  All tables are allocated
+ * in esfq_init() and released in esfq_destroy().
+ */
+struct esfq_sched_data
+{
+/* Parameters */
+ int perturb_period; /* Re-perturbation interval in jiffies; 0 disables the timer */
+ unsigned quantum; /* Allotment per round: MUST BE >= MTU */
+ int limit; /* Queue length threshold checked on enqueue/requeue */
+ unsigned depth; /* Number of slots; also the "no slot" marker in ht[] and tail */
+ unsigned hash_divisor; /* Number of entries in ht[] */
+ unsigned hash_kind; /* One of the TCA_SFQ_HASH_* selectors */
+/* Variables */
+ struct timer_list perturb_timer; /* Runs esfq_perturbation() */
+ int perturbation; /* Shift amount (0..31) mixed into the hash */
+ esfq_index tail; /* Index of current slot in round */
+ esfq_index max_depth; /* Maximal depth */
+
+ esfq_index *ht; /* Hash table */
+ esfq_index *next; /* Active slots link */
+ short *allot; /* Current allotment per slot */
+ unsigned short *hash; /* Hash value indexed by slots */
+ struct sk_buff_head *qs; /* Slot queue */
+ struct esfq_head *dep; /* Linked list of slots, indexed by depth */
+ unsigned dyn_min; /* For dynamic divisor adjustment; minimum value seen */
+ unsigned dyn_max; /* maximum value seen */
+ unsigned dyn_range; /* saved range */
+};
+
+/*
+ * Hash a single 32-bit key into a bucket number.
+ *
+ * The key is first mixed with the current perturbation (when non-zero),
+ * converted with ntohl(), multiplied by a fixed constant and finally
+ * masked with hash_divisor - 1.
+ */
+static __inline__ unsigned esfq_hash_u32(struct esfq_sched_data *q,u32 h)
+{
+	const int shift = q->perturbation;
+	u32 mixed = h;
+
+	if (shift != 0)
+		mixed = (mixed << shift) ^ (mixed >> (0x1F - shift));
+
+	mixed = ntohl(mixed) * 2654435761UL;
+
+	return mixed & (q->hash_divisor - 1);
+}
+
+/* Hash input values directly into the "nearest" slot, taking into account the
+ * range of input values seen. This is most useful when the hash table is at
+ * least as large as the range of possible values.
+ *
+ * The smallest and largest inputs seen so far are tracked in dyn_min and
+ * dyn_max; the input is then scaled linearly from [dyn_min, dyn_max] onto
+ * [0, hash_divisor - 1].  Returns the resulting bucket number.
+ *
+ * NOTE(review): the scaling multiplies two unsigned 32-bit values before
+ * dividing; for wide input ranges the product looks like it can wrap,
+ * which would skew the distribution - confirm whether that matters. */
+static __inline__ unsigned esfq_hash_direct(struct esfq_sched_data *q, u32 h)
+{
+ /* adjust minimum and maximum */
+ if (h < q->dyn_min || h > q->dyn_max) {
+ q->dyn_min = h < q->dyn_min ? h : q->dyn_min;
+ q->dyn_max = h > q->dyn_max ? h : q->dyn_max;
+
+ /* find new range */
+ /* the warning is only emitted when the observed range has just grown */
+ if ((q->dyn_range = q->dyn_max - q->dyn_min) >= q->hash_divisor)
+ printk(KERN_WARNING "ESFQ: (direct hash) Input range %u is larger than hash "
+ "table. See ESFQ README for details.\n", q->dyn_range);
+ }
+
+ /* hash input values into slot numbers */
+ if (q->dyn_min == q->dyn_max)
+ return 0; /* only one value seen; avoid division by 0 */
+ else
+ return (h - q->dyn_min) * (q->hash_divisor - 1) / q->dyn_range;
+}
+
+/*
+ * Classic SFQ-style fold of two 32-bit keys into a bucket number.
+ *
+ * h1 is mixed using the current perturbation and XORed into h; the result
+ * is folded onto itself and masked with hash_divisor - 1.
+ */
+static __inline__ unsigned esfq_fold_hash_classic(struct esfq_sched_data *q, u32 h, u32 h1)
+{
+	const int shift = q->perturbation;
+	u32 folded;
+
+	/* Have we any rotation primitives? If not, WHY? */
+	folded = h ^ (h1 << shift) ^ (h1 >> (0x1F - shift));
+	folded ^= folded >> 10;
+
+	return folded & (q->hash_divisor - 1);
+}
+
+/*
+ * Compute the hash bucket for a packet.
+ *
+ * First the per-packet keys are extracted according to the protocol:
+ *   h   - destination key
+ *   hs  - source key
+ *   nfm - netfilter mark
+ *   h2  - source key mixed with the protocol number and, for selected
+ *         transport protocols, the first 32 bits following the network
+ *         header
+ * Then the key(s) selected by q->hash_kind are handed to the matching
+ * hash helper.  Unknown hash kinds log a rate-limited warning and fall
+ * back to the classic hash.
+ */
+static unsigned esfq_hash(struct esfq_sched_data *q, struct sk_buff *skb)
+{
+ u32 h, h2;
+ u32 hs;
+ u32 nfm;
+
+ switch (skb->protocol) {
+ case __constant_htons(ETH_P_IP):
+ {
+ struct iphdr *iph = skb->nh.iph;
+ h = iph->daddr;
+ hs = iph->saddr;
+ nfm = skb->nfmark;
+ h2 = hs^iph->protocol;
+ /* only unfragmented packets contribute the word after the IP header */
+ if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) &&
+ (iph->protocol == IPPROTO_TCP ||
+ iph->protocol == IPPROTO_UDP ||
+ iph->protocol == IPPROTO_SCTP ||
+ iph->protocol == IPPROTO_DCCP ||
+ iph->protocol == IPPROTO_ESP))
+ h2 ^= *(((u32*)iph) + iph->ihl);
+ break;
+ }
+ case __constant_htons(ETH_P_IPV6):
+ {
+ struct ipv6hdr *iph = skb->nh.ipv6h;
+ /* only the last 32 bits of each IPv6 address are used as key */
+ h = iph->daddr.s6_addr32[3];
+ hs = iph->saddr.s6_addr32[3];
+ nfm = skb->nfmark;
+ h2 = hs^iph->nexthdr;
+ if (iph->nexthdr == IPPROTO_TCP ||
+ iph->nexthdr == IPPROTO_UDP ||
+ iph->nexthdr == IPPROTO_SCTP ||
+ iph->nexthdr == IPPROTO_DCCP ||
+ iph->nexthdr == IPPROTO_ESP)
+ h2 ^= *(u32*)&iph[1];
+ break;
+ }
+ default:
+ /* other protocols: key on the dst and socket pointer values */
+ h = (u32)(unsigned long)skb->dst;
+ hs = (u32)(unsigned long)skb->sk;
+ nfm = skb->nfmark;
+ h2 = hs^skb->protocol;
+ }
+ switch(q->hash_kind)
+ {
+ case TCA_SFQ_HASH_CLASSIC:
+ return esfq_fold_hash_classic(q, h, h2);
+ case TCA_SFQ_HASH_DST:
+ return esfq_hash_u32(q,h);
+ case TCA_SFQ_HASH_DSTDIR:
+ return esfq_hash_direct(q, ntohl(h));
+ case TCA_SFQ_HASH_SRC:
+ return esfq_hash_u32(q,hs);
+ case TCA_SFQ_HASH_SRCDIR:
+ return esfq_hash_direct(q, ntohl(hs));
+#ifdef CONFIG_NETFILTER
+ case TCA_SFQ_HASH_FWMARK:
+ return esfq_hash_u32(q,nfm);
+ case TCA_SFQ_HASH_FWMARKDIR:
+ return esfq_hash_direct(q,nfm);
+#endif
+ default:
+ if (net_ratelimit())
+ printk(KERN_WARNING "ESFQ: Unknown hash method. Falling back to classic.\n");
+ }
+ return esfq_fold_hash_classic(q, h, h2);
+}
+
+/*
+ * Insert slot x at the front of the depth list that matches its current
+ * queue length.  The list head for queue length L lives at dep[L + depth].
+ */
+static inline void esfq_link(struct esfq_sched_data *q, esfq_index x)
+{
+ esfq_index p, n;
+ int d = q->qs[x].qlen + q->depth;
+
+ p = d;
+ n = q->dep[d].next;
+ q->dep[x].next = n;
+ q->dep[x].prev = p;
+ q->dep[p].next = q->dep[n].prev = x;
+}
+
+/*
+ * Re-file slot x after a packet has been removed from its queue: unlink it
+ * from its old depth list, lower max_depth if x was the only slot at the
+ * maximum depth, and link it into the list for its new queue length.
+ */
+static inline void esfq_dec(struct esfq_sched_data *q, esfq_index x)
+{
+ esfq_index p, n;
+
+ n = q->dep[x].next;
+ p = q->dep[x].prev;
+ q->dep[p].next = n;
+ q->dep[n].prev = p;
+
+ /* n == p means the old list is now empty (both neighbours are its head) */
+ if (n == p && q->max_depth == q->qs[x].qlen + 1)
+ q->max_depth--;
+
+ esfq_link(q, x);
+}
+
+/*
+ * Re-file slot x after a packet has been added to its queue: unlink it
+ * from its old depth list, raise max_depth if the slot is now the deepest,
+ * and link it into the list for its new queue length.
+ */
+static inline void esfq_inc(struct esfq_sched_data *q, esfq_index x)
+{
+ esfq_index p, n;
+ int d;
+
+ n = q->dep[x].next;
+ p = q->dep[x].prev;
+ q->dep[p].next = n;
+ q->dep[n].prev = p;
+ d = q->qs[x].qlen;
+ if (q->max_depth < d)
+ q->max_depth = d;
+
+ esfq_link(q, x);
+}
+
+/*
+ * Drop one packet to make room.
+ *
+ * The victim is the last packet of a slot with the longest queue.  When
+ * every active slot holds exactly one packet, the slot following the
+ * current tail in the round is emptied and taken out of the round.
+ *
+ * Returns the length of the dropped packet, or 0 if nothing was queued.
+ * sch->q.qlen and sch->stats.drops are updated here.
+ */
+static int esfq_drop(struct Qdisc *sch)
+{
+ struct esfq_sched_data *q = (struct esfq_sched_data *)sch->data;
+ esfq_index d = q->max_depth;
+ struct sk_buff *skb;
+ unsigned int len;
+
+ /* Queue is full! Find the longest slot and
+ drop a packet from it */
+
+ if (d > 1) {
+ esfq_index x = q->dep[d+q->depth].next;
+ skb = q->qs[x].prev;
+ len = skb->len;
+ __skb_unlink(skb, &q->qs[x]);
+ kfree_skb(skb);
+ esfq_dec(q, x);
+ sch->q.qlen--;
+ sch->stats.drops++;
+ return len;
+ }
+
+ if (d == 1) {
+ /* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */
+ /* d is reused as the index of the slot that follows the tail */
+ d = q->next[q->tail];
+ q->next[q->tail] = q->next[d];
+ q->allot[q->next[d]] += q->quantum;
+ skb = q->qs[d].prev;
+ len = skb->len;
+ __skb_unlink(skb, &q->qs[d]);
+ kfree_skb(skb);
+ esfq_dec(q, d);
+ sch->q.qlen--;
+ /* the slot is empty now: mark its hash bucket as unassigned */
+ q->ht[q->hash[d]] = q->depth;
+ sch->stats.drops++;
+ return len;
+ }
+
+ return 0;
+}
+
+/*
+ * Queue a packet at the tail of the slot selected by its hash.
+ *
+ * A hash bucket without a slot is given the first slot of the empty-slot
+ * list.  A slot that just became non-empty is added to the round behind
+ * the current tail.  Returns 0 while the queue stays below the limit;
+ * otherwise one packet is dropped via esfq_drop() and NET_XMIT_CN is
+ * returned.
+ */
+static int
+esfq_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+{
+ struct esfq_sched_data *q = (struct esfq_sched_data *)sch->data;
+ unsigned hash = esfq_hash(q, skb);
+ unsigned depth = q->depth;
+ esfq_index x;
+
+ x = q->ht[hash];
+ if (x == depth) {
+ /* dep[depth] heads the list of slots with queue length 0 */
+ q->ht[hash] = x = q->dep[depth].next;
+ q->hash[x] = hash;
+ }
+ __skb_queue_tail(&q->qs[x], skb);
+ esfq_inc(q, x);
+ if (q->qs[x].qlen == 1) { /* The flow is new */
+ if (q->tail == depth) { /* It is the first flow */
+ q->tail = x;
+ q->next[x] = x;
+ q->allot[x] = q->quantum;
+ } else {
+ q->next[x] = q->next[q->tail];
+ q->next[q->tail] = x;
+ q->tail = x;
+ }
+ }
+ /* byte/packet counters are only updated on the non-congested path */
+ if (++sch->q.qlen < q->limit-1) {
+ sch->stats.bytes += skb->len;
+ sch->stats.packets++;
+ return 0;
+ }
+
+ esfq_drop(sch);
+ return NET_XMIT_CN;
+}
+
+/*
+ * Put a packet back at the head of the slot selected by its hash.
+ *
+ * Mirrors esfq_enqueue() except that the packet is inserted at the front
+ * of the slot queue and the byte/packet counters are left untouched.
+ * Returns 0 while the queue stays below the limit; otherwise one packet is
+ * dropped via esfq_drop() and NET_XMIT_CN is returned.
+ */
+static int
+esfq_requeue(struct sk_buff *skb, struct Qdisc* sch)
+{
+	struct esfq_sched_data *q = (struct esfq_sched_data *)sch->data;
+	unsigned hash = esfq_hash(q, skb);
+	unsigned depth = q->depth;
+	esfq_index x;
+
+	x = q->ht[hash];
+	if (x == depth) {
+		/* dep[depth] heads the list of slots with queue length 0 */
+		q->ht[hash] = x = q->dep[depth].next;
+		q->hash[x] = hash;
+	}
+	__skb_queue_head(&q->qs[x], skb);
+	esfq_inc(q, x);
+	if (q->qs[x].qlen == 1) {		/* The flow is new */
+		if (q->tail == depth) {		/* It is the first flow */
+			q->tail = x;
+			q->next[x] = x;
+			q->allot[x] = q->quantum;
+		} else {
+			q->next[x] = q->next[q->tail];
+			q->next[q->tail] = x;
+			q->tail = x;
+		}
+	}
+	if (++sch->q.qlen < q->limit - 1)
+		return 0;	/* no requeue counter is maintained here */
+
+	/*
+	 * Over the limit.  esfq_drop() discards exactly one packet and
+	 * already accounts for it in sch->stats.drops, so the drop must not
+	 * be counted a second time here.
+	 */
+	esfq_drop(sch);
+	return NET_XMIT_CN;
+}
+
+
+
+
+/*
+ * Hand out the next packet in round-robin order.
+ *
+ * The packet is taken from the slot following the current tail.  A slot
+ * that becomes empty is removed from the round and its hash bucket is
+ * released; a slot that has used up its allotment becomes the new tail so
+ * that the following slot is served next.
+ *
+ * Returns the packet, or NULL when no slot is active.
+ */
+static struct sk_buff *
+esfq_dequeue(struct Qdisc* sch)
+{
+ struct esfq_sched_data *q = (struct esfq_sched_data *)sch->data;
+ struct sk_buff *skb;
+ unsigned depth = q->depth;
+ esfq_index a, old_a;
+
+ /* No active slots */
+ if (q->tail == depth)
+ return NULL;
+
+ a = old_a = q->next[q->tail];
+
+ /* Grab packet */
+ skb = __skb_dequeue(&q->qs[a]);
+ esfq_dec(q, a);
+ sch->q.qlen--;
+
+ /* Is the slot empty? */
+ if (q->qs[a].qlen == 0) {
+ q->ht[q->hash[a]] = depth;
+ a = q->next[a];
+ /* the slot linked to itself: it was the only active one */
+ if (a == old_a) {
+ q->tail = depth;
+ return skb;
+ }
+ q->next[q->tail] = a;
+ q->allot[a] += q->quantum;
+ } else if ((q->allot[a] -= skb->len) <= 0) {
+ /* allotment exhausted: move on and refill the next slot */
+ q->tail = a;
+ a = q->next[a];
+ q->allot[a] += q->quantum;
+ }
+
+ return skb;
+}
+
+/*
+ * Empty the qdisc by dequeueing and freeing every queued packet.
+ */
+static void
+esfq_reset(struct Qdisc* sch)
+{
+	struct sk_buff *pkt;
+
+	for (pkt = esfq_dequeue(sch); pkt != NULL; pkt = esfq_dequeue(sch))
+		kfree_skb(pkt);
+}
+
+/*
+ * Timer callback: pick a new random perturbation (0..31) for the hash and
+ * re-arm the timer if periodic perturbation is enabled.
+ * arg is the owning Qdisc, stored in the timer's data field.
+ */
+static void esfq_perturbation(unsigned long arg)
+{
+	struct Qdisc *sch = (struct Qdisc*)arg;
+	struct esfq_sched_data *q = (struct esfq_sched_data *)sch->data;
+
+	q->perturbation = net_random()&0x1F;
+
+	if (!q->perturb_period)
+		return;
+
+	q->perturb_timer.expires = jiffies + q->perturb_period;
+	add_timer(&q->perturb_timer);
+}
+
+/*
+static int esfq_change(struct Qdisc *sch, struct rtattr *opt)
+{
+ struct esfq_sched_data *q = (struct esfq_sched_data *)sch->data;
+ struct tc_esfq_qopt *ctl = RTA_DATA(opt);
+ int old_perturb = q->perturb_period;
+
+ if (opt->rta_len < RTA_LENGTH(sizeof(*ctl)))
+ return -EINVAL;
+
+ sch_tree_lock(sch);
+ q->quantum = ctl->quantum ? : psched_mtu(sch->dev);
+ q->perturb_period = ctl->perturb_period*HZ;
+// q->hash_divisor = ctl->divisor;
+// q->tail = q->limit = q->depth = ctl->flows;
+
+ if (ctl->limit)
+ q->limit = min_t(u32, ctl->limit, q->depth);
+
+ if (ctl->hash_kind) {
+ q->hash_kind = ctl->hash_kind;
+ if (q->hash_kind != TCA_SFQ_HASH_CLASSIC)
+ q->perturb_period = 0;
+ }
+
+ // is sch_tree_lock enough to do this ?
+ while (sch->q.qlen >= q->limit-1)
+ esfq_drop(sch);
+
+ if (old_perturb)
+ del_timer(&q->perturb_timer);
+ if (q->perturb_period) {
+ q->perturb_timer.expires = jiffies + q->perturb_period;
+ add_timer(&q->perturb_timer);
+ } else {
+ q->perturbation = 0;
+ }
+ sch_tree_unlock(sch);
+ return 0;
+}
+*/
+
+/*
+ * Set up a new ESFQ instance.
+ *
+ * Without options the defaults are: quantum = device MTU, no perturbation,
+ * 1024 hash buckets, 128 slots and a limit of 128.  With options, zero
+ * values for quantum, divisor and flows select the same defaults, the
+ * limit is clamped to the number of slots, and the perturbation timer is
+ * started when a period is given.
+ *
+ * Returns 0 on success, -EINVAL for malformed or out-of-range options and
+ * -ENOBUFS when a table cannot be allocated.
+ */
+static int esfq_init(struct Qdisc *sch, struct rtattr *opt)
+{
+	struct esfq_sched_data *q = (struct esfq_sched_data *)sch->data;
+	struct tc_esfq_qopt *ctl;
+	/*
+	 * Upper bound for both the number of hash buckets and the number of
+	 * slots.  hash[] stores bucket numbers in an unsigned short, and the
+	 * table sizes below are computed as count * element size; bounding
+	 * the counts keeps bucket numbers representable and keeps those
+	 * products from wrapping around to a too-small allocation.
+	 */
+	const unsigned max_entries = 0x10000;
+	int i;
+
+	if (opt && opt->rta_len < RTA_LENGTH(sizeof(*ctl)))
+		return -EINVAL;
+
+	/* err_case inspects these, so give them a defined value up front */
+	q->ht = NULL;
+	q->dep = NULL;
+	q->next = NULL;
+	q->allot = NULL;
+	q->hash = NULL;
+	q->qs = NULL;
+
+	init_timer(&q->perturb_timer);
+	q->perturb_timer.data = (unsigned long)sch;
+	q->perturb_timer.function = esfq_perturbation;
+	q->perturbation = 0;
+	q->hash_kind = TCA_SFQ_HASH_CLASSIC;
+	q->max_depth = 0;
+	q->dyn_min = ~0U; /* maximum value for this type */
+	q->dyn_max = 0;  /* dyn_min/dyn_max will be set properly upon first packet */
+	if (opt == NULL) {
+		q->quantum = psched_mtu(sch->dev);
+		q->perturb_period = 0;
+		q->hash_divisor = 1024;
+		q->tail = q->limit = q->depth = 128;
+
+	} else {
+		ctl = RTA_DATA(opt);
+		q->quantum = ctl->quantum ? : psched_mtu(sch->dev);
+		q->perturb_period = ctl->perturb_period*HZ;
+		q->hash_divisor = ctl->divisor ? : 1024;
+		q->tail = q->limit = q->depth = ctl->flows ? : 128;
+
+		if (q->hash_divisor > max_entries || q->depth > max_entries)
+			return -EINVAL;
+
+		if (ctl->limit)
+			q->limit = min_t(u32, ctl->limit, q->depth);
+
+		if (ctl->hash_kind) {
+			q->hash_kind = ctl->hash_kind;
+		}
+
+		if (q->perturb_period) {
+			q->perturb_timer.expires = jiffies + q->perturb_period;
+			add_timer(&q->perturb_timer);
+		}
+	}
+
+	q->ht = kmalloc(q->hash_divisor*sizeof(esfq_index), GFP_KERNEL);
+	if (!q->ht)
+		goto err_case;
+
+	/* depth slot nodes followed by depth+1 list heads (queue length 0..depth) */
+	q->dep = kmalloc((1+q->depth*2)*sizeof(struct esfq_head), GFP_KERNEL);
+	if (!q->dep)
+		goto err_case;
+	q->next = kmalloc(q->depth*sizeof(esfq_index), GFP_KERNEL);
+	if (!q->next)
+		goto err_case;
+
+	q->allot = kmalloc(q->depth*sizeof(short), GFP_KERNEL);
+	if (!q->allot)
+		goto err_case;
+	q->hash = kmalloc(q->depth*sizeof(unsigned short), GFP_KERNEL);
+	if (!q->hash)
+		goto err_case;
+	q->qs = kmalloc(q->depth*sizeof(struct sk_buff_head), GFP_KERNEL);
+	if (!q->qs)
+		goto err_case;
+
+	/* every bucket starts out without a slot */
+	for (i=0; i< q->hash_divisor; i++)
+		q->ht[i] = q->depth;
+	/* empty slot queues and empty (self-linked) depth list heads */
+	for (i=0; i<q->depth; i++) {
+		skb_queue_head_init(&q->qs[i]);
+		q->dep[i+q->depth].next = i+q->depth;
+		q->dep[i+q->depth].prev = i+q->depth;
+	}
+
+	/* all slots are empty, so they all go onto the length-0 list */
+	for (i=0; i<q->depth; i++)
+		esfq_link(q, i);
+	return 0;
+err_case:
+	del_timer(&q->perturb_timer);
+	if (q->ht)
+		kfree(q->ht);
+	if (q->dep)
+		kfree(q->dep);
+	if (q->next)
+		kfree(q->next);
+	if (q->allot)
+		kfree(q->allot);
+	if (q->hash)
+		kfree(q->hash);
+	if (q->qs)
+		kfree(q->qs);
+	/* leave no dangling pointers behind in the private area */
+	q->ht = NULL;
+	q->dep = NULL;
+	q->next = NULL;
+	q->allot = NULL;
+	q->hash = NULL;
+	q->qs = NULL;
+	return -ENOBUFS;
+}
+
+/*
+ * Tear down an ESFQ instance: stop the perturbation timer and release
+ * every table that was allocated by esfq_init().
+ */
+static void esfq_destroy(struct Qdisc *sch)
+{
+	struct esfq_sched_data *q = (struct esfq_sched_data *)sch->data;
+	void *tables[6];
+	int t;
+
+	del_timer(&q->perturb_timer);
+
+	/* same release order as the tables are declared */
+	tables[0] = q->ht;
+	tables[1] = q->dep;
+	tables[2] = q->next;
+	tables[3] = q->allot;
+	tables[4] = q->hash;
+	tables[5] = q->qs;
+
+	for (t = 0; t < 6; t++) {
+		if (tables[t])
+			kfree(tables[t]);
+	}
+}
+
+/*
+ * Report the current configuration as a TCA_OPTIONS attribute appended to
+ * skb.  Returns skb->len on success.  On failure the message is trimmed
+ * back to its length on entry and -1 is returned.
+ */
+static int esfq_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+ struct esfq_sched_data *q = (struct esfq_sched_data *)sch->data;
+ unsigned char *b = skb->tail;
+ struct tc_esfq_qopt opt;
+
+ opt.quantum = q->quantum;
+ /* stored in jiffies, reported in seconds */
+ opt.perturb_period = q->perturb_period/HZ;
+
+ opt.limit = q->limit;
+ opt.divisor = q->hash_divisor;
+ opt.flows = q->depth;
+ opt.hash_kind = q->hash_kind;
+
+ /* NOTE(review): RTA_PUT presumably jumps to rtattr_failure when the
+ * attribute does not fit; the macro is not visible here - confirm */
+ RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
+
+ return skb->len;
+
+rtattr_failure:
+ skb_trim(skb, b - skb->data);
+ return -1;
+}
+
+/*
+ * Operations table for the "esfq" queueing discipline.  The discipline is
+ * classless (no cl_ops) and has no change operation.
+ */
+static struct Qdisc_ops esfq_qdisc_ops =
+{
+ .next = NULL,
+ .cl_ops = NULL,
+ .id = "esfq",
+ .priv_size = sizeof(struct esfq_sched_data),
+ .enqueue = esfq_enqueue,
+ .dequeue = esfq_dequeue,
+ .requeue = esfq_requeue,
+ .drop = esfq_drop,
+ .init = esfq_init,
+ .reset = esfq_reset,
+ .destroy = esfq_destroy,
+ .change = NULL, /* esfq_change - needs more work */
+ .dump = esfq_dump,
+// .owner = THIS_MODULE,
+};
+
+/* Module load: register the esfq qdisc; returns register_qdisc()'s result. */
+static int __init esfq_module_init(void)
+{
+ return register_qdisc(&esfq_qdisc_ops);
+}
+/* Module unload: unregister the esfq qdisc. */
+static void __exit esfq_module_exit(void)
+{
+ unregister_qdisc(&esfq_qdisc_ops);
+}
+module_init(esfq_module_init)
+module_exit(esfq_module_exit)
+MODULE_LICENSE("GPL");
diff --git a/release/src/linux/linux/net/sched/sch_fifo.c b/release/src/linux/linux/net/sched/sch_fifo.c
index d8ce46f2..3a7741e9 100644
--- a/release/src/linux/linux/net/sched/sch_fifo.c
+++ b/release/src/linux/linux/net/sched/sch_fifo.c
@@ -46,7 +46,7 @@ bfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch)
{
struct fifo_sched_data *q = (struct fifo_sched_data *)sch->data;
- if (sch->stats.backlog <= q->limit) {
+ if (sch->stats.backlog + skb->len <= q->limit) {
__skb_queue_tail(&sch->q, skb);
sch->stats.backlog += skb->len;
sch->stats.bytes += skb->len;
@@ -87,9 +87,10 @@ fifo_drop(struct Qdisc* sch)
skb = __skb_dequeue_tail(&sch->q);
if (skb) {
- sch->stats.backlog -= skb->len;
+ int len = skb->len;
+ sch->stats.backlog -= len;
kfree_skb(skb);
- return 1;
+ return len;
}
return 0;
}
@@ -106,7 +107,7 @@ pfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch)
{
struct fifo_sched_data *q = (struct fifo_sched_data *)sch->data;
- if (sch->q.qlen <= q->limit) {
+ if (sch->q.qlen < q->limit) {
__skb_queue_tail(&sch->q, skb);
sch->stats.bytes += skb->len;
sch->stats.packets++;
@@ -139,10 +140,12 @@ static int fifo_init(struct Qdisc *sch, struct rtattr *opt)
struct fifo_sched_data *q = (void*)sch->data;
if (opt == NULL) {
+ unsigned int limit = sch->dev->tx_queue_len ? : 1;
+
if (sch->ops == &bfifo_qdisc_ops)
- q->limit = sch->dev->tx_queue_len*sch->dev->mtu;
+ q->limit = limit*sch->dev->mtu;
else
- q->limit = sch->dev->tx_queue_len;
+ q->limit = limit;
} else {
struct tc_fifo_qopt *ctl = RTA_DATA(opt);
if (opt->rta_len < RTA_LENGTH(sizeof(*ctl)))
diff --git a/release/src/linux/linux/net/sched/sch_generic.c b/release/src/linux/linux/net/sched/sch_generic.c
index 7b0d49e7..ca30d124 100644
--- a/release/src/linux/linux/net/sched/sch_generic.c
+++ b/release/src/linux/linux/net/sched/sch_generic.c
@@ -29,6 +29,9 @@
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
+#include <linux/imq.h>
+#endif
#include <net/sock.h>
#include <net/pkt_sched.h>
@@ -79,6 +82,10 @@ int qdisc_restart(struct net_device *dev)
struct Qdisc *q = dev->qdisc;
struct sk_buff *skb;
+ /* BRCM: bail out if queue is null */
+ if (!q)
+ return 0;
+
/* Dequeue packet */
if ((skb = q->dequeue(q)) != NULL) {
if (spin_trylock(&dev->xmit_lock)) {
@@ -89,7 +96,11 @@ int qdisc_restart(struct net_device *dev)
spin_unlock(&dev->queue_lock);
if (!netif_queue_stopped(dev)) {
- if (netdev_nit)
+ if (netdev_nit
+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
+ && !(skb->imq_flags & IMQ_F_ENQUEUE)
+#endif
+ )
dev_queue_xmit_nit(skb, dev);
if (dev->hard_start_xmit(skb, dev) == 0) {
diff --git a/release/src/linux/linux/net/sched/sch_hfsc.c b/release/src/linux/linux/net/sched/sch_hfsc.c
new file mode 100644
index 00000000..0b6e6d38
--- /dev/null
+++ b/release/src/linux/linux/net/sched/sch_hfsc.c
@@ -0,0 +1,1817 @@
+/*
+ * Copyright (c) 2003 Patrick McHardy, <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * 2003-10-17 - Ported from altq
+ */
+/*
+ * Copyright (c) 1997-1999 Carnegie Mellon University. All Rights Reserved.
+ *
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation is hereby granted (including for commercial or
+ * for-profit use), provided that both the copyright notice and this
+ * permission notice appear in all copies of the software, derivative
+ * works, or modified versions, and any portions thereof.
+ *
+ * THIS SOFTWARE IS EXPERIMENTAL AND IS KNOWN TO HAVE BUGS, SOME OF
+ * WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON PROVIDES THIS
+ * SOFTWARE IN ITS ``AS IS'' CONDITION, AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+ * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * Carnegie Mellon encourages (but does not require) users of this
+ * software to return any improvements or extensions that they make,
+ * and to grant Carnegie Mellon the rights to redistribute these
+ * changes without encumbrance.
+ */
+/*
+ * H-FSC is described in Proceedings of SIGCOMM'97,
+ * "A Hierarchical Fair Service Curve Algorithm for Link-Sharing,
+ * Real-Time and Priority Service"
+ * by Ion Stoica, Hui Zhang, and T. S. Eugene Ng.
+ *
+ * Oleg Cherevko <olwi@aq.ml.com.ua> added the upperlimit for link-sharing.
+ * when a class has an upperlimit, the fit-time is computed from the
+ * upperlimit service curve. the link-sharing scheduler does not schedule
+ * a class whose fit-time exceeds the current time.
+ */
+
+#include <linux/kernel.h>
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/compiler.h>
+#include <linux/spinlock.h>
+#include <linux/skbuff.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/timer.h>
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <linux/init.h>
+#include <linux/netdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/pkt_sched.h>
+#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
+#include <asm/system.h>
+#include <asm/div64.h>
+
+#define HFSC_DEBUG 0
+
+/*
+ * kernel internal service curve representation:
+ * coordinates are given by 64 bit unsigned integers.
+ * x-axis: unit is clock count.
+ * y-axis: unit is byte.
+ *
+ * The service curve parameters are converted to the internal
+ * representation. The slope values are scaled to avoid overflow.
+ * the inverse slope values as well as the y-projection of the 1st
+ * segment are kept in order to avoid 64-bit divide operations
+ * that are expensive on 32-bit architectures.
+ */
+
+/*
+ * Two-segment service curve in the kernel-internal representation:
+ * a first segment described by its slope and its x/y projections,
+ * followed by a second segment described by its slope.
+ */
+struct internal_sc
+{
+ u64 sm1; /* scaled slope of the 1st segment */
+ u64 ism1; /* scaled inverse-slope of the 1st segment */
+ u64 dx; /* the x-projection of the 1st segment */
+ u64 dy; /* the y-projection of the 1st segment */
+ u64 sm2; /* scaled slope of the 2nd segment */
+ u64 ism2; /* scaled inverse-slope of the 2nd segment */
+};
+
+/*
+ * runtime service curve: the same two-segment shape as internal_sc plus
+ * the (x, y) point at which the curve currently starts
+ */
+struct runtime_sc
+{
+ u64 x; /* current starting position on x-axis */
+ u64 y; /* current starting position on y-axis */
+ u64 sm1; /* scaled slope of the 1st segment */
+ u64 ism1; /* scaled inverse-slope of the 1st segment */
+ u64 dx; /* the x-projection of the 1st segment */
+ u64 dy; /* the y-projection of the 1st segment */
+ u64 sm2; /* scaled slope of the 2nd segment */
+ u64 ism2; /* scaled inverse-slope of the 2nd segment */
+};
+
+/*
+ * Bit flags, one per service curve.
+ * NOTE(review): presumably stored in hfsc_class.cl_flags ("which curves
+ * are valid") and corresponding to cl_rsc / cl_fsc / cl_usc - confirm
+ * against the users of these flags.
+ */
+enum hfsc_class_flags
+{
+ HFSC_RSC = 0x1,
+ HFSC_FSC = 0x2,
+ HFSC_USC = 0x4
+};
+
+/*
+ * One class in the H-FSC hierarchy: identity and statistics, its position
+ * in the class tree, membership in the scheduler's lookup structures, and
+ * the scheduling state (times, curves, period counters).
+ */
+struct hfsc_class
+{
+ u32 classid; /* class id */
+ unsigned int refcnt; /* usage count */
+
+ struct tc_stats stats; /* generic statistics */
+ unsigned int level; /* class level in hierarchy */
+ struct tcf_proto *filter_list; /* filter list */
+ unsigned int filter_cnt; /* filter count */
+
+ struct hfsc_sched *sched; /* scheduler data */
+ struct hfsc_class *cl_parent; /* parent class */
+ struct list_head siblings; /* sibling classes */
+ struct list_head children; /* child classes */
+ struct Qdisc *qdisc; /* leaf qdisc */
+
+ rb_node_t el_node; /* qdisc's eligible tree member */
+ rb_root_t vt_tree; /* active children sorted by cl_vt */
+ rb_node_t vt_node; /* parent's vt_tree member */
+ rb_root_t cf_tree; /* active children sorted by cl_f */
+ rb_node_t cf_node; /* parent's cf_tree member */
+ struct list_head hlist; /* hash list member */
+ struct list_head dlist; /* drop list member */
+
+ u64 cl_total; /* total work in bytes */
+ u64 cl_cumul; /* cumulative work in bytes done by
+ real-time criteria */
+
+ u64 cl_d; /* deadline */
+ u64 cl_e; /* eligible time */
+ u64 cl_vt; /* virtual time */
+ u64 cl_f; /* time when this class will fit for
+ link-sharing, max(myf, cfmin) */
+ u64 cl_myf; /* my fit-time (calculated from this
+ class's own upperlimit curve) */
+ u64 cl_myfadj; /* my fit-time adjustment (to cancel
+ history dependence) */
+ u64 cl_cfmin; /* earliest children's fit-time (used
+ with cl_myf to obtain cl_f) */
+ u64 cl_cvtmin; /* minimal virtual time among the
+ children fit for link-sharing
+ (monotonic within a period) */
+ u64 cl_vtadj; /* intra-period cumulative vt
+ adjustment */
+ u64 cl_vtoff; /* inter-period cumulative vt offset */
+ u64 cl_cvtmax; /* max child's vt in the last period */
+ u64 cl_cvtoff; /* cumulative cvtmax of all periods */
+ u64 cl_pcvtoff; /* parent's cvtoff at initialization
+ time */
+
+ struct internal_sc cl_rsc; /* internal real-time service curve */
+ struct internal_sc cl_fsc; /* internal fair service curve */
+ struct internal_sc cl_usc; /* internal upperlimit service curve */
+ struct runtime_sc cl_deadline; /* deadline curve */
+ struct runtime_sc cl_eligible; /* eligible curve */
+ struct runtime_sc cl_virtual; /* virtual curve */
+ struct runtime_sc cl_ulimit; /* upperlimit curve */
+
+ unsigned long cl_flags; /* which curves are valid */
+ unsigned long cl_vtperiod; /* vt period sequence number */
+ unsigned long cl_parentperiod;/* parent's vt period sequence number*/
+ unsigned long cl_nactive; /* number of active children */
+};
+
+#define HFSC_HSIZE 16
+
+/*
+ * Per-qdisc state of one H-FSC instance.  The root class is embedded;
+ * all classes are reachable through the HFSC_HSIZE-bucket class hash.
+ */
+struct hfsc_sched
+{
+ u16 defcls; /* default class id */
+ struct hfsc_class root; /* root class */
+ struct list_head clhash[HFSC_HSIZE]; /* class hash */
+ rb_root_t eligible; /* eligible tree */
+ struct list_head droplist; /* active leaf class list (for
+ dropping) */
+ struct sk_buff_head requeue; /* requeued packet */
+ struct timer_list wd_timer; /* watchdog timer */
+};
+
+/*
+ * macros
+ */
+/*
+ * When the packet scheduler clock source is gettimeofday, replace
+ * PSCHED_GET_TIME so that `stamp' receives the current time as a single
+ * 64-bit microsecond count (seconds * 1000000 + microseconds).
+ */
+#if PSCHED_CLOCK_SOURCE == PSCHED_GETTIMEOFDAY
+#include <linux/time.h>
+#undef PSCHED_GET_TIME
+#define PSCHED_GET_TIME(stamp) \
+do { \
+ struct timeval tv; \
+ do_gettimeofday(&tv); \
+ (stamp) = 1000000ULL * tv.tv_sec + tv.tv_usec; \
+} while (0)
+#endif
+
+/*
+ * Debug-only assertion: with HFSC_DEBUG set, a failed condition is
+ * reported via printk (execution continues); otherwise ASSERT expands to
+ * nothing and the condition is not evaluated.
+ */
+#if HFSC_DEBUG
+#define ASSERT(cond) \
+do { \
+ if (unlikely(!(cond))) \
+ printk("assertion %s failed at %s:%i (%s)\n", \
+ #cond, __FILE__, __LINE__, __FUNCTION__); \
+} while (0)
+#else
+#define ASSERT(cond)
+#endif /* HFSC_DEBUG */
+
+#define HT_INFINITY 0xffffffffffffffffULL /* infinite time value */
+
+
+/*
+ * eligible tree holds backlogged classes being sorted by their eligible times.
+ * there is one eligible tree per hfsc instance.
+ */
+
+/*
+ * Insert cl into its scheduler's eligible tree, keyed by cl_e.
+ * A class whose key equals an existing one is placed to the right of it.
+ */
+static void
+eltree_insert(struct hfsc_class *cl)
+{
+	rb_node_t **link = &cl->sched->eligible.rb_node;
+	rb_node_t *above = NULL;
+
+	/* descend to the empty link where the new node belongs */
+	while (*link != NULL) {
+		struct hfsc_class *other;
+
+		above = *link;
+		other = rb_entry(above, struct hfsc_class, el_node);
+		link = (cl->cl_e < other->cl_e) ? &above->rb_left
+						: &above->rb_right;
+	}
+
+	rb_link_node(&cl->el_node, above, link);
+	rb_insert_color(&cl->el_node, &cl->sched->eligible);
+}
+
+/* Remove cl from its scheduler's eligible tree. */
+static inline void
+eltree_remove(struct hfsc_class *cl)
+{
+ rb_erase(&cl->el_node, &cl->sched->eligible);
+}
+
+/* Re-sort cl in the eligible tree by removing and re-inserting it. */
+static inline void
+eltree_update(struct hfsc_class *cl)
+{
+ eltree_remove(cl);
+ eltree_insert(cl);
+}
+
+/*
+ * Among the classes whose eligible time is not later than cur_time,
+ * return the one with the smallest deadline (NULL if there is none).
+ * The tree is walked in ascending cl_e order, so the scan stops at the
+ * first class that is not yet eligible.
+ */
+static inline struct hfsc_class *
+eltree_get_mindl(struct hfsc_sched *q, u64 cur_time)
+{
+	struct hfsc_class *best = NULL;
+	rb_node_t *node = rb_first(&q->eligible);
+
+	while (node != NULL) {
+		struct hfsc_class *cand;
+
+		cand = rb_entry(node, struct hfsc_class, el_node);
+		if (cand->cl_e > cur_time)
+			break;
+		if (best == NULL || cand->cl_d < best->cl_d)
+			best = cand;
+		node = rb_next(node);
+	}
+	return best;
+}
+
+/*
+ * Return the class with the smallest eligible time, i.e. the first node
+ * of the eligible tree, or NULL when the tree is empty.
+ */
+static inline struct hfsc_class *
+eltree_get_minel(struct hfsc_sched *q)
+{
+	rb_node_t *first = rb_first(&q->eligible);
+
+	return first != NULL ? rb_entry(first, struct hfsc_class, el_node)
+			     : NULL;
+}
+
+/*
+ * vttree holds backlogged child classes sorted by their virtual time.
+ * Each intermediate class has one vttree.
+ *
+ * Insert cl into its parent's vttree, keyed by cl_vt.  A class whose key
+ * equals an existing one is placed to the right of it.
+ */
+static void
+vttree_insert(struct hfsc_class *cl)
+{
+	rb_node_t **link = &cl->cl_parent->vt_tree.rb_node;
+	rb_node_t *above = NULL;
+
+	/* descend to the empty link where the new node belongs */
+	while (*link != NULL) {
+		struct hfsc_class *other;
+
+		above = *link;
+		other = rb_entry(above, struct hfsc_class, vt_node);
+		link = (cl->cl_vt < other->cl_vt) ? &above->rb_left
+						  : &above->rb_right;
+	}
+
+	rb_link_node(&cl->vt_node, above, link);
+	rb_insert_color(&cl->vt_node, &cl->cl_parent->vt_tree);
+}
+
+/* Remove cl from its parent's vttree. */
+static inline void
+vttree_remove(struct hfsc_class *cl)
+{
+ rb_erase(&cl->vt_node, &cl->cl_parent->vt_tree);
+}
+
+/* Re-sort cl in its parent's vttree by removing and re-inserting it. */
+static inline void
+vttree_update(struct hfsc_class *cl)
+{
+ vttree_remove(cl);
+ vttree_insert(cl);
+}
+
+/*
+ * Walk cl's vttree in ascending virtual-time order and return the first
+ * child whose fit-time (cl_f) is not later than cur_time, or NULL if no
+ * child fits.
+ */
+static inline struct hfsc_class *
+vttree_firstfit(struct hfsc_class *cl, u64 cur_time)
+{
+	rb_node_t *node = rb_first(&cl->vt_tree);
+
+	while (node != NULL) {
+		struct hfsc_class *child;
+
+		child = rb_entry(node, struct hfsc_class, vt_node);
+		if (child->cl_f <= cur_time)
+			return child;
+		node = rb_next(node);
+	}
+	return NULL;
+}
+
+/*
+ * get the leaf class with the minimum vt in the hierarchy
+ *
+ * Starting at cl, descend one level at a time by picking the first
+ * fitting child (vttree_firstfit) until a class with level 0 is reached.
+ * Returns that class, or NULL when cl's cl_cfmin is later than cur_time
+ * or when some level has no fitting child.  Along the way each visited
+ * parent's cl_cvtmin is raised to the chosen child's cl_vt if it was
+ * smaller.
+ */
+static struct hfsc_class *
+vttree_get_minvt(struct hfsc_class *cl, u64 cur_time)
+{
+ /* if root-class's cfmin is bigger than cur_time nothing to do */
+ if (cl->cl_cfmin > cur_time)
+ return NULL;
+
+ while (cl->level > 0) {
+ cl = vttree_firstfit(cl, cur_time);
+ if (cl == NULL)
+ return NULL;
+ /*
+ * update parent's cl_cvtmin.
+ */
+ if (cl->cl_parent->cl_cvtmin < cl->cl_vt)
+ cl->cl_parent->cl_cvtmin = cl->cl_vt;
+ }
+ return cl;
+}
+
+/*
+ * Insert cl into its parent's cf_tree, keyed by cl_f.  A class whose key
+ * equals an existing one is placed to the right of it.
+ */
+static void
+cftree_insert(struct hfsc_class *cl)
+{
+	rb_node_t **link = &cl->cl_parent->cf_tree.rb_node;
+	rb_node_t *above = NULL;
+
+	/* descend to the empty link where the new node belongs */
+	while (*link != NULL) {
+		struct hfsc_class *other;
+
+		above = *link;
+		other = rb_entry(above, struct hfsc_class, cf_node);
+		link = (cl->cl_f < other->cl_f) ? &above->rb_left
+						: &above->rb_right;
+	}
+
+	rb_link_node(&cl->cf_node, above, link);
+	rb_insert_color(&cl->cf_node, &cl->cl_parent->cf_tree);
+}
+
+static inline void
+cftree_remove(struct hfsc_class *cl) /* unlink cl from its parent's cf_tree */
+{
+ rb_erase(&cl->cf_node, &cl->cl_parent->cf_tree);
+}
+
+static inline void
+cftree_update(struct hfsc_class *cl) /* re-sort cl in its parent's cf_tree after cl_f changed */
+{
+ cftree_remove(cl);
+ cftree_insert(cl);
+}
+
+/*
+ * service curve support functions
+ *
+ * external service curve parameters
+ * m: bps
+ * d: us
+ * internal service curve parameters
+ * sm: (bytes/psched_us) << SM_SHIFT
+ * ism: (psched_us/byte) << ISM_SHIFT
+ * dx: psched_us
+ *
+ * Time source resolution
+ * PSCHED_JIFFIES: for 48<=HZ<=1534 resolution is between 0.63us and 1.27us.
+ * PSCHED_CPU: resolution is between 0.5us and 1us.
+ * PSCHED_GETTIMEOFDAY: resolution is exactly 1us.
+ *
+ * sm and ism are scaled in order to keep effective digits.
+ * SM_SHIFT and ISM_SHIFT are selected to keep at least 4 effective
+ * digits in decimal using the following table.
+ *
+ * Note: We can afford the additional accuracy (altq hfsc keeps at most
+ * 3 effective digits) thanks to the fact that linux clock is bounded
+ * much more tightly.
+ *
+ * bits/sec 100Kbps 1Mbps 10Mbps 100Mbps 1Gbps
+ * ------------+-------------------------------------------------------
+ * bytes/0.5us 6.25e-3 62.5e-3 625e-3 6250e-3 62500e-3
+ * bytes/us 12.5e-3 125e-3 1250e-3 12500e-3 125000e-3
+ * bytes/1.27us 15.875e-3 158.75e-3 1587.5e-3 15875e-3 158750e-3
+ *
+ * 0.5us/byte 160 16 1.6 0.16 0.016
+ * us/byte 80 8 0.8 0.08 0.008
+ * 1.27us/byte 63 6.3 0.63 0.063 0.0063
+ */
+#define SM_SHIFT 20 /* fixed-point scaling of sm values (see table above) */
+#define ISM_SHIFT 18 /* fixed-point scaling of ism values (see table above) */
+
+#define SM_MASK ((1ULL << SM_SHIFT) - 1) /* low SM_SHIFT bits; used by seg_x2y() */
+#define ISM_MASK ((1ULL << ISM_SHIFT) - 1) /* low ISM_SHIFT bits; used by seg_y2x() */
+
+static inline u64
+seg_x2y(u64 x, u64 sm) /* y = (x * sm) >> SM_SHIFT for one linear segment with scaled slope sm */
+{
+ u64 y;
+
+ /*
+ * compute
+ * y = x * sm >> SM_SHIFT
+ * but divide it for the upper and lower bits to avoid overflow
+ */
+ y = (x >> SM_SHIFT) * sm + (((x & SM_MASK) * sm) >> SM_SHIFT);
+ return y;
+}
+
+static inline u64
+seg_y2x(u64 y, u64 ism) /* x = (y * ism) >> ISM_SHIFT for one linear segment with scaled inverse slope ism */
+{
+ u64 x;
+
+ if (y == 0) /* checked first, so y == 0 yields 0 even when ism is HT_INFINITY */
+ x = 0;
+ else if (ism == HT_INFINITY) /* m2ism() returns HT_INFINITY for a zero slope */
+ x = HT_INFINITY;
+ else {
+ x = (y >> ISM_SHIFT) * ism /* high and low parts multiplied separately, as in seg_x2y() */
+ + (((y & ISM_MASK) * ism) >> ISM_SHIFT);
+ }
+ return x;
+}
+
+/* Convert m (bps) into sm (bytes/psched us) */
+static u64
+m2sm(u32 m) /* returns ceil((m << SM_SHIFT) / PSCHED_JIFFIE2US(HZ)) */
+{
+ u64 sm;
+
+ sm = ((u64)m << SM_SHIFT); /* widen before shifting so the shift cannot overflow 32 bits */
+ sm += PSCHED_JIFFIE2US(HZ) - 1; /* makes the division below round up */
+ do_div(sm, PSCHED_JIFFIE2US(HZ));
+ return sm;
+}
+
+/* convert m (bps) into ism (psched us/byte) */
+static u64
+m2ism(u32 m) /* returns ceil((PSCHED_JIFFIE2US(HZ) << ISM_SHIFT) / m), or HT_INFINITY when m == 0 */
+{
+ u64 ism;
+
+ if (m == 0) /* avoids dividing by zero below */
+ ism = HT_INFINITY;
+ else {
+ ism = ((u64)PSCHED_JIFFIE2US(HZ) << ISM_SHIFT);
+ ism += m - 1; /* makes the division below round up */
+ do_div(ism, m);
+ }
+ return ism;
+}
+
+/* convert d (us) into dx (psched us) */
+static u64
+d2dx(u32 d) /* returns ceil(d * PSCHED_JIFFIE2US(HZ) / 1000000) */
+{
+ u64 dx;
+
+ dx = ((u64)d * PSCHED_JIFFIE2US(HZ));
+ dx += 1000000 - 1; /* makes the division below round up */
+ do_div(dx, 1000000);
+ return dx;
+}
+
+/* convert sm (bytes/psched us) into m (bps) */
+static u32
+sm2m(u64 sm) /* inverse of m2sm(); the result is truncated to 32 bits */
+{
+ u64 m;
+
+ m = (sm * PSCHED_JIFFIE2US(HZ)) >> SM_SHIFT;
+ return (u32)m;
+}
+
+/* convert dx (psched us) into d (us) */
+static u32
+dx2d(u64 dx) /* inverse of d2dx(); the division rounds down and the result is truncated to 32 bits */
+{
+ u64 d;
+
+ d = dx * 1000000;
+ do_div(d, PSCHED_JIFFIE2US(HZ));
+ return (u32)d;
+}
+
+static void
+sc2isc(struct tc_service_curve *sc, struct internal_sc *isc) /* fill *isc from the external curve (m1, d, m2) in *sc */
+{
+ isc->sm1 = m2sm(sc->m1);
+ isc->ism1 = m2ism(sc->m1);
+ isc->dx = d2dx(sc->d);
+ isc->dy = seg_x2y(isc->dx, isc->sm1); /* y covered by the first segment over dx */
+ isc->sm2 = m2sm(sc->m2);
+ isc->ism2 = m2ism(sc->m2);
+}
+
+/*
+ * initialize the runtime service curve with the given internal
+ * service curve starting at (x, y).
+ */
+static void
+rtsc_init(struct runtime_sc *rtsc, struct internal_sc *isc, u64 x, u64 y) /* overwrites every field of *rtsc that is set below */
+{
+ rtsc->x = x; /* origin of the runtime curve */
+ rtsc->y = y;
+ rtsc->sm1 = isc->sm1; /* slopes and first-segment extent are copied from isc unchanged */
+ rtsc->ism1 = isc->ism1;
+ rtsc->dx = isc->dx;
+ rtsc->dy = isc->dy;
+ rtsc->sm2 = isc->sm2;
+ rtsc->ism2 = isc->ism2;
+}
+
+/*
+ * calculate the x value at which the runtime service curve reaches
+ * the given y value (rtsc_x2y() below maps in the other direction)
+ */
+static u64
+rtsc_y2x(struct runtime_sc *rtsc, u64 y) /* returns the x at which the two-segment curve *rtsc reaches y */
+{
+ u64 x;
+
+ if (y < rtsc->y) /* below the curve's origin: answer is the origin x */
+ x = rtsc->x;
+ else if (y <= rtsc->y + rtsc->dy) {
+ /* x belongs to the 1st segment */
+ if (rtsc->dy == 0) /* first segment gains no y: answer is its end */
+ x = rtsc->x + rtsc->dx;
+ else
+ x = rtsc->x + seg_y2x(y - rtsc->y, rtsc->ism1);
+ } else {
+ /* x belongs to the 2nd segment */
+ x = rtsc->x + rtsc->dx
+ + seg_y2x(y - rtsc->y - rtsc->dy, rtsc->ism2);
+ }
+ return x;
+}
+
+static u64
+rtsc_x2y(struct runtime_sc *rtsc, u64 x) /* returns the y value of the two-segment curve *rtsc at x */
+{
+ u64 y;
+
+ if (x <= rtsc->x) /* at or before the curve's origin: answer is the origin y */
+ y = rtsc->y;
+ else if (x <= rtsc->x + rtsc->dx)
+ /* y belongs to the 1st segment */
+ y = rtsc->y + seg_x2y(x - rtsc->x, rtsc->sm1);
+ else
+ /* y belongs to the 2nd segment */
+ y = rtsc->y + rtsc->dy
+ + seg_x2y(x - rtsc->x - rtsc->dx, rtsc->sm2);
+ return y;
+}
+
+/*
+ * update the runtime service curve by taking the minimum of the current
+ * runtime service curve and the service curve starting at (x, y).
+ */
+static void
+rtsc_min(struct runtime_sc *rtsc, struct internal_sc *isc, u64 x, u64 y) /* only x, y, dx and dy of *rtsc are ever written; slopes are left as they are */
+{
+ u64 y1, y2, dx, dy;
+ u32 dsm; /* NOTE(review): 32-bit, while m2sm() produces u64 values - confirm sm1 - sm2 always fits */
+
+ if (isc->sm1 <= isc->sm2) {
+ /* service curve is convex */
+ y1 = rtsc_x2y(rtsc, x);
+ if (y1 < y)
+ /* the current rtsc is smaller */
+ return;
+ rtsc->x = x; /* otherwise restart the curve at (x, y); dx and dy are kept */
+ rtsc->y = y;
+ return;
+ }
+
+ /*
+ * service curve is concave
+ * compute the two y values of the current rtsc
+ * y1: at x
+ * y2: at (x + dx)
+ */
+ y1 = rtsc_x2y(rtsc, x);
+ if (y1 <= y) {
+ /* rtsc is below isc, no change to rtsc */
+ return;
+ }
+
+ y2 = rtsc_x2y(rtsc, x + isc->dx);
+ if (y2 >= y + isc->dy) {
+ /* rtsc is above isc, replace rtsc by isc */
+ rtsc->x = x;
+ rtsc->y = y;
+ rtsc->dx = isc->dx;
+ rtsc->dy = isc->dy;
+ return;
+ }
+
+ /*
+ * the two curves intersect
+ * compute the offsets (dx, dy) using the reverse
+ * function of seg_x2y()
+ * seg_x2y(dx, sm1) == seg_x2y(dx, sm2) + (y1 - y)
+ */
+ dx = (y1 - y) << SM_SHIFT;
+ dsm = isc->sm1 - isc->sm2; /* non-zero here: this path is reached only when sm1 > sm2 */
+ do_div(dx, dsm);
+ /*
+ * check if (x, y1) belongs to the 1st segment of rtsc.
+ * if so, add the offset.
+ */
+ if (rtsc->x + rtsc->dx > x)
+ dx += rtsc->x + rtsc->dx - x;
+ dy = seg_x2y(dx, isc->sm1);
+
+ rtsc->x = x;
+ rtsc->y = y;
+ rtsc->dx = dx;
+ rtsc->dy = dy;
+ return;
+}
+
+static void
+init_ed(struct hfsc_class *cl, unsigned int next_len) /* rebuild deadline/eligible curves at the current time, set cl_e/cl_d and insert cl into the eligible tree */
+{
+ u64 cur_time;
+
+ PSCHED_GET_TIME(cur_time);
+
+ /* update the deadline curve */
+ rtsc_min(&cl->cl_deadline, &cl->cl_rsc, cur_time, cl->cl_cumul);
+
+ /*
+ * update the eligible curve.
+ * for concave, it is equal to the deadline curve.
+ * for convex, it is a linear curve with slope m2.
+ */
+ cl->cl_eligible = cl->cl_deadline;
+ if (cl->cl_rsc.sm1 <= cl->cl_rsc.sm2) { /* same convexity test as in rtsc_min() */
+ cl->cl_eligible.dx = 0;
+ cl->cl_eligible.dy = 0;
+ }
+
+ /* compute e and d */
+ cl->cl_e = rtsc_y2x(&cl->cl_eligible, cl->cl_cumul);
+ cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len); /* deadline accounts for the next packet's length */
+
+ eltree_insert(cl);
+}
+
+static void
+update_ed(struct hfsc_class *cl, unsigned int next_len) /* recompute cl_e and cl_d from the existing curves, then call eltree_update() */
+{
+ cl->cl_e = rtsc_y2x(&cl->cl_eligible, cl->cl_cumul);
+ cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);
+
+ eltree_update(cl);
+}
+
+static inline void
+update_d(struct hfsc_class *cl, unsigned int next_len) /* recompute cl_d only; cl_e and the eligible tree are not touched */
+{
+ cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);
+}
+
+static inline void
+update_cfmin(struct hfsc_class *cl) /* set cl_cfmin to the cl_f of the first entry in cl's cf_tree, or 0 if the tree is empty */
+{
+ rb_node_t *n = rb_first(&cl->cf_tree);
+ struct hfsc_class *p;
+
+ if (n == NULL) {
+ cl->cl_cfmin = 0;
+ return;
+ }
+ p = rb_entry(n, struct hfsc_class, cf_node);
+ cl->cl_cfmin = p->cl_f;
+}
+
+static void
+init_vf(struct hfsc_class *cl, unsigned int len) /* walks from cl up to (not including) the parentless class; len is not used in this body */
+{
+ struct hfsc_class *max_cl;
+ rb_node_t *n;
+ u64 vt, f, cur_time;
+ int go_active;
+
+ cur_time = 0; /* 0 means "clock not read yet"; it is read at most once below */
+ go_active = 1;
+ for (; cl->cl_parent != NULL; cl = cl->cl_parent) {
+ if (go_active && cl->cl_nactive++ == 0) /* cl_nactive is incremented only while go_active is still set */
+ go_active = 1;
+ else
+ go_active = 0;
+
+ if (go_active) {
+ n = rb_last(&cl->cl_parent->vt_tree); /* sibling with the largest vt, if the tree is not empty */
+ if (n != NULL) {
+ max_cl = rb_entry(n, struct hfsc_class,vt_node);
+ /*
+ * set vt to the average of the min and max
+ * classes. if the parent's period didn't
+ * change, don't decrease vt of the class.
+ */
+ vt = max_cl->cl_vt;
+ if (cl->cl_parent->cl_cvtmin != 0)
+ vt = (cl->cl_parent->cl_cvtmin + vt)/2;
+
+ if (cl->cl_parent->cl_vtperiod !=
+ cl->cl_parentperiod || vt > cl->cl_vt)
+ cl->cl_vt = vt;
+ } else {
+ /*
+ * first child for a new parent backlog period.
+ * add parent's cvtmax to cvtoff to make a new
+ * vt (vtoff + vt) larger than the vt in the
+ * last period for all children.
+ */
+ vt = cl->cl_parent->cl_cvtmax;
+ cl->cl_parent->cl_cvtoff += vt;
+ cl->cl_parent->cl_cvtmax = 0;
+ cl->cl_parent->cl_cvtmin = 0;
+ cl->cl_vt = 0;
+ }
+
+ cl->cl_vtoff = cl->cl_parent->cl_cvtoff -
+ cl->cl_pcvtoff;
+
+ /* update the virtual curve */
+ vt = cl->cl_vt + cl->cl_vtoff;
+ rtsc_min(&cl->cl_virtual, &cl->cl_fsc, vt,
+ cl->cl_total);
+ if (cl->cl_virtual.x == vt) { /* curve now starts at vt: move the offset into the curve itself */
+ cl->cl_virtual.x -= cl->cl_vtoff;
+ cl->cl_vtoff = 0;
+ }
+ cl->cl_vtadj = 0;
+
+ cl->cl_vtperiod++; /* increment vt period */
+ cl->cl_parentperiod = cl->cl_parent->cl_vtperiod;
+ if (cl->cl_parent->cl_nactive == 0)
+ cl->cl_parentperiod++;
+ cl->cl_f = 0;
+
+ vttree_insert(cl);
+ cftree_insert(cl);
+
+ if (cl->cl_flags & HFSC_USC) {
+ /* class has upper limit curve */
+ if (cur_time == 0)
+ PSCHED_GET_TIME(cur_time);
+
+ /* update the ulimit curve */
+ rtsc_min(&cl->cl_ulimit, &cl->cl_usc, cur_time,
+ cl->cl_total);
+ /* compute myf */
+ cl->cl_myf = rtsc_y2x(&cl->cl_ulimit,
+ cl->cl_total);
+ cl->cl_myfadj = 0;
+ }
+ }
+
+ f = max(cl->cl_myf, cl->cl_cfmin); /* evaluated on every level, also after go_active dropped to 0 */
+ if (f != cl->cl_f) {
+ cl->cl_f = f;
+ cftree_update(cl);
+ update_cfmin(cl->cl_parent);
+ }
+ }
+}
+
+static void
+update_vf(struct hfsc_class *cl, unsigned int len, u64 cur_time) /* adds len to cl_total on cl and its ancestors and refreshes vt/f; cur_time is referenced only inside the #if 0 code */
+{
+ u64 f; /* , myf_bound, delta; */
+ int go_passive = 0;
+
+ if (cl->qdisc->q.qlen == 0 && cl->cl_flags & HFSC_FSC) /* leaf queue is empty and the class has HFSC_FSC set */
+ go_passive = 1;
+
+ for (; cl->cl_parent != NULL; cl = cl->cl_parent) {
+ cl->cl_total += len;
+
+ if (!(cl->cl_flags & HFSC_FSC) || cl->cl_nactive == 0) /* such levels only get their total updated */
+ continue;
+
+ if (go_passive && --cl->cl_nactive == 0) /* cl_nactive is decremented only while go_passive is still set */
+ go_passive = 1;
+ else
+ go_passive = 0;
+
+ if (go_passive) {
+ /* no more active child, going passive */
+
+ /* update cvtmax of the parent class */
+ if (cl->cl_vt > cl->cl_parent->cl_cvtmax)
+ cl->cl_parent->cl_cvtmax = cl->cl_vt;
+
+ /* remove this class from the vt tree */
+ vttree_remove(cl);
+
+ cftree_remove(cl);
+ update_cfmin(cl->cl_parent);
+
+ continue;
+ }
+
+ /*
+ * update vt and f
+ */
+ cl->cl_vt = rtsc_y2x(&cl->cl_virtual, cl->cl_total)
+ - cl->cl_vtoff + cl->cl_vtadj;
+
+ /*
+ * if vt of the class is smaller than cvtmin,
+ * the class was skipped in the past due to non-fit.
+ * if so, we need to adjust vtadj.
+ */
+ if (cl->cl_vt < cl->cl_parent->cl_cvtmin) {
+ cl->cl_vtadj += cl->cl_parent->cl_cvtmin - cl->cl_vt;
+ cl->cl_vt = cl->cl_parent->cl_cvtmin;
+ }
+
+ /* update the vt tree */
+ vttree_update(cl);
+
+ if (cl->cl_flags & HFSC_USC) {
+ cl->cl_myf = cl->cl_myfadj + rtsc_y2x(&cl->cl_ulimit,
+ cl->cl_total);
+#if 0
+ /*
+ * This code causes classes to stay way under their
+ * limit when multiple classes are used at gigabit
+ * speed. needs investigation. -kaber
+ */
+ /*
+ * if myf lags behind by more than one clock tick
+ * from the current time, adjust myfadj to prevent
+ * a rate-limited class from going greedy.
+ * in a steady state under rate-limiting, myf
+ * fluctuates within one clock tick.
+ */
+ myf_bound = cur_time - PSCHED_JIFFIE2US(1);
+ if (cl->cl_myf < myf_bound) {
+ delta = cur_time - cl->cl_myf;
+ cl->cl_myfadj += delta;
+ cl->cl_myf += delta;
+ }
+#endif
+ }
+
+ f = max(cl->cl_myf, cl->cl_cfmin);
+ if (f != cl->cl_f) { /* re-sort in the cf tree only when f actually changed */
+ cl->cl_f = f;
+ cftree_update(cl);
+ update_cfmin(cl->cl_parent);
+ }
+ }
+}
+
+static void
+set_active(struct hfsc_class *cl, unsigned int len) /* called from hfsc_enqueue() when the class queue length becomes 1 */
+{
+ if (cl->cl_flags & HFSC_RSC)
+ init_ed(cl, len);
+ if (cl->cl_flags & HFSC_FSC)
+ init_vf(cl, len);
+
+ list_add_tail(&cl->dlist, &cl->sched->droplist); /* droplist is the list hfsc_drop() walks */
+}
+
+static void
+set_passive(struct hfsc_class *cl) /* undo set_active() except for the vt/cf trees (see comment below) */
+{
+ if (cl->cl_flags & HFSC_RSC)
+ eltree_remove(cl);
+
+ list_del(&cl->dlist);
+
+ /*
+ * vttree is now handled in update_vf() so that update_vf(cl, 0, 0)
+ * needs to be called explicitly to remove a class from vttree.
+ */
+}
+
+/*
+ * hack to get length of first packet in queue.
+ */
+static unsigned int
+qdisc_peek_len(struct Qdisc *sch) /* returns the length of the packet obtained by dequeue+requeue on sch, or 0 on failure */
+{
+ struct sk_buff *skb;
+ unsigned int len;
+
+ skb = sch->dequeue(sch);
+ if (skb == NULL) {
+ if (net_ratelimit())
+ printk("qdisc_peek_len: non work-conserving qdisc ?\n");
+ return 0;
+ }
+ len = skb->len; /* read before the skb is handed back to the qdisc */
+ if (unlikely(sch->ops->requeue(skb, sch) != NET_XMIT_SUCCESS)) {
+ if (net_ratelimit())
+ printk("qdisc_peek_len: failed to requeue\n");
+ return 0;
+ }
+ return len;
+}
+
+static void
+hfsc_purge_queue(struct Qdisc *sch, struct hfsc_class *cl) /* reset cl's qdisc; if it held packets, make cl passive and fix sch's qlen */
+{
+ unsigned int len = cl->qdisc->q.qlen; /* sampled before the reset below */
+
+ qdisc_reset(cl->qdisc);
+ if (len > 0) {
+ update_vf(cl, 0, 0); /* the explicit call that set_passive()'s comment asks for */
+ set_passive(cl);
+ sch->q.qlen -= len;
+ }
+}
+
+static void
+hfsc_adjust_levels(struct hfsc_class *cl) /* recompute level for cl and every ancestor: 1 + the highest child level */
+{
+ struct hfsc_class *p;
+ unsigned int level;
+
+ do {
+ level = 0;
+ list_for_each_entry(p, &cl->children, siblings) {
+ if (p->level > level)
+ level = p->level;
+ }
+ cl->level = level + 1; /* note: yields 1 even when the children list is empty */
+ } while ((cl = cl->cl_parent) != NULL);
+}
+
+static inline unsigned int
+hfsc_hash(u32 h) /* bucket index into clhash[] for a class id */
+{
+ h ^= h >> 8;
+ h ^= h >> 4;
+
+ return h & (HFSC_HSIZE - 1); /* mask form presumably relies on HFSC_HSIZE being a power of two - TODO confirm */
+}
+
+static inline struct hfsc_class *
+hfsc_find_class(u32 classid, struct Qdisc *sch) /* look classid up in the qdisc's class hash; NULL if absent */
+{
+ struct hfsc_sched *q = (struct hfsc_sched *)sch->data;
+ struct hfsc_class *cl;
+
+ list_for_each_entry(cl, &q->clhash[hfsc_hash(classid)], hlist) {
+ if (cl->classid == classid)
+ return cl;
+ }
+ return NULL;
+}
+
+static void
+hfsc_change_rsc(struct hfsc_class *cl, struct tc_service_curve *rsc, /* install rsc on cl and restart the deadline/eligible curves */
+ u64 cur_time)
+{
+ sc2isc(rsc, &cl->cl_rsc);
+ rtsc_init(&cl->cl_deadline, &cl->cl_rsc, cur_time, cl->cl_cumul);
+ cl->cl_eligible = cl->cl_deadline;
+ if (cl->cl_rsc.sm1 <= cl->cl_rsc.sm2) { /* same eligible-curve adjustment as in init_ed() */
+ cl->cl_eligible.dx = 0;
+ cl->cl_eligible.dy = 0;
+ }
+ cl->cl_flags |= HFSC_RSC;
+}
+
+static void
+hfsc_change_fsc(struct hfsc_class *cl, struct tc_service_curve *fsc) /* install fsc on cl and restart the virtual curve at (cl_vt, cl_total) */
+{
+ sc2isc(fsc, &cl->cl_fsc);
+ rtsc_init(&cl->cl_virtual, &cl->cl_fsc, cl->cl_vt, cl->cl_total);
+ cl->cl_flags |= HFSC_FSC;
+}
+
+static void
+hfsc_change_usc(struct hfsc_class *cl, struct tc_service_curve *usc, /* install usc on cl and restart the ulimit curve at (cur_time, cl_total) */
+ u64 cur_time)
+{
+ sc2isc(usc, &cl->cl_usc);
+ rtsc_init(&cl->cl_ulimit, &cl->cl_usc, cur_time, cl->cl_total);
+ cl->cl_flags |= HFSC_USC;
+}
+
+static int
+hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, /* modifies the class in *arg when it is non-zero, otherwise creates class classid and stores it in *arg */
+ struct rtattr **tca, unsigned long *arg)
+{
+ struct hfsc_sched *q = (struct hfsc_sched *)sch->data;
+ struct hfsc_class *cl = (struct hfsc_class *)*arg;
+ struct hfsc_class *parent = NULL;
+ struct rtattr *opt = tca[TCA_OPTIONS-1];
+ struct rtattr *tb[TCA_HFSC_MAX];
+ struct tc_service_curve *rsc = NULL, *fsc = NULL, *usc = NULL;
+ u64 cur_time;
+
+ if (opt == NULL ||
+ rtattr_parse(tb, TCA_HFSC_MAX, RTA_DATA(opt), RTA_PAYLOAD(opt)))
+ return -EINVAL;
+
+ if (tb[TCA_HFSC_RSC-1]) {
+ if (RTA_PAYLOAD(tb[TCA_HFSC_RSC-1]) < sizeof(*rsc))
+ return -EINVAL;
+ rsc = RTA_DATA(tb[TCA_HFSC_RSC-1]);
+ if (rsc->m1 == 0 && rsc->m2 == 0) /* an all-zero curve is treated as "not given" */
+ rsc = NULL;
+ }
+
+ if (tb[TCA_HFSC_FSC-1]) {
+ if (RTA_PAYLOAD(tb[TCA_HFSC_FSC-1]) < sizeof(*fsc))
+ return -EINVAL;
+ fsc = RTA_DATA(tb[TCA_HFSC_FSC-1]);
+ if (fsc->m1 == 0 && fsc->m2 == 0)
+ fsc = NULL;
+ }
+
+ if (tb[TCA_HFSC_USC-1]) {
+ if (RTA_PAYLOAD(tb[TCA_HFSC_USC-1]) < sizeof(*usc))
+ return -EINVAL;
+ usc = RTA_DATA(tb[TCA_HFSC_USC-1]);
+ if (usc->m1 == 0 && usc->m2 == 0)
+ usc = NULL;
+ }
+
+ if (cl != NULL) { /* existing class: update its curves in place */
+ if (parentid) { /* a given parentid must match the current parent; re-parenting is rejected */
+ if (cl->cl_parent && cl->cl_parent->classid != parentid)
+ return -EINVAL;
+ if (cl->cl_parent == NULL && parentid != TC_H_ROOT)
+ return -EINVAL;
+ }
+ PSCHED_GET_TIME(cur_time);
+
+ sch_tree_lock(sch);
+ if (rsc != NULL)
+ hfsc_change_rsc(cl, rsc, cur_time);
+ if (fsc != NULL)
+ hfsc_change_fsc(cl, fsc);
+ if (usc != NULL)
+ hfsc_change_usc(cl, usc, cur_time);
+
+ if (cl->qdisc->q.qlen != 0) { /* backlogged class: refresh its scheduling state for the new curves */
+ if (cl->cl_flags & HFSC_RSC)
+ update_ed(cl, qdisc_peek_len(cl->qdisc));
+ if (cl->cl_flags & HFSC_FSC)
+ update_vf(cl, 0, cur_time);
+ }
+ sch_tree_unlock(sch);
+
+#ifdef CONFIG_NET_ESTIMATOR
+ if (tca[TCA_RATE-1]) {
+ qdisc_kill_estimator(&cl->stats);
+ qdisc_new_estimator(&cl->stats, tca[TCA_RATE-1]);
+ }
+#endif
+ return 0;
+ }
+
+ if (parentid == TC_H_ROOT) /* from here on: create a new class */
+ return -EEXIST;
+
+ parent = &q->root; /* default parent when parentid is 0 */
+ if (parentid) {
+ parent = hfsc_find_class(parentid, sch);
+ if (parent == NULL)
+ return -ENOENT;
+ }
+
+ if (classid == 0 || TC_H_MAJ(classid ^ sch->handle) != 0) /* classid must share the qdisc handle's major part */
+ return -EINVAL;
+ if (hfsc_find_class(classid, sch))
+ return -EEXIST;
+
+ if (rsc == NULL && fsc == NULL) /* a new class needs at least one of rsc and fsc */
+ return -EINVAL;
+
+ cl = kmalloc(sizeof(struct hfsc_class), GFP_KERNEL);
+ if (cl == NULL)
+ return -ENOBUFS;
+ memset(cl, 0, sizeof(struct hfsc_class));
+
+ if (rsc != NULL)
+ hfsc_change_rsc(cl, rsc, 0);
+ if (fsc != NULL)
+ hfsc_change_fsc(cl, fsc);
+ if (usc != NULL)
+ hfsc_change_usc(cl, usc, 0);
+
+ cl->refcnt = 1;
+ cl->classid = classid;
+ cl->sched = q;
+ cl->cl_parent = parent;
+ cl->qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
+ if (cl->qdisc == NULL) /* fall back to noop_qdisc instead of failing the request */
+ cl->qdisc = &noop_qdisc;
+ cl->stats.lock = &sch->dev->queue_lock;
+ INIT_LIST_HEAD(&cl->children);
+ cl->vt_tree = RB_ROOT;
+ cl->cf_tree = RB_ROOT;
+
+ sch_tree_lock(sch);
+ list_add_tail(&cl->hlist, &q->clhash[hfsc_hash(classid)]);
+ list_add_tail(&cl->siblings, &parent->children);
+ if (parent->level == 0) /* parent was a leaf until now: drop whatever it had queued */
+ hfsc_purge_queue(sch, parent);
+ hfsc_adjust_levels(parent);
+ cl->cl_pcvtoff = parent->cl_cvtoff;
+ sch_tree_unlock(sch);
+
+#ifdef CONFIG_NET_ESTIMATOR
+ if (tca[TCA_RATE-1])
+ qdisc_new_estimator(&cl->stats, tca[TCA_RATE-1]);
+#endif
+ *arg = (unsigned long)cl;
+ return 0;
+}
+
+static void
+hfsc_destroy_filters(struct tcf_proto **fl) /* destroy every filter on the chain; *fl ends up NULL */
+{
+ struct tcf_proto *tp;
+
+ while ((tp = *fl) != NULL) {
+ *fl = tp->next; /* unlink the head before destroying it */
+ tcf_destroy(tp);
+ }
+}
+
+static void
+hfsc_destroy_class(struct Qdisc *sch, struct hfsc_class *cl) /* release cl's filters, qdisc and (if configured) estimator, then free cl */
+{
+ struct hfsc_sched *q = (struct hfsc_sched *)sch->data;
+
+ hfsc_destroy_filters(&cl->filter_list);
+ qdisc_destroy(cl->qdisc);
+#ifdef CONFIG_NET_ESTIMATOR
+ qdisc_kill_estimator(&cl->stats);
+#endif
+ if (cl != &q->root) /* the root class is embedded in hfsc_sched, not kmalloc'ed */
+ kfree(cl);
+}
+
+static int
+hfsc_delete_class(struct Qdisc *sch, unsigned long arg) /* returns -EBUSY for the root, for classes with level > 0 and for classes with bound filters; 0 otherwise */
+{
+ struct hfsc_sched *q = (struct hfsc_sched *)sch->data;
+ struct hfsc_class *cl = (struct hfsc_class *)arg;
+
+ if (cl->level > 0 || cl->filter_cnt > 0 || cl == &q->root)
+ return -EBUSY;
+
+ sch_tree_lock(sch);
+
+ list_del(&cl->hlist);
+ list_del(&cl->siblings);
+ hfsc_adjust_levels(cl->cl_parent); /* cl is already off the children list here */
+ hfsc_purge_queue(sch, cl);
+ if (--cl->refcnt == 0) /* otherwise the class is destroyed by a later hfsc_put_class() */
+ hfsc_destroy_class(sch, cl);
+
+ sch_tree_unlock(sch);
+ return 0;
+}
+
+static struct hfsc_class *
+hfsc_classify(struct sk_buff *skb, struct Qdisc *sch) /* returns the level-0 class for skb, or NULL if none applies */
+{
+ struct hfsc_sched *q = (struct hfsc_sched *)sch->data;
+ struct hfsc_class *cl;
+ struct tcf_result res;
+ struct tcf_proto *tcf;
+ int result;
+
+ if (TC_H_MAJ(skb->priority ^ sch->handle) == 0 && /* skb->priority may name a class of this qdisc directly */
+ (cl = hfsc_find_class(skb->priority, sch)) != NULL)
+ if (cl->level == 0)
+ return cl;
+
+ tcf = q->root.filter_list; /* otherwise start with the root class's filters */
+ while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) {
+#ifdef CONFIG_NET_CLS_POLICE
+ if (result == TC_POLICE_SHOT)
+ return NULL;
+#endif
+ if ((cl = (struct hfsc_class *)res.class) == NULL) { /* no class pointer in the result: resolve by classid */
+ if ((cl = hfsc_find_class(res.classid, sch)) == NULL)
+ break; /* filter selected invalid classid */
+ }
+
+ if (cl->level == 0)
+ return cl; /* hit leaf class */
+
+ /* apply inner filter chain */
+ tcf = cl->filter_list;
+ }
+
+ /* classification failed, try default class */
+ cl = hfsc_find_class(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
+ if (cl == NULL || cl->level > 0) /* the default class must exist and be at level 0 */
+ return NULL;
+
+ return cl;
+}
+
+static int
+hfsc_graft_class(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, /* swap the qdisc of level-0 class arg for new; the previous one is returned in *old */
+ struct Qdisc **old)
+{
+ struct hfsc_class *cl = (struct hfsc_class *)arg;
+
+ if (cl == NULL)
+ return -ENOENT;
+ if (cl->level > 0)
+ return -EINVAL;
+ if (new == NULL) { /* no replacement given: use a default pfifo, or noop_qdisc if that fails */
+ new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
+ if (new == NULL)
+ new = &noop_qdisc;
+ }
+
+ sch_tree_lock(sch);
+ hfsc_purge_queue(sch, cl); /* empty the old qdisc before it is swapped out */
+ *old = xchg(&cl->qdisc, new);
+ sch_tree_unlock(sch);
+ return 0;
+}
+
+static struct Qdisc *
+hfsc_class_leaf(struct Qdisc *sch, unsigned long arg) /* qdisc of class arg if it is non-NULL and at level 0, else NULL */
+{
+ struct hfsc_class *cl = (struct hfsc_class *)arg;
+
+ if (cl != NULL && cl->level == 0)
+ return cl->qdisc;
+
+ return NULL;
+}
+
+static unsigned long
+hfsc_get_class(struct Qdisc *sch, u32 classid) /* look up classid and take a reference; returns 0 if not found */
+{
+ struct hfsc_class *cl = hfsc_find_class(classid, sch);
+
+ if (cl != NULL)
+ cl->refcnt++;
+
+ return (unsigned long)cl;
+}
+
+static void
+hfsc_put_class(struct Qdisc *sch, unsigned long arg) /* drop one reference; destroys the class when the count reaches 0 */
+{
+ struct hfsc_class *cl = (struct hfsc_class *)arg;
+
+ if (--cl->refcnt == 0)
+ hfsc_destroy_class(sch, cl);
+}
+
+static unsigned long
+hfsc_bind_tcf(struct Qdisc *sch, unsigned long parent, u32 classid) /* count a filter binding on class classid; returns the class, or 0 */
+{
+ struct hfsc_class *p = (struct hfsc_class *)parent;
+ struct hfsc_class *cl = hfsc_find_class(classid, sch);
+
+ if (cl != NULL) {
+ if (p != NULL && p->level <= cl->level) /* the target must be at a strictly lower level than parent */
+ return 0;
+ cl->filter_cnt++;
+ }
+
+ return (unsigned long)cl;
+}
+
+static void
+hfsc_unbind_tcf(struct Qdisc *sch, unsigned long arg) /* undo the filter_cnt increment done by hfsc_bind_tcf() */
+{
+ struct hfsc_class *cl = (struct hfsc_class *)arg;
+
+ cl->filter_cnt--;
+}
+
+static struct tcf_proto **
+hfsc_tcf_chain(struct Qdisc *sch, unsigned long arg) /* address of the filter list of class arg */
+{
+ struct hfsc_sched *q = (struct hfsc_sched *)sch->data;
+ struct hfsc_class *cl = (struct hfsc_class *)arg;
+
+ if (cl == NULL) /* arg == 0 selects the root class */
+ cl = &q->root;
+
+ return &cl->filter_list;
+}
+
+static int
+hfsc_dump_sc(struct sk_buff *skb, int attr, struct internal_sc *sc) /* append sc, converted back to external units, as attribute attr; -1 on failure */
+{
+ struct tc_service_curve tsc;
+
+ tsc.m1 = sm2m(sc->sm1);
+ tsc.d = dx2d(sc->dx);
+ tsc.m2 = sm2m(sc->sm2);
+ RTA_PUT(skb, attr, sizeof(tsc), &tsc); /* presumably jumps to rtattr_failure when skb lacks room - confirm against the macro */
+
+ return skb->len;
+
+ rtattr_failure:
+ return -1;
+}
+
+static inline int
+hfsc_dump_curves(struct sk_buff *skb, struct hfsc_class *cl) /* dump each curve whose flag is set on cl; -1 as soon as one dump fails */
+{
+ if ((cl->cl_flags & HFSC_RSC) &&
+ (hfsc_dump_sc(skb, TCA_HFSC_RSC, &cl->cl_rsc) < 0))
+ goto rtattr_failure;
+
+ if ((cl->cl_flags & HFSC_FSC) &&
+ (hfsc_dump_sc(skb, TCA_HFSC_FSC, &cl->cl_fsc) < 0))
+ goto rtattr_failure;
+
+ if ((cl->cl_flags & HFSC_USC) &&
+ (hfsc_dump_sc(skb, TCA_HFSC_USC, &cl->cl_usc) < 0))
+ goto rtattr_failure;
+
+ return skb->len;
+
+ rtattr_failure:
+ return -1;
+}
+
+static inline int
+hfsc_dump_stats(struct sk_buff *skb, struct hfsc_class *cl) /* copy cl's generic stats into skb; -1 on failure */
+{
+ cl->stats.qlen = cl->qdisc->q.qlen; /* refresh qlen from the class qdisc before copying */
+ if (qdisc_copy_stats(skb, &cl->stats) < 0)
+ goto rtattr_failure;
+
+ return skb->len;
+
+ rtattr_failure:
+ return -1;
+}
+
+static inline int
+hfsc_dump_xstats(struct sk_buff *skb, struct hfsc_class *cl) /* append level, period, work and rtwork as a TCA_XSTATS attribute; -1 on failure */
+{
+ struct tc_hfsc_stats xstats;
+
+ xstats.level = cl->level;
+ xstats.period = cl->cl_vtperiod;
+ xstats.work = cl->cl_total;
+ xstats.rtwork = cl->cl_cumul;
+ RTA_PUT(skb, TCA_XSTATS, sizeof(xstats), &xstats);
+
+ return skb->len;
+
+ rtattr_failure:
+ return -1;
+}
+
+static int
+hfsc_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb, /* fill tcm and append options, stats and xstats for class arg; -1 on failure */
+ struct tcmsg *tcm)
+{
+ struct hfsc_class *cl = (struct hfsc_class *)arg;
+ unsigned char *b = skb->tail; /* start of what this function appends; used for rollback */
+ struct rtattr *rta = (struct rtattr *)b;
+
+ tcm->tcm_parent = cl->cl_parent ? cl->cl_parent->classid : TC_H_ROOT;
+ tcm->tcm_handle = cl->classid;
+ if (cl->level == 0) /* tcm_info is written only for level-0 classes */
+ tcm->tcm_info = cl->qdisc->handle;
+
+ RTA_PUT(skb, TCA_OPTIONS, 0, NULL); /* empty TCA_OPTIONS header; its length is patched below */
+ if (hfsc_dump_curves(skb, cl) < 0)
+ goto rtattr_failure;
+ rta->rta_len = skb->tail - b; /* TCA_OPTIONS now spans the curve attributes just written */
+
+ if ((hfsc_dump_stats(skb, cl) < 0) ||
+ (hfsc_dump_xstats(skb, cl) < 0))
+ goto rtattr_failure;
+
+ return skb->len;
+
+ rtattr_failure:
+ skb_trim(skb, b - skb->data); /* discard everything appended since b */
+ return -1;
+}
+
+static void
+hfsc_walk(struct Qdisc *sch, struct qdisc_walker *arg) /* call arg->fn on every hashed class after the first arg->skip; stops when fn returns < 0 */
+{
+ struct hfsc_sched *q = (struct hfsc_sched *)sch->data;
+ struct hfsc_class *cl;
+ unsigned int i;
+
+ if (arg->stop)
+ return;
+
+ for (i = 0; i < HFSC_HSIZE; i++) {
+ list_for_each_entry(cl, &q->clhash[i], hlist) {
+ if (arg->count < arg->skip) { /* still inside the part the caller asked to skip */
+ arg->count++;
+ continue;
+ }
+ if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
+ arg->stop = 1; /* count is not advanced for the class that failed */
+ return;
+ }
+ arg->count++;
+ }
+ }
+}
+
+static void
+hfsc_watchdog(unsigned long arg) /* wd_timer callback: clear TCQ_F_THROTTLED and reschedule the device */
+{
+ struct Qdisc *sch = (struct Qdisc *)arg; /* arg is the Qdisc stored in wd_timer.data by hfsc_init_qdisc() */
+
+ sch->flags &= ~TCQ_F_THROTTLED;
+ netif_schedule(sch->dev);
+}
+
+static void
+hfsc_schedule_watchdog(struct Qdisc *sch, u64 cur_time) /* mark sch throttled and arm wd_timer for the earliest of min cl_e and root's cl_cfmin */
+{
+ struct hfsc_sched *q = (struct hfsc_sched *)sch->data;
+ struct hfsc_class *cl;
+ u64 next_time = 0; /* 0 means "no candidate time yet" */
+ long delay;
+
+ if ((cl = eltree_get_minel(q)) != NULL)
+ next_time = cl->cl_e;
+ if (q->root.cl_cfmin != 0) {
+ if (next_time == 0 || next_time > q->root.cl_cfmin)
+ next_time = q->root.cl_cfmin;
+ }
+ ASSERT(next_time != 0);
+ delay = next_time - cur_time; /* NOTE(review): signed and not clamped - confirm behavior when next_time <= cur_time */
+ delay = PSCHED_US2JIFFIE(delay);
+
+ sch->flags |= TCQ_F_THROTTLED;
+ mod_timer(&q->wd_timer, jiffies + delay);
+}
+
+static int
+hfsc_init_qdisc(struct Qdisc *sch, struct rtattr *opt) /* set up scheduler state and the embedded root class; -EINVAL if opt is missing or too short */
+{
+ struct hfsc_sched *q = (struct hfsc_sched *)sch->data;
+ struct tc_hfsc_qopt *qopt;
+ unsigned int i;
+
+ if (opt == NULL || RTA_PAYLOAD(opt) < sizeof(*qopt))
+ return -EINVAL;
+ qopt = RTA_DATA(opt);
+
+ sch->stats.lock = &sch->dev->queue_lock;
+
+ q->defcls = qopt->defcls;
+ for (i = 0; i < HFSC_HSIZE; i++)
+ INIT_LIST_HEAD(&q->clhash[i]);
+ q->eligible = RB_ROOT;
+ INIT_LIST_HEAD(&q->droplist);
+ skb_queue_head_init(&q->requeue);
+
+ q->root.refcnt = 1;
+ q->root.classid = sch->handle; /* the root class uses the qdisc handle as its class id */
+ q->root.sched = q;
+ q->root.qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
+ if (q->root.qdisc == NULL) /* fall back to noop_qdisc instead of failing init */
+ q->root.qdisc = &noop_qdisc;
+ q->root.stats.lock = &sch->dev->queue_lock;
+ INIT_LIST_HEAD(&q->root.children);
+ q->root.vt_tree = RB_ROOT;
+ q->root.cf_tree = RB_ROOT;
+
+ list_add(&q->root.hlist, &q->clhash[hfsc_hash(q->root.classid)]); /* makes the root findable via hfsc_find_class() */
+
+ init_timer(&q->wd_timer);
+ q->wd_timer.function = hfsc_watchdog;
+ q->wd_timer.data = (unsigned long)sch;
+
+ MOD_INC_USE_COUNT; /* paired with MOD_DEC_USE_COUNT in hfsc_destroy_qdisc() */
+ return 0;
+}
+
+static int
+hfsc_change_qdisc(struct Qdisc *sch, struct rtattr *opt) /* update the default class id from a tc_hfsc_qopt payload; returns -EINVAL if opt is missing or too short, else 0 */
+{
+ struct hfsc_sched *q = (struct hfsc_sched *)sch->data;
+ struct tc_hfsc_qopt *qopt;
+
+ if (opt == NULL || RTA_PAYLOAD(opt) < sizeof(*qopt))
+ return -EINVAL;
+ qopt = RTA_DATA(opt);
+
+ sch_tree_lock(sch); /* defcls is read by hfsc_classify(); change it under the tree lock */
+ q->defcls = qopt->defcls;
+ sch_tree_unlock(sch);
+
+ return 0;
+}
+
+static void
+hfsc_reset_class(struct hfsc_class *cl) /* zero cl's scheduling state, reset its qdisc and restart its configured curves at (0, 0) */
+{
+ cl->cl_total = 0;
+ cl->cl_cumul = 0;
+ cl->cl_d = 0;
+ cl->cl_e = 0;
+ cl->cl_vt = 0;
+ cl->cl_vtadj = 0;
+ cl->cl_vtoff = 0;
+ cl->cl_cvtmin = 0;
+ cl->cl_cvtmax = 0;
+ cl->cl_cvtoff = 0;
+ cl->cl_pcvtoff = 0;
+ cl->cl_vtperiod = 0;
+ cl->cl_parentperiod = 0;
+ cl->cl_f = 0;
+ cl->cl_myf = 0;
+ cl->cl_myfadj = 0;
+ cl->cl_cfmin = 0;
+ cl->cl_nactive = 0;
+
+ cl->vt_tree = RB_ROOT; /* trees are emptied by re-initialising the roots, not by erasing nodes */
+ cl->cf_tree = RB_ROOT;
+ qdisc_reset(cl->qdisc);
+
+ if (cl->cl_flags & HFSC_RSC) /* cl_eligible is not re-initialised here; init_ed() rebuilds it */
+ rtsc_init(&cl->cl_deadline, &cl->cl_rsc, 0, 0);
+ if (cl->cl_flags & HFSC_FSC)
+ rtsc_init(&cl->cl_virtual, &cl->cl_fsc, 0, 0);
+ if (cl->cl_flags & HFSC_USC)
+ rtsc_init(&cl->cl_ulimit, &cl->cl_usc, 0, 0);
+}
+
+static void
+hfsc_reset_qdisc(struct Qdisc *sch) /* reset every class, drop requeued packets and clear scheduler-wide state */
+{
+ struct hfsc_sched *q = (struct hfsc_sched *)sch->data;
+ struct hfsc_class *cl;
+ unsigned int i;
+
+ for (i = 0; i < HFSC_HSIZE; i++) {
+ list_for_each_entry(cl, &q->clhash[i], hlist)
+ hfsc_reset_class(cl);
+ }
+ __skb_queue_purge(&q->requeue);
+ q->eligible = RB_ROOT; /* eligible tree and droplist are emptied wholesale */
+ INIT_LIST_HEAD(&q->droplist);
+ del_timer(&q->wd_timer);
+ sch->flags &= ~TCQ_F_THROTTLED;
+ sch->q.qlen = 0;
+}
+
+static void
+hfsc_destroy_qdisc(struct Qdisc *sch) /* destroy every hashed class (root included), drop requeued packets and stop the timer */
+{
+ struct hfsc_sched *q = (struct hfsc_sched *)sch->data;
+ struct hfsc_class *cl, *next;
+ unsigned int i;
+
+ for (i = 0; i < HFSC_HSIZE; i++) {
+ list_for_each_entry_safe(cl, next, &q->clhash[i], hlist) /* _safe variant: hfsc_destroy_class() may free cl */
+ hfsc_destroy_class(sch, cl);
+ }
+ __skb_queue_purge(&q->requeue);
+ del_timer(&q->wd_timer);
+ MOD_DEC_USE_COUNT; /* paired with MOD_INC_USE_COUNT in hfsc_init_qdisc() */
+}
+
+static int
+hfsc_dump_qdisc(struct Qdisc *sch, struct sk_buff *skb) /* append the qdisc options (defcls) as TCA_OPTIONS; -1 on failure */
+{
+ struct hfsc_sched *q = (struct hfsc_sched *)sch->data;
+ unsigned char *b = skb->tail; /* rollback point for the failure path */
+ struct tc_hfsc_qopt qopt;
+
+ qopt.defcls = q->defcls;
+ RTA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt);
+
+ return skb->len;
+
+ rtattr_failure:
+ skb_trim(skb, b - skb->data);
+ return -1;
+}
+
+static int
+hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch) /* classify skb and queue it on the chosen class's qdisc */
+{
+ struct hfsc_class *cl = hfsc_classify(skb, sch);
+ unsigned int len = skb->len; /* saved up front; skb belongs to the child qdisc after enqueue */
+ int err;
+
+ if (cl == NULL) { /* no class matched: the packet is freed and counted as a drop */
+ kfree_skb(skb);
+ sch->stats.drops++;
+ return NET_XMIT_DROP;
+ }
+
+ err = cl->qdisc->enqueue(skb, cl->qdisc);
+ if (unlikely(err != NET_XMIT_SUCCESS)) { /* skb is not freed here on this path */
+ cl->stats.drops++;
+ sch->stats.drops++;
+ return err;
+ }
+
+ if (cl->qdisc->q.qlen == 1) /* first packet on this class qdisc: activate the class */
+ set_active(cl, len);
+
+ cl->stats.packets++;
+ cl->stats.bytes += len;
+ sch->stats.packets++;
+ sch->stats.bytes += len;
+ sch->q.qlen++;
+
+ return NET_XMIT_SUCCESS;
+}
+
+static struct sk_buff *
+hfsc_dequeue(struct Qdisc *sch) /* returns the next packet to send, or NULL if none can be sent now */
+{
+ struct hfsc_sched *q = (struct hfsc_sched *)sch->data;
+ struct hfsc_class *cl;
+ struct sk_buff *skb;
+ u64 cur_time;
+ unsigned int next_len;
+ int realtime = 0; /* set when the class was picked by eltree_get_mindl() */
+
+ if (sch->q.qlen == 0)
+ return NULL;
+ if ((skb = __skb_dequeue(&q->requeue))) /* packets handed back via hfsc_requeue() go out first */
+ goto out;
+
+ PSCHED_GET_TIME(cur_time);
+
+ /*
+ * if there are eligible classes, use real-time criteria.
+ * find the class with the minimum deadline among
+ * the eligible classes.
+ */
+ if ((cl = eltree_get_mindl(q, cur_time)) != NULL) {
+ realtime = 1;
+ } else {
+ /*
+ * use link-sharing criteria
+ * get the class with the minimum vt in the hierarchy
+ */
+ cl = vttree_get_minvt(&q->root, cur_time);
+ if (cl == NULL) { /* nothing may be sent yet: arm the watchdog and report overlimit */
+ sch->stats.overlimits++;
+ hfsc_schedule_watchdog(sch, cur_time);
+ return NULL;
+ }
+ }
+
+ skb = cl->qdisc->dequeue(cl->qdisc);
+ if (skb == NULL) { /* this path returns without arming the watchdog */
+ if (net_ratelimit())
+ printk("HFSC: Non-work-conserving qdisc ?\n");
+ return NULL;
+ }
+
+ update_vf(cl, skb->len, cur_time);
+ if (realtime) /* cl_cumul grows only for packets sent under the real-time criterion */
+ cl->cl_cumul += skb->len;
+
+ if (cl->qdisc->q.qlen != 0) {
+ if (cl->cl_flags & HFSC_RSC) {
+ /* update ed */
+ next_len = qdisc_peek_len(cl->qdisc);
+ if (realtime)
+ update_ed(cl, next_len);
+ else
+ update_d(cl, next_len); /* cl_cumul did not change, so only the deadline is refreshed */
+ }
+ } else {
+ /* the class becomes passive */
+ set_passive(cl);
+ }
+
+ out:
+ sch->flags &= ~TCQ_F_THROTTLED;
+ sch->q.qlen--;
+
+ return skb;
+}
+
+static int
+hfsc_requeue(struct sk_buff *skb, struct Qdisc *sch) /* put skb at the head of the scheduler-level requeue list; always NET_XMIT_SUCCESS */
+{
+ struct hfsc_sched *q = (struct hfsc_sched *)sch->data;
+
+ __skb_queue_head(&q->requeue, skb); /* class qdiscs and class accounting are not touched */
+ sch->q.qlen++;
+ return NET_XMIT_SUCCESS;
+}
+
+static unsigned int
+hfsc_drop(struct Qdisc *sch) /* drop one packet from the first droplist class able to drop; returns its length, or 0 */
+{
+ struct hfsc_sched *q = (struct hfsc_sched *)sch->data;
+ struct hfsc_class *cl;
+ unsigned int len;
+
+ list_for_each_entry(cl, &q->droplist, dlist) {
+ if (cl->qdisc->ops->drop != NULL && /* classes whose qdisc has no drop op are skipped */
+ (len = cl->qdisc->ops->drop(cl->qdisc)) > 0) {
+ if (cl->qdisc->q.qlen == 0) {
+ update_vf(cl, 0, 0);
+ set_passive(cl);
+ } else {
+ list_move_tail(&cl->dlist, &q->droplist); /* rotate so the next drop starts with another class */
+ }
+ cl->stats.drops++;
+ sch->stats.drops++;
+ sch->q.qlen--;
+ return len; /* returning at once keeps the modified list from being walked further */
+ }
+ }
+ return 0;
+}
+
+static struct Qdisc_class_ops hfsc_class_ops = { /* class-level callbacks; referenced from hfsc_qdisc_ops.cl_ops */
+ .change = hfsc_change_class,
+ .delete = hfsc_delete_class,
+ .graft = hfsc_graft_class,
+ .leaf = hfsc_class_leaf,
+ .get = hfsc_get_class,
+ .put = hfsc_put_class,
+ .bind_tcf = hfsc_bind_tcf,
+ .unbind_tcf = hfsc_unbind_tcf,
+ .tcf_chain = hfsc_tcf_chain,
+ .dump = hfsc_dump_class,
+ .walk = hfsc_walk
+};
+
+struct Qdisc_ops hfsc_qdisc_ops = { /* qdisc-level callbacks; registered by hfsc_init(); not static */
+ .id = "hfsc",
+ .init = hfsc_init_qdisc,
+ .change = hfsc_change_qdisc,
+ .reset = hfsc_reset_qdisc,
+ .destroy = hfsc_destroy_qdisc,
+ .dump = hfsc_dump_qdisc,
+ .enqueue = hfsc_enqueue,
+ .dequeue = hfsc_dequeue,
+ .requeue = hfsc_requeue,
+ .drop = hfsc_drop,
+ .cl_ops = &hfsc_class_ops,
+ .priv_size = sizeof(struct hfsc_sched) /* sch->data is cast to struct hfsc_sched throughout this file */
+};
+
+static int __init
+hfsc_init(void) /* module entry point: register the "hfsc" qdisc; returns register_qdisc()'s result */
+{
+ return register_qdisc(&hfsc_qdisc_ops);
+}
+
+static void __exit
+hfsc_cleanup(void) /* module exit point: unregister the "hfsc" qdisc */
+{
+ unregister_qdisc(&hfsc_qdisc_ops);
+}
+
+MODULE_LICENSE("GPL");
+module_init(hfsc_init); /* hfsc_init() runs at module load */
+module_exit(hfsc_cleanup); /* hfsc_cleanup() runs at module unload */
diff --git a/release/src/linux/linux/net/sched/sch_htb.c b/release/src/linux/linux/net/sched/sch_htb.c
index 7539e490..944cb555 100644
--- a/release/src/linux/linux/net/sched/sch_htb.c
+++ b/release/src/linux/linux/net/sched/sch_htb.c
@@ -9,6 +9,8 @@
* Authors: Martin Devera, <devik@cdi.cz>
*
* Credits (in time order) for older HTB versions:
+ * Stef Coene <stef.coene@docum.org>
+ * HTB support at LARTC mailing list
* Ondrej Kraus, <krauso@barr.cz>
* found missing INIT_QDISC(htb)
* Vladimir Smelhaus, Aamer Akhter, Bert Hubert
@@ -17,9 +19,13 @@
* code review and helpful comments on shaping
* Tomasz Wrona, <tw@eter.tym.pl>
* created test case so that I was able to fix nasty bug
+ * Wilfried Weissmann
+ * spotted bug in dequeue code and helped with fix
+ * Jiri Fojtasek
+ * fixed requeue routine
* and many others. thanks.
*
- * $Id: sch_htb.c,v 1.1.1.4 2003/10/14 08:09:35 sparq Exp $
+ * $Id: sch_htb.c,v 1.25 2003/12/07 11:08:25 devik Exp devik $
*/
#include <linux/config.h>
#include <linux/module.h>
@@ -66,21 +72,17 @@
#define HTB_HSIZE 16 /* classid hash size */
#define HTB_EWMAC 2 /* rate average over HTB_EWMAC*HTB_HSIZE sec */
-#define HTB_DEBUG 1 /* compile debugging support (activated by tc tool) */
+//#define HTB_DEBUG 1 /* compile debugging support (activated by tc tool) */
#define HTB_RATECM 1 /* whether to use rate computer */
-#define HTB_HYSTERESIS 1/* whether to use mode hysteresis for speedup */
+#define HTB_HYSTERESIS 0/* whether to use mode hysteresis for speedup */
#define HTB_QLOCK(S) spin_lock_bh(&(S)->dev->queue_lock)
#define HTB_QUNLOCK(S) spin_unlock_bh(&(S)->dev->queue_lock)
-#define HTB_VER 0x30007 /* major must be matched with number suplied by TC as version */
+#define HTB_VER 0x30011 /* major must be matched with number suplied by TC as version */
#if HTB_VER >> 16 != TC_HTB_PROTOVER
#error "Mismatched sch_htb.c and pkt_sch.h"
#endif
-/* temporary debug defines to be removed after beta stage */
-#define DEVIK_MEND(N)
-#define DEVIK_MSTART(N)
-
/* debugging support; S is subsystem, these are defined:
0 - netlink messages
1 - enqueue
@@ -100,13 +102,16 @@
from LSB
*/
#ifdef HTB_DEBUG
-#define HTB_DBG(S,L,FMT,ARG...) if (((q->debug>>(2*S))&3) >= L) \
+#define HTB_DBG_COND(S,L) (((q->debug>>(2*S))&3) >= L)
+#define HTB_DBG(S,L,FMT,ARG...) if (HTB_DBG_COND(S,L)) \
printk(KERN_DEBUG FMT,##ARG)
#define HTB_CHCL(cl) BUG_TRAP((cl)->magic == HTB_CMAGIC)
#define HTB_PASSQ q,
#define HTB_ARGQ struct htb_sched *q,
#define static
+#undef __inline__
#define __inline__
+#undef inline
#define inline
#define HTB_CMAGIC 0xFEFAFEF1
#define htb_safe_rb_erase(N,R) do { BUG_TRAP((N)->rb_color != -1); \
@@ -114,6 +119,7 @@
rb_erase(N,R); \
(N)->rb_color = -1; } while (0)
#else
+#define HTB_DBG_COND(S,L) (0)
#define HTB_DBG(S,L,FMT,ARG...)
#define HTB_PASSQ
#define HTB_ARGQ
@@ -166,6 +172,11 @@ struct htb_class
struct htb_class_inner {
rb_root_t feed[TC_HTB_NUMPRIO]; /* feed trees */
rb_node_t *ptr[TC_HTB_NUMPRIO]; /* current class ptr */
+ /* When a class changes from state 1->2 and disconnects from
+ parent's feed then we lose the ptr value and start from the
+ first child again. Here we store the classid of the
+ last valid ptr (used when ptr is NULL). */
+ u32 last_ptr_id[TC_HTB_NUMPRIO];
} inner;
} un;
rb_node_t node[TC_HTB_NUMPRIO]; /* node for self or feed tree */
@@ -212,6 +223,7 @@ struct htb_sched
rb_root_t row[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
int row_mask[TC_HTB_MAXDEPTH];
rb_node_t *ptr[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
+ u32 last_ptr_id[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
/* self wait list - roots of wait PQs per row */
rb_root_t wait_pq[TC_HTB_MAXDEPTH];
@@ -219,6 +231,9 @@ struct htb_sched
/* time of nearest event per level (row) */
unsigned long near_ev_cache[TC_HTB_MAXDEPTH];
+ /* cached value of jiffies in dequeue */
+ unsigned long jiffies;
+
/* whether we hit non-work conserving class during this dequeue; we use */
int nwc_hit; /* this to disable mindelay complaint in dequeue */
@@ -297,7 +312,7 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch)
rules in it */
if (skb->priority == sch->handle)
return HTB_DIRECT; /* X:0 (direct flow) selected */
- if ((cl = htb_find(skb->priority,sch)) != NULL)
+ if ((cl = htb_find(skb->priority,sch)) != NULL && cl->level == 0)
return cl;
tcf = q->filter_list;
@@ -338,7 +353,7 @@ static void htb_next_rb_node(rb_node_t **n);
static void htb_debug_dump (struct htb_sched *q)
{
int i,p;
- printk(KERN_DEBUG "htb*g j=%lu\n",jiffies);
+ printk(KERN_DEBUG "htb*g j=%lu lj=%lu\n",jiffies,q->jiffies);
/* rows */
for (i=TC_HTB_MAXDEPTH-1;i>=0;i--) {
printk(KERN_DEBUG "htb*r%d m=%x",i,q->row_mask[i]);
@@ -421,26 +436,24 @@ static void htb_add_to_wait_tree (struct htb_sched *q,
if ((delay <= 0 || delay > cl->mbuffer) && net_ratelimit())
printk(KERN_ERR "HTB: suspicious delay in wait_tree d=%ld cl=%X h=%d\n",delay,cl->classid,debug_hint);
#endif
- DEVIK_MSTART(9);
- cl->pq_key = jiffies + PSCHED_US2JIFFIE(delay);
- if (cl->pq_key == jiffies)
+ cl->pq_key = q->jiffies + PSCHED_US2JIFFIE(delay);
+ if (cl->pq_key == q->jiffies)
cl->pq_key++;
/* update the nearest event cache */
- if (q->near_ev_cache[cl->level] - cl->pq_key < 0x80000000)
+ if (time_after(q->near_ev_cache[cl->level], cl->pq_key))
q->near_ev_cache[cl->level] = cl->pq_key;
while (*p) {
struct htb_class *c; parent = *p;
c = rb_entry(parent, struct htb_class, pq_node);
- if (cl->pq_key - c->pq_key < 0x80000000)
+ if (time_after_eq(cl->pq_key, c->pq_key))
p = &parent->rb_right;
else
p = &parent->rb_left;
}
rb_link_node(&cl->pq_node, parent, p);
rb_insert_color(&cl->pq_node, &q->wait_pq[cl->level]);
- DEVIK_MEND(9);
}
/**
@@ -453,12 +466,14 @@ static void htb_next_rb_node(rb_node_t **n)
{
rb_node_t *p;
if ((*n)->rb_right) {
+ /* child at right. use it or its leftmost descendant */
*n = (*n)->rb_right;
while ((*n)->rb_left)
*n = (*n)->rb_left;
return;
}
while ((p = (*n)->rb_parent) != NULL) {
+ /* if we've arrived from left child then we have next node */
if (p->rb_left == *n) break;
*n = p;
}
@@ -567,8 +582,13 @@ static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
int prio = ffz(~m);
m &= ~(1 << prio);
- if (p->un.inner.ptr[prio] == cl->node+prio)
- htb_next_rb_node(p->un.inner.ptr + prio);
+ if (p->un.inner.ptr[prio] == cl->node+prio) {
+ /* we are removing child which is pointed to from
+ parent feed - forget the pointer but remember
+ classid */
+ p->un.inner.last_ptr_id[prio] = cl->classid;
+ p->un.inner.ptr[prio] = NULL;
+ }
htb_safe_rb_erase(cl->node + prio,p->un.inner.feed + prio);
@@ -602,7 +622,7 @@ htb_class_mode(struct htb_class *cl,long *diff)
long toks;
if ((toks = (cl->ctokens + *diff)) < (
-#ifdef HTB_HYSTERESIS
+#if HTB_HYSTERESIS
cl->cmode != HTB_CANT_SEND ? -cl->cbuffer :
#endif
0)) {
@@ -610,7 +630,7 @@ htb_class_mode(struct htb_class *cl,long *diff)
return HTB_CANT_SEND;
}
if ((toks = (cl->tokens + *diff)) >= (
-#ifdef HTB_HYSTERESIS
+#if HTB_HYSTERESIS
cl->cmode == HTB_CAN_SEND ? -cl->buffer :
#endif
0))
@@ -689,7 +709,6 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
struct htb_sched *q = (struct htb_sched *)sch->data;
struct htb_class *cl = htb_classify(skb,sch);
- DEVIK_MSTART(0);
if (cl == HTB_DIRECT || !cl) {
/* enqueue to helper queue */
if (q->direct_queue.qlen < q->direct_qlen && cl) {
@@ -698,25 +717,20 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
} else {
kfree_skb (skb);
sch->stats.drops++;
- DEVIK_MEND(0);
return NET_XMIT_DROP;
}
} else if (cl->un.leaf.q->enqueue(skb, cl->un.leaf.q) != NET_XMIT_SUCCESS) {
sch->stats.drops++;
cl->stats.drops++;
- DEVIK_MEND(0);
return NET_XMIT_DROP;
} else {
cl->stats.packets++; cl->stats.bytes += skb->len;
- DEVIK_MSTART(1);
htb_activate (q,cl);
- DEVIK_MEND(1);
}
sch->q.qlen++;
sch->stats.packets++; sch->stats.bytes += skb->len;
- HTB_DBG(1,1,"htb_enq_ok cl=%X skb=%p\n",cl?cl->classid:0,skb);
- DEVIK_MEND(0);
+ HTB_DBG(1,1,"htb_enq_ok cl=%X skb=%p\n",(cl && cl != HTB_DIRECT)?cl->classid:0,skb);
return NET_XMIT_SUCCESS;
}
@@ -725,16 +739,18 @@ static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch)
{
struct htb_sched *q = (struct htb_sched *)sch->data;
struct htb_class *cl = htb_classify(skb,sch);
+ struct sk_buff *tskb;
if (cl == HTB_DIRECT || !cl) {
/* enqueue to helper queue */
if (q->direct_queue.qlen < q->direct_qlen && cl) {
- __skb_queue_tail(&q->direct_queue, skb);
- q->direct_pkts++;
+ __skb_queue_head(&q->direct_queue, skb);
} else {
- kfree_skb (skb);
- sch->stats.drops++;
- return NET_XMIT_DROP;
+ __skb_queue_head(&q->direct_queue, skb);
+ tskb = __skb_dequeue_tail(&q->direct_queue);
+ kfree_skb (tskb);
+ sch->stats.drops++;
+ return NET_XMIT_CN;
}
} else if (cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q) != NET_XMIT_SUCCESS) {
sch->stats.drops++;
@@ -744,7 +760,7 @@ static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch)
htb_activate (q,cl);
sch->q.qlen++;
- HTB_DBG(1,1,"htb_req_ok cl=%X skb=%p\n",cl?cl->classid:0,skb);
+ HTB_DBG(1,1,"htb_req_ok cl=%X skb=%p\n",(cl && cl != HTB_DIRECT)?cl->classid:0,skb);
return NET_XMIT_SUCCESS;
}
@@ -819,7 +835,7 @@ static void htb_charge_class(struct htb_sched *q,struct htb_class *cl,
cl->classid, diff,
(unsigned long long) q->now,
(unsigned long long) cl->t_c,
- jiffies);
+ q->jiffies);
diff = 1000;
}
#endif
@@ -862,6 +878,7 @@ static void htb_charge_class(struct htb_sched *q,struct htb_class *cl,
*
* Scans event queue for pending events and applies them. Returns jiffies to
* next pending event (0 for no event in pq).
+ * Note: Applied are events which have cl->pq_key <= q->jiffies.
*/
static long htb_do_events(struct htb_sched *q,int level)
{
@@ -876,9 +893,9 @@ static long htb_do_events(struct htb_sched *q,int level)
while (p->rb_left) p = p->rb_left;
cl = rb_entry(p, struct htb_class, pq_node);
- if (cl->pq_key - (jiffies+1) < 0x80000000) {
- HTB_DBG(8,3,"htb_do_ev_ret delay=%ld\n",cl->pq_key - jiffies);
- return cl->pq_key - jiffies;
+ if (time_after(cl->pq_key, q->jiffies)) {
+ HTB_DBG(8,3,"htb_do_ev_ret delay=%ld\n",cl->pq_key - q->jiffies);
+ return cl->pq_key - q->jiffies;
}
htb_safe_rb_erase(p,q->wait_pq+level);
diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32)cl->mbuffer, 0);
@@ -889,7 +906,7 @@ static long htb_do_events(struct htb_sched *q,int level)
cl->classid, diff,
(unsigned long long) q->now,
(unsigned long long) cl->t_c,
- jiffies);
+ q->jiffies);
diff = 1000;
}
#endif
@@ -902,24 +919,56 @@ static long htb_do_events(struct htb_sched *q,int level)
return HZ/10;
}
+/* Returns class->node+prio from id-tree where class's id is >= id. NULL
+ if no such one exists. */
+static rb_node_t *
+htb_id_find_next_upper(int prio,rb_node_t *n,u32 id)
+{
+ rb_node_t *r = NULL;
+ while (n) {
+ struct htb_class *cl = rb_entry(n,struct htb_class,node[prio]);
+ if (id == cl->classid) return n;
+
+ if (id > cl->classid) {
+ n = n->rb_right;
+ } else {
+ r = n;
+ n = n->rb_left;
+ }
+ }
+ return r;
+}
+
/**
* htb_lookup_leaf - returns next leaf class in DRR order
*
* Find leaf where current feed pointers points to.
*/
static struct htb_class *
-htb_lookup_leaf(rb_root_t *tree,int prio,rb_node_t **pptr)
+htb_lookup_leaf(HTB_ARGQ rb_root_t *tree,int prio,rb_node_t **pptr,u32 *pid)
{
int i;
struct {
rb_node_t *root;
rb_node_t **pptr;
+ u32 *pid;
} stk[TC_HTB_MAXDEPTH],*sp = stk;
+ BUG_TRAP(tree->rb_node);
sp->root = tree->rb_node;
sp->pptr = pptr;
+ sp->pid = pid;
for (i = 0; i < 65535; i++) {
+ HTB_DBG(4,2,"htb_lleaf ptr=%p pid=%X\n",*sp->pptr,*sp->pid);
+
+ if (!*sp->pptr && *sp->pid) {
+ /* ptr was invalidated but id is valid - try to recover
+ the original or next ptr */
+ *sp->pptr = htb_id_find_next_upper(prio,sp->root,*sp->pid);
+ }
+ *sp->pid = 0; /* ptr is valid now so that remove this hint as it
+ can become out of date quickly */
if (!*sp->pptr) { /* we are at right end; rewind & go up */
*sp->pptr = sp->root;
while ((*sp->pptr)->rb_left)
@@ -937,6 +986,7 @@ htb_lookup_leaf(rb_root_t *tree,int prio,rb_node_t **pptr)
return cl;
(++sp)->root = cl->un.inner.feed[prio].rb_node;
sp->pptr = cl->un.inner.ptr+prio;
+ sp->pid = cl->un.inner.last_ptr_id+prio;
}
}
BUG_TRAP(0);
@@ -949,16 +999,37 @@ static struct sk_buff *
htb_dequeue_tree(struct htb_sched *q,int prio,int level)
{
struct sk_buff *skb = NULL;
- //struct htb_sched *q = (struct htb_sched *)sch->data;
struct htb_class *cl,*start;
/* look initial class up in the row */
- DEVIK_MSTART(6);
- start = cl = htb_lookup_leaf (q->row[level]+prio,prio,q->ptr[level]+prio);
+ start = cl = htb_lookup_leaf (HTB_PASSQ q->row[level]+prio,prio,
+ q->ptr[level]+prio,q->last_ptr_id[level]+prio);
do {
- BUG_TRAP(cl && cl->un.leaf.q->q.qlen); if (!cl) return NULL;
+next:
+ BUG_TRAP(cl);
+ if (!cl) return NULL;
HTB_DBG(4,1,"htb_deq_tr prio=%d lev=%d cl=%X defic=%d\n",
prio,level,cl->classid,cl->un.leaf.deficit[level]);
+
+ /* class can be empty - it is unlikely but can be true if leaf
+ qdisc drops packets in enqueue routine or if someone used
+ graft operation on the leaf since last dequeue;
+ simply deactivate and skip such class */
+ if (unlikely(cl->un.leaf.q->q.qlen == 0)) {
+ struct htb_class *next;
+ htb_deactivate(q,cl);
+
+ /* row/level might become empty */
+ if ((q->row_mask[level] & (1 << prio)) == 0)
+ return NULL;
+
+ next = htb_lookup_leaf (HTB_PASSQ q->row[level]+prio,
+ prio,q->ptr[level]+prio,q->last_ptr_id[level]+prio);
+ if (cl == start) /* fix start if we just deleted it */
+ start = next;
+ cl = next;
+ goto next;
+ }
if (likely((skb = cl->un.leaf.q->dequeue(cl->un.leaf.q)) != NULL))
break;
@@ -968,11 +1039,10 @@ htb_dequeue_tree(struct htb_sched *q,int prio,int level)
}
q->nwc_hit++;
htb_next_rb_node((level?cl->parent->un.inner.ptr:q->ptr[0])+prio);
- cl = htb_lookup_leaf (q->row[level]+prio,prio,q->ptr[level]+prio);
+ cl = htb_lookup_leaf (HTB_PASSQ q->row[level]+prio,prio,q->ptr[level]+prio,
+ q->last_ptr_id[level]+prio);
} while (cl != start);
- DEVIK_MEND(6);
- DEVIK_MSTART(7);
if (likely(skb != NULL)) {
if ((cl->un.leaf.deficit[level] -= skb->len) < 0) {
HTB_DBG(4,2,"htb_next_cl oldptr=%p quant_add=%d\n",
@@ -984,27 +1054,22 @@ htb_dequeue_tree(struct htb_sched *q,int prio,int level)
gives us slightly better performance */
if (!cl->un.leaf.q->q.qlen)
htb_deactivate (q,cl);
- DEVIK_MSTART(8);
htb_charge_class (q,cl,level,skb->len);
- DEVIK_MEND(8);
}
- DEVIK_MEND(7);
return skb;
}
static void htb_delay_by(struct Qdisc *sch,long delay)
{
struct htb_sched *q = (struct htb_sched *)sch->data;
- if (netif_queue_stopped(sch->dev)) return;
if (delay <= 0) delay = 1;
if (unlikely(delay > 5*HZ)) {
if (net_ratelimit())
printk(KERN_INFO "HTB delay %ld > 5sec\n", delay);
delay = 5*HZ;
}
- del_timer(&q->timer);
- q->timer.expires = jiffies + delay;
- add_timer(&q->timer);
+ /* why not use jiffies here? because expires can be in the past */
+ mod_timer(&q->timer, q->jiffies + delay);
sch->flags |= TCQ_F_THROTTLED;
sch->stats.overlimits++;
HTB_DBG(3,1,"htb_deq t_delay=%ld\n",delay);
@@ -1016,7 +1081,11 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)
struct htb_sched *q = (struct htb_sched *)sch->data;
int level;
long min_delay;
+#ifdef HTB_DEBUG
+ int evs_used = 0;
+#endif
+ q->jiffies = jiffies;
HTB_DBG(3,1,"htb_deq dircnt=%d qlen=%d\n",skb_queue_len(&q->direct_queue),
sch->q.qlen);
@@ -1027,27 +1096,26 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)
return skb;
}
- DEVIK_MSTART(2);
if (!sch->q.qlen) goto fin;
PSCHED_GET_TIME(q->now);
- min_delay = HZ*5;
+ min_delay = LONG_MAX;
q->nwc_hit = 0;
for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
/* common case optimization - skip event handler quickly */
int m;
long delay;
- DEVIK_MSTART(3);
- if (jiffies - q->near_ev_cache[level] < 0x80000000 || 0) {
+ if (time_after_eq(q->jiffies, q->near_ev_cache[level])) {
delay = htb_do_events(q,level);
- q->near_ev_cache[level] += delay ? delay : HZ;
+ q->near_ev_cache[level] = q->jiffies + (delay ? delay : HZ);
+#ifdef HTB_DEBUG
+ evs_used++;
+#endif
} else
- delay = q->near_ev_cache[level] - jiffies;
+ delay = q->near_ev_cache[level] - q->jiffies;
if (delay && min_delay > delay)
min_delay = delay;
- DEVIK_MEND(3);
- DEVIK_MSTART(5);
m = ~q->row_mask[level];
while (m != (int)(-1)) {
int prio = ffz (m);
@@ -1056,29 +1124,29 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)
if (likely(skb != NULL)) {
sch->q.qlen--;
sch->flags &= ~TCQ_F_THROTTLED;
- DEVIK_MEND(5);
goto fin;
}
}
- DEVIK_MEND(5);
}
- DEVIK_MSTART(4);
#ifdef HTB_DEBUG
- if (!q->nwc_hit && min_delay >= 5*HZ && net_ratelimit()) {
- printk(KERN_ERR "HTB: mindelay=%ld, report it please !\n",min_delay);
- htb_debug_dump(q);
+ if (!q->nwc_hit && min_delay >= 10*HZ && net_ratelimit()) {
+ if (min_delay == LONG_MAX) {
+ printk(KERN_ERR "HTB: dequeue bug (%d,%lu,%lu), report it please !\n",
+ evs_used,q->jiffies,jiffies);
+ htb_debug_dump(q);
+ } else
+ printk(KERN_WARNING "HTB: mindelay=%ld, some class has "
+ "too small rate\n",min_delay);
}
#endif
- htb_delay_by (sch,min_delay);
- DEVIK_MEND(4);
+ htb_delay_by (sch,min_delay > 5*HZ ? 5*HZ : min_delay);
fin:
- HTB_DBG(3,1,"htb_deq_end %s j=%lu skb=%p\n",sch->dev->name,jiffies,skb);
- DEVIK_MEND(2);
+ HTB_DBG(3,1,"htb_deq_end %s j=%lu skb=%p\n",sch->dev->name,q->jiffies,skb);
return skb;
}
/* try to drop from each class (by prio) until one succeed */
-static int htb_drop(struct Qdisc* sch)
+static unsigned int htb_drop(struct Qdisc* sch)
{
struct htb_sched *q = (struct htb_sched *)sch->data;
int prio;
@@ -1086,14 +1154,15 @@ static int htb_drop(struct Qdisc* sch)
for (prio = TC_HTB_NUMPRIO - 1; prio >= 0; prio--) {
struct list_head *p;
list_for_each (p,q->drops+prio) {
- struct htb_class *cl = list_entry(p,struct htb_class,
- un.leaf.drop_list);
+ struct htb_class *cl = list_entry(p, struct htb_class,
+ un.leaf.drop_list);
+ unsigned int len;
if (cl->un.leaf.q->ops->drop &&
- cl->un.leaf.q->ops->drop(cl->un.leaf.q)) {
+ (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) {
sch->q.qlen--;
if (!cl->un.leaf.q->q.qlen)
htb_deactivate (q,cl);
- return 1;
+ return len;
}
}
}
@@ -1162,7 +1231,6 @@ static int htb_init(struct Qdisc *sch, struct rtattr *opt)
HTB_VER >> 16,HTB_VER & 0xffff,gopt->version);
return -EINVAL;
}
- memset(q,0,sizeof(*q));
q->debug = gopt->debug;
HTB_DBG(0,1,"htb_init sch=%p handle=%X r2q=%d\n",sch,sch->handle,gopt->rate2quantum);
@@ -1208,7 +1276,8 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
gopt.direct_pkts = q->direct_pkts;
#ifdef HTB_DEBUG
- htb_debug_dump(q);
+ if (HTB_DBG_COND(0,2))
+ htb_debug_dump(q);
#endif
gopt.version = HTB_VER;
gopt.rate2quantum = q->rate2quantum;
@@ -1218,8 +1287,6 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
RTA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt);
rta->rta_len = skb->tail - b;
- sch->stats.qlen = sch->q.qlen;
- RTA_PUT(skb, TCA_STATS, sizeof(sch->stats), &sch->stats);
HTB_QUNLOCK(sch);
return skb->len;
rtattr_failure:
@@ -1289,6 +1356,9 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
return -ENOBUFS;
sch_tree_lock(sch);
if ((*old = xchg(&cl->un.leaf.q, new)) != NULL) {
+ if (cl->prio_activity)
+ htb_deactivate ((struct htb_sched*)sch->data,cl);
+
/* TODO: is it correct ? Why CBQ doesn't do it ? */
sch->q.qlen -= (*old)->q.qlen;
qdisc_reset(*old);
@@ -1323,7 +1393,7 @@ static void htb_destroy_filters(struct tcf_proto **fl)
while ((tp = *fl) != NULL) {
*fl = tp->next;
- tp->ops->destroy(tp);
+ tcf_destroy(tp);
}
}
@@ -1371,11 +1441,16 @@ static void htb_destroy(struct Qdisc* sch)
#ifdef HTB_RATECM
del_timer_sync (&q->rttim);
#endif
+ /* This line used to be after the htb_destroy_class call below
+ and surprisingly it worked in 2.4. But it must precede it
+ because a filter needs its target class alive to be able to call
+ unbind_filter on it (without Oops). */
+ htb_destroy_filters(&q->filter_list);
+
while (!list_empty(&q->root))
htb_destroy_class (sch,list_entry(q->root.next,
struct htb_class,sibling));
- htb_destroy_filters(&q->filter_list);
__skb_queue_purge(&q->direct_queue);
MOD_DEC_USE_COUNT;
}
@@ -1438,12 +1513,13 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
parent = parentid == TC_H_ROOT ? NULL : htb_find (parentid,sch);
hopt = RTA_DATA(tb[TCA_HTB_PARMS-1]);
- HTB_DBG(0,1,"htb_chg cl=%p, clid=%X, opt/prio=%d, rate=%u, buff=%d, quant=%d\n", cl,cl?cl->classid:0,(int)hopt->prio,hopt->rate.rate,hopt->buffer,hopt->quantum);
+ HTB_DBG(0,1,"htb_chg cl=%p(%X), clid=%X, parid=%X, opt/prio=%d, rate=%u, buff=%d, quant=%d\n", cl,cl?cl->classid:0,classid,parentid,(int)hopt->prio,hopt->rate.rate,hopt->buffer,hopt->quantum);
rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB-1]);
ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB-1]);
if (!rtab || !ctab) goto failure;
if (!cl) { /* new class */
+ struct Qdisc *new_q;
/* check for valid classid */
if (!classid || TC_H_MAJ(classid^sch->handle) || htb_find(classid,sch))
goto failure;
@@ -1467,6 +1543,10 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
cl->magic = HTB_CMAGIC;
#endif
+ /* create the leaf qdisc early because it uses kmalloc(GFP_KERNEL),
+ which can't be used inside of sch_tree_lock
+ -- thanks to Karlis Peisenieks */
+ new_q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
sch_tree_lock(sch);
if (parent && !parent->level) {
/* turn parent into inner node */
@@ -1485,8 +1565,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
memset (&parent->un.inner,0,sizeof(parent->un.inner));
}
/* leaf (we) needs elementary qdisc */
- if (!(cl->un.leaf.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)))
- cl->un.leaf.q = &noop_qdisc;
+ cl->un.leaf.q = new_q ? new_q : &noop_qdisc;
cl->classid = classid; cl->parent = parent;
@@ -1514,11 +1593,11 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
if (!cl->level) {
cl->un.leaf.quantum = rtab->rate.rate / q->rate2quantum;
if (!hopt->quantum && cl->un.leaf.quantum < 1000) {
- printk(KERN_WARNING "HTB: quantum of class %X is small. Consider r2q change.", cl->classid);
+ printk(KERN_WARNING "HTB: quantum of class %X is small. Consider r2q change.\n", cl->classid);
cl->un.leaf.quantum = 1000;
}
if (!hopt->quantum && cl->un.leaf.quantum > 200000) {
- printk(KERN_WARNING "HTB: quantum of class %X is big. Consider r2q change.", cl->classid);
+ printk(KERN_WARNING "HTB: quantum of class %X is big. Consider r2q change.\n", cl->classid);
cl->un.leaf.quantum = 200000;
}
if (hopt->quantum)
diff --git a/release/src/linux/linux/net/sched/sch_ingress.c b/release/src/linux/linux/net/sched/sch_ingress.c
index 2b30fce0..70698728 100644
--- a/release/src/linux/linux/net/sched/sch_ingress.c
+++ b/release/src/linux/linux/net/sched/sch_ingress.c
@@ -250,7 +250,6 @@ int ingress_init(struct Qdisc *sch,struct rtattr *opt)
}
DPRINTK("ingress_init(sch %p,[qdisc %p],opt %p)\n",sch,p,opt);
- memset(p, 0, sizeof(*p));
p->filter_list = NULL;
p->q = &noop_qdisc;
MOD_INC_USE_COUNT;
@@ -286,10 +285,7 @@ static void ingress_destroy(struct Qdisc *sch)
p->filter_list = tp->next;
tp->ops->destroy(tp);
}
- memset(p, 0, sizeof(*p));
- p->filter_list = NULL;
-
MOD_DEC_USE_COUNT;
}
diff --git a/release/src/linux/linux/net/sched/sch_sfq.c b/release/src/linux/linux/net/sched/sch_sfq.c
index c96762fb..a6c17424 100644
--- a/release/src/linux/linux/net/sched/sch_sfq.c
+++ b/release/src/linux/linux/net/sched/sch_sfq.c
@@ -218,6 +218,7 @@ static int sfq_drop(struct Qdisc *sch)
struct sfq_sched_data *q = (struct sfq_sched_data *)sch->data;
sfq_index d = q->max_depth;
struct sk_buff *skb;
+ int len;
/* Queue is full! Find the longest slot and
drop a packet from it */
@@ -225,12 +226,13 @@ static int sfq_drop(struct Qdisc *sch)
if (d > 1) {
sfq_index x = q->dep[d+SFQ_DEPTH].next;
skb = q->qs[x].prev;
+ len = skb->len;
__skb_unlink(skb, &q->qs[x]);
kfree_skb(skb);
sfq_dec(q, x);
sch->q.qlen--;
sch->stats.drops++;
- return 1;
+ return len;
}
if (d == 1) {
@@ -239,13 +241,14 @@ static int sfq_drop(struct Qdisc *sch)
q->next[q->tail] = q->next[d];
q->allot[q->next[d]] += q->quantum;
skb = q->qs[d].prev;
+ len = skb->len;
__skb_unlink(skb, &q->qs[d]);
kfree_skb(skb);
sfq_dec(q, d);
sch->q.qlen--;
q->ht[q->hash[d]] = SFQ_DEPTH;
sch->stats.drops++;
- return 1;
+ return len;
}
return 0;
@@ -342,6 +345,7 @@ sfq_dequeue(struct Qdisc* sch)
/* Is the slot empty? */
if (q->qs[a].qlen == 0) {
+ q->ht[q->hash[a]] = SFQ_DEPTH;
a = q->next[a];
if (a == old_a) {
q->tail = SFQ_DEPTH;
diff --git a/release/src/linux/linux/net/socket.c b/release/src/linux/linux/net/socket.c
index d8b479c9..4816eeb6 100644
--- a/release/src/linux/linux/net/socket.c
+++ b/release/src/linux/linux/net/socket.c
@@ -607,6 +607,9 @@ ssize_t sock_sendpage(struct file *file, struct page *page,
if (more)
flags |= MSG_MORE;
+ if (!sock->ops->sendpage)
+ return sock_no_sendpage(sock, page, offset, size, flags);
+
return sock->ops->sendpage(sock, page, offset, size, flags);
}