diff options
author | Andreas Baumann <mail@andreasbaumann.cc> | 2015-01-03 13:58:15 +0100 |
---|---|---|
committer | Andreas Baumann <mail@andreasbaumann.cc> | 2015-01-03 13:58:15 +0100 |
commit | 4aca87515a5083ae0e31ce3177189fd43b6d05ac (patch) | |
tree | 7b1d9a31393ca090757dc6f0d3859b4fcd93f271 /release/src/linux/linux/net/ipv4 | |
parent | 008d0be72b2f160382c6e880765e96b64a050c65 (diff) | |
download | tomato-4aca87515a5083ae0e31ce3177189fd43b6d05ac.tar.gz tomato-4aca87515a5083ae0e31ce3177189fd43b6d05ac.tar.bz2 |
patch to Vanilla Tomato 1.28
Diffstat (limited to 'release/src/linux/linux/net/ipv4')
58 files changed, 8700 insertions, 980 deletions
diff --git a/release/src/linux/linux/net/ipv4/arp.c b/release/src/linux/linux/net/ipv4/arp.c index aecd020a..e458f7d5 100644 --- a/release/src/linux/linux/net/ipv4/arp.c +++ b/release/src/linux/linux/net/ipv4/arp.c @@ -171,8 +171,8 @@ struct neigh_table arp_tbl = { id: "arp_cache", parms: { tbl: &arp_tbl, - /*zhijian 2006-10-23 modify to solve arp entry timeout problem(cdrouter3.3 scaling module)*/ - #if 0 +/*zhijian 2006-10-23 modify to solve arp entry timeout problem(cdrouter3.3 scaling module)*/ +#if 0 base_reachable_time: 30 * HZ, retrans_time: 1 * HZ, gc_staletime: 60 * HZ, @@ -181,16 +181,16 @@ struct neigh_table arp_tbl = { queue_len: 3, ucast_probes: 3, mcast_probes: 3, - #else - base_reachable_time: 60 * HZ, - retrans_time: 5 * HZ, - gc_staletime: 120 * HZ, - reachable_time: 60 * HZ, - delay_probe_time: 10 * HZ, - queue_len: 3, - ucast_probes: 6, - mcast_probes: 6, - #endif +#else + base_reachable_time: 60 * HZ, + retrans_time: 5 * HZ, + gc_staletime: 120 * HZ, + reachable_time: 60 * HZ, + delay_probe_time: 10 * HZ, + queue_len: 3, + ucast_probes: 6, + mcast_probes: 6, +#endif anycast_delay: 1 * HZ, proxy_delay: (8 * HZ) / 10, proxy_qlen: 64, diff --git a/release/src/linux/linux/net/ipv4/igmp.c b/release/src/linux/linux/net/ipv4/igmp.c index 3f718f2a..c53d8feb 100644 --- a/release/src/linux/linux/net/ipv4/igmp.c +++ b/release/src/linux/linux/net/ipv4/igmp.c @@ -677,8 +677,9 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr) count++; } err = -ENOBUFS; - //if (iml == NULL || count >= sysctl_igmp_max_memberships) - if (iml == NULL || count > sysctl_igmp_max_memberships)// modify for cdrouter v3.3 item 300(cdrouter_mcast_100) bug + // if (iml == NULL || count >= sysctl_igmp_max_memberships) + // 43011: modify for cdrouter v3.3 item 300(cdrouter_mcast_100) bug + if (iml == NULL || count > sysctl_igmp_max_memberships) goto done; memcpy(&iml->multi, imr, sizeof(*imr)); iml->next = sk->protinfo.af_inet.mc_list; diff --git a/release/src/linux/linux/net/ipv4/netfilter/Config.in b/release/src/linux/linux/net/ipv4/netfilter/Config.in index 7662305e..b1f7f985 100644 --- a/release/src/linux/linux/net/ipv4/netfilter/Config.in +++ b/release/src/linux/linux/net/ipv4/netfilter/Config.in @@ -7,16 +7,17 @@ comment ' IP: Netfilter Configuration' tristate 'Connection tracking (required for masq/NAT)' CONFIG_IP_NF_CONNTRACK if [ "$CONFIG_IP_NF_CONNTRACK" != "n" ]; then dep_tristate ' FTP protocol support' CONFIG_IP_NF_FTP $CONFIG_IP_NF_CONNTRACK - dep_tristate ' TFTP protocol support' CONFIG_IP_NF_TFTP $CONFIG_IP_NF_CONNTRACK + bool ' Connection mark tracking support' CONFIG_IP_NF_CONNTRACK_MARK dep_tristate ' H.323 (netmeeting) support' CONFIG_IP_NF_H323 $CONFIG_IP_NF_CONNTRACK + dep_tristate ' TFTP protocol support' CONFIG_IP_NF_TFTP $CONFIG_IP_NF_CONNTRACK dep_tristate ' IRC protocol support' CONFIG_IP_NF_IRC $CONFIG_IP_NF_CONNTRACK dep_tristate ' CuSeeMe protocol support' CONFIG_IP_NF_CUSEEME $CONFIG_IP_NF_CONNTRACK dep_tristate ' Quake III protocol support' CONFIG_IP_NF_QUAKE3 $CONFIG_IP_NF_CONNTRACK dep_tristate ' RTSP protocol support' CONFIG_IP_NF_RTSP $CONFIG_IP_NF_CONNTRACK dep_tristate ' MMS protocol support' CONFIG_IP_NF_MMS $CONFIG_IP_NF_CONNTRACK - dep_tristate ' SIP protocol support' CONFIG_IP_NF_SIP $CONFIG_IP_NF_CONNTRACK dep_tristate ' GRE protocol support' CONFIG_IP_NF_CT_PROTO_GRE $CONFIG_IP_NF_CONNTRACK dep_tristate ' PPTP protocol support' CONFIG_IP_NF_PPTP $CONFIG_IP_NF_CT_PROTO_GRE + dep_tristate ' SIP protocol support' CONFIG_IP_NF_SIP $CONFIG_IP_NF_CONNTRACK dep_tristate ' ESP protocol support' CONFIG_IP_NF_CT_PROTO_ESP $CONFIG_IP_NF_CONNTRACK fi @@ -27,6 +28,10 @@ tristate 'IP tables support (required for filtering/masq/NAT)' CONFIG_IP_NF_IPTA if [ "$CONFIG_IP_NF_IPTABLES" != "n" ]; then # The simple matches. dep_tristate ' limit match support' CONFIG_IP_NF_MATCH_LIMIT $CONFIG_IP_NF_IPTABLES + dep_tristate ' IPP2P match support' CONFIG_IP_NF_MATCH_IPP2P $CONFIG_IP_NF_IPTABLES + dep_tristate ' geoip match support' CONFIG_IP_NF_MATCH_GEOIP $CONFIG_IP_NF_IPTABLES + dep_tristate ' quota match support' CONFIG_IP_NF_MATCH_QUOTA $CONFIG_IP_NF_IPTABLES + dep_tristate ' IP range match support' CONFIG_IP_NF_MATCH_IPRANGE $CONFIG_IP_NF_IPTABLES dep_tristate ' IP address pool support' CONFIG_IP_NF_POOL $CONFIG_IP_NF_IPTABLES if [ "$CONFIG_IP_NF_POOL" = "y" -o "$CONFIG_IP_NF_POOL" = "m" ]; then @@ -39,6 +44,12 @@ if [ "$CONFIG_IP_NF_IPTABLES" != "n" ]; then dep_tristate ' Multiple port match support' CONFIG_IP_NF_MATCH_MULTIPORT $CONFIG_IP_NF_IPTABLES dep_tristate ' Multiple port with ranges match support' CONFIG_IP_NF_MATCH_MPORT $CONFIG_IP_NF_IPTABLES dep_tristate ' TOS match support' CONFIG_IP_NF_MATCH_TOS $CONFIG_IP_NF_IPTABLES + dep_tristate ' recent match support' CONFIG_IP_NF_MATCH_RECENT $CONFIG_IP_NF_IPTABLES + dep_tristate ' account match support' CONFIG_IP_NF_MATCH_ACCOUNT $CONFIG_IP_NF_IPTABLES $CONFIG_PROC_FS + if [ "$CONFIG_IP_NF_MATCH_ACCOUNT" != "n" ]; then + bool ' account debugging output' CONFIG_IP_NF_MATCH_ACCOUNT_DEBUG + fi + dep_tristate ' condition match support' CONFIG_IP_NF_MATCH_CONDITION $CONFIG_IP_NF_IPTABLES dep_tristate ' TIME match support (EXPERIMENTAL)' CONFIG_IP_NF_MATCH_TIME $CONFIG_IP_NF_IPTABLES dep_tristate ' ECN match support' CONFIG_IP_NF_MATCH_ECN $CONFIG_IP_NF_IPTABLES @@ -46,19 +57,36 @@ if [ "$CONFIG_IP_NF_IPTABLES" != "n" ]; then dep_tristate ' AH/ESP match support' CONFIG_IP_NF_MATCH_AH_ESP $CONFIG_IP_NF_IPTABLES dep_tristate ' LENGTH match support' CONFIG_IP_NF_MATCH_LENGTH $CONFIG_IP_NF_IPTABLES + dep_tristate ' U32 match support' CONFIG_IP_NF_MATCH_U32 $CONFIG_IP_NF_U32 dep_tristate ' TTL match support' CONFIG_IP_NF_MATCH_TTL $CONFIG_IP_NF_IPTABLES dep_tristate ' tcpmss match support' CONFIG_IP_NF_MATCH_TCPMSS $CONFIG_IP_NF_IPTABLES + if [ "$CONFIG_IP_NF_CONNTRACK" != "n" ]; then dep_tristate ' Helper match support' CONFIG_IP_NF_MATCH_HELPER $CONFIG_IP_NF_IPTABLES fi if [ "$CONFIG_IP_NF_CONNTRACK" != "n" ]; then dep_tristate ' Connection state match support' CONFIG_IP_NF_MATCH_STATE $CONFIG_IP_NF_CONNTRACK $CONFIG_IP_NF_IPTABLES + dep_tristate ' Connections/IP limit match support' CONFIG_IP_NF_MATCH_CONNLIMIT $CONFIG_IP_NF_IPTABLES + if [ "$CONFIG_IP_NF_CONNTRACK_MARK" != "n" ]; then + dep_tristate ' Connection mark match support' CONFIG_IP_NF_MATCH_CONNMARK $CONFIG_IP_NF_IPTABLES + fi dep_tristate ' Connection tracking match support' CONFIG_IP_NF_MATCH_CONNTRACK $CONFIG_IP_NF_CONNTRACK $CONFIG_IP_NF_IPTABLES fi if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then dep_tristate ' Unclean match support (EXPERIMENTAL)' CONFIG_IP_NF_MATCH_UNCLEAN $CONFIG_IP_NF_IPTABLES + dep_tristate ' String match support (EXPERIMENTAL)' CONFIG_IP_NF_MATCH_STRING $CONFIG_IP_NF_IPTABLES dep_tristate ' Webstr match support (EXPERIMENTAL)' CONFIG_IP_NF_MATCH_WEBSTR $CONFIG_IP_NF_IPTABLES dep_tristate ' Owner match support (EXPERIMENTAL)' CONFIG_IP_NF_MATCH_OWNER $CONFIG_IP_NF_IPTABLES + dep_tristate ' Layer 7 match support (EXPERIMENTAL)' CONFIG_IP_NF_MATCH_LAYER7 $CONFIG_IP_NF_CONNTRACK + dep_mbool ' Layer 7 debugging output (EXPERIMENTAL)' CONFIG_IP_NF_MATCH_LAYER7_DEBUG $CONFIG_IP_NF_MATCH_LAYER7 + + dep_tristate ' web match' CONFIG_IP_NF_MATCH_WEB $CONFIG_IP_NF_IPTABLES + dep_tristate ' BCOUNT target' CONFIG_IP_NF_TARGET_BCOUNT $CONFIG_IP_NF_IPTABLES + dep_tristate ' bcount match' CONFIG_IP_NF_MATCH_BCOUNT $CONFIG_IP_NF_TARGET_BCOUNT + dep_tristate ' MACSAVE target' CONFIG_IP_NF_TARGET_MACSAVE $CONFIG_IP_NF_IPTABLES + dep_tristate ' macsave match' CONFIG_IP_NF_MATCH_MACSAVE $CONFIG_IP_NF_TARGET_MACSAVE + dep_tristate ' exp match (experimental rig - do not use)' CONFIG_IP_NF_MATCH_EXP $CONFIG_IP_NF_IPTABLES + fi # The targets dep_tristate ' Packet filtering' CONFIG_IP_NF_FILTER $CONFIG_IP_NF_IPTABLES @@ -75,8 +103,6 @@ if [ "$CONFIG_IP_NF_IPTABLES" != "n" ]; then define_bool CONFIG_IP_NF_NAT_NEEDED y dep_tristate ' MASQUERADE target support' CONFIG_IP_NF_TARGET_MASQUERADE $CONFIG_IP_NF_NAT dep_tristate ' REDIRECT target support' CONFIG_IP_NF_TARGET_REDIRECT $CONFIG_IP_NF_NAT - dep_tristate ' Automatic port forwarding (autofw) target support' CONFIG_IP_NF_AUTOFW $CONFIG_IP_NF_NAT - dep_tristate ' TRIGGER target support (port-trigger)' CONFIG_IP_NF_TARGET_TRIGGER $CONFIG_IP_NF_NAT if [ "$CONFIG_IP_NF_H323" = "m" ]; then define_tristate CONFIG_IP_NF_NAT_H323 m else @@ -84,6 +110,8 @@ if [ "$CONFIG_IP_NF_IPTABLES" != "n" ]; then define_tristate CONFIG_IP_NF_NAT_H323 $CONFIG_IP_NF_NAT fi fi + dep_tristate ' Automatic port forwarding (autofw) target support' CONFIG_IP_NF_AUTOFW $CONFIG_IP_NF_NAT + dep_tristate ' TRIGGER target support (port-trigger)' CONFIG_IP_NF_TARGET_TRIGGER $CONFIG_IP_NF_NAT if [ "$CONFIG_IP_NF_PPTP" = "m" ]; then define_tristate CONFIG_IP_NF_NAT_PPTP m else @@ -109,13 +137,20 @@ if [ "$CONFIG_IP_NF_IPTABLES" != "n" ]; then if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then dep_tristate ' Basic SNMP-ALG support (EXPERIMENTAL)' CONFIG_IP_NF_NAT_SNMP_BASIC $CONFIG_IP_NF_NAT fi + if [ "$CONFIG_IP_NF_RTSP" = "m" ]; then + define_tristate CONFIG_IP_NF_NAT_RTSP m + else + if [ "$CONFIG_IP_NF_RTSP" = "y" ]; then + define_tristate CONFIG_IP_NF_NAT_RTSP $CONFIG_IP_NF_NAT + fi + fi if [ "$CONFIG_IP_NF_IRC" = "m" ]; then define_tristate CONFIG_IP_NF_NAT_IRC m else if [ "$CONFIG_IP_NF_IRC" = "y" ]; then define_tristate CONFIG_IP_NF_NAT_IRC $CONFIG_IP_NF_NAT fi - fi + fi if [ "$CONFIG_IP_NF_CUSEEME" = "m" ]; then define_tristate CONFIG_IP_NF_NAT_CUSEEME m else @@ -153,13 +188,6 @@ if [ "$CONFIG_IP_NF_IPTABLES" != "n" ]; then define_tristate CONFIG_IP_NF_NAT_TFTP $CONFIG_IP_NF_NAT fi fi - if [ "$CONFIG_IP_NF_RTSP" = "m" ]; then - define_tristate CONFIG_IP_NF_NAT_RTSP m - else - if [ "$CONFIG_IP_NF_RTSP" = "y" ]; then - define_tristate CONFIG_IP_NF_NAT_RTSP $CONFIG_IP_NF_NAT - fi - fi if [ "$CONFIG_IP_NF_CT_PROTO_ESP" = "m" ]; then define_tristate CONFIG_IP_NF_NAT_PROTO_ESP m else @@ -178,8 +206,15 @@ if [ "$CONFIG_IP_NF_IPTABLES" != "n" ]; then dep_tristate ' DSCP target support' CONFIG_IP_NF_TARGET_DSCP $CONFIG_IP_NF_MANGLE dep_tristate ' MARK target support' CONFIG_IP_NF_TARGET_MARK $CONFIG_IP_NF_MANGLE + dep_tristate ' ROUTE target support' CONFIG_IP_NF_TARGET_ROUTE $CONFIG_IP_NF_MANGLE + dep_tristate ' CLASSIFY target support (EXPERIMENTAL)' CONFIG_IP_NF_TARGET_CLASSIFY $CONFIG_IP_NF_FILTER + dep_tristate ' IMQ target support' CONFIG_IP_NF_TARGET_IMQ $CONFIG_IP_NF_MANGLE fi dep_tristate ' LOG target support' CONFIG_IP_NF_TARGET_LOG $CONFIG_IP_NF_IPTABLES + if [ "$CONFIG_IP_NF_CONNTRACK_MARK" != "n" ]; then + dep_tristate ' CONNMARK target support' CONFIG_IP_NF_TARGET_CONNMARK $CONFIG_IP_NF_IPTABLES + fi + dep_tristate ' TTL target support' CONFIG_IP_NF_TARGET_TTL $CONFIG_IP_NF_IPTABLES dep_tristate ' ULOG target support' CONFIG_IP_NF_TARGET_ULOG $CONFIG_IP_NF_IPTABLES dep_tristate ' TCPMSS target support' CONFIG_IP_NF_TARGET_TCPMSS $CONFIG_IP_NF_IPTABLES fi @@ -189,6 +224,10 @@ if [ "$CONFIG_IP_NF_ARPTABLES" != "n" ]; then dep_tristate ' ARP packet filtering' CONFIG_IP_NF_ARPFILTER $CONFIG_IP_NF_ARPTABLES fi +if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then + tristate 'tomato_ct' CONFIG_IP_NF_TOMATOCT +fi + # Backwards compatibility modules: only if you don't build in the others. if [ "$CONFIG_IP_NF_CONNTRACK" != "y" ]; then if [ "$CONFIG_IP_NF_IPTABLES" != "y" ]; then diff --git a/release/src/linux/linux/net/ipv4/netfilter/Makefile b/release/src/linux/linux/net/ipv4/netfilter/Makefile index abf55469..80de56f3 100644 --- a/release/src/linux/linux/net/ipv4/netfilter/Makefile +++ b/release/src/linux/linux/net/ipv4/netfilter/Makefile @@ -33,10 +33,10 @@ obj-$(CONFIG_IP_NF_CONNTRACK) += ip_conntrack.o # H.323 support obj-$(CONFIG_IP_NF_H323) += ip_conntrack_h323.o -obj-$(CONFIG_IP_NF_NAT_H323) += ip_nat_h323.o -ifdef CONFIG_IP_NF_NAT_H323 +ifdef CONFIG_IP_NF_H323 export-objs += ip_conntrack_h323.o endif +obj-$(CONFIG_IP_NF_NAT_H323) += ip_nat_h323.o # connection tracking protocol helpers @@ -83,10 +83,14 @@ obj-$(CONFIG_IP_NF_IRC) += ip_conntrack_irc.o ifdef CONFIG_IP_NF_NAT_IRC export-objs += ip_conntrack_irc.o endif + +# rtsp protocol support obj-$(CONFIG_IP_NF_RTSP) += ip_conntrack_rtsp.o ifdef CONFIG_IP_NF_NAT_RTSP export-objs += ip_conntrack_rtsp.o endif +obj-$(CONFIG_IP_NF_NAT_RTSP) += ip_nat_rtsp.o + # NAT helpers obj-$(CONFIG_IP_NF_NAT_CUSEEME) += ip_nat_cuseeme.o obj-$(CONFIG_IP_NF_NAT_PPTP) += ip_nat_pptp.o @@ -95,7 +99,6 @@ obj-$(CONFIG_IP_NF_NAT_TFTP) += ip_nat_tftp.o obj-$(CONFIG_IP_NF_NAT_FTP) += ip_nat_ftp.o obj-$(CONFIG_IP_NF_NAT_IRC) += ip_nat_irc.o obj-$(CONFIG_IP_NF_NAT_QUAKE3) += ip_nat_quake3.o -obj-$(CONFIG_IP_NF_NAT_RTSP) += ip_nat_rtsp.o obj-$(CONFIG_IP_NF_NAT_MMS) += ip_nat_mms.o # generic IP tables @@ -109,48 +112,64 @@ obj-$(CONFIG_IP_NF_NAT) += iptable_nat.o # matches obj-$(CONFIG_IP_NF_MATCH_HELPER) += ipt_helper.o obj-$(CONFIG_IP_NF_MATCH_LIMIT) += ipt_limit.o +obj-$(CONFIG_IP_NF_MATCH_IPP2P) += ipt_ipp2p.o +obj-$(CONFIG_IP_NF_MATCH_GEOIP) += ipt_geoip.o +obj-$(CONFIG_IP_NF_MATCH_QUOTA) += ipt_quota.o +obj-$(CONFIG_IP_NF_MATCH_IPRANGE) += ipt_iprange.o obj-$(CONFIG_IP_NF_MATCH_MARK) += ipt_mark.o obj-$(CONFIG_IP_NF_POOL) += ipt_pool.o ip_pool.o obj-$(CONFIG_IP_NF_MATCH_MAC) += ipt_mac.o - obj-$(CONFIG_IP_NF_MATCH_PKTTYPE) += ipt_pkttype.o obj-$(CONFIG_IP_NF_MATCH_MULTIPORT) += ipt_multiport.o - obj-$(CONFIG_IP_NF_MATCH_MPORT) += ipt_mport.o - obj-$(CONFIG_IP_NF_MATCH_OWNER) += ipt_owner.o obj-$(CONFIG_IP_NF_MATCH_TOS) += ipt_tos.o - +obj-$(CONFIG_IP_NF_MATCH_RECENT) += ipt_recent.o +obj-$(CONFIG_IP_NF_MATCH_ACCOUNT) += ipt_account.o +obj-$(CONFIG_IP_NF_MATCH_CONDITION) += ipt_condition.o obj-$(CONFIG_IP_NF_MATCH_TIME) += ipt_time.o - obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o obj-$(CONFIG_IP_NF_MATCH_DSCP) += ipt_dscp.o obj-$(CONFIG_IP_NF_MATCH_AH_ESP) += ipt_ah.o ipt_esp.o - obj-$(CONFIG_IP_NF_MATCH_LENGTH) += ipt_length.o - +obj-$(CONFIG_IP_NF_MATCH_U32) += ipt_u32.o obj-$(CONFIG_IP_NF_MATCH_TTL) += ipt_ttl.o obj-$(CONFIG_IP_NF_MATCH_STATE) += ipt_state.o +obj-$(CONFIG_IP_NF_MATCH_CONNLIMIT) += ipt_connlimit.o +obj-$(CONFIG_IP_NF_MATCH_CONNMARK) += ipt_connmark.o obj-$(CONFIG_IP_NF_MATCH_CONNTRACK) += ipt_conntrack.o obj-$(CONFIG_IP_NF_MATCH_UNCLEAN) += ipt_unclean.o +obj-$(CONFIG_IP_NF_MATCH_STRING) += ipt_string.o obj-$(CONFIG_IP_NF_MATCH_WEBSTR) += ipt_webstr.o obj-$(CONFIG_IP_NF_MATCH_TCPMSS) += ipt_tcpmss.o +obj-$(CONFIG_IP_NF_MATCH_LAYER7) += ipt_layer7.o +obj-$(CONFIG_IP_NF_MATCH_WEB) += ipt_web.o +obj-$(CONFIG_IP_NF_MATCH_MACSAVE) += ipt_macsave.o +obj-$(CONFIG_IP_NF_MATCH_EXP) += ipt_exp.o +obj-$(CONFIG_IP_NF_MATCH_BCOUNT) += ipt_bcount.o # targets obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o obj-$(CONFIG_IP_NF_TARGET_MIRROR) += ipt_MIRROR.o +obj-$(CONFIG_IP_NF_TARGET_CLASSIFY) += ipt_CLASSIFY.o obj-$(CONFIG_IP_NF_TARGET_TOS) += ipt_TOS.o obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o obj-$(CONFIG_IP_NF_TARGET_DSCP) += ipt_DSCP.o obj-$(CONFIG_IP_NF_TARGET_MARK) += ipt_MARK.o +obj-$(CONFIG_IP_NF_TARGET_IMQ) += ipt_IMQ.o obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o +obj-$(CONFIG_IP_NF_TARGET_ROUTE) += ipt_ROUTE.o obj-$(CONFIG_IP_NF_NAT_SNMP_BASIC) += ip_nat_snmp_basic.o obj-$(CONFIG_IP_NF_TARGET_LOG) += ipt_LOG.o +obj-$(CONFIG_IP_NF_TARGET_CONNMARK) += ipt_CONNMARK.o +obj-$(CONFIG_IP_NF_TARGET_TTL) += ipt_TTL.o obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o obj-$(CONFIG_IP_NF_TARGET_TCPMSS) += ipt_TCPMSS.o obj-$(CONFIG_IP_NF_AUTOFW) += ip_autofw.o obj-$(CONFIG_IP_NF_TARGET_TRIGGER) += ipt_TRIGGER.o +obj-$(CONFIG_IP_NF_TARGET_MACSAVE) += ipt_MACSAVE.o +obj-$(CONFIG_IP_NF_TARGET_BCOUNT) += ipt_BCOUNT.o # generic ARP tables obj-$(CONFIG_IP_NF_ARPTABLES) += arp_tables.o @@ -164,6 +183,8 @@ obj-$(CONFIG_IP_NF_COMPAT_IPFWADM) += ipfwadm.o obj-$(CONFIG_IP_NF_QUEUE) += ip_queue.o +obj-$(CONFIG_IP_NF_TOMATOCT) += tomato_ct.o + include $(TOPDIR)/Rules.make ip_conntrack.o: $(ip_conntrack-objs) diff --git a/release/src/linux/linux/net/ipv4/netfilter/arp_tables.c b/release/src/linux/linux/net/ipv4/netfilter/arp_tables.c index aa1c034a..757fc2ab 100644 --- a/release/src/linux/linux/net/ipv4/netfilter/arp_tables.c +++ b/release/src/linux/linux/net/ipv4/netfilter/arp_tables.c @@ -986,13 +986,12 @@ static int do_add_counters(void *user, unsigned int len) goto free; write_lock_bh(&t->lock); - /************************************* - * modify by tanghui @ 2006-10-11 - * for a RACE CONDITION in the "do_add_counters()" function - *************************************/ - //if (t->private->number != paddc->num_counters) { - if (t->private->number != tmp.num_counters) { - /*************************************/ + +#if 0 // removed 1.11 forward bug test + // 43011 (09?): checkme: modify by tanghui @ 2006-10-11 for a RACE CONDITION in the "do_add_counters()" function + // if (t->private->number != tmp.num_counters) { +#endif + if (t->private->number != paddc->num_counters) { ret = -EINVAL; goto unlock_up_free; } diff --git a/release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_core.c b/release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_core.c index 9c6f040f..324951ee 100644 --- a/release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_core.c +++ b/release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_core.c @@ -34,7 +34,12 @@ /* For ERR_PTR(). Yeah, I know... --RR */ #include <linux/fs.h> -#include <linux/netdevice.h> +#define TEST_JHASH // test jhash from 2.4.33 -- zzz + +#ifdef TEST_JHASH +#include <linux/jhash.h> +#include <linux/random.h> +#endif /* This rwlock protects the main hash table, protocol/helper/expected registrations, conntrack timers*/ @@ -71,7 +76,7 @@ static kmem_cache_t *ip_conntrack_cachep; int sysctl_ip_conntrack_tcp_timeouts[10] = { 30 MINS, /* TCP_CONNTRACK_NONE, */ - 5 DAYS, /* TCP_CONNTRACK_ESTABLISHED, */ + 4 HOURS, /* TCP_CONNTRACK_ESTABLISHED, */ // was 5 days zzz 2 MINS, /* TCP_CONNTRACK_SYN_SENT, */ 60 SECS, /* TCP_CONNTRACK_SYN_RECV, */ 2 MINS, /* TCP_CONNTRACK_FIN_WAIT, */ @@ -128,9 +133,20 @@ ip_conntrack_put(struct ip_conntrack *ct) nf_conntrack_put(&ct->infos[0]); } +#ifdef TEST_JHASH +static int ip_conntrack_hash_rnd_initted; +static unsigned int ip_conntrack_hash_rnd; +#endif + static inline u_int32_t hash_conntrack(const struct ip_conntrack_tuple *tuple) { +#ifdef TEST_JHASH + return (jhash_3words(tuple->src.ip, + (tuple->dst.ip ^ tuple->dst.protonum), + (tuple->src.u.all | (tuple->dst.u.all << 16)), + ip_conntrack_hash_rnd) % ip_conntrack_htable_size); +#else /* ntohl because more differences in low bits. */ /* To ensure that halves of the same connection don't hash clash, we add the source per-proto again. */ @@ -139,6 +155,7 @@ hash_conntrack(const struct ip_conntrack_tuple *tuple) + tuple->dst.protonum) + ntohs(tuple->src.u.all)) % ip_conntrack_htable_size; +#endif } inline int @@ -314,9 +331,6 @@ clean_from_lists(struct ip_conntrack *ct) { DEBUGP("clean_from_lists(%p)\n", ct); MUST_BE_WRITE_LOCKED(&ip_conntrack_lock); - /* Remove from both hash lists: must not NULL out next ptrs, - otherwise we'll look unconfirmed. Fortunately, LIST_DELETE - doesn't do this. --RR */ LIST_DELETE(&ip_conntrack_hash [hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple)], &ct->tuplehash[IP_CT_DIR_ORIGINAL]); @@ -359,6 +373,14 @@ destroy_conntrack(struct nf_conntrack *nfct) list_del(&ct->master->expected_list); kfree(ct->master); } + + #if defined(CONFIG_IP_NF_MATCH_LAYER7) || defined(CONFIG_IP_NF_MATCH_LAYER7_MODULE) + if(ct->layer7.app_proto) + kfree(ct->layer7.app_proto); + if(ct->layer7.app_data) + kfree(ct->layer7.app_data); + #endif + WRITE_UNLOCK(&ip_conntrack_lock); DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct); @@ -489,6 +511,7 @@ __ip_conntrack_confirm(struct nf_ct_info *nfct) ct->timeout.expires += jiffies; add_timer(&ct->timeout); atomic_inc(&ct->ct_general.use); + set_bit(IPS_CONFIRMED_BIT, &ct->status); WRITE_UNLOCK(&ip_conntrack_lock); return NF_ACCEPT; } @@ -606,7 +629,7 @@ icmp_error_track(struct sk_buff *skb, connection. Too bad: we're in trouble anyway. */ static inline int unreplied(const struct ip_conntrack_tuple_hash *i) { - return !(i->ctrack->status & IPS_ASSURED); + return !(test_bit(IPS_ASSURED_BIT, &i->ctrack->status)); } static int early_drop(struct list_head *chain) @@ -632,31 +655,6 @@ static int early_drop(struct list_head *chain) return dropped; } -/******************lzh add *************************************** -* DESCRIPTION:delete seleted ip conntrack from conntrack_hash list -* INPUT : ip_conntrack_tuple_hash h -* OUTPUT: NULL -* AUTHOR: linzhihong -* DATE : 2006.7.20 -*****************************************************************/ -void del_selected_conntrack(struct ip_conntrack_tuple_hash *h) -{ - DEBUGP("hahaha enter %s\n", __FUNCTION__); - if(h) - { - #if 1 - ip_ct_refresh(h->ctrack, 1*HZ); - #else - if(del_timer(&h->ctrack->timeout)) - { - death_by_timeout((unsigned long)h->ctrack); - } - //ip_conntrack_put(h->ctrack); - #endif - } -} -/**************************** lzh end ******************************/ - static inline int helper_cmp(const struct ip_conntrack_helper *i, const struct ip_conntrack_tuple *rtuple) { @@ -670,41 +668,6 @@ struct ip_conntrack_helper *ip_ct_find_helper(const struct ip_conntrack_tuple *t tuple); } -#define RESERVE_CONNTRACK_FOR_ROUTER -#ifdef RESERVE_CONNTRACK_FOR_ROUTER -#define RESERVE_CONNTRACK_NUM 20 -/* - Check if the packet is for Router AP(LAN side only), or generate from - Router itself(Both sides). - */ -static int cmp_local_ip(u_int32_t dst, u_int32_t src) -{ -#define IF_LAN_NAME "br0" - - int ret = -1; - struct in_device *in_dev; - struct net_device *dev; - struct in_ifaddr **ifap = NULL; - struct in_ifaddr *ifa = NULL; - - for(dev = dev_base; dev != NULL; dev = dev->next){ - if((in_dev=__in_dev_get(dev)) != NULL){ - for(ifap=&in_dev->ifa_list; (ifa=*ifap) != NULL; ifap=&ifa->ifa_next){ - if((ifa->ifa_address == dst && !strcmp(IF_LAN_NAME, ifa->ifa_label)) || ifa->ifa_address == src){ - /*match*/ - ret = 0; - break; - } - } - } - } - - return ret; - -#undef IF_LAN_NAME -} -#endif - /* Allocate a new conntrack: we return -ENOMEM if classification failed due to stress. Otherwise it really is unclassifiable. */ static struct ip_conntrack_tuple_hash * @@ -719,9 +682,15 @@ init_conntrack(const struct ip_conntrack_tuple *tuple, int i; static unsigned int drop_next = 0; +#ifdef TEST_JHASH + if (!ip_conntrack_hash_rnd_initted) { + get_random_bytes(&ip_conntrack_hash_rnd, 4); + ip_conntrack_hash_rnd_initted = 1; + } +#endif + hash = hash_conntrack(tuple); - #ifndef RESERVE_CONNTRACK_FOR_ROUTER if (ip_conntrack_max && atomic_read(&ip_conntrack_count) >= ip_conntrack_max) { /* Try dropping from random chain, or else from the @@ -738,32 +707,6 @@ init_conntrack(const struct ip_conntrack_tuple *tuple, return ERR_PTR(-ENOMEM); } } - #else -#define IPV4_BROADCAST_ADDR 0x000000FF -#define IPV4_MULTICAST_ADDR 0xE0000000 - if (ip_conntrack_max && - (ip_conntrack_max - atomic_read(&ip_conntrack_count)) <= RESERVE_CONNTRACK_NUM){ - if((atomic_read(&ip_conntrack_count) < ip_conntrack_max) && - (((tuple->dst).ip & IPV4_BROADCAST_ADDR == IPV4_BROADCAST_ADDR) || ((tuple->dst).ip & IPV4_BROADCAST_ADDR == IPV4_MULTICAST_ADDR) || !cmp_local_ip((tuple->dst).ip, (tuple->src).ip))){ - //packet for router(LAN side only) or packet from router, let it go thru - } - else{ - /* Try dropping from random chain, or else from the - chain about to put into (in case they're trying to - bomb one hash chain). */ - unsigned int next = (drop_next++)%ip_conntrack_htable_size; - - if (!early_drop(&ip_conntrack_hash[next]) - && !early_drop(&ip_conntrack_hash[hash])) { - if (net_ratelimit()) - printk(KERN_WARNING - "ip_conntrack: table full, dropping" - " packet.\n"); - return ERR_PTR(-ENOMEM); - } - } - } - #endif if (!invert_tuple(&repl_tuple, tuple, protocol)) { DEBUGP("Can't invert tuple.\n"); @@ -829,9 +772,12 @@ init_conntrack(const struct ip_conntrack_tuple *tuple, conntrack, expected); /* Welcome, Mr. Bond. We've been expecting you... */ IP_NF_ASSERT(master_ct(conntrack)); - conntrack->status = IPS_EXPECTED; + __set_bit(IPS_EXPECTED_BIT, &conntrack->status); conntrack->master = expected; expected->sibling = conntrack; +#if CONFIG_IP_NF_CONNTRACK_MARK + conntrack->mark = expected->expectant->mark; +#endif LIST_DELETE(&ip_conntrack_expect_list, expected); INIT_LIST_HEAD(&expected->list); expected->expectant->expecting--; @@ -878,11 +824,11 @@ resolve_normal_ct(struct sk_buff *skb, *set_reply = 1; } else { /* Once we've had two way comms, always ESTABLISHED. */ - if (h->ctrack->status & IPS_SEEN_REPLY) { + if (test_bit(IPS_SEEN_REPLY_BIT, &h->ctrack->status)) { DEBUGP("ip_conntrack_in: normal packet for %p\n", h->ctrack); *ctinfo = IP_CT_ESTABLISHED; - } else if (h->ctrack->status & IPS_EXPECTED) { + } else if (test_bit(IPS_EXPECTED_BIT, &h->ctrack->status)) { DEBUGP("ip_conntrack_in: related packet for %p\n", h->ctrack); *ctinfo = IP_CT_RELATED; @@ -1056,16 +1002,15 @@ int ip_conntrack_expect_related(struct ip_conntrack *related_to, } if (old) { - /************************* lzh add ****************************************** - * fix sip alg CDROUTE test fail - * 2007/3/16 - ***************************************************************************/ - if (old->help.exp_sip_info.nated && (old->help.exp_sip_info.type == CONN_RTP)) - { - DEBUGP("%s: found old exp and nated, rtp port=%d\n", __FUNCTION__,ntohs(old->tuple.dst.u.udp.port)); - related_to->help.ct_sip_info.rtpport = ntohs(old->tuple.dst.u.udp.port); +#if 0 // removed 1.11 forward bug test + if (1) { // 43011 (09?): checkme + // lzh add, fix sip alg CDROUTE test fail, 2007/3/16 + if (old->help.exp_sip_info.nated && (old->help.exp_sip_info.type == CONN_RTP)) { + DEBUGP("%s: found old exp and nated, rtp port=%d\n", __FUNCTION__,ntohs(old->tuple.dst.u.udp.port)); + related_to->help.ct_sip_info.rtpport = ntohs(old->tuple.dst.u.udp.port); + } } - /************************ lzh end ******************************************/ +#endif WRITE_UNLOCK(&ip_conntrack_lock); return -EEXIST; } @@ -1530,6 +1475,7 @@ int __init ip_conntrack_init(void) unsigned int i; int ret; +#if 0 /* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB * machine has 256 buckets. >= 1GB machines have 8192 buckets. */ if (hashsize) { @@ -1544,6 +1490,33 @@ int __init ip_conntrack_init(void) ip_conntrack_htable_size = 16; } ip_conntrack_max = 8 * ip_conntrack_htable_size; +#else +/* + + sizeof(list_head) = 8 + x 4096 = 32K + + sizeof(ip_conntrack) = 368 + x 2048 = 736K + +*/ + +#ifdef TEST_JHASH +/* + if (hashsize) ip_conntrack_htable_size = hashsize; + else ip_conntrack_htable_size = 4096; + ip_conntrack_max = 2048; +*/ + if (hashsize) ip_conntrack_htable_size = hashsize; + else ip_conntrack_htable_size = 8092; + ip_conntrack_max = 4096; +#else + if (hashsize) ip_conntrack_htable_size = hashsize; + else ip_conntrack_htable_size = 4099; + ip_conntrack_max = 2048; +#endif + +#endif printk("ip_conntrack version %s (%u buckets, %d max)" " - %d bytes per conntrack\n", IP_CONNTRACK_VERSION, @@ -1605,3 +1578,5 @@ err_unreg_sockopt: return -ENOMEM; } + + diff --git a/release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_h323.c b/release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_h323.c index cb0b1da5..c6172945 100644 --- a/release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_h323.c +++ b/release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_h323.c @@ -104,30 +104,26 @@ static int h245_help(const struct iphdr *iph, size_t len, exp->seq = ntohl(tcph->seq) + i; - *((u_int32_t *)data) = ct->tuplehash[!dir].tuple.dst.ip; //!!! Netmeeting fix - - { - unsigned int chksum; - - chksum = csum_partial((char *)tcph + tcph->doff*4, - datalen, 0); - - tcph->check = 0; - tcph->check = tcp_v4_check(tcph, tcplen, iph->saddr, iph->daddr, - csum_partial((char *)tcph, tcph->doff*4, chksum)); - - } - + // 43011 (09?): checkme + if (1) { + unsigned int chksum; + + *((u_int32_t *)data) = ct->tuplehash[!dir].tuple.dst.ip; //!!! Netmeeting fix + chksum = csum_partial((char *)tcph + tcph->doff*4, datalen, 0); + tcph->check = 0; + tcph->check = tcp_v4_check(tcph, tcplen, iph->saddr, iph->daddr, + csum_partial((char *)tcph, tcph->doff*4, chksum)); + } exp->tuple = ((struct ip_conntrack_tuple) { { ct->tuplehash[!dir].tuple.src.ip, { 0 } }, { data_ip, - { data_port }, + { .tcp = { data_port } }, IPPROTO_UDP }}); exp->mask = ((struct ip_conntrack_tuple) { { 0xFFFFFFFF, { 0 } }, - { 0xFFFFFFFF, { 0xFFFF }, 0xFFFF }}); + { 0xFFFFFFFF, { .tcp = { 0xFFFF } }, 0xFFFF }}); exp->expectfn = NULL; @@ -252,11 +248,11 @@ static int h225_help(const struct iphdr *iph, size_t len, { { ct->tuplehash[!dir].tuple.src.ip, { 0 } }, { data_ip, - { data_port }, + { .tcp = { data_port } }, IPPROTO_TCP }}); exp->mask = ((struct ip_conntrack_tuple) { { 0xFFFFFFFF, { 0 } }, - { 0xFFFFFFFF, { 0xFFFF }, 0xFFFF }}); + { 0xFFFFFFFF, { .tcp = { 0xFFFF } }, 0xFFFF }}); exp->expectfn = h225_expect; @@ -317,9 +313,7 @@ static void __exit fini(void) ip_conntrack_helper_unregister(&h225); } -#ifdef CONFIG_IP_NF_NAT_NEEDED EXPORT_SYMBOL(ip_h323_lock); -#endif module_init(init); module_exit(fini); diff --git a/release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_pptp.c b/release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_pptp.c index 17cf2b7b..a0b41051 100644 --- a/release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_pptp.c +++ b/release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_pptp.c @@ -1,5 +1,5 @@ /* - * ip_conntrack_pptp.c - Version 1.11 + * ip_conntrack_pptp.c - Version 1.9 * * Connection tracking support for PPTP (Point to Point Tunneling Protocol). * PPTP is a a protocol for creating virtual private networks. @@ -9,7 +9,7 @@ * GRE is defined in RFC 1701 and RFC 1702. Documentation of * PPTP can be found in RFC 2637 * - * (C) 2000-2002 by Harald Welte <laforge@gnumonks.org>, + * (C) 2000-2003 by Harald Welte <laforge@gnumonks.org> * * Development of this code funded by Astaro AG (http://www.astaro.com/) * @@ -21,6 +21,18 @@ * TODO: - finish support for multiple calls within one session * (needs expect reservations in newnat) * - testing of incoming PPTP calls + * + * Changes: + * 2002-02-05 - Version 1.3 + * - Call ip_conntrack_unexpect_related() from + * pptp_timeout_related() to destroy expectations in case + * CALL_DISCONNECT_NOTIFY or tcp fin packet was seen + * (Philip Craig <philipc@snapgear.com>) + * - Add Version information at module loadtime + * 2002-02-10 - Version 1.6 + * - move to C99 style initializers + * - remove second expectation if first arrives + * */ #include <linux/config.h> @@ -35,13 +47,21 @@ #include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h> #include <linux/netfilter_ipv4/ip_conntrack_pptp.h> +#define IP_CT_PPTP_VERSION "1.9" + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); MODULE_DESCRIPTION("Netfilter connection tracking helper module for PPTP"); DECLARE_LOCK(ip_pptp_lock); +#if 0 +#include "ip_conntrack_pptp_priv.h" +#define DEBUGP(format, args...) printk(KERN_DEBUG __FILE__ ":" __FUNCTION__ \ + ": " format, ## args) +#else #define DEBUGP(format, args...) +#endif #define SECS *HZ #define MINS * 60 SECS @@ -53,8 +73,8 @@ DECLARE_LOCK(ip_pptp_lock); static int pptp_expectfn(struct ip_conntrack *ct) { - struct ip_conntrack_expect *exp, *other_exp; struct ip_conntrack *master; + struct ip_conntrack_expect *exp; DEBUGP("increasing timeouts\n"); /* increase timeout of GRE data channel conntrack entry */ @@ -67,6 +87,12 @@ static int pptp_expectfn(struct ip_conntrack *ct) return 0; } + exp = ct->master; + if (!exp) { + DEBUGP("no expectation!!\n"); + return 0; + } + DEBUGP("completing tuples with ct info\n"); /* we can do this, since we're unconfirmed */ if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.gre.key == @@ -83,6 +109,26 @@ static int pptp_expectfn(struct ip_conntrack *ct) ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.gre.key = htonl(master->help.ct_pptp_info.pac_call_id); } + + /* delete other expectation */ + if (exp->expected_list.next != &exp->expected_list) { + struct ip_conntrack_expect *other_exp; + struct list_head *cur_item, *next; + + for (cur_item = master->sibling_list.next; + cur_item != &master->sibling_list; cur_item = next) { + next = cur_item->next; + other_exp = list_entry(cur_item, + struct ip_conntrack_expect, + expected_list); + /* remove only if occurred at same sequence number */ + if (other_exp != exp && other_exp->seq == exp->seq) { + DEBUGP("unexpecting other direction\n"); + ip_ct_gre_keymap_destroy(other_exp); + ip_conntrack_unexpect_related(other_exp); + } + } + } return 0; } @@ -90,15 +136,21 @@ static int pptp_expectfn(struct ip_conntrack *ct) /* timeout GRE data connections */ static int pptp_timeout_related(struct ip_conntrack *ct) { - struct list_head *cur_item; + struct list_head *cur_item, *next; struct ip_conntrack_expect *exp; - list_for_each(cur_item, &ct->sibling_list) { + /* FIXME: do we have to lock something ? */ + for (cur_item = ct->sibling_list.next; + cur_item != &ct->sibling_list; cur_item = next) { + next = cur_item->next; exp = list_entry(cur_item, struct ip_conntrack_expect, expected_list); - if (!exp->sibling) + ip_ct_gre_keymap_destroy(exp); + if (!exp->sibling) { + ip_conntrack_unexpect_related(exp); continue; + } DEBUGP("setting timeout of conntrack %p to 0\n", exp->sibling); @@ -110,7 +162,7 @@ static int pptp_timeout_related(struct ip_conntrack *ct) return 0; } -/* expect GRE connection in PNS->PAC direction */ +/* expect GRE connections (PNS->PAC and PAC->PNS direction) */ static inline int exp_gre(struct ip_conntrack *master, u_int32_t seq, @@ -121,7 +173,7 @@ exp_gre(struct ip_conntrack *master, struct ip_conntrack_tuple inv_tuple; memset(&exp, 0, sizeof(exp)); - /* tuple in original direction, PAC->PNS */ + /* tuple in original direction, PNS->PAC */ exp.tuple.src.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip; exp.tuple.src.u.gre.key = htonl(ntohs(peer_callid)); exp.tuple.dst.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip; @@ -149,11 +201,43 @@ exp_gre(struct ip_conntrack *master, DUMP_TUPLE_RAW(&exp.tuple); /* Add GRE keymap entries */ + if (ip_ct_gre_keymap_add(&exp, &exp.tuple, 0) != 0) + return 1; + + invert_tuplepr(&inv_tuple, &exp.tuple); + if (ip_ct_gre_keymap_add(&exp, &inv_tuple, 1) != 0) { + ip_ct_gre_keymap_destroy(&exp); + return 1; + } + + if (ip_conntrack_expect_related(master, &exp) != 0) { + ip_ct_gre_keymap_destroy(&exp); + DEBUGP("cannot expect_related()\n"); + return 1; + } + + /* tuple in reply direction, PAC->PNS */ + exp.tuple.src.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip; + exp.tuple.src.u.gre.key = htonl(ntohs(callid)); + exp.tuple.dst.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip; + exp.tuple.dst.u.gre.key = htonl(ntohs(peer_callid)); + + DEBUGP("calling expect_related "); + DUMP_TUPLE_RAW(&exp.tuple); + + /* Add GRE keymap entries */ ip_ct_gre_keymap_add(&exp, &exp.tuple, 0); invert_tuplepr(&inv_tuple, &exp.tuple); ip_ct_gre_keymap_add(&exp, &inv_tuple, 1); + /* FIXME: cannot handle error correctly, since we need to free + * the above keymap :( */ - ip_conntrack_expect_related(master, &exp); + if (ip_conntrack_expect_related(master, &exp) != 0) { + /* free the second pair of keypmaps */ + ip_ct_gre_keymap_destroy(&exp); + DEBUGP("cannot expect_related():\n"); + return 1; + } return 0; } @@ -240,7 +324,8 @@ pptp_inbound_pkt(struct tcphdr *tcph, info->cstate = PPTP_CALL_OUT_CONF; seq = ntohl(tcph->seq) + ((void *)pcid - (void *)pptph); - exp_gre(ct, seq, *cid, *pcid); + if (exp_gre(ct, seq, *cid, *pcid) != 0) + printk("ip_conntrack_pptp: error during exp_gre\n"); break; case PPTP_IN_CALL_REQUEST: @@ -282,7 +367,8 @@ pptp_inbound_pkt(struct tcphdr *tcph, /* we expect a GRE connection from PAC to PNS */ seq = ntohl(tcph->seq) + ((void *)pcid - (void *)pptph); - exp_gre(ct, seq, *cid, *pcid); + if (exp_gre(ct, seq, *cid, *pcid) != 0) + printk("ip_conntrack_pptp: error during exp_gre\n"); break; @@ -294,7 +380,6 @@ pptp_inbound_pkt(struct tcphdr *tcph, /* untrack this call id, unexpect GRE packets */ pptp_timeout_related(ct); - /* NEWNAT: look up exp for call id and unexpct_related */ break; case PPTP_WAN_ERROR_NOTIFY: @@ -446,7 +531,8 @@ conntrack_pptp_help(const struct iphdr *iph, size_t len, if (tcp_v4_check(tcph, tcplen, iph->saddr, iph->daddr, csum_partial((char *) tcph, tcplen, 0))) { printk(KERN_NOTICE __FILE__ ": bad csum\n"); -// return NF_ACCEPT; + /* W2K PPTP server sends TCP packets with wrong checksum :(( */ + //return NF_ACCEPT; } if (tcph->fin || tcph->rst) { @@ -456,8 +542,6 @@ conntrack_pptp_help(const struct iphdr *iph, size_t len, /* untrack this call id, unexpect GRE packets */ pptp_timeout_related(ct); - /* no need to call unexpect_related since master conn - * dies anyway */ } @@ -482,6 +566,8 @@ conntrack_pptp_help(const struct iphdr *iph, size_t len, LOCK_BH(&ip_pptp_lock); + /* FIXME: We just blindly assume that the control connection is always + * established from PNS->PAC. However, RFC makes no guarantee */ if (dir == IP_CT_DIR_ORIGINAL) /* client -> server (PNS -> PAC) */ ret = pptp_outbound_pkt(tcph, pptph, datalen, ct, ctinfo); @@ -497,13 +583,31 @@ conntrack_pptp_help(const struct iphdr *iph, size_t len, /* control protocol helper */ static struct ip_conntrack_helper pptp = { - { NULL, NULL }, - "pptp", IP_CT_HELPER_F_REUSE_EXPECT, THIS_MODULE, 2, 0, - { { 0, { tcp: { port: __constant_htons(PPTP_CONTROL_PORT) } } }, - { 0, { 0 }, IPPROTO_TCP } }, - { { 0, { tcp: { port: 0xffff } } }, - { 0, { 0 }, 0xffff } }, - conntrack_pptp_help }; + .list = { NULL, NULL }, + .name = "pptp", + .flags = IP_CT_HELPER_F_REUSE_EXPECT, + .me = THIS_MODULE, + .max_expected = 2, + .timeout = 0, + .tuple = { .src = { .ip = 0, + .u = { .tcp = { .port = + __constant_htons(PPTP_CONTROL_PORT) } } + }, + .dst = { .ip = 0, + .u = { .all = 0 }, + .protonum = IPPROTO_TCP + } + }, + .mask = { .src = { .ip = 0, + .u = { .tcp = { .port = 0xffff } } + }, + .dst = { .ip = 0, + .u = { .all = 0 }, + .protonum = 0xffff + } + }, + .help = conntrack_pptp_help +}; /* ip_conntrack_pptp initialization */ static int __init init(void) @@ -517,12 +621,14 @@ static int __init init(void) return -EIO; } + printk("ip_conntrack_pptp version %s loaded\n", IP_CT_PPTP_VERSION); return 0; } static void __exit fini(void) { ip_conntrack_helper_unregister(&pptp); + printk("ip_conntrack_pptp version %s unloaded\n", IP_CT_PPTP_VERSION); } module_init(init); diff --git a/release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_proto_esp.c b/release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_proto_esp.c index bcb8d892..bcb8d892 100755..100644 --- a/release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_proto_esp.c +++ b/release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_proto_esp.c diff --git a/release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_proto_gre.c b/release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_proto_gre.c index 4ff06dcb..55b3ecea 100644 --- a/release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_proto_gre.c +++ b/release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_proto_gre.c @@ -1,5 +1,5 @@ /* - * ip_conntrack_proto_gre.c - Version 1.11 + * ip_conntrack_proto_gre.c - Version 1.2 * * Connection tracking protocol helper module for GRE. * @@ -17,7 +17,7 @@ * * Documentation about PPTP can be found in RFC 2637 * - * (C) 2000-2002 by Harald Welte <laforge@gnumonks.org> + * (C) 2000-2003 by Harald Welte <laforge@gnumonks.org> * * Development of this code funded by Astaro AG (http://www.astaro.com/) * @@ -54,8 +54,18 @@ MODULE_DESCRIPTION("netfilter connection tracking protocol helper for GRE"); #define GRE_TIMEOUT (30*HZ) #define GRE_STREAM_TIMEOUT (180*HZ) +#if 0 +#define DEBUGP(format, args...) printk(KERN_DEBUG __FILE__ ":" __FUNCTION__ \ + ": " format, ## args) +#define DUMP_TUPLE_GRE(x) printk("%u.%u.%u.%u:0x%x -> %u.%u.%u.%u:0x%x:%u:0x%x\n", \ + NIPQUAD((x)->src.ip), ntohl((x)->src.u.gre.key), \ + NIPQUAD((x)->dst.ip), ntohl((x)->dst.u.gre.key), \ + (x)->dst.u.gre.version, \ + ntohs((x)->dst.u.gre.protocol)) +#else #define DEBUGP(x, args...) #define DUMP_TUPLE_GRE(x) +#endif /* GRE KEYMAP HANDLING FUNCTIONS */ static LIST_HEAD(gre_keymap_list); @@ -103,7 +113,6 @@ int ip_ct_gre_keymap_add(struct ip_conntrack_expect *exp, memset(km, 0, sizeof(*km)); memcpy(&km->tuple, t, sizeof(*t)); - km->master = exp; if (!reply) exp->proto.gre.keymap_orig = km; @@ -132,6 +141,26 @@ void ip_ct_gre_keymap_change(struct ip_ct_gre_keymap *km, WRITE_UNLOCK(&ip_ct_gre_lock); } +/* destroy the keymap entries associated with specified expect */ +void ip_ct_gre_keymap_destroy(struct ip_conntrack_expect *exp) +{ + DEBUGP("entering for exp %p\n", exp); + WRITE_LOCK(&ip_ct_gre_lock); + if (exp->proto.gre.keymap_orig) { + DEBUGP("removing %p from list\n", exp->proto.gre.keymap_orig); + list_del(&exp->proto.gre.keymap_orig->list); + kfree(exp->proto.gre.keymap_orig); + exp->proto.gre.keymap_orig = NULL; + } + if (exp->proto.gre.keymap_reply) { + DEBUGP("removing %p from list\n", exp->proto.gre.keymap_reply); + list_del(&exp->proto.gre.keymap_reply->list); + kfree(exp->proto.gre.keymap_reply); + exp->proto.gre.keymap_reply = NULL; + } + WRITE_UNLOCK(&ip_ct_gre_lock); +} + /* PUBLIC CONNTRACK PROTO HELPER FUNCTIONS */ @@ -186,6 +215,10 @@ static int gre_pkt_to_tuple(const void *datah, size_t datalen, srckey = gre_keymap_lookup(tuple); +#if 0 + DEBUGP("found src key %x for tuple ", ntohl(srckey)); + DUMP_TUPLE_GRE(tuple); +#endif tuple->src.u.gre.key = srckey; return 1; @@ -256,18 +289,7 @@ static void gre_destroy(struct ip_conntrack *ct) return; } - WRITE_LOCK(&ip_ct_gre_lock); - if (master->proto.gre.keymap_orig) { - DEBUGP("removing %p from list\n", master->proto.gre.keymap_orig); - list_del(&master->proto.gre.keymap_orig->list); - kfree(master->proto.gre.keymap_orig); - } - if (master->proto.gre.keymap_reply) { - DEBUGP("removing %p from list\n", master->proto.gre.keymap_reply); - list_del(&master->proto.gre.keymap_reply->list); - kfree(master->proto.gre.keymap_reply); - } - WRITE_UNLOCK(&ip_ct_gre_lock); + ip_ct_gre_keymap_destroy(master); } /* protocol helper struct */ @@ -304,7 +326,7 @@ static void __exit fini(void) /* delete all keymap entries */ WRITE_LOCK(&ip_ct_gre_lock); list_for_each_safe(pos, n, &gre_keymap_list) { - DEBUGP("deleting keymap %p\n", pos); + DEBUGP("deleting keymap %p at module unload time\n", pos); list_del(pos); kfree(pos); } @@ -315,6 +337,7 @@ static void __exit fini(void) EXPORT_SYMBOL(ip_ct_gre_keymap_add); EXPORT_SYMBOL(ip_ct_gre_keymap_change); +EXPORT_SYMBOL(ip_ct_gre_keymap_destroy); module_init(init); module_exit(fini); diff --git a/release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_proto_tcp.c index 02f20742..ebb9b493 100644 --- a/release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_proto_tcp.c +++ b/release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_proto_tcp.c @@ -173,7 +173,7 @@ static int tcp_packet(struct ip_conntrack *conntrack, have an established connection: this is a fairly common problem case, so we can delete the conntrack immediately. --RR */ - if (!(conntrack->status & IPS_SEEN_REPLY) && tcph->rst) { + if (!test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status) && tcph->rst) { WRITE_UNLOCK(&tcp_lock); if (del_timer(&conntrack->timeout)) conntrack->timeout.function((unsigned long)conntrack); @@ -211,12 +211,6 @@ static int tcp_new(struct ip_conntrack *conntrack, return 0; } - if (tcph->syn && tcph->ack) - { - DEBUGP("ip_conntrack_tcp: invalid new deleting.\n"); - return 0; - } - conntrack->proto.tcp.state = newconntrack; return 1; } diff --git a/release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_proto_udp.c b/release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_proto_udp.c index 49ac62c7..af660a27 100644 --- a/release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_proto_udp.c +++ b/release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_proto_udp.c @@ -7,25 +7,13 @@ #include <linux/netfilter_ipv4/ip_conntrack_protocol.h> #include <linux/netfilter_ipv4/ip_conntrack_udp.h> -unsigned long ip_ct_udp_isakmp_timeout = (300*HZ); - static int udp_pkt_to_tuple(const void *datah, size_t datalen, struct ip_conntrack_tuple *tuple) { const struct udphdr *hdr = datah; - struct isakmp_hdr *isakmp_h = (void *)hdr + 8; tuple->src.u.udp.port = hdr->source; tuple->dst.u.udp.port = hdr->dest; - if(ntohs(hdr->source) == 500 && ntohs(hdr->dest) == 500) - { - if(NULL == isakmp_h) - tuple->dst.u.udp.init_cookie = 0; - else - tuple->dst.u.udp.init_cookie = (unsigned int)(isakmp_h->init_cookie[0]); - } - else - tuple->dst.u.udp.init_cookie = 0; return 1; } @@ -35,7 +23,6 @@ static int udp_invert_tuple(struct ip_conntrack_tuple *tuple, { tuple->src.u.udp.port = orig->dst.u.udp.port; tuple->dst.u.udp.port = orig->src.u.udp.port; - tuple->dst.u.udp.init_cookie = orig->dst.u.udp.init_cookie; return 1; } @@ -60,21 +47,16 @@ static int udp_packet(struct ip_conntrack *conntrack, struct iphdr *iph, size_t len, enum ip_conntrack_info conntrackinfo) { - u_int16_t *portptr; - portptr = &conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port; /* If we've seen traffic both ways, this is some kind of UDP stream. Extend timeout. */ - if (conntrack->status & IPS_SEEN_REPLY) - { - if(ntohs(*portptr) == 500) - ip_ct_refresh(conntrack, ip_ct_udp_isakmp_timeout); - else - ip_ct_refresh(conntrack, sysctl_ip_conntrack_udp_timeouts[UDP_STREAM_TIMEOUT]); + if (test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) { + ip_ct_refresh(conntrack, + sysctl_ip_conntrack_udp_timeouts[UDP_STREAM_TIMEOUT]); /* Also, more likely to be important, and not a probe */ set_bit(IPS_ASSURED_BIT, &conntrack->status); - } - else - ip_ct_refresh(conntrack, sysctl_ip_conntrack_udp_timeouts[UDP_TIMEOUT]); + } else + ip_ct_refresh(conntrack, + sysctl_ip_conntrack_udp_timeouts[UDP_TIMEOUT]); return NF_ACCEPT; } diff --git a/release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_standalone.c b/release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_standalone.c index c7e31931..78c3062c 100644 --- a/release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_standalone.c +++ b/release/src/linux/linux/net/ipv4/netfilter/ip_conntrack_standalone.c @@ -32,503 +32,6 @@ struct module *ip_conntrack_module = THIS_MODULE; MODULE_LICENSE("GPL"); -#define CLEAR_IP_CONNTRACK -#define DEL_IP_CONNTRACK_ENTRY 1 -#ifdef DEL_IP_CONNTRACK_ENTRY -/* - * - *This part of code add for delete an entry in ip_conntrack table. - * - */ - - -#define DEL_LIST_PATH "/tmp/.del_ip_conntrack" -#define printkerrline() printk("del_ip_conntrack error : %s %s %d\n", __FILE__, __func__, __LINE__) - -struct del_list -{ - unsigned short proto; - unsigned int begin_port; - unsigned int end_port; - unsigned int ip; - struct del_list *next; -}; - -void free_del_list(struct del_list *head); -void print_del_list(struct del_list *head); -static struct del_list * malloc_new_node(const char *buf, struct del_list * head); -struct del_list * init_del_list(const char *buf, size_t size); -static int read_del_file(char * buf, unsigned int size, char *path); -static int del_match_method(const struct ip_conntrack_tuple_hash *pConn, const struct del_list * pList); -static int del_conntrack_check(const struct ip_conntrack_tuple_hash *pConn, const struct del_list * head); -void pf_del_ip_conntrack(void); -static int proc_read_del_ip_conntrack(char *page, char **start, off_t off, int count, int *eof, void *context); -static int proc_write_del_ip_conntrack(struct file *file, const char *buffer, unsigned long count, void *data); -static int end_proc_read(const char *p, char *page, off_t off, int count, char **start, int *eof); - -void pf_del_ip_conntrack(void) -{ -#define MAX_BUF_SIZE 1024*2 - int i; - char buf[MAX_BUF_SIZE]; - struct del_list * del_head = NULL; - struct list_head *head, *temp_head; - struct ip_conntrack_tuple_hash *tuple_hash; - - //printk("pf_del_ip_conntrack---------------------------------------1\n"); - memset(buf, 0, MAX_BUF_SIZE); - - if(read_del_file(buf, MAX_BUF_SIZE, DEL_LIST_PATH) == -1) - { - goto final_return; - } - - buf[MAX_BUF_SIZE - 1] = '\0'; - del_head = init_del_list(buf, MAX_BUF_SIZE - 1); - //print_del_list(del_head); - READ_LOCK(&ip_conntrack_lock); - for (i = 0; i < ip_conntrack_htable_size; i++) - { - head = &ip_conntrack_hash[i]; - temp_head = head; - while(1) - { - temp_head = temp_head->next; - if(temp_head == head) - { - head = NULL; - temp_head = NULL; - break; - } - tuple_hash = (struct ip_conntrack_tuple_hash *)temp_head; - if(del_conntrack_check(tuple_hash, del_head) == 1) - { - del_selected_conntrack(tuple_hash); - } - } - } - READ_UNLOCK(&ip_conntrack_lock); - free_del_list(del_head); - -final_return: - - //printk("pf_del_ip_conntrack---------------------------------------2\n"); - return; -#undef MAX_BUF_SIZE -} - -static int del_conntrack_check(const struct ip_conntrack_tuple_hash *pConn, const struct del_list * head) -{ - int ret; - const struct del_list * p; - - ret = 0; - - if(pConn == NULL || head == NULL) - { - ret = -1; - goto final_return; - } - - for(p = head; p; p = p->next) - { - if(del_match_method(pConn, p) == 1) - { - //Match,jump out - ret = 1; - break; - } - } - -final_return: - return ret; -} - -static int del_match_method(const struct ip_conntrack_tuple_hash *pConn, const struct del_list * pList) -{ - int ret; - typedef enum - { - TCP_PROTO = 0x06, - UDP_PROTO = 0x11, - }proto_type; - proto_type pt[2] = {TCP_PROTO, UDP_PROTO}; - - ret = 0; - //Check tcp and udp only - if(pConn->tuple.dst.protonum == TCP_PROTO || pConn->tuple.dst.protonum == UDP_PROTO) - { - //Check proto match - if((pList->proto == 3) || - ((pList->proto == 0 || pList->proto == 1) && (pConn->tuple.dst.protonum == pt[pList->proto]))) - { - //Chcek ip address match - if(pConn->ctrack->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip == pList->ip) - { - //Check port match - unsigned int tport; - if(pConn->tuple.dst.protonum == TCP_PROTO) - { - //TCP - tport = pConn->ctrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.tcp.port; - } - else - { - //UDP - tport = pConn->ctrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.udp.port; - } - tport = htons(tport); - if(tport >= pList->begin_port && tport <= pList->end_port) - { - ret = 1; - } - } - } - } - return ret; -} - -static int read_del_file(char * buf, unsigned int size, char *path) -{ - int retval, orgfsuid, orgfsgid; - mm_segment_t orgfs; - struct file *srcf; - - // Save uid and gid used for filesystem access. - // Set user and group to 0 (root) - orgfsuid = current->fsuid; - orgfsgid = current->fsgid; - current->fsuid=current->fsgid = 0; - orgfs = get_fs(); - set_fs(KERNEL_DS); - - if(path && *path) - { - srcf = filp_open(path, O_RDONLY, 0); - if(IS_ERR(srcf)) - { - printkerrline(); - retval = -1; - goto final_return; - } - else - { - if(srcf->f_op && srcf->f_op->read) - { - memset(buf, 0x00, size); - retval=srcf->f_op->read(srcf, buf, size, &srcf->f_pos); - if(retval < 0) - { - printkerrline(); - retval = -1; - goto final_return; - } - else - { - //Success,go! - retval = 0; - goto final_return; - } - } - else - { - printkerrline(); - retval = -1; - goto final_return; - } - } - } - else - { - printkerrline(); - retval = -1; - goto final_return; - } - -final_return: - if(!IS_ERR(srcf)) - { - retval=filp_close(srcf,NULL); - if(retval) - { - printkerrline(); - retval = -1; - } - } - set_fs(orgfs); - current->fsuid = orgfsuid; - current->fsgid = orgfsgid; - - return retval; -} - -struct del_list * init_del_list(const char *buf, size_t size) -{ -#define LINE_FEED "\n" -#define TMP_BUF_SIZE 100 - const char *begin, *end; - char tmpbuf[TMP_BUF_SIZE]; - struct del_list * head = NULL, *tmp_p; - - if(buf == NULL || size <= 0 || buf[size] != '\0') - { - head = NULL; - goto final_return; - } - - for(begin = end = buf; begin && (begin - buf < size); begin = end + strlen(LINE_FEED)) - { - end = strstr(begin, LINE_FEED); - if(end) - { - if((end - begin) > (TMP_BUF_SIZE - 1)) - { - //Too large,go on - continue; - } - else - { - memcpy(tmpbuf, begin, end - begin); - tmpbuf[end - begin] = '\0'; - //printk("obtain string : %s\n", tmpbuf); - if((tmp_p = malloc_new_node(tmpbuf, head)) == NULL) - { - //Invalid format or malloc fail,go on - continue; - } - else - { - head = tmp_p; - } - } - } - else - { - //printk("Last string : %s\n", begin); - if((tmp_p = malloc_new_node(begin, head)) == NULL) - { - //Invalid format or malloc fail,jump out - break; - } - else - { - head = tmp_p; - } - } - } - -final_return: - return head; - -#undef TMP_BUF_SIZE -#undef LINE_FEED -} - -static struct del_list * malloc_new_node(const char *buf, struct del_list * head) -{ -#define SSCANF_MATCH_NUM 7 - int i, j, k, c1, c2, c3, c4; - struct del_list *p = NULL; - - if(sscanf(buf, "%d %d.%d.%d.%d %d-%d", &i, &c4, &c3, &c2, &c1, &j, &k) != SSCANF_MATCH_NUM) - { - p = NULL; - goto final_return; - } - else - { - if(p = (struct del_list *)kmalloc(sizeof(struct del_list), GFP_ATOMIC)) - { - p->proto = i; - #if 0 - //Big endian - ((char *)&(p->ip))[0] = (char)c1; - ((char *)&(p->ip))[1] = (char)c2; - ((char *)&(p->ip))[2] = (char)c3; - ((char *)&(p->ip))[3] = (char)c4; - #else - //Little endian - ((char *)&(p->ip))[3] = (char)c1; - ((char *)&(p->ip))[2] = (char)c2; - ((char *)&(p->ip))[1] = (char)c3; - ((char *)&(p->ip))[0] = (char)c4; - #endif - p->begin_port = j; - p->end_port = k; - p->next = head; - } - else - { - p = NULL; - goto final_return; - } - } - -final_return: - return p; -#undef SSCANF_MATCH_NUM -} - -void print_del_list(struct del_list *head) -{ - int i; - struct del_list *tmp_p; - - for(i = 1, tmp_p = head; tmp_p; tmp_p = tmp_p->next, i++) - { - printk("Node(%d): proto=%d | ip=%0x | port=[%d-%d]\n", i, tmp_p->proto, tmp_p->ip, tmp_p->begin_port, tmp_p->end_port); - } -} - -void free_del_list(struct del_list *head) -{ - int i; - struct del_list *tmp_p; - - if(head == NULL) - { - goto final_return; - } - for(i = 1, tmp_p = head; head; head = tmp_p, i++) - { - tmp_p = head->next; - //printk("Free@Node(%d):proto=%d | ip=%0x | port=[%d-%d]\n", i, head->proto, head->ip, head->begin_port, head->end_port); - kfree(head); - } - -final_return: - return; -} - -static int proc_read_del_ip_conntrack(char *page, char **start, off_t off, int count, int *eof, void *context) -{ - char *p; - - p = page; - p += sprintf(page, "%s\n", "use echo \"1(0)\" to enable or disbable"); - return end_proc_read(p, page, off, count, start, eof); -} - -static int proc_write_del_ip_conntrack(struct file *file, const char *buffer, unsigned long count, void *data) -{ - unsigned char tmp[2]; - - if(buffer) - { - memset(tmp, 0, sizeof(tmp)); - copy_from_user(tmp, buffer, count); - tmp[1] = 0x00; - switch(*tmp) - { - case '0': - //Do something here - break; - - case '1': - pf_del_ip_conntrack(); - break; - - default: - printk("<1>invalid args\n"); - } - return count; - } - return 0; -} - -static int end_proc_read(const char *p, char *page, off_t off, int count, char **start, int *eof) -{ - int len = p - page; - - if(len < off + count) - { - *eof = 1; - } - - *start = page + off; - len -= off; - if(len > count) - { - len = count; - } - - if(len < 0) - { - len = 0; - } - - return len; -} - -#endif - -#ifdef CLEAR_IP_CONNTRACK -void clear_ip_conntrack(void) -{ - int i; - struct list_head *head, *temp_head; - struct ip_conntrack_tuple_hash *tuple_hash; - - printk("warning : %s %d\n", __func__, __LINE__); - - READ_LOCK(&ip_conntrack_lock); - for (i = 0; i < ip_conntrack_htable_size; i++) - { - head = &ip_conntrack_hash[i]; - temp_head = head; - while(1) - { - temp_head = temp_head->next; - if(temp_head == head) - { - head = NULL; - temp_head = NULL; - break; - } - tuple_hash = (struct ip_conntrack_tuple_hash *)temp_head; - del_selected_conntrack(tuple_hash); - } - } - READ_UNLOCK(&ip_conntrack_lock); -} - -static int proc_read_clear_ip_conntrack(char *page, char **start, off_t off, int count, int *eof, void *context) -{ - char *p; - - p = page; - p += sprintf(page, "%s\n", "use echo \"1(0)\" to enable or disbable"); - return end_proc_read(p, page, off, count, start, eof); -} - -static int proc_write_clear_ip_conntrack(struct file *file, const char *buffer, unsigned long count, void *data) -{ - unsigned char tmp[2]; - - if(buffer) - { - memset(tmp, 0, sizeof(tmp)); - copy_from_user(tmp, buffer, count); - tmp[1] = 0x00; - switch(*tmp) - { - case '0': - //Do something here - break; - - case '1': - clear_ip_conntrack(); - break; - - default: - printk("<1>invalid args\n"); - } - return count; - } - return 0; -} -#endif - static int kill_proto(const struct ip_conntrack *i, void *data) { return (i->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum == @@ -575,7 +78,7 @@ print_expect(char *buffer, const struct ip_conntrack_expect *expect) } static unsigned int -print_conntrack(char *buffer, const struct ip_conntrack *conntrack) +print_conntrack(char *buffer, struct ip_conntrack *conntrack) { unsigned int len; struct ip_conntrack_protocol *proto @@ -593,15 +96,38 @@ print_conntrack(char *buffer, const struct ip_conntrack *conntrack) len += print_tuple(buffer + len, &conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple, proto); - if (!(conntrack->status & IPS_SEEN_REPLY)) + if (!(test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status))) len += sprintf(buffer + len, "[UNREPLIED] "); len += print_tuple(buffer + len, &conntrack->tuplehash[IP_CT_DIR_REPLY].tuple, proto); - if (conntrack->status & IPS_ASSURED) + if (test_bit(IPS_ASSURED_BIT, &conntrack->status)) len += sprintf(buffer + len, "[ASSURED] "); len += sprintf(buffer + len, "use=%u ", atomic_read(&conntrack->ct_general.use)); +#if defined(CONFIG_IP_NF_CONNTRACK_MARK) + len += sprintf(buffer + len, "mark=%ld ", conntrack->mark); +#endif +#if defined(CONFIG_IP_NF_MATCH_LAYER7) || defined(CONFIG_IP_NF_MATCH_LAYER7_MODULE) + if(conntrack->layer7.app_proto) + len += sprintf(buffer + len, "l7proto=%s ", + conntrack->layer7.app_proto); +#endif +#if defined(CONFIG_IP_NF_TARGET_MACSAVE) || defined(CONFIG_IP_NF_TARGET_MACSAVE_MODULE) + if ((*((u32 *)conntrack->macsave) != 0) || (*((u16*)(conntrack->macsave + 4)) != 0)) { + len += sprintf(buffer + len, "macsave=%02X:%02X:%02X:%02X:%02X:%02X ", + conntrack->macsave[0], conntrack->macsave[1], conntrack->macsave[2], + conntrack->macsave[3], conntrack->macsave[4], conntrack->macsave[5]); + } +#endif +#if defined(CONFIG_IP_NF_TARGET_BCOUNT) || defined(CONFIG_IP_NF_TARGET_BCOUNT_MODULE) +#if 0 + if (conntrack->bcount != 0) { +// len += sprintf(buffer + len, "bcount=0x%lx ", conntrack->bcount); + len += sprintf(buffer + len, "bcount=%ldK ", conntrack->bcount / 1024); + } +#endif +#endif len += sprintf(buffer + len, "\n"); return len; @@ -748,30 +274,6 @@ static int init_or_cleanup(int init) if (ret < 0) goto cleanup_nothing; -#ifdef DEL_IP_CONNTRACK_ENTRY - proc = proc_net_create("del_ip_conntrack", S_IFREG | S_IRUGO | S_IWUSR, proc_read_del_ip_conntrack); - if(proc) - { - proc->write_proc = proc_write_del_ip_conntrack; - proc->owner = THIS_MODULE; - } - else - { - //Maybe we can just let it go! - } -#endif -#ifdef CLEAR_IP_CONNTRACK - proc = proc_net_create("clear_ip_conntrack", S_IFREG | S_IRUGO | S_IWUSR, proc_read_clear_ip_conntrack); - if(proc) - { - proc->write_proc = proc_write_clear_ip_conntrack; - proc->owner = THIS_MODULE; - } - else - { - //Maybe we can just let it go! - } -#endif proc = proc_net_create("ip_conntrack",0,list_conntracks); if (!proc) goto cleanup_init; proc->owner = THIS_MODULE; diff --git a/release/src/linux/linux/net/ipv4/netfilter/ip_nat_core.c b/release/src/linux/linux/net/ipv4/netfilter/ip_nat_core.c index f64ddabf..de6b4925 100644 --- a/release/src/linux/linux/net/ipv4/netfilter/ip_nat_core.c +++ b/release/src/linux/linux/net/ipv4/netfilter/ip_nat_core.c @@ -763,12 +763,11 @@ do_bindings(struct ip_conntrack *ct, /* if this expectation is already established, skip */ if (exp->sibling) { - //lzh add 2007/3/16 for fix sip alg CDROUTE test - exp = NULL; - //lzh end +#if 0 // removed 1.11 forward bug test + exp = NULL; // lzh add 2007/3/16 for fix sip alg CDROUTE test +#endif continue; } - if (exp_for_packet(exp, pskb)) { DEBUGP("calling nat helper (exp=%p) for packet\n", diff --git a/release/src/linux/linux/net/ipv4/netfilter/ip_nat_h323.c b/release/src/linux/linux/net/ipv4/netfilter/ip_nat_h323.c index 2c080cf3..bcf886e8 100644 --- a/release/src/linux/linux/net/ipv4/netfilter/ip_nat_h323.c +++ b/release/src/linux/linux/net/ipv4/netfilter/ip_nat_h323.c @@ -129,7 +129,7 @@ h225_nat_expected(struct sk_buff **pskb, mr.range[0].flags |= IP_NAT_RANGE_PROTO_SPECIFIED; mr.range[0].min = mr.range[0].max = ((union ip_conntrack_manip_proto) - { port }); + { .tcp = { port } }); } ret = ip_nat_setup_info(ct, &mr, hooknum); @@ -184,12 +184,14 @@ static int h323_signal_address_fixup(struct ip_conntrack *ct, if (!between(info->seq[i] + 6, ntohl(tcph->seq), ntohl(tcph->seq) + datalen)) { /* Partial retransmisison. It's a cracker being funky. */ +#if 0 // ... or a miss id? zzz if (net_ratelimit()) { printk("H.323_NAT: partial packet %u/6 in %u/%u\n", info->seq[i], ntohl(tcph->seq), ntohl(tcph->seq) + datalen); } +#endif return 0; } @@ -252,18 +254,18 @@ static int h323_data_fixup(struct ip_ct_h225_expect *info, DEBUGP("h323_data_fixup: offset %u + 6 in %u\n", info->offset, tcplen); DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); - - memset(&newtuple, 0, sizeof(newtuple)); if (!between(expect->seq + 6, ntohl(tcph->seq), ntohl(tcph->seq) + tcplen - tcph->doff * 4)) { /* Partial retransmisison. It's a cracker being funky. */ +#if 1 // also caused by bad id? if (net_ratelimit()) { printk("H.323_NAT: partial packet %u/6 in %u/%u\n", expect->seq, ntohl(tcph->seq), ntohl(tcph->seq) + tcplen - tcph->doff * 4); } +#endif return 0; } @@ -392,9 +394,9 @@ static struct ip_nat_helper h225 = "H.225", /* name */ IP_NAT_HELPER_F_ALWAYS, /* flags */ THIS_MODULE, /* module */ - { { 0, { __constant_htons(H225_PORT) } }, /* tuple */ + { { 0, { .tcp = { __constant_htons(H225_PORT) } } }, /* tuple */ { 0, { 0 }, IPPROTO_TCP } }, - { { 0, { 0xFFFF } }, /* mask */ + { { 0, { .tcp = { 0xFFFF } } }, /* mask */ { 0, { 0 }, 0xFFFF } }, h225_nat_help, /* helper */ h225_nat_expected /* expectfn */ diff --git a/release/src/linux/linux/net/ipv4/netfilter/ip_nat_helper.c b/release/src/linux/linux/net/ipv4/netfilter/ip_nat_helper.c index ffde5133..e7987430 100644 --- a/release/src/linux/linux/net/ipv4/netfilter/ip_nat_helper.c +++ b/release/src/linux/linux/net/ipv4/netfilter/ip_nat_helper.c @@ -79,7 +79,6 @@ ip_nat_resize_packet(struct sk_buff **skb, iph = (*skb)->nh.iph; if (iph->protocol == IPPROTO_TCP) { struct tcphdr *tcph = (void *)iph + iph->ihl*4; - void *data = (void *)tcph + tcph->doff*4; DEBUGP("ip_nat_resize_packet: Seq_offset before: "); DUMP_OFFSET(this_way); @@ -354,54 +353,49 @@ sack_adjust(struct tcphdr *tcph, } -/* TCP SACK sequence number adjustment, return 0 if sack found and adjusted */ -static inline int +/* TCP SACK sequence number adjustment. */ +static inline void ip_nat_sack_adjust(struct sk_buff *skb, struct ip_conntrack *ct, enum ip_conntrack_info ctinfo) { - struct iphdr *iph; struct tcphdr *tcph; - unsigned char *ptr; - int length, dir, sack_adjusted = 0; + unsigned char *ptr, *optend; + unsigned int dir; - iph = skb->nh.iph; - tcph = (void *)iph + iph->ihl*4; - length = (tcph->doff*4)-sizeof(struct tcphdr); + tcph = (void *)skb->nh.iph + skb->nh.iph->ihl*4; + optend = (unsigned char *)tcph + tcph->doff*4; ptr = (unsigned char *)(tcph+1); dir = CTINFO2DIR(ctinfo); - while (length > 0) { - int opcode = *ptr++; + while (ptr < optend) { + int opcode = ptr[0]; int opsize; switch (opcode) { case TCPOPT_EOL: - return !sack_adjusted; + return; case TCPOPT_NOP: - length--; + ptr++; continue; default: - opsize = *ptr++; - if (opsize > length) /* no partial opts */ - return !sack_adjusted; + opsize = ptr[1]; + /* no partial opts */ + if (ptr + opsize > optend || opsize < 2) + return; if (opcode == TCPOPT_SACK) { /* found SACK */ if((opsize >= (TCPOLEN_SACK_BASE +TCPOLEN_SACK_PERBLOCK)) && !((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK)) - sack_adjust(tcph, ptr-2, + sack_adjust(tcph, ptr, &ct->nat.info.seq[!dir]); - - sack_adjusted = 1; } - ptr += opsize-2; - length -= opsize; + ptr += opsize; } } - return !sack_adjusted; } /* TCP sequence number adjustment */ diff --git a/release/src/linux/linux/net/ipv4/netfilter/ip_nat_pptp.c b/release/src/linux/linux/net/ipv4/netfilter/ip_nat_pptp.c index 71077933..358a4579 100644 --- a/release/src/linux/linux/net/ipv4/netfilter/ip_nat_pptp.c +++ b/release/src/linux/linux/net/ipv4/netfilter/ip_nat_pptp.c @@ -1,5 +1,5 @@ /* - * ip_nat_pptp.c - Version 1.11 + * ip_nat_pptp.c - Version 1.5 * * NAT support for PPTP (Point to Point Tunneling Protocol). * PPTP is a a protocol for creating virtual private networks. @@ -9,7 +9,7 @@ * GRE is defined in RFC 1701 and RFC 1702. Documentation of * PPTP can be found in RFC 2637 * - * (C) 2000-2002 by Harald Welte <laforge@gnumonks.org> + * (C) 2000-2003 by Harald Welte <laforge@gnumonks.org> * * Development of this code funded by Astaro AG (http://www.astaro.com/) * @@ -17,7 +17,18 @@ * (needs netfilter newnat code) * - NAT to a unique tuple, not to TCP source port * (needs netfilter tuple reservation) - * - Support other NAT scenarios than SNAT of PNS + * + * Changes: + * 2002-02-10 - Version 1.3 + * - Use ip_nat_mangle_tcp_packet() because of cloned skb's + * in local connections (Philip Craig <philipc@snapgear.com>) + * - add checks for magicCookie and pptp version + * - make argument list of pptp_{out,in}bound_packet() shorter + * - move to C99 style initializers + * - print version number at module loadtime + * 2003-09-22 - Version 1.5 + * - use SNATed tcp sourceport as callid, since we get called before + * TCP header is mangled (Philip Craig <philipc@snapgear.com>) * */ @@ -34,6 +45,8 @@ #include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h> #include <linux/netfilter_ipv4/ip_conntrack_pptp.h> +#define IP_NAT_PPTP_VERSION "1.5" + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); MODULE_DESCRIPTION("Netfilter NAT helper module for PPTP"); @@ -57,7 +70,7 @@ pptp_nat_expected(struct sk_buff **pskb, struct ip_nat_multi_range mr; struct ip_ct_pptp_master *ct_pptp_info; struct ip_nat_pptp *nat_pptp_info; - u_int32_t newsrcip, newdstip, newcid; + u_int32_t newip, newcid; int ret; IP_NF_ASSERT(info); @@ -72,7 +85,7 @@ pptp_nat_expected(struct sk_buff **pskb, /* need to alter GRE tuple because conntrack expectfn() used 'wrong' * (unmanipulated) values */ - if (hooknum == NF_IP_PRE_ROUTING) { + if (HOOK2MANIP(hooknum) == IP_NAT_MANIP_DST) { DEBUGP("completing tuples with NAT info \n"); /* we can do this, since we're unconfirmed */ if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.gre.key == @@ -80,68 +93,43 @@ pptp_nat_expected(struct sk_buff **pskb, /* assume PNS->PAC */ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.gre.key = htonl(nat_pptp_info->pns_call_id); -// ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u.gre.key = -// htonl(nat_pptp_info->pac_call_id); ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.gre.key = htonl(nat_pptp_info->pns_call_id); + newip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip; + newcid = htonl(nat_pptp_info->pac_call_id); } else { /* assume PAC->PNS */ - DEBUGP("WRONG DIRECTION\n"); ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.gre.key = htonl(nat_pptp_info->pac_call_id); ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.gre.key = htonl(nat_pptp_info->pac_call_id); - } - } - - if (HOOK2MANIP(hooknum) == IP_NAT_MANIP_DST) - { - if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.gre.key == htonl(ct_pptp_info->pac_call_id)) - { - /* assume PNS->PAC */ - newdstip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip; - newcid = htonl(nat_pptp_info->pac_call_id); - } - else - { - /* assume PAC->PNS */ - newdstip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip; + newip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip; newcid = htonl(nat_pptp_info->pns_call_id); } - mr.rangesize = 1; - mr.range[0].flags = IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED; - mr.range[0].min_ip = mr.range[0].max_ip = newdstip; - mr.range[0].min = mr.range[0].max = - ((union ip_conntrack_manip_proto ) { newcid }); - DEBUGP("change dest ip to %u.%u.%u.%u\n", - NIPQUAD(newdstip)); - DEBUGP("change dest key to 0x%x\n", ntohl(newcid)); - ret = ip_nat_setup_info(ct, &mr, hooknum); } else { - if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.gre.key == htonl(ct_pptp_info->pac_call_id)) - { - newsrcip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip; + if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.gre.key == + htonl(ct_pptp_info->pac_call_id)) { + /* assume PNS->PAC */ + newip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip; newcid = htonl(ct_pptp_info->pns_call_id); } - else - { + else { /* assume PAC->PNS */ - newsrcip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip; + newip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip; newcid = htonl(ct_pptp_info->pac_call_id); } - - mr.rangesize = 1; - mr.range[0].flags = IP_NAT_RANGE_MAP_IPS - |IP_NAT_RANGE_PROTO_SPECIFIED; - mr.range[0].min_ip = mr.range[0].max_ip = newsrcip; - mr.range[0].min = mr.range[0].max = - ((union ip_conntrack_manip_proto ) { newcid }); - DEBUGP("change src ip to %u.%u.%u.%u\n", - NIPQUAD(newsrcip)); - DEBUGP("change 'src' key to 0x%x\n", ntohl(newcid)); - ret = ip_nat_setup_info(ct, &mr, hooknum); } + mr.rangesize = 1; + mr.range[0].flags = IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED; + mr.range[0].min_ip = mr.range[0].max_ip = newip; + mr.range[0].min = mr.range[0].max = + ((union ip_conntrack_manip_proto ) { newcid }); + DEBUGP("change ip to %u.%u.%u.%u\n", + NIPQUAD(newip)); + DEBUGP("change key to 0x%x\n", ntohl(newcid)); + ret = ip_nat_setup_info(ct, &mr, hooknum); + UNLOCK_BH(&ip_pptp_lock); return ret; @@ -150,13 +138,17 @@ pptp_nat_expected(struct sk_buff **pskb, /* outbound packets == from PNS to PAC */ static inline unsigned int -pptp_outbound_pkt(struct tcphdr *tcph, struct pptp_pkt_hdr *pptph, - size_t datalen, +pptp_outbound_pkt(struct sk_buff **pskb, struct ip_conntrack *ct, enum ip_conntrack_info ctinfo, struct ip_conntrack_expect *exp) { + struct iphdr *iph = (*pskb)->nh.iph; + struct tcphdr *tcph = (void *) iph + iph->ihl*4; + struct pptp_pkt_hdr *pptph = (struct pptp_pkt_hdr *) + ((void *)tcph + tcph->doff*4); + struct PptpControlHeader *ctlh; union pptp_ctrl_union pptpReq; struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info; @@ -164,6 +156,7 @@ pptp_outbound_pkt(struct tcphdr *tcph, struct pptp_pkt_hdr *pptph, u_int16_t msg, *cid = NULL, new_callid; + /* FIXME: size checks !!! */ ctlh = (struct PptpControlHeader *) ((void *) pptph + sizeof(*pptph)); pptpReq.rawreq = (void *) ((void *) ctlh + sizeof(*ctlh)); @@ -172,11 +165,18 @@ pptp_outbound_pkt(struct tcphdr *tcph, struct pptp_pkt_hdr *pptph, switch (msg = ntohs(ctlh->messageType)) { case PPTP_OUT_CALL_REQUEST: cid = &pptpReq.ocreq->callID; + /* FIXME: ideally we would want to reserve a call ID + * here. current netfilter NAT core is not able to do + * this :( For now we use TCP source port. This breaks + * multiple calls within one control session */ /* save original call ID in nat_info */ nat_pptp_info->pns_call_id = ct_pptp_info->pns_call_id; - new_callid = tcph->source; + /* don't use tcph->source since we are at a DSTmanip + * hook (e.g. PREROUTING) and pkt is not mangled yet */ + new_callid = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port; + /* save new call ID in ct info */ ct_pptp_info->pns_call_id = ntohs(new_callid); break; @@ -186,10 +186,6 @@ pptp_outbound_pkt(struct tcphdr *tcph, struct pptp_pkt_hdr *pptph, case PPTP_CALL_CLEAR_REQUEST: cid = &pptpReq.clrreq->callID; break; - case PPTP_CALL_DISCONNECT_NOTIFY: - cid = &pptpReq.disc->callID; - break; - default: DEBUGP("unknown outbound packet 0x%04x:%s\n", msg, (msg <= PPTP_MSG_MAX)? strMName[msg]:strMName[0]); @@ -204,11 +200,6 @@ pptp_outbound_pkt(struct tcphdr *tcph, struct pptp_pkt_hdr *pptph, case PPTP_ECHO_REQUEST: case PPTP_ECHO_REPLY: /* no need to alter packet */ - DEBUGP("outbound control message %s\n", strMName[msg]); - DEBUGP("ct->pac_call_id = %d\n", ct_pptp_info->pac_call_id); - DEBUGP("ct->pns_call_id = %d\n", ct_pptp_info->pns_call_id); - DEBUGP("nat->pac_call_id = %d\n", nat_pptp_info->pac_call_id); - DEBUGP("nat->pns_call_id = %d\n", nat_pptp_info->pns_call_id); return NF_ACCEPT; } @@ -216,27 +207,27 @@ pptp_outbound_pkt(struct tcphdr *tcph, struct pptp_pkt_hdr *pptph, DEBUGP("altering call id from 0x%04x to 0x%04x\n", ntohs(*cid), ntohs(new_callid)); + /* mangle packet */ - tcph->check = ip_nat_cheat_check(*cid^0xFFFF, - new_callid, tcph->check); - *cid = new_callid; - - DEBUGP("outbound control message %s\n", strMName[msg]); - DEBUGP("ct->pac_call_id = %d\n", ct_pptp_info->pac_call_id); - DEBUGP("ct->pns_call_id = %d\n", ct_pptp_info->pns_call_id); - DEBUGP("nat->pac_call_id = %d\n", nat_pptp_info->pac_call_id); - DEBUGP("nat->pns_call_id = %d\n", nat_pptp_info->pns_call_id); + ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, (void *)cid - (void *)pptph, + sizeof(new_callid), (char *)&new_callid, + sizeof(new_callid)); + return NF_ACCEPT; } /* inbound packets == from PAC to PNS */ static inline unsigned int -pptp_inbound_pkt(struct tcphdr *tcph, struct pptp_pkt_hdr *pptph, - size_t datalen, +pptp_inbound_pkt(struct sk_buff **pskb, struct ip_conntrack *ct, enum ip_conntrack_info ctinfo, struct ip_conntrack_expect *oldexp) { + struct iphdr *iph = (*pskb)->nh.iph; + struct tcphdr *tcph = (void *) iph + iph->ihl*4; + struct pptp_pkt_hdr *pptph = (struct pptp_pkt_hdr *) + ((void *)tcph + tcph->doff*4); + struct PptpControlHeader *ctlh; union pptp_ctrl_union pptpReq; struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info; @@ -245,8 +236,10 @@ pptp_inbound_pkt(struct tcphdr *tcph, struct pptp_pkt_hdr *pptph, u_int16_t msg, new_cid = 0, new_pcid, *pcid = NULL, *cid = NULL; u_int32_t old_dst_ip; - struct ip_conntrack_tuple t; + struct ip_conntrack_tuple t, inv_t; + struct ip_conntrack_tuple *orig_t, *reply_t; + /* FIXME: size checks !!! */ ctlh = (struct PptpControlHeader *) ((void *) pptph + sizeof(*pptph)); pptpReq.rawreq = (void *) ((void *) ctlh + sizeof(*ctlh)); @@ -262,23 +255,30 @@ pptp_inbound_pkt(struct tcphdr *tcph, struct pptp_pkt_hdr *pptph, } old_dst_ip = oldexp->tuple.dst.ip; t = oldexp->tuple; + invert_tuplepr(&inv_t, &t); /* save original PAC call ID in nat_info */ nat_pptp_info->pac_call_id = ct_pptp_info->pac_call_id; - /* store new callID in ct_info, so conntrack works */ - //ct_pptp_info->pac_call_id = ntohs(tcph->source); - //new_cid = htons(ct_pptp_info->pac_call_id); - /* alter expectation */ - if (t.dst.ip == ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip) { + orig_t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; + reply_t = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; + if (t.src.ip == orig_t->src.ip && t.dst.ip == orig_t->dst.ip) { /* expectation for PNS->PAC direction */ - t.dst.u.gre.key = htonl(ct_pptp_info->pac_call_id); t.src.u.gre.key = htonl(nat_pptp_info->pns_call_id); + t.dst.u.gre.key = htonl(ct_pptp_info->pac_call_id); + inv_t.src.ip = reply_t->src.ip; + inv_t.dst.ip = reply_t->dst.ip; + inv_t.src.u.gre.key = htonl(nat_pptp_info->pac_call_id); + inv_t.dst.u.gre.key = htonl(ct_pptp_info->pns_call_id); } else { /* expectation for PAC->PNS direction */ - t.dst.ip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip; - DEBUGP("EXPECTATION IN WRONG DIRECTION!!!\n"); + t.src.u.gre.key = htonl(nat_pptp_info->pac_call_id); + t.dst.u.gre.key = htonl(ct_pptp_info->pns_call_id); + inv_t.src.ip = orig_t->src.ip; + inv_t.dst.ip = orig_t->dst.ip; + inv_t.src.u.gre.key = htonl(nat_pptp_info->pns_call_id); + inv_t.dst.u.gre.key = htonl(ct_pptp_info->pac_call_id); } if (!ip_conntrack_change_expect(oldexp, &t)) { @@ -287,13 +287,7 @@ pptp_inbound_pkt(struct tcphdr *tcph, struct pptp_pkt_hdr *pptph, DEBUGP("can't change expect\n"); } ip_ct_gre_keymap_change(oldexp->proto.gre.keymap_orig, &t); - /* reply keymap */ - t.src.ip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip; - t.dst.ip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip; - t.src.u.gre.key = htonl(nat_pptp_info->pac_call_id); - t.dst.u.gre.key = htonl(ct_pptp_info->pns_call_id); - ip_ct_gre_keymap_change(oldexp->proto.gre.keymap_reply, &t); - + ip_ct_gre_keymap_change(oldexp->proto.gre.keymap_reply, &inv_t); break; case PPTP_IN_CALL_CONNECT: pcid = &pptpReq.iccon->peersCallID; @@ -323,9 +317,10 @@ pptp_inbound_pkt(struct tcphdr *tcph, struct pptp_pkt_hdr *pptph, case PPTP_WAN_ERROR_NOTIFY: pcid = &pptpReq.wanerr->peersCallID; break; - case PPTP_SET_LINK_INFO: - pcid = &pptpReq.setlink->peersCallID; + case PPTP_CALL_DISCONNECT_NOTIFY: + pcid = &pptpReq.disc->callID; break; + default: DEBUGP("unknown inbound packet %s\n", (msg <= PPTP_MSG_MAX)? strMName[msg]:strMName[0]); @@ -334,14 +329,10 @@ pptp_inbound_pkt(struct tcphdr *tcph, struct pptp_pkt_hdr *pptph, case PPTP_START_SESSION_REQUEST: case PPTP_START_SESSION_REPLY: case PPTP_STOP_SESSION_REQUEST: + case PPTP_STOP_SESSION_REPLY: case PPTP_ECHO_REQUEST: case PPTP_ECHO_REPLY: /* no need to alter packet */ - DEBUGP("inbound control message %s\n", strMName[msg]); - DEBUGP("ct->pac_call_id = %d\n", ct_pptp_info->pac_call_id); - DEBUGP("ct->pns_call_id = %d\n", ct_pptp_info->pns_call_id); - DEBUGP("nat->pac_call_id = %d\n", nat_pptp_info->pac_call_id); - DEBUGP("nat->pns_call_id = %d\n", nat_pptp_info->pns_call_id); return NF_ACCEPT; } @@ -349,25 +340,21 @@ pptp_inbound_pkt(struct tcphdr *tcph, struct pptp_pkt_hdr *pptph, IP_NF_ASSERT(pcid); DEBUGP("altering peer call id from 0x%04x to 0x%04x\n", ntohs(*pcid), ntohs(new_pcid)); - tcph->check = ip_nat_cheat_check(*pcid^0xFFFF, - new_pcid, tcph->check); - *pcid = new_pcid; + ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, (void *)pcid - (void *)pptph, + sizeof(new_pcid), (char *)&new_pcid, + sizeof(new_pcid)); if (new_cid) { IP_NF_ASSERT(cid); DEBUGP("altering call id from 0x%04x to 0x%04x\n", ntohs(*cid), ntohs(new_cid)); - tcph->check = ip_nat_cheat_check(*cid^0xFFFF, - new_cid, tcph->check); - *cid = new_cid; + ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, + (void *)cid - (void *)pptph, + sizeof(new_cid), (char *)&new_cid, + sizeof(new_cid)); } /* great, at least we don't need to resize packets */ - DEBUGP("inbound control message %s\n", strMName[msg]); - DEBUGP("ct->pac_call_id = %d\n", ct_pptp_info->pac_call_id); - DEBUGP("ct->pns_call_id = %d\n", ct_pptp_info->pns_call_id); - DEBUGP("nat->pac_call_id = %d\n", nat_pptp_info->pac_call_id); - DEBUGP("nat->pns_call_id = %d\n", nat_pptp_info->pns_call_id); return NF_ACCEPT; } @@ -387,12 +374,13 @@ static unsigned int tcp_help(struct ip_conntrack *ct, DEBUGP("entering\n"); - /* Only mangle things once: original direction in POST_ROUTING - and reply direction on PRE_ROUTING. */ + /* Only mangle things once: DST for original direction + and SRC for reply direction. */ dir = CTINFO2DIR(ctinfo); - if (!((HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC && dir == IP_CT_DIR_ORIGINAL) - || (HOOK2MANIP(hooknum) == IP_NAT_MANIP_DST && dir == IP_CT_DIR_REPLY))) - { + if (!((HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC + && dir == IP_CT_DIR_ORIGINAL) + || (HOOK2MANIP(hooknum) == IP_NAT_MANIP_DST + && dir == IP_CT_DIR_REPLY))) { DEBUGP("Not touching dir %s at hook %s\n", dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY", hooknum == NF_IP_POST_ROUTING ? "POSTROUTING" @@ -409,13 +397,11 @@ static unsigned int tcp_help(struct ip_conntrack *ct, return NF_ACCEPT; } - pptph = (struct pptp_pkt_hdr *) ((void *)tcph + tcph->doff*4); /* if it's not a control message, we can't handle it */ if (ntohs(pptph->packetType) != PPTP_PACKET_CONTROL || - ntohl(pptph->magicCookie) != PPTP_MAGIC_COOKIE) - { + ntohl(pptph->magicCookie) != PPTP_MAGIC_COOKIE) { DEBUGP("not a pptp control packet\n"); return NF_ACCEPT; } @@ -424,10 +410,10 @@ static unsigned int tcp_help(struct ip_conntrack *ct, if (dir == IP_CT_DIR_ORIGINAL) { /* reuqests sent by client to server (PNS->PAC) */ - pptp_outbound_pkt(tcph, pptph, datalen, ct, ctinfo, exp); + pptp_outbound_pkt(pskb, ct, ctinfo, exp); } else { /* response from the server to the client (PAC->PNS) */ - pptp_inbound_pkt(tcph, pptph, datalen, ct, ctinfo, exp); + pptp_inbound_pkt(pskb, ct, ctinfo, exp); } UNLOCK_BH(&ip_pptp_lock); @@ -437,29 +423,52 @@ static unsigned int tcp_help(struct ip_conntrack *ct, /* nat helper struct for control connection */ static struct ip_nat_helper pptp_tcp_helper = { - { NULL, NULL }, - "pptp", IP_NAT_HELPER_F_ALWAYS, THIS_MODULE, - { { 0, { tcp: { port: __constant_htons(PPTP_CONTROL_PORT) } } }, - { 0, { 0 }, IPPROTO_TCP } }, - { { 0, { tcp: { port: 0xFFFF } } }, - { 0, { 0 }, 0xFFFF } }, - tcp_help, pptp_nat_expected }; + .list = { NULL, NULL }, + .name = "pptp", + .flags = IP_NAT_HELPER_F_ALWAYS, + .me = THIS_MODULE, + .tuple = { .src = { .ip = 0, + .u = { .tcp = { .port = + __constant_htons(PPTP_CONTROL_PORT) } + } + }, + .dst = { .ip = 0, + .u = { .all = 0 }, + .protonum = IPPROTO_TCP + } + }, + + .mask = { .src = { .ip = 0, + .u = { .tcp = { .port = 0xFFFF } } + }, + .dst = { .ip = 0, + .u = { .all = 0 }, + .protonum = 0xFFFF + } + }, + .help = tcp_help, + .expect = pptp_nat_expected +}; static int __init init(void) { - DEBUGP("init_module\n" ); - - if (ip_nat_helper_register(&pptp_tcp_helper)) + DEBUGP("%s: registering NAT helper\n", __FILE__); + if (ip_nat_helper_register(&pptp_tcp_helper)) { + printk(KERN_ERR "Unable to register NAT application helper " + "for pptp\n"); return -EIO; + } - return 0; + printk("ip_nat_pptp version %s loaded\n", IP_NAT_PPTP_VERSION); + return 0; } static void __exit fini(void) { DEBUGP("cleanup_module\n" ); - ip_nat_helper_unregister(&pptp_tcp_helper); + ip_nat_helper_unregister(&pptp_tcp_helper); + printk("ip_nat_pptp version %s unloaded\n", IP_NAT_PPTP_VERSION); } module_init(init); diff --git a/release/src/linux/linux/net/ipv4/netfilter/ip_nat_proto_esp.c b/release/src/linux/linux/net/ipv4/netfilter/ip_nat_proto_esp.c index a985539e..a985539e 100755..100644 --- a/release/src/linux/linux/net/ipv4/netfilter/ip_nat_proto_esp.c +++ b/release/src/linux/linux/net/ipv4/netfilter/ip_nat_proto_esp.c diff --git a/release/src/linux/linux/net/ipv4/netfilter/ip_nat_proto_gre.c b/release/src/linux/linux/net/ipv4/netfilter/ip_nat_proto_gre.c index 25aa1786..9be95857 100644 --- a/release/src/linux/linux/net/ipv4/netfilter/ip_nat_proto_gre.c +++ b/release/src/linux/linux/net/ipv4/netfilter/ip_nat_proto_gre.c @@ -1,5 +1,5 @@ /* - * ip_nat_proto_gre.c - Version 1.11 + * ip_nat_proto_gre.c - Version 1.2 * * NAT protocol helper module for GRE. * @@ -17,7 +17,7 @@ * * Documentation about PPTP can be found in RFC 2637 * - * (C) 2000-2002 by Harald Welte <laforge@gnumonks.org> + * (C) 2000-2003 by Harald Welte <laforge@gnumonks.org> * * Development of this code funded by Astaro AG (http://www.astaro.com/) * @@ -35,7 +35,12 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE"); +#if 0 +#define DEBUGP(format, args...) printk(KERN_DEBUG __FILE__ ":" __FUNCTION__ \ + ": " format, ## args) +#else #define DEBUGP(x, args...) +#endif /* is key in given range between min and max */ static int @@ -44,8 +49,15 @@ gre_in_range(const struct ip_conntrack_tuple *tuple, const union ip_conntrack_manip_proto *min, const union ip_conntrack_manip_proto *max) { - return ntohl(tuple->src.u.gre.key) >= ntohl(min->gre.key) - && ntohl(tuple->src.u.gre.key) <= ntohl(max->gre.key); + u_int32_t key; + + if (maniptype == IP_NAT_MANIP_SRC) + key = tuple->src.u.gre.key; + else + key = tuple->dst.u.gre.key; + + return ntohl(key) >= ntohl(min->gre.key) + && ntohl(key) <= ntohl(max->gre.key); } /* generate unique tuple ... */ @@ -122,6 +134,7 @@ gre_manip_pkt(struct iphdr *iph, size_t len, break; } if (greh->csum) { + /* FIXME: Never tested this code... */ *(gre_csum(greh)) = ip_nat_cheat_check(~*(gre_key(greh)), manip->u.gre.key, diff --git a/release/src/linux/linux/net/ipv4/netfilter/ip_nat_proto_udp.c b/release/src/linux/linux/net/ipv4/netfilter/ip_nat_proto_udp.c index 05aefcd4..622aee05 100644 --- a/release/src/linux/linux/net/ipv4/netfilter/ip_nat_proto_udp.c +++ b/release/src/linux/linux/net/ipv4/netfilter/ip_nat_proto_udp.c @@ -40,9 +40,6 @@ udp_unique_tuple(struct ip_conntrack_tuple *tuple, else portptr = &tuple->dst.u.udp.port; - if(ntohs(*portptr) == 500) - return 0;//must not be "return 1" - /* If no range specified... */ if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) { /* If it's dst rewrite, can't change port */ diff --git a/release/src/linux/linux/net/ipv4/netfilter/ip_tables.c b/release/src/linux/linux/net/ipv4/netfilter/ip_tables.c index 99438ca0..2e3004db 100644 --- a/release/src/linux/linux/net/ipv4/netfilter/ip_tables.c +++ b/release/src/linux/linux/net/ipv4/netfilter/ip_tables.c @@ -332,7 +332,7 @@ ipt_do_table(struct sk_buff **pskb, continue; } if (table_base + v - != (void *)e + e->next_offset) { + != (void *)e + e->next_offset && !(e->ip.flags & IPT_F_GOTO)) { /* Save old back ptr in next entry */ struct ipt_entry *next = (void *)e + e->next_offset; @@ -374,6 +374,12 @@ ipt_do_table(struct sk_buff **pskb, if (verdict == IPT_CONTINUE) e = (void *)e + e->next_offset; + else if (verdict == IPT_RETURN) { // added -- zzz + e = back; + back = get_entry(table_base, + back->comefrom); + continue; + } else /* Verdict */ break; @@ -1169,13 +1175,11 @@ do_add_counters(void *user, unsigned int len) goto free; write_lock_bh(&t->lock); - /************************************* - * modify by tanghui @ 2006-10-11 - * for a RACE CONDITION in the "do_add_counters()" function - *************************************/ - //if (t->private->number != paddc->num_counters) { - if (t->private->number != tmp.num_counters) { - /*************************************/ + +#if 0 // removed 1.11 forward bug test + // if (t->private->number != tmp.num_counters) { // 43011: modify by tanghui @ 2006-10-11 for a RACE CONDITION in the "do_add_counters()" function +#endif + if (t->private->number != paddc->num_counters) { ret = -EINVAL; goto unlock_up_free; } @@ -1676,7 +1680,7 @@ static struct ipt_match icmp_matchstruct = { { NULL, NULL }, "icmp", &icmp_match, &icmp_checkentry, NULL }; #ifdef CONFIG_PROC_FS -static inline int print_name(const char *i, +static int print_name(const char *i, off_t start_offset, char *buffer, int length, off_t *pos, unsigned int *count) { @@ -1694,6 +1698,15 @@ static inline int print_name(const char *i, return 0; } +static inline int print_target(const struct ipt_target *t, + off_t start_offset, char *buffer, int length, + off_t *pos, unsigned int *count) +{ + if (t == &ipt_standard_target || t == &ipt_error_target) + return 0; + return print_name((char *)t, start_offset, buffer, length, pos, count); +} + static int ipt_get_tables(char *buffer, char **start, off_t offset, int length) { off_t pos = 0; @@ -1720,7 +1733,7 @@ static int ipt_get_targets(char *buffer, char **start, off_t offset, int length) if (down_interruptible(&ipt_mutex) != 0) return 0; - LIST_FIND(&ipt_target, print_name, void *, + LIST_FIND(&ipt_target, print_target, struct ipt_target *, offset, buffer, length, &pos, &count); up(&ipt_mutex); diff --git a/release/src/linux/linux/net/ipv4/netfilter/ipt_BCOUNT.c b/release/src/linux/linux/net/ipv4/netfilter/ipt_BCOUNT.c new file mode 100644 index 00000000..b40e7e2e --- /dev/null +++ b/release/src/linux/linux/net/ipv4/netfilter/ipt_BCOUNT.c @@ -0,0 +1,63 @@ +/* + + BCOUNT target + Copyright (C) 2006 Jonathan Zarate + + Licensed under GNU GPL v2 or later. + +*/ +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/if_ether.h> + +#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter_ipv4/ip_conntrack.h> +#include <linux/netfilter_ipv4/ipt_BCOUNT.h> + +// #define DEBUG_BCOUNT + +static unsigned int target(struct sk_buff **pskb, unsigned int hooknum, + const struct net_device *in, const struct net_device *out, + const void *targinfo, void *userinfo) +{ + struct ip_conntrack *ct; + enum ip_conntrack_info ctinfo; + + ct = ip_conntrack_get(*pskb, &ctinfo); + if (ct) { + ct->bcount += (*pskb)->len; + if (ct->bcount >= 0x0FFFFFFF) ct->bcount = 0x0FFFFFFF; +#ifdef DEBUG_BCOUNT + if (net_ratelimit()) + printf(KERN_DEBUG "BCOUNT %lx %lx\n", (*pskb)->len, ct->bcount); +#endif + } + return IPT_CONTINUE; +} + +static int checkentry(const char *tablename, const struct ipt_entry *e, void *targinfo, + unsigned int targinfosize, unsigned int hook_mask) +{ + return (targinfosize == IPT_ALIGN(sizeof(struct ipt_BCOUNT_target))); +} + +static struct ipt_target BCOUNT_target += { { NULL, NULL }, "BCOUNT", target, checkentry, NULL, THIS_MODULE }; + +static int __init init(void) +{ + return ipt_register_target(&BCOUNT_target); +} + +static void __exit fini(void) +{ + ipt_unregister_target(&BCOUNT_target); +} + +module_init(init); +module_exit(fini); + + +MODULE_AUTHOR("Jonathan Zarate"); +MODULE_DESCRIPTION("BCOUNT target"); +MODULE_LICENSE("GPL"); diff --git a/release/src/linux/linux/net/ipv4/netfilter/ipt_CLASSIFY.c b/release/src/linux/linux/net/ipv4/netfilter/ipt_CLASSIFY.c new file mode 100644 index 00000000..f7320721 --- /dev/null +++ b/release/src/linux/linux/net/ipv4/netfilter/ipt_CLASSIFY.c @@ -0,0 +1,82 @@ +/* + * This is a module which is used for setting the skb->priority field + * of an skb for qdisc classification. + */ + +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/ip.h> +#include <net/checksum.h> + +#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter_ipv4/ipt_CLASSIFY.h> + +MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("iptables qdisc classification target module"); + +static unsigned int +target(struct sk_buff **pskb, + unsigned int hooknum, + const struct net_device *in, + const struct net_device *out, + const void *targinfo, + void *userinfo) +{ + const struct ipt_classify_target_info *clinfo = targinfo; + + if((*pskb)->priority != clinfo->priority) { + (*pskb)->priority = clinfo->priority; + (*pskb)->nfcache |= NFC_ALTERED; + } + + return IPT_CONTINUE; +} + +static int +checkentry(const char *tablename, + const struct ipt_entry *e, + void *targinfo, + unsigned int targinfosize, + unsigned int hook_mask) +{ + if (targinfosize != IPT_ALIGN(sizeof(struct ipt_classify_target_info))){ + printk(KERN_ERR "CLASSIFY: invalid size (%u != %u).\n", + targinfosize, + IPT_ALIGN(sizeof(struct ipt_classify_target_info))); + return 0; + } + + if (hook_mask & ~(1 << NF_IP_POST_ROUTING)) { + printk(KERN_ERR "CLASSIFY: only valid in POST_ROUTING.\n"); + return 0; + } + + if (strcmp(tablename, "mangle") != 0) { + printk(KERN_WARNING "CLASSIFY: can only be called from " + "\"mangle\" table, not \"%s\".\n", + tablename); + return 0; + } + + return 1; +} + +static struct ipt_target ipt_classify_reg += { { NULL, NULL }, "CLASSIFY", target, checkentry, NULL, THIS_MODULE }; + +static int __init init(void) +{ + if (ipt_register_target(&ipt_classify_reg)) + return -EINVAL; + + return 0; +} + +static void __exit fini(void) +{ + ipt_unregister_target(&ipt_classify_reg); +} + +module_init(init); +module_exit(fini); diff --git a/release/src/linux/linux/net/ipv4/netfilter/ipt_CONNMARK.c b/release/src/linux/linux/net/ipv4/netfilter/ipt_CONNMARK.c new file mode 100644 index 00000000..a6038378 --- /dev/null +++ b/release/src/linux/linux/net/ipv4/netfilter/ipt_CONNMARK.c @@ -0,0 +1,128 @@ +/* This kernel module is used to modify the connection mark values, or + * to optionally restore the skb nfmark from the connection mark + * + * Copyright (C) 2002,2004 MARA Systems AB <http://www.marasystems.com> + * by Henrik Nordstrom <hno@marasystems.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/ip.h> +#include <net/checksum.h> + +MODULE_AUTHOR("Henrik Nordstrom <hno@marasytems.com>"); +MODULE_DESCRIPTION("IP tables CONNMARK matching module"); +MODULE_LICENSE("GPL"); + +#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter_ipv4/ipt_CONNMARK.h> +#include <linux/netfilter_ipv4/ip_conntrack.h> + +static unsigned int +target(struct sk_buff **pskb, + unsigned int hooknum, + const struct net_device *in, + const struct net_device *out, + const void *targinfo, + void *userinfo) +{ + const struct ipt_connmark_target_info *markinfo = targinfo; + unsigned long diff; + unsigned long nfmark; + unsigned long newmark; + + enum ip_conntrack_info ctinfo; + struct ip_conntrack *ct = ip_conntrack_get((*pskb), &ctinfo); + if (ct) { + switch(markinfo->mode) { + case IPT_CONNMARK_SET: + newmark = (ct->mark & ~markinfo->mask) | markinfo->mark; + if (newmark != ct->mark) + ct->mark = newmark; + break; + case IPT_CONNMARK_SET_RETURN: + // Set connmark and nfmark, apply mask to nfmark, do IPT_RETURN - zzz + newmark = ct->mark = markinfo->mark; + newmark &= markinfo->mask; + nfmark = (*pskb)->nfmark; + if (newmark != nfmark) { + (*pskb)->nfmark = newmark; + (*pskb)->nfcache |= NFC_ALTERED; + } + return IPT_RETURN; + case IPT_CONNMARK_SAVE: + newmark = (ct->mark & ~markinfo->mask) | ((*pskb)->nfmark & markinfo->mask); + if (ct->mark != newmark) + ct->mark = newmark; + break; + case IPT_CONNMARK_RESTORE: + nfmark = (*pskb)->nfmark; + diff = (ct->mark ^ nfmark) & markinfo->mask; // zzz + if (diff != 0) { + (*pskb)->nfmark = nfmark ^ diff; + (*pskb)->nfcache |= NFC_ALTERED; + } + break; + } + } + + return IPT_CONTINUE; +} + +static int +checkentry(const char *tablename, + const struct ipt_entry *e, + void *targinfo, + unsigned int targinfosize, + unsigned int hook_mask) +{ + struct ipt_connmark_target_info *matchinfo = targinfo; + if (targinfosize != IPT_ALIGN(sizeof(struct ipt_connmark_target_info))) { + printk(KERN_WARNING "CONNMARK: targinfosize %u != %Zu\n", + targinfosize, + IPT_ALIGN(sizeof(struct ipt_connmark_target_info))); + return 0; + } + + if (matchinfo->mode == IPT_CONNMARK_RESTORE) { + if (strcmp(tablename, "mangle") != 0) { + printk(KERN_WARNING "CONNMARK: restore can only be called from \"mangle\" table, not \"%s\"\n", tablename); + return 0; + } + } + + return 1; +} + +static struct ipt_target ipt_connmark_reg = { + .name = "CONNMARK", + .target = &target, + .checkentry = &checkentry, + .me = THIS_MODULE +}; + +static int __init init(void) +{ + return ipt_register_target(&ipt_connmark_reg); +} + +static void __exit fini(void) +{ + ipt_unregister_target(&ipt_connmark_reg); +} + +module_init(init); +module_exit(fini); diff --git a/release/src/linux/linux/net/ipv4/netfilter/ipt_IMQ.c b/release/src/linux/linux/net/ipv4/netfilter/ipt_IMQ.c new file mode 100644 index 00000000..2ba068b3 --- /dev/null +++ b/release/src/linux/linux/net/ipv4/netfilter/ipt_IMQ.c @@ -0,0 +1,78 @@ +/* This target marks packets to be enqueued to an imq device */ +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter_ipv4/ipt_IMQ.h> +#include <linux/imq.h> + +static unsigned int imq_target(struct sk_buff **pskb, + unsigned int hooknum, + const struct net_device *in, + const struct net_device *out, + const void *targinfo, + void *userinfo) +{ + struct ipt_imq_info *mr = (struct ipt_imq_info*)targinfo; + + (*pskb)->imq_flags = mr->todev | IMQ_F_ENQUEUE; + (*pskb)->nfcache |= NFC_ALTERED; + + return IPT_CONTINUE; +} + +static int imq_checkentry(const char *tablename, + const struct ipt_entry *e, + void *targinfo, + unsigned int targinfosize, + unsigned int hook_mask) +{ + struct ipt_imq_info *mr; + + if (targinfosize != IPT_ALIGN(sizeof(struct ipt_imq_info))) { + printk(KERN_WARNING "IMQ: invalid targinfosize\n"); + return 0; + } + mr = (struct ipt_imq_info*)targinfo; + + if (strcmp(tablename, "mangle") != 0) { + printk(KERN_WARNING + "IMQ: IMQ can only be called from \"mangle\" table, not \"%s\"\n", + tablename); + return 0; + } + + if (mr->todev > IMQ_MAX_DEVS) { + printk(KERN_WARNING + "IMQ: invalid device specified, highest is %u\n", + IMQ_MAX_DEVS); + return 0; + } + + return 1; +} + +static struct ipt_target ipt_imq_reg = { + { NULL, NULL}, + "IMQ", + imq_target, + imq_checkentry, + NULL, + THIS_MODULE +}; + +static int __init init(void) +{ + if (ipt_register_target(&ipt_imq_reg)) + return -EINVAL; + + return 0; +} + +static void __exit fini(void) +{ + ipt_unregister_target(&ipt_imq_reg); +} + +module_init(init); +module_exit(fini); +MODULE_LICENSE("GPL"); diff --git a/release/src/linux/linux/net/ipv4/netfilter/ipt_MACSAVE.c b/release/src/linux/linux/net/ipv4/netfilter/ipt_MACSAVE.c new file mode 100644 index 00000000..62677279 --- /dev/null +++ b/release/src/linux/linux/net/ipv4/netfilter/ipt_MACSAVE.c @@ -0,0 +1,65 @@ +/* + + MACSAVE target + Copyright (C) 2006 Jonathan Zarate + + Licensed under GNU GPL v2 or later. + +*/ +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/if_ether.h> + +#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter_ipv4/ip_conntrack.h> +#include <linux/netfilter_ipv4/ipt_MACSAVE.h> + +static unsigned int target(struct sk_buff **pskb, unsigned int hooknum, + const struct net_device *in, const struct net_device *out, + const void *targinfo, void *userinfo) +{ +// const struct ipt_MACSAVE_target_info *info = targinfo; + struct sk_buff *skb = *pskb; + struct ip_conntrack *ct; + enum ip_conntrack_info ctinfo; + + if ((skb->mac.raw >= skb->head) && ((skb->mac.raw + ETH_HLEN) <= skb->data)) { + ct = ip_conntrack_get(skb, &ctinfo); + if (ct) { + memcpy(ct->macsave, skb->mac.ethernet->h_source, sizeof(ct->macsave)); + } + } + return IPT_CONTINUE; +} + +static int checkentry(const char *tablename, const struct ipt_entry *e, void *targinfo, + unsigned int targinfosize, unsigned int hook_mask) +{ + if (targinfosize != IPT_ALIGN(sizeof(struct ipt_MACSAVE_target_info))) { + printk(KERN_ERR "MACSAVE: Invalid data size\n"); + return 0; + } + + if (hook_mask & ~((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_FORWARD) | (1 << NF_IP_LOCAL_IN))) { + printk(KERN_ERR "MACSAVE: Valid only in PREROUTING, FORWARD and INPUT\n"); + return 0; + } + return 1; +} + +static struct ipt_target macsave_target += { { NULL, NULL }, "MACSAVE", target, checkentry, NULL, THIS_MODULE }; + +static int __init init(void) +{ + return ipt_register_target(&macsave_target); +} + +static void __exit fini(void) +{ + ipt_unregister_target(&macsave_target); +} + +module_init(init); +module_exit(fini); +MODULE_LICENSE("GPL"); diff --git a/release/src/linux/linux/net/ipv4/netfilter/ipt_ROUTE.c b/release/src/linux/linux/net/ipv4/netfilter/ipt_ROUTE.c new file mode 100644 index 00000000..b97d7792 --- /dev/null +++ b/release/src/linux/linux/net/ipv4/netfilter/ipt_ROUTE.c @@ -0,0 +1,422 @@ +/* + * This implements the ROUTE target, which enables you to setup unusual + * routes not supported by the standard kernel routing table. + * + * Copyright (C) 2002 Cedric de Launois <delaunois@info.ucl.ac.be> + * + * v 1.11 2004/11/23 + * + * This software is distributed under GNU GPL v2, 1991 + */ + +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/ip.h> +#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter_ipv4/ip_conntrack.h> +#include <linux/netfilter_ipv4/ipt_ROUTE.h> +#include <linux/netdevice.h> +#include <linux/route.h> +#include <net/ip.h> +#include <net/route.h> +#include <net/icmp.h> +#include <net/checksum.h> + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) +#endif + + +/* Try to route the packet according to the routing keys specified in + * route_info. Keys are : + * - ifindex : + * 0 if no oif preferred, + * otherwise set to the index of the desired oif + * - route_info->gw : + * 0 if no gateway specified, + * otherwise set to the next host to which the pkt must be routed + * If success, skb->dev is the output device to which the packet must + * be sent and skb->dst is not NULL + * + * RETURN: -1 if an error occured + * 1 if the packet was succesfully routed to the + * destination desired + * 0 if the kernel routing table could not route the packet + * according to the keys specified + */ +static int route(struct sk_buff *skb, + unsigned int ifindex, + const struct ipt_route_target_info *route_info) +{ + int err; + struct rtable *rt; + struct iphdr *iph = skb->nh.iph; + struct rt_key key = { + dst:iph->daddr, + src:0, + oif:ifindex, + tos:RT_TOS(iph->tos) + }; + + /* The destination address may be overloaded by the target */ + if (route_info->gw) + key.dst = route_info->gw; + + /* Trying to route the packet using the standard routing table. */ + if ((err = ip_route_output_key(&rt, &key))) { + if (net_ratelimit()) + DEBUGP("ipt_ROUTE: couldn't route pkt (err: %i)",err); + return -1; + } + + /* Drop old route. */ + dst_release(skb->dst); + skb->dst = NULL; + + /* Success if no oif specified or if the oif correspond to the + * one desired */ + if (!ifindex || rt->u.dst.dev->ifindex == ifindex) { + skb->dst = &rt->u.dst; + skb->dev = skb->dst->dev; + return 1; + } + + /* The interface selected by the routing table is not the one + * specified by the user. This may happen because the dst address + * is one of our own addresses. + */ + if (net_ratelimit()) + DEBUGP("ipt_ROUTE: failed to route as desired gw=%u.%u.%u.%u oif=%i (got oif=%i)\n", + NIPQUAD(route_info->gw), ifindex, rt->u.dst.dev->ifindex); + + return 0; +} + + +/* Stolen from ip_finish_output2 + * PRE : skb->dev is set to the device we are leaving by + * skb->dst is not NULL + * POST: the packet is sent with the link layer header pushed + * the packet is destroyed + */ +static void ip_direct_send(struct sk_buff *skb) +{ + struct dst_entry *dst = skb->dst; + struct hh_cache *hh = dst->hh; + + if (hh) { + read_lock_bh(&hh->hh_lock); + memcpy(skb->data - 16, hh->hh_data, 16); + read_unlock_bh(&hh->hh_lock); + skb_push(skb, hh->hh_len); + hh->hh_output(skb); + } else if (dst->neighbour) + dst->neighbour->output(skb); + else { + if (net_ratelimit()) + DEBUGP(KERN_DEBUG "ipt_ROUTE: no hdr & no neighbour cache!\n"); + kfree_skb(skb); + } +} + + +/* PRE : skb->dev is set to the device we are leaving by + * POST: - the packet is directly sent to the skb->dev device, without + * pushing the link layer header. + * - the packet is destroyed + */ +static inline int dev_direct_send(struct sk_buff *skb) +{ + return dev_queue_xmit(skb); +} + + +static unsigned int route_oif(const struct ipt_route_target_info *route_info, + struct sk_buff *skb) +{ + unsigned int ifindex = 0; + struct net_device *dev_out = NULL; + + /* The user set the interface name to use. + * Getting the current interface index. + */ + if ((dev_out = dev_get_by_name(route_info->oif))) { + ifindex = dev_out->ifindex; + } else { + /* Unknown interface name : packet dropped */ + if (net_ratelimit()) + DEBUGP("ipt_ROUTE: oif interface %s not found\n", route_info->oif); + return NF_DROP; + } + + /* Trying the standard way of routing packets */ + switch (route(skb, ifindex, route_info)) { + case 1: + dev_put(dev_out); + if (route_info->flags & IPT_ROUTE_CONTINUE) + return IPT_CONTINUE; + + ip_direct_send(skb); + return NF_STOLEN; + + case 0: + /* Failed to send to oif. Trying the hard way */ + if (route_info->flags & IPT_ROUTE_CONTINUE) + return NF_DROP; + + if (net_ratelimit()) + DEBUGP("ipt_ROUTE: forcing the use of %i\n", + ifindex); + + /* We have to force the use of an interface. + * This interface must be a tunnel interface since + * otherwise we can't guess the hw address for + * the packet. For a tunnel interface, no hw address + * is needed. + */ + if ((dev_out->type != ARPHRD_TUNNEL) + && (dev_out->type != ARPHRD_IPGRE)) { + if (net_ratelimit()) + DEBUGP("ipt_ROUTE: can't guess the hw addr !\n"); + dev_put(dev_out); + return NF_DROP; + } + + /* Send the packet. This will also free skb + * Do not go through the POST_ROUTING hook because + * skb->dst is not set and because it will probably + * get confused by the destination IP address. + */ + skb->dev = dev_out; + dev_direct_send(skb); + dev_put(dev_out); + return NF_STOLEN; + + default: + /* Unexpected error */ + dev_put(dev_out); + return NF_DROP; + } +} + + +static unsigned int route_iif(const struct ipt_route_target_info *route_info, + struct sk_buff *skb) +{ + struct net_device *dev_in = NULL; + + /* Getting the current interface index. */ + if (!(dev_in = dev_get_by_name(route_info->iif))) { + if (net_ratelimit()) + DEBUGP("ipt_ROUTE: iif interface %s not found\n", route_info->iif); + return NF_DROP; + } + + skb->dev = dev_in; + dst_release(skb->dst); + skb->dst = NULL; + + netif_rx(skb); + dev_put(dev_in); + return NF_STOLEN; +} + + +static unsigned int route_gw(const struct ipt_route_target_info *route_info, + struct sk_buff *skb) +{ + if (route(skb, 0, route_info)!=1) + return NF_DROP; + + if (route_info->flags & IPT_ROUTE_CONTINUE) + return IPT_CONTINUE; + + ip_direct_send(skb); + return NF_STOLEN; +} + +/* To detect and deter routed packet loopback when using the --tee option, + * we take a page out of the raw.patch book: on the copied skb, we set up + * a fake ->nfct entry, pointing to the local &route_tee_track. We skip + * routing packets when we see they already have that ->nfct. + */ + +static struct ip_conntrack route_tee_track; + +static unsigned int ipt_route_target(struct sk_buff **pskb, + unsigned int hooknum, + const struct net_device *in, + const struct net_device *out, + const void *targinfo, + void *userinfo) +{ + const struct ipt_route_target_info *route_info = targinfo; + struct sk_buff *skb = *pskb; + unsigned int res; + + /* If we are at PREROUTING or INPUT hook + * the TTL isn't decreased by the IP stack + */ + if (hooknum == NF_IP_PRE_ROUTING || + hooknum == NF_IP_LOCAL_IN) { + + struct iphdr *iph = skb->nh.iph; + + if (iph->ttl <= 1) { + struct rtable *rt; + + if (ip_route_output(&rt, iph->saddr, iph->daddr, + RT_TOS(iph->tos) | RTO_CONN, + 0)) { + return NF_DROP; + } + + if (skb->dev == rt->u.dst.dev) { + /* Drop old route. */ + dst_release(skb->dst); + skb->dst = &rt->u.dst; + + /* this will traverse normal stack, and + * thus call conntrack on the icmp packet */ + icmp_send(skb, ICMP_TIME_EXCEEDED, + ICMP_EXC_TTL, 0); + } + + return NF_DROP; + } + + /* + * If we are at INPUT the checksum must be recalculated since + * the length could change as the result of a defragmentation. + * -- Rickard Molin + */ + if(hooknum == NF_IP_LOCAL_IN) { + iph->ttl = iph->ttl - 1; + iph->check = 0; + iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); + } else { + ip_decrease_ttl(iph); + } + } + + if ((route_info->flags & IPT_ROUTE_TEE)) { + /* + * Copy the *pskb, and route the copy. Will later return + * IPT_CONTINUE for the original skb, which should continue + * on its way as if nothing happened. The copy should be + * independantly delivered to the ROUTE --gw. + */ + skb = skb_copy(*pskb, GFP_ATOMIC); + if (!skb) { + if (net_ratelimit()) + DEBUGP(KERN_DEBUG "ipt_ROUTE: copy failed!\n"); + return IPT_CONTINUE; + } + } + + /* Tell conntrack to forget this packet since it may get confused + * when a packet is leaving with dst address == our address. + * Good idea ? Dunno. Need advice. + * + * NEW: mark the skb with our &route_tee_track, so we avoid looping + * on any already routed packet. + */ + if (!(route_info->flags & IPT_ROUTE_CONTINUE)) { + nf_conntrack_put(skb->nfct); + skb->nfct = &route_tee_track.infos[IP_CT_NEW]; + nf_conntrack_get(skb->nfct); + skb->nfcache = 0; +#ifdef CONFIG_NETFILTER_DEBUG + skb->nf_debug = 0; +#endif + } + + if (route_info->oif[0]) { + res = route_oif(route_info, skb); + } else if (route_info->iif[0]) { + res = route_iif(route_info, skb); + } else if (route_info->gw) { + res = route_gw(route_info, skb); + } else { + if (net_ratelimit()) + DEBUGP(KERN_DEBUG "ipt_ROUTE: no parameter !\n"); + res = IPT_CONTINUE; + } + + if ((route_info->flags & IPT_ROUTE_TEE)) + res = IPT_CONTINUE; + + return res; +} + + +static int ipt_route_checkentry(const char *tablename, + const struct ipt_entry *e, + void *targinfo, + unsigned int targinfosize, + unsigned int hook_mask) +{ + if (strcmp(tablename, "mangle") != 0) { + printk("ipt_ROUTE: bad table `%s', use the `mangle' table.\n", + tablename); + return 0; + } + + if (hook_mask & ~( (1 << NF_IP_PRE_ROUTING) + | (1 << NF_IP_LOCAL_IN) + | (1 << NF_IP_FORWARD) + | (1 << NF_IP_LOCAL_OUT) + | (1 << NF_IP_POST_ROUTING))) { + printk("ipt_ROUTE: bad hook\n"); + return 0; + } + + if (targinfosize != IPT_ALIGN(sizeof(struct ipt_route_target_info))) { + printk(KERN_WARNING "ipt_ROUTE: targinfosize %u != %Zu\n", + targinfosize, + IPT_ALIGN(sizeof(struct ipt_route_target_info))); + return 0; + } + + return 1; +} + + +static struct ipt_target ipt_route_reg += { { NULL, NULL }, "ROUTE", ipt_route_target, ipt_route_checkentry, NULL, + THIS_MODULE }; + + +static int __init init(void) +{ + /* Set up fake conntrack (stolen from raw.patch): + - to never be deleted, not in any hashes */ + atomic_set(&route_tee_track.ct_general.use, 1); + /* - and look it like as a confirmed connection */ + set_bit(IPS_CONFIRMED_BIT, &route_tee_track.status); + /* - and prepare the ctinfo field for REJECT/NAT. */ + route_tee_track.infos[IP_CT_NEW].master = + route_tee_track.infos[IP_CT_RELATED].master = + route_tee_track.infos[IP_CT_RELATED + IP_CT_IS_REPLY].master = + &route_tee_track.ct_general; + /* Initialize fake conntrack so that NAT will skip it */ + route_tee_track.nat.info.initialized |= + (1 << IP_NAT_MANIP_SRC) | (1 << IP_NAT_MANIP_DST); + + if (ipt_register_target(&ipt_route_reg)) + return -EINVAL; + + return 0; +} + + +static void __exit fini(void) +{ + ipt_unregister_target(&ipt_route_reg); +} + +module_init(init); +module_exit(fini); +MODULE_LICENSE("GPL"); diff --git a/release/src/linux/linux/net/ipv4/netfilter/ipt_TRIGGER.c b/release/src/linux/linux/net/ipv4/netfilter/ipt_TRIGGER.c index 99e7dfe7..07103fa5 100644 --- a/release/src/linux/linux/net/ipv4/netfilter/ipt_TRIGGER.c +++ b/release/src/linux/linux/net/ipv4/netfilter/ipt_TRIGGER.c @@ -62,7 +62,8 @@ LIST_HEAD(trigger_list); static void trigger_refresh(struct ipt_trigger *trig, unsigned long extra_jiffies) { - DEBUGP("%s: \n", __FUNCTION__); + DEBUGP("%s: mport=%u-%u\n", __FUNCTION__, trig->ports.mport[0], trig->ports.mport[1]); + IP_NF_ASSERT(trig); WRITE_LOCK(&ip_conntrack_lock); @@ -77,7 +78,8 @@ static void trigger_refresh(struct ipt_trigger *trig, unsigned long extra_jiffie static void __del_trigger(struct ipt_trigger *trig) { - DEBUGP("%s: \n", __FUNCTION__); + DEBUGP("%s: mport=%u-%u\n", __FUNCTION__, trig->ports.mport[0], trig->ports.mport[1]); + IP_NF_ASSERT(trig); MUST_BE_WRITE_LOCKED(&ip_conntrack_lock); @@ -90,7 +92,9 @@ static void trigger_timeout(unsigned long ul_trig) { struct ipt_trigger *trig= (void *) ul_trig; - DEBUGP("trigger list %p timed out\n", trig); +// DEBUGP("trigger list %p timed out\n", trig); + DEBUGP("%s: mport=%u-%u\n", __FUNCTION__, trig->ports.mport[0], trig->ports.mport[1]); + WRITE_LOCK(&ip_conntrack_lock); __del_trigger(trig); WRITE_UNLOCK(&ip_conntrack_lock); @@ -250,7 +254,7 @@ trigger_dnat(struct sk_buff **pskb, IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW)); DEBUGP("%s: got ", __FUNCTION__); - DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + DUMP_TUPLE_RAW(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); /* Alter the destination of imcoming packet. */ newrange = ((struct ip_nat_multi_range) @@ -310,7 +314,7 @@ trigger_check(const char *tablename, DEBUGP("trigger_check: size %u.\n", targinfosize); return 0; } - if (hook_mask & ~((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_FORWARD))) { + if (hook_mask & ~((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_FORWARD) | (1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_POST_ROUTING))) { DEBUGP("trigger_check: bad hooks %x.\n", hook_mask); return 0; } diff --git a/release/src/linux/linux/net/ipv4/netfilter/ipt_TTL.c b/release/src/linux/linux/net/ipv4/netfilter/ipt_TTL.c new file mode 100644 index 00000000..2f0a4e7a --- /dev/null +++ b/release/src/linux/linux/net/ipv4/netfilter/ipt_TTL.c @@ -0,0 +1,110 @@ +/* TTL modification target for IP tables + * (C) 2000 by Harald Welte <laforge@gnumonks.org> + * + * Version: 1.8 + * + * This software is distributed under the terms of GNU GPL + */ + +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/ip.h> +#include <net/checksum.h> + +#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter_ipv4/ipt_TTL.h> + +MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); +MODULE_DESCRIPTION("IP tables TTL modification module"); +MODULE_LICENSE("GPL"); + +static unsigned int ipt_ttl_target(struct sk_buff **pskb, unsigned int hooknum, + const struct net_device *in, const struct net_device *out, + const void *targinfo, void *userinfo) +{ + struct iphdr *iph = (*pskb)->nh.iph; + const struct ipt_TTL_info *info = targinfo; + u_int16_t diffs[2]; + int new_ttl; + + switch (info->mode) { + case IPT_TTL_SET: + new_ttl = info->ttl; + break; + case IPT_TTL_INC: + new_ttl = iph->ttl + info->ttl; + if (new_ttl > 255) + new_ttl = 255; + break; + case IPT_TTL_DEC: + new_ttl = iph->ttl - info->ttl; + if (new_ttl < 0) + new_ttl = 0; + break; + default: + new_ttl = iph->ttl; + break; + } + + if (new_ttl != iph->ttl) { + diffs[0] = htons(((unsigned)iph->ttl) << 8) ^ 0xFFFF; + iph->ttl = new_ttl; + diffs[1] = htons(((unsigned)iph->ttl) << 8); + iph->check = csum_fold(csum_partial((char *)diffs, + sizeof(diffs), + iph->check^0xFFFF)); + (*pskb)->nfcache |= NFC_ALTERED; + } + + return IPT_CONTINUE; +} + +static int ipt_ttl_checkentry(const char *tablename, + const struct ipt_entry *e, + void *targinfo, + unsigned int targinfosize, + unsigned int hook_mask) +{ + struct ipt_TTL_info *info = targinfo; + + if (targinfosize != IPT_ALIGN(sizeof(struct ipt_TTL_info))) { + printk(KERN_WARNING "TTL: targinfosize %u != %Zu\n", + targinfosize, + IPT_ALIGN(sizeof(struct ipt_TTL_info))); + return 0; + } + + if (strcmp(tablename, "mangle")) { + printk(KERN_WARNING "TTL: can only be called from \"mangle\" table, not \"%s\"\n", tablename); + return 0; + } + + if (info->mode > IPT_TTL_MAXMODE) { + printk(KERN_WARNING "TTL: invalid or unknown Mode %u\n", + info->mode); + return 0; + } + + if ((info->mode != IPT_TTL_SET) && (info->ttl == 0)) { + printk(KERN_WARNING "TTL: increment/decrement doesn't make sense with value 0\n"); + return 0; + } + + return 1; +} + +static struct ipt_target ipt_TTL = { { NULL, NULL }, "TTL", + ipt_ttl_target, ipt_ttl_checkentry, NULL, THIS_MODULE }; + +static int __init init(void) +{ + return ipt_register_target(&ipt_TTL); +} + +static void __exit fini(void) +{ + ipt_unregister_target(&ipt_TTL); +} + +module_init(init); +module_exit(fini); diff --git a/release/src/linux/linux/net/ipv4/netfilter/ipt_account.c b/release/src/linux/linux/net/ipv4/netfilter/ipt_account.c new file mode 100644 index 00000000..7fd34562 --- /dev/null +++ b/release/src/linux/linux/net/ipv4/netfilter/ipt_account.c @@ -0,0 +1,942 @@ +/* + * accounting match (ipt_account.c) + * (C) 2003,2004 by Piotr Gasidlo (quaker@barbara.eu.org) + * + * Version: 0.1.7 + * + * This software is distributed under the terms of GNU GPL + */ + +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/proc_fs.h> +#include <linux/spinlock.h> +#include <linux/vmalloc.h> +#include <linux/interrupt.h> +#include <linux/ctype.h> + +#include <linux/seq_file.h> + +#include <asm/uaccess.h> + +#include <linux/ip.h> +#include <linux/tcp.h> +#include <linux/udp.h> + +#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter_ipv4/ipt_account.h> + +#if defined(CONFIG_IP_NF_MATCH_ACCOUNT_DEBUG) + #define dprintk(format,args...) printk(format,##args) +#else + #define dprintk(format,args...) +#endif + +static char version[] = +KERN_INFO IPT_ACCOUNT_NAME " " IPT_ACCOUNT_VERSION " : Piotr Gasid³o <quaker@barbara.eu.org>, http://www.barbara.eu.org/~quaker/ipt_account/\n"; + +/* rights for files created in /proc/net/ipt_account/ */ +static int permissions = 0644; +/* maximal netmask for single table */ +static int netmask = 16; + +/* module information */ +MODULE_AUTHOR("Piotr Gasidlo <quaker@barbara.eu.org>"); +MODULE_DESCRIPTION("Traffic accounting modules"); +MODULE_LICENSE("GPL"); +MODULE_PARM(permissions,"i"); +MODULE_PARM_DESC(permissions,"permissions on /proc/net/ipt_account/* files"); +MODULE_PARM(netmask, "i"); +MODULE_PARM_DESC(netmask, "maximum *save* size of one list (netmask)"); + +/* structure with statistics counters */ +struct t_ipt_account_stat { + u_int64_t b_all, b_tcp, b_udp, b_icmp, b_other; /* byte counters for all/tcp/udp/icmp/other traffic */ + u_int64_t p_all, p_tcp, p_udp, p_icmp, p_other; /* packet counters for all/tcp/udp/icmp/other traffic */ +}; + +/* stucture with statistics counters, used when table is created with --ashort switch */ +struct t_ipt_account_stat_short { + u_int64_t b_all; /* byte counters for all traffic */ + u_int64_t p_all; /* packet counters for all traffic */ +}; + +/* structure holding to/from statistics for single ip */ +struct t_ipt_account_ip_list { + struct t_ipt_account_stat src; + struct t_ipt_account_stat dest; + unsigned long time; /* time when this record was last updated */ + +}; + +/* same as above, for tables with --ashort switch */ +struct t_ipt_account_ip_list_short { + struct t_ipt_account_stat_short src; + struct t_ipt_account_stat_short dest; + unsigned long time; +}; + +/* structure describing single table */ +struct t_ipt_account_table { + char name[IPT_ACCOUNT_NAME_LEN]; /* table name ( = filename in /proc/net/ipt_account/) */ + union { /* table with statistics for each ip in network/netmask */ + struct t_ipt_account_ip_list *l; + struct t_ipt_account_ip_list_short *s; + } ip_list; + u_int32_t network; /* network/netmask covered by table*/ + u_int32_t netmask; + u_int32_t count; + int shortlisting:1; /* show only total columns of counters */ + int use_count; /* rules counter - counting number of rules using this table */ + struct t_ipt_account_table *next; + spinlock_t ip_list_lock; + struct proc_dir_entry *status_file; +}; + +/* we must use spinlocks to avoid parallel modifications of table list */ +static spinlock_t account_lock = SPIN_LOCK_UNLOCKED; + +static struct proc_dir_entry *proc_net_ipt_account = NULL; + +/* root pointer holding list of the tables */ +static struct t_ipt_account_table *account_tables = NULL; + +/* convert ascii to ip */ +int atoip(char *buffer, u_int32_t *ip) { + + char *bufferptr = buffer; + int part, shift; + + /* zero ip */ + *ip = 0; + + /* first must be a digit */ + if (!isdigit(*bufferptr)) + return 0; + + /* parse first 3 octets (III.III.III.iii) */ + for (part = 0, shift = 24; *bufferptr && shift; bufferptr++) { + if (isdigit(*bufferptr)) { + part = part * 10 + (*bufferptr - '0'); + continue; + } + if (*bufferptr == '.') { + if (part > 255) + return 0; + *ip |= part << shift; + shift -= 8; + part = 0; + continue; + } + return 0; + } + + /* we expect more digts */ + if (!*bufferptr) + return 0; + /* parse last octet (iii.iii.iii.III) */ + for (; *bufferptr; bufferptr++) { + if (isdigit(*bufferptr)) { + part = part * 10 + (*bufferptr - '0'); + continue; + } else { + if (part > 255) + return 0; + *ip |= part; + break; + } + } + return (bufferptr - buffer); +} + +/* convert ascii to 64bit integer */ +int atoi64(char *buffer, u_int64_t *i) { + char *bufferptr = buffer; + + /* zero integer */ + *i = 0; + + while (isdigit(*bufferptr)) { + *i = *i * 10 + (*bufferptr - '0'); + bufferptr++; + } + return (bufferptr - buffer); +} + +static void *account_seq_start(struct seq_file *s, loff_t *pos) +{ + struct proc_dir_entry *pde = s->private; + struct t_ipt_account_table *table = pde->data; + + unsigned int *bucket; + + spin_lock_bh(&table->ip_list_lock); + if (*pos >= table->count) + return NULL; + + bucket = kmalloc(sizeof(unsigned int), GFP_KERNEL); + if (!bucket) + return ERR_PTR(-ENOMEM); + *bucket = *pos; + return bucket; +} + +static void *account_seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + struct proc_dir_entry *pde = s->private; + struct t_ipt_account_table *table = pde->data; + + unsigned int *bucket = (unsigned int *)v; + + *pos = ++(*bucket); + if (*pos >= table->count) { + kfree(v); + return NULL; + } + return bucket; +} + +static void account_seq_stop(struct seq_file *s, void *v) +{ + struct proc_dir_entry *pde = s->private; + struct t_ipt_account_table *table = pde->data; + unsigned int *bucket = (unsigned int *)v; + kfree(bucket); + spin_unlock_bh(&table->ip_list_lock); +} + +static int account_seq_write(struct file *file, const char *ubuffer, + size_t ulength, loff_t *pos) +{ + struct proc_dir_entry *pde = ((struct seq_file *)file->private_data)->private; + struct t_ipt_account_table *table = pde->data; + char buffer[1024], *bufferptr; + int length; + + u_int32_t ip; + int len, i; + struct t_ipt_account_ip_list l; + struct t_ipt_account_ip_list_short s; + u_int64_t *p, dummy; + + + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() entered.\n"); + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() ulength = %zi.\n", ulength); + + length = ulength; + if (ulength > 1024) + length = 1024; + if (copy_from_user(buffer, ubuffer, length)) + return -EFAULT; + buffer[length - 1] = 0; + bufferptr = buffer; + + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() buffer = \'%s\' length = %i.\n", buffer, length); + + /* reset table counters */ + if (!memcmp(buffer, "reset", 5)) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() got \"reset\".\n"); + if (!table->shortlisting) { + spin_lock_bh(&table->ip_list_lock); + memset(table->ip_list.l, 0, sizeof(struct t_ipt_account_ip_list) * table->count); + spin_unlock_bh(&table->ip_list_lock); + } else { + spin_lock_bh(&table->ip_list_lock); + memset(table->ip_list.s, 0, sizeof(struct t_ipt_account_ip_list_short) * table->count); + spin_unlock_bh(&table->ip_list_lock); + } + return length; + } + + if (!memcmp(buffer, "ip", 2)) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() got \"ip\".\n"); + bufferptr += 2; + if (!isspace(*bufferptr)) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() expected space (%ti).\n", bufferptr - buffer); + return length; /* expected space */ + } + bufferptr += 1; + if (*bufferptr != '=') { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() expected equal (%ti).\n", bufferptr - buffer); + return length; /* expected equal */ + } + bufferptr += 1; + if (!isspace(*bufferptr)) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() expected space (%ti).\n", bufferptr - buffer); + return length; /* expected space */ + } + bufferptr += 1; + if (!(len = atoip(bufferptr, &ip))) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() expected ip (%ti).\n", bufferptr - buffer); + return length; /* expected ip */ + } + bufferptr += len; + if ((ip & table->netmask) != table->network) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() expected ip [%u.%u.%u.%u] from table's network/netmask [%u.%u.%u.%u/%u.%u.%u.%u].\n", HIPQUAD(ip), HIPQUAD(table->network), HIPQUAD(table->netmask)); + return length; /* expected ip from table's network/netmask */ + } + if (!table->shortlisting) { + memset(&l, 0, sizeof(struct t_ipt_account_ip_list)); + while(*bufferptr) { + if (!isspace(*bufferptr)) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() expected space (%ti).\n", bufferptr - buffer); + return length; /* expected space */ + } + bufferptr += 1; + if (!memcmp(bufferptr, "bytes_src", 9)) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() got bytes_src (%ti).\n", bufferptr - buffer); + p = &l.src.b_all; + bufferptr += 9; + } else if (!memcmp(bufferptr, "bytes_dest", 10)) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() got bytes_dest (%ti).\n", bufferptr - buffer); + p = &l.dest.b_all; + bufferptr += 10; + } else if (!memcmp(bufferptr, "packets_src", 11)) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() got packets_src (%ti).\n", bufferptr - buffer); + p = &l.src.p_all; + bufferptr += 11; + } else if (!memcmp(bufferptr, "packets_dest", 12)) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() got packets_dest (%ti).\n", bufferptr - buffer); + p = &l.dest.p_all; + bufferptr += 12; + } else if (!memcmp(bufferptr, "time", 4)) { + /* time hack, ignore time tokens */ + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() got time (%ti).\n", bufferptr - buffer); + bufferptr += 4; + if (!isspace(*bufferptr)) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() expected space (%ti).\n", bufferptr - buffer); + return length; /* expected space */ + } + bufferptr += 1; + if (*bufferptr != '=') { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() expected equal (%ti).\n", bufferptr - buffer); + return length; /* expected equal */ + } + bufferptr += 1; + if (!isspace(*bufferptr)) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() expected space (%ti).\n", bufferptr - buffer); + return length; /* expected space */ + } + bufferptr += 1; + if (!(len = atoi64(bufferptr, &dummy))) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() expected int64 (%ti).\n", bufferptr - buffer); + return length; /* expected int64 */ + } + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() got %llu (%ti).\n", dummy, bufferptr - buffer); + bufferptr += len; + continue; /* skip time token */ + } else + return length; /* expected token */ + if (!isspace(*bufferptr)) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() expected space (%ti).\n", bufferptr - buffer); + return length; /* expected space */ + } + bufferptr += 1; + if (*bufferptr != '=') { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() expected equal (%ti).\n", bufferptr - buffer); + return length; /* expected equal */ + } + bufferptr += 1; + for (i = 0; i < 5; i++) { + if (!isspace(*bufferptr)) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() expected space (%ti).\n", bufferptr - buffer); + return length; /* expected space */ + } + bufferptr += 1; + if (!(len = atoi64(bufferptr, p))) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() expected int64 (%ti).\n", bufferptr - buffer); + return length; /* expected int64 */ + } + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() got %llu (%ti).\n", *p, bufferptr - buffer); + bufferptr += len; + p++; + } + } + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() updating row.\n"); + spin_lock_bh(&table->ip_list_lock); + /* update counters, do not overwrite time field */ + memcpy(&table->ip_list.l[ip - table->network], &l, sizeof(struct t_ipt_account_ip_list) - sizeof(unsigned long)); + spin_unlock_bh(&table->ip_list_lock); + } else { + memset(&s, 0, sizeof(struct t_ipt_account_ip_list_short)); + while(*bufferptr) { + if (!isspace(*bufferptr)) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() expected space (%ti).\n", bufferptr - buffer); + return length; /* expected space */ + } + bufferptr += 1; + if (!memcmp(bufferptr, "bytes_src", 9)) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() got bytes_src (%ti).\n", bufferptr - buffer); + p = &s.src.b_all; + bufferptr += 9; + } else if (!memcmp(bufferptr, "bytes_dest", 10)) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() got bytes_dest (%ti).\n", bufferptr - buffer); + p = &s.dest.b_all; + bufferptr += 10; + } else if (!memcmp(bufferptr, "packets_src", 11)) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() got packets_src (%ti).\n", bufferptr - buffer); + p = &s.src.p_all; + bufferptr += 11; + } else if (!memcmp(bufferptr, "packets_dest", 12)) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() got packets_dest (%ti).\n", bufferptr - buffer); + p = &s.dest.p_all; + bufferptr += 12; + } else if (!memcmp(bufferptr, "time", 4)) { + /* time hack, ignore time tokens */ + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() got time (%ti).\n", bufferptr - buffer); + bufferptr += 4; + if (!isspace(*bufferptr)) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() expected space (%ti).\n", bufferptr - buffer); + return length; /* expected space */ + } + bufferptr += 1; + if (*bufferptr != '=') { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() expected equal (%ti).\n", bufferptr - buffer); + return length; /* expected equal */ + } + bufferptr += 1; + if (!isspace(*bufferptr)) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() expected space (%ti).\n", bufferptr - buffer); + return length; /* expected space */ + } + bufferptr += 1; + if (!(len = atoi64(bufferptr, &dummy))) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() expected int64 (%ti).\n", bufferptr - buffer); + return length; /* expected int64 */ + } + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() got %llu (%ti).\n", dummy, bufferptr - buffer); + bufferptr += len; + continue; /* skip time token */ + } else { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() expected token (%ti).\n", bufferptr - buffer); + return length; /* expected token */ + } + if (!isspace(*bufferptr)) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() expected space (%ti).\n", bufferptr - buffer); + return length; /* expected space */ + } + bufferptr += 1; + if (*bufferptr != '=') { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() expected equal (%ti).\n", bufferptr - buffer); + return length; /* expected equal */ + } + bufferptr += 1; + if (!isspace(*bufferptr)) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() expected space (%ti).\n", bufferptr - buffer); + return length; /* expected space */ + } + bufferptr += 1; + if (!(len = atoi64(bufferptr, p))) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() expected int64 (%ti).\n", bufferptr - buffer); + return length; /* expected int64 */ + } + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() got %llu (%ti).\n", *p, bufferptr - buffer); + bufferptr += len; + } + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() updating row.\n"); + spin_lock_bh(&table->ip_list_lock); + /* update counters, do not overwrite time field */ + memcpy(&table->ip_list.s[ip - table->network], &s, sizeof(struct t_ipt_account_ip_list_short) - sizeof(unsigned long)); + spin_unlock_bh(&table->ip_list_lock); + } + } + + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() left.\n"); + return length; +} + + +static int account_seq_show(struct seq_file *s, void *v) +{ + struct proc_dir_entry *pde = s->private; + struct t_ipt_account_table *table = pde->data; + unsigned int *bucket = (unsigned int *)v; + + u_int32_t address = table->network + *bucket; + struct timespec last; + + if (!table->shortlisting) { + jiffies_to_timespec(jiffies - table->ip_list.l[*bucket].time, &last); + seq_printf(s, + "ip = %u.%u.%u.%u bytes_src = %llu %llu %llu %llu %llu packets_src = %llu %llu %llu %llu %llu bytes_dest = %llu %llu %llu %llu %llu packets_dest = %llu %llu %llu %llu %llu time = %lu\n", + HIPQUAD(address), + table->ip_list.l[*bucket].src.b_all, + table->ip_list.l[*bucket].src.b_tcp, + table->ip_list.l[*bucket].src.b_udp, + table->ip_list.l[*bucket].src.b_icmp, + table->ip_list.l[*bucket].src.b_other, + table->ip_list.l[*bucket].src.p_all, + table->ip_list.l[*bucket].src.p_tcp, + table->ip_list.l[*bucket].src.p_udp, + table->ip_list.l[*bucket].src.p_icmp, + table->ip_list.l[*bucket].src.p_other, + table->ip_list.l[*bucket].dest.b_all, + table->ip_list.l[*bucket].dest.b_tcp, + table->ip_list.l[*bucket].dest.b_udp, + table->ip_list.l[*bucket].dest.b_icmp, + table->ip_list.l[*bucket].dest.b_other, + table->ip_list.l[*bucket].dest.p_all, + table->ip_list.l[*bucket].dest.p_tcp, + table->ip_list.l[*bucket].dest.p_udp, + table->ip_list.l[*bucket].dest.p_icmp, + table->ip_list.l[*bucket].dest.p_other, + last.tv_sec + ); + } else { + jiffies_to_timespec(jiffies - table->ip_list.s[*bucket].time, &last); + seq_printf(s, + "ip = %u.%u.%u.%u bytes_src = %llu packets_src = %llu bytes_dest = %llu packets_dest = %llu time = %lu\n", + HIPQUAD(address), + table->ip_list.s[*bucket].src.b_all, + table->ip_list.s[*bucket].src.p_all, + table->ip_list.s[*bucket].dest.b_all, + table->ip_list.s[*bucket].dest.p_all, + last.tv_sec + ); + } + return 0; +} + +static struct seq_operations account_seq_ops = { + .start = account_seq_start, + .next = account_seq_next, + .stop = account_seq_stop, + .show = account_seq_show +}; + +static int account_seq_open(struct inode *inode, struct file *file) +{ + int ret = seq_open(file, &account_seq_ops); + + if (!ret) { + struct seq_file *sf = file->private_data; + sf->private = PDE(inode); + } + return ret; +} + +static struct file_operations account_file_ops = { + .owner = THIS_MODULE, + .open = account_seq_open, + .read = seq_read, + .write = account_seq_write, + .llseek = seq_lseek, + .release = seq_release +}; + +/* do raw accounting */ +static inline void do_account(struct t_ipt_account_stat *stat, const struct sk_buff *skb) { + + /* update packet & bytes counters in *stat structure */ + stat->b_all += skb->len; + stat->p_all++; + + switch (skb->nh.iph->protocol) { + case IPPROTO_TCP: + stat->b_tcp += skb->len; + stat->p_tcp++; + break; + case IPPROTO_UDP: + stat->b_udp += skb->len; + stat->p_udp++; + break; + case IPPROTO_ICMP: + stat->b_icmp += skb->len; + stat->p_icmp++; + break; + default: + stat->b_other += skb->len; + stat->p_other++; + } +} + +static inline void do_account_short(struct t_ipt_account_stat_short *stat, const struct sk_buff *skb) { + + /* update packet & bytes counters in *stat structure */ + stat->b_all += skb->len; + stat->p_all++; +} + +static int match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, + int offset, + const void *hdr, + u_int16_t datalen, + int *hotdrop) +{ + + const struct t_ipt_account_info *info = (struct t_ipt_account_info*)matchinfo; + struct t_ipt_account_table *table; + int ret; + unsigned long now; + + u_int32_t address; + + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": match() entered.\n"); + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": match() match name = %s.\n", info->name); + + spin_lock_bh(&account_lock); + /* find the right table */ + table = account_tables; + while (table && strncmp(table->name, info->name, IPT_ACCOUNT_NAME_LEN) && (table = table->next)); + spin_unlock_bh(&account_lock); + + if (table == NULL) { + /* ups, no table with that name */ + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": match() table %s not found. Leaving.\n", info->name); + return 0; + } + + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": match() table found %s\n", table->name); + + /* lock table while updating statistics */ + spin_lock_bh(&table->ip_list_lock); + + /* default: no match */ + ret = 0; + + /* get current time */ + now = jiffies; + + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": match() got packet src = %u.%u.%u.%u, dst = %u.%u.%u.%u, proto = %u.\n", NIPQUAD(skb->nh.iph->saddr), NIPQUAD(skb->nh.iph->daddr), skb->nh.iph->protocol); + + /* check whether traffic from source ip address ... */ + address = ntohl(skb->nh.iph->saddr); + /* ... is being accounted by this table */ + if (address && ((u_int32_t)(address & table->netmask) == (u_int32_t)table->network)) { + /* yes, account this packet */ + dprintk(KERN_INFO "ipt_account: match() accounting packet src = %u.%u.%u.%u, proto = %u.\n", HIPQUAD(address), skb->nh.iph->protocol); + /* update counters this host */ + if (!table->shortlisting) { + do_account(&table->ip_list.l[address - table->network].src, skb); + table->ip_list.l[address - table->network].time = now; + /* update also counters for all hosts in this table (network address) */ + if (table->netmask != INADDR_BROADCAST) { + do_account(&table->ip_list.l[0].src, skb); + table->ip_list.l[0].time = now; + } + } else { + do_account_short(&table->ip_list.s[address - table->network].src, skb); + table->ip_list.s[address - table->network].time = now; + /* update also counters for all hosts in this table (network address) */ + if (table->netmask != INADDR_BROADCAST) { + do_account_short(&table->ip_list.s[0].src, skb); + table->ip_list.s[0].time = now; + } + } + /* yes, it's a match */ + ret = 1; + } + + /* do the same thing with destination ip address */ + address = ntohl(skb->nh.iph->daddr); + if (address && ((u_int32_t)(address & table->netmask) == (u_int32_t)table->network)) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": match() accounting packet dst = %u.%u.%u.%u, proto = %u.\n", HIPQUAD(address), skb->nh.iph->protocol); + if (!table->shortlisting) { + do_account(&table->ip_list.l[address - table->network].dest, skb); + table->ip_list.l[address - table->network].time = now; + if (table->netmask != INADDR_BROADCAST) { + do_account(&table->ip_list.l[0].dest, skb); + table->ip_list.s[0].time = now; + } + } else { + do_account_short(&table->ip_list.s[address - table->network].dest, skb); + table->ip_list.s[address - table->network].time = now; + if (table->netmask != INADDR_BROADCAST) { + do_account_short(&table->ip_list.s[0].dest, skb); + table->ip_list.s[0].time = now; + } + } + ret = 1; + } + spin_unlock_bh(&table->ip_list_lock); + + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": match() left.\n"); + + return ret; +} + +static int checkentry(const char *tablename, + const struct ipt_ip *ip, + void *matchinfo, + unsigned int matchinfosize, + unsigned int hook_mask) +{ + const struct t_ipt_account_info *info = matchinfo; + struct t_ipt_account_table *table, *find_table, *last_table; + int ret = 0; + + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": checkentry() entered.\n"); + + if (matchinfosize != IPT_ALIGN(sizeof(struct t_ipt_account_info))) return 0; + if (!info->name || !info->name[0]) return 0; + + /* find whether table with this name already exists */ + spin_lock_bh(&account_lock); + find_table = account_tables; + while( (last_table = find_table) && strncmp(info->name,find_table->name,IPT_ACCOUNT_NAME_LEN) && (find_table = find_table->next) ); + if (find_table != NULL) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": checkentry() table %s found.\n", info->name); + /* if table exists, check whether table network/netmask equals rule network/netmask */ + if (find_table->network != info->network || find_table->netmask != info->netmask || find_table->shortlisting != info->shortlisting) { + spin_unlock_bh(&account_lock); + printk(KERN_INFO IPT_ACCOUNT_NAME ": checkentry() wrong parameters (not equals existing table parameters).\n"); + ret = 0; + goto failure; + } + /* increment table use count */ + find_table->use_count++; + spin_unlock_bh(&account_lock); + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": checkentry() incrementing use count.\n"); + ret = 1; + goto failure; + } + spin_unlock_bh(&account_lock); + + /* check netmask first, before allocating memory */ + if (info->netmask < ((1 << netmask) - 1)) { + printk(KERN_INFO IPT_ACCOUNT_NAME ": checkentry() too big netmask.\n"); + ret = 0; + goto failure; + } + + /* table doesn't exist - create new */ + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": checkentry() allocating %zu for new table %s.\n", sizeof(struct t_ipt_account_table), info->name); + table = vmalloc(sizeof(struct t_ipt_account_table)); + if (table == NULL) { + printk(KERN_INFO IPT_ACCOUNT_NAME ": checkentry() failed to allocate %zu for new table %s.\n", sizeof(struct t_ipt_account_table), info->name); + ret = 0; /* was -ENOMEM */ + goto failure; + } + + /* setup table parameters */ + table->ip_list_lock = SPIN_LOCK_UNLOCKED; + table->next = NULL; + table->use_count = 1; + table->network = info->network; + table->netmask = info->netmask; + table->shortlisting = info->shortlisting; + table->count = (~table->netmask) + 1; + strncpy(table->name,info->name,IPT_ACCOUNT_NAME_LEN); + table->name[IPT_ACCOUNT_NAME_LEN - 1] = '\0'; + + /* allocate memory for table->ip_list */ + if (!table->shortlisting) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": checkentry() allocating %zu for ip_list.\n", sizeof(struct t_ipt_account_ip_list) * table->count); + table->ip_list.l = vmalloc(sizeof(struct t_ipt_account_ip_list) * table->count); + if (table->ip_list.l == NULL) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": checkentry() failed to allocate %zu for ip_list.\n", sizeof(struct t_ipt_account_ip_list) * table->count); + ret = 0; /* was -ENOMEM */ + goto failure_table; + } + memset(table->ip_list.l, 0, sizeof(struct t_ipt_account_ip_list) * table->count); + } else { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": checkentry() allocating %zu for ip_list.\n", sizeof(struct t_ipt_account_ip_list_short) * table->count); + table->ip_list.s = vmalloc(sizeof(struct t_ipt_account_ip_list_short) * table->count); + if (table->ip_list.s == NULL) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": checkentry() failed to allocate %zu for ip_list.\n", sizeof(struct t_ipt_account_ip_list_short) * table->count); + ret = 0; /* was -ENOMEM */ + goto failure_table; + } + memset(table->ip_list.s, 0, sizeof(struct t_ipt_account_ip_list_short) * table->count); + } + + /* put table into chain */ + spin_lock_bh(&account_lock); + find_table = account_tables; + while( (last_table = find_table) && strncmp(info->name, find_table->name, IPT_ACCOUNT_NAME_LEN) && (find_table = find_table->next) ); + if (find_table != NULL) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": checkentry() table %s found.\n", info->name); + if (find_table->network != info->network || find_table->netmask != info->netmask) { + spin_unlock_bh(&account_lock); + printk(KERN_INFO IPT_ACCOUNT_NAME ": checkentry() wrong network/netmask.\n"); + ret = 0; + goto failure_ip_list; + } + find_table->use_count++; + spin_unlock_bh(&account_lock); + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": checkentry() incrementing use count.\n"); + ret = 1; + goto failure_ip_list; + } + if (!last_table) + account_tables = table; + else + last_table->next = table; + spin_unlock_bh(&account_lock); + + /* create procfs status file */ + table->status_file = create_proc_entry(table->name, permissions, proc_net_ipt_account); + if (table->status_file == NULL) { + ret = 0; /* was -ENOMEM */ + goto failure_unlink; + } + table->status_file->owner = THIS_MODULE; + table->status_file->data = table; + wmb(); +// if (!table->shortlisting) + table->status_file->proc_fops = &account_file_ops; +// else +// table->status_file->proc_fops = &account_file_ops_short; + + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": checkentry() left.\n"); + /* everything went just okey */ + return 1; + + /* do cleanup in case of failure */ +failure_unlink: + /* remove table from list */ + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": checkentry() removing table.\n"); + spin_lock_bh(&account_lock); + last_table = NULL; + table = account_tables; + if (table == NULL) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": checkentry() no table found. Leaving.\n"); + spin_unlock_bh(&account_lock); + return 0; /* was -ENOMEM */ + } + while (strncmp(info->name, table->name, IPT_ACCOUNT_NAME_LEN) && (last_table = table) && (table = table->next)); + if (table == NULL) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": checkentry() table already destroyed. Leaving.\n"); + spin_unlock_bh(&account_lock); + return 0; /* was -ENOMEM */ + } + if (last_table) + last_table->next = table->next; + else + account_tables = table->next; + spin_unlock_bh(&account_lock); +failure_ip_list: + /* free memory allocated for statistics table */ + if (!table->shortlisting) + vfree(table->ip_list.l); + else + vfree(table->ip_list.s); +failure_table: + /* free table */ + vfree(table); +failure: + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": checkentry() left. Table not created.\n"); + /* failure return */ + return ret; +} + +static void destroy(void *matchinfo, + unsigned int matchinfosize) +{ + const struct t_ipt_account_info *info = matchinfo; + struct t_ipt_account_table *table, *last_table; + + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": destory() entered.\n"); + + if (matchinfosize != IPT_ALIGN(sizeof(struct t_ipt_account_info))) return; + + /* search for table */ + spin_lock_bh(&account_lock); + last_table = NULL; + table = account_tables; + if(table == NULL) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": destory() no tables found. Leaving.\n"); + spin_unlock_bh(&account_lock); + return; + } + while( strncmp(info->name,table->name,IPT_ACCOUNT_NAME_LEN) && (last_table = table) && (table = table->next) ); + if (table == NULL) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": destory() no table %s not found. Leaving.\n", info->name); + spin_unlock_bh(&account_lock); + return; + } + + /* decrement table use-count */ + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": destory() decrementing use count.\n"); + table->use_count--; + if (table->use_count) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": destory() table still in use. Leaving.\n"); + spin_unlock_bh(&account_lock); + return; + } + + /* remove table if use-count is zero */ + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": destory() table %s not used. Removing.\n", table->name); + + /* unlink table */ + if(last_table) + last_table->next = table->next; + else + account_tables = table->next; + spin_unlock_bh(&account_lock); + + /* wait while table is still in use */ + spin_lock_bh(&table->ip_list_lock); + spin_unlock_bh(&table->ip_list_lock); + + /* remove proc entries */ + remove_proc_entry(table->name, proc_net_ipt_account); + + /* remove table */ + if (!table->shortlisting) + vfree(table->ip_list.l); + else + vfree(table->ip_list.s); + vfree(table); + + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": destory() left.\n"); + return; +} + +static struct ipt_match account_match = { + .name = "account", + .match = &match, + .checkentry = &checkentry, + .destroy = &destroy, + .me = THIS_MODULE +}; + +static int __init init(void) +{ + int err; + + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": __init() entered.\n"); + printk(version); + /* check params */ + if (netmask > 32 || netmask < 0) { + printk(KERN_INFO "account: Wrong netmask given by netmask parameter (%i). Valid is 32 to 0.\n", netmask); + err = -EINVAL; + goto doexit; + } + + /* create /proc/net/ipt_account directory */ + proc_net_ipt_account = proc_mkdir("ipt_account", proc_net); + if (!proc_net_ipt_account) { + printk(KERN_INFO IPT_ACCOUNT_NAME ": checkentry() failed to create procfs entry.\n"); + err = -ENOMEM; + goto doexit; + } + proc_net_ipt_account->owner = THIS_MODULE; + + err = ipt_register_match(&account_match); + if (err) { + printk(KERN_INFO IPT_ACCOUNT_NAME ": checkentry() failed to register match.\n"); + remove_proc_entry("ipt_account", proc_net); + } +doexit: + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": __init() left.\n"); + return err; +} + +static void __exit fini(void) +{ + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": __exit() entered.\n"); + + ipt_unregister_match(&account_match); + /* remove /proc/net/ipt_account/ directory */ + remove_proc_entry("ipt_account", proc_net); + + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": __exit() left.\n"); +} + +module_init(init); +module_exit(fini); + diff --git a/release/src/linux/linux/net/ipv4/netfilter/ipt_bcount.c b/release/src/linux/linux/net/ipv4/netfilter/ipt_bcount.c new file mode 100644 index 00000000..63f93a14 --- /dev/null +++ b/release/src/linux/linux/net/ipv4/netfilter/ipt_bcount.c @@ -0,0 +1,59 @@ +/* + + bcount match (experimental) + Copyright (C) 2006 Jonathan Zarate + + Licensed under GNU GPL v2 or later. + +*/ +#include <linux/module.h> +#include <linux/skbuff.h> +#include <net/sock.h> +#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter_ipv4/ip_conntrack.h> +#include <linux/netfilter_ipv4/ipt_bcount.h> + +// #define LOG printk +#define LOG(...) do { } while (0); + + +static int match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, + const void *matchinfo, int offset, const void *hdr, u_int16_t datalen, int *hotdrop) +{ + const struct ipt_bcount_match *info = matchinfo; + struct ip_conntrack *ct; + enum ip_conntrack_info ctinfo; + + ct = ip_conntrack_get((struct sk_buff *)skb, &ctinfo); + if (!ct) return !info->invert; + return ((ct->bcount >= info->min) && (ct->bcount <= info->max)) ^ info->invert; +} + +static int checkentry(const char *tablename, const struct ipt_ip *ip, void *matchinfo, + unsigned int matchsize, unsigned int hook_mask) +{ + return (matchsize == IPT_ALIGN(sizeof(struct ipt_bcount_match))); +} + + +static struct ipt_match bcount_match += { { NULL, NULL }, "bcount", &match, &checkentry, NULL, THIS_MODULE }; + +static int __init init(void) +{ + LOG(KERN_INFO "ipt_bcount <" __DATE__ " " __TIME__ "> loaded\n"); + return ipt_register_match(&bcount_match); +} + +static void __exit fini(void) +{ + ipt_unregister_match(&bcount_match); +} + +module_init(init); +module_exit(fini); + + +MODULE_AUTHOR("Jonathan Zarate"); +MODULE_DESCRIPTION("bcount match"); +MODULE_LICENSE("GPL"); diff --git a/release/src/linux/linux/net/ipv4/netfilter/ipt_condition.c b/release/src/linux/linux/net/ipv4/netfilter/ipt_condition.c new file mode 100644 index 00000000..c8ee72d5 --- /dev/null +++ b/release/src/linux/linux/net/ipv4/netfilter/ipt_condition.c @@ -0,0 +1,256 @@ +/*-------------------------------------------*\ +| Netfilter Condition Module | +| | +| Description: This module allows firewall | +| rules to match using condition variables | +| stored in /proc files. | +| | +| Author: Stephane Ouellette 2002-10-22 | +| <ouellettes@videotron.ca> | +| | +| History: | +| 2003-02-10 Second version with improved | +| locking and simplified code. | +| | +| This software is distributed under the | +| terms of the GNU GPL. | +\*-------------------------------------------*/ + +#include<linux/module.h> +#include<linux/proc_fs.h> +#include<linux/spinlock.h> +#include<linux/string.h> +#include<asm/atomic.h> +#include<linux/netfilter_ipv4/ip_tables.h> +#include<linux/netfilter_ipv4/ipt_condition.h> + + +#ifndef CONFIG_PROC_FS +#error "Proc file system support is required for this module" +#endif + + +MODULE_AUTHOR("Stephane Ouellette <ouellettes@videotron.ca>"); +MODULE_DESCRIPTION("Allows rules to match against condition variables"); +MODULE_LICENSE("GPL"); + + +struct condition_variable { + struct condition_variable *next; + struct proc_dir_entry *status_proc; + atomic_t refcount; + int enabled; /* TRUE == 1, FALSE == 0 */ +}; + + +static rwlock_t list_lock; +static struct condition_variable *head = NULL; +static struct proc_dir_entry *proc_net_condition = NULL; + + +static int +ipt_condition_read_info(char *buffer, char **start, off_t offset, + int length, int *eof, void *data) +{ + struct condition_variable *var = + (struct condition_variable *) data; + + if (offset == 0) { + *start = buffer; + buffer[0] = (var->enabled) ? '1' : '0'; + buffer[1] = '\n'; + return 2; + } + + *eof = 1; + return 0; +} + + +static int +ipt_condition_write_info(struct file *file, const char *buffer, + unsigned long length, void *data) +{ + struct condition_variable *var = + (struct condition_variable *) data; + + if (length) { + /* Match only on the first character */ + switch (buffer[0]) { + case '0': + var->enabled = 0; + break; + case '1': + var->enabled = 1; + } + } + + return (int) length; +} + + +static int +match(const struct sk_buff *skb, const struct net_device *in, + const struct net_device *out, const void *matchinfo, int offset, + const void *hdr, u_int16_t datalen, int *hotdrop) +{ + const struct condition_info *info = + (const struct condition_info *) matchinfo; + struct condition_variable *var; + int condition_status = 0; + + read_lock(&list_lock); + + for (var = head; var; var = var->next) { + if (strcmp(info->name, var->status_proc->name) == 0) { + condition_status = var->enabled; + break; + } + } + + read_unlock(&list_lock); + + return condition_status ^ info->invert; +} + + + +static int +checkentry(const char *tablename, const struct ipt_ip *ip, + void *matchinfo, unsigned int matchsize, unsigned int hook_mask) +{ + struct condition_info *info = (struct condition_info *) matchinfo; + struct condition_variable *var, *newvar; + + if (matchsize != IPT_ALIGN(sizeof(struct condition_info))) + return 0; + + /* The first step is to check if the condition variable already exists. */ + /* Here, a read lock is sufficient because we won't change the list */ + read_lock(&list_lock); + + for (var = head; var; var = var->next) { + if (strcmp(info->name, var->status_proc->name) == 0) { + atomic_inc(&var->refcount); + read_unlock(&list_lock); + return 1; + } + } + + read_unlock(&list_lock); + + /* At this point, we need to allocate a new condition variable */ + newvar = kmalloc(sizeof(struct condition_variable), GFP_KERNEL); + + if (!newvar) + return -ENOMEM; + + /* Create the condition variable's proc file entry */ + newvar->status_proc = create_proc_entry(info->name, 0644, proc_net_condition); + + if (!newvar->status_proc) { + /* + * There are two possibilities: + * 1- Another condition variable with the same name has been created, which is valid. + * 2- There was a memory allocation error. + */ + kfree(newvar); + read_lock(&list_lock); + + for (var = head; var; var = var->next) { + if (strcmp(info->name, var->status_proc->name) == 0) { + atomic_inc(&var->refcount); + read_unlock(&list_lock); + return 1; + } + } + + read_unlock(&list_lock); + return -ENOMEM; + } + + atomic_set(&newvar->refcount, 1); + newvar->enabled = 0; + newvar->status_proc->owner = THIS_MODULE; + newvar->status_proc->data = newvar; + wmb(); + newvar->status_proc->read_proc = ipt_condition_read_info; + newvar->status_proc->write_proc = ipt_condition_write_info; + + write_lock(&list_lock); + + newvar->next = head; + head = newvar; + + write_unlock(&list_lock); + + return 1; +} + + +static void +destroy(void *matchinfo, unsigned int matchsize) +{ + struct condition_info *info = (struct condition_info *) matchinfo; + struct condition_variable *var, *prev = NULL; + + if (matchsize != IPT_ALIGN(sizeof(struct condition_info))) + return; + + write_lock(&list_lock); + + for (var = head; var && strcmp(info->name, var->status_proc->name); + prev = var, var = var->next); + + if (var && atomic_dec_and_test(&var->refcount)) { + if (prev) + prev->next = var->next; + else + head = var->next; + + write_unlock(&list_lock); + remove_proc_entry(var->status_proc->name, proc_net_condition); + kfree(var); + } else + write_unlock(&list_lock); +} + + +static struct ipt_match condition_match = { + .name = "condition", + .match = &match, + .checkentry = &checkentry, + .destroy = &destroy, + .me = THIS_MODULE +}; + + +static int __init +init(void) +{ + int errorcode; + + rwlock_init(&list_lock); + proc_net_condition = proc_mkdir("ipt_condition", proc_net); + + if (proc_net_condition) { + errorcode = ipt_register_match(&condition_match); + + if (errorcode) + remove_proc_entry("ipt_condition", proc_net); + } else + errorcode = -EACCES; + + return errorcode; +} + + +static void __exit +fini(void) +{ + ipt_unregister_match(&condition_match); + remove_proc_entry("ipt_condition", proc_net); +} + +module_init(init); +module_exit(fini); diff --git a/release/src/linux/linux/net/ipv4/netfilter/ipt_connlimit.c b/release/src/linux/linux/net/ipv4/netfilter/ipt_connlimit.c new file mode 100644 index 00000000..abf8efff --- /dev/null +++ b/release/src/linux/linux/net/ipv4/netfilter/ipt_connlimit.c @@ -0,0 +1,222 @@ +/* + * netfilter module to limit the number of parallel tcp + * connections per IP address. + * (c) 2000 Gerd Knorr <kraxel@bytesex.org> + * Nov 2002: Martin Bene <martin.bene@icomedias.com>: + * only ignore TIME_WAIT or gone connections + * + * based on ... + * + * Kernel module to match connection tracking information. + * GPL (C) 1999 Rusty Russell (rusty@rustcorp.com.au). + */ +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/list.h> +#include <linux/version.h> +#include <linux/netfilter_ipv4/ip_conntrack.h> +#include <linux/netfilter_ipv4/ip_conntrack_core.h> +#include <linux/netfilter_ipv4/ip_conntrack_tcp.h> +#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter_ipv4/ipt_connlimit.h> + +#define DEBUG 0 + +MODULE_LICENSE("GPL"); + +/* we'll save the tuples of all connections we care about */ +struct ipt_connlimit_conn +{ + struct list_head list; + struct ip_conntrack_tuple tuple; +}; + +struct ipt_connlimit_data { + spinlock_t lock; + struct list_head iphash[256]; +}; + +static inline unsigned ipt_iphash(const unsigned addr) +{ + return ((addr ^ (addr >> 8) ^ (addr >> 16) ^ (addr >> 24)) & 0xff); +} + +static int count_them(struct ipt_connlimit_data *data, + u_int32_t addr, u_int32_t mask, + struct ip_conntrack *ct) +{ +#if DEBUG + const static char *tcp[] = { "none", "established", "syn_sent", "syn_recv", + "fin_wait", "time_wait", "close", "close_wait", + "last_ack", "listen" }; +#endif + int addit = 1, matches = 0; + struct ip_conntrack_tuple tuple; + struct ip_conntrack_tuple_hash *found; + struct ipt_connlimit_conn *conn; + struct list_head *hash,*lh; + + spin_lock_bh(&data->lock); + tuple = ct->tuplehash[0].tuple; + hash = &data->iphash[ipt_iphash(addr & mask)]; + + /* check the saved connections */ + for (lh = hash->next; lh != hash; lh = lh->next) { + conn = list_entry(lh,struct ipt_connlimit_conn,list); + found = ip_conntrack_find_get(&conn->tuple,ct); + if (found != NULL && + 0 == memcmp(&conn->tuple,&tuple,sizeof(tuple)) && + found->ctrack->proto.tcp.state != TCP_CONNTRACK_TIME_WAIT) { + /* Just to be sure we have it only once in the list. + We should'nt see tuples twice unless someone hooks this + into a table without "-p tcp --syn" */ + addit = 0; + } +#if DEBUG + printk("ipt_connlimit [%d]: src=%u.%u.%u.%u:%d dst=%u.%u.%u.%u:%d %s\n", + ipt_iphash(addr & mask), + NIPQUAD(conn->tuple.src.ip), ntohs(conn->tuple.src.u.tcp.port), + NIPQUAD(conn->tuple.dst.ip), ntohs(conn->tuple.dst.u.tcp.port), + (NULL != found) ? tcp[found->ctrack->proto.tcp.state] : "gone"); +#endif + if (NULL == found) { + /* this one is gone */ + lh = lh->prev; + list_del(lh->next); + kfree(conn); + continue; + } + if (found->ctrack->proto.tcp.state == TCP_CONNTRACK_TIME_WAIT) { + /* we don't care about connections which are + closed already -> ditch it */ + lh = lh->prev; + list_del(lh->next); + kfree(conn); + nf_conntrack_put(&found->ctrack->infos[0]); + continue; + } + if ((addr & mask) == (conn->tuple.src.ip & mask)) { + /* same source IP address -> be counted! */ + matches++; + } + nf_conntrack_put(&found->ctrack->infos[0]); + } + if (addit) { + /* save the new connection in our list */ +#if DEBUG + printk("ipt_connlimit [%d]: src=%u.%u.%u.%u:%d dst=%u.%u.%u.%u:%d new\n", + ipt_iphash(addr & mask), + NIPQUAD(tuple.src.ip), ntohs(tuple.src.u.tcp.port), + NIPQUAD(tuple.dst.ip), ntohs(tuple.dst.u.tcp.port)); +#endif + conn = kmalloc(sizeof(*conn),GFP_ATOMIC); + if (NULL == conn) + return -1; + memset(conn,0,sizeof(*conn)); + INIT_LIST_HEAD(&conn->list); + conn->tuple = tuple; + list_add(&conn->list,hash); + matches++; + } + spin_unlock_bh(&data->lock); + return matches; +} + +static int +match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, + int offset, + const void *hdr, + u_int16_t datalen, + int *hotdrop) +{ + const struct ipt_connlimit_info *info = matchinfo; + int connections, match; + struct ip_conntrack *ct; + enum ip_conntrack_info ctinfo; + + ct = ip_conntrack_get((struct sk_buff *)skb, &ctinfo); + if (NULL == ct) { + printk("ipt_connlimit: Oops: invalid ct state ?\n"); + *hotdrop = 1; + return 0; + } + connections = count_them(info->data,skb->nh.iph->saddr,info->mask,ct); + if (-1 == connections) { + printk("ipt_connlimit: Hmm, kmalloc failed :-(\n"); + *hotdrop = 1; /* let's free some memory :-) */ + return 0; + } + match = (info->inverse) ? (connections <= info->limit) : (connections > info->limit); +#if DEBUG + printk("ipt_connlimit: src=%u.%u.%u.%u mask=%u.%u.%u.%u " + "connections=%d limit=%d match=%s\n", + NIPQUAD(skb->nh.iph->saddr), NIPQUAD(info->mask), + connections, info->limit, match ? "yes" : "no"); +#endif + + return match; +} + +static int check(const char *tablename, + const struct ipt_ip *ip, + void *matchinfo, + unsigned int matchsize, + unsigned int hook_mask) +{ + struct ipt_connlimit_info *info = matchinfo; + int i; + + /* verify size */ + if (matchsize != IPT_ALIGN(sizeof(struct ipt_connlimit_info))) + return 0; + + /* refuse anything but tcp */ + if (ip->proto != IPPROTO_TCP) + return 0; + + /* init private data */ + info->data = kmalloc(sizeof(struct ipt_connlimit_data),GFP_KERNEL); + spin_lock_init(&(info->data->lock)); + for (i = 0; i < 256; i++) + INIT_LIST_HEAD(&(info->data->iphash[i])); + + return 1; +} + +static void destroy(void *matchinfo, unsigned int matchinfosize) +{ + struct ipt_connlimit_info *info = matchinfo; + struct ipt_connlimit_conn *conn; + struct list_head *hash; + int i; + + /* cleanup */ + for (i = 0; i < 256; i++) { + hash = &(info->data->iphash[i]); + while (hash != hash->next) { + conn = list_entry(hash->next,struct ipt_connlimit_conn,list); + list_del(hash->next); + kfree(conn); + } + } + kfree(info->data); +} + +static struct ipt_match connlimit_match += { { NULL, NULL }, "connlimit", &match, &check, &destroy, THIS_MODULE }; + +static int __init init(void) +{ + return ipt_register_match(&connlimit_match); +} + +static void __exit fini(void) +{ + ipt_unregister_match(&connlimit_match); +} + +module_init(init); +module_exit(fini); diff --git a/release/src/linux/linux/net/ipv4/netfilter/ipt_connmark.c b/release/src/linux/linux/net/ipv4/netfilter/ipt_connmark.c new file mode 100644 index 00000000..d795a339 --- /dev/null +++ b/release/src/linux/linux/net/ipv4/netfilter/ipt_connmark.c @@ -0,0 +1,83 @@ +/* This kernel module matches connection mark values set by the + * CONNMARK target + * + * Copyright (C) 2002,2004 MARA Systems AB <http://www.marasystems.com> + * by Henrik Nordstrom <hno@marasystems.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/module.h> +#include <linux/skbuff.h> + +MODULE_AUTHOR("Henrik Nordstrom <hno@marasytems.com>"); +MODULE_DESCRIPTION("IP tables connmark match module"); +MODULE_LICENSE("GPL"); + +#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter_ipv4/ipt_connmark.h> +#include <linux/netfilter_ipv4/ip_conntrack.h> + +static int +match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, + int offset, + const void *hdr, + u_int16_t datalen, + int *hotdrop) +{ + const struct ipt_connmark_info *info = matchinfo; + enum ip_conntrack_info ctinfo; + struct ip_conntrack *ct = ip_conntrack_get((struct sk_buff *)skb, &ctinfo); + if (!ct) + return 0; + + return ((ct->mark & info->mask) == info->mark) ^ info->invert; +} + +static int +checkentry(const char *tablename, + const struct ipt_ip *ip, + void *matchinfo, + unsigned int matchsize, + unsigned int hook_mask) +{ + if (matchsize != IPT_ALIGN(sizeof(struct ipt_connmark_info))) + return 0; + + return 1; +} + +static struct ipt_match connmark_match = { + .name = "connmark", + .match = &match, + .checkentry = &checkentry, + .me = THIS_MODULE +}; + +static int __init init(void) +{ + return ipt_register_match(&connmark_match); +} + +static void __exit fini(void) +{ + ipt_unregister_match(&connmark_match); +} + +module_init(init); +module_exit(fini); diff --git a/release/src/linux/linux/net/ipv4/netfilter/ipt_exp.c b/release/src/linux/linux/net/ipv4/netfilter/ipt_exp.c new file mode 100644 index 00000000..1b682b9c --- /dev/null +++ b/release/src/linux/linux/net/ipv4/netfilter/ipt_exp.c @@ -0,0 +1,57 @@ +/* + + Experimental Netfilter Crap + Copyright (C) 2006 Jonathan Zarate + +*/ +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/file.h> +#include <net/sock.h> + +#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter_ipv4/ipt_exp.h> +#include "../../bridge/br_private.h" + + +static int match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, + const void *matchinfo, int offset, const void *hdr, u_int16_t datalen, int *hotdrop) +{ +// const struct ipt_exp_info *info = matchinfo; + + if ((skb->mac.raw >= skb->head) && ((skb->mac.raw + ETH_HLEN) <= skb->data)) { + printk(KERN_INFO "exp src=%02X:%02X:%02X:%02X:%02X:%02X dst=%02X:%02X:%02X:%02X:%02X:%02X\n", + skb->mac.ethernet->h_source[0], skb->mac.ethernet->h_source[1], skb->mac.ethernet->h_source[2], + skb->mac.ethernet->h_source[3], skb->mac.ethernet->h_source[4], skb->mac.ethernet->h_source[5], + skb->mac.ethernet->h_dest[0], skb->mac.ethernet->h_dest[1], skb->mac.ethernet->h_dest[2], + skb->mac.ethernet->h_dest[3], skb->mac.ethernet->h_dest[4], skb->mac.ethernet->h_dest[5]); + return 1; + } + printk(KERN_INFO "exp mac=%p head=%p in=%p\n", skb->mac.raw, skb->head, in); + return 0; +} + +static int checkentry(const char *tablename, const struct ipt_ip *ip, void *matchinfo, + unsigned int matchsize, unsigned int hook_mask) +{ + return (matchsize == IPT_ALIGN(sizeof(struct ipt_exp_info))); +} + +static struct ipt_match exp_match + = { { NULL, NULL }, "exp", &match, &checkentry, NULL, THIS_MODULE }; + +static int __init init(void) +{ + printk(KERN_INFO "exp init " __DATE__ " " __TIME__ "\n"); + return ipt_register_match(&exp_match); +} + +static void __exit fini(void) +{ + printk(KERN_INFO "exp fini\n"); + ipt_unregister_match(&exp_match); +} + +module_init(init); +module_exit(fini); +MODULE_LICENSE("GPL"); diff --git a/release/src/linux/linux/net/ipv4/netfilter/ipt_geoip.c b/release/src/linux/linux/net/ipv4/netfilter/ipt_geoip.c new file mode 100644 index 00000000..fbd1a95c --- /dev/null +++ b/release/src/linux/linux/net/ipv4/netfilter/ipt_geoip.c @@ -0,0 +1,272 @@ +/* netfilter's kernel module for the geoip match + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Copyright (c) 2004 Cookinglinux + */ +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/skbuff.h> +#include <linux/netdevice.h> +#include <asm/uaccess.h> +#include <asm/atomic.h> + +#include <linux/netfilter_ipv4/ipt_geoip.h> +#include <linux/netfilter_ipv4/ip_tables.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Samuel Jean, Nicolas Bouliane"); +MODULE_DESCRIPTION("iptables/netfilter's geoip match"); + +struct geoip_info *head = NULL; +static spinlock_t geoip_lock = SPIN_LOCK_UNLOCKED; + +static struct geoip_info *add_node(struct geoip_info *memcpy) +{ + struct geoip_info *p = + (struct geoip_info *)kmalloc(sizeof(struct geoip_info), GFP_KERNEL); + + struct geoip_subnet *s; + + if ((p == NULL) || (copy_from_user(p, memcpy, sizeof(struct geoip_info)) != 0)) + return NULL; + + s = (struct geoip_subnet *)kmalloc(p->count * sizeof(struct geoip_subnet), GFP_KERNEL); + if ((s == NULL) || (copy_from_user(s, p->subnets, p->count * sizeof(struct geoip_subnet)) != 0)) + return NULL; + + spin_lock_bh(&geoip_lock); + + p->subnets = s; + p->ref = 1; + p->next = head; + p->prev = NULL; + if (p->next) p->next->prev = p; + head = p; + + spin_unlock_bh(&geoip_lock); + return p; +} + +static void remove_node(struct geoip_info *p) + { + spin_lock_bh(&geoip_lock); + + if (p->next) { /* Am I following a node ? */ + p->next->prev = p->prev; + if (p->prev) p->prev->next = p->next; /* Is there a node behind me ? */ + else head = p->next; /* No? Then I was the head */ + } + + else + if (p->prev) /* Is there a node behind me ? */ + p->prev->next = NULL; + else + head = NULL; /* No, we're alone */ + + /* So now am unlinked or the only one alive, right ? + * What are you waiting ? Free up some memory! + */ + + kfree(p->subnets); + kfree(p); + + spin_unlock_bh(&geoip_lock); + return; +} + +static struct geoip_info *find_node(u_int16_t cc) +{ + struct geoip_info *p = head; + spin_lock_bh(&geoip_lock); + + while (p) { + if (p->cc == cc) { + spin_unlock_bh(&geoip_lock); + return p; + } + p = p->next; + } + spin_unlock_bh(&geoip_lock); + return NULL; +} + +static int match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, + int offset, + const void *hdr, + u_int16_t datalen, + int *hotdrop) +{ + const struct ipt_geoip_info *info = matchinfo; + const struct geoip_info *node; /* This keeps the code sexy */ + const struct iphdr *iph = skb->nh.iph; + u_int32_t ip, j; + u_int8_t i; + + if (info->flags & IPT_GEOIP_SRC) + ip = ntohl(iph->saddr); + else + ip = ntohl(iph->daddr); + + spin_lock_bh(&geoip_lock); + for (i = 0; i < info->count; i++) { + if ((node = info->mem[i]) == NULL) { + printk(KERN_ERR "ipt_geoip: what the hell ?? '%c%c' isn't loaded into memory... skip it!\n", + COUNTRY(info->cc[i])); + + continue; + } + + for (j = 0; j < node->count; j++) + if ((ip > node->subnets[j].begin) && (ip < node->subnets[j].end)) { + spin_unlock_bh(&geoip_lock); + return (info->flags & IPT_GEOIP_INV) ? 0 : 1; + } + } + + spin_unlock_bh(&geoip_lock); + return (info->flags & IPT_GEOIP_INV) ? 1 : 0; +} + +static int geoip_checkentry(const char *tablename, + const struct ipt_ip *ip, + void *matchinfo, + unsigned int matchsize, + unsigned int hook_mask) +{ + struct ipt_geoip_info *info = matchinfo; + struct geoip_info *node; + u_int8_t i; + + /* FIXME: Call a function to free userspace allocated memory. + * As Martin J. said; this match might eat lot of memory + * if commited with iptables-restore --noflush + void (*gfree)(struct geoip_info *oldmem); + gfree = info->fini; + */ + + if (matchsize != IPT_ALIGN(sizeof(struct ipt_geoip_info))) { + printk(KERN_ERR "ipt_geoip: matchsize differ, you may have forgotten to recompile me\n"); + return 0; + } + + /* If info->refcount isn't NULL, then + * it means that checkentry() already + * initialized this entry. Increase a + * refcount to prevent destroy() of + * this entry. */ + if (info->refcount != NULL) { + atomic_inc((atomic_t *)info->refcount); + return 1; + } + + + for (i = 0; i < info->count; i++) { + + if ((node = find_node(info->cc[i])) != NULL) + atomic_inc((atomic_t *)&node->ref); //increase the reference + else + if ((node = add_node(info->mem[i])) == NULL) { + printk(KERN_ERR + "ipt_geoip: unable to load '%c%c' into memory\n", + COUNTRY(info->cc[i])); + return 0; + } + + /* Free userspace allocated memory for that country. + * FIXME: It's a bit odd to call this function everytime + * we process a country. Would be nice to call + * it once after all countries've been processed. + * - SJ + * *not implemented for now* + gfree(info->mem[i]); + */ + + /* Overwrite the now-useless pointer info->mem[i] with + * a pointer to the node's kernelspace structure. + * This avoids searching for a node in the match() and + * destroy() functions. + */ + info->mem[i] = node; + } + + /* We allocate some memory and give info->refcount a pointer + * to this memory. This prevents checkentry() from increasing a refcount + * different from the one used by destroy(). + * For explanation, see http://www.mail-archive.com/netfilter-devel@lists.samba.org/msg00625.html + */ + info->refcount = kmalloc(sizeof(u_int8_t), GFP_KERNEL); + if (info->refcount == NULL) { + printk(KERN_ERR "ipt_geoip: failed to allocate `refcount' memory\n"); + return 0; + } + *(info->refcount) = 1; + + return 1; +} + +static void geoip_destroy(void *matchinfo, unsigned int matchsize) +{ + struct ipt_geoip_info *info = matchinfo; + struct geoip_info *node; /* this keeps the code sexy */ + u_int8_t i; + + /* Decrease the previously increased refcount in checkentry() + * If it's equal to 1, we know this entry is just moving + * but not removed. We simply return to avoid useless destroy() + * processing. + */ + atomic_dec((atomic_t *)info->refcount); + if (*info->refcount) + return; + + /* Don't leak my memory, you idiot. + * Bug found with nfsim.. the netfilter's best + * friend. --peejix */ + kfree(info->refcount); + + /* This entry has been removed from the table so + * decrease the refcount of all countries it is + * using. + */ + + for (i = 0; i < info->count; i++) + if ((node = info->mem[i]) != NULL) { + atomic_dec((atomic_t *)&node->ref); + + /* Free up some memory if that node isn't used + * anymore. */ + if (node->ref < 1) + remove_node(node); + } + else + /* Something strange happened. There's no memory allocated for this + * country. Please send this bug to the mailing list. */ + printk(KERN_ERR + "ipt_geoip: What happened peejix ? What happened acidmen ?\n" + "ipt_geoip: please report this bug to the maintainers\n"); + return; +} + +static struct ipt_match geoip_match += { { NULL, NULL }, "geoip", &match, &geoip_checkentry, &geoip_destroy, THIS_MODULE }; + +static int __init init(void) +{ + return ipt_register_match(&geoip_match); +} + +static void __exit fini(void) +{ + ipt_unregister_match(&geoip_match); + return; +} + +module_init(init); +module_exit(fini); diff --git a/release/src/linux/linux/net/ipv4/netfilter/ipt_ipp2p.c b/release/src/linux/linux/net/ipv4/netfilter/ipt_ipp2p.c new file mode 100644 index 00000000..c36b2005 --- /dev/null +++ b/release/src/linux/linux/net/ipv4/netfilter/ipt_ipp2p.c @@ -0,0 +1,868 @@ +#if defined(MODVERSIONS) +#include <linux/modversions.h> +#endif +#include <linux/module.h> +#include <linux/version.h> +#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter_ipv4/ipt_ipp2p.h> +#include <net/tcp.h> +#include <net/udp.h> + +#define get_u8(X,O) (*(__u8 *)(X + O)) +#define get_u16(X,O) (*(__u16 *)(X + O)) +#define get_u32(X,O) (*(__u32 *)(X + O)) + +MODULE_AUTHOR("Eicke Friedrich/Klaus Degner <ipp2p@ipp2p.org>"); +MODULE_DESCRIPTION("An extension to iptables to identify P2P traffic."); +MODULE_LICENSE("GPL"); + + +/*Search for UDP eDonkey/eMule/Kad commands*/ +int +udp_search_edk (unsigned char *haystack, int packet_len) +{ + unsigned char *t = haystack; + t += 8; + + switch (t[0]) { + case 0xe3: + { /*edonkey*/ + switch (t[1]) + { + /* client -> server status request */ + case 0x96: + if (packet_len == 14) return ((IPP2P_EDK * 100) + 50); + break; + /* server -> client status request */ + case 0x97: if (packet_len == 42) return ((IPP2P_EDK * 100) + 51); + break; + /* server description request */ + /* e3 2a ff f0 .. | size == 6 */ + case 0xa2: if ( (packet_len == 14) && ( get_u16(t,2) == __constant_htons(0xfff0) ) ) return ((IPP2P_EDK * 100) + 52); + break; + /* server description response */ + /* e3 a3 ff f0 .. | size > 40 && size < 200 */ + //case 0xa3: return ((IPP2P_EDK * 100) + 53); + // break; + case 0x9a: if (packet_len==26) return ((IPP2P_EDK * 100) + 54); + break; + + case 0x92: if (packet_len==18) return ((IPP2P_EDK * 100) + 55); + break; + } + break; + } + case 0xe4: + { + switch (t[1]) + { + /* e4 20 .. | size == 43 */ + case 0x20: if ((packet_len == 43) && (t[2] != 0x00) && (t[34] != 0x00)) return ((IPP2P_EDK * 100) + 60); + break; + /* e4 00 .. 00 | size == 35 ? */ + case 0x00: if ((packet_len == 35) && (t[26] == 0x00)) return ((IPP2P_EDK * 100) + 61); + break; + /* e4 10 .. 00 | size == 35 ? */ + case 0x10: if ((packet_len == 35) && (t[26] == 0x00)) return ((IPP2P_EDK * 100) + 62); + break; + /* e4 18 .. 00 | size == 35 ? */ + case 0x18: if ((packet_len == 35) && (t[26] == 0x00)) return ((IPP2P_EDK * 100) + 63); + break; + /* e4 52 .. | size = 44 */ + case 0x52: if (packet_len == 44 ) return ((IPP2P_EDK * 100) + 64); + break; + /* e4 58 .. | size == 6 */ + case 0x58: if (packet_len == 14 ) return ((IPP2P_EDK * 100) + 65); + break; + /* e4 59 .. | size == 2 */ + case 0x59: if (packet_len == 10 )return ((IPP2P_EDK * 100) + 66); + break; + /* e4 28 .. | packet_len == 52,77,102,127... */ + case 0x28: if (((packet_len-52) % 25) == 0) return ((IPP2P_EDK * 100) + 67); + break; + /* e4 50 xx xx | size == 4 */ + case 0x50: if (packet_len == 12) return ((IPP2P_EDK * 100) + 68); + break; + /* e4 40 xx xx | size == 48 */ + case 0x40: if (packet_len == 56) return ((IPP2P_EDK * 100) + 69); + break; + } + break; + } + } /* end of switch (t[0]) */ + return 0; +}/*udp_search_edk*/ + + +/*Search for UDP Gnutella commands*/ +int +udp_search_gnu (unsigned char *haystack, int packet_len) +{ + unsigned char *t = haystack; + t += 8; + + if (memcmp(t, "GND", 3) == 0) return ((IPP2P_GNU * 100) + 51); + if (memcmp(t, "GNUTELLA ", 9) == 0) return ((IPP2P_GNU * 100) + 52); + return 0; +}/*udp_search_gnu*/ + + +/*Search for UDP KaZaA commands*/ +int +udp_search_kazaa (unsigned char *haystack, int packet_len) +{ + unsigned char *t = haystack; + + if (t[packet_len-1] == 0x00){ + t += (packet_len - 6); + if (memcmp(t, "KaZaA", 5) == 0) return (IPP2P_KAZAA * 100 +50); + } + + return 0; +}/*udp_search_kazaa*/ + +/*Search for UDP DirectConnect commands*/ +int +udp_search_directconnect (unsigned char *haystack, int packet_len) +{ + unsigned char *t = haystack; + if ((*(t + 8) == 0x24) && (*(t + packet_len - 1) == 0x7c)) { + t+=8; + if (memcmp(t, "SR ", 3) == 0) return ((IPP2P_DC * 100) + 60); + if (memcmp(t, "Ping ", 5) == 0) return ((IPP2P_DC * 100) + 61); + } + return 0; +}/*udp_search_directconnect*/ + + + +/*Search for UDP BitTorrent commands*/ +int +udp_search_bit (unsigned char *haystack, int packet_len) +{ + switch(packet_len) + { + case 24: + /* ^ 00 00 04 17 27 10 19 80 */ + if ((ntohl(get_u32(haystack, 8)) == 0x00000417) && (ntohl(get_u32(haystack, 12)) == 0x27101980)) + return (IPP2P_BIT * 100 + 50); + break; + case 44: + if (get_u32(haystack, 16) == __constant_htonl(0x00000400) && get_u32(haystack, 36) == __constant_htonl(0x00000104)) + return (IPP2P_BIT * 100 + 51); + if (get_u32(haystack, 16) == __constant_htonl(0x00000400)) + return (IPP2P_BIT * 100 + 61); + break; + case 65: + if (get_u32(haystack, 16) == __constant_htonl(0x00000404) && get_u32(haystack, 36) == __constant_htonl(0x00000104)) + return (IPP2P_BIT * 100 + 52); + if (get_u32(haystack, 16) == __constant_htonl(0x00000404)) + return (IPP2P_BIT * 100 + 62); + break; + case 67: + if (get_u32(haystack, 16) == __constant_htonl(0x00000406) && get_u32(haystack, 36) == __constant_htonl(0x00000104)) + return (IPP2P_BIT * 100 + 53); + if (get_u32(haystack, 16) == __constant_htonl(0x00000406)) + return (IPP2P_BIT * 100 + 63); + break; + case 211: + if (get_u32(haystack, 8) == __constant_htonl(0x00000405)) + return (IPP2P_BIT * 100 + 54); + break; + case 29: + if ((get_u32(haystack, 8) == __constant_htonl(0x00000401))) + return (IPP2P_BIT * 100 + 55); + break; + case 52: + if (get_u32(haystack,8) == __constant_htonl(0x00000827) && + get_u32(haystack,12) == __constant_htonl(0x37502950)) + return (IPP2P_BIT * 100 + 80); + break; + default: + /* this packet does not have a constant size */ + if (packet_len >= 40 && get_u32(haystack, 16) == __constant_htonl(0x00000402) && get_u32(haystack, 36) == __constant_htonl(0x00000104)) + return (IPP2P_BIT * 100 + 56); + break; + } + + /* some extra-bitcomet rules: + * "d1:" [a|r] "d2:id20:" + */ + if (packet_len > 30 && get_u8(haystack, 8) == 'd' && get_u8(haystack, 9) == '1' && get_u8(haystack, 10) == ':' ) + { + if (get_u8(haystack, 11) == 'a' || get_u8(haystack, 11) == 'r') + { + if (memcmp(haystack+12,"d2:id20:",8)==0) + return (IPP2P_BIT * 100 + 57); + } + } + +#if 0 + /* bitlord rules */ + /* packetlen must be bigger than 40 */ + /* first 4 bytes are zero */ + if (packet_len > 40 && get_u32(haystack, 8) == 0x00000000) + { + /* first rule: 00 00 00 00 01 00 00 xx xx xx xx 00 00 00 00*/ + if (get_u32(haystack, 12) == 0x00000000 && + get_u32(haystack, 16) == 0x00010000 && + get_u32(haystack, 24) == 0x00000000 ) + return (IPP2P_BIT * 100 + 71); + + /* 00 01 00 00 0d 00 00 xx xx xx xx 00 00 00 00*/ + if (get_u32(haystack, 12) == 0x00000001 && + get_u32(haystack, 16) == 0x000d0000 && + get_u32(haystack, 24) == 0x00000000 ) + return (IPP2P_BIT * 100 + 71); + + + } +#endif + + return 0; +}/*udp_search_bit*/ + + + +/*Search for Ares commands*/ +//#define IPP2P_DEBUG_ARES +int +search_ares (const unsigned char *payload, const u16 plen) +//int search_ares (unsigned char *haystack, int packet_len, int head_len) +{ +// const unsigned char *t = haystack + head_len; + + /* all ares packets start with */ + if (payload[1] == 0 && (plen - payload[0]) == 3) + { + switch (payload[2]) + { + case 0x5a: + /* ares connect */ + if ( plen == 6 && payload[5] == 0x05 ) return ((IPP2P_ARES * 100) + 1); + break; + case 0x09: + /* ares search, min 3 chars --> 14 bytes + * lets define a search can be up to 30 chars --> max 34 bytes + */ + if ( plen >= 14 && plen <= 34 ) return ((IPP2P_ARES * 100) + 1); + break; +#ifdef IPP2P_DEBUG_ARES + default: + printk(KERN_DEBUG "Unknown Ares command %x recognized, len: %u \n", (unsigned int) payload[2],plen); +#endif /* IPP2P_DEBUG_ARES */ + } + } + +#if 0 + /* found connect packet: 03 00 5a 04 03 05 */ + /* new version ares 1.8: 03 00 5a xx xx 05 */ + if ((plen) == 6){ /* possible connect command*/ + if ((payload[0] == 0x03) && (payload[1] == 0x00) && (payload[2] == 0x5a) && (payload[5] == 0x05)) + return ((IPP2P_ARES * 100) + 1); + } + if ((plen) == 60){ /* possible download command*/ + if ((payload[59] == 0x0a) && (payload[58] == 0x0a)){ + if (memcmp(t, "PUSH SHA1:", 10) == 0) /* found download command */ + return ((IPP2P_ARES * 100) + 2); + } + } +#endif + + return 0; +} /*search_ares*/ + +/*Search for SoulSeek commands*/ +int +search_soul (const unsigned char *payload, const u16 plen) +{ +//#define IPP2P_DEBUG_SOUL + /* match: xx xx xx xx | xx = sizeof(payload) - 4 */ + if (get_u32(payload, 0) == (plen - 4)){ + const __u32 m=get_u32(payload, 4); + /* match 00 yy yy 00, yy can be everything */ + if ( get_u8(payload, 4) == 0x00 && get_u8(payload, 7) == 0x00 ) + { +#ifdef IPP2P_DEBUG_SOUL + printk(KERN_DEBUG "0: Soulseek command 0x%x recognized\n",get_u32(payload, 4)); +#endif /* IPP2P_DEBUG_SOUL */ + return ((IPP2P_SOUL * 100) + 1); + } + + /* next match: 01 yy 00 00 | yy can be everything */ + if ( get_u8(payload, 4) == 0x01 && get_u16(payload, 6) == 0x0000 ) + { +#ifdef IPP2P_DEBUG_SOUL + printk(KERN_DEBUG "1: Soulseek command 0x%x recognized\n",get_u16(payload, 4)); +#endif /* IPP2P_DEBUG_SOUL */ + return ((IPP2P_SOUL * 100) + 2); + } + + /* other soulseek commandos are: 1-5,7,9,13-18,22,23,26,28,35-37,40-46,50,51,60,62-69,91,92,1001 */ + /* try to do this in an intelligent way */ + /* get all small commandos */ + switch(m) + { + case 7: + case 9: + case 22: + case 23: + case 26: + case 28: + case 50: + case 51: + case 60: + case 91: + case 92: + case 1001: +#ifdef IPP2P_DEBUG_SOUL + printk(KERN_DEBUG "2: Soulseek command 0x%x recognized\n",get_u16(payload, 4)); +#endif /* IPP2P_DEBUG_SOUL */ + return ((IPP2P_SOUL * 100) + 3); + } + + if (m > 0 && m < 6 ) + { +#ifdef IPP2P_DEBUG_SOUL + printk(KERN_DEBUG "3: Soulseek command 0x%x recognized\n",get_u16(payload, 4)); +#endif /* IPP2P_DEBUG_SOUL */ + return ((IPP2P_SOUL * 100) + 4); + } + if (m > 12 && m < 19 ) + { +#ifdef IPP2P_DEBUG_SOUL + printk(KERN_DEBUG "4: Soulseek command 0x%x recognized\n",get_u16(payload, 4)); +#endif /* IPP2P_DEBUG_SOUL */ + return ((IPP2P_SOUL * 100) + 5); + } + + if (m > 34 && m < 38 ) + { +#ifdef IPP2P_DEBUG_SOUL + printk(KERN_DEBUG "5: Soulseek command 0x%x recognized\n",get_u16(payload, 4)); +#endif /* IPP2P_DEBUG_SOUL */ + return ((IPP2P_SOUL * 100) + 6); + } + + if (m > 39 && m < 47 ) + { +#ifdef IPP2P_DEBUG_SOUL + printk(KERN_DEBUG "6: Soulseek command 0x%x recognized\n",get_u16(payload, 4)); +#endif /* IPP2P_DEBUG_SOUL */ + return ((IPP2P_SOUL * 100) + 7); + } + + if (m > 61 && m < 70 ) + { +#ifdef IPP2P_DEBUG_SOUL + printk(KERN_DEBUG "7: Soulseek command 0x%x recognized\n",get_u16(payload, 4)); +#endif /* IPP2P_DEBUG_SOUL */ + return ((IPP2P_SOUL * 100) + 8); + } + +#ifdef IPP2P_DEBUG_SOUL + printk(KERN_DEBUG "unknown SOULSEEK command: 0x%x, first 16 bit: 0x%x, first 8 bit: 0x%x ,soulseek ???\n",get_u32(payload, 4),get_u16(payload, 4) >> 16,get_u8(payload, 4) >> 24); +#endif /* IPP2P_DEBUG_SOUL */ + } + + /* match 14 00 00 00 01 yy 00 00 00 STRING(YY) 01 00 00 00 00 46|50 00 00 00 00 */ + /* without size at the beginning !!! */ + if ( get_u32(payload, 0) == 0x14 && get_u8(payload, 4) == 0x01 ) + { + __u32 y=get_u32(payload, 5); + /* we need 19 chars + string */ + if ( (y + 19) <= (plen) ) + { + const unsigned char *w=payload+9+y; + if (get_u32(w, 0) == 0x01 && ( get_u16(w, 4) == 0x4600 || get_u16(w, 4) == 0x5000) && get_u32(w, 6) == 0x00); +#ifdef IPP2P_DEBUG_SOUL + printk(KERN_DEBUG "Soulssek special client command recognized\n"); +#endif /* IPP2P_DEBUG_SOUL */ + return ((IPP2P_SOUL * 100) + 9); + } + } + return 0; +} + + +/*Search for WinMX commands*/ +int +search_winmx (const unsigned char *payload, const u16 plen) +{ +//#define IPP2P_DEBUG_WINMX + if (((plen) == 4) && (memcmp(payload, "SEND", 4) == 0)) return ((IPP2P_WINMX * 100) + 1); + if (((plen) == 3) && (memcmp(payload, "GET", 3) == 0)) return ((IPP2P_WINMX * 100) + 2); + //if (packet_len < (head_len + 10)) return 0; + if (plen < 10) return 0; + + if ((memcmp(payload, "SEND", 4) == 0) || (memcmp(payload, "GET", 3) == 0)){ + u16 c=4; + const u16 end=plen-2; + u8 count=0; + while (c < end) + { + if (payload[c]== 0x20 && payload[c+1] == 0x22) + { + c++; + count++; + if (count>=2) return ((IPP2P_WINMX * 100) + 3); + } + c++; + } + } + + if ( plen == 149 && payload[0] == '8' ) + { +#ifdef IPP2P_DEBUG_WINMX + printk(KERN_INFO "maybe WinMX\n"); +#endif + if (get_u32(payload,17) == 0 && get_u32(payload,21) == 0 && get_u32(payload,25) == 0 && +// get_u32(payload,33) == __constant_htonl(0x71182b1a) && get_u32(payload,37) == __constant_htonl(0x05050000) && +// get_u32(payload,133) == __constant_htonl(0x31097edf) && get_u32(payload,145) == __constant_htonl(0xdcb8f792)) + get_u16(payload,39) == 0 && get_u16(payload,135) == __constant_htons(0x7edf) && get_u16(payload,147) == __constant_htons(0xf792)) + + { +#ifdef IPP2P_DEBUG_WINMX + printk(KERN_INFO "got WinMX\n"); +#endif + return ((IPP2P_WINMX * 100) + 4); + } + } + return 0; +} /*search_winmx*/ + + +/*Search for appleJuice commands*/ +int +search_apple (const unsigned char *payload, const u16 plen) +{ + if ( (plen > 7) && (payload[6] == 0x0d) && (payload[7] == 0x0a) && (memcmp(payload, "ajprot", 6) == 0)) return (IPP2P_APPLE * 100); + + return 0; +} + + +/*Search for BitTorrent commands*/ +int +search_bittorrent (const unsigned char *payload, const u16 plen) +{ + if (plen > 20) + { + /* test for match 0x13+"BitTorrent protocol" */ + if (payload[0] == 0x13) + { + if (memcmp(payload+1, "BitTorrent protocol", 19) == 0) return (IPP2P_BIT * 100); + } + + /* get tracker commandos, all starts with GET / + * then it can follow: scrape| announce + * and then ?hash_info= + */ + if (memcmp(payload,"GET /",5) == 0) + { + /* message scrape */ + if ( memcmp(payload+5,"scrape?info_hash=",17)==0 ) return (IPP2P_BIT * 100 + 1); + /* message announce */ + if ( memcmp(payload+5,"announce?info_hash=",19)==0 ) return (IPP2P_BIT * 100 + 2); + } + } + else + { + /* bitcomet encryptes the first packet, so we have to detect another + * one later in the flow */ + /* first try failed, too many missdetections */ + //if ( size == 5 && get_u32(t,0) == __constant_htonl(1) && t[4] < 3) return (IPP2P_BIT * 100 + 3); + + /* second try: block request packets */ + if ( plen == 17 && get_u32(payload,0) == __constant_htonl(0x0d) && payload[4] == 0x06 && get_u32(payload,13) == __constant_htonl(0x4000) ) return (IPP2P_BIT * 100 + 3); + } + + return 0; +} + + + +/*check for Kazaa get command*/ +int +search_kazaa (const unsigned char *payload, const u16 plen) + +{ + if ((payload[plen-2] == 0x0d) && (payload[plen-1] == 0x0a) && memcmp(payload, "GET /.hash=", 11) == 0) + return (IPP2P_DATA_KAZAA * 100); + + return 0; +} + + +/*check for gnutella get command*/ +int +search_gnu (const unsigned char *payload, const u16 plen) +{ + if ((payload[plen-2] == 0x0d) && (payload[plen-1] == 0x0a)) + { + if (memcmp(payload, "GET /get/", 9) == 0) return ((IPP2P_DATA_GNU * 100) + 1); + if (memcmp(payload, "GET /uri-res/", 13) == 0) return ((IPP2P_DATA_GNU * 100) + 2); + } + return 0; +} + + +/*check for gnutella get commands and other typical data*/ +int +search_all_gnu (const unsigned char *payload, const u16 plen) +{ + + if ((payload[plen-2] == 0x0d) && (payload[plen-1] == 0x0a)) + { + + if (memcmp(payload, "GNUTELLA CONNECT/", 17) == 0) return ((IPP2P_GNU * 100) + 1); + if (memcmp(payload, "GNUTELLA/", 9) == 0) return ((IPP2P_GNU * 100) + 2); + + + if ((memcmp(payload, "GET /get/", 9) == 0) || (memcmp(payload, "GET /uri-res/", 13) == 0)) + { + u16 c=8; + const u16 end=plen-22; + while (c < end) { + if ( payload[c] == 0x0a && payload[c+1] == 0x0d && ((memcmp(&payload[c+2], "X-Gnutella-", 11) == 0) || (memcmp(&payload[c+2], "X-Queue:", 8) == 0))) + return ((IPP2P_GNU * 100) + 3); + c++; + } + } + } + return 0; +} + + +/*check for KaZaA download commands and other typical data*/ +int +search_all_kazaa (const unsigned char *payload, const u16 plen) +{ + if ((payload[plen-2] == 0x0d) && (payload[plen-1] == 0x0a)) + { + + if (memcmp(payload, "GIVE ", 5) == 0) return ((IPP2P_KAZAA * 100) + 1); + + if (memcmp(payload, "GET /", 5) == 0) { + u16 c = 8; + const u16 end=plen-22; + while (c < end) { + if ( payload[c] == 0x0a && payload[c+1] == 0x0d && ((memcmp(&payload[c+2], "X-Kazaa-Username: ", 18) == 0) || (memcmp(&payload[c+2], "User-Agent: PeerEnabler/", 24) == 0))) + return ((IPP2P_KAZAA * 100) + 2); + c++; + } + } + } + return 0; +} + +/*fast check for edonkey file segment transfer command*/ +int +search_edk (const unsigned char *payload, const u16 plen) +{ + if (payload[0] != 0xe3) + return 0; + else { + if (payload[5] == 0x47) + return (IPP2P_DATA_EDK * 100); + else + return 0; + } +} + + + +/*intensive but slower search for some edonkey packets including size-check*/ +int +search_all_edk (const unsigned char *payload, const u16 plen) +{ + if (payload[0] != 0xe3) + return 0; + else { + //t += head_len; + const u16 cmd = get_u16(payload, 1); + if (cmd == (plen - 5)) { + switch (payload[5]) { + case 0x01: return ((IPP2P_EDK * 100) + 1); /*Client: hello or Server:hello*/ + case 0x4c: return ((IPP2P_EDK * 100) + 9); /*Client: Hello-Answer*/ + } + } + return 0; + } +} + + +/*fast check for Direct Connect send command*/ +int +search_dc (const unsigned char *payload, const u16 plen) +{ + + if (payload[0] != 0x24 ) + return 0; + else { + if (memcmp(&payload[1], "Send|", 5) == 0) + return (IPP2P_DATA_DC * 100); + else + return 0; + } + +} + + +/*intensive but slower check for all direct connect packets*/ +int +search_all_dc (const unsigned char *payload, const u16 plen) +{ +// unsigned char *t = haystack; + + if (payload[0] == 0x24 && payload[plen-1] == 0x7c) + { + const unsigned char *t=&payload[1]; + /* Client-Hub-Protocol */ + if (memcmp(t, "Lock ", 5) == 0) return ((IPP2P_DC * 100) + 1); + /* Client-Client-Protocol, some are already recognized by client-hub (like lock) */ + if (memcmp(t, "MyNick ", 7) == 0) return ((IPP2P_DC * 100) + 38); + } + return 0; +} + +/*check for mute*/ +int +search_mute (const unsigned char *payload, const u16 plen) +{ + if ( plen == 209 || plen == 345 || plen == 473 || plen == 609 || plen == 1121 ) + { + //printk(KERN_DEBUG "size hit: %u",size); + if (memcmp(payload,"PublicKey: ",11) == 0 ) + { + return ((IPP2P_MUTE * 100) + 0); + +/* if (memcmp(t+size-14,"\x0aEndPublicKey\x0a",14) == 0) + { + printk(KERN_DEBUG "end pubic key hit: %u",size); + + }*/ + } + } + return 0; +} + + +/* check for xdcc */ +int +search_xdcc (const unsigned char *payload, const u16 plen) +{ + /* search in small packets only */ + if (plen > 20 && plen < 200 && payload[plen-1] == 0x0a && payload[plen-2] == 0x0d && memcmp(payload,"PRIVMSG ",8) == 0) + { + + u16 x=10; + const u16 end=plen - 13; + + /* is seems to be a irc private massage, chedck for xdcc command */ + while (x < end) + { + if (payload[x] == ':') + { + if ( memcmp(&payload[x+1],"xdcc send #",11) == 0 ) + return ((IPP2P_XDCC * 100) + 0); + } + x++; + } + } + return 0; +} + +/* search for waste */ +int search_waste(const unsigned char *payload, const u16 plen) +{ + if ( plen >= 8 && memcmp(payload,"GET.sha1:",9) == 0) + return ((IPP2P_WASTE * 100) + 0); + + return 0; +} + + +static struct { + int command; + __u8 short_hand; /*for fucntions included in short hands*/ + int packet_len; + int (*function_name) (const unsigned char *, const u16); +} matchlist[] = { + {IPP2P_EDK,SHORT_HAND_IPP2P,20, &search_all_edk}, +// {IPP2P_DATA_KAZAA,SHORT_HAND_DATA,200, &search_kazaa}, +// {IPP2P_DATA_EDK,SHORT_HAND_DATA,60, &search_edk}, +// {IPP2P_DATA_DC,SHORT_HAND_DATA,26, &search_dc}, + {IPP2P_DC,SHORT_HAND_IPP2P,5, search_all_dc}, +// {IPP2P_DATA_GNU,SHORT_HAND_DATA,40, &search_gnu}, + {IPP2P_GNU,SHORT_HAND_IPP2P,5, &search_all_gnu}, + {IPP2P_KAZAA,SHORT_HAND_IPP2P,5, &search_all_kazaa}, + {IPP2P_BIT,SHORT_HAND_IPP2P,20, &search_bittorrent}, + {IPP2P_APPLE,SHORT_HAND_IPP2P,5, &search_apple}, + {IPP2P_SOUL,SHORT_HAND_IPP2P,5, &search_soul}, + {IPP2P_WINMX,SHORT_HAND_IPP2P,2, &search_winmx}, + {IPP2P_ARES,SHORT_HAND_IPP2P,5, &search_ares}, + {IPP2P_MUTE,SHORT_HAND_NONE,200, &search_mute}, + {IPP2P_WASTE,SHORT_HAND_NONE,5, &search_waste}, + {IPP2P_XDCC,SHORT_HAND_NONE,5, &search_xdcc}, + {0,0,0,NULL} +}; + + +static struct { + int command; + __u8 short_hand; /*for fucntions included in short hands*/ + int packet_len; + int (*function_name) (unsigned char *, int); +} udp_list[] = { + {IPP2P_KAZAA,SHORT_HAND_IPP2P,14, &udp_search_kazaa}, + {IPP2P_BIT,SHORT_HAND_IPP2P,23, &udp_search_bit}, + {IPP2P_GNU,SHORT_HAND_IPP2P,11, &udp_search_gnu}, + {IPP2P_EDK,SHORT_HAND_IPP2P,9, &udp_search_edk}, + {IPP2P_DC,SHORT_HAND_IPP2P,12, &udp_search_directconnect}, + {0,0,0,NULL} +}; + + +static int +match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, + int offset, + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) + const void *hdr, + u_int16_t datalen, +#endif + + int *hotdrop) +{ + const struct ipt_p2p_info *info = matchinfo; + unsigned char *haystack; + struct iphdr *ip = skb->nh.iph; + int p2p_result = 0, i = 0; +// int head_len; + int hlen = ntohs(ip->tot_len)-(ip->ihl*4); /*hlen = packet-data length*/ + + /*must not be a fragment*/ + if (offset) { + if (info->debug) printk("IPP2P.match: offset found %i \n",offset); + return 0; + } + + /*make sure that skb is linear*/ + if(skb_is_nonlinear(skb)){ + if (info->debug) printk("IPP2P.match: nonlinear skb found\n"); + return 0; + } + + + haystack=(char *)ip+(ip->ihl*4); /*haystack = packet data*/ + + switch (ip->protocol){ + case IPPROTO_TCP: /*what to do with a TCP packet*/ + { + struct tcphdr *tcph = (void *) ip + ip->ihl * 4; + + if (tcph->fin) return 0; /*if FIN bit is set bail out*/ + if (tcph->syn) return 0; /*if SYN bit is set bail out*/ + if (tcph->rst) return 0; /*if RST bit is set bail out*/ + + haystack += tcph->doff * 4; /*get TCP-Header-Size*/ + hlen -= tcph->doff * 4; + while (matchlist[i].command) { + if ((((info->cmd & matchlist[i].command) == matchlist[i].command) || + ((info->cmd & matchlist[i].short_hand) == matchlist[i].short_hand)) && + (hlen > matchlist[i].packet_len)) { + p2p_result = matchlist[i].function_name(haystack, hlen); + if (p2p_result) + { + if (info->debug) printk("IPP2P.debug:TCP-match: %i from: %u.%u.%u.%u:%i to: %u.%u.%u.%u:%i Length: %i\n", + p2p_result, NIPQUAD(ip->saddr),ntohs(tcph->source), NIPQUAD(ip->daddr),ntohs(tcph->dest),hlen); + return p2p_result; + } + } + i++; + } + return p2p_result; + } + + case IPPROTO_UDP: /*what to do with an UDP packet*/ + { + struct udphdr *udph = (void *) ip + ip->ihl * 4; + + while (udp_list[i].command){ + if ((((info->cmd & udp_list[i].command) == udp_list[i].command) || + ((info->cmd & udp_list[i].short_hand) == udp_list[i].short_hand)) && + (hlen > udp_list[i].packet_len)) { + p2p_result = udp_list[i].function_name(haystack, hlen); + if (p2p_result){ + if (info->debug) printk("IPP2P.debug:UDP-match: %i from: %u.%u.%u.%u:%i to: %u.%u.%u.%u:%i Length: %i\n", + p2p_result, NIPQUAD(ip->saddr),ntohs(udph->source), NIPQUAD(ip->daddr),ntohs(udph->dest),hlen); + return p2p_result; + } + } + i++; + } + return p2p_result; + } + + default: return 0; + } +} + + + +static int +checkentry(const char *tablename, + const struct ipt_ip *ip, + void *matchinfo, + unsigned int matchsize, + unsigned int hook_mask) +{ + /* Must specify -p tcp */ +/* if (ip->proto != IPPROTO_TCP || (ip->invflags & IPT_INV_PROTO)) { + * printk("ipp2p: Only works on TCP packets, use -p tcp\n"); + * return 0; + * }*/ + return 1; +} + + + + +static struct ipt_match ipp2p_match = { +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) + { NULL, NULL }, + "ipp2p", + &match, + &checkentry, + NULL, + THIS_MODULE +#endif +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) + .name = "ipp2p", + .match = &match, + .checkentry = &checkentry, + .me = THIS_MODULE, +#endif +}; + + +static int __init init(void) +{ + printk(KERN_INFO "IPP2P v%s loading\n", IPP2P_VERSION); + return ipt_register_match(&ipp2p_match); +} + +static void __exit fini(void) +{ + ipt_unregister_match(&ipp2p_match); + printk(KERN_INFO "IPP2P v%s unloaded\n", IPP2P_VERSION); +} + +module_init(init); +module_exit(fini); + + diff --git a/release/src/linux/linux/net/ipv4/netfilter/ipt_iprange.c b/release/src/linux/linux/net/ipv4/netfilter/ipt_iprange.c new file mode 100644 index 00000000..38902524 --- /dev/null +++ b/release/src/linux/linux/net/ipv4/netfilter/ipt_iprange.c @@ -0,0 +1,101 @@ +/* + * iptables module to match IP address ranges + * (c) 2003 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> + * + * Released under the terms of GNU GPLv2. + * + */ +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/ip.h> +#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter_ipv4/ipt_iprange.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); +MODULE_DESCRIPTION("iptables arbitrary IP range match module"); + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) +#endif + +static int +match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, + int offset, + const void *hdr, + u_int16_t datalen, + int *hotdrop) +{ + const struct ipt_iprange_info *info = matchinfo; + const struct iphdr *iph = skb->nh.iph; + + + if (info->flags & IPRANGE_SRC) { + if (((ntohl(iph->saddr) < ntohl(info->src.min_ip)) + || (ntohl(iph->saddr) > ntohl(info->src.max_ip))) + ^ !!(info->flags & IPRANGE_SRC_INV)) { + DEBUGP("src IP %u.%u.%u.%u NOT in range %s" + "%u.%u.%u.%u-%u.%u.%u.%u\n", + NIPQUAD(iph->saddr), + info->flags & IPRANGE_SRC_INV ? "(INV) " : "", + NIPQUAD(info->src.min_ip), + NIPQUAD(info->src.max_ip)); + return 0; + } + } + if (info->flags & IPRANGE_DST) { + if (((ntohl(iph->daddr) < ntohl(info->dst.min_ip)) + || (ntohl(iph->daddr) > ntohl(info->dst.max_ip))) + ^ !!(info->flags & IPRANGE_DST_INV)) { + DEBUGP("dst IP %u.%u.%u.%u NOT in range %s" + "%u.%u.%u.%u-%u.%u.%u.%u\n", + NIPQUAD(iph->daddr), + info->flags & IPRANGE_DST_INV ? "(INV) " : "", + NIPQUAD(info->dst.min_ip), + NIPQUAD(info->dst.max_ip)); + return 0; + } + } + return 1; +} + +static int check(const char *tablename, + const struct ipt_ip *ip, + void *matchinfo, + unsigned int matchsize, + unsigned int hook_mask) +{ + /* verify size */ + if (matchsize != IPT_ALIGN(sizeof(struct ipt_iprange_info))) + return 0; + + return 1; +} + +static struct ipt_match iprange_match = +{ + .list = { NULL, NULL }, + .name = "iprange", + .match = &match, + .checkentry = &check, + .destroy = NULL, + .me = THIS_MODULE +}; + +static int __init init(void) +{ + return ipt_register_match(&iprange_match); +} + +static void __exit fini(void) +{ + ipt_unregister_match(&iprange_match); +} + +module_init(init); +module_exit(fini); diff --git a/release/src/linux/linux/net/ipv4/netfilter/ipt_layer7.c b/release/src/linux/linux/net/ipv4/netfilter/ipt_layer7.c new file mode 100644 index 00000000..567e3847 --- /dev/null +++ b/release/src/linux/linux/net/ipv4/netfilter/ipt_layer7.c @@ -0,0 +1,570 @@ +/* + Kernel module to match application layer (OSI layer 7) + data in connections. + + http://l7-filter.sf.net + + By Matthew Strait and Ethan Sommer, 2003-2005. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version + 2 of the License, or (at your option) any later version. + http://www.gnu.org/licenses/gpl.txt + + Based on ipt_string.c (C) 2000 Emmanuel Roger <winfield@freegates.be> + and cls_layer7.c (C) 2003 Matthew Strait, Ethan Sommer, Justin Levandoski +*/ + +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/netfilter_ipv4/ip_conntrack.h> +#include <linux/proc_fs.h> +#include <linux/ctype.h> +#include <net/ip.h> +#include <net/tcp.h> +#include <linux/netfilter_ipv4/lockhelp.h> + +#include "regexp/regexp.c" + +#include <linux/netfilter_ipv4/ipt_layer7.h> +#include <linux/netfilter_ipv4/ip_tables.h> + +MODULE_AUTHOR("Matthew Strait <quadong@users.sf.net>, Ethan Sommer <sommere@users.sf.net>"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("iptables application layer match module"); + +static int maxdatalen = 2048; // this is the default +MODULE_PARM(maxdatalen,"i"); +MODULE_PARM_DESC(maxdatalen,"maximum bytes of data looked at by l7-filter"); + +#if defined(CONFIG_IP_NF_MATCH_LAYER7_DEBUG) + #define DPRINTK(format,args...) printk(format,##args) +#else + #define DPRINTK(format,args...) +#endif + +#define TOTAL_PACKETS master_conntrack->layer7.numpackets + +/* Number of packets whose data we look at. +This can be modified through /proc/net/layer7_numpackets */ +static int num_packets = 10; + +static struct pattern_cache { + char * regex_string; + regexp * pattern; + struct pattern_cache * next; +} * first_pattern_cache = NULL; + +/* I'm new to locking. Here are my assumptions: + +- No one will write to /proc/net/layer7_numpackets over and over very fast; + if they did, nothing awful would happen. + +- This code will never be processing the same packet twice at the same time, + because iptables rules are traversed in order. + +- It doesn't matter if two packets from different connections are in here at + the same time, because they don't share any data. + +- It _does_ matter if two packets from the same connection are here at the same + time. In this case, we have to protect the conntracks and the list of + compiled patterns. +*/ +DECLARE_RWLOCK(ct_lock); +DECLARE_LOCK(list_lock); + +#if CONFIG_IP_NF_MATCH_LAYER7_DEBUG +/* Converts an unfriendly string into a friendly one by +replacing unprintables with periods and all whitespace with " ". */ +static char * friendly_print(unsigned char * s) +{ + char * f = kmalloc(strlen(s) + 1, GFP_ATOMIC); + int i; + + if(!f) { + if (net_ratelimit()) + printk(KERN_ERR "layer7: out of memory in friendly_print, bailing.\n"); + return NULL; + } + + for(i = 0; i < strlen(s); i++){ + if(isprint(s[i]) && s[i] < 128) f[i] = s[i]; + else if(isspace(s[i])) f[i] = ' '; + else f[i] = '.'; + } + f[i] = '\0'; + return f; +} + +static char dec2hex(int i) +{ + switch (i) { + case 0 ... 9: + return (char)(i + '0'); + break; + case 10 ... 15: + return (char)(i - 10 + 'a'); + break; + default: + if (net_ratelimit()) + printk("Problem in dec2hex\n"); + return '\0'; + } +} + +static char * hex_print(unsigned char * s) +{ + char * g = kmalloc(strlen(s)*3 + 1, GFP_ATOMIC); + int i; + + if(!g) { + if (net_ratelimit()) + printk(KERN_ERR "layer7: out of memory in hex_print, bailing.\n"); + return NULL; + } + + for(i = 0; i < strlen(s); i++) { + g[i*3 ] = dec2hex(s[i]/16); + g[i*3 + 1] = dec2hex(s[i]%16); + g[i*3 + 2] = ' '; + } + g[i*3] = '\0'; + + return g; +} +#endif // DEBUG + +/* Use instead of regcomp. As we expect to be seeing the same regexps over and +over again, it make sense to cache the results. */ +static regexp * compile_and_cache(char * regex_string, char * protocol) +{ + struct pattern_cache * node = first_pattern_cache; + struct pattern_cache * last_pattern_cache = first_pattern_cache; + struct pattern_cache * tmp; + unsigned int len; + + while (node != NULL) { + if (!strcmp(node->regex_string, regex_string)) + return node->pattern; + + last_pattern_cache = node;/* points at the last non-NULL node */ + node = node->next; + } + + /* If we reach the end of the list, then we have not yet cached + the pattern for this regex. Let's do that now. + Be paranoid about running out of memory to avoid list corruption. */ + tmp = kmalloc(sizeof(struct pattern_cache), GFP_ATOMIC); + + if(!tmp) { + if (net_ratelimit()) + printk(KERN_ERR "layer7: out of memory in compile_and_cache, bailing.\n"); + return NULL; + } + + tmp->regex_string = kmalloc(strlen(regex_string) + 1, GFP_ATOMIC); + tmp->pattern = kmalloc(sizeof(struct regexp), GFP_ATOMIC); + tmp->next = NULL; + + if(!tmp->regex_string || !tmp->pattern) { + if (net_ratelimit()) + printk(KERN_ERR "layer7: out of memory in compile_and_cache, bailing.\n"); + kfree(tmp->regex_string); + kfree(tmp->pattern); + kfree(tmp); + return NULL; + } + + /* Ok. The new node is all ready now. */ + node = tmp; + + if(first_pattern_cache == NULL) /* list is empty */ + first_pattern_cache = node; /* make node the beginning */ + else + last_pattern_cache->next = node; /* attach node to the end */ + + /* copy the string and compile the regex */ + len = strlen(regex_string); + DPRINTK("About to compile this: \"%s\"\n", regex_string); + node->pattern = regcomp(regex_string, &len); + if ( !node->pattern ) { + if (net_ratelimit()) + printk(KERN_ERR "layer7: Error compiling regexp \"%s\" (%s)\n", regex_string, protocol); + /* pattern is now cached as NULL, so we won't try again. */ + } + + strcpy(node->regex_string, regex_string); + return node->pattern; +} + +static int can_handle(const struct sk_buff *skb) +{ + if(!skb->nh.iph) /* not IP */ + return 0; + if(skb->nh.iph->protocol != IPPROTO_TCP && + skb->nh.iph->protocol != IPPROTO_UDP && + skb->nh.iph->protocol != IPPROTO_ICMP) + return 0; + return 1; +} + +/* Returns offset the into the skb->data that the application data starts */ +static int app_data_offset(const struct sk_buff *skb) +{ + /* In case we are ported somewhere (ebtables?) where skb->nh.iph + isn't set, this can be gotten from 4*(skb->data[0] & 0x0f) as well. */ + int ip_hl = 4*skb->nh.iph->ihl; + + if( skb->nh.iph->protocol == IPPROTO_TCP ) { + /* 12 == offset into TCP header for the header length field. + Can't get this with skb->h.th->doff because the tcphdr + struct doesn't get set when routing (this is confirmed to be + true in Netfilter as well as QoS.) */ + int tcp_hl = 4*(skb->data[ip_hl + 12] >> 4); + + return ip_hl + tcp_hl; + } else if( skb->nh.iph->protocol == IPPROTO_UDP ) { + return ip_hl + 8; /* UDP header is always 8 bytes */ + } else if( skb->nh.iph->protocol == IPPROTO_ICMP ) { + return ip_hl + 8; /* ICMP header is 8 bytes */ + } else { + if (net_ratelimit()) + printk(KERN_ERR "layer7: tried to handle unknown protocol!\n"); + return ip_hl + 8; /* something reasonable */ + } +} + +/* handles whether there's a match when we aren't appending data anymore */ +static int match_no_append(struct ip_conntrack * conntrack, struct ip_conntrack * master_conntrack, + enum ip_conntrack_info ctinfo, enum ip_conntrack_info master_ctinfo, + struct ipt_layer7_info * info) +{ + /* If we're in here, throw the app data away */ + WRITE_LOCK(&ct_lock); + if(master_conntrack->layer7.app_data != NULL) { + + #ifdef CONFIG_IP_NF_MATCH_LAYER7_DEBUG + if(!master_conntrack->layer7.app_proto) { + char * f = friendly_print(master_conntrack->layer7.app_data); + char * g = hex_print(master_conntrack->layer7.app_data); + DPRINTK("\nl7-filter gave up after %d bytes (%d packets):\n%s\n", + strlen(f), + TOTAL_PACKETS, f); + kfree(f); + DPRINTK("In hex: %s\n", g); + kfree(g); + } + #endif + + kfree(master_conntrack->layer7.app_data); + master_conntrack->layer7.app_data = NULL; /* don't free again */ + } + WRITE_UNLOCK(&ct_lock); + + if(master_conntrack->layer7.app_proto){ + /* Here child connections set their .app_proto (for /proc/net/ip_conntrack) */ + WRITE_LOCK(&ct_lock); + if(!conntrack->layer7.app_proto) { + conntrack->layer7.app_proto = kmalloc(strlen(master_conntrack->layer7.app_proto)+1, GFP_ATOMIC); + if(!conntrack->layer7.app_proto){ + if (net_ratelimit()) + printk(KERN_ERR "layer7: out of memory in match_no_append, bailing.\n"); + WRITE_UNLOCK(&ct_lock); + return 1; + } + strcpy(conntrack->layer7.app_proto, master_conntrack->layer7.app_proto); + } + WRITE_UNLOCK(&ct_lock); + + return (!strcmp(master_conntrack->layer7.app_proto, info->protocol)); + } + else { + /* If not classified, set to "unknown" to distinguish from + connections that are still being tested. */ + WRITE_LOCK(&ct_lock); + master_conntrack->layer7.app_proto = kmalloc(strlen("unknown")+1, GFP_ATOMIC); + if(!master_conntrack->layer7.app_proto){ + if (net_ratelimit()) + printk(KERN_ERR "layer7: out of memory in match_no_append, bailing.\n"); + WRITE_UNLOCK(&ct_lock); + return 1; + } + strcpy(master_conntrack->layer7.app_proto, "unknown"); + WRITE_UNLOCK(&ct_lock); + return 0; + } +} + +/* add the new app data to the conntrack. Return number of bytes added. */ +static int add_data(struct ip_conntrack * master_conntrack, + char * app_data, int appdatalen) +{ + int length = 0, i; + int oldlength = master_conntrack->layer7.app_data_len; + + /* Strip nulls. Make everything lower case (our regex lib doesn't + do case insensitivity). Add it to the end of the current data. */ + for(i = 0; i < maxdatalen-oldlength-1 && i < appdatalen; i++) { + if(app_data[i] != '\0') { + master_conntrack->layer7.app_data[length+oldlength] = + /* the kernel version of tolower mungs 'upper ascii' */ + isascii(app_data[i])? tolower(app_data[i]) : app_data[i]; + length++; + } + } + + master_conntrack->layer7.app_data[length+oldlength] = '\0'; + master_conntrack->layer7.app_data_len = length + oldlength; + + return length; +} + +/* Returns true on match and false otherwise. */ +static int match(/* const */struct sk_buff *skb, const struct net_device *in, + const struct net_device *out, const void *matchinfo, + int offset, int *hotdrop) +{ + struct ipt_layer7_info * info = (struct ipt_layer7_info *)matchinfo; + enum ip_conntrack_info master_ctinfo, ctinfo; + struct ip_conntrack *master_conntrack, *conntrack; + unsigned char * app_data; + unsigned int pattern_result, appdatalen; + regexp * comppattern; + + if(!can_handle(skb)){ + DPRINTK("layer7: This is some protocol I can't handle.\n"); + return info->invert; + } + + /* Treat the parent and all its children together as one connection, + except for the purpose of setting conntrack->layer7.app_proto in the + actual connection. This makes /proc/net/ip_conntrack somewhat more + satisfying. */ + if(!(conntrack = ip_conntrack_get((struct sk_buff *)skb, &ctinfo)) || + !(master_conntrack = ip_conntrack_get((struct sk_buff *)skb, &master_ctinfo))) { + DPRINTK("layer7: packet is not from a known connection, giving up.\n"); + return info->invert; + } + + /* Try to get a master conntrack (and its master etc) for FTP, etc. */ + while (master_ct(master_conntrack) != NULL) + master_conntrack = master_ct(master_conntrack); + + if(!skb->cb[0]){ + WRITE_LOCK(&ct_lock); + master_conntrack->layer7.numpackets++;/*starts at 0 via memset*/ + WRITE_UNLOCK(&ct_lock); + } + + /* if we've classified it or seen too many packets */ + if(TOTAL_PACKETS > num_packets || + master_conntrack->layer7.app_proto) { + + pattern_result = match_no_append(conntrack, master_conntrack, ctinfo, master_ctinfo, info); + + /* skb->cb[0] == seen. Avoid doing things twice if there are two l7 + rules. I'm not sure that using cb for this purpose is correct, although + it says "put your private variables there". But it doesn't look like it + is being used for anything else in the skbs that make it here. How can + I write to cb without making the compiler angry? */ + skb->cb[0] = 1; /* marking it seen here is probably irrelevant, but consistant */ + + return (pattern_result ^ info->invert); + } + + if(skb_is_nonlinear(skb)){ + if(skb_linearize(skb, GFP_ATOMIC) != 0){ + if (net_ratelimit()) + printk(KERN_ERR "layer7: failed to linearize packet, bailing.\n"); + return info->invert; + } + } + + /* now that the skb is linearized, it's safe to set these. */ + app_data = skb->data + app_data_offset(skb); + appdatalen = skb->tail - app_data; + + LOCK_BH(&list_lock); + /* the return value gets checked later, when we're ready to use it */ + comppattern = compile_and_cache(info->pattern, info->protocol); + UNLOCK_BH(&list_lock); + + /* On the first packet of a connection, allocate space for app data */ + WRITE_LOCK(&ct_lock); + if(TOTAL_PACKETS == 1 && !skb->cb[0] && !master_conntrack->layer7.app_data) { + master_conntrack->layer7.app_data = kmalloc(maxdatalen, GFP_ATOMIC); + if(!master_conntrack->layer7.app_data){ + if (net_ratelimit()) + printk(KERN_ERR "layer7: out of memory in match, bailing.\n"); + WRITE_UNLOCK(&ct_lock); + return info->invert; + } + + master_conntrack->layer7.app_data[0] = '\0'; + } + WRITE_UNLOCK(&ct_lock); + + /* Can be here, but unallocated, if numpackets is increased near + the beginning of a connection */ + if(master_conntrack->layer7.app_data == NULL) + return (info->invert); /* unmatched */ + + if(!skb->cb[0]){ + int newbytes; + WRITE_LOCK(&ct_lock); + newbytes = add_data(master_conntrack, app_data, appdatalen); + WRITE_UNLOCK(&ct_lock); + + if(newbytes == 0) { /* didn't add any data */ + skb->cb[0] = 1; + /* Didn't match before, not going to match now */ + return info->invert; + } + } + + /* If looking for "unknown", then never match. "Unknown" means that + we've given up; we're still trying with these packets. */ + if(!strcmp(info->protocol, "unknown")) { + pattern_result = 0; + /* If the regexp failed to compile, don't bother running it */ + } else if(comppattern && regexec(comppattern, master_conntrack->layer7.app_data)) { + DPRINTK("layer7: regexec positive: %s!\n", info->protocol); + pattern_result = 1; + } else pattern_result = 0; + + if(pattern_result) { + WRITE_LOCK(&ct_lock); + master_conntrack->layer7.app_proto = kmalloc(strlen(info->protocol)+1, GFP_ATOMIC); + if(!master_conntrack->layer7.app_proto){ + if (net_ratelimit()) + printk(KERN_ERR "layer7: out of memory in match, bailing.\n"); + WRITE_UNLOCK(&ct_lock); + return (pattern_result ^ info->invert); + } + strcpy(master_conntrack->layer7.app_proto, info->protocol); + WRITE_UNLOCK(&ct_lock); + } + + /* mark the packet seen */ + skb->cb[0] = 1; + + return (pattern_result ^ info->invert); +} + +static int checkentry(const char *tablename, const struct ipt_ip *ip, + void *matchinfo, unsigned int matchsize, unsigned int hook_mask) +{ + if (matchsize != IPT_ALIGN(sizeof(struct ipt_layer7_info))) + return 0; + return 1; +} + +static struct ipt_match layer7_match = { + .name = "layer7", + .match = &match, + .checkentry = &checkentry, + .me = THIS_MODULE +}; + +/* taken from drivers/video/modedb.c */ +static int my_atoi(const char *s) +{ + int val = 0; + + for (;; s++) { + switch (*s) { + case '0'...'9': + val = 10*val+(*s-'0'); + break; + default: + return val; + } + } +} + +/* write out num_packets to userland. */ +static int layer7_read_proc(char* page, char ** start, off_t off, int count, + int* eof, void * data) +{ + if(num_packets > 99 && net_ratelimit()) + printk(KERN_ERR "layer7: NOT REACHED. num_packets too big\n"); + + page[0] = num_packets/10 + '0'; + page[1] = num_packets%10 + '0'; + page[2] = '\n'; + page[3] = '\0'; + + *eof=1; + + return 3; +} + +/* Read in num_packets from userland */ +static int layer7_write_proc(struct file* file, const char* buffer, + unsigned long count, void *data) +{ + char * foo = kmalloc(count, GFP_ATOMIC); + + if(!foo){ + if (net_ratelimit()) + printk(KERN_ERR "layer7: out of memory, bailing. num_packets unchanged.\n"); + return count; + } + + copy_from_user(foo, buffer, count); + + num_packets = my_atoi(foo); + kfree (foo); + + /* This has an arbitrary limit to make the math easier. I'm lazy. + But anyway, 99 is a LOT! If you want more, you're doing it wrong! */ + if(num_packets > 99) { + printk(KERN_WARNING "layer7: num_packets can't be > 99.\n"); + num_packets = 99; + } else if(num_packets < 1) { + printk(KERN_WARNING "layer7: num_packets can't be < 1.\n"); + num_packets = 1; + } + + return count; +} + +/* register the proc file */ +static void layer7_init_proc(void) +{ + struct proc_dir_entry* entry; + entry = create_proc_entry("layer7_numpackets", 0644, proc_net); + entry->read_proc = layer7_read_proc; + entry->write_proc = layer7_write_proc; +} + +static void layer7_cleanup_proc(void) +{ + remove_proc_entry("layer7_numpackets", proc_net); +} + +static int __init init(void) +{ + layer7_init_proc(); + if(maxdatalen < 1) { + printk(KERN_WARNING "layer7: maxdatalen can't be < 1, using 1\n"); + maxdatalen = 1; + } + /* This is not a hard limit. It's just here to prevent people from + bringing their slow machines to a grinding halt. */ + else if(maxdatalen > 65536) { + printk(KERN_WARNING "layer7: maxdatalen can't be > 65536, using 65536\n"); + maxdatalen = 65536; + } + return ipt_register_match(&layer7_match); +} + +static void __exit fini(void) +{ + layer7_cleanup_proc(); + ipt_unregister_match(&layer7_match); +} + +module_init(init); +module_exit(fini); diff --git a/release/src/linux/linux/net/ipv4/netfilter/ipt_mac.c b/release/src/linux/linux/net/ipv4/netfilter/ipt_mac.c index b320e29b..d0475155 100644 --- a/release/src/linux/linux/net/ipv4/netfilter/ipt_mac.c +++ b/release/src/linux/linux/net/ipv4/netfilter/ipt_mac.c @@ -19,7 +19,8 @@ match(const struct sk_buff *skb, const struct ipt_mac_info *info = matchinfo; /* Is mac pointer valid? */ - return (skb->mac.raw >= skb->head + return (in != NULL // added for OUTPUT experiment -- zzz + && skb->mac.raw >= skb->head && (skb->mac.raw + ETH_HLEN) <= skb->data /* If so, compare... */ && ((memcmp(skb->mac.ethernet->h_source, info->srcaddr, ETH_ALEN) @@ -33,6 +34,7 @@ ipt_mac_checkentry(const char *tablename, unsigned int matchsize, unsigned int hook_mask) { +#if 0 // removed for OUTPUT experiment --jz /* FORWARD isn't always valid, but it's nice to be able to do --RR */ if (hook_mask & ~((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_IN) @@ -40,7 +42,7 @@ ipt_mac_checkentry(const char *tablename, printk("ipt_mac: only valid for PRE_ROUTING, LOCAL_IN or FORWARD.\n"); return 0; } - +#endif if (matchsize != IPT_ALIGN(sizeof(struct ipt_mac_info))) return 0; diff --git a/release/src/linux/linux/net/ipv4/netfilter/ipt_macsave.c b/release/src/linux/linux/net/ipv4/netfilter/ipt_macsave.c new file mode 100644 index 00000000..25fa26a4 --- /dev/null +++ b/release/src/linux/linux/net/ipv4/netfilter/ipt_macsave.c @@ -0,0 +1,62 @@ +/* + + macsave match + Copyright (C) 2006 Jonathan Zarate + + Licensed under GNU GPL v2 or later. + +*/ + +#include <linux/module.h> +#include <linux/skbuff.h> +#include <net/sock.h> + +#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter_ipv4/ip_conntrack.h> +#include <linux/netfilter_ipv4/ipt_macsave.h> + +#define DEBUG 1 + +#ifdef DEBUG +#define DLOG printk +#else +#define DLOG(...) do { } while (0); +#endif + + +static int match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, + const void *matchinfo, int offset, const void *hdr, u_int16_t datalen, int *hotdrop) +{ + const struct ipt_macsave_match_info *info = matchinfo; + struct ip_conntrack *ct; + enum ip_conntrack_info ctinfo; + + ct = ip_conntrack_get((struct sk_buff *)skb, &ctinfo); // note about cast: ip_conntrack_get() will not modify skb + if (ct) return (memcmp(ct->macsave, info->mac, sizeof(ct->macsave)) == 0) ^ info->invert; + return info->invert; +} + +static int checkentry(const char *tablename, const struct ipt_ip *ip, void *matchinfo, + unsigned int matchsize, unsigned int hook_mask) +{ + return (matchsize == IPT_ALIGN(sizeof(struct ipt_macsave_match_info))); +} + + +static struct ipt_match macsave_match += { { NULL, NULL }, "macsave", &match, &checkentry, NULL, THIS_MODULE }; + +static int __init init(void) +{ + DLOG(KERN_INFO "macsave match init " __DATE__ " " __TIME__ "\n"); + return ipt_register_match(&macsave_match); +} + +static void __exit fini(void) +{ + ipt_unregister_match(&macsave_match); +} + +module_init(init); +module_exit(fini); +MODULE_LICENSE("GPL"); diff --git a/release/src/linux/linux/net/ipv4/netfilter/ipt_mport.c b/release/src/linux/linux/net/ipv4/netfilter/ipt_mport.c index ca99b764..836d3f2f 100644 --- a/release/src/linux/linux/net/ipv4/netfilter/ipt_mport.c +++ b/release/src/linux/linux/net/ipv4/netfilter/ipt_mport.c @@ -10,7 +10,11 @@ MODULE_LICENSE("GPL"); +#if 0 +#define duprintf(format, args...) printk(format , ## args) +#else #define duprintf(format, args...) +#endif /* Returns 1 if the port is matched by the test, 0 otherwise. */ static inline int diff --git a/release/src/linux/linux/net/ipv4/netfilter/ipt_quota.c b/release/src/linux/linux/net/ipv4/netfilter/ipt_quota.c new file mode 100644 index 00000000..d7ab39cf --- /dev/null +++ b/release/src/linux/linux/net/ipv4/netfilter/ipt_quota.c @@ -0,0 +1,88 @@ +/* + * netfilter module to enforce network quotas + * + * Sam Johnston <samj@samj.net> + * + * 30/01/05: Fixed on SMP --Pablo Neira <pablo@eurodev.net> + */ +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/spinlock.h> +#include <linux/interrupt.h> + +#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter_ipv4/ipt_quota.h> + +MODULE_LICENSE("GPL"); + +static spinlock_t quota_lock = SPIN_LOCK_UNLOCKED; + +static int +match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, + int offset, const void *hdr, u_int16_t datalen, int *hotdrop) +{ + struct ipt_quota_info *q = + ((struct ipt_quota_info *) matchinfo)->master; + + spin_lock_bh("a_lock); + + if (q->quota >= datalen) { + /* we can afford this one */ + q->quota -= datalen; + spin_unlock_bh("a_lock); + +#ifdef DEBUG_IPT_QUOTA + printk("IPT Quota OK: %llu datlen %d \n", q->quota, datalen); +#endif + return 1; + } + + /* so we do not allow even small packets from now on */ + q->quota = 0; + +#ifdef DEBUG_IPT_QUOTA + printk("IPT Quota Failed: %llu datlen %d \n", q->quota, datalen); +#endif + + spin_unlock_bh("a_lock); + return 0; +} + +static int +checkentry(const char *tablename, + const struct ipt_ip *ip, + void *matchinfo, unsigned int matchsize, unsigned int hook_mask) +{ + /* TODO: spinlocks? sanity checks? */ + struct ipt_quota_info *q = (struct ipt_quota_info *) matchinfo; + + if (matchsize != IPT_ALIGN(sizeof (struct ipt_quota_info))) + return 0; + + /* For SMP, we only want to use one set of counters. */ + q->master = q; + + return 1; +} + +static struct ipt_match quota_match + = { {NULL, NULL}, "quota", &match, &checkentry, NULL, THIS_MODULE }; + +static int __init +init(void) +{ + return ipt_register_match("a_match); +} + +static void __exit +fini(void) +{ + ipt_unregister_match("a_match); +} + +module_init(init); +module_exit(fini); + diff --git a/release/src/linux/linux/net/ipv4/netfilter/ipt_recent.c b/release/src/linux/linux/net/ipv4/netfilter/ipt_recent.c new file mode 100644 index 00000000..808ae78f --- /dev/null +++ b/release/src/linux/linux/net/ipv4/netfilter/ipt_recent.c @@ -0,0 +1,998 @@ +/* Kernel module to check if the source address has been seen recently. */ +/* Copyright 2002-2003, Stephen Frost */ +/* Author: Stephen Frost <sfrost@snowman.net> */ +/* Project Page: http://snowman.net/projects/ipt_recent/ */ +/* This software is distributed under the terms of the GPL, Version 2 */ +/* This copyright does not cover user programs that use kernel services + * by normal system calls. */ + +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/proc_fs.h> +#include <linux/spinlock.h> +#include <linux/interrupt.h> +#include <asm/uaccess.h> +#include <linux/ctype.h> +#include <linux/ip.h> +#include <linux/vmalloc.h> + +#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter_ipv4/ipt_recent.h> + +#undef DEBUG +#define HASH_LOG 9 + +/* Defaults, these can be overridden on the module command-line. */ +static int ip_list_tot = 100; +static int ip_pkt_list_tot = 20; +static int ip_list_hash_size = 0; +static int ip_list_perms = 0644; +#ifdef DEBUG +static int debug = 1; +#endif + +static char version[] = +KERN_INFO RECENT_NAME " " RECENT_VER ": Stephen Frost <sfrost@snowman.net>. http://snowman.net/projects/ipt_recent/\n"; + +MODULE_AUTHOR("Stephen Frost <sfrost@snowman.net>"); +MODULE_DESCRIPTION("IP tables recently seen matching module " RECENT_VER); +MODULE_LICENSE("GPL"); +MODULE_PARM(ip_list_tot,"i"); +MODULE_PARM(ip_pkt_list_tot,"i"); +MODULE_PARM(ip_list_hash_size,"i"); +MODULE_PARM(ip_list_perms,"i"); +#ifdef DEBUG +MODULE_PARM(debug,"i"); +MODULE_PARM_DESC(debug,"debugging level, defaults to 1"); +#endif +MODULE_PARM_DESC(ip_list_tot,"number of IPs to remember per list"); +MODULE_PARM_DESC(ip_pkt_list_tot,"number of packets per IP to remember"); +MODULE_PARM_DESC(ip_list_hash_size,"size of hash table used to look up IPs"); +MODULE_PARM_DESC(ip_list_perms,"permissions on /proc/net/ipt_recent/* files"); + +/* Structure of our list of recently seen addresses. */ +struct recent_ip_list { + u_int32_t addr; + u_int8_t ttl; + u_int32_t last_seen; + u_int32_t *last_pkts; + u_int32_t oldest_pkt; + u_int32_t hash_entry; + u_int32_t time_pos; +}; + +struct time_info_list { + u_int32_t position; + u_int32_t time; +}; + +/* Structure of our linked list of tables of recent lists. */ +struct recent_ip_tables { + char name[IPT_RECENT_NAME_LEN]; + int count; + int time_pos; + struct recent_ip_list *table; + struct recent_ip_tables *next; + spinlock_t list_lock; + int *hash_table; + struct time_info_list *time_info; +#ifdef CONFIG_PROC_FS + struct proc_dir_entry *status_proc; +#endif /* CONFIG_PROC_FS */ +}; + +/* Our current list of addresses we have recently seen. + * Only added to on a --set, and only updated on --set || --update + */ +static struct recent_ip_tables *r_tables = NULL; + +/* We protect r_list with this spinlock so two processors are not modifying + * the list at the same time. + */ +static spinlock_t recent_lock = SPIN_LOCK_UNLOCKED; + +/* Our /proc/net/ipt_recent entry */ +static struct proc_dir_entry *proc_net_ipt_recent = NULL; + +/* Function declaration for later. */ +static int +match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, + int offset, + const void *hdr, + u_int16_t datalen, + int *hotdrop); + +/* Function to hash a given address into the hash table of table_size size */ +int hash_func(unsigned int addr, int table_size) +{ + int result = 0; + unsigned int value = addr; + do { result ^= value; } while((value >>= HASH_LOG)); + +#ifdef DEBUG + if(debug) printk(KERN_INFO RECENT_NAME ": %d = hash_func(%u,%d)\n", + result & (table_size - 1), + addr, + table_size); +#endif + + return(result & (table_size - 1)); +} + +#ifdef CONFIG_PROC_FS +/* This is the function which produces the output for our /proc output + * interface which lists each IP address, the last seen time and the + * other recent times the address was seen. + */ + +static int ip_recent_get_info(char *buffer, char **start, off_t offset, int length, int *eof, void *data) +{ + int len = 0, count, last_len = 0, pkt_count; + off_t pos = 0; + off_t begin = 0; + struct recent_ip_tables *curr_table; + + curr_table = (struct recent_ip_tables*) data; + + spin_lock_bh(&curr_table->list_lock); + for(count = 0; count < ip_list_tot; count++) { + if(!curr_table->table[count].addr) continue; + last_len = len; + len += sprintf(buffer+len,"src=%u.%u.%u.%u ",NIPQUAD(curr_table->table[count].addr)); + len += sprintf(buffer+len,"ttl: %u ",curr_table->table[count].ttl); + len += sprintf(buffer+len,"last_seen: %u ",curr_table->table[count].last_seen); + len += sprintf(buffer+len,"oldest_pkt: %u ",curr_table->table[count].oldest_pkt); + len += sprintf(buffer+len,"last_pkts: %u",curr_table->table[count].last_pkts[0]); + for(pkt_count = 1; pkt_count < ip_pkt_list_tot; pkt_count++) { + if(!curr_table->table[count].last_pkts[pkt_count]) break; + len += sprintf(buffer+len,", %u",curr_table->table[count].last_pkts[pkt_count]); + } + len += sprintf(buffer+len,"\n"); + pos = begin + len; + if(pos < offset) { len = 0; begin = pos; } + if(pos > offset + length) { len = last_len; break; } + } + + *start = buffer + (offset - begin); + len -= (offset - begin); + if(len > length) len = length; + + spin_unlock_bh(&curr_table->list_lock); + return len; +} + +/* ip_recent_ctrl provides an interface for users to modify the table + * directly. This allows adding entries, removing entries, and + * flushing the entire table. + * This is done by opening up the appropriate table for writing and + * sending one of: + * xx.xx.xx.xx -- Add entry to table with current time + * +xx.xx.xx.xx -- Add entry to table with current time + * -xx.xx.xx.xx -- Remove entry from table + * clear -- Flush table, remove all entries + */ + +static int ip_recent_ctrl(struct file *file, const char *input, unsigned long size, void *data) +{ + static const u_int32_t max[4] = { 0xffffffff, 0xffffff, 0xffff, 0xff }; + u_int32_t val; + int base, used = 0; + char c, *cp; + union iaddr { + uint8_t bytes[4]; + uint32_t word; + } res; + uint8_t *pp = res.bytes; + int digit; + + char buffer[20]; + int len, check_set = 0, count; + u_int32_t addr = 0; + struct sk_buff *skb; + struct ipt_recent_info *info; + struct recent_ip_tables *curr_table; + + curr_table = (struct recent_ip_tables*) data; + + if(size > 20) len = 20; else len = size; + + if(copy_from_user(buffer,input,len)) return -EFAULT; + + if(len < 20) buffer[len] = '\0'; + +#ifdef DEBUG + if(debug) printk(KERN_INFO RECENT_NAME ": ip_recent_ctrl len: %d, input: `%.20s'\n",len,buffer); +#endif + + cp = buffer; + while(isspace(*cp)) { cp++; used++; if(used >= len-5) return used; } + + /* Check if we are asked to flush the entire table */ + if(!memcmp(cp,"clear",5)) { + used += 5; + spin_lock_bh(&curr_table->list_lock); + curr_table->time_pos = 0; + for(count = 0; count < ip_list_hash_size; count++) { + curr_table->hash_table[count] = -1; + } + for(count = 0; count < ip_list_tot; count++) { + curr_table->table[count].last_seen = 0; + curr_table->table[count].addr = 0; + curr_table->table[count].ttl = 0; + memset(curr_table->table[count].last_pkts,0,ip_pkt_list_tot*sizeof(u_int32_t)); + curr_table->table[count].oldest_pkt = 0; + curr_table->table[count].time_pos = 0; + curr_table->time_info[count].position = count; + curr_table->time_info[count].time = 0; + } + spin_unlock_bh(&curr_table->list_lock); + return used; + } + + check_set = IPT_RECENT_SET; + switch(*cp) { + case '+': check_set = IPT_RECENT_SET; cp++; used++; break; + case '-': check_set = IPT_RECENT_REMOVE; cp++; used++; break; + default: if(!isdigit(*cp)) return (used+1); break; + } + +#ifdef DEBUG + if(debug) printk(KERN_INFO RECENT_NAME ": ip_recent_ctrl cp: `%c', check_set: %d\n",*cp,check_set); +#endif + /* Get addr (effectively inet_aton()) */ + /* Shamelessly stolen from libc, a function in the kernel for doing + * this would, of course, be greatly preferred, but our options appear + * to be rather limited, so we will just do it ourselves here. + */ + res.word = 0; + + c = *cp; + for(;;) { + if(!isdigit(c)) return used; + val = 0; base = 10; digit = 0; + if(c == '0') { + c = *++cp; + if(c == 'x' || c == 'X') base = 16, c = *++cp; + else { base = 8; digit = 1; } + } + for(;;) { + if(isascii(c) && isdigit(c)) { + if(base == 8 && (c == '8' || c == '0')) return used; + val = (val * base) + (c - '0'); + c = *++cp; + digit = 1; + } else if(base == 16 && isascii(c) && isxdigit(c)) { + val = (val << 4) | (c + 10 - (islower(c) ? 'a' : 'A')); + c = *++cp; + digit = 1; + } else break; + } + if(c == '.') { + if(pp > res.bytes + 2 || val > 0xff) return used; + *pp++ = val; + c = *++cp; + } else break; + } + used = cp - buffer; + if(c != '\0' && (!isascii(c) || !isspace(c))) return used; + if(c == '\n') used++; + if(!digit) return used; + + if(val > max[pp - res.bytes]) return used; + addr = res.word | htonl(val); + + if(!addr && check_set == IPT_RECENT_SET) return used; + +#ifdef DEBUG + if(debug) printk(KERN_INFO RECENT_NAME ": ip_recent_ctrl c: %c, addr: %u used: %d\n",c,addr,used); +#endif + + /* Set up and just call match */ + info = kmalloc(sizeof(struct ipt_recent_info),GFP_KERNEL); + if(!info) { return -ENOMEM; } + info->seconds = 0; + info->hit_count = 0; + info->check_set = check_set; + info->invert = 0; + info->side = IPT_RECENT_SOURCE; + strncpy(info->name,curr_table->name,IPT_RECENT_NAME_LEN); + info->name[IPT_RECENT_NAME_LEN-1] = '\0'; + + skb = kmalloc(sizeof(struct sk_buff),GFP_KERNEL); + if (!skb) { + used = -ENOMEM; + goto out_free_info; + } + skb->nh.iph = kmalloc(sizeof(struct iphdr),GFP_KERNEL); + if (!skb->nh.iph) { + used = -ENOMEM; + goto out_free_skb; + } + + skb->nh.iph->saddr = addr; + skb->nh.iph->daddr = 0; + /* Clear ttl since we have no way of knowing it */ + skb->nh.iph->ttl = 0; + match(skb,NULL,NULL,info,0,NULL,sizeof(struct ipt_recent_info),NULL); + + kfree(skb->nh.iph); +out_free_skb: + kfree(skb); +out_free_info: + kfree(info); + +#ifdef DEBUG + if(debug) printk(KERN_INFO RECENT_NAME ": Leaving ip_recent_ctrl addr: %u used: %d\n",addr,used); +#endif + return used; +} + +#endif /* CONFIG_PROC_FS */ + +/* 'match' is our primary function, called by the kernel whenever a rule is + * hit with our module as an option to it. + * What this function does depends on what was specifically asked of it by + * the user: + * --set -- Add or update last seen time of the source address of the packet + * -- matchinfo->check_set == IPT_RECENT_SET + * --rcheck -- Just check if the source address is in the list + * -- matchinfo->check_set == IPT_RECENT_CHECK + * --update -- If the source address is in the list, update last_seen + * -- matchinfo->check_set == IPT_RECENT_UPDATE + * --remove -- If the source address is in the list, remove it + * -- matchinfo->check_set == IPT_RECENT_REMOVE + * --seconds -- Option to --rcheck/--update, only match if last_seen within seconds + * -- matchinfo->seconds + * --hitcount -- Option to --rcheck/--update, only match if seen hitcount times + * -- matchinfo->hit_count + * --seconds and --hitcount can be combined + */ +static int +match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, + int offset, + const void *hdr, + u_int16_t datalen, + int *hotdrop) +{ + int pkt_count, hits_found, ans; + unsigned long now; + const struct ipt_recent_info *info = matchinfo; + u_int32_t addr = 0, time_temp; + u_int8_t ttl = skb->nh.iph->ttl; + int *hash_table; + int orig_hash_result, hash_result, temp, location = 0, time_loc, end_collision_chain = -1; + struct time_info_list *time_info; + struct recent_ip_tables *curr_table; + struct recent_ip_tables *last_table; + struct recent_ip_list *r_list; + +#ifdef DEBUG + if(debug) printk(KERN_INFO RECENT_NAME ": match() called\n"); +#endif + + /* Default is false ^ info->invert */ + ans = info->invert; + +#ifdef DEBUG + if(debug) printk(KERN_INFO RECENT_NAME ": match(): name = '%s'\n",info->name); +#endif + + /* if out != NULL then routing has been done and TTL changed. + * We change it back here internally for match what came in before routing. */ + if(out) ttl++; + + /* Find the right table */ + spin_lock_bh(&recent_lock); + curr_table = r_tables; + while( (last_table = curr_table) && strncmp(info->name,curr_table->name,IPT_RECENT_NAME_LEN) && (curr_table = curr_table->next) ); + +#ifdef DEBUG + if(debug) printk(KERN_INFO RECENT_NAME ": match(): table found('%s')\n",info->name); +#endif + + spin_unlock_bh(&recent_lock); + + /* Table with this name not found, match impossible */ + if(!curr_table) { return ans; } + + /* Make sure no one is changing the list while we work with it */ + spin_lock_bh(&curr_table->list_lock); + + r_list = curr_table->table; + if(info->side == IPT_RECENT_DEST) addr = skb->nh.iph->daddr; else addr = skb->nh.iph->saddr; + + if(!addr) { +#ifdef DEBUG + if(debug) printk(KERN_INFO RECENT_NAME ": match() address (%u) invalid, leaving.\n",addr); +#endif + spin_unlock_bh(&curr_table->list_lock); + return ans; + } + +#ifdef DEBUG + if(debug) printk(KERN_INFO RECENT_NAME ": match(): checking table, addr: %u, ttl: %u, orig_ttl: %u\n",addr,ttl,skb->nh.iph->ttl); +#endif + + /* Get jiffies now in case they changed while we were waiting for a lock */ + now = jiffies; + hash_table = curr_table->hash_table; + time_info = curr_table->time_info; + + orig_hash_result = hash_result = hash_func(addr,ip_list_hash_size); + /* Hash entry at this result used */ + /* Check for TTL match if requested. If TTL is zero then a match would never + * happen, so match regardless of existing TTL in that case. Zero means the + * entry was added via the /proc interface anyway, so we will just use the + * first TTL we get for that IP address. */ + if(info->check_set & IPT_RECENT_TTL) { + while(hash_table[hash_result] != -1 && !(r_list[hash_table[hash_result]].addr == addr && + (!r_list[hash_table[hash_result]].ttl || r_list[hash_table[hash_result]].ttl == ttl))) { + /* Collision in hash table */ + hash_result = (hash_result + 1) % ip_list_hash_size; + } + } else { + while(hash_table[hash_result] != -1 && r_list[hash_table[hash_result]].addr != addr) { + /* Collision in hash table */ + hash_result = (hash_result + 1) % ip_list_hash_size; + } + } + + if(hash_table[hash_result] == -1 && !(info->check_set & IPT_RECENT_SET)) { + /* IP not in list and not asked to SET */ + spin_unlock_bh(&curr_table->list_lock); + return ans; + } + + /* Check if we need to handle the collision, do not need to on REMOVE */ + if(orig_hash_result != hash_result && !(info->check_set & IPT_RECENT_REMOVE)) { +#ifdef DEBUG + if(debug) printk(KERN_INFO RECENT_NAME ": match(): Collision in hash table. (or: %d,hr: %d,oa: %u,ha: %u)\n", + orig_hash_result, + hash_result, + r_list[hash_table[orig_hash_result]].addr, + addr); +#endif + + /* We had a collision. + * orig_hash_result is where we started, hash_result is where we ended up. + * So, swap them because we are likely to see the same guy again sooner */ +#ifdef DEBUG + if(debug) { + printk(KERN_INFO RECENT_NAME ": match(): Collision; hash_table[orig_hash_result] = %d\n",hash_table[orig_hash_result]); + printk(KERN_INFO RECENT_NAME ": match(): Collision; r_list[hash_table[orig_hash_result]].hash_entry = %d\n", + r_list[hash_table[orig_hash_result]].hash_entry); + } +#endif + + r_list[hash_table[orig_hash_result]].hash_entry = hash_result; + + + temp = hash_table[orig_hash_result]; +#ifdef DEBUG + if(debug) printk(KERN_INFO RECENT_NAME ": match(): Collision; hash_table[hash_result] = %d\n",hash_table[hash_result]); +#endif + hash_table[orig_hash_result] = hash_table[hash_result]; + hash_table[hash_result] = temp; + temp = hash_result; + hash_result = orig_hash_result; + orig_hash_result = temp; + time_info[r_list[hash_table[orig_hash_result]].time_pos].position = hash_table[orig_hash_result]; + if(hash_table[hash_result] != -1) { + r_list[hash_table[hash_result]].hash_entry = hash_result; + time_info[r_list[hash_table[hash_result]].time_pos].position = hash_table[hash_result]; + } + +#ifdef DEBUG + if(debug) printk(KERN_INFO RECENT_NAME ": match(): Collision handled.\n"); +#endif + } + + if(hash_table[hash_result] == -1) { +#ifdef DEBUG + if(debug) printk(KERN_INFO RECENT_NAME ": match(): New table entry. (hr: %d,ha: %u)\n", + hash_result, addr); +#endif + + /* New item found and IPT_RECENT_SET, so we need to add it */ + location = time_info[curr_table->time_pos].position; + hash_table[r_list[location].hash_entry] = -1; + hash_table[hash_result] = location; + memset(r_list[location].last_pkts,0,ip_pkt_list_tot*sizeof(u_int32_t)); + r_list[location].time_pos = curr_table->time_pos; + r_list[location].addr = addr; + r_list[location].ttl = ttl; + r_list[location].last_seen = now; + r_list[location].oldest_pkt = 1; + r_list[location].last_pkts[0] = now; + r_list[location].hash_entry = hash_result; + time_info[curr_table->time_pos].time = r_list[location].last_seen; + curr_table->time_pos = (curr_table->time_pos + 1) % ip_list_tot; + + ans = !info->invert; + } else { +#ifdef DEBUG + if(debug) printk(KERN_INFO RECENT_NAME ": match(): Existing table entry. (hr: %d,ha: %u)\n", + hash_result, + addr); +#endif + + /* Existing item found */ + location = hash_table[hash_result]; + /* We have a match on address, now to make sure it meets all requirements for a + * full match. */ + if(info->check_set & IPT_RECENT_CHECK || info->check_set & IPT_RECENT_UPDATE) { + if(!info->seconds && !info->hit_count) ans = !info->invert; else ans = info->invert; + if(info->seconds && !info->hit_count) { + if(time_before_eq(now,r_list[location].last_seen+info->seconds*HZ)) ans = !info->invert; else ans = info->invert; + } + if(info->seconds && info->hit_count) { + for(pkt_count = 0, hits_found = 0; pkt_count < ip_pkt_list_tot; pkt_count++) { + if(time_before_eq(now,r_list[location].last_pkts[pkt_count]+info->seconds*HZ)) hits_found++; + } + if(hits_found >= info->hit_count) ans = !info->invert; else ans = info->invert; + } + if(info->hit_count && !info->seconds) { + for(pkt_count = 0, hits_found = 0; pkt_count < ip_pkt_list_tot; pkt_count++) { + if(r_list[location].last_pkts[pkt_count] == 0) break; + hits_found++; + } + if(hits_found >= info->hit_count) ans = !info->invert; else ans = info->invert; + } + } +#ifdef DEBUG + if(debug) { + if(ans) + printk(KERN_INFO RECENT_NAME ": match(): match addr: %u\n",addr); + else + printk(KERN_INFO RECENT_NAME ": match(): no match addr: %u\n",addr); + } +#endif + + /* If and only if we have been asked to SET, or to UPDATE (on match) do we add the + * current timestamp to the last_seen. */ + if((info->check_set & IPT_RECENT_SET && (ans = !info->invert)) || (info->check_set & IPT_RECENT_UPDATE && ans)) { +#ifdef DEBUG + if(debug) printk(KERN_INFO RECENT_NAME ": match(): SET or UPDATE; updating time info.\n"); +#endif + /* Have to update our time info */ + time_loc = r_list[location].time_pos; + time_info[time_loc].time = now; + time_info[time_loc].position = location; + while((time_info[(time_loc+1) % ip_list_tot].time < time_info[time_loc].time) && ((time_loc+1) % ip_list_tot) != curr_table->time_pos) { + time_temp = time_info[time_loc].time; + time_info[time_loc].time = time_info[(time_loc+1)%ip_list_tot].time; + time_info[(time_loc+1)%ip_list_tot].time = time_temp; + time_temp = time_info[time_loc].position; + time_info[time_loc].position = time_info[(time_loc+1)%ip_list_tot].position; + time_info[(time_loc+1)%ip_list_tot].position = time_temp; + r_list[time_info[time_loc].position].time_pos = time_loc; + r_list[time_info[(time_loc+1)%ip_list_tot].position].time_pos = (time_loc+1)%ip_list_tot; + time_loc = (time_loc+1) % ip_list_tot; + } + r_list[location].time_pos = time_loc; + r_list[location].ttl = ttl; + r_list[location].last_pkts[r_list[location].oldest_pkt] = now; + r_list[location].oldest_pkt = ++r_list[location].oldest_pkt % ip_pkt_list_tot; + r_list[location].last_seen = now; + } + /* If we have been asked to remove the entry from the list, just set it to 0 */ + if(info->check_set & IPT_RECENT_REMOVE) { +#ifdef DEBUG + if(debug) printk(KERN_INFO RECENT_NAME ": match(): REMOVE; clearing entry (or: %d, hr: %d).\n",orig_hash_result,hash_result); +#endif + /* Check if this is part of a collision chain */ + while(hash_table[(orig_hash_result+1) % ip_list_hash_size] != -1) { + orig_hash_result++; + if(hash_func(r_list[hash_table[orig_hash_result]].addr,ip_list_hash_size) == hash_result) { + /* Found collision chain, how deep does this rabbit hole go? */ +#ifdef DEBUG + if(debug) printk(KERN_INFO RECENT_NAME ": match(): REMOVE; found collision chain.\n"); +#endif + end_collision_chain = orig_hash_result; + } + } + if(end_collision_chain != -1) { +#ifdef DEBUG + if(debug) printk(KERN_INFO RECENT_NAME ": match(): REMOVE; part of collision chain, moving to end.\n"); +#endif + /* Part of a collision chain, swap it with the end of the chain + * before removing. */ + r_list[hash_table[end_collision_chain]].hash_entry = hash_result; + temp = hash_table[end_collision_chain]; + hash_table[end_collision_chain] = hash_table[hash_result]; + hash_table[hash_result] = temp; + time_info[r_list[hash_table[hash_result]].time_pos].position = hash_table[hash_result]; + hash_result = end_collision_chain; + r_list[hash_table[hash_result]].hash_entry = hash_result; + time_info[r_list[hash_table[hash_result]].time_pos].position = hash_table[hash_result]; + } + location = hash_table[hash_result]; + hash_table[r_list[location].hash_entry] = -1; + time_loc = r_list[location].time_pos; + time_info[time_loc].time = 0; + time_info[time_loc].position = location; + while((time_info[(time_loc+1) % ip_list_tot].time < time_info[time_loc].time) && ((time_loc+1) % ip_list_tot) != curr_table->time_pos) { + time_temp = time_info[time_loc].time; + time_info[time_loc].time = time_info[(time_loc+1)%ip_list_tot].time; + time_info[(time_loc+1)%ip_list_tot].time = time_temp; + time_temp = time_info[time_loc].position; + time_info[time_loc].position = time_info[(time_loc+1)%ip_list_tot].position; + time_info[(time_loc+1)%ip_list_tot].position = time_temp; + r_list[time_info[time_loc].position].time_pos = time_loc; + r_list[time_info[(time_loc+1)%ip_list_tot].position].time_pos = (time_loc+1)%ip_list_tot; + time_loc = (time_loc+1) % ip_list_tot; + } + r_list[location].time_pos = time_loc; + r_list[location].last_seen = 0; + r_list[location].addr = 0; + r_list[location].ttl = 0; + memset(r_list[location].last_pkts,0,ip_pkt_list_tot*sizeof(u_int32_t)); + r_list[location].oldest_pkt = 0; + ans = !info->invert; + } + spin_unlock_bh(&curr_table->list_lock); + return ans; + } + + spin_unlock_bh(&curr_table->list_lock); +#ifdef DEBUG + if(debug) printk(KERN_INFO RECENT_NAME ": match() left.\n"); +#endif + return ans; +} + +/* This function is to verify that the rule given during the userspace iptables + * command is correct. + * If the command is valid then we check if the table name referred to by the + * rule exists, if not it is created. + */ +static int +checkentry(const char *tablename, + const struct ipt_ip *ip, + void *matchinfo, + unsigned int matchsize, + unsigned int hook_mask) +{ + int flag = 0, c; + u_int32_t *hold; + const struct ipt_recent_info *info = matchinfo; + struct recent_ip_tables *curr_table, *find_table, *last_table; + +#ifdef DEBUG + if(debug) printk(KERN_INFO RECENT_NAME ": checkentry() entered.\n"); +#endif + + if (matchsize != IPT_ALIGN(sizeof(struct ipt_recent_info))) return 0; + + /* seconds and hit_count only valid for CHECK/UPDATE */ + if(info->check_set & IPT_RECENT_SET) { flag++; if(info->seconds || info->hit_count) return 0; } + if(info->check_set & IPT_RECENT_REMOVE) { flag++; if(info->seconds || info->hit_count) return 0; } + if(info->check_set & IPT_RECENT_CHECK) flag++; + if(info->check_set & IPT_RECENT_UPDATE) flag++; + + /* One and only one of these should ever be set */ + if(flag != 1) return 0; + + /* Name must be set to something */ + if(!info->name || !info->name[0]) return 0; + + /* Things look good, create a list for this if it does not exist */ + /* Lock the linked list while we play with it */ + spin_lock_bh(&recent_lock); + + /* Look for an entry with this name already created */ + /* Finds the end of the list and the entry before the end if current name does not exist */ + find_table = r_tables; + while( (last_table = find_table) && strncmp(info->name,find_table->name,IPT_RECENT_NAME_LEN) && (find_table = find_table->next) ); + + /* If a table already exists just increment the count on that table and return */ + if(find_table) { +#ifdef DEBUG + if(debug) printk(KERN_INFO RECENT_NAME ": checkentry: table found (%s), incrementing count.\n",info->name); +#endif + find_table->count++; + spin_unlock_bh(&recent_lock); + return 1; + } + + spin_unlock_bh(&recent_lock); + + /* Table with this name not found */ + /* Allocate memory for new linked list item */ + +#ifdef DEBUG + if(debug) { + printk(KERN_INFO RECENT_NAME ": checkentry: no table found (%s)\n",info->name); + printk(KERN_INFO RECENT_NAME ": checkentry: Allocationg %d for link-list entry.\n",sizeof(struct recent_ip_tables)); + } +#endif + + curr_table = vmalloc(sizeof(struct recent_ip_tables)); + if(curr_table == NULL) return -ENOMEM; + + curr_table->list_lock = SPIN_LOCK_UNLOCKED; + curr_table->next = NULL; + curr_table->count = 1; + curr_table->time_pos = 0; + strncpy(curr_table->name,info->name,IPT_RECENT_NAME_LEN); + curr_table->name[IPT_RECENT_NAME_LEN-1] = '\0'; + + /* Allocate memory for this table and the list of packets in each entry. */ +#ifdef DEBUG + if(debug) printk(KERN_INFO RECENT_NAME ": checkentry: Allocating %d for table (%s).\n", + sizeof(struct recent_ip_list)*ip_list_tot, + info->name); +#endif + + curr_table->table = vmalloc(sizeof(struct recent_ip_list)*ip_list_tot); + if(curr_table->table == NULL) { vfree(curr_table); return -ENOMEM; } + memset(curr_table->table,0,sizeof(struct recent_ip_list)*ip_list_tot); +#ifdef DEBUG + if(debug) printk(KERN_INFO RECENT_NAME ": checkentry: Allocating %d for pkt_list.\n", + sizeof(u_int32_t)*ip_pkt_list_tot*ip_list_tot); +#endif + + hold = vmalloc(sizeof(u_int32_t)*ip_pkt_list_tot*ip_list_tot); +#ifdef DEBUG + if(debug) printk(KERN_INFO RECENT_NAME ": checkentry: After pkt_list allocation.\n"); +#endif + if(hold == NULL) { + printk(KERN_INFO RECENT_NAME ": checkentry: unable to allocate for pkt_list.\n"); + vfree(curr_table->table); + vfree(curr_table); + return -ENOMEM; + } + for(c = 0; c < ip_list_tot; c++) { + curr_table->table[c].last_pkts = hold + c*ip_pkt_list_tot; + } + + /* Allocate memory for the hash table */ +#ifdef DEBUG + if(debug) printk(KERN_INFO RECENT_NAME ": checkentry: Allocating %d for hash_table.\n", + sizeof(int)*ip_list_hash_size); +#endif + + curr_table->hash_table = vmalloc(sizeof(int)*ip_list_hash_size); + if(!curr_table->hash_table) { + printk(KERN_INFO RECENT_NAME ": checkentry: unable to allocate for hash_table.\n"); + vfree(hold); + vfree(curr_table->table); + vfree(curr_table); + return -ENOMEM; + } + + for(c = 0; c < ip_list_hash_size; c++) { + curr_table->hash_table[c] = -1; + } + + /* Allocate memory for the time info */ +#ifdef DEBUG + if(debug) printk(KERN_INFO RECENT_NAME ": checkentry: Allocating %d for time_info.\n", + sizeof(struct time_info_list)*ip_list_tot); +#endif + + curr_table->time_info = vmalloc(sizeof(struct time_info_list)*ip_list_tot); + if(!curr_table->time_info) { + printk(KERN_INFO RECENT_NAME ": checkentry: unable to allocate for time_info.\n"); + vfree(curr_table->hash_table); + vfree(hold); + vfree(curr_table->table); + vfree(curr_table); + return -ENOMEM; + } + for(c = 0; c < ip_list_tot; c++) { + curr_table->time_info[c].position = c; + curr_table->time_info[c].time = 0; + } + + /* Put the new table in place */ + spin_lock_bh(&recent_lock); + find_table = r_tables; + while( (last_table = find_table) && strncmp(info->name,find_table->name,IPT_RECENT_NAME_LEN) && (find_table = find_table->next) ); + + /* If a table already exists just increment the count on that table and return */ + if(find_table) { + find_table->count++; + spin_unlock_bh(&recent_lock); +#ifdef DEBUG + if(debug) printk(KERN_INFO RECENT_NAME ": checkentry: table found (%s), created by other process.\n",info->name); +#endif + vfree(curr_table->time_info); + vfree(curr_table->hash_table); + vfree(hold); + vfree(curr_table->table); + vfree(curr_table); + return 1; + } + if(!last_table) r_tables = curr_table; else last_table->next = curr_table; + + spin_unlock_bh(&recent_lock); + +#ifdef CONFIG_PROC_FS + /* Create our proc 'status' entry. */ + curr_table->status_proc = create_proc_entry(curr_table->name, ip_list_perms, proc_net_ipt_recent); + if (!curr_table->status_proc) { + printk(KERN_INFO RECENT_NAME ": checkentry: unable to allocate for /proc entry.\n"); + /* Destroy the created table */ + spin_lock_bh(&recent_lock); + last_table = NULL; + curr_table = r_tables; + if(!curr_table) { +#ifdef DEBUG + if(debug) printk(KERN_INFO RECENT_NAME ": checkentry() create_proc failed, no tables.\n"); +#endif + spin_unlock_bh(&recent_lock); + return -ENOMEM; + } + while( strncmp(info->name,curr_table->name,IPT_RECENT_NAME_LEN) && (last_table = curr_table) && (curr_table = curr_table->next) ); + if(!curr_table) { +#ifdef DEBUG + if(debug) printk(KERN_INFO RECENT_NAME ": checkentry() create_proc failed, table already destroyed.\n"); +#endif + spin_unlock_bh(&recent_lock); + return -ENOMEM; + } + if(last_table) last_table->next = curr_table->next; else r_tables = curr_table->next; + spin_unlock_bh(&recent_lock); + vfree(curr_table->time_info); + vfree(curr_table->hash_table); + vfree(hold); + vfree(curr_table->table); + vfree(curr_table); + return -ENOMEM; + } + + curr_table->status_proc->owner = THIS_MODULE; + curr_table->status_proc->data = curr_table; + wmb(); + curr_table->status_proc->read_proc = ip_recent_get_info; + curr_table->status_proc->write_proc = ip_recent_ctrl; +#endif /* CONFIG_PROC_FS */ + +#ifdef DEBUG + if(debug) printk(KERN_INFO RECENT_NAME ": checkentry() left.\n"); +#endif + + return 1; +} + +/* This function is called in the event that a rule matching this module is + * removed. + * When this happens we need to check if there are no other rules matching + * the table given. If that is the case then we remove the table and clean + * up its memory. + */ +static void +destroy(void *matchinfo, unsigned int matchsize) +{ + const struct ipt_recent_info *info = matchinfo; + struct recent_ip_tables *curr_table, *last_table; + +#ifdef DEBUG + if(debug) printk(KERN_INFO RECENT_NAME ": destroy() entered.\n"); +#endif + + if(matchsize != IPT_ALIGN(sizeof(struct ipt_recent_info))) return; + + /* Lock the linked list while we play with it */ + spin_lock_bh(&recent_lock); + + /* Look for an entry with this name already created */ + /* Finds the end of the list and the entry before the end if current name does not exist */ + last_table = NULL; + curr_table = r_tables; + if(!curr_table) { +#ifdef DEBUG + if(debug) printk(KERN_INFO RECENT_NAME ": destroy() No tables found, leaving.\n"); +#endif + spin_unlock_bh(&recent_lock); + return; + } + while( strncmp(info->name,curr_table->name,IPT_RECENT_NAME_LEN) && (last_table = curr_table) && (curr_table = curr_table->next) ); + + /* If a table does not exist then do nothing and return */ + if(!curr_table) { +#ifdef DEBUG + if(debug) printk(KERN_INFO RECENT_NAME ": destroy() table not found, leaving.\n"); +#endif + spin_unlock_bh(&recent_lock); + return; + } + + curr_table->count--; + + /* If count is still non-zero then there are still rules referenceing it so we do nothing */ + if(curr_table->count) { +#ifdef DEBUG + if(debug) printk(KERN_INFO RECENT_NAME ": destroy() table found, non-zero count, leaving.\n"); +#endif + spin_unlock_bh(&recent_lock); + return; + } + +#ifdef DEBUG + if(debug) printk(KERN_INFO RECENT_NAME ": destroy() table found, zero count, removing.\n"); +#endif + + /* Count must be zero so we remove this table from the list */ + if(last_table) last_table->next = curr_table->next; else r_tables = curr_table->next; + + spin_unlock_bh(&recent_lock); + + /* lock to make sure any late-runners still using this after we removed it from + * the list finish up then remove everything */ + spin_lock_bh(&curr_table->list_lock); + spin_unlock_bh(&curr_table->list_lock); + +#ifdef CONFIG_PROC_FS + if(curr_table->status_proc) remove_proc_entry(curr_table->name,proc_net_ipt_recent); +#endif /* CONFIG_PROC_FS */ + vfree(curr_table->table[0].last_pkts); + vfree(curr_table->table); + vfree(curr_table->hash_table); + vfree(curr_table->time_info); + vfree(curr_table); + +#ifdef DEBUG + if(debug) printk(KERN_INFO RECENT_NAME ": destroy() left.\n"); +#endif + + return; +} + +/* This is the structure we pass to ipt_register to register our + * module with iptables. + */ +static struct ipt_match recent_match = { + .name = "recent", + .match = &match, + .checkentry = &checkentry, + .destroy = &destroy, + .me = THIS_MODULE +}; + +/* Kernel module initialization. */ +static int __init init(void) +{ + int count; + + printk(version); + proc_net_ipt_recent = proc_mkdir("ipt_recent",proc_net); + if(!proc_net_ipt_recent) return -ENOMEM; + + if(ip_list_hash_size && ip_list_hash_size <= ip_list_tot) { + printk(KERN_WARNING RECENT_NAME ": ip_list_hash_size too small, resetting to default.\n"); + ip_list_hash_size = 0; + } + + if(!ip_list_hash_size) { + ip_list_hash_size = ip_list_tot*3; + count = 2*2; + while(ip_list_hash_size > count) count = count*2; + ip_list_hash_size = count; + } + +#ifdef DEBUG + if(debug) printk(KERN_INFO RECENT_NAME ": ip_list_hash_size: %d\n",ip_list_hash_size); +#endif + + return ipt_register_match(&recent_match); +} + +/* Kernel module destruction. */ +static void __exit fini(void) +{ + ipt_unregister_match(&recent_match); + + remove_proc_entry("ipt_recent",proc_net); +} + +/* Register our module with the kernel. */ +module_init(init); +module_exit(fini); diff --git a/release/src/linux/linux/net/ipv4/netfilter/ipt_string.c b/release/src/linux/linux/net/ipv4/netfilter/ipt_string.c new file mode 100644 index 00000000..a18b89d0 --- /dev/null +++ b/release/src/linux/linux/net/ipv4/netfilter/ipt_string.c @@ -0,0 +1,218 @@ +/* Kernel module to match a string into a packet. + * + * Copyright (C) 2000 Emmanuel Roger <winfield@freegates.be> + * + * ChangeLog + * 19.02.2002: Gianni Tedesco <gianni@ecsc.co.uk> + * Fixed SMP re-entrancy problem using per-cpu data areas + * for the skip/shift tables. + * 02.05.2001: Gianni Tedesco <gianni@ecsc.co.uk> + * Fixed kernel panic, due to overrunning boyer moore string + * tables. Also slightly tweaked heuristic for deciding what + * search algo to use. + * 27.01.2001: Gianni Tedesco <gianni@ecsc.co.uk> + * Implemented Boyer Moore Sublinear search algorithm + * alongside the existing linear search based on memcmp(). + * Also a quick check to decide which method to use on a per + * packet basis. + */ + +#include <linux/smp.h> +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/file.h> +#include <net/sock.h> + +#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter_ipv4/ipt_string.h> + +MODULE_LICENSE("GPL"); + +struct string_per_cpu { + int *skip; + int *shift; + int *len; +}; + +struct string_per_cpu *bm_string_data=NULL; + +/* Boyer Moore Sublinear string search - VERY FAST */ +char *search_sublinear (char *needle, char *haystack, int needle_len, int haystack_len) +{ + int M1, right_end, sk, sh; + int ended, j, i; + + int *skip, *shift, *len; + + /* use data suitable for this CPU */ + shift=bm_string_data[smp_processor_id()].shift; + skip=bm_string_data[smp_processor_id()].skip; + len=bm_string_data[smp_processor_id()].len; + + /* Setup skip/shift tables */ + M1 = right_end = needle_len-1; + for (i = 0; i < BM_MAX_HLEN; i++) skip[i] = needle_len; + for (i = 0; needle[i]; i++) skip[needle[i]] = M1 - i; + + for (i = 1; i < needle_len; i++) { + for (j = 0; j < needle_len && needle[M1 - j] == needle[M1 - i - j]; j++); + len[i] = j; + } + + shift[0] = 1; + for (i = 1; i < needle_len; i++) shift[i] = needle_len; + for (i = M1; i > 0; i--) shift[len[i]] = i; + ended = 0; + + for (i = 0; i < needle_len; i++) { + if (len[i] == M1 - i) ended = i; + if (ended) shift[i] = ended; + } + + /* Do the search*/ + while (right_end < haystack_len) + { + for (i = 0; i < needle_len && haystack[right_end - i] == needle[M1 - i]; i++); + if (i == needle_len) { + return haystack+(right_end - M1); + } + + sk = skip[haystack[right_end - i]]; + sh = shift[i]; + right_end = max(right_end - i + sk, right_end + sh); + } + + return NULL; +} + +/* Linear string search based on memcmp() */ +char *search_linear (char *needle, char *haystack, int needle_len, int haystack_len) +{ + char *k = haystack + (haystack_len-needle_len); + char *t = haystack; + + while ( t <= k ) { + if (memcmp(t, needle, needle_len) == 0) + return t; + t++; + } + + return NULL; +} + + +static int +match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, + int offset, + const void *hdr, + u_int16_t datalen, + int *hotdrop) +{ + const struct ipt_string_info *info = matchinfo; + struct iphdr *ip = skb->nh.iph; + int hlen, nlen; + char *needle, *haystack; + proc_ipt_search search=search_linear; + + if ( !ip ) return 0; + + /* get lenghts, and validate them */ + nlen=info->len; + hlen=ntohs(ip->tot_len)-(ip->ihl*4); + if ( nlen > hlen ) return 0; + + needle=(char *)&info->string; + haystack=(char *)ip+(ip->ihl*4); + + /* The sublinear search comes in to its own + * on the larger packets */ + if ( (hlen>IPT_STRING_HAYSTACK_THRESH) && + (nlen>IPT_STRING_NEEDLE_THRESH) ) { + if ( hlen < BM_MAX_HLEN ) { + search=search_sublinear; + }else{ + if (net_ratelimit()) + printk(KERN_INFO "ipt_string: Packet too big " + "to attempt sublinear string search " + "(%d bytes)\n", hlen ); + } + } + + return ((search(needle, haystack, nlen, hlen)!=NULL) ^ info->invert); +} + +static int +checkentry(const char *tablename, + const struct ipt_ip *ip, + void *matchinfo, + unsigned int matchsize, + unsigned int hook_mask) +{ + + if (matchsize != IPT_ALIGN(sizeof(struct ipt_string_info))) + return 0; + + return 1; +} + +void string_freeup_data(void) +{ + int c; + + if ( bm_string_data ) { + for(c=0; c<smp_num_cpus; c++) { + if ( bm_string_data[c].shift ) kfree(bm_string_data[c].shift); + if ( bm_string_data[c].skip ) kfree(bm_string_data[c].skip); + if ( bm_string_data[c].len ) kfree(bm_string_data[c].len); + } + kfree(bm_string_data); + } +} + +static struct ipt_match string_match += { { NULL, NULL }, "string", &match, &checkentry, NULL, THIS_MODULE }; + +static int __init init(void) +{ + int c; + size_t tlen; + size_t alen; + + tlen=sizeof(struct string_per_cpu)*smp_num_cpus; + alen=sizeof(int)*BM_MAX_HLEN; + + /* allocate array of structures */ + if ( !(bm_string_data=kmalloc(tlen,GFP_KERNEL)) ) { + return 0; + } + + memset(bm_string_data, 0, tlen); + + /* allocate our skip/shift tables */ + for(c=0; c<smp_num_cpus; c++) { + if ( !(bm_string_data[c].shift=kmalloc(alen, GFP_KERNEL)) ) + goto alloc_fail; + if ( !(bm_string_data[c].skip=kmalloc(alen, GFP_KERNEL)) ) + goto alloc_fail; + if ( !(bm_string_data[c].len=kmalloc(alen, GFP_KERNEL)) ) + goto alloc_fail; + } + + return ipt_register_match(&string_match); + +alloc_fail: + string_freeup_data(); + return 0; +} + +static void __exit fini(void) +{ + ipt_unregister_match(&string_match); + string_freeup_data(); +} + +module_init(init); +module_exit(fini); diff --git a/release/src/linux/linux/net/ipv4/netfilter/ipt_time.c b/release/src/linux/linux/net/ipv4/netfilter/ipt_time.c index 216098fc..d3484bd9 100644 --- a/release/src/linux/linux/net/ipv4/netfilter/ipt_time.c +++ b/release/src/linux/linux/net/ipv4/netfilter/ipt_time.c @@ -10,6 +10,7 @@ 2001-26-09 Fabrice MARIE <fabrice@netfilter.org> : force the match to be in LOCAL_IN or PRE_ROUTING only. 2001-30-11 Fabrice : added the possibility to use the match in FORWARD/OUTPUT with a little hack, added Nguyen Dang Phuoc Dong <dongnd@tlnet.com.vn> patch to support timezones. + 2004-05-02 Fabrice : added support for date matching, from an idea of Fabien COELHO. */ #include <linux/module.h> @@ -19,7 +20,7 @@ #include <linux/time.h> MODULE_AUTHOR("Fabrice MARIE <fabrice@netfilter.org>"); -MODULE_DESCRIPTION("Match arrival timestamp"); +MODULE_DESCRIPTION("Match arrival timestamp/date"); MODULE_LICENSE("GPL"); struct tm @@ -53,7 +54,8 @@ match(const struct sk_buff *skb, { const struct ipt_time_info *info = matchinfo; /* match info for rule */ struct tm currenttime; /* time human readable */ - unsigned int packet_time; + u_int8_t days_of_week[7] = {64, 32, 16, 8, 4, 2, 1}; + u_int16_t packet_time; struct timeval kerneltimeval; time_t packet_local_time; @@ -66,22 +68,21 @@ match(const struct sk_buff *skb, else packet_local_time = skb->stamp.tv_sec; + /* First we make sure we are in the date start-stop boundaries */ + if ((packet_local_time < info->date_start) || (packet_local_time > info->date_stop)) + return 0; /* We are outside the date boundaries */ + /* Transform the timestamp of the packet, in a human readable form */ localtime(&packet_local_time, ¤ttime); /* check if we match this timestamp, we start by the days... */ - if (!((1 << currenttime.tm_wday) & info->days_match)) + if ((days_of_week[currenttime.tm_wday] & info->days_match) != days_of_week[currenttime.tm_wday]) return 0; /* the day doesn't match */ /* ... check the time now */ - packet_time = (currenttime.tm_hour * 60 * 60) + (currenttime.tm_min * 60) + currenttime.tm_sec; - if (info->time_start < info->time_stop) { - if ((packet_time < info->time_start) || (packet_time > info->time_stop)) - return 0; - } else { - if ((packet_time < info->time_start) && (packet_time > info->time_stop)) - return 0; - } + packet_time = (currenttime.tm_hour * 60) + currenttime.tm_min; + if ((packet_time < info->time_start) || (packet_time > info->time_stop)) + return 0; /* here we match ! */ return 1; @@ -96,24 +97,25 @@ checkentry(const char *tablename, { struct ipt_time_info *info = matchinfo; /* match info for rule */ - /* First, check that we are in the correct hook */ - /* PRE_ROUTING, LOCAL_IN or FROWARD */ + /* First, check that we are in the correct hooks */ if (hook_mask & ~((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_IN) | (1 << NF_IP_FORWARD) | (1 << NF_IP_LOCAL_OUT))) { printk("ipt_time: error, only valid for PRE_ROUTING, LOCAL_IN, FORWARD and OUTPUT)\n"); return 0; } - - /* always use kerneltime */ + /* we use the kerneltime if we are in forward or output */ info->kerneltime = 1; + if (hook_mask & ~((1 << NF_IP_FORWARD) | (1 << NF_IP_LOCAL_OUT))) + /* we use the skb time */ + info->kerneltime = 0; /* Check the size */ - if (matchsize < IPT_ALIGN(sizeof(struct ipt_time_info))) + if (matchsize != IPT_ALIGN(sizeof(struct ipt_time_info))) return 0; /* Now check the coherence of the data ... */ - if ((info->time_start > 86399) || /* 24*60*60-1 = 86399*/ - (info->time_stop > 86399)) + if ((info->time_start > 1439) || /* 23*60+59 = 1439*/ + (info->time_stop > 1439)) { printk(KERN_WARNING "ipt_time: invalid argument\n"); return 0; @@ -122,8 +124,12 @@ checkentry(const char *tablename, return 1; } -static struct ipt_match time_match -= { { NULL, NULL }, "time", &match, &checkentry, NULL, THIS_MODULE }; +static struct ipt_match time_match = { + .name = "time", + .match = match, + .checkentry = checkentry, + .me = THIS_MODULE, +}; static int __init init(void) { diff --git a/release/src/linux/linux/net/ipv4/netfilter/ipt_u32.c b/release/src/linux/linux/net/ipv4/netfilter/ipt_u32.c new file mode 100644 index 00000000..0c749563 --- /dev/null +++ b/release/src/linux/linux/net/ipv4/netfilter/ipt_u32.c @@ -0,0 +1,211 @@ +/* Kernel module to match u32 packet content. */ + +/* +U32 tests whether quantities of up to 4 bytes extracted from a packet +have specified values. The specification of what to extract is general +enough to find data at given offsets from tcp headers or payloads. + + --u32 tests + The argument amounts to a program in a small language described below. + tests := location = value | tests && location = value + value := range | value , range + range := number | number : number + a single number, n, is interpreted the same as n:n + n:m is interpreted as the range of numbers >=n and <=m + location := number | location operator number + operator := & | << | >> | @ + + The operators &, <<, >>, && mean the same as in c. The = is really a set + membership operator and the value syntax describes a set. The @ operator + is what allows moving to the next header and is described further below. + + *** Until I can find out how to avoid it, there are some artificial limits + on the size of the tests: + - no more than 10 ='s (and 9 &&'s) in the u32 argument + - no more than 10 ranges (and 9 commas) per value + - no more than 10 numbers (and 9 operators) per location + + To describe the meaning of location, imagine the following machine that + interprets it. There are three registers: + A is of type char*, initially the address of the IP header + B and C are unsigned 32 bit integers, initially zero + + The instructions are: + number B = number; + C = (*(A+B)<<24)+(*(A+B+1)<<16)+(*(A+B+2)<<8)+*(A+B+3) + &number C = C&number + <<number C = C<<number + >>number C = C>>number + @number A = A+C; then do the instruction number + Any access of memory outside [skb->head,skb->end] causes the match to fail. + Otherwise the result of the computation is the final value of C. + + Whitespace is allowed but not required in the tests. + However the characters that do occur there are likely to require + shell quoting, so it's a good idea to enclose the arguments in quotes. + +Example: + match IP packets with total length >= 256 + The IP header contains a total length field in bytes 2-3. + --u32 "0&0xFFFF=0x100:0xFFFF" + read bytes 0-3 + AND that with FFFF (giving bytes 2-3), + and test whether that's in the range [0x100:0xFFFF] + +Example: (more realistic, hence more complicated) + match icmp packets with icmp type 0 + First test that it's an icmp packet, true iff byte 9 (protocol) = 1 + --u32 "6&0xFF=1 && ... + read bytes 6-9, use & to throw away bytes 6-8 and compare the result to 1 + Next test that it's not a fragment. + (If so it might be part of such a packet but we can't always tell.) + n.b. This test is generally needed if you want to match anything + beyond the IP header. + The last 6 bits of byte 6 and all of byte 7 are 0 iff this is a complete + packet (not a fragment). Alternatively, you can allow first fragments + by only testing the last 5 bits of byte 6. + ... 4&0x3FFF=0 && ... + Last test: the first byte past the IP header (the type) is 0 + This is where we have to use the @syntax. The length of the IP header + (IHL) in 32 bit words is stored in the right half of byte 0 of the + IP header itself. + ... 0>>22&0x3C@0>>24=0" + The first 0 means read bytes 0-3, + >>22 means shift that 22 bits to the right. Shifting 24 bits would give + the first byte, so only 22 bits is four times that plus a few more bits. + &3C then eliminates the two extra bits on the right and the first four + bits of the first byte. + For instance, if IHL=5 then the IP header is 20 (4 x 5) bytes long. + In this case bytes 0-1 are (in binary) xxxx0101 yyzzzzzz, + >>22 gives the 10 bit value xxxx0101yy and &3C gives 010100. + @ means to use this number as a new offset into the packet, and read + four bytes starting from there. This is the first 4 bytes of the icmp + payload, of which byte 0 is the icmp type. Therefore we simply shift + the value 24 to the right to throw out all but the first byte and compare + the result with 0. + +Example: + tcp payload bytes 8-12 is any of 1, 2, 5 or 8 + First we test that the packet is a tcp packet (similar to icmp). + --u32 "6&0xFF=6 && ... + Next, test that it's not a fragment (same as above). + ... 0>>22&0x3C@12>>26&0x3C@8=1,2,5,8" + 0>>22&3C as above computes the number of bytes in the IP header. + @ makes this the new offset into the packet, which is the start of the + tcp header. The length of the tcp header (again in 32 bit words) is + the left half of byte 12 of the tcp header. The 12>>26&3C + computes this length in bytes (similar to the IP header before). + @ makes this the new offset, which is the start of the tcp payload. + Finally 8 reads bytes 8-12 of the payload and = checks whether the + result is any of 1, 2, 5 or 8 +*/ + +#include <linux/module.h> +#include <linux/skbuff.h> + +#include <linux/netfilter_ipv4/ipt_u32.h> +#include <linux/netfilter_ipv4/ip_tables.h> + +/* #include <asm-i386/timex.h> for timing */ + +MODULE_AUTHOR("Don Cohen <don@isis.cs3-inc.com>"); +MODULE_DESCRIPTION("IP tables u32 matching module"); +MODULE_LICENSE("GPL"); + +static int +match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, + int offset, + const void *hdr, + u_int16_t datalen, + int *hotdrop) +{ + const struct ipt_u32 *data = matchinfo; + int testind, i; + unsigned char* origbase = (char*)skb->nh.iph; + unsigned char* base = origbase; + unsigned char* head = skb->head; + unsigned char* end = skb->end; + int nnums, nvals; + u_int32_t pos, val; + /* unsigned long long cycles1, cycles2, cycles3, cycles4; + cycles1 = get_cycles(); */ + + for (testind=0; testind < data->ntests; testind++) { + base = origbase; /* reset for each test */ + pos = data->tests[testind].location[0].number; + if (base+pos+3 > end || base+pos < head) + return 0; + val = (base[pos]<<24) + (base[pos+1]<<16) + + (base[pos+2]<<8) + base[pos+3]; + nnums = data->tests[testind].nnums; + for (i=1; i < nnums; i++) { + u_int32_t number = data->tests[testind].location[i].number; + switch (data->tests[testind].location[i].nextop) { + case IPT_U32_AND: + val = val & number; + break; + case IPT_U32_LEFTSH: + val = val << number; + break; + case IPT_U32_RIGHTSH: + val = val >> number; + break; + case IPT_U32_AT: + base = base + val; + pos = number; + if (base+pos+3 > end || base+pos < head) + return 0; + val = (base[pos]<<24) + (base[pos+1]<<16) + + (base[pos+2]<<8) + base[pos+3]; + break; + } + } + nvals = data->tests[testind].nvalues; + for (i=0; i < nvals; i++) { + if ((data->tests[testind].value[i].min <= val) && + (val <= data->tests[testind].value[i].max)) { + break; + } + } + if (i >= data->tests[testind].nvalues) { + /* cycles2 = get_cycles(); + printk("failed %d in %d cycles\n", testind, + cycles2-cycles1); */ + return 0; + } + } + /* cycles2 = get_cycles(); + printk("succeeded in %d cycles\n", cycles2-cycles1); */ + return 1; +} + +static int +checkentry(const char *tablename, + const struct ipt_ip *ip, + void *matchinfo, + unsigned int matchsize, + unsigned int hook_mask) +{ + if (matchsize != IPT_ALIGN(sizeof(struct ipt_u32))) + return 0; + return 1; +} + +static struct ipt_match u32_match += { { NULL, NULL }, "u32", &match, &checkentry, NULL, THIS_MODULE }; + +static int __init init(void) +{ + return ipt_register_match(&u32_match); +} + +static void __exit fini(void) +{ + ipt_unregister_match(&u32_match); +} + +module_init(init); +module_exit(fini); diff --git a/release/src/linux/linux/net/ipv4/netfilter/ipt_web.c b/release/src/linux/linux/net/ipv4/netfilter/ipt_web.c new file mode 100644 index 00000000..c32a860a --- /dev/null +++ b/release/src/linux/linux/net/ipv4/netfilter/ipt_web.c @@ -0,0 +1,246 @@ +/* + + web (experimental) + HTTP client request match + Copyright (C) 2006 Jonathan Zarate + + Licensed under GNU GPL v2 or later. + +*/ +#include <linux/module.h> +#include <linux/skbuff.h> +#include <net/sock.h> +#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter_ipv4/ipt_web.h> + +MODULE_AUTHOR("Jonathan Zarate"); +MODULE_DESCRIPTION("HTTP client request match (experimental)"); +MODULE_LICENSE("GPL"); + + +// #define LOG printk +#define LOG(...) do { } while (0); + + +static int find(const char *data, const char *tail, const char *text) +{ + int n, o; + int dlen; + const char *p, *e; + + while ((data < tail) && (*data == ' ')) ++data; + while ((tail > data) && (*(tail - 1) == ' ')) --tail; + + dlen = tail - data; + +#if 0 + { + char tmp[128]; + int z; + z = sizeof(tmp) - 1; + if (z > dlen) z = dlen; + memcpy(tmp, data, z); + tmp[z] = 0; + LOG(KERN_INFO "find in '%s'\n", tmp); + } +#endif + + // 012345 + // text + // ^text + // text$ + // ^text$ + // 012345 + + while (*text) { + n = o = strlen(text); + if (*text == '^') { + --n; + if (*(text + n) == '$') { + // exact + --n; + if ((dlen == n) && (memcmp(data, text + 1, n) == 0)) { + LOG(KERN_INFO "matched %s\n", text); + return 1; + } + } + else { + // begins with + if ((dlen >= n) && (memcmp(data, text + 1, n) == 0)) { + LOG(KERN_INFO "matched %s\n", text); + return 1; + } + } + } + else if (*(text + n - 1) == '$') { + // ends with + --n; + if (memcmp(tail - n, text, n) == 0) { + LOG(KERN_INFO "matched %s\n", text); + return 1; + } + } + else { + // contains + p = data; + e = tail - n; + while (p <= e) { + if (memcmp(p, text, n) == 0) { + LOG(KERN_INFO "matched %s\n", text); + return 1; + } + ++p; + } + } + + text += o + 1; + } + return 0; +} + +static inline const char *findend(const char *data, const char *tail, int min) +{ + int n = tail - data; + if (n >= min) { + while (data < tail) { + if (*data == '\r') return data; + ++data; + } + } + return NULL; +} + +static int match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, + const void *matchinfo, int offset, const void *hdr, u_int16_t datalen, int *hotdrop) +{ + const struct ipt_web_info *info; + const struct tcphdr *tcph; + const char *data; + const char *tail; + const char *p, *q; + int doff, dlen; + + info = matchinfo; + + if (offset != 0) return info->invert; + + tcph = hdr; + doff = (tcph->doff * 4); + data = (char *)tcph + doff; + dlen = datalen - doff; + +#if 0 + printk(KERN_INFO "datalen=%u dlen=%d doff=%d\n", datalen, dlen, doff); + char tmp[16]; + memcpy(tmp, data, sizeof(tmp)); + tmp[sizeof(tmp) - 1] = 0; + printk(KERN_INFO "[%s]\n", tmp); +#endif + + // POST / HTTP/1.0$$$$ + // GET / HTTP/1.0$$$$ + // 1234567890123456789 + if (dlen < 18) return info->invert; + + // "GET " or "POST" + __u32 sig = *(__u32 *)data; + if ((sig != __constant_htonl(0x47455420)) && (sig != __constant_htonl(0x504f5354))) { + return info->invert; + } + + tail = data + dlen; + if (dlen > 1024) { + dlen = 1024; + tail = data + 1024; + } + + + // POST / HTTP/1.0$$$$ + // GET / HTTP/1.0$$$$ -- minimum + // 0123456789012345678 + // 9876543210 + if (((p = findend(data + 14, tail, 18)) == NULL) || (memcmp(p - 9, " HTTP/", 6) != 0)) + return info->invert; + +#if 0 + { + const char *qq = info->text; + while (*qq) { + printk(KERN_INFO "text=%s\n", qq); + qq += strlen(qq) + 1; + } + } +#endif + + switch (info->mode) { + case IPT_WEB_HTTP: + return !info->invert; + case IPT_WEB_HORE: + // entire request line, else host line + if (find(data + 4, p - 9, info->text)) return !info->invert; + break; + case IPT_WEB_PATH: + // left side of '?' or entire line + q = data += 4; + p -= 9; + while ((q < p) && (*q != '?')) ++q; + return find(data, q, info->text) ^ info->invert; + case IPT_WEB_QUERY: + // right side of '?' or none + q = data + 4; + p -= 9; + while ((q < p) && (*q != '?')) ++q; + if (q >= p) return info->invert; + return find(q + 1, p, info->text) ^ info->invert; + case IPT_WEB_RURI: + // entire request line + return find(data + 4, p - 9, info->text) ^ info->invert; + default: + // shutup compiler + break; + } + + // else, IPT_WEB_HOST + + while (1) { + data = p + 2; // skip previous \r\n + p = findend(data, tail, 8); // p = current line's \r + if (p == NULL) return 0; + +#if 0 + char tmp[64]; + memcpy(tmp, data, 32); + tmp[32] = 0; + printk(KERN_INFO "data=[%s]\n", tmp); +#endif + + if (memcmp(data, "Host: ", 6) == 0) + return find(data + 6, p, info->text) ^ info->invert; + } + + return !info->invert; +} + +static int checkentry(const char *tablename, const struct ipt_ip *ip, void *matchinfo, + unsigned int matchsize, unsigned int hook_mask) +{ + return (matchsize == IPT_ALIGN(sizeof(struct ipt_web_info))); +} + + +static struct ipt_match web_match += { { NULL, NULL }, "web", &match, &checkentry, NULL, THIS_MODULE }; + +static int __init init(void) +{ +// LOG(KERN_INFO "ipt_web <" __DATE__ " " __TIME__ "> loaded\n"); + return ipt_register_match(&web_match); +} + +static void __exit fini(void) +{ + ipt_unregister_match(&web_match); +} + +module_init(init); +module_exit(fini); diff --git a/release/src/linux/linux/net/ipv4/netfilter/regexp/regexp.c b/release/src/linux/linux/net/ipv4/netfilter/regexp/regexp.c new file mode 100644 index 00000000..31ef35b9 --- /dev/null +++ b/release/src/linux/linux/net/ipv4/netfilter/regexp/regexp.c @@ -0,0 +1,1195 @@ +/* + * regcomp and regexec -- regsub and regerror are elsewhere + * @(#)regexp.c 1.3 of 18 April 87 + * + * Copyright (c) 1986 by University of Toronto. + * Written by Henry Spencer. Not derived from licensed software. + * + * Permission is granted to anyone to use this software for any + * purpose on any computer system, and to redistribute it freely, + * subject to the following restrictions: + * + * 1. The author is not responsible for the consequences of use of + * this software, no matter how awful, even if they arise + * from defects in it. + * + * 2. The origin of this software must not be misrepresented, either + * by explicit claim or by omission. + * + * 3. Altered versions must be plainly marked as such, and must not + * be misrepresented as being the original software. + * + * Beware that some of this code is subtly aware of the way operator + * precedence is structured in regular expressions. Serious changes in + * regular-expression syntax might require a total rethink. + * + * This code was modified by Ethan Sommer to work within the kernel + * (it now uses kmalloc etc..) + * + * Modified slightly by Matthew Strait to use more modern C. + */ + +#include "regexp.h" +#include "regmagic.h" + +/* added by ethan and matt. Lets it work in both kernel and user space. +(So iptables can use it, for instance.) Yea, it goes both ways... */ +#if __KERNEL__ + #define malloc(foo) kmalloc(foo,GFP_ATOMIC) +#else + #define printk(format,args...) printf(format,##args) +#endif + +void regerror(char * s) +{ + printk("<3>Regexp: %s\n", s); + /* NOTREACHED */ +} + +/* + * The "internal use only" fields in regexp.h are present to pass info from + * compile to execute that permits the execute phase to run lots faster on + * simple cases. They are: + * + * regstart char that must begin a match; '\0' if none obvious + * reganch is the match anchored (at beginning-of-line only)? + * regmust string (pointer into program) that match must include, or NULL + * regmlen length of regmust string + * + * Regstart and reganch permit very fast decisions on suitable starting points + * for a match, cutting down the work a lot. Regmust permits fast rejection + * of lines that cannot possibly match. The regmust tests are costly enough + * that regcomp() supplies a regmust only if the r.e. contains something + * potentially expensive (at present, the only such thing detected is * or + + * at the start of the r.e., which can involve a lot of backup). Regmlen is + * supplied because the test in regexec() needs it and regcomp() is computing + * it anyway. + */ + +/* + * Structure for regexp "program". This is essentially a linear encoding + * of a nondeterministic finite-state machine (aka syntax charts or + * "railroad normal form" in parsing technology). Each node is an opcode + * plus a "next" pointer, possibly plus an operand. "Next" pointers of + * all nodes except BRANCH implement concatenation; a "next" pointer with + * a BRANCH on both ends of it is connecting two alternatives. (Here we + * have one of the subtle syntax dependencies: an individual BRANCH (as + * opposed to a collection of them) is never concatenated with anything + * because of operator precedence.) The operand of some types of node is + * a literal string; for others, it is a node leading into a sub-FSM. In + * particular, the operand of a BRANCH node is the first node of the branch. + * (NB this is *not* a tree structure: the tail of the branch connects + * to the thing following the set of BRANCHes.) The opcodes are: + */ + +/* definition number opnd? meaning */ +#define END 0 /* no End of program. */ +#define BOL 1 /* no Match "" at beginning of line. */ +#define EOL 2 /* no Match "" at end of line. */ +#define ANY 3 /* no Match any one character. */ +#define ANYOF 4 /* str Match any character in this string. */ +#define ANYBUT 5 /* str Match any character not in this string. */ +#define BRANCH 6 /* node Match this alternative, or the next... */ +#define BACK 7 /* no Match "", "next" ptr points backward. */ +#define EXACTLY 8 /* str Match this string. */ +#define NOTHING 9 /* no Match empty string. */ +#define STAR 10 /* node Match this (simple) thing 0 or more times. */ +#define PLUS 11 /* node Match this (simple) thing 1 or more times. */ +#define OPEN 20 /* no Mark this point in input as start of #n. */ + /* OPEN+1 is number 1, etc. */ +#define CLOSE 30 /* no Analogous to OPEN. */ + +/* + * Opcode notes: + * + * BRANCH The set of branches constituting a single choice are hooked + * together with their "next" pointers, since precedence prevents + * anything being concatenated to any individual branch. The + * "next" pointer of the last BRANCH in a choice points to the + * thing following the whole choice. This is also where the + * final "next" pointer of each individual branch points; each + * branch starts with the operand node of a BRANCH node. + * + * BACK Normal "next" pointers all implicitly point forward; BACK + * exists to make loop structures possible. + * + * STAR,PLUS '?', and complex '*' and '+', are implemented as circular + * BRANCH structures using BACK. Simple cases (one character + * per match) are implemented with STAR and PLUS for speed + * and to minimize recursive plunges. + * + * OPEN,CLOSE ...are numbered at compile time. + */ + +/* + * A node is one char of opcode followed by two chars of "next" pointer. + * "Next" pointers are stored as two 8-bit pieces, high order first. The + * value is a positive offset from the opcode of the node containing it. + * An operand, if any, simply follows the node. (Note that much of the + * code generation knows about this implicit relationship.) + * + * Using two bytes for the "next" pointer is vast overkill for most things, + * but allows patterns to get big without disasters. + */ +#define OP(p) (*(p)) +#define NEXT(p) (((*((p)+1)&0377)<<8) + (*((p)+2)&0377)) +#define OPERAND(p) ((p) + 3) + +/* + * See regmagic.h for one further detail of program structure. + */ + + +/* + * Utility definitions. + */ +#ifndef CHARBITS +#define UCHARAT(p) ((int)*(unsigned char *)(p)) +#else +#define UCHARAT(p) ((int)*(p)&CHARBITS) +#endif + +#define FAIL(m) { regerror(m); return(NULL); } +#define ISMULT(c) ((c) == '*' || (c) == '+' || (c) == '?') +#define META "^$.[()|?+*\\" + +/* + * Flags to be passed up and down. + */ +#define HASWIDTH 01 /* Known never to match null string. */ +#define SIMPLE 02 /* Simple enough to be STAR/PLUS operand. */ +#define SPSTART 04 /* Starts with * or +. */ +#define WORST 0 /* Worst case. */ + +/* + * Global work variables for regcomp(). + */ +static char *regparse; /* Input-scan pointer. */ +static int regnpar; /* () count. */ +static char regdummy; +static char *regcode; /* Code-emit pointer; ®dummy = don't. */ +static long regsize; /* Code size. */ + +/* + * Forward declarations for regcomp()'s friends. + */ +#ifndef STATIC +#define STATIC static +#endif +STATIC char *reg(int paren,int *flagp); +STATIC char *regbranch(int *flagp); +STATIC char *regpiece(int *flagp); +STATIC char *regatom(int *flagp); +STATIC char *regnode(char op); +STATIC char *regnext(char *p); +STATIC void regc(char b); +STATIC void reginsert(char op, char *opnd); +STATIC void regtail(char *p, char *val); +STATIC void regoptail(char *p, char *val); + + +__kernel_size_t my_strcspn(const char *s1,const char *s2) +{ + char *scan1; + char *scan2; + int count; + + count = 0; + for (scan1 = (char *)s1; *scan1 != '\0'; scan1++) { + for (scan2 = (char *)s2; *scan2 != '\0';) /* ++ moved down. */ + if (*scan1 == *scan2++) + return(count); + count++; + } + return(count); +} + +/* + - regcomp - compile a regular expression into internal code + * + * We can't allocate space until we know how big the compiled form will be, + * but we can't compile it (and thus know how big it is) until we've got a + * place to put the code. So we cheat: we compile it twice, once with code + * generation turned off and size counting turned on, and once "for real". + * This also means that we don't allocate space until we are sure that the + * thing really will compile successfully, and we never have to move the + * code and thus invalidate pointers into it. (Note that it has to be in + * one piece because free() must be able to free it all.) + * + * Beware that the optimization-preparation code in here knows about some + * of the structure of the compiled regexp. + */ +regexp * +regcomp(char *exp,int *patternsize) +{ + register regexp *r; + register char *scan; + register char *longest; + register int len; + int flags; + /* commented out by ethan + extern char *malloc(); + */ + + if (exp == NULL) + FAIL("NULL argument"); + + /* First pass: determine size, legality. */ + regparse = exp; + regnpar = 1; + regsize = 0L; + regcode = ®dummy; + regc(MAGIC); + if (reg(0, &flags) == NULL) + return(NULL); + + /* Small enough for pointer-storage convention? */ + if (regsize >= 32767L) /* Probably could be 65535L. */ + FAIL("regexp too big"); + + /* Allocate space. */ + *patternsize=sizeof(regexp) + (unsigned)regsize; + r = (regexp *)malloc(sizeof(regexp) + (unsigned)regsize); + if (r == NULL) + FAIL("out of space"); + + /* Second pass: emit code. */ + regparse = exp; + regnpar = 1; + regcode = r->program; + regc(MAGIC); + if (reg(0, &flags) == NULL) + return(NULL); + + /* Dig out information for optimizations. */ + r->regstart = '\0'; /* Worst-case defaults. */ + r->reganch = 0; + r->regmust = NULL; + r->regmlen = 0; + scan = r->program+1; /* First BRANCH. */ + if (OP(regnext(scan)) == END) { /* Only one top-level choice. */ + scan = OPERAND(scan); + + /* Starting-point info. */ + if (OP(scan) == EXACTLY) + r->regstart = *OPERAND(scan); + else if (OP(scan) == BOL) + r->reganch++; + + /* + * If there's something expensive in the r.e., find the + * longest literal string that must appear and make it the + * regmust. Resolve ties in favor of later strings, since + * the regstart check works with the beginning of the r.e. + * and avoiding duplication strengthens checking. Not a + * strong reason, but sufficient in the absence of others. + */ + if (flags&SPSTART) { + longest = NULL; + len = 0; + for (; scan != NULL; scan = regnext(scan)) + if (OP(scan) == EXACTLY && strlen(OPERAND(scan)) >= len) { + longest = OPERAND(scan); + len = strlen(OPERAND(scan)); + } + r->regmust = longest; + r->regmlen = len; + } + } + + return(r); +} + +/* + - reg - regular expression, i.e. main body or parenthesized thing + * + * Caller must absorb opening parenthesis. + * + * Combining parenthesis handling with the base level of regular expression + * is a trifle forced, but the need to tie the tails of the branches to what + * follows makes it hard to avoid. + */ +static char * +reg(int paren, int *flagp /* Parenthesized? */ ) +{ + register char *ret; + register char *br; + register char *ender; + register int parno = 0; /* 0 makes gcc happy */ + int flags; + + *flagp = HASWIDTH; /* Tentatively. */ + + /* Make an OPEN node, if parenthesized. */ + if (paren) { + if (regnpar >= NSUBEXP) + FAIL("too many ()"); + parno = regnpar; + regnpar++; + ret = regnode(OPEN+parno); + } else + ret = NULL; + + /* Pick up the branches, linking them together. */ + br = regbranch(&flags); + if (br == NULL) + return(NULL); + if (ret != NULL) + regtail(ret, br); /* OPEN -> first. */ + else + ret = br; + if (!(flags&HASWIDTH)) + *flagp &= ~HASWIDTH; + *flagp |= flags&SPSTART; + while (*regparse == '|') { + regparse++; + br = regbranch(&flags); + if (br == NULL) + return(NULL); + regtail(ret, br); /* BRANCH -> BRANCH. */ + if (!(flags&HASWIDTH)) + *flagp &= ~HASWIDTH; + *flagp |= flags&SPSTART; + } + + /* Make a closing node, and hook it on the end. */ + ender = regnode((paren) ? CLOSE+parno : END); + regtail(ret, ender); + + /* Hook the tails of the branches to the closing node. */ + for (br = ret; br != NULL; br = regnext(br)) + regoptail(br, ender); + + /* Check for proper termination. */ + if (paren && *regparse++ != ')') { + FAIL("unmatched ()"); + } else if (!paren && *regparse != '\0') { + if (*regparse == ')') { + FAIL("unmatched ()"); + } else + FAIL("junk on end"); /* "Can't happen". */ + /* NOTREACHED */ + } + + return(ret); +} + +/* + - regbranch - one alternative of an | operator + * + * Implements the concatenation operator. + */ +static char * +regbranch(int *flagp) +{ + register char *ret; + register char *chain; + register char *latest; + int flags; + + *flagp = WORST; /* Tentatively. */ + + ret = regnode(BRANCH); + chain = NULL; + while (*regparse != '\0' && *regparse != '|' && *regparse != ')') { + latest = regpiece(&flags); + if (latest == NULL) + return(NULL); + *flagp |= flags&HASWIDTH; + if (chain == NULL) /* First piece. */ + *flagp |= flags&SPSTART; + else + regtail(chain, latest); + chain = latest; + } + if (chain == NULL) /* Loop ran zero times. */ + (void) regnode(NOTHING); + + return(ret); +} + +/* + - regpiece - something followed by possible [*+?] + * + * Note that the branching code sequences used for ? and the general cases + * of * and + are somewhat optimized: they use the same NOTHING node as + * both the endmarker for their branch list and the body of the last branch. + * It might seem that this node could be dispensed with entirely, but the + * endmarker role is not redundant. + */ +static char * +regpiece(int *flagp) +{ + register char *ret; + register char op; + register char *next; + int flags; + + ret = regatom(&flags); + if (ret == NULL) + return(NULL); + + op = *regparse; + if (!ISMULT(op)) { + *flagp = flags; + return(ret); + } + + if (!(flags&HASWIDTH) && op != '?') + FAIL("*+ operand could be empty"); + *flagp = (op != '+') ? (WORST|SPSTART) : (WORST|HASWIDTH); + + if (op == '*' && (flags&SIMPLE)) + reginsert(STAR, ret); + else if (op == '*') { + /* Emit x* as (x&|), where & means "self". */ + reginsert(BRANCH, ret); /* Either x */ + regoptail(ret, regnode(BACK)); /* and loop */ + regoptail(ret, ret); /* back */ + regtail(ret, regnode(BRANCH)); /* or */ + regtail(ret, regnode(NOTHING)); /* null. */ + } else if (op == '+' && (flags&SIMPLE)) + reginsert(PLUS, ret); + else if (op == '+') { + /* Emit x+ as x(&|), where & means "self". */ + next = regnode(BRANCH); /* Either */ + regtail(ret, next); + regtail(regnode(BACK), ret); /* loop back */ + regtail(next, regnode(BRANCH)); /* or */ + regtail(ret, regnode(NOTHING)); /* null. */ + } else if (op == '?') { + /* Emit x? as (x|) */ + reginsert(BRANCH, ret); /* Either x */ + regtail(ret, regnode(BRANCH)); /* or */ + next = regnode(NOTHING); /* null. */ + regtail(ret, next); + regoptail(ret, next); + } + regparse++; + if (ISMULT(*regparse)) + FAIL("nested *?+"); + + return(ret); +} + +/* + - regatom - the lowest level + * + * Optimization: gobbles an entire sequence of ordinary characters so that + * it can turn them into a single node, which is smaller to store and + * faster to run. Backslashed characters are exceptions, each becoming a + * separate node; the code is simpler that way and it's not worth fixing. + */ +static char * +regatom(int *flagp) +{ + register char *ret; + int flags; + + *flagp = WORST; /* Tentatively. */ + + switch (*regparse++) { + case '^': + ret = regnode(BOL); + break; + case '$': + ret = regnode(EOL); + break; + case '.': + ret = regnode(ANY); + *flagp |= HASWIDTH|SIMPLE; + break; + case '[': { + register int class; + register int classend; + + if (*regparse == '^') { /* Complement of range. */ + ret = regnode(ANYBUT); + regparse++; + } else + ret = regnode(ANYOF); + if (*regparse == ']' || *regparse == '-') + regc(*regparse++); + while (*regparse != '\0' && *regparse != ']') { + if (*regparse == '-') { + regparse++; + if (*regparse == ']' || *regparse == '\0') + regc('-'); + else { + class = UCHARAT(regparse-2)+1; + classend = UCHARAT(regparse); + if (class > classend+1) + FAIL("invalid [] range"); + for (; class <= classend; class++) + regc(class); + regparse++; + } + } else + regc(*regparse++); + } + regc('\0'); + if (*regparse != ']') + FAIL("unmatched []"); + regparse++; + *flagp |= HASWIDTH|SIMPLE; + } + break; + case '(': + ret = reg(1, &flags); + if (ret == NULL) + return(NULL); + *flagp |= flags&(HASWIDTH|SPSTART); + break; + case '\0': + case '|': + case ')': + FAIL("internal urp"); /* Supposed to be caught earlier. */ + break; + case '?': + case '+': + case '*': + FAIL("?+* follows nothing"); + break; + case '\\': + if (*regparse == '\0') + FAIL("trailing \\"); + ret = regnode(EXACTLY); + regc(*regparse++); + regc('\0'); + *flagp |= HASWIDTH|SIMPLE; + break; + default: { + register int len; + register char ender; + + regparse--; + len = my_strcspn((const char *)regparse, (const char *)META); + if (len <= 0) + FAIL("internal disaster"); + ender = *(regparse+len); + if (len > 1 && ISMULT(ender)) + len--; /* Back off clear of ?+* operand. */ + *flagp |= HASWIDTH; + if (len == 1) + *flagp |= SIMPLE; + ret = regnode(EXACTLY); + while (len > 0) { + regc(*regparse++); + len--; + } + regc('\0'); + } + break; + } + + return(ret); +} + +/* + - regnode - emit a node + */ +static char * /* Location. */ +regnode(char op) +{ + register char *ret; + register char *ptr; + + ret = regcode; + if (ret == ®dummy) { + regsize += 3; + return(ret); + } + + ptr = ret; + *ptr++ = op; + *ptr++ = '\0'; /* Null "next" pointer. */ + *ptr++ = '\0'; + regcode = ptr; + + return(ret); +} + +/* + - regc - emit (if appropriate) a byte of code + */ +static void +regc(char b) +{ + if (regcode != ®dummy) + *regcode++ = b; + else + regsize++; +} + +/* + - reginsert - insert an operator in front of already-emitted operand + * + * Means relocating the operand. + */ +static void +reginsert(char op, char* opnd) +{ + register char *src; + register char *dst; + register char *place; + + if (regcode == ®dummy) { + regsize += 3; + return; + } + + src = regcode; + regcode += 3; + dst = regcode; + while (src > opnd) + *--dst = *--src; + + place = opnd; /* Op node, where operand used to be. */ + *place++ = op; + *place++ = '\0'; + *place++ = '\0'; +} + +/* + - regtail - set the next-pointer at the end of a node chain + */ +static void +regtail(char *p, char *val) +{ + register char *scan; + register char *temp; + register int offset; + + if (p == ®dummy) + return; + + /* Find last node. */ + scan = p; + for (;;) { + temp = regnext(scan); + if (temp == NULL) + break; + scan = temp; + } + + if (OP(scan) == BACK) + offset = scan - val; + else + offset = val - scan; + *(scan+1) = (offset>>8)&0377; + *(scan+2) = offset&0377; +} + +/* + - regoptail - regtail on operand of first argument; nop if operandless + */ +static void +regoptail(char *p, char *val) +{ + /* "Operandless" and "op != BRANCH" are synonymous in practice. */ + if (p == NULL || p == ®dummy || OP(p) != BRANCH) + return; + regtail(OPERAND(p), val); +} + +/* + * regexec and friends + */ + +/* + * Global work variables for regexec(). + */ +static char *reginput; /* String-input pointer. */ +static char *regbol; /* Beginning of input, for ^ check. */ +static char **regstartp; /* Pointer to startp array. */ +static char **regendp; /* Ditto for endp. */ + +/* + * Forwards. + */ +STATIC int regtry(regexp *prog, char *string); +STATIC int regmatch(char *prog); +STATIC int regrepeat(char *p); + +#ifdef DEBUG +int regnarrate = 0; +void regdump(); +STATIC char *regprop(char *op); +#endif + +/* + - regexec - match a regexp against a string + */ +int +regexec(regexp *prog, char *string) +{ + register char *s; + + /* Be paranoid... */ + if (prog == NULL || string == NULL) { + printk("<3>Regexp: NULL parameter\n"); + return(0); + } + + /* Check validity of program. */ + if (UCHARAT(prog->program) != MAGIC) { + printk("<3>Regexp: corrupted program\n"); + return(0); + } + + /* If there is a "must appear" string, look for it. */ + if (prog->regmust != NULL) { + s = string; + while ((s = strchr(s, prog->regmust[0])) != NULL) { + if (strncmp(s, prog->regmust, prog->regmlen) == 0) + break; /* Found it. */ + s++; + } + if (s == NULL) /* Not present. */ + return(0); + } + + /* Mark beginning of line for ^ . */ + regbol = string; + + /* Simplest case: anchored match need be tried only once. */ + if (prog->reganch) + return(regtry(prog, string)); + + /* Messy cases: unanchored match. */ + s = string; + if (prog->regstart != '\0') + /* We know what char it must start with. */ + while ((s = strchr(s, prog->regstart)) != NULL) { + if (regtry(prog, s)) + return(1); + s++; + } + else + /* We don't -- general case. */ + do { + if (regtry(prog, s)) + return(1); + } while (*s++ != '\0'); + + /* Failure. */ + return(0); +} + +/* + - regtry - try match at specific point + */ +static int /* 0 failure, 1 success */ +regtry(regexp *prog, char *string) +{ + register int i; + register char **sp; + register char **ep; + + reginput = string; + regstartp = prog->startp; + regendp = prog->endp; + + sp = prog->startp; + ep = prog->endp; + for (i = NSUBEXP; i > 0; i--) { + *sp++ = NULL; + *ep++ = NULL; + } + if (regmatch(prog->program + 1)) { + prog->startp[0] = string; + prog->endp[0] = reginput; + return(1); + } else + return(0); +} + +/* + - regmatch - main matching routine + * + * Conceptually the strategy is simple: check to see whether the current + * node matches, call self recursively to see whether the rest matches, + * and then act accordingly. In practice we make some effort to avoid + * recursion, in particular by going through "ordinary" nodes (that don't + * need to know whether the rest of the match failed) by a loop instead of + * by recursion. + */ +static int /* 0 failure, 1 success */ +regmatch(char *prog) +{ + register char *scan = prog; /* Current node. */ + char *next; /* Next node. */ + +#ifdef DEBUG + if (scan != NULL && regnarrate) + fprintf(stderr, "%s(\n", regprop(scan)); +#endif + while (scan != NULL) { +#ifdef DEBUG + if (regnarrate) + fprintf(stderr, "%s...\n", regprop(scan)); +#endif + next = regnext(scan); + + switch (OP(scan)) { + case BOL: + if (reginput != regbol) + return(0); + break; + case EOL: + if (*reginput != '\0') + return(0); + break; + case ANY: + if (*reginput == '\0') + return(0); + reginput++; + break; + case EXACTLY: { + register int len; + register char *opnd; + + opnd = OPERAND(scan); + /* Inline the first character, for speed. */ + if (*opnd != *reginput) + return(0); + len = strlen(opnd); + if (len > 1 && strncmp(opnd, reginput, len) != 0) + return(0); + reginput += len; + } + break; + case ANYOF: + if (*reginput == '\0' || strchr(OPERAND(scan), *reginput) == NULL) + return(0); + reginput++; + break; + case ANYBUT: + if (*reginput == '\0' || strchr(OPERAND(scan), *reginput) != NULL) + return(0); + reginput++; + break; + case NOTHING: + case BACK: + break; + case OPEN+1: + case OPEN+2: + case OPEN+3: + case OPEN+4: + case OPEN+5: + case OPEN+6: + case OPEN+7: + case OPEN+8: + case OPEN+9: { + register int no; + register char *save; + + no = OP(scan) - OPEN; + save = reginput; + + if (regmatch(next)) { + /* + * Don't set startp if some later + * invocation of the same parentheses + * already has. + */ + if (regstartp[no] == NULL) + regstartp[no] = save; + return(1); + } else + return(0); + } + break; + case CLOSE+1: + case CLOSE+2: + case CLOSE+3: + case CLOSE+4: + case CLOSE+5: + case CLOSE+6: + case CLOSE+7: + case CLOSE+8: + case CLOSE+9: + { + register int no; + register char *save; + + no = OP(scan) - CLOSE; + save = reginput; + + if (regmatch(next)) { + /* + * Don't set endp if some later + * invocation of the same parentheses + * already has. + */ + if (regendp[no] == NULL) + regendp[no] = save; + return(1); + } else + return(0); + } + break; + case BRANCH: { + register char *save; + + if (OP(next) != BRANCH) /* No choice. */ + next = OPERAND(scan); /* Avoid recursion. */ + else { + do { + save = reginput; + if (regmatch(OPERAND(scan))) + return(1); + reginput = save; + scan = regnext(scan); + } while (scan != NULL && OP(scan) == BRANCH); + return(0); + /* NOTREACHED */ + } + } + break; + case STAR: + case PLUS: { + register char nextch; + register int no; + register char *save; + register int min; + + /* + * Lookahead to avoid useless match attempts + * when we know what character comes next. + */ + nextch = '\0'; + if (OP(next) == EXACTLY) + nextch = *OPERAND(next); + min = (OP(scan) == STAR) ? 0 : 1; + save = reginput; + no = regrepeat(OPERAND(scan)); + while (no >= min) { + /* If it could work, try it. */ + if (nextch == '\0' || *reginput == nextch) + if (regmatch(next)) + return(1); + /* Couldn't or didn't -- back up. */ + no--; + reginput = save + no; + } + return(0); + } + break; + case END: + return(1); /* Success! */ + break; + default: + printk("<3>Regexp: memory corruption\n"); + return(0); + break; + } + + scan = next; + } + + /* + * We get here only if there's trouble -- normally "case END" is + * the terminating point. + */ + printk("<3>Regexp: corrupted pointers\n"); + return(0); +} + +/* + - regrepeat - repeatedly match something simple, report how many + */ +static int +regrepeat(char *p) +{ + register int count = 0; + register char *scan; + register char *opnd; + + scan = reginput; + opnd = OPERAND(p); + switch (OP(p)) { + case ANY: + count = strlen(scan); + scan += count; + break; + case EXACTLY: + while (*opnd == *scan) { + count++; + scan++; + } + break; + case ANYOF: + while (*scan != '\0' && strchr(opnd, *scan) != NULL) { + count++; + scan++; + } + break; + case ANYBUT: + while (*scan != '\0' && strchr(opnd, *scan) == NULL) { + count++; + scan++; + } + break; + default: /* Oh dear. Called inappropriately. */ + printk("<3>Regexp: internal foulup\n"); + count = 0; /* Best compromise. */ + break; + } + reginput = scan; + + return(count); +} + +/* + - regnext - dig the "next" pointer out of a node + */ +static char* +regnext(char *p) +{ + register int offset; + + if (p == ®dummy) + return(NULL); + + offset = NEXT(p); + if (offset == 0) + return(NULL); + + if (OP(p) == BACK) + return(p-offset); + else + return(p+offset); +} + +#ifdef DEBUG + +STATIC char *regprop(); + +/* + - regdump - dump a regexp onto stdout in vaguely comprehensible form + */ +void +regdump(regexp *r) +{ + register char *s; + register char op = EXACTLY; /* Arbitrary non-END op. */ + register char *next; + /* extern char *strchr(); */ + + + s = r->program + 1; + while (op != END) { /* While that wasn't END last time... */ + op = OP(s); + printf("%2d%s", s-r->program, regprop(s)); /* Where, what. */ + next = regnext(s); + if (next == NULL) /* Next ptr. */ + printf("(0)"); + else + printf("(%d)", (s-r->program)+(next-s)); + s += 3; + if (op == ANYOF || op == ANYBUT || op == EXACTLY) { + /* Literal string, where present. */ + while (*s != '\0') { + putchar(*s); + s++; + } + s++; + } + putchar('\n'); + } + + /* Header fields of interest. */ + if (r->regstart != '\0') + printf("start `%c' ", r->regstart); + if (r->reganch) + printf("anchored "); + if (r->regmust != NULL) + printf("must have \"%s\"", r->regmust); + printf("\n"); +} + +/* + - regprop - printable representation of opcode + */ +static char * +regprop(char *op) +{ +#define BUFLEN 50 + register char *p; + static char buf[BUFLEN]; + + strcpy(buf, ":"); + + switch (OP(op)) { + case BOL: + p = "BOL"; + break; + case EOL: + p = "EOL"; + break; + case ANY: + p = "ANY"; + break; + case ANYOF: + p = "ANYOF"; + break; + case ANYBUT: + p = "ANYBUT"; + break; + case BRANCH: + p = "BRANCH"; + break; + case EXACTLY: + p = "EXACTLY"; + break; + case NOTHING: + p = "NOTHING"; + break; + case BACK: + p = "BACK"; + break; + case END: + p = "END"; + break; + case OPEN+1: + case OPEN+2: + case OPEN+3: + case OPEN+4: + case OPEN+5: + case OPEN+6: + case OPEN+7: + case OPEN+8: + case OPEN+9: + snprintf(buf+strlen(buf),BUFLEN-strlen(buf), "OPEN%d", OP(op)-OPEN); + p = NULL; + break; + case CLOSE+1: + case CLOSE+2: + case CLOSE+3: + case CLOSE+4: + case CLOSE+5: + case CLOSE+6: + case CLOSE+7: + case CLOSE+8: + case CLOSE+9: + snprintf(buf+strlen(buf),BUFLEN-strlen(buf), "CLOSE%d", OP(op)-CLOSE); + p = NULL; + break; + case STAR: + p = "STAR"; + break; + case PLUS: + p = "PLUS"; + break; + default: + printk("<3>Regexp: corrupted opcode\n"); + break; + } + if (p != NULL) + strncat(buf, p, BUFLEN-strlen(buf)); + return(buf); +} +#endif + + diff --git a/release/src/linux/linux/net/ipv4/netfilter/regexp/regexp.h b/release/src/linux/linux/net/ipv4/netfilter/regexp/regexp.h new file mode 100644 index 00000000..fda9a7c4 --- /dev/null +++ b/release/src/linux/linux/net/ipv4/netfilter/regexp/regexp.h @@ -0,0 +1,40 @@ +/* + * Definitions etc. for regexp(3) routines. + * + * Caveat: this is V8 regexp(3) [actually, a reimplementation thereof], + * not the System V one. + */ + +#ifndef REGEXP_H +#define REGEXP_H + +/* +http://www.opensource.apple.com/darwinsource/10.3/expect-1/expect/expect.h , +which contains a version of this library, says: + + * + * NSUBEXP must be at least 10, and no greater than 117 or the parser + * will not work properly. + * + +However, it looks rather like this library is limited to 10. If you think +otherwise, let us know. +*/ + +#define NSUBEXP 10 +typedef struct regexp { + char *startp[NSUBEXP]; + char *endp[NSUBEXP]; + char regstart; /* Internal use only. */ + char reganch; /* Internal use only. */ + char *regmust; /* Internal use only. */ + int regmlen; /* Internal use only. */ + char program[1]; /* Unwarranted chumminess with compiler. */ +} regexp; + +regexp * regcomp(char *exp, int *patternsize); +int regexec(regexp *prog, char *string); +void regsub(regexp *prog, char *source, char *dest); +void regerror(char *s); + +#endif diff --git a/release/src/linux/linux/net/ipv4/netfilter/regexp/regmagic.h b/release/src/linux/linux/net/ipv4/netfilter/regexp/regmagic.h new file mode 100644 index 00000000..5acf4478 --- /dev/null +++ b/release/src/linux/linux/net/ipv4/netfilter/regexp/regmagic.h @@ -0,0 +1,5 @@ +/* + * The first byte of the regexp internal "program" is actually this magic + * number; the start node begins in the second byte. + */ +#define MAGIC 0234 diff --git a/release/src/linux/linux/net/ipv4/netfilter/regexp/regsub.c b/release/src/linux/linux/net/ipv4/netfilter/regexp/regsub.c new file mode 100644 index 00000000..339631f0 --- /dev/null +++ b/release/src/linux/linux/net/ipv4/netfilter/regexp/regsub.c @@ -0,0 +1,95 @@ +/* + * regsub + * @(#)regsub.c 1.3 of 2 April 86 + * + * Copyright (c) 1986 by University of Toronto. + * Written by Henry Spencer. Not derived from licensed software. + * + * Permission is granted to anyone to use this software for any + * purpose on any computer system, and to redistribute it freely, + * subject to the following restrictions: + * + * 1. The author is not responsible for the consequences of use of + * this software, no matter how awful, even if they arise + * from defects in it. + * + * 2. The origin of this software must not be misrepresented, either + * by explicit claim or by omission. + * + * 3. Altered versions must be plainly marked as such, and must not + * be misrepresented as being the original software. + * + * + * This code was modified by Ethan Sommer to work within the kernel + * (it now uses kmalloc etc..) + * + */ +#include "regexp.h" +#include "regmagic.h" +#include <linux/string.h> + + +#ifndef CHARBITS +#define UCHARAT(p) ((int)*(unsigned char *)(p)) +#else +#define UCHARAT(p) ((int)*(p)&CHARBITS) +#endif + +#if 0 +//void regerror(char * s) +//{ +// printk("regexp(3): %s", s); +// /* NOTREACHED */ +//} +#endif + +/* + - regsub - perform substitutions after a regexp match + */ +void +regsub(regexp * prog, char * source, char * dest) +{ + register char *src; + register char *dst; + register char c; + register int no; + register int len; + + /* Not necessary and gcc doesn't like it -MLS */ + /*extern char *strncpy();*/ + + if (prog == NULL || source == NULL || dest == NULL) { + regerror("NULL parm to regsub"); + return; + } + if (UCHARAT(prog->program) != MAGIC) { + regerror("damaged regexp fed to regsub"); + return; + } + + src = source; + dst = dest; + while ((c = *src++) != '\0') { + if (c == '&') + no = 0; + else if (c == '\\' && '0' <= *src && *src <= '9') + no = *src++ - '0'; + else + no = -1; + + if (no < 0) { /* Ordinary character. */ + if (c == '\\' && (*src == '\\' || *src == '&')) + c = *src++; + *dst++ = c; + } else if (prog->startp[no] != NULL && prog->endp[no] != NULL) { + len = prog->endp[no] - prog->startp[no]; + (void) strncpy(dst, prog->startp[no], len); + dst += len; + if (len != 0 && *(dst-1) == '\0') { /* strncpy hit NUL. */ + regerror("damaged match string"); + return; + } + } + } + *dst++ = '\0'; +} diff --git a/release/src/linux/linux/net/ipv4/netfilter/tomato_ct.c b/release/src/linux/linux/net/ipv4/netfilter/tomato_ct.c new file mode 100644 index 00000000..a84cab09 --- /dev/null +++ b/release/src/linux/linux/net/ipv4/netfilter/tomato_ct.c @@ -0,0 +1,181 @@ +/* + + tomato_ct.c + Copyright (C) 2006 Jonathan Zarate + + Licensed under GNU GPL v2. + +*/ +#include <linux/module.h> +#include <linux/proc_fs.h> +#include <linux/netfilter_ipv4/ip_conntrack.h> +#include <linux/netfilter_ipv4/ip_conntrack_core.h> + +// #define TEST_HASHDIST + + +#ifdef TEST_HASHDIST +static int hashdist_read(char *buffer, char **start, off_t offset, int length, int *eof, void *data) +{ + struct list_head *h; + struct list_head *e; + int i; + int n; + int count; + char *buf; + int max; + + // do this the easy way... + max = ip_conntrack_htable_size * sizeof("12345\t12345\n"); + buf = kmalloc(max + 1, GFP_KERNEL); + if (buf == NULL) return 0; + + n = 0; + max -= sizeof("12345\t12345\n"); + + READ_LOCK(&ip_conntrack_lock); + + for (i = 0; i < ip_conntrack_htable_size; ++i) { + count = 0; + h = &ip_conntrack_hash[i]; + if (h) { + e = h; + while (e->next != h) { + ++count; + e = e->next; + } + } + + n += sprintf(buf + n, "%d\t%d\n", i, count); + if (n > max) { + printk("hashdist: %d > %d\n", n, max); + break; + } + } + + READ_UNLOCK(&ip_conntrack_lock); + + if (offset < n) { + n = n - offset; + if (n > length) { + n = length; + *eof = 0; + } + else { + *eof = 1; + } + memcpy(buffer, buf + offset, n); + *start = buffer; + } + else { + n = 0; + *eof = 1; + } + + kfree(buf); + return n; +} +#endif + + +static void interate_all(void (*func)(struct ip_conntrack *, unsigned long), unsigned long data) +{ + int i; + struct list_head *h; + struct list_head *e; + + WRITE_LOCK(&ip_conntrack_lock); + for (i = 0; i < ip_conntrack_htable_size; ++i) { + h = &ip_conntrack_hash[i]; + if (h) { + e = h; + while (e->next != h) { + e = e->next; + func(((struct ip_conntrack_tuple_hash *)e)->ctrack, data); + } + } + } + WRITE_UNLOCK(&ip_conntrack_lock); +} + +static void expireearly(struct ip_conntrack *ct, unsigned long data) +{ + if (ct->timeout.expires > data) { + if (del_timer(&ct->timeout)) { + ct->timeout.expires = data; + add_timer(&ct->timeout); + } + } +} + +static int expireearly_write(struct file *file, const char *buffer, unsigned long length, void *data) +{ + char s[8]; + unsigned long n; + + if ((length > 0) && (length < 6)) { + memcpy(s, buffer, length); + s[length] = 0; + n = simple_strtoul(s, NULL, 10); + if (n < 10) n = 10; + else if (n > 86400) n = 86400; + + interate_all(expireearly, jiffies + (n * HZ)); + } + +/* + if ((length > 0) && (buffer[0] == '1')) { + interate_all(expireearly, jiffies + (20 * HZ)); + } +*/ + + return length; +} + + +static void clearmarks(struct ip_conntrack *ct, unsigned long data) +{ + ct->mark = 0; +} + +static int clearmarks_write(struct file *file, const char *buffer, unsigned long length, void *data) +{ + if ((length > 0) && (buffer[0] == '1')) { + interate_all(clearmarks, 0); + } + return length; +} + +static int __init init(void) +{ + struct proc_dir_entry *p; + + printk(__FILE__ " [" __DATE__ " " __TIME__ "]\n"); + +#ifdef TEST_HASHDIST + p = create_proc_entry("hash_dist", 0400, proc_net); + if (p) p->read_proc = hashdist_read; +#endif + + p = create_proc_entry("expire_early", 0200, proc_net); + if (p) p->write_proc = expireearly_write; + + p = create_proc_entry("clear_marks", 0200, proc_net); + if (p) p->write_proc = clearmarks_write; +
+ return 0; +} + +static void __exit fini(void) +{ +#ifdef TEST_HASHDIST + remove_proc_entry("hash_dist", proc_net); +#endif + remove_proc_entry("expire_early", proc_net); + remove_proc_entry("clear_marks", proc_net); +} + +module_init(init); +module_exit(fini); + +MODULE_LICENSE("GPL"); diff --git a/release/src/linux/linux/net/ipv4/route.c b/release/src/linux/linux/net/ipv4/route.c index dfae0871..f3cf20df 100644 --- a/release/src/linux/linux/net/ipv4/route.c +++ b/release/src/linux/linux/net/ipv4/route.c @@ -2465,6 +2465,7 @@ void __init ip_rt_init(void) panic("IP: failed to allocate ip_dst_cache\n"); goal = num_physpages >> (26 - PAGE_SHIFT); +// goal = num_physpages >> (21 - PAGE_SHIFT); for (order = 0; (1UL << order) < goal; order++) /* NOTHING */; @@ -2494,9 +2495,18 @@ void __init ip_rt_init(void) rt_hash_table[i].chain = NULL; } +// ip_rt_max_size = (rt_hash_mask + 1) * 2; +// ipv4_dst_ops.gc_thresh = (ip_rt_max_size / 4); + ipv4_dst_ops.gc_thresh = (rt_hash_mask + 1); ip_rt_max_size = (rt_hash_mask + 1) * 16; +// printk("gc_thresh=%d\n", ipv4_dst_ops.gc_thresh); +// printk("ip_rt_max_size=%d\n", ip_rt_max_size); +// printk("rt_hash_mask=%d\n", rt_hash_mask); +// printk("goal=%d\n", goal); + + devinet_init(); ip_fib_init(); diff --git a/release/src/linux/linux/net/ipv4/sysctl_net_ipv4.c b/release/src/linux/linux/net/ipv4/sysctl_net_ipv4.c index 1f4081a9..7fe16445 100644 --- a/release/src/linux/linux/net/ipv4/sysctl_net_ipv4.c +++ b/release/src/linux/linux/net/ipv4/sysctl_net_ipv4.c @@ -221,6 +221,18 @@ ctl_table ipv4_table[] = { &sysctl_icmp_ratemask, sizeof(int), 0644, NULL, &proc_dointvec}, {NET_TCP_TW_REUSE, "tcp_tw_reuse", &sysctl_tcp_tw_reuse, sizeof(int), 0644, NULL, &proc_dointvec}, + {NET_TCP_VEGAS, "tcp_vegas_cong_avoid", + &sysctl_tcp_vegas_cong_avoid, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_TCP_VEGAS_ALPHA, "tcp_vegas_alpha", + &sysctl_tcp_vegas_alpha, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_TCP_VEGAS_BETA, "tcp_vegas_beta", + &sysctl_tcp_vegas_beta, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_TCP_VEGAS_GAMMA, "tcp_vegas_gamma", + &sysctl_tcp_vegas_gamma, sizeof(int), 0644, NULL, + &proc_dointvec}, {0} }; diff --git a/release/src/linux/linux/net/ipv4/tcp_input.c b/release/src/linux/linux/net/ipv4/tcp_input.c index 8c99dd52..243e2991 100644 --- a/release/src/linux/linux/net/ipv4/tcp_input.c +++ b/release/src/linux/linux/net/ipv4/tcp_input.c @@ -87,6 +87,16 @@ int sysctl_tcp_stdurg = 0; int sysctl_tcp_rfc1337 = 0; int sysctl_tcp_max_orphans = NR_FILE; +int sysctl_tcp_vegas_cong_avoid = 0; + +/* Default values of the Vegas variables, in fixed-point representation + * with V_PARAM_SHIFT bits to the right of the binary point. + */ +#define V_PARAM_SHIFT 1 +int sysctl_tcp_vegas_alpha = 1<<V_PARAM_SHIFT; +int sysctl_tcp_vegas_beta = 3<<V_PARAM_SHIFT; +int sysctl_tcp_vegas_gamma = 1<<V_PARAM_SHIFT; + #define FLAG_DATA 0x01 /* Incoming frame contained data. */ #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ #define FLAG_DATA_ACKED 0x04 /* This ACK acknowledged new data. */ @@ -399,6 +409,42 @@ static void tcp_event_data_recv(struct sock *sk, struct tcp_opt *tp, struct sk_b tcp_grow_window(sk, tp, skb); } +/* Set up a new TCP connection, depending on whether it should be + * using Vegas or not. + */ +void tcp_vegas_init(struct tcp_opt *tp) +{ + if (sysctl_tcp_vegas_cong_avoid) { + tp->vegas.do_vegas = 1; + tp->vegas.baseRTT = 0x7fffffff; + tcp_vegas_enable(tp); + } else + tcp_vegas_disable(tp); +} + +/* Do RTT sampling needed for Vegas. + * Basically we: + * o min-filter RTT samples from within an RTT to get the current + * propagation delay + queuing delay (we are min-filtering to try to + * avoid the effects of delayed ACKs) + * o min-filter RTT samples from a much longer window (forever for now) + * to find the propagation delay (baseRTT) + */ +static inline void vegas_rtt_calc(struct tcp_opt *tp, __u32 rtt) +{ + __u32 vrtt = rtt + 1; /* Never allow zero rtt or baseRTT */ + + /* Filter to find propagation delay: */ + if (vrtt < tp->vegas.baseRTT) + tp->vegas.baseRTT = vrtt; + + /* Find the min RTT during the last RTT to find + * the current prop. delay + queuing delay: + */ + tp->vegas.minRTT = min(tp->vegas.minRTT, vrtt); + tp->vegas.cntRTT++; +} + /* Called to compute a smoothed rtt estimate. The data fed to this * routine either comes from timestamps, or from segments that were * known _not_ to have been retransmitted [see Karn/Partridge @@ -412,6 +458,9 @@ static __inline__ void tcp_rtt_estimator(struct tcp_opt *tp, __u32 mrtt) { long m = mrtt; /* RTT */ + if (tcp_vegas_enabled(tp)) + vegas_rtt_calc(tp, mrtt); + /* The following amusing code comes from Jacobson's * article in SIGCOMM '88. Note that rtt and mdev * are scaled versions of rtt and mean deviation. @@ -1013,7 +1062,7 @@ void tcp_enter_loss(struct sock *sk, int how) tcp_sync_left_out(tp); tp->reordering = min_t(unsigned int, tp->reordering, sysctl_tcp_reordering); - tp->ca_state = TCP_CA_Loss; + tcp_set_ca_state(tp, TCP_CA_Loss); tp->high_seq = tp->snd_nxt; TCP_ECN_queue_cwr(tp); } @@ -1375,7 +1424,7 @@ static int tcp_try_undo_recovery(struct sock *sk, struct tcp_opt *tp) tcp_moderate_cwnd(tp); return 1; } - tp->ca_state = TCP_CA_Open; + tcp_set_ca_state(tp, TCP_CA_Open); return 0; } @@ -1435,7 +1484,7 @@ static int tcp_try_undo_loss(struct sock *sk, struct tcp_opt *tp) tp->retransmits = 0; tp->undo_marker = 0; if (!IsReno(tp)) - tp->ca_state = TCP_CA_Open; + tcp_set_ca_state(tp, TCP_CA_Open); return 1; } return 0; @@ -1466,7 +1515,7 @@ static void tcp_try_to_open(struct sock *sk, struct tcp_opt *tp, int flag) state = TCP_CA_Disorder; if (tp->ca_state != state) { - tp->ca_state = state; + tcp_set_ca_state(tp, state); tp->high_seq = tp->snd_nxt; } tcp_moderate_cwnd(tp); @@ -1540,7 +1589,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, * is ACKed for CWR bit to reach receiver. */ if (tp->snd_una != tp->high_seq) { tcp_complete_cwr(tp); - tp->ca_state = TCP_CA_Open; + tcp_set_ca_state(tp, TCP_CA_Open); } break; @@ -1551,7 +1600,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, * catching for all duplicate ACKs. */ IsReno(tp) || tp->snd_una != tp->high_seq) { tp->undo_marker = 0; - tp->ca_state = TCP_CA_Open; + tcp_set_ca_state(tp, TCP_CA_Open); } break; @@ -1625,7 +1674,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, } tp->snd_cwnd_cnt = 0; - tp->ca_state = TCP_CA_Recovery; + tcp_set_ca_state(tp, TCP_CA_Recovery); } if (is_dupack || tcp_head_timedout(sk, tp)) @@ -1696,7 +1745,7 @@ tcp_ack_update_rtt(struct tcp_opt *tp, int flag, s32 seq_rtt) /* This is Jacobson's slow start and congestion avoidance. * SIGCOMM '88, p. 328. */ -static __inline__ void tcp_cong_avoid(struct tcp_opt *tp) +static __inline__ void reno_cong_avoid(struct tcp_opt *tp) { if (tp->snd_cwnd <= tp->snd_ssthresh) { /* In "safe" area, increase. */ @@ -1716,6 +1765,236 @@ static __inline__ void tcp_cong_avoid(struct tcp_opt *tp) tp->snd_cwnd_stamp = tcp_time_stamp; } +/* This is based on the congestion detection/avoidance scheme described in + * Lawrence S. Brakmo and Larry L. Peterson. + * "TCP Vegas: End to end congestion avoidance on a global internet." + * IEEE Journal on Selected Areas in Communication, 13(8):1465--1480, + * October 1995. Available from: + * ftp://ftp.cs.arizona.edu/xkernel/Papers/jsac.ps + * + * See http://www.cs.arizona.edu/xkernel/ for their implementation. + * The main aspects that distinguish this implementation from the + * Arizona Vegas implementation are: + * o We do not change the loss detection or recovery mechanisms of + * Linux in any way. Linux already recovers from losses quite well, + * using fine-grained timers, NewReno, and FACK. + * o To avoid the performance penalty imposed by increasing cwnd + * only every-other RTT during slow start, we increase during + * every RTT during slow start, just like Reno. + * o Largely to allow continuous cwnd growth during slow start, + * we use the rate at which ACKs come back as the "actual" + * rate, rather than the rate at which data is sent. + * o To speed convergence to the right rate, we set the cwnd + * to achieve the right ("actual") rate when we exit slow start. + * o To filter out the noise caused by delayed ACKs, we use the + * minimum RTT sample observed during the last RTT to calculate + * the actual rate. + * o When the sender re-starts from idle, it waits until it has + * received ACKs for an entire flight of new data before making + * a cwnd adjustment decision. The original Vegas implementation + * assumed senders never went idle. + */ +static void vegas_cong_avoid(struct tcp_opt *tp, u32 ack, u32 seq_rtt) +{ + /* The key players are v_beg_snd_una and v_beg_snd_nxt. + * + * These are so named because they represent the approximate values + * of snd_una and snd_nxt at the beginning of the current RTT. More + * precisely, they represent the amount of data sent during the RTT. + * At the end of the RTT, when we receive an ACK for v_beg_snd_nxt, + * we will calculate that (v_beg_snd_nxt - v_beg_snd_una) outstanding + * bytes of data have been ACKed during the course of the RTT, giving + * an "actual" rate of: + * + * (v_beg_snd_nxt - v_beg_snd_una) / (rtt duration) + * + * Unfortunately, v_beg_snd_una is not exactly equal to snd_una, + * because delayed ACKs can cover more than one segment, so they + * don't line up nicely with the boundaries of RTTs. + * + * Another unfortunate fact of life is that delayed ACKs delay the + * advance of the left edge of our send window, so that the number + * of bytes we send in an RTT is often less than our cwnd will allow. + * So we keep track of our cwnd separately, in v_beg_snd_cwnd. + */ + + if (after(ack, tp->vegas.beg_snd_nxt)) { + /* Do the Vegas once-per-RTT cwnd adjustment. */ + u32 old_wnd, old_snd_cwnd; + + + /* Here old_wnd is essentially the window of data that was + * sent during the previous RTT, and has all + * been acknowledged in the course of the RTT that ended + * with the ACK we just received. Likewise, old_snd_cwnd + * is the cwnd during the previous RTT. + */ + old_wnd = (tp->vegas.beg_snd_nxt - tp->vegas.beg_snd_una) / + tp->mss_cache; + old_snd_cwnd = tp->vegas.beg_snd_cwnd; + + /* Save the extent of the current window so we can use this + * at the end of the next RTT. + */ + tp->vegas.beg_snd_una = tp->vegas.beg_snd_nxt; + tp->vegas.beg_snd_nxt = tp->snd_nxt; + tp->vegas.beg_snd_cwnd = tp->snd_cwnd; + + /* Take into account the current RTT sample too, to + * decrease the impact of delayed acks. This double counts + * this sample since we count it for the next window as well, + * but that's not too awful, since we're taking the min, + * rather than averaging. + */ + vegas_rtt_calc(tp, seq_rtt); + + /* We do the Vegas calculations only if we got enough RTT + * samples that we can be reasonably sure that we got + * at least one RTT sample that wasn't from a delayed ACK. + * If we only had 2 samples total, + * then that means we're getting only 1 ACK per RTT, which + * means they're almost certainly delayed ACKs. + * If we have 3 samples, we should be OK. + */ + + if (tp->vegas.cntRTT <= 2) { + /* We don't have enough RTT samples to do the Vegas + * calculation, so we'll behave like Reno. + */ + if (tp->snd_cwnd > tp->snd_ssthresh) + tp->snd_cwnd++; + } else { + u32 rtt, target_cwnd, diff; + + /* We have enough RTT samples, so, using the Vegas + * algorithm, we determine if we should increase or + * decrease cwnd, and by how much. + */ + + /* Pluck out the RTT we are using for the Vegas + * calculations. This is the min RTT seen during the + * last RTT. Taking the min filters out the effects + * of delayed ACKs, at the cost of noticing congestion + * a bit later. + */ + rtt = tp->vegas.minRTT; + + /* Calculate the cwnd we should have, if we weren't + * going too fast. + * + * This is: + * (actual rate in segments) * baseRTT + * We keep it as a fixed point number with + * V_PARAM_SHIFT bits to the right of the binary point. + */ + target_cwnd = ((old_wnd * tp->vegas.baseRTT) + << V_PARAM_SHIFT) / rtt; + + /* Calculate the difference between the window we had, + * and the window we would like to have. This quantity + * is the "Diff" from the Arizona Vegas papers. + * + * Again, this is a fixed point number with + * V_PARAM_SHIFT bits to the right of the binary + * point. + */ + diff = (old_wnd << V_PARAM_SHIFT) - target_cwnd; + + if (tp->snd_cwnd < tp->snd_ssthresh) { + /* Slow start. */ + if (diff > sysctl_tcp_vegas_gamma) { + /* Going too fast. Time to slow down + * and switch to congestion avoidance. + */ + tp->snd_ssthresh = 2; + + /* Set cwnd to match the actual rate + * exactly: + * cwnd = (actual rate) * baseRTT + * Then we add 1 because the integer + * truncation robs us of full link + * utilization. + */ + tp->snd_cwnd = min(tp->snd_cwnd, + (target_cwnd >> + V_PARAM_SHIFT)+1); + + } + } else { + /* Congestion avoidance. */ + u32 next_snd_cwnd; + + /* Figure out where we would like cwnd + * to be. + */ + if (diff > sysctl_tcp_vegas_beta) { + /* The old window was too fast, so + * we slow down. + */ + next_snd_cwnd = old_snd_cwnd - 1; + } else if (diff < sysctl_tcp_vegas_alpha) { + /* We don't have enough extra packets + * in the network, so speed up. + */ + next_snd_cwnd = old_snd_cwnd + 1; + } else { + /* Sending just as fast as we + * should be. + */ + next_snd_cwnd = old_snd_cwnd; + } + + /* Adjust cwnd upward or downward, toward the + * desired value. + */ + if (next_snd_cwnd > tp->snd_cwnd) + tp->snd_cwnd++; + else if (next_snd_cwnd < tp->snd_cwnd) + tp->snd_cwnd--; + } + } + + /* Wipe the slate clean for the next RTT. */ + tp->vegas.cntRTT = 0; + tp->vegas.minRTT = 0x7fffffff; + } + + /* The following code is executed for every ack we receive, + * except for conditions checked in should_advance_cwnd() + * before the call to tcp_cong_avoid(). Mainly this means that + * we only execute this code if the ack actually acked some + * data. + */ + + /* If we are in slow start, increase our cwnd in response to this ACK. + * (If we are not in slow start then we are in congestion avoidance, + * and adjust our congestion window only once per RTT. See the code + * above.) + */ + if (tp->snd_cwnd <= tp->snd_ssthresh) + tp->snd_cwnd++; + + /* to keep cwnd from growing without bound */ + tp->snd_cwnd = min_t(u32, tp->snd_cwnd, tp->snd_cwnd_clamp); + + /* Make sure that we are never so timid as to reduce our cwnd below + * 2 MSS. + * + * Going below 2 MSS would risk huge delayed ACKs from our receiver. + */ + tp->snd_cwnd = max(tp->snd_cwnd, 2U); + + tp->snd_cwnd_stamp = tcp_time_stamp; +} + +static inline void tcp_cong_avoid(struct tcp_opt *tp, u32 ack, u32 seq_rtt) +{ + if (tcp_vegas_enabled(tp)) + vegas_cong_avoid(tp, ack, seq_rtt); + else + reno_cong_avoid(tp); +} + /* Restart timer after forward progress on connection. * RFC2988 recommends to restart timer to now+rto. */ @@ -1730,7 +2009,7 @@ static __inline__ void tcp_ack_packets_out(struct sock *sk, struct tcp_opt *tp) } /* Remove acknowledged frames from the retransmission queue. */ -static int tcp_clean_rtx_queue(struct sock *sk) +static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p) { struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); struct sk_buff *skb; @@ -1813,6 +2092,7 @@ static int tcp_clean_rtx_queue(struct sock *sk) } } #endif + *seq_rtt_p = seq_rtt; return acked; } @@ -1900,6 +2180,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) u32 ack_seq = TCP_SKB_CB(skb)->seq; u32 ack = TCP_SKB_CB(skb)->ack_seq; u32 prior_in_flight; + s32 seq_rtt; int prior_packets; /* If the ack is newer than sent or older than previous acks @@ -1947,17 +2228,19 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) prior_in_flight = tcp_packets_in_flight(tp); /* See if we can take anything off of the retransmit queue. */ - flag |= tcp_clean_rtx_queue(sk); + flag |= tcp_clean_rtx_queue(sk, &seq_rtt); if (tcp_ack_is_dubious(tp, flag)) { /* Advanve CWND, if state allows this. */ - if ((flag&FLAG_DATA_ACKED) && prior_in_flight >= tp->snd_cwnd && + if ((flag&FLAG_DATA_ACKED) && + (tcp_vegas_enabled(tp) || prior_in_flight >= tp->snd_cwnd) && tcp_may_raise_cwnd(tp, flag)) - tcp_cong_avoid(tp); + tcp_cong_avoid(tp, ack, seq_rtt); tcp_fastretrans_alert(sk, prior_snd_una, prior_packets, flag); } else { - if ((flag&FLAG_DATA_ACKED) && prior_in_flight >= tp->snd_cwnd) - tcp_cong_avoid(tp); + if ((flag & FLAG_DATA_ACKED) && + (tcp_vegas_enabled(tp) || prior_in_flight >= tp->snd_cwnd)) + tcp_cong_avoid(tp, ack, seq_rtt); } if ((flag & FLAG_FORWARD_PROGRESS) || !(flag&FLAG_NOT_DUP)) diff --git a/release/src/linux/linux/net/ipv4/tcp_minisocks.c b/release/src/linux/linux/net/ipv4/tcp_minisocks.c index b69cc32c..6fdb7681 100644 --- a/release/src/linux/linux/net/ipv4/tcp_minisocks.c +++ b/release/src/linux/linux/net/ipv4/tcp_minisocks.c @@ -715,7 +715,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req, newtp->snd_cwnd = 2; newtp->snd_cwnd_cnt = 0; - newtp->ca_state = TCP_CA_Open; + tcp_set_ca_state(newtp, TCP_CA_Open); tcp_init_xmit_timers(newsk); skb_queue_head_init(&newtp->out_of_order_queue); newtp->send_head = NULL; @@ -783,6 +783,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req, newtp->mss_clamp = req->mss; TCP_ECN_openreq_child(newtp, req); + tcp_vegas_init(newtp); TCP_INC_STATS_BH(TcpPassiveOpens); } return newsk; diff --git a/release/src/linux/linux/net/ipv4/tcp_output.c b/release/src/linux/linux/net/ipv4/tcp_output.c index 35cbbbf7..3fd4871f 100644 --- a/release/src/linux/linux/net/ipv4/tcp_output.c +++ b/release/src/linux/linux/net/ipv4/tcp_output.c @@ -105,6 +105,9 @@ static void tcp_cwnd_restart(struct tcp_opt *tp) u32 restart_cwnd = tcp_init_cwnd(tp); u32 cwnd = tp->snd_cwnd; + if (tcp_is_vegas(tp)) + tcp_vegas_enable(tp); + tp->snd_ssthresh = tcp_current_ssthresh(tp); restart_cwnd = min(restart_cwnd, cwnd); @@ -223,6 +226,19 @@ int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb) tcp_header_size += (TCPOLEN_SACK_BASE_ALIGNED + (tp->eff_sacks * TCPOLEN_SACK_PERBLOCK)); } + + /* + * If the connection is idle and we are restarting, + * then we don't want to do any Vegas calculations + * until we get fresh RTT samples. So when we + * restart, we reset our Vegas state to a clean + * slate. After we get acks for this flight of + * packets, _then_ we can make Vegas calculations + * again. + */ + if (tcp_is_vegas(tp) && tcp_packets_in_flight(tp) == 0) + tcp_vegas_enable(tp); + th = (struct tcphdr *) skb_push(skb, tcp_header_size); skb->h.th = th; skb_set_owner_w(skb, sk); @@ -800,7 +816,7 @@ void tcp_simple_retransmit(struct sock *sk) tp->snd_ssthresh = tcp_current_ssthresh(tp); tp->prior_ssthresh = 0; tp->undo_marker = 0; - tp->ca_state = TCP_CA_Loss; + tcp_set_ca_state(tp, TCP_CA_Loss); } tcp_xmit_retransmit_queue(sk); } @@ -1181,6 +1197,7 @@ static inline void tcp_connect_init(struct sock *sk) tp->window_clamp = dst->window; tp->advmss = dst->advmss; tcp_initialize_rcv_mss(sk); + tcp_vegas_init(tp); tcp_select_initial_window(tcp_full_space(sk), tp->advmss - (tp->ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0), @@ -1231,6 +1248,7 @@ int tcp_connect(struct sock *sk) TCP_SKB_CB(buff)->end_seq = tp->write_seq; tp->snd_nxt = tp->write_seq; tp->pushed_seq = tp->write_seq; + tcp_vegas_init(tp); /* Send it off. */ TCP_SKB_CB(buff)->when = tcp_time_stamp; |