From 4aca87515a5083ae0e31ce3177189fd43b6d05ac Mon Sep 17 00:00:00 2001 From: Andreas Baumann Date: Sat, 3 Jan 2015 13:58:15 +0100 Subject: patch to Vanilla Tomato 1.28 --- release/src/linux/linux/net/sched/Config.in | 3 +- release/src/linux/linux/net/sched/Makefile | 1 + release/src/linux/linux/net/sched/sch_api.c | 3 + release/src/linux/linux/net/sched/sch_esfq.c | 652 ++++++++ release/src/linux/linux/net/sched/sch_fifo.c | 15 +- release/src/linux/linux/net/sched/sch_generic.c | 13 +- release/src/linux/linux/net/sched/sch_hfsc.c | 1817 +++++++++++++++++++++++ release/src/linux/linux/net/sched/sch_htb.c | 255 ++-- release/src/linux/linux/net/sched/sch_ingress.c | 4 - release/src/linux/linux/net/sched/sch_sfq.c | 8 +- 10 files changed, 2669 insertions(+), 102 deletions(-) create mode 100644 release/src/linux/linux/net/sched/sch_esfq.c create mode 100644 release/src/linux/linux/net/sched/sch_hfsc.c (limited to 'release/src/linux/linux/net/sched') diff --git a/release/src/linux/linux/net/sched/Config.in b/release/src/linux/linux/net/sched/Config.in index 8e203456..468fdf2a 100644 --- a/release/src/linux/linux/net/sched/Config.in +++ b/release/src/linux/linux/net/sched/Config.in @@ -5,13 +5,14 @@ tristate ' CBQ packet scheduler' CONFIG_NET_SCH_CBQ tristate ' HTB packet scheduler' CONFIG_NET_SCH_HTB tristate ' CSZ packet scheduler' CONFIG_NET_SCH_CSZ #tristate ' H-PFQ packet scheduler' CONFIG_NET_SCH_HPFQ -#tristate ' H-FSC packet scheduler' CONFIG_NET_SCH_HFCS +tristate ' H-FSC packet scheduler' CONFIG_NET_SCH_HFSC if [ "$CONFIG_ATM" = "y" ]; then bool ' ATM pseudo-scheduler' CONFIG_NET_SCH_ATM fi tristate ' The simplest PRIO pseudoscheduler' CONFIG_NET_SCH_PRIO tristate ' RED queue' CONFIG_NET_SCH_RED tristate ' SFQ queue' CONFIG_NET_SCH_SFQ +tristate ' ESFQ queue' CONFIG_NET_SCH_ESFQ tristate ' TEQL queue' CONFIG_NET_SCH_TEQL tristate ' TBF queue' CONFIG_NET_SCH_TBF tristate ' GRED queue' CONFIG_NET_SCH_GRED diff --git a/release/src/linux/linux/net/sched/Makefile b/release/src/linux/linux/net/sched/Makefile index e48e5c3e..49cf71e7 100644 --- a/release/src/linux/linux/net/sched/Makefile +++ b/release/src/linux/linux/net/sched/Makefile @@ -18,6 +18,7 @@ obj-$(CONFIG_NET_SCH_HPFQ) += sch_hpfq.o obj-$(CONFIG_NET_SCH_HFSC) += sch_hfsc.o obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o obj-$(CONFIG_NET_SCH_SFQ) += sch_sfq.o +obj-$(CONFIG_NET_SCH_ESFQ) += sch_esfq.o obj-$(CONFIG_NET_SCH_RED) += sch_red.o obj-$(CONFIG_NET_SCH_TBF) += sch_tbf.o obj-$(CONFIG_NET_SCH_PRIO) += sch_prio.o diff --git a/release/src/linux/linux/net/sched/sch_api.c b/release/src/linux/linux/net/sched/sch_api.c index a5d8945e..ae384433 100644 --- a/release/src/linux/linux/net/sched/sch_api.c +++ b/release/src/linux/linux/net/sched/sch_api.c @@ -1232,6 +1232,9 @@ int __init pktsched_init(void) #ifdef CONFIG_NET_SCH_SFQ INIT_QDISC(sfq); #endif +#ifdef CONFIG_NET_SCH_ESFQ + INIT_QDISC(esfq); +#endif #ifdef CONFIG_NET_SCH_TBF INIT_QDISC(tbf); #endif diff --git a/release/src/linux/linux/net/sched/sch_esfq.c b/release/src/linux/linux/net/sched/sch_esfq.c new file mode 100644 index 00000000..26640f18 --- /dev/null +++ b/release/src/linux/linux/net/sched/sch_esfq.c @@ -0,0 +1,652 @@ +/* + * net/sched/sch_esfq.c Extended Stochastic Fairness Queueing discipline. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + * Authors: Alexey Kuznetsov, + * + * Changes: Alexander Atanasov, + * Added dynamic depth,limit,divisor,hash_kind options. + * Added dst and src hashes. + * + * Alexander Clouter, + * Ported ESFQ to Linux 2.6. + * + * Corey Hickey, + * Maintenance of the Linux 2.6 port. + * Added fwmark hash (thanks to Robert Kurjata) + * Added direct hashing for src, dst, and fwmark. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/* Stochastic Fairness Queuing algorithm. + For more comments look at sch_sfq.c. + The difference is that you can change limit, depth, + hash table size and choose 7 hash types. + + classic: same as in sch_sfq.c + dst: destination IP address + src: source IP address + fwmark: netfilter mark value + dst_direct: + src_direct: + fwmark_direct: direct hashing of the above sources + + TODO: + make sfq_change work. +*/ + +#ifndef IPPROTO_SCTP +#define IPPROTO_SCTP 132 +#endif +#ifndef IPPROTO_DCCP +#define IPPROTO_DCCP 33 +#endif + +/* This type should contain at least SFQ_DEPTH*2 values */ +typedef unsigned int esfq_index; + +struct esfq_head +{ + esfq_index next; + esfq_index prev; +}; + +struct esfq_sched_data +{ +/* Parameters */ + int perturb_period; + unsigned quantum; /* Allotment per round: MUST BE >= MTU */ + int limit; + unsigned depth; + unsigned hash_divisor; + unsigned hash_kind; +/* Variables */ + struct timer_list perturb_timer; + int perturbation; + esfq_index tail; /* Index of current slot in round */ + esfq_index max_depth; /* Maximal depth */ + + esfq_index *ht; /* Hash table */ + esfq_index *next; /* Active slots link */ + short *allot; /* Current allotment per slot */ + unsigned short *hash; /* Hash value indexed by slots */ + struct sk_buff_head *qs; /* Slot queue */ + struct esfq_head *dep; /* Linked list of slots, indexed by depth */ + unsigned dyn_min; /* For dynamic divisor adjustment; minimum value seen */ + unsigned dyn_max; /* maximum value seen */ + unsigned dyn_range; /* saved range */ +}; + +static __inline__ unsigned esfq_hash_u32(struct esfq_sched_data *q,u32 h) +{ + int pert = q->perturbation; + + if (pert) + h = (h<>(0x1F - pert)); + + h = ntohl(h) * 2654435761UL; + return h & (q->hash_divisor-1); +} + +/* Hash input values directly into the "nearest" slot, taking into account the + * range of input values seen. This is most useful when the hash table is at + * least as large as the range of possible values. */ +static __inline__ unsigned esfq_hash_direct(struct esfq_sched_data *q, u32 h) +{ + /* adjust minimum and maximum */ + if (h < q->dyn_min || h > q->dyn_max) { + q->dyn_min = h < q->dyn_min ? h : q->dyn_min; + q->dyn_max = h > q->dyn_max ? h : q->dyn_max; + + /* find new range */ + if ((q->dyn_range = q->dyn_max - q->dyn_min) >= q->hash_divisor) + printk(KERN_WARNING "ESFQ: (direct hash) Input range %u is larger than hash " + "table. See ESFQ README for details.\n", q->dyn_range); + } + + /* hash input values into slot numbers */ + if (q->dyn_min == q->dyn_max) + return 0; /* only one value seen; avoid division by 0 */ + else + return (h - q->dyn_min) * (q->hash_divisor - 1) / q->dyn_range; +} + +static __inline__ unsigned esfq_fold_hash_classic(struct esfq_sched_data *q, u32 h, u32 h1) +{ + int pert = q->perturbation; + + /* Have we any rotation primitives? If not, WHY? 
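+ * (The classic fold, as in sch_sfq.c, XORs a bit-rotated copy of the
+ * second hash word into the first, folds the upper bits down with
+ * h >> 10, and masks the result to the hash table size.)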
*/ + h ^= (h1<>(0x1F - pert)); + h ^= h>>10; + return h & (q->hash_divisor-1); +} + +static unsigned esfq_hash(struct esfq_sched_data *q, struct sk_buff *skb) +{ + u32 h, h2; + u32 hs; + u32 nfm; + + switch (skb->protocol) { + case __constant_htons(ETH_P_IP): + { + struct iphdr *iph = skb->nh.iph; + h = iph->daddr; + hs = iph->saddr; + nfm = skb->nfmark; + h2 = hs^iph->protocol; + if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) && + (iph->protocol == IPPROTO_TCP || + iph->protocol == IPPROTO_UDP || + iph->protocol == IPPROTO_SCTP || + iph->protocol == IPPROTO_DCCP || + iph->protocol == IPPROTO_ESP)) + h2 ^= *(((u32*)iph) + iph->ihl); + break; + } + case __constant_htons(ETH_P_IPV6): + { + struct ipv6hdr *iph = skb->nh.ipv6h; + h = iph->daddr.s6_addr32[3]; + hs = iph->saddr.s6_addr32[3]; + nfm = skb->nfmark; + h2 = hs^iph->nexthdr; + if (iph->nexthdr == IPPROTO_TCP || + iph->nexthdr == IPPROTO_UDP || + iph->nexthdr == IPPROTO_SCTP || + iph->nexthdr == IPPROTO_DCCP || + iph->nexthdr == IPPROTO_ESP) + h2 ^= *(u32*)&iph[1]; + break; + } + default: + h = (u32)(unsigned long)skb->dst; + hs = (u32)(unsigned long)skb->sk; + nfm = skb->nfmark; + h2 = hs^skb->protocol; + } + switch(q->hash_kind) + { + case TCA_SFQ_HASH_CLASSIC: + return esfq_fold_hash_classic(q, h, h2); + case TCA_SFQ_HASH_DST: + return esfq_hash_u32(q,h); + case TCA_SFQ_HASH_DSTDIR: + return esfq_hash_direct(q, ntohl(h)); + case TCA_SFQ_HASH_SRC: + return esfq_hash_u32(q,hs); + case TCA_SFQ_HASH_SRCDIR: + return esfq_hash_direct(q, ntohl(hs)); +#ifdef CONFIG_NETFILTER + case TCA_SFQ_HASH_FWMARK: + return esfq_hash_u32(q,nfm); + case TCA_SFQ_HASH_FWMARKDIR: + return esfq_hash_direct(q,nfm); +#endif + default: + if (net_ratelimit()) + printk(KERN_WARNING "ESFQ: Unknown hash method. Falling back to classic.\n"); + } + return esfq_fold_hash_classic(q, h, h2); +} + +static inline void esfq_link(struct esfq_sched_data *q, esfq_index x) +{ + esfq_index p, n; + int d = q->qs[x].qlen + q->depth; + + p = d; + n = q->dep[d].next; + q->dep[x].next = n; + q->dep[x].prev = p; + q->dep[p].next = q->dep[n].prev = x; +} + +static inline void esfq_dec(struct esfq_sched_data *q, esfq_index x) +{ + esfq_index p, n; + + n = q->dep[x].next; + p = q->dep[x].prev; + q->dep[p].next = n; + q->dep[n].prev = p; + + if (n == p && q->max_depth == q->qs[x].qlen + 1) + q->max_depth--; + + esfq_link(q, x); +} + +static inline void esfq_inc(struct esfq_sched_data *q, esfq_index x) +{ + esfq_index p, n; + int d; + + n = q->dep[x].next; + p = q->dep[x].prev; + q->dep[p].next = n; + q->dep[n].prev = p; + d = q->qs[x].qlen; + if (q->max_depth < d) + q->max_depth = d; + + esfq_link(q, x); +} + +static int esfq_drop(struct Qdisc *sch) +{ + struct esfq_sched_data *q = (struct esfq_sched_data *)sch->data; + esfq_index d = q->max_depth; + struct sk_buff *skb; + unsigned int len; + + /* Queue is full! Find the longest slot and + drop a packet from it */ + + if (d > 1) { + esfq_index x = q->dep[d+q->depth].next; + skb = q->qs[x].prev; + len = skb->len; + __skb_unlink(skb, &q->qs[x]); + kfree_skb(skb); + esfq_dec(q, x); + sch->q.qlen--; + sch->stats.drops++; + return len; + } + + if (d == 1) { + /* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. 
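+ * Every active slot holds a single packet, so take the slot that
+ * follows the tail in the round, unlink it from the active ring,
+ * drop its packet and mark its hash bucket empty.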
*/ + d = q->next[q->tail]; + q->next[q->tail] = q->next[d]; + q->allot[q->next[d]] += q->quantum; + skb = q->qs[d].prev; + len = skb->len; + __skb_unlink(skb, &q->qs[d]); + kfree_skb(skb); + esfq_dec(q, d); + sch->q.qlen--; + q->ht[q->hash[d]] = q->depth; + sch->stats.drops++; + return len; + } + + return 0; +} + +static int +esfq_enqueue(struct sk_buff *skb, struct Qdisc* sch) +{ + struct esfq_sched_data *q = (struct esfq_sched_data *)sch->data; + unsigned hash = esfq_hash(q, skb); + unsigned depth = q->depth; + esfq_index x; + + x = q->ht[hash]; + if (x == depth) { + q->ht[hash] = x = q->dep[depth].next; + q->hash[x] = hash; + } + __skb_queue_tail(&q->qs[x], skb); + esfq_inc(q, x); + if (q->qs[x].qlen == 1) { /* The flow is new */ + if (q->tail == depth) { /* It is the first flow */ + q->tail = x; + q->next[x] = x; + q->allot[x] = q->quantum; + } else { + q->next[x] = q->next[q->tail]; + q->next[q->tail] = x; + q->tail = x; + } + } + if (++sch->q.qlen < q->limit-1) { + sch->stats.bytes += skb->len; + sch->stats.packets++; + return 0; + } + + esfq_drop(sch); + return NET_XMIT_CN; +} + +static int +esfq_requeue(struct sk_buff *skb, struct Qdisc* sch) +{ + struct esfq_sched_data *q = (struct esfq_sched_data *)sch->data; + unsigned hash = esfq_hash(q, skb); + unsigned depth = q->depth; + esfq_index x; + + x = q->ht[hash]; + if (x == depth) { + q->ht[hash] = x = q->dep[depth].next; + q->hash[x] = hash; + } + __skb_queue_head(&q->qs[x], skb); + esfq_inc(q, x); + if (q->qs[x].qlen == 1) { /* The flow is new */ + if (q->tail == depth) { /* It is the first flow */ + q->tail = x; + q->next[x] = x; + q->allot[x] = q->quantum; + } else { + q->next[x] = q->next[q->tail]; + q->next[q->tail] = x; + q->tail = x; + } + } + if (++sch->q.qlen < q->limit - 1) { +// sch->stats.requeues++; + return 0; + } + + sch->stats.drops++; + esfq_drop(sch); + return NET_XMIT_CN; +} + + + + +static struct sk_buff * +esfq_dequeue(struct Qdisc* sch) +{ + struct esfq_sched_data *q = (struct esfq_sched_data *)sch->data; + struct sk_buff *skb; + unsigned depth = q->depth; + esfq_index a, old_a; + + /* No active slots */ + if (q->tail == depth) + return NULL; + + a = old_a = q->next[q->tail]; + + /* Grab packet */ + skb = __skb_dequeue(&q->qs[a]); + esfq_dec(q, a); + sch->q.qlen--; + + /* Is the slot empty? */ + if (q->qs[a].qlen == 0) { + q->ht[q->hash[a]] = depth; + a = q->next[a]; + if (a == old_a) { + q->tail = depth; + return skb; + } + q->next[q->tail] = a; + q->allot[a] += q->quantum; + } else if ((q->allot[a] -= skb->len) <= 0) { + q->tail = a; + a = q->next[a]; + q->allot[a] += q->quantum; + } + + return skb; +} + +static void +esfq_reset(struct Qdisc* sch) +{ + struct sk_buff *skb; + + while ((skb = esfq_dequeue(sch)) != NULL) + kfree_skb(skb); +} + +static void esfq_perturbation(unsigned long arg) +{ + struct Qdisc *sch = (struct Qdisc*)arg; + struct esfq_sched_data *q = (struct esfq_sched_data *)sch->data; + + q->perturbation = net_random()&0x1F; + + if (q->perturb_period) { + q->perturb_timer.expires = jiffies + q->perturb_period; + add_timer(&q->perturb_timer); + } +} + +/* +static int esfq_change(struct Qdisc *sch, struct rtattr *opt) +{ + struct esfq_sched_data *q = (struct esfq_sched_data *)sch->data; + struct tc_esfq_qopt *ctl = RTA_DATA(opt); + int old_perturb = q->perturb_period; + + if (opt->rta_len < RTA_LENGTH(sizeof(*ctl))) + return -EINVAL; + + sch_tree_lock(sch); + q->quantum = ctl->quantum ? 
: psched_mtu(sch->dev); + q->perturb_period = ctl->perturb_period*HZ; +// q->hash_divisor = ctl->divisor; +// q->tail = q->limit = q->depth = ctl->flows; + + if (ctl->limit) + q->limit = min_t(u32, ctl->limit, q->depth); + + if (ctl->hash_kind) { + q->hash_kind = ctl->hash_kind; + if (q->hash_kind != TCA_SFQ_HASH_CLASSIC) + q->perturb_period = 0; + } + + // is sch_tree_lock enough to do this ? + while (sch->q.qlen >= q->limit-1) + esfq_drop(sch); + + if (old_perturb) + del_timer(&q->perturb_timer); + if (q->perturb_period) { + q->perturb_timer.expires = jiffies + q->perturb_period; + add_timer(&q->perturb_timer); + } else { + q->perturbation = 0; + } + sch_tree_unlock(sch); + return 0; +} +*/ + +static int esfq_init(struct Qdisc *sch, struct rtattr *opt) +{ + struct esfq_sched_data *q = (struct esfq_sched_data *)sch->data; + struct tc_esfq_qopt *ctl; + esfq_index p = ~0UL/2; + int i; + + if (opt && opt->rta_len < RTA_LENGTH(sizeof(*ctl))) + return -EINVAL; + + init_timer(&q->perturb_timer); + q->perturb_timer.data = (unsigned long)sch; + q->perturb_timer.function = esfq_perturbation; + q->perturbation = 0; + q->hash_kind = TCA_SFQ_HASH_CLASSIC; + q->max_depth = 0; + q->dyn_min = ~0U; /* maximum value for this type */ + q->dyn_max = 0; /* dyn_min/dyn_max will be set properly upon first packet */ + if (opt == NULL) { + q->quantum = psched_mtu(sch->dev); + q->perturb_period = 0; + q->hash_divisor = 1024; + q->tail = q->limit = q->depth = 128; + + } else { + ctl = RTA_DATA(opt); + q->quantum = ctl->quantum ? : psched_mtu(sch->dev); + q->perturb_period = ctl->perturb_period*HZ; + q->hash_divisor = ctl->divisor ? : 1024; + q->tail = q->limit = q->depth = ctl->flows ? : 128; + + if ( q->depth > p - 1 ) + return -EINVAL; + + if (ctl->limit) + q->limit = min_t(u32, ctl->limit, q->depth); + + if (ctl->hash_kind) { + q->hash_kind = ctl->hash_kind; + } + + if (q->perturb_period) { + q->perturb_timer.expires = jiffies + q->perturb_period; + add_timer(&q->perturb_timer); + } + } + + q->ht = kmalloc(q->hash_divisor*sizeof(esfq_index), GFP_KERNEL); + if (!q->ht) + goto err_case; + + q->dep = kmalloc((1+q->depth*2)*sizeof(struct esfq_head), GFP_KERNEL); + if (!q->dep) + goto err_case; + q->next = kmalloc(q->depth*sizeof(esfq_index), GFP_KERNEL); + if (!q->next) + goto err_case; + + q->allot = kmalloc(q->depth*sizeof(short), GFP_KERNEL); + if (!q->allot) + goto err_case; + q->hash = kmalloc(q->depth*sizeof(unsigned short), GFP_KERNEL); + if (!q->hash) + goto err_case; + q->qs = kmalloc(q->depth*sizeof(struct sk_buff_head), GFP_KERNEL); + if (!q->qs) + goto err_case; + + for (i=0; i< q->hash_divisor; i++) + q->ht[i] = q->depth; + for (i=0; idepth; i++) { + skb_queue_head_init(&q->qs[i]); + q->dep[i+q->depth].next = i+q->depth; + q->dep[i+q->depth].prev = i+q->depth; + } + + for (i=0; idepth; i++) + esfq_link(q, i); + return 0; +err_case: + del_timer(&q->perturb_timer); + if (q->ht) + kfree(q->ht); + if (q->dep) + kfree(q->dep); + if (q->next) + kfree(q->next); + if (q->allot) + kfree(q->allot); + if (q->hash) + kfree(q->hash); + if (q->qs) + kfree(q->qs); + return -ENOBUFS; +} + +static void esfq_destroy(struct Qdisc *sch) +{ + struct esfq_sched_data *q = (struct esfq_sched_data *)sch->data; + del_timer(&q->perturb_timer); + if(q->ht) + kfree(q->ht); + if(q->dep) + kfree(q->dep); + if(q->next) + kfree(q->next); + if(q->allot) + kfree(q->allot); + if(q->hash) + kfree(q->hash); + if(q->qs) + kfree(q->qs); +} + +static int esfq_dump(struct Qdisc *sch, struct sk_buff *skb) +{ + struct esfq_sched_data *q = 
(struct esfq_sched_data *)sch->data; + unsigned char *b = skb->tail; + struct tc_esfq_qopt opt; + + opt.quantum = q->quantum; + opt.perturb_period = q->perturb_period/HZ; + + opt.limit = q->limit; + opt.divisor = q->hash_divisor; + opt.flows = q->depth; + opt.hash_kind = q->hash_kind; + + RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); + + return skb->len; + +rtattr_failure: + skb_trim(skb, b - skb->data); + return -1; +} + +static struct Qdisc_ops esfq_qdisc_ops = +{ + .next = NULL, + .cl_ops = NULL, + .id = "esfq", + .priv_size = sizeof(struct esfq_sched_data), + .enqueue = esfq_enqueue, + .dequeue = esfq_dequeue, + .requeue = esfq_requeue, + .drop = esfq_drop, + .init = esfq_init, + .reset = esfq_reset, + .destroy = esfq_destroy, + .change = NULL, /* esfq_change - needs more work */ + .dump = esfq_dump, +// .owner = THIS_MODULE, +}; + +static int __init esfq_module_init(void) +{ + return register_qdisc(&esfq_qdisc_ops); +} +static void __exit esfq_module_exit(void) +{ + unregister_qdisc(&esfq_qdisc_ops); +} +module_init(esfq_module_init) +module_exit(esfq_module_exit) +MODULE_LICENSE("GPL"); diff --git a/release/src/linux/linux/net/sched/sch_fifo.c b/release/src/linux/linux/net/sched/sch_fifo.c index d8ce46f2..3a7741e9 100644 --- a/release/src/linux/linux/net/sched/sch_fifo.c +++ b/release/src/linux/linux/net/sched/sch_fifo.c @@ -46,7 +46,7 @@ bfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch) { struct fifo_sched_data *q = (struct fifo_sched_data *)sch->data; - if (sch->stats.backlog <= q->limit) { + if (sch->stats.backlog + skb->len <= q->limit) { __skb_queue_tail(&sch->q, skb); sch->stats.backlog += skb->len; sch->stats.bytes += skb->len; @@ -87,9 +87,10 @@ fifo_drop(struct Qdisc* sch) skb = __skb_dequeue_tail(&sch->q); if (skb) { - sch->stats.backlog -= skb->len; + int len = skb->len; + sch->stats.backlog -= len; kfree_skb(skb); - return 1; + return len; } return 0; } @@ -106,7 +107,7 @@ pfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch) { struct fifo_sched_data *q = (struct fifo_sched_data *)sch->data; - if (sch->q.qlen <= q->limit) { + if (sch->q.qlen < q->limit) { __skb_queue_tail(&sch->q, skb); sch->stats.bytes += skb->len; sch->stats.packets++; @@ -139,10 +140,12 @@ static int fifo_init(struct Qdisc *sch, struct rtattr *opt) struct fifo_sched_data *q = (void*)sch->data; if (opt == NULL) { + unsigned int limit = sch->dev->tx_queue_len ? 
: 1; + if (sch->ops == &bfifo_qdisc_ops) - q->limit = sch->dev->tx_queue_len*sch->dev->mtu; + q->limit = limit*sch->dev->mtu; else - q->limit = sch->dev->tx_queue_len; + q->limit = limit; } else { struct tc_fifo_qopt *ctl = RTA_DATA(opt); if (opt->rta_len < RTA_LENGTH(sizeof(*ctl))) diff --git a/release/src/linux/linux/net/sched/sch_generic.c b/release/src/linux/linux/net/sched/sch_generic.c index 7b0d49e7..ca30d124 100644 --- a/release/src/linux/linux/net/sched/sch_generic.c +++ b/release/src/linux/linux/net/sched/sch_generic.c @@ -29,6 +29,9 @@ #include #include #include +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) +#include +#endif #include #include @@ -79,6 +82,10 @@ int qdisc_restart(struct net_device *dev) struct Qdisc *q = dev->qdisc; struct sk_buff *skb; + /* BRCM: bail out if queue is null */ + if (!q) + return 0; + /* Dequeue packet */ if ((skb = q->dequeue(q)) != NULL) { if (spin_trylock(&dev->xmit_lock)) { @@ -89,7 +96,11 @@ int qdisc_restart(struct net_device *dev) spin_unlock(&dev->queue_lock); if (!netif_queue_stopped(dev)) { - if (netdev_nit) + if (netdev_nit +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) + && !(skb->imq_flags & IMQ_F_ENQUEUE) +#endif + ) dev_queue_xmit_nit(skb, dev); if (dev->hard_start_xmit(skb, dev) == 0) { diff --git a/release/src/linux/linux/net/sched/sch_hfsc.c b/release/src/linux/linux/net/sched/sch_hfsc.c new file mode 100644 index 00000000..0b6e6d38 --- /dev/null +++ b/release/src/linux/linux/net/sched/sch_hfsc.c @@ -0,0 +1,1817 @@ +/* + * Copyright (c) 2003 Patrick McHardy, + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * 2003-10-17 - Ported from altq + */ +/* + * Copyright (c) 1997-1999 Carnegie Mellon University. All Rights Reserved. + * + * Permission to use, copy, modify, and distribute this software and + * its documentation is hereby granted (including for commercial or + * for-profit use), provided that both the copyright notice and this + * permission notice appear in all copies of the software, derivative + * works, or modified versions, and any portions thereof. + * + * THIS SOFTWARE IS EXPERIMENTAL AND IS KNOWN TO HAVE BUGS, SOME OF + * WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON PROVIDES THIS + * SOFTWARE IN ITS ``AS IS'' CONDITION, AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT + * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE + * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * Carnegie Mellon encourages (but does not require) users of this + * software to return any improvements or extensions that they make, + * and to grant Carnegie Mellon the rights to redistribute these + * changes without encumbrance. 
+ */ +/* + * H-FSC is described in Proceedings of SIGCOMM'97, + * "A Hierarchical Fair Service Curve Algorithm for Link-Sharing, + * Real-Time and Priority Service" + * by Ion Stoica, Hui Zhang, and T. S. Eugene Ng. + * + * Oleg Cherevko added the upperlimit for link-sharing. + * when a class has an upperlimit, the fit-time is computed from the + * upperlimit service curve. the link-sharing scheduler does not schedule + * a class whose fit-time exceeds the current time. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define HFSC_DEBUG 0 + +/* + * kernel internal service curve representation: + * coordinates are given by 64 bit unsigned integers. + * x-axis: unit is clock count. + * y-axis: unit is byte. + * + * The service curve parameters are converted to the internal + * representation. The slope values are scaled to avoid overflow. + * the inverse slope values as well as the y-projection of the 1st + * segment are kept in order to to avoid 64-bit divide operations + * that are expensive on 32-bit architectures. + */ + +struct internal_sc +{ + u64 sm1; /* scaled slope of the 1st segment */ + u64 ism1; /* scaled inverse-slope of the 1st segment */ + u64 dx; /* the x-projection of the 1st segment */ + u64 dy; /* the y-projection of the 1st segment */ + u64 sm2; /* scaled slope of the 2nd segment */ + u64 ism2; /* scaled inverse-slope of the 2nd segment */ +}; + +/* runtime service curve */ +struct runtime_sc +{ + u64 x; /* current starting position on x-axis */ + u64 y; /* current starting position on y-axis */ + u64 sm1; /* scaled slope of the 1st segment */ + u64 ism1; /* scaled inverse-slope of the 1st segment */ + u64 dx; /* the x-projection of the 1st segment */ + u64 dy; /* the y-projection of the 1st segment */ + u64 sm2; /* scaled slope of the 2nd segment */ + u64 ism2; /* scaled inverse-slope of the 2nd segment */ +}; + +enum hfsc_class_flags +{ + HFSC_RSC = 0x1, + HFSC_FSC = 0x2, + HFSC_USC = 0x4 +}; + +struct hfsc_class +{ + u32 classid; /* class id */ + unsigned int refcnt; /* usage count */ + + struct tc_stats stats; /* generic statistics */ + unsigned int level; /* class level in hierarchy */ + struct tcf_proto *filter_list; /* filter list */ + unsigned int filter_cnt; /* filter count */ + + struct hfsc_sched *sched; /* scheduler data */ + struct hfsc_class *cl_parent; /* parent class */ + struct list_head siblings; /* sibling classes */ + struct list_head children; /* child classes */ + struct Qdisc *qdisc; /* leaf qdisc */ + + rb_node_t el_node; /* qdisc's eligible tree member */ + rb_root_t vt_tree; /* active children sorted by cl_vt */ + rb_node_t vt_node; /* parent's vt_tree member */ + rb_root_t cf_tree; /* active children sorted by cl_f */ + rb_node_t cf_node; /* parent's cf_heap member */ + struct list_head hlist; /* hash list member */ + struct list_head dlist; /* drop list member */ + + u64 cl_total; /* total work in bytes */ + u64 cl_cumul; /* cumulative work in bytes done by + real-time criteria */ + + u64 cl_d; /* deadline*/ + u64 cl_e; /* eligible time */ + u64 cl_vt; /* virtual time */ + u64 cl_f; /* time when this class will fit for + link-sharing, max(myf, cfmin) */ + u64 cl_myf; /* my fit-time (calculated from this + class's own upperlimit curve) */ + u64 cl_myfadj; /* my fit-time adjustment (to cancel + history dependence) */ + u64 cl_cfmin; /* earliest children's fit-time (used + with 
cl_myf to obtain cl_f) */ + u64 cl_cvtmin; /* minimal virtual time among the + children fit for link-sharing + (monotonic within a period) */ + u64 cl_vtadj; /* intra-period cumulative vt + adjustment */ + u64 cl_vtoff; /* inter-period cumulative vt offset */ + u64 cl_cvtmax; /* max child's vt in the last period */ + u64 cl_cvtoff; /* cumulative cvtmax of all periods */ + u64 cl_pcvtoff; /* parent's cvtoff at initalization + time */ + + struct internal_sc cl_rsc; /* internal real-time service curve */ + struct internal_sc cl_fsc; /* internal fair service curve */ + struct internal_sc cl_usc; /* internal upperlimit service curve */ + struct runtime_sc cl_deadline; /* deadline curve */ + struct runtime_sc cl_eligible; /* eligible curve */ + struct runtime_sc cl_virtual; /* virtual curve */ + struct runtime_sc cl_ulimit; /* upperlimit curve */ + + unsigned long cl_flags; /* which curves are valid */ + unsigned long cl_vtperiod; /* vt period sequence number */ + unsigned long cl_parentperiod;/* parent's vt period sequence number*/ + unsigned long cl_nactive; /* number of active children */ +}; + +#define HFSC_HSIZE 16 + +struct hfsc_sched +{ + u16 defcls; /* default class id */ + struct hfsc_class root; /* root class */ + struct list_head clhash[HFSC_HSIZE]; /* class hash */ + rb_root_t eligible; /* eligible tree */ + struct list_head droplist; /* active leaf class list (for + dropping) */ + struct sk_buff_head requeue; /* requeued packet */ + struct timer_list wd_timer; /* watchdog timer */ +}; + +/* + * macros + */ +#if PSCHED_CLOCK_SOURCE == PSCHED_GETTIMEOFDAY +#include +#undef PSCHED_GET_TIME +#define PSCHED_GET_TIME(stamp) \ +do { \ + struct timeval tv; \ + do_gettimeofday(&tv); \ + (stamp) = 1000000ULL * tv.tv_sec + tv.tv_usec; \ +} while (0) +#endif + +#if HFSC_DEBUG +#define ASSERT(cond) \ +do { \ + if (unlikely(!(cond))) \ + printk("assertion %s failed at %s:%i (%s)\n", \ + #cond, __FILE__, __LINE__, __FUNCTION__); \ +} while (0) +#else +#define ASSERT(cond) +#endif /* HFSC_DEBUG */ + +#define HT_INFINITY 0xffffffffffffffffULL /* infinite time value */ + + +/* + * eligible tree holds backlogged classes being sorted by their eligible times. + * there is one eligible tree per hfsc instance. 
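+ * it is an rbtree ordered by cl_e; eltree_get_mindl() walks the
+ * eligible prefix to pick the class with the smallest deadline.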
+ */ + +static void +eltree_insert(struct hfsc_class *cl) +{ + rb_node_t **p = &cl->sched->eligible.rb_node; + rb_node_t *parent = NULL; + struct hfsc_class *cl1; + + while (*p != NULL) { + parent = *p; + cl1 = rb_entry(parent, struct hfsc_class, el_node); + if (cl->cl_e >= cl1->cl_e) + p = &parent->rb_right; + else + p = &parent->rb_left; + } + rb_link_node(&cl->el_node, parent, p); + rb_insert_color(&cl->el_node, &cl->sched->eligible); +} + +static inline void +eltree_remove(struct hfsc_class *cl) +{ + rb_erase(&cl->el_node, &cl->sched->eligible); +} + +static inline void +eltree_update(struct hfsc_class *cl) +{ + eltree_remove(cl); + eltree_insert(cl); +} + +/* find the class with the minimum deadline among the eligible classes */ +static inline struct hfsc_class * +eltree_get_mindl(struct hfsc_sched *q, u64 cur_time) +{ + struct hfsc_class *p, *cl = NULL; + rb_node_t *n; + + for (n = rb_first(&q->eligible); n != NULL; n = rb_next(n)) { + p = rb_entry(n, struct hfsc_class, el_node); + if (p->cl_e > cur_time) + break; + if (cl == NULL || p->cl_d < cl->cl_d) + cl = p; + } + return cl; +} + +/* find the class with minimum eligible time among the eligible classes */ +static inline struct hfsc_class * +eltree_get_minel(struct hfsc_sched *q) +{ + rb_node_t *n; + + n = rb_first(&q->eligible); + if (n == NULL) + return NULL; + return rb_entry(n, struct hfsc_class, el_node); +} + +/* + * vttree holds holds backlogged child classes being sorted by their virtual + * time. each intermediate class has one vttree. + */ +static void +vttree_insert(struct hfsc_class *cl) +{ + rb_node_t **p = &cl->cl_parent->vt_tree.rb_node; + rb_node_t *parent = NULL; + struct hfsc_class *cl1; + + while (*p != NULL) { + parent = *p; + cl1 = rb_entry(parent, struct hfsc_class, vt_node); + if (cl->cl_vt >= cl1->cl_vt) + p = &parent->rb_right; + else + p = &parent->rb_left; + } + rb_link_node(&cl->vt_node, parent, p); + rb_insert_color(&cl->vt_node, &cl->cl_parent->vt_tree); +} + +static inline void +vttree_remove(struct hfsc_class *cl) +{ + rb_erase(&cl->vt_node, &cl->cl_parent->vt_tree); +} + +static inline void +vttree_update(struct hfsc_class *cl) +{ + vttree_remove(cl); + vttree_insert(cl); +} + +static inline struct hfsc_class * +vttree_firstfit(struct hfsc_class *cl, u64 cur_time) +{ + struct hfsc_class *p; + rb_node_t *n; + + for (n = rb_first(&cl->vt_tree); n != NULL; n = rb_next(n)) { + p = rb_entry(n, struct hfsc_class, vt_node); + if (p->cl_f <= cur_time) + return p; + } + return NULL; +} + +/* + * get the leaf class with the minimum vt in the hierarchy + */ +static struct hfsc_class * +vttree_get_minvt(struct hfsc_class *cl, u64 cur_time) +{ + /* if root-class's cfmin is bigger than cur_time nothing to do */ + if (cl->cl_cfmin > cur_time) + return NULL; + + while (cl->level > 0) { + cl = vttree_firstfit(cl, cur_time); + if (cl == NULL) + return NULL; + /* + * update parent's cl_cvtmin. 
+ */ + if (cl->cl_parent->cl_cvtmin < cl->cl_vt) + cl->cl_parent->cl_cvtmin = cl->cl_vt; + } + return cl; +} + +static void +cftree_insert(struct hfsc_class *cl) +{ + rb_node_t **p = &cl->cl_parent->cf_tree.rb_node; + rb_node_t *parent = NULL; + struct hfsc_class *cl1; + + while (*p != NULL) { + parent = *p; + cl1 = rb_entry(parent, struct hfsc_class, cf_node); + if (cl->cl_f >= cl1->cl_f) + p = &parent->rb_right; + else + p = &parent->rb_left; + } + rb_link_node(&cl->cf_node, parent, p); + rb_insert_color(&cl->cf_node, &cl->cl_parent->cf_tree); +} + +static inline void +cftree_remove(struct hfsc_class *cl) +{ + rb_erase(&cl->cf_node, &cl->cl_parent->cf_tree); +} + +static inline void +cftree_update(struct hfsc_class *cl) +{ + cftree_remove(cl); + cftree_insert(cl); +} + +/* + * service curve support functions + * + * external service curve parameters + * m: bps + * d: us + * internal service curve parameters + * sm: (bytes/psched_us) << SM_SHIFT + * ism: (psched_us/byte) << ISM_SHIFT + * dx: psched_us + * + * Time source resolution + * PSCHED_JIFFIES: for 48<=HZ<=1534 resolution is between 0.63us and 1.27us. + * PSCHED_CPU: resolution is between 0.5us and 1us. + * PSCHED_GETTIMEOFDAY: resolution is exactly 1us. + * + * sm and ism are scaled in order to keep effective digits. + * SM_SHIFT and ISM_SHIFT are selected to keep at least 4 effective + * digits in decimal using the following table. + * + * Note: We can afford the additional accuracy (altq hfsc keeps at most + * 3 effective digits) thanks to the fact that linux clock is bounded + * much more tightly. + * + * bits/sec 100Kbps 1Mbps 10Mbps 100Mbps 1Gbps + * ------------+------------------------------------------------------- + * bytes/0.5us 6.25e-3 62.5e-3 625e-3 6250e-e 62500e-3 + * bytes/us 12.5e-3 125e-3 1250e-3 12500e-3 125000e-3 + * bytes/1.27us 15.875e-3 158.75e-3 1587.5e-3 15875e-3 158750e-3 + * + * 0.5us/byte 160 16 1.6 0.16 0.016 + * us/byte 80 8 0.8 0.08 0.008 + * 1.27us/byte 63 6.3 0.63 0.063 0.0063 + */ +#define SM_SHIFT 20 +#define ISM_SHIFT 18 + +#define SM_MASK ((1ULL << SM_SHIFT) - 1) +#define ISM_MASK ((1ULL << ISM_SHIFT) - 1) + +static inline u64 +seg_x2y(u64 x, u64 sm) +{ + u64 y; + + /* + * compute + * y = x * sm >> SM_SHIFT + * but divide it for the upper and lower bits to avoid overflow + */ + y = (x >> SM_SHIFT) * sm + (((x & SM_MASK) * sm) >> SM_SHIFT); + return y; +} + +static inline u64 +seg_y2x(u64 y, u64 ism) +{ + u64 x; + + if (y == 0) + x = 0; + else if (ism == HT_INFINITY) + x = HT_INFINITY; + else { + x = (y >> ISM_SHIFT) * ism + + (((y & ISM_MASK) * ism) >> ISM_SHIFT); + } + return x; +} + +/* Convert m (bps) into sm (bytes/psched us) */ +static u64 +m2sm(u32 m) +{ + u64 sm; + + sm = ((u64)m << SM_SHIFT); + sm += PSCHED_JIFFIE2US(HZ) - 1; + do_div(sm, PSCHED_JIFFIE2US(HZ)); + return sm; +} + +/* convert m (bps) into ism (psched us/byte) */ +static u64 +m2ism(u32 m) +{ + u64 ism; + + if (m == 0) + ism = HT_INFINITY; + else { + ism = ((u64)PSCHED_JIFFIE2US(HZ) << ISM_SHIFT); + ism += m - 1; + do_div(ism, m); + } + return ism; +} + +/* convert d (us) into dx (psched us) */ +static u64 +d2dx(u32 d) +{ + u64 dx; + + dx = ((u64)d * PSCHED_JIFFIE2US(HZ)); + dx += 1000000 - 1; + do_div(dx, 1000000); + return dx; +} + +/* convert sm (bytes/psched us) into m (bps) */ +static u32 +sm2m(u64 sm) +{ + u64 m; + + m = (sm * PSCHED_JIFFIE2US(HZ)) >> SM_SHIFT; + return (u32)m; +} + +/* convert dx (psched us) into d (us) */ +static u32 +dx2d(u64 dx) +{ + u64 d; + + d = dx * 1000000; + do_div(d, PSCHED_JIFFIE2US(HZ)); + 
return (u32)d; +} + +static void +sc2isc(struct tc_service_curve *sc, struct internal_sc *isc) +{ + isc->sm1 = m2sm(sc->m1); + isc->ism1 = m2ism(sc->m1); + isc->dx = d2dx(sc->d); + isc->dy = seg_x2y(isc->dx, isc->sm1); + isc->sm2 = m2sm(sc->m2); + isc->ism2 = m2ism(sc->m2); +} + +/* + * initialize the runtime service curve with the given internal + * service curve starting at (x, y). + */ +static void +rtsc_init(struct runtime_sc *rtsc, struct internal_sc *isc, u64 x, u64 y) +{ + rtsc->x = x; + rtsc->y = y; + rtsc->sm1 = isc->sm1; + rtsc->ism1 = isc->ism1; + rtsc->dx = isc->dx; + rtsc->dy = isc->dy; + rtsc->sm2 = isc->sm2; + rtsc->ism2 = isc->ism2; +} + +/* + * calculate the y-projection of the runtime service curve by the + * given x-projection value + */ +static u64 +rtsc_y2x(struct runtime_sc *rtsc, u64 y) +{ + u64 x; + + if (y < rtsc->y) + x = rtsc->x; + else if (y <= rtsc->y + rtsc->dy) { + /* x belongs to the 1st segment */ + if (rtsc->dy == 0) + x = rtsc->x + rtsc->dx; + else + x = rtsc->x + seg_y2x(y - rtsc->y, rtsc->ism1); + } else { + /* x belongs to the 2nd segment */ + x = rtsc->x + rtsc->dx + + seg_y2x(y - rtsc->y - rtsc->dy, rtsc->ism2); + } + return x; +} + +static u64 +rtsc_x2y(struct runtime_sc *rtsc, u64 x) +{ + u64 y; + + if (x <= rtsc->x) + y = rtsc->y; + else if (x <= rtsc->x + rtsc->dx) + /* y belongs to the 1st segment */ + y = rtsc->y + seg_x2y(x - rtsc->x, rtsc->sm1); + else + /* y belongs to the 2nd segment */ + y = rtsc->y + rtsc->dy + + seg_x2y(x - rtsc->x - rtsc->dx, rtsc->sm2); + return y; +} + +/* + * update the runtime service curve by taking the minimum of the current + * runtime service curve and the service curve starting at (x, y). + */ +static void +rtsc_min(struct runtime_sc *rtsc, struct internal_sc *isc, u64 x, u64 y) +{ + u64 y1, y2, dx, dy; + u32 dsm; + + if (isc->sm1 <= isc->sm2) { + /* service curve is convex */ + y1 = rtsc_x2y(rtsc, x); + if (y1 < y) + /* the current rtsc is smaller */ + return; + rtsc->x = x; + rtsc->y = y; + return; + } + + /* + * service curve is concave + * compute the two y values of the current rtsc + * y1: at x + * y2: at (x + dx) + */ + y1 = rtsc_x2y(rtsc, x); + if (y1 <= y) { + /* rtsc is below isc, no change to rtsc */ + return; + } + + y2 = rtsc_x2y(rtsc, x + isc->dx); + if (y2 >= y + isc->dy) { + /* rtsc is above isc, replace rtsc by isc */ + rtsc->x = x; + rtsc->y = y; + rtsc->dx = isc->dx; + rtsc->dy = isc->dy; + return; + } + + /* + * the two curves intersect + * compute the offsets (dx, dy) using the reverse + * function of seg_x2y() + * seg_x2y(dx, sm1) == seg_x2y(dx, sm2) + (y1 - y) + */ + dx = (y1 - y) << SM_SHIFT; + dsm = isc->sm1 - isc->sm2; + do_div(dx, dsm); + /* + * check if (x, y1) belongs to the 1st segment of rtsc. + * if so, add the offset. + */ + if (rtsc->x + rtsc->dx > x) + dx += rtsc->x + rtsc->dx - x; + dy = seg_x2y(dx, isc->sm1); + + rtsc->x = x; + rtsc->y = y; + rtsc->dx = dx; + rtsc->dy = dy; + return; +} + +static void +init_ed(struct hfsc_class *cl, unsigned int next_len) +{ + u64 cur_time; + + PSCHED_GET_TIME(cur_time); + + /* update the deadline curve */ + rtsc_min(&cl->cl_deadline, &cl->cl_rsc, cur_time, cl->cl_cumul); + + /* + * update the eligible curve. + * for concave, it is equal to the deadline curve. + * for convex, it is a linear curve with slope m2. 
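+ * (a convex curve here is one with sm1 <= sm2; its first segment is
+ * dropped by zeroing dx and dy.)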
+ */ + cl->cl_eligible = cl->cl_deadline; + if (cl->cl_rsc.sm1 <= cl->cl_rsc.sm2) { + cl->cl_eligible.dx = 0; + cl->cl_eligible.dy = 0; + } + + /* compute e and d */ + cl->cl_e = rtsc_y2x(&cl->cl_eligible, cl->cl_cumul); + cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len); + + eltree_insert(cl); +} + +static void +update_ed(struct hfsc_class *cl, unsigned int next_len) +{ + cl->cl_e = rtsc_y2x(&cl->cl_eligible, cl->cl_cumul); + cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len); + + eltree_update(cl); +} + +static inline void +update_d(struct hfsc_class *cl, unsigned int next_len) +{ + cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len); +} + +static inline void +update_cfmin(struct hfsc_class *cl) +{ + rb_node_t *n = rb_first(&cl->cf_tree); + struct hfsc_class *p; + + if (n == NULL) { + cl->cl_cfmin = 0; + return; + } + p = rb_entry(n, struct hfsc_class, cf_node); + cl->cl_cfmin = p->cl_f; +} + +static void +init_vf(struct hfsc_class *cl, unsigned int len) +{ + struct hfsc_class *max_cl; + rb_node_t *n; + u64 vt, f, cur_time; + int go_active; + + cur_time = 0; + go_active = 1; + for (; cl->cl_parent != NULL; cl = cl->cl_parent) { + if (go_active && cl->cl_nactive++ == 0) + go_active = 1; + else + go_active = 0; + + if (go_active) { + n = rb_last(&cl->cl_parent->vt_tree); + if (n != NULL) { + max_cl = rb_entry(n, struct hfsc_class,vt_node); + /* + * set vt to the average of the min and max + * classes. if the parent's period didn't + * change, don't decrease vt of the class. + */ + vt = max_cl->cl_vt; + if (cl->cl_parent->cl_cvtmin != 0) + vt = (cl->cl_parent->cl_cvtmin + vt)/2; + + if (cl->cl_parent->cl_vtperiod != + cl->cl_parentperiod || vt > cl->cl_vt) + cl->cl_vt = vt; + } else { + /* + * first child for a new parent backlog period. + * add parent's cvtmax to cvtoff to make a new + * vt (vtoff + vt) larger than the vt in the + * last period for all children. 
+ */ + vt = cl->cl_parent->cl_cvtmax; + cl->cl_parent->cl_cvtoff += vt; + cl->cl_parent->cl_cvtmax = 0; + cl->cl_parent->cl_cvtmin = 0; + cl->cl_vt = 0; + } + + cl->cl_vtoff = cl->cl_parent->cl_cvtoff - + cl->cl_pcvtoff; + + /* update the virtual curve */ + vt = cl->cl_vt + cl->cl_vtoff; + rtsc_min(&cl->cl_virtual, &cl->cl_fsc, vt, + cl->cl_total); + if (cl->cl_virtual.x == vt) { + cl->cl_virtual.x -= cl->cl_vtoff; + cl->cl_vtoff = 0; + } + cl->cl_vtadj = 0; + + cl->cl_vtperiod++; /* increment vt period */ + cl->cl_parentperiod = cl->cl_parent->cl_vtperiod; + if (cl->cl_parent->cl_nactive == 0) + cl->cl_parentperiod++; + cl->cl_f = 0; + + vttree_insert(cl); + cftree_insert(cl); + + if (cl->cl_flags & HFSC_USC) { + /* class has upper limit curve */ + if (cur_time == 0) + PSCHED_GET_TIME(cur_time); + + /* update the ulimit curve */ + rtsc_min(&cl->cl_ulimit, &cl->cl_usc, cur_time, + cl->cl_total); + /* compute myf */ + cl->cl_myf = rtsc_y2x(&cl->cl_ulimit, + cl->cl_total); + cl->cl_myfadj = 0; + } + } + + f = max(cl->cl_myf, cl->cl_cfmin); + if (f != cl->cl_f) { + cl->cl_f = f; + cftree_update(cl); + update_cfmin(cl->cl_parent); + } + } +} + +static void +update_vf(struct hfsc_class *cl, unsigned int len, u64 cur_time) +{ + u64 f; /* , myf_bound, delta; */ + int go_passive = 0; + + if (cl->qdisc->q.qlen == 0 && cl->cl_flags & HFSC_FSC) + go_passive = 1; + + for (; cl->cl_parent != NULL; cl = cl->cl_parent) { + cl->cl_total += len; + + if (!(cl->cl_flags & HFSC_FSC) || cl->cl_nactive == 0) + continue; + + if (go_passive && --cl->cl_nactive == 0) + go_passive = 1; + else + go_passive = 0; + + if (go_passive) { + /* no more active child, going passive */ + + /* update cvtmax of the parent class */ + if (cl->cl_vt > cl->cl_parent->cl_cvtmax) + cl->cl_parent->cl_cvtmax = cl->cl_vt; + + /* remove this class from the vt tree */ + vttree_remove(cl); + + cftree_remove(cl); + update_cfmin(cl->cl_parent); + + continue; + } + + /* + * update vt and f + */ + cl->cl_vt = rtsc_y2x(&cl->cl_virtual, cl->cl_total) + - cl->cl_vtoff + cl->cl_vtadj; + + /* + * if vt of the class is smaller than cvtmin, + * the class was skipped in the past due to non-fit. + * if so, we need to adjust vtadj. + */ + if (cl->cl_vt < cl->cl_parent->cl_cvtmin) { + cl->cl_vtadj += cl->cl_parent->cl_cvtmin - cl->cl_vt; + cl->cl_vt = cl->cl_parent->cl_cvtmin; + } + + /* update the vt tree */ + vttree_update(cl); + + if (cl->cl_flags & HFSC_USC) { + cl->cl_myf = cl->cl_myfadj + rtsc_y2x(&cl->cl_ulimit, + cl->cl_total); +#if 0 + /* + * This code causes classes to stay way under their + * limit when multiple classes are used at gigabit + * speed. needs investigation. -kaber + */ + /* + * if myf lags behind by more than one clock tick + * from the current time, adjust myfadj to prevent + * a rate-limited class from going greedy. + * in a steady state under rate-limiting, myf + * fluctuates within one clock tick. 
+ */ + myf_bound = cur_time - PSCHED_JIFFIE2US(1); + if (cl->cl_myf < myf_bound) { + delta = cur_time - cl->cl_myf; + cl->cl_myfadj += delta; + cl->cl_myf += delta; + } +#endif + } + + f = max(cl->cl_myf, cl->cl_cfmin); + if (f != cl->cl_f) { + cl->cl_f = f; + cftree_update(cl); + update_cfmin(cl->cl_parent); + } + } +} + +static void +set_active(struct hfsc_class *cl, unsigned int len) +{ + if (cl->cl_flags & HFSC_RSC) + init_ed(cl, len); + if (cl->cl_flags & HFSC_FSC) + init_vf(cl, len); + + list_add_tail(&cl->dlist, &cl->sched->droplist); +} + +static void +set_passive(struct hfsc_class *cl) +{ + if (cl->cl_flags & HFSC_RSC) + eltree_remove(cl); + + list_del(&cl->dlist); + + /* + * vttree is now handled in update_vf() so that update_vf(cl, 0, 0) + * needs to be called explicitly to remove a class from vttree. + */ +} + +/* + * hack to get length of first packet in queue. + */ +static unsigned int +qdisc_peek_len(struct Qdisc *sch) +{ + struct sk_buff *skb; + unsigned int len; + + skb = sch->dequeue(sch); + if (skb == NULL) { + if (net_ratelimit()) + printk("qdisc_peek_len: non work-conserving qdisc ?\n"); + return 0; + } + len = skb->len; + if (unlikely(sch->ops->requeue(skb, sch) != NET_XMIT_SUCCESS)) { + if (net_ratelimit()) + printk("qdisc_peek_len: failed to requeue\n"); + return 0; + } + return len; +} + +static void +hfsc_purge_queue(struct Qdisc *sch, struct hfsc_class *cl) +{ + unsigned int len = cl->qdisc->q.qlen; + + qdisc_reset(cl->qdisc); + if (len > 0) { + update_vf(cl, 0, 0); + set_passive(cl); + sch->q.qlen -= len; + } +} + +static void +hfsc_adjust_levels(struct hfsc_class *cl) +{ + struct hfsc_class *p; + unsigned int level; + + do { + level = 0; + list_for_each_entry(p, &cl->children, siblings) { + if (p->level > level) + level = p->level; + } + cl->level = level + 1; + } while ((cl = cl->cl_parent) != NULL); +} + +static inline unsigned int +hfsc_hash(u32 h) +{ + h ^= h >> 8; + h ^= h >> 4; + + return h & (HFSC_HSIZE - 1); +} + +static inline struct hfsc_class * +hfsc_find_class(u32 classid, struct Qdisc *sch) +{ + struct hfsc_sched *q = (struct hfsc_sched *)sch->data; + struct hfsc_class *cl; + + list_for_each_entry(cl, &q->clhash[hfsc_hash(classid)], hlist) { + if (cl->classid == classid) + return cl; + } + return NULL; +} + +static void +hfsc_change_rsc(struct hfsc_class *cl, struct tc_service_curve *rsc, + u64 cur_time) +{ + sc2isc(rsc, &cl->cl_rsc); + rtsc_init(&cl->cl_deadline, &cl->cl_rsc, cur_time, cl->cl_cumul); + cl->cl_eligible = cl->cl_deadline; + if (cl->cl_rsc.sm1 <= cl->cl_rsc.sm2) { + cl->cl_eligible.dx = 0; + cl->cl_eligible.dy = 0; + } + cl->cl_flags |= HFSC_RSC; +} + +static void +hfsc_change_fsc(struct hfsc_class *cl, struct tc_service_curve *fsc) +{ + sc2isc(fsc, &cl->cl_fsc); + rtsc_init(&cl->cl_virtual, &cl->cl_fsc, cl->cl_vt, cl->cl_total); + cl->cl_flags |= HFSC_FSC; +} + +static void +hfsc_change_usc(struct hfsc_class *cl, struct tc_service_curve *usc, + u64 cur_time) +{ + sc2isc(usc, &cl->cl_usc); + rtsc_init(&cl->cl_ulimit, &cl->cl_usc, cur_time, cl->cl_total); + cl->cl_flags |= HFSC_USC; +} + +static int +hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, + struct rtattr **tca, unsigned long *arg) +{ + struct hfsc_sched *q = (struct hfsc_sched *)sch->data; + struct hfsc_class *cl = (struct hfsc_class *)*arg; + struct hfsc_class *parent = NULL; + struct rtattr *opt = tca[TCA_OPTIONS-1]; + struct rtattr *tb[TCA_HFSC_MAX]; + struct tc_service_curve *rsc = NULL, *fsc = NULL, *usc = NULL; + u64 cur_time; + + if (opt == NULL || + 
rtattr_parse(tb, TCA_HFSC_MAX, RTA_DATA(opt), RTA_PAYLOAD(opt))) + return -EINVAL; + + if (tb[TCA_HFSC_RSC-1]) { + if (RTA_PAYLOAD(tb[TCA_HFSC_RSC-1]) < sizeof(*rsc)) + return -EINVAL; + rsc = RTA_DATA(tb[TCA_HFSC_RSC-1]); + if (rsc->m1 == 0 && rsc->m2 == 0) + rsc = NULL; + } + + if (tb[TCA_HFSC_FSC-1]) { + if (RTA_PAYLOAD(tb[TCA_HFSC_FSC-1]) < sizeof(*fsc)) + return -EINVAL; + fsc = RTA_DATA(tb[TCA_HFSC_FSC-1]); + if (fsc->m1 == 0 && fsc->m2 == 0) + fsc = NULL; + } + + if (tb[TCA_HFSC_USC-1]) { + if (RTA_PAYLOAD(tb[TCA_HFSC_USC-1]) < sizeof(*usc)) + return -EINVAL; + usc = RTA_DATA(tb[TCA_HFSC_USC-1]); + if (usc->m1 == 0 && usc->m2 == 0) + usc = NULL; + } + + if (cl != NULL) { + if (parentid) { + if (cl->cl_parent && cl->cl_parent->classid != parentid) + return -EINVAL; + if (cl->cl_parent == NULL && parentid != TC_H_ROOT) + return -EINVAL; + } + PSCHED_GET_TIME(cur_time); + + sch_tree_lock(sch); + if (rsc != NULL) + hfsc_change_rsc(cl, rsc, cur_time); + if (fsc != NULL) + hfsc_change_fsc(cl, fsc); + if (usc != NULL) + hfsc_change_usc(cl, usc, cur_time); + + if (cl->qdisc->q.qlen != 0) { + if (cl->cl_flags & HFSC_RSC) + update_ed(cl, qdisc_peek_len(cl->qdisc)); + if (cl->cl_flags & HFSC_FSC) + update_vf(cl, 0, cur_time); + } + sch_tree_unlock(sch); + +#ifdef CONFIG_NET_ESTIMATOR + if (tca[TCA_RATE-1]) { + qdisc_kill_estimator(&cl->stats); + qdisc_new_estimator(&cl->stats, tca[TCA_RATE-1]); + } +#endif + return 0; + } + + if (parentid == TC_H_ROOT) + return -EEXIST; + + parent = &q->root; + if (parentid) { + parent = hfsc_find_class(parentid, sch); + if (parent == NULL) + return -ENOENT; + } + + if (classid == 0 || TC_H_MAJ(classid ^ sch->handle) != 0) + return -EINVAL; + if (hfsc_find_class(classid, sch)) + return -EEXIST; + + if (rsc == NULL && fsc == NULL) + return -EINVAL; + + cl = kmalloc(sizeof(struct hfsc_class), GFP_KERNEL); + if (cl == NULL) + return -ENOBUFS; + memset(cl, 0, sizeof(struct hfsc_class)); + + if (rsc != NULL) + hfsc_change_rsc(cl, rsc, 0); + if (fsc != NULL) + hfsc_change_fsc(cl, fsc); + if (usc != NULL) + hfsc_change_usc(cl, usc, 0); + + cl->refcnt = 1; + cl->classid = classid; + cl->sched = q; + cl->cl_parent = parent; + cl->qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops); + if (cl->qdisc == NULL) + cl->qdisc = &noop_qdisc; + cl->stats.lock = &sch->dev->queue_lock; + INIT_LIST_HEAD(&cl->children); + cl->vt_tree = RB_ROOT; + cl->cf_tree = RB_ROOT; + + sch_tree_lock(sch); + list_add_tail(&cl->hlist, &q->clhash[hfsc_hash(classid)]); + list_add_tail(&cl->siblings, &parent->children); + if (parent->level == 0) + hfsc_purge_queue(sch, parent); + hfsc_adjust_levels(parent); + cl->cl_pcvtoff = parent->cl_cvtoff; + sch_tree_unlock(sch); + +#ifdef CONFIG_NET_ESTIMATOR + if (tca[TCA_RATE-1]) + qdisc_new_estimator(&cl->stats, tca[TCA_RATE-1]); +#endif + *arg = (unsigned long)cl; + return 0; +} + +static void +hfsc_destroy_filters(struct tcf_proto **fl) +{ + struct tcf_proto *tp; + + while ((tp = *fl) != NULL) { + *fl = tp->next; + tcf_destroy(tp); + } +} + +static void +hfsc_destroy_class(struct Qdisc *sch, struct hfsc_class *cl) +{ + struct hfsc_sched *q = (struct hfsc_sched *)sch->data; + + hfsc_destroy_filters(&cl->filter_list); + qdisc_destroy(cl->qdisc); +#ifdef CONFIG_NET_ESTIMATOR + qdisc_kill_estimator(&cl->stats); +#endif + if (cl != &q->root) + kfree(cl); +} + +static int +hfsc_delete_class(struct Qdisc *sch, unsigned long arg) +{ + struct hfsc_sched *q = (struct hfsc_sched *)sch->data; + struct hfsc_class *cl = (struct hfsc_class *)arg; + + if (cl->level > 0 
|| cl->filter_cnt > 0 || cl == &q->root) + return -EBUSY; + + sch_tree_lock(sch); + + list_del(&cl->hlist); + list_del(&cl->siblings); + hfsc_adjust_levels(cl->cl_parent); + hfsc_purge_queue(sch, cl); + if (--cl->refcnt == 0) + hfsc_destroy_class(sch, cl); + + sch_tree_unlock(sch); + return 0; +} + +static struct hfsc_class * +hfsc_classify(struct sk_buff *skb, struct Qdisc *sch) +{ + struct hfsc_sched *q = (struct hfsc_sched *)sch->data; + struct hfsc_class *cl; + struct tcf_result res; + struct tcf_proto *tcf; + int result; + + if (TC_H_MAJ(skb->priority ^ sch->handle) == 0 && + (cl = hfsc_find_class(skb->priority, sch)) != NULL) + if (cl->level == 0) + return cl; + + tcf = q->root.filter_list; + while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) { +#ifdef CONFIG_NET_CLS_POLICE + if (result == TC_POLICE_SHOT) + return NULL; +#endif + if ((cl = (struct hfsc_class *)res.class) == NULL) { + if ((cl = hfsc_find_class(res.classid, sch)) == NULL) + break; /* filter selected invalid classid */ + } + + if (cl->level == 0) + return cl; /* hit leaf class */ + + /* apply inner filter chain */ + tcf = cl->filter_list; + } + + /* classification failed, try default class */ + cl = hfsc_find_class(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch); + if (cl == NULL || cl->level > 0) + return NULL; + + return cl; +} + +static int +hfsc_graft_class(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, + struct Qdisc **old) +{ + struct hfsc_class *cl = (struct hfsc_class *)arg; + + if (cl == NULL) + return -ENOENT; + if (cl->level > 0) + return -EINVAL; + if (new == NULL) { + new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops); + if (new == NULL) + new = &noop_qdisc; + } + + sch_tree_lock(sch); + hfsc_purge_queue(sch, cl); + *old = xchg(&cl->qdisc, new); + sch_tree_unlock(sch); + return 0; +} + +static struct Qdisc * +hfsc_class_leaf(struct Qdisc *sch, unsigned long arg) +{ + struct hfsc_class *cl = (struct hfsc_class *)arg; + + if (cl != NULL && cl->level == 0) + return cl->qdisc; + + return NULL; +} + +static unsigned long +hfsc_get_class(struct Qdisc *sch, u32 classid) +{ + struct hfsc_class *cl = hfsc_find_class(classid, sch); + + if (cl != NULL) + cl->refcnt++; + + return (unsigned long)cl; +} + +static void +hfsc_put_class(struct Qdisc *sch, unsigned long arg) +{ + struct hfsc_class *cl = (struct hfsc_class *)arg; + + if (--cl->refcnt == 0) + hfsc_destroy_class(sch, cl); +} + +static unsigned long +hfsc_bind_tcf(struct Qdisc *sch, unsigned long parent, u32 classid) +{ + struct hfsc_class *p = (struct hfsc_class *)parent; + struct hfsc_class *cl = hfsc_find_class(classid, sch); + + if (cl != NULL) { + if (p != NULL && p->level <= cl->level) + return 0; + cl->filter_cnt++; + } + + return (unsigned long)cl; +} + +static void +hfsc_unbind_tcf(struct Qdisc *sch, unsigned long arg) +{ + struct hfsc_class *cl = (struct hfsc_class *)arg; + + cl->filter_cnt--; +} + +static struct tcf_proto ** +hfsc_tcf_chain(struct Qdisc *sch, unsigned long arg) +{ + struct hfsc_sched *q = (struct hfsc_sched *)sch->data; + struct hfsc_class *cl = (struct hfsc_class *)arg; + + if (cl == NULL) + cl = &q->root; + + return &cl->filter_list; +} + +static int +hfsc_dump_sc(struct sk_buff *skb, int attr, struct internal_sc *sc) +{ + struct tc_service_curve tsc; + + tsc.m1 = sm2m(sc->sm1); + tsc.d = dx2d(sc->dx); + tsc.m2 = sm2m(sc->sm2); + RTA_PUT(skb, attr, sizeof(tsc), &tsc); + + return skb->len; + + rtattr_failure: + return -1; +} + +static inline int +hfsc_dump_curves(struct sk_buff *skb, struct hfsc_class *cl) +{ + 
if ((cl->cl_flags & HFSC_RSC) && + (hfsc_dump_sc(skb, TCA_HFSC_RSC, &cl->cl_rsc) < 0)) + goto rtattr_failure; + + if ((cl->cl_flags & HFSC_FSC) && + (hfsc_dump_sc(skb, TCA_HFSC_FSC, &cl->cl_fsc) < 0)) + goto rtattr_failure; + + if ((cl->cl_flags & HFSC_USC) && + (hfsc_dump_sc(skb, TCA_HFSC_USC, &cl->cl_usc) < 0)) + goto rtattr_failure; + + return skb->len; + + rtattr_failure: + return -1; +} + +static inline int +hfsc_dump_stats(struct sk_buff *skb, struct hfsc_class *cl) +{ + cl->stats.qlen = cl->qdisc->q.qlen; + if (qdisc_copy_stats(skb, &cl->stats) < 0) + goto rtattr_failure; + + return skb->len; + + rtattr_failure: + return -1; +} + +static inline int +hfsc_dump_xstats(struct sk_buff *skb, struct hfsc_class *cl) +{ + struct tc_hfsc_stats xstats; + + xstats.level = cl->level; + xstats.period = cl->cl_vtperiod; + xstats.work = cl->cl_total; + xstats.rtwork = cl->cl_cumul; + RTA_PUT(skb, TCA_XSTATS, sizeof(xstats), &xstats); + + return skb->len; + + rtattr_failure: + return -1; +} + +static int +hfsc_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb, + struct tcmsg *tcm) +{ + struct hfsc_class *cl = (struct hfsc_class *)arg; + unsigned char *b = skb->tail; + struct rtattr *rta = (struct rtattr *)b; + + tcm->tcm_parent = cl->cl_parent ? cl->cl_parent->classid : TC_H_ROOT; + tcm->tcm_handle = cl->classid; + if (cl->level == 0) + tcm->tcm_info = cl->qdisc->handle; + + RTA_PUT(skb, TCA_OPTIONS, 0, NULL); + if (hfsc_dump_curves(skb, cl) < 0) + goto rtattr_failure; + rta->rta_len = skb->tail - b; + + if ((hfsc_dump_stats(skb, cl) < 0) || + (hfsc_dump_xstats(skb, cl) < 0)) + goto rtattr_failure; + + return skb->len; + + rtattr_failure: + skb_trim(skb, b - skb->data); + return -1; +} + +static void +hfsc_walk(struct Qdisc *sch, struct qdisc_walker *arg) +{ + struct hfsc_sched *q = (struct hfsc_sched *)sch->data; + struct hfsc_class *cl; + unsigned int i; + + if (arg->stop) + return; + + for (i = 0; i < HFSC_HSIZE; i++) { + list_for_each_entry(cl, &q->clhash[i], hlist) { + if (arg->count < arg->skip) { + arg->count++; + continue; + } + if (arg->fn(sch, (unsigned long)cl, arg) < 0) { + arg->stop = 1; + return; + } + arg->count++; + } + } +} + +static void +hfsc_watchdog(unsigned long arg) +{ + struct Qdisc *sch = (struct Qdisc *)arg; + + sch->flags &= ~TCQ_F_THROTTLED; + netif_schedule(sch->dev); +} + +static void +hfsc_schedule_watchdog(struct Qdisc *sch, u64 cur_time) +{ + struct hfsc_sched *q = (struct hfsc_sched *)sch->data; + struct hfsc_class *cl; + u64 next_time = 0; + long delay; + + if ((cl = eltree_get_minel(q)) != NULL) + next_time = cl->cl_e; + if (q->root.cl_cfmin != 0) { + if (next_time == 0 || next_time > q->root.cl_cfmin) + next_time = q->root.cl_cfmin; + } + ASSERT(next_time != 0); + delay = next_time - cur_time; + delay = PSCHED_US2JIFFIE(delay); + + sch->flags |= TCQ_F_THROTTLED; + mod_timer(&q->wd_timer, jiffies + delay); +} + +static int +hfsc_init_qdisc(struct Qdisc *sch, struct rtattr *opt) +{ + struct hfsc_sched *q = (struct hfsc_sched *)sch->data; + struct tc_hfsc_qopt *qopt; + unsigned int i; + + if (opt == NULL || RTA_PAYLOAD(opt) < sizeof(*qopt)) + return -EINVAL; + qopt = RTA_DATA(opt); + + sch->stats.lock = &sch->dev->queue_lock; + + q->defcls = qopt->defcls; + for (i = 0; i < HFSC_HSIZE; i++) + INIT_LIST_HEAD(&q->clhash[i]); + q->eligible = RB_ROOT; + INIT_LIST_HEAD(&q->droplist); + skb_queue_head_init(&q->requeue); + + q->root.refcnt = 1; + q->root.classid = sch->handle; + q->root.sched = q; + q->root.qdisc = qdisc_create_dflt(sch->dev, 
&pfifo_qdisc_ops); + if (q->root.qdisc == NULL) + q->root.qdisc = &noop_qdisc; + q->root.stats.lock = &sch->dev->queue_lock; + INIT_LIST_HEAD(&q->root.children); + q->root.vt_tree = RB_ROOT; + q->root.cf_tree = RB_ROOT; + + list_add(&q->root.hlist, &q->clhash[hfsc_hash(q->root.classid)]); + + init_timer(&q->wd_timer); + q->wd_timer.function = hfsc_watchdog; + q->wd_timer.data = (unsigned long)sch; + + MOD_INC_USE_COUNT; + return 0; +} + +static int +hfsc_change_qdisc(struct Qdisc *sch, struct rtattr *opt) +{ + struct hfsc_sched *q = (struct hfsc_sched *)sch->data; + struct tc_hfsc_qopt *qopt; + + if (opt == NULL || RTA_PAYLOAD(opt) < sizeof(*qopt)) + return -EINVAL;; + qopt = RTA_DATA(opt); + + sch_tree_lock(sch); + q->defcls = qopt->defcls; + sch_tree_unlock(sch); + + return 0; +} + +static void +hfsc_reset_class(struct hfsc_class *cl) +{ + cl->cl_total = 0; + cl->cl_cumul = 0; + cl->cl_d = 0; + cl->cl_e = 0; + cl->cl_vt = 0; + cl->cl_vtadj = 0; + cl->cl_vtoff = 0; + cl->cl_cvtmin = 0; + cl->cl_cvtmax = 0; + cl->cl_cvtoff = 0; + cl->cl_pcvtoff = 0; + cl->cl_vtperiod = 0; + cl->cl_parentperiod = 0; + cl->cl_f = 0; + cl->cl_myf = 0; + cl->cl_myfadj = 0; + cl->cl_cfmin = 0; + cl->cl_nactive = 0; + + cl->vt_tree = RB_ROOT; + cl->cf_tree = RB_ROOT; + qdisc_reset(cl->qdisc); + + if (cl->cl_flags & HFSC_RSC) + rtsc_init(&cl->cl_deadline, &cl->cl_rsc, 0, 0); + if (cl->cl_flags & HFSC_FSC) + rtsc_init(&cl->cl_virtual, &cl->cl_fsc, 0, 0); + if (cl->cl_flags & HFSC_USC) + rtsc_init(&cl->cl_ulimit, &cl->cl_usc, 0, 0); +} + +static void +hfsc_reset_qdisc(struct Qdisc *sch) +{ + struct hfsc_sched *q = (struct hfsc_sched *)sch->data; + struct hfsc_class *cl; + unsigned int i; + + for (i = 0; i < HFSC_HSIZE; i++) { + list_for_each_entry(cl, &q->clhash[i], hlist) + hfsc_reset_class(cl); + } + __skb_queue_purge(&q->requeue); + q->eligible = RB_ROOT; + INIT_LIST_HEAD(&q->droplist); + del_timer(&q->wd_timer); + sch->flags &= ~TCQ_F_THROTTLED; + sch->q.qlen = 0; +} + +static void +hfsc_destroy_qdisc(struct Qdisc *sch) +{ + struct hfsc_sched *q = (struct hfsc_sched *)sch->data; + struct hfsc_class *cl, *next; + unsigned int i; + + for (i = 0; i < HFSC_HSIZE; i++) { + list_for_each_entry_safe(cl, next, &q->clhash[i], hlist) + hfsc_destroy_class(sch, cl); + } + __skb_queue_purge(&q->requeue); + del_timer(&q->wd_timer); + MOD_DEC_USE_COUNT; +} + +static int +hfsc_dump_qdisc(struct Qdisc *sch, struct sk_buff *skb) +{ + struct hfsc_sched *q = (struct hfsc_sched *)sch->data; + unsigned char *b = skb->tail; + struct tc_hfsc_qopt qopt; + + qopt.defcls = q->defcls; + RTA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt); + + return skb->len; + + rtattr_failure: + skb_trim(skb, b - skb->data); + return -1; +} + +static int +hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch) +{ + struct hfsc_class *cl = hfsc_classify(skb, sch); + unsigned int len = skb->len; + int err; + + if (cl == NULL) { + kfree_skb(skb); + sch->stats.drops++; + return NET_XMIT_DROP; + } + + err = cl->qdisc->enqueue(skb, cl->qdisc); + if (unlikely(err != NET_XMIT_SUCCESS)) { + cl->stats.drops++; + sch->stats.drops++; + return err; + } + + if (cl->qdisc->q.qlen == 1) + set_active(cl, len); + + cl->stats.packets++; + cl->stats.bytes += len; + sch->stats.packets++; + sch->stats.bytes += len; + sch->q.qlen++; + + return NET_XMIT_SUCCESS; +} + +static struct sk_buff * +hfsc_dequeue(struct Qdisc *sch) +{ + struct hfsc_sched *q = (struct hfsc_sched *)sch->data; + struct hfsc_class *cl; + struct sk_buff *skb; + u64 cur_time; + unsigned int next_len; + int realtime 
= 0; + + if (sch->q.qlen == 0) + return NULL; + if ((skb = __skb_dequeue(&q->requeue))) + goto out; + + PSCHED_GET_TIME(cur_time); + + /* + * if there are eligible classes, use real-time criteria. + * find the class with the minimum deadline among + * the eligible classes. + */ + if ((cl = eltree_get_mindl(q, cur_time)) != NULL) { + realtime = 1; + } else { + /* + * use link-sharing criteria + * get the class with the minimum vt in the hierarchy + */ + cl = vttree_get_minvt(&q->root, cur_time); + if (cl == NULL) { + sch->stats.overlimits++; + hfsc_schedule_watchdog(sch, cur_time); + return NULL; + } + } + + skb = cl->qdisc->dequeue(cl->qdisc); + if (skb == NULL) { + if (net_ratelimit()) + printk("HFSC: Non-work-conserving qdisc ?\n"); + return NULL; + } + + update_vf(cl, skb->len, cur_time); + if (realtime) + cl->cl_cumul += skb->len; + + if (cl->qdisc->q.qlen != 0) { + if (cl->cl_flags & HFSC_RSC) { + /* update ed */ + next_len = qdisc_peek_len(cl->qdisc); + if (realtime) + update_ed(cl, next_len); + else + update_d(cl, next_len); + } + } else { + /* the class becomes passive */ + set_passive(cl); + } + + out: + sch->flags &= ~TCQ_F_THROTTLED; + sch->q.qlen--; + + return skb; +} + +static int +hfsc_requeue(struct sk_buff *skb, struct Qdisc *sch) +{ + struct hfsc_sched *q = (struct hfsc_sched *)sch->data; + + __skb_queue_head(&q->requeue, skb); + sch->q.qlen++; + return NET_XMIT_SUCCESS; +} + +static unsigned int +hfsc_drop(struct Qdisc *sch) +{ + struct hfsc_sched *q = (struct hfsc_sched *)sch->data; + struct hfsc_class *cl; + unsigned int len; + + list_for_each_entry(cl, &q->droplist, dlist) { + if (cl->qdisc->ops->drop != NULL && + (len = cl->qdisc->ops->drop(cl->qdisc)) > 0) { + if (cl->qdisc->q.qlen == 0) { + update_vf(cl, 0, 0); + set_passive(cl); + } else { + list_move_tail(&cl->dlist, &q->droplist); + } + cl->stats.drops++; + sch->stats.drops++; + sch->q.qlen--; + return len; + } + } + return 0; +} + +static struct Qdisc_class_ops hfsc_class_ops = { + .change = hfsc_change_class, + .delete = hfsc_delete_class, + .graft = hfsc_graft_class, + .leaf = hfsc_class_leaf, + .get = hfsc_get_class, + .put = hfsc_put_class, + .bind_tcf = hfsc_bind_tcf, + .unbind_tcf = hfsc_unbind_tcf, + .tcf_chain = hfsc_tcf_chain, + .dump = hfsc_dump_class, + .walk = hfsc_walk +}; + +struct Qdisc_ops hfsc_qdisc_ops = { + .id = "hfsc", + .init = hfsc_init_qdisc, + .change = hfsc_change_qdisc, + .reset = hfsc_reset_qdisc, + .destroy = hfsc_destroy_qdisc, + .dump = hfsc_dump_qdisc, + .enqueue = hfsc_enqueue, + .dequeue = hfsc_dequeue, + .requeue = hfsc_requeue, + .drop = hfsc_drop, + .cl_ops = &hfsc_class_ops, + .priv_size = sizeof(struct hfsc_sched) +}; + +static int __init +hfsc_init(void) +{ + return register_qdisc(&hfsc_qdisc_ops); +} + +static void __exit +hfsc_cleanup(void) +{ + unregister_qdisc(&hfsc_qdisc_ops); +} + +MODULE_LICENSE("GPL"); +module_init(hfsc_init); +module_exit(hfsc_cleanup); diff --git a/release/src/linux/linux/net/sched/sch_htb.c b/release/src/linux/linux/net/sched/sch_htb.c index 7539e490..944cb555 100644 --- a/release/src/linux/linux/net/sched/sch_htb.c +++ b/release/src/linux/linux/net/sched/sch_htb.c @@ -9,6 +9,8 @@ * Authors: Martin Devera, * * Credits (in time order) for older HTB versions: + * Stef Coene + * HTB support at LARTC mailing list * Ondrej Kraus, * found missing INIT_QDISC(htb) * Vladimir Smelhaus, Aamer Akhter, Bert Hubert @@ -17,9 +19,13 @@ * code review and helpful comments on shaping * Tomasz Wrona, * created test case so that I was able to fix nasty bug + * 
Wilfried Weissmann + * spotted bug in dequeue code and helped with fix + * Jiri Fojtasek + * fixed requeue routine * and many others. thanks. * - * $Id: sch_htb.c,v 1.1.1.4 2003/10/14 08:09:35 sparq Exp $ + * $Id: sch_htb.c,v 1.25 2003/12/07 11:08:25 devik Exp devik $ */ #include #include @@ -66,21 +72,17 @@ #define HTB_HSIZE 16 /* classid hash size */ #define HTB_EWMAC 2 /* rate average over HTB_EWMAC*HTB_HSIZE sec */ -#define HTB_DEBUG 1 /* compile debugging support (activated by tc tool) */ +//#define HTB_DEBUG 1 /* compile debugging support (activated by tc tool) */ #define HTB_RATECM 1 /* whether to use rate computer */ -#define HTB_HYSTERESIS 1/* whether to use mode hysteresis for speedup */ +#define HTB_HYSTERESIS 0/* whether to use mode hysteresis for speedup */ #define HTB_QLOCK(S) spin_lock_bh(&(S)->dev->queue_lock) #define HTB_QUNLOCK(S) spin_unlock_bh(&(S)->dev->queue_lock) -#define HTB_VER 0x30007 /* major must be matched with number suplied by TC as version */ +#define HTB_VER 0x30011 /* major must be matched with number suplied by TC as version */ #if HTB_VER >> 16 != TC_HTB_PROTOVER #error "Mismatched sch_htb.c and pkt_sch.h" #endif -/* temporary debug defines to be removed after beta stage */ -#define DEVIK_MEND(N) -#define DEVIK_MSTART(N) - /* debugging support; S is subsystem, these are defined: 0 - netlink messages 1 - enqueue @@ -100,13 +102,16 @@ from LSB */ #ifdef HTB_DEBUG -#define HTB_DBG(S,L,FMT,ARG...) if (((q->debug>>(2*S))&3) >= L) \ +#define HTB_DBG_COND(S,L) (((q->debug>>(2*S))&3) >= L) +#define HTB_DBG(S,L,FMT,ARG...) if (HTB_DBG_COND(S,L)) \ printk(KERN_DEBUG FMT,##ARG) #define HTB_CHCL(cl) BUG_TRAP((cl)->magic == HTB_CMAGIC) #define HTB_PASSQ q, #define HTB_ARGQ struct htb_sched *q, #define static +#undef __inline__ #define __inline__ +#undef inline #define inline #define HTB_CMAGIC 0xFEFAFEF1 #define htb_safe_rb_erase(N,R) do { BUG_TRAP((N)->rb_color != -1); \ @@ -114,6 +119,7 @@ rb_erase(N,R); \ (N)->rb_color = -1; } while (0) #else +#define HTB_DBG_COND(S,L) (0) #define HTB_DBG(S,L,FMT,ARG...) #define HTB_PASSQ #define HTB_ARGQ @@ -166,6 +172,11 @@ struct htb_class struct htb_class_inner { rb_root_t feed[TC_HTB_NUMPRIO]; /* feed trees */ rb_node_t *ptr[TC_HTB_NUMPRIO]; /* current class ptr */ + /* When class changes from state 1->2 and disconnects from + parent's feed then we lost ptr value and start from the + first child again. Here we store classid of the + last valid ptr (used when ptr is NULL). 
*/ + u32 last_ptr_id[TC_HTB_NUMPRIO]; } inner; } un; rb_node_t node[TC_HTB_NUMPRIO]; /* node for self or feed tree */ @@ -212,6 +223,7 @@ struct htb_sched rb_root_t row[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO]; int row_mask[TC_HTB_MAXDEPTH]; rb_node_t *ptr[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO]; + u32 last_ptr_id[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO]; /* self wait list - roots of wait PQs per row */ rb_root_t wait_pq[TC_HTB_MAXDEPTH]; @@ -219,6 +231,9 @@ struct htb_sched /* time of nearest event per level (row) */ unsigned long near_ev_cache[TC_HTB_MAXDEPTH]; + /* cached value of jiffies in dequeue */ + unsigned long jiffies; + /* whether we hit non-work conserving class during this dequeue; we use */ int nwc_hit; /* this to disable mindelay complaint in dequeue */ @@ -297,7 +312,7 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch) rules in it */ if (skb->priority == sch->handle) return HTB_DIRECT; /* X:0 (direct flow) selected */ - if ((cl = htb_find(skb->priority,sch)) != NULL) + if ((cl = htb_find(skb->priority,sch)) != NULL && cl->level == 0) return cl; tcf = q->filter_list; @@ -338,7 +353,7 @@ static void htb_next_rb_node(rb_node_t **n); static void htb_debug_dump (struct htb_sched *q) { int i,p; - printk(KERN_DEBUG "htb*g j=%lu\n",jiffies); + printk(KERN_DEBUG "htb*g j=%lu lj=%lu\n",jiffies,q->jiffies); /* rows */ for (i=TC_HTB_MAXDEPTH-1;i>=0;i--) { printk(KERN_DEBUG "htb*r%d m=%x",i,q->row_mask[i]); @@ -421,26 +436,24 @@ static void htb_add_to_wait_tree (struct htb_sched *q, if ((delay <= 0 || delay > cl->mbuffer) && net_ratelimit()) printk(KERN_ERR "HTB: suspicious delay in wait_tree d=%ld cl=%X h=%d\n",delay,cl->classid,debug_hint); #endif - DEVIK_MSTART(9); - cl->pq_key = jiffies + PSCHED_US2JIFFIE(delay); - if (cl->pq_key == jiffies) + cl->pq_key = q->jiffies + PSCHED_US2JIFFIE(delay); + if (cl->pq_key == q->jiffies) cl->pq_key++; /* update the nearest event cache */ - if (q->near_ev_cache[cl->level] - cl->pq_key < 0x80000000) + if (time_after(q->near_ev_cache[cl->level], cl->pq_key)) q->near_ev_cache[cl->level] = cl->pq_key; while (*p) { struct htb_class *c; parent = *p; c = rb_entry(parent, struct htb_class, pq_node); - if (cl->pq_key - c->pq_key < 0x80000000) + if (time_after_eq(cl->pq_key, c->pq_key)) p = &parent->rb_right; else p = &parent->rb_left; } rb_link_node(&cl->pq_node, parent, p); rb_insert_color(&cl->pq_node, &q->wait_pq[cl->level]); - DEVIK_MEND(9); } /** @@ -453,12 +466,14 @@ static void htb_next_rb_node(rb_node_t **n) { rb_node_t *p; if ((*n)->rb_right) { + /* child at right. use it or its leftmost ancestor */ *n = (*n)->rb_right; while ((*n)->rb_left) *n = (*n)->rb_left; return; } while ((p = (*n)->rb_parent) != NULL) { + /* if we've arrived from left child then we have next node */ if (p->rb_left == *n) break; *n = p; } @@ -567,8 +582,13 @@ static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl) int prio = ffz(~m); m &= ~(1 << prio); - if (p->un.inner.ptr[prio] == cl->node+prio) - htb_next_rb_node(p->un.inner.ptr + prio); + if (p->un.inner.ptr[prio] == cl->node+prio) { + /* we are removing child which is pointed to from + parent feed - forget the pointer but remember + classid */ + p->un.inner.last_ptr_id[prio] = cl->classid; + p->un.inner.ptr[prio] = NULL; + } htb_safe_rb_erase(cl->node + prio,p->un.inner.feed + prio); @@ -602,7 +622,7 @@ htb_class_mode(struct htb_class *cl,long *diff) long toks; if ((toks = (cl->ctokens + *diff)) < ( -#ifdef HTB_HYSTERESIS +#if HTB_HYSTERESIS cl->cmode != HTB_CANT_SEND ? 
-cl->cbuffer : #endif 0)) { @@ -610,7 +630,7 @@ htb_class_mode(struct htb_class *cl,long *diff) return HTB_CANT_SEND; } if ((toks = (cl->tokens + *diff)) >= ( -#ifdef HTB_HYSTERESIS +#if HTB_HYSTERESIS cl->cmode == HTB_CAN_SEND ? -cl->buffer : #endif 0)) @@ -689,7 +709,6 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch) struct htb_sched *q = (struct htb_sched *)sch->data; struct htb_class *cl = htb_classify(skb,sch); - DEVIK_MSTART(0); if (cl == HTB_DIRECT || !cl) { /* enqueue to helper queue */ if (q->direct_queue.qlen < q->direct_qlen && cl) { @@ -698,25 +717,20 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch) } else { kfree_skb (skb); sch->stats.drops++; - DEVIK_MEND(0); return NET_XMIT_DROP; } } else if (cl->un.leaf.q->enqueue(skb, cl->un.leaf.q) != NET_XMIT_SUCCESS) { sch->stats.drops++; cl->stats.drops++; - DEVIK_MEND(0); return NET_XMIT_DROP; } else { cl->stats.packets++; cl->stats.bytes += skb->len; - DEVIK_MSTART(1); htb_activate (q,cl); - DEVIK_MEND(1); } sch->q.qlen++; sch->stats.packets++; sch->stats.bytes += skb->len; - HTB_DBG(1,1,"htb_enq_ok cl=%X skb=%p\n",cl?cl->classid:0,skb); - DEVIK_MEND(0); + HTB_DBG(1,1,"htb_enq_ok cl=%X skb=%p\n",(cl && cl != HTB_DIRECT)?cl->classid:0,skb); return NET_XMIT_SUCCESS; } @@ -725,16 +739,18 @@ static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch) { struct htb_sched *q = (struct htb_sched *)sch->data; struct htb_class *cl = htb_classify(skb,sch); + struct sk_buff *tskb; if (cl == HTB_DIRECT || !cl) { /* enqueue to helper queue */ if (q->direct_queue.qlen < q->direct_qlen && cl) { - __skb_queue_tail(&q->direct_queue, skb); - q->direct_pkts++; + __skb_queue_head(&q->direct_queue, skb); } else { - kfree_skb (skb); - sch->stats.drops++; - return NET_XMIT_DROP; + __skb_queue_head(&q->direct_queue, skb); + tskb = __skb_dequeue_tail(&q->direct_queue); + kfree_skb (tskb); + sch->stats.drops++; + return NET_XMIT_CN; } } else if (cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q) != NET_XMIT_SUCCESS) { sch->stats.drops++; @@ -744,7 +760,7 @@ static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch) htb_activate (q,cl); sch->q.qlen++; - HTB_DBG(1,1,"htb_req_ok cl=%X skb=%p\n",cl?cl->classid:0,skb); + HTB_DBG(1,1,"htb_req_ok cl=%X skb=%p\n",(cl && cl != HTB_DIRECT)?cl->classid:0,skb); return NET_XMIT_SUCCESS; } @@ -819,7 +835,7 @@ static void htb_charge_class(struct htb_sched *q,struct htb_class *cl, cl->classid, diff, (unsigned long long) q->now, (unsigned long long) cl->t_c, - jiffies); + q->jiffies); diff = 1000; } #endif @@ -862,6 +878,7 @@ static void htb_charge_class(struct htb_sched *q,struct htb_class *cl, * * Scans event queue for pending events and applies them. Returns jiffies to * next pending event (0 for no event in pq). + * Note: Aplied are events whose have cl->pq_key <= jiffies. 
*/ static long htb_do_events(struct htb_sched *q,int level) { @@ -876,9 +893,9 @@ static long htb_do_events(struct htb_sched *q,int level) while (p->rb_left) p = p->rb_left; cl = rb_entry(p, struct htb_class, pq_node); - if (cl->pq_key - (jiffies+1) < 0x80000000) { - HTB_DBG(8,3,"htb_do_ev_ret delay=%ld\n",cl->pq_key - jiffies); - return cl->pq_key - jiffies; + if (time_after(cl->pq_key, q->jiffies)) { + HTB_DBG(8,3,"htb_do_ev_ret delay=%ld\n",cl->pq_key - q->jiffies); + return cl->pq_key - q->jiffies; } htb_safe_rb_erase(p,q->wait_pq+level); diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32)cl->mbuffer, 0); @@ -889,7 +906,7 @@ static long htb_do_events(struct htb_sched *q,int level) cl->classid, diff, (unsigned long long) q->now, (unsigned long long) cl->t_c, - jiffies); + q->jiffies); diff = 1000; } #endif @@ -902,24 +919,56 @@ static long htb_do_events(struct htb_sched *q,int level) return HZ/10; } +/* Returns class->node+prio from id-tree where classe's id is >= id. NULL + is no such one exists. */ +static rb_node_t * +htb_id_find_next_upper(int prio,rb_node_t *n,u32 id) +{ + rb_node_t *r = NULL; + while (n) { + struct htb_class *cl = rb_entry(n,struct htb_class,node[prio]); + if (id == cl->classid) return n; + + if (id > cl->classid) { + n = n->rb_right; + } else { + r = n; + n = n->rb_left; + } + } + return r; +} + /** * htb_lookup_leaf - returns next leaf class in DRR order * * Find leaf where current feed pointers points to. */ static struct htb_class * -htb_lookup_leaf(rb_root_t *tree,int prio,rb_node_t **pptr) +htb_lookup_leaf(HTB_ARGQ rb_root_t *tree,int prio,rb_node_t **pptr,u32 *pid) { int i; struct { rb_node_t *root; rb_node_t **pptr; + u32 *pid; } stk[TC_HTB_MAXDEPTH],*sp = stk; + BUG_TRAP(tree->rb_node); sp->root = tree->rb_node; sp->pptr = pptr; + sp->pid = pid; for (i = 0; i < 65535; i++) { + HTB_DBG(4,2,"htb_lleaf ptr=%p pid=%X\n",*sp->pptr,*sp->pid); + + if (!*sp->pptr && *sp->pid) { + /* ptr was invalidated but id is valid - try to recover + the original or next ptr */ + *sp->pptr = htb_id_find_next_upper(prio,sp->root,*sp->pid); + } + *sp->pid = 0; /* ptr is valid now so that remove this hint as it + can become out of date quickly */ if (!*sp->pptr) { /* we are at right end; rewind & go up */ *sp->pptr = sp->root; while ((*sp->pptr)->rb_left) @@ -937,6 +986,7 @@ htb_lookup_leaf(rb_root_t *tree,int prio,rb_node_t **pptr) return cl; (++sp)->root = cl->un.inner.feed[prio].rb_node; sp->pptr = cl->un.inner.ptr+prio; + sp->pid = cl->un.inner.last_ptr_id+prio; } } BUG_TRAP(0); @@ -949,16 +999,37 @@ static struct sk_buff * htb_dequeue_tree(struct htb_sched *q,int prio,int level) { struct sk_buff *skb = NULL; - //struct htb_sched *q = (struct htb_sched *)sch->data; struct htb_class *cl,*start; /* look initial class up in the row */ - DEVIK_MSTART(6); - start = cl = htb_lookup_leaf (q->row[level]+prio,prio,q->ptr[level]+prio); + start = cl = htb_lookup_leaf (HTB_PASSQ q->row[level]+prio,prio, + q->ptr[level]+prio,q->last_ptr_id[level]+prio); do { - BUG_TRAP(cl && cl->un.leaf.q->q.qlen); if (!cl) return NULL; +next: + BUG_TRAP(cl); + if (!cl) return NULL; HTB_DBG(4,1,"htb_deq_tr prio=%d lev=%d cl=%X defic=%d\n", prio,level,cl->classid,cl->un.leaf.deficit[level]); + + /* class can be empty - it is unlikely but can be true if leaf + qdisc drops packets in enqueue routine or if someone used + graft operation on the leaf since last dequeue; + simply deactivate and skip such class */ + if (unlikely(cl->un.leaf.q->q.qlen == 0)) { + struct htb_class *next; + htb_deactivate(q,cl); + + /* 
row/level might become empty */ + if ((q->row_mask[level] & (1 << prio)) == 0) + return NULL; + + next = htb_lookup_leaf (HTB_PASSQ q->row[level]+prio, + prio,q->ptr[level]+prio,q->last_ptr_id[level]+prio); + if (cl == start) /* fix start if we just deleted it */ + start = next; + cl = next; + goto next; + } if (likely((skb = cl->un.leaf.q->dequeue(cl->un.leaf.q)) != NULL)) break; @@ -968,11 +1039,10 @@ htb_dequeue_tree(struct htb_sched *q,int prio,int level) } q->nwc_hit++; htb_next_rb_node((level?cl->parent->un.inner.ptr:q->ptr[0])+prio); - cl = htb_lookup_leaf (q->row[level]+prio,prio,q->ptr[level]+prio); + cl = htb_lookup_leaf (HTB_PASSQ q->row[level]+prio,prio,q->ptr[level]+prio, + q->last_ptr_id[level]+prio); } while (cl != start); - DEVIK_MEND(6); - DEVIK_MSTART(7); if (likely(skb != NULL)) { if ((cl->un.leaf.deficit[level] -= skb->len) < 0) { HTB_DBG(4,2,"htb_next_cl oldptr=%p quant_add=%d\n", @@ -984,27 +1054,22 @@ htb_dequeue_tree(struct htb_sched *q,int prio,int level) gives us slightly better performance */ if (!cl->un.leaf.q->q.qlen) htb_deactivate (q,cl); - DEVIK_MSTART(8); htb_charge_class (q,cl,level,skb->len); - DEVIK_MEND(8); } - DEVIK_MEND(7); return skb; } static void htb_delay_by(struct Qdisc *sch,long delay) { struct htb_sched *q = (struct htb_sched *)sch->data; - if (netif_queue_stopped(sch->dev)) return; if (delay <= 0) delay = 1; if (unlikely(delay > 5*HZ)) { if (net_ratelimit()) printk(KERN_INFO "HTB delay %ld > 5sec\n", delay); delay = 5*HZ; } - del_timer(&q->timer); - q->timer.expires = jiffies + delay; - add_timer(&q->timer); + /* why don't use jiffies here ? because expires can be in past */ + mod_timer(&q->timer, q->jiffies + delay); sch->flags |= TCQ_F_THROTTLED; sch->stats.overlimits++; HTB_DBG(3,1,"htb_deq t_delay=%ld\n",delay); @@ -1016,7 +1081,11 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch) struct htb_sched *q = (struct htb_sched *)sch->data; int level; long min_delay; +#ifdef HTB_DEBUG + int evs_used = 0; +#endif + q->jiffies = jiffies; HTB_DBG(3,1,"htb_deq dircnt=%d qlen=%d\n",skb_queue_len(&q->direct_queue), sch->q.qlen); @@ -1027,27 +1096,26 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch) return skb; } - DEVIK_MSTART(2); if (!sch->q.qlen) goto fin; PSCHED_GET_TIME(q->now); - min_delay = HZ*5; + min_delay = LONG_MAX; q->nwc_hit = 0; for (level = 0; level < TC_HTB_MAXDEPTH; level++) { /* common case optimization - skip event handler quickly */ int m; long delay; - DEVIK_MSTART(3); - if (jiffies - q->near_ev_cache[level] < 0x80000000 || 0) { + if (time_after_eq(q->jiffies, q->near_ev_cache[level])) { delay = htb_do_events(q,level); - q->near_ev_cache[level] += delay ? delay : HZ; + q->near_ev_cache[level] = q->jiffies + (delay ? 
delay : HZ); +#ifdef HTB_DEBUG + evs_used++; +#endif } else - delay = q->near_ev_cache[level] - jiffies; + delay = q->near_ev_cache[level] - q->jiffies; if (delay && min_delay > delay) min_delay = delay; - DEVIK_MEND(3); - DEVIK_MSTART(5); m = ~q->row_mask[level]; while (m != (int)(-1)) { int prio = ffz (m); @@ -1056,29 +1124,29 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch) if (likely(skb != NULL)) { sch->q.qlen--; sch->flags &= ~TCQ_F_THROTTLED; - DEVIK_MEND(5); goto fin; } } - DEVIK_MEND(5); } - DEVIK_MSTART(4); #ifdef HTB_DEBUG - if (!q->nwc_hit && min_delay >= 5*HZ && net_ratelimit()) { - printk(KERN_ERR "HTB: mindelay=%ld, report it please !\n",min_delay); - htb_debug_dump(q); + if (!q->nwc_hit && min_delay >= 10*HZ && net_ratelimit()) { + if (min_delay == LONG_MAX) { + printk(KERN_ERR "HTB: dequeue bug (%d,%lu,%lu), report it please !\n", + evs_used,q->jiffies,jiffies); + htb_debug_dump(q); + } else + printk(KERN_WARNING "HTB: mindelay=%ld, some class has " + "too small rate\n",min_delay); } #endif - htb_delay_by (sch,min_delay); - DEVIK_MEND(4); + htb_delay_by (sch,min_delay > 5*HZ ? 5*HZ : min_delay); fin: - HTB_DBG(3,1,"htb_deq_end %s j=%lu skb=%p\n",sch->dev->name,jiffies,skb); - DEVIK_MEND(2); + HTB_DBG(3,1,"htb_deq_end %s j=%lu skb=%p\n",sch->dev->name,q->jiffies,skb); return skb; } /* try to drop from each class (by prio) until one succeed */ -static int htb_drop(struct Qdisc* sch) +static unsigned int htb_drop(struct Qdisc* sch) { struct htb_sched *q = (struct htb_sched *)sch->data; int prio; @@ -1086,14 +1154,15 @@ static int htb_drop(struct Qdisc* sch) for (prio = TC_HTB_NUMPRIO - 1; prio >= 0; prio--) { struct list_head *p; list_for_each (p,q->drops+prio) { - struct htb_class *cl = list_entry(p,struct htb_class, - un.leaf.drop_list); + struct htb_class *cl = list_entry(p, struct htb_class, + un.leaf.drop_list); + unsigned int len; if (cl->un.leaf.q->ops->drop && - cl->un.leaf.q->ops->drop(cl->un.leaf.q)) { + (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) { sch->q.qlen--; if (!cl->un.leaf.q->q.qlen) htb_deactivate (q,cl); - return 1; + return len; } } } @@ -1162,7 +1231,6 @@ static int htb_init(struct Qdisc *sch, struct rtattr *opt) HTB_VER >> 16,HTB_VER & 0xffff,gopt->version); return -EINVAL; } - memset(q,0,sizeof(*q)); q->debug = gopt->debug; HTB_DBG(0,1,"htb_init sch=%p handle=%X r2q=%d\n",sch,sch->handle,gopt->rate2quantum); @@ -1208,7 +1276,8 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb) gopt.direct_pkts = q->direct_pkts; #ifdef HTB_DEBUG - htb_debug_dump(q); + if (HTB_DBG_COND(0,2)) + htb_debug_dump(q); #endif gopt.version = HTB_VER; gopt.rate2quantum = q->rate2quantum; @@ -1218,8 +1287,6 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb) RTA_PUT(skb, TCA_OPTIONS, 0, NULL); RTA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt); rta->rta_len = skb->tail - b; - sch->stats.qlen = sch->q.qlen; - RTA_PUT(skb, TCA_STATS, sizeof(sch->stats), &sch->stats); HTB_QUNLOCK(sch); return skb->len; rtattr_failure: @@ -1289,6 +1356,9 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, return -ENOBUFS; sch_tree_lock(sch); if ((*old = xchg(&cl->un.leaf.q, new)) != NULL) { + if (cl->prio_activity) + htb_deactivate ((struct htb_sched*)sch->data,cl); + /* TODO: is it correct ? Why CBQ doesn't do it ? 
*/ sch->q.qlen -= (*old)->q.qlen; qdisc_reset(*old); @@ -1323,7 +1393,7 @@ static void htb_destroy_filters(struct tcf_proto **fl) while ((tp = *fl) != NULL) { *fl = tp->next; - tp->ops->destroy(tp); + tcf_destroy(tp); } } @@ -1371,11 +1441,16 @@ static void htb_destroy(struct Qdisc* sch) #ifdef HTB_RATECM del_timer_sync (&q->rttim); #endif + /* This line used to be after htb_destroy_class call below + and surprisingly it worked in 2.4. But it must precede it + because filter need its target class alive to be able to call + unbind_filter on it (without Oops). */ + htb_destroy_filters(&q->filter_list); + while (!list_empty(&q->root)) htb_destroy_class (sch,list_entry(q->root.next, struct htb_class,sibling)); - htb_destroy_filters(&q->filter_list); __skb_queue_purge(&q->direct_queue); MOD_DEC_USE_COUNT; } @@ -1438,12 +1513,13 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, parent = parentid == TC_H_ROOT ? NULL : htb_find (parentid,sch); hopt = RTA_DATA(tb[TCA_HTB_PARMS-1]); - HTB_DBG(0,1,"htb_chg cl=%p, clid=%X, opt/prio=%d, rate=%u, buff=%d, quant=%d\n", cl,cl?cl->classid:0,(int)hopt->prio,hopt->rate.rate,hopt->buffer,hopt->quantum); + HTB_DBG(0,1,"htb_chg cl=%p(%X), clid=%X, parid=%X, opt/prio=%d, rate=%u, buff=%d, quant=%d\n", cl,cl?cl->classid:0,classid,parentid,(int)hopt->prio,hopt->rate.rate,hopt->buffer,hopt->quantum); rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB-1]); ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB-1]); if (!rtab || !ctab) goto failure; if (!cl) { /* new class */ + struct Qdisc *new_q; /* check for valid classid */ if (!classid || TC_H_MAJ(classid^sch->handle) || htb_find(classid,sch)) goto failure; @@ -1467,6 +1543,10 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, cl->magic = HTB_CMAGIC; #endif + /* create leaf qdisc early because it uses kmalloc(GFP_KERNEL) + so that can't be used inside of sch_tree_lock + -- thanks to Karlis Peisenieks */ + new_q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops); sch_tree_lock(sch); if (parent && !parent->level) { /* turn parent into inner node */ @@ -1485,8 +1565,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, memset (&parent->un.inner,0,sizeof(parent->un.inner)); } /* leaf (we) needs elementary qdisc */ - if (!(cl->un.leaf.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops))) - cl->un.leaf.q = &noop_qdisc; + cl->un.leaf.q = new_q ? new_q : &noop_qdisc; cl->classid = classid; cl->parent = parent; @@ -1514,11 +1593,11 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, if (!cl->level) { cl->un.leaf.quantum = rtab->rate.rate / q->rate2quantum; if (!hopt->quantum && cl->un.leaf.quantum < 1000) { - printk(KERN_WARNING "HTB: quantum of class %X is small. Consider r2q change.", cl->classid); + printk(KERN_WARNING "HTB: quantum of class %X is small. Consider r2q change.\n", cl->classid); cl->un.leaf.quantum = 1000; } if (!hopt->quantum && cl->un.leaf.quantum > 200000) { - printk(KERN_WARNING "HTB: quantum of class %X is big. Consider r2q change.", cl->classid); + printk(KERN_WARNING "HTB: quantum of class %X is big. 
Consider r2q change.\n", cl->classid); cl->un.leaf.quantum = 200000; } if (hopt->quantum) diff --git a/release/src/linux/linux/net/sched/sch_ingress.c b/release/src/linux/linux/net/sched/sch_ingress.c index 2b30fce0..70698728 100644 --- a/release/src/linux/linux/net/sched/sch_ingress.c +++ b/release/src/linux/linux/net/sched/sch_ingress.c @@ -250,7 +250,6 @@ int ingress_init(struct Qdisc *sch,struct rtattr *opt) } DPRINTK("ingress_init(sch %p,[qdisc %p],opt %p)\n",sch,p,opt); - memset(p, 0, sizeof(*p)); p->filter_list = NULL; p->q = &noop_qdisc; MOD_INC_USE_COUNT; @@ -286,10 +285,7 @@ static void ingress_destroy(struct Qdisc *sch) p->filter_list = tp->next; tp->ops->destroy(tp); } - memset(p, 0, sizeof(*p)); - p->filter_list = NULL; - MOD_DEC_USE_COUNT; } diff --git a/release/src/linux/linux/net/sched/sch_sfq.c b/release/src/linux/linux/net/sched/sch_sfq.c index c96762fb..a6c17424 100644 --- a/release/src/linux/linux/net/sched/sch_sfq.c +++ b/release/src/linux/linux/net/sched/sch_sfq.c @@ -218,6 +218,7 @@ static int sfq_drop(struct Qdisc *sch) struct sfq_sched_data *q = (struct sfq_sched_data *)sch->data; sfq_index d = q->max_depth; struct sk_buff *skb; + int len; /* Queue is full! Find the longest slot and drop a packet from it */ @@ -225,12 +226,13 @@ static int sfq_drop(struct Qdisc *sch) if (d > 1) { sfq_index x = q->dep[d+SFQ_DEPTH].next; skb = q->qs[x].prev; + len = skb->len; __skb_unlink(skb, &q->qs[x]); kfree_skb(skb); sfq_dec(q, x); sch->q.qlen--; sch->stats.drops++; - return 1; + return len; } if (d == 1) { @@ -239,13 +241,14 @@ static int sfq_drop(struct Qdisc *sch) q->next[q->tail] = q->next[d]; q->allot[q->next[d]] += q->quantum; skb = q->qs[d].prev; + len = skb->len; __skb_unlink(skb, &q->qs[d]); kfree_skb(skb); sfq_dec(q, d); sch->q.qlen--; q->ht[q->hash[d]] = SFQ_DEPTH; sch->stats.drops++; - return 1; + return len; } return 0; @@ -342,6 +345,7 @@ sfq_dequeue(struct Qdisc* sch) /* Is the slot empty? */ if (q->qs[a].qlen == 0) { + q->ht[q->hash[a]] = SFQ_DEPTH; a = q->next[a]; if (a == old_a) { q->tail = SFQ_DEPTH; -- cgit v1.2.3-54-g00ecf
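The HTB hunks above replace open-coded jiffies arithmetic such as "cl->pq_key - (jiffies+1) < 0x80000000" with the wraparound-safe time_after()/time_after_eq() helpers, driven by a jiffies value cached in q->jiffies at the top of htb_dequeue(). The short userspace sketch below illustrates why the signed-subtraction test keeps working when the counter wraps; the macro bodies mirror the kernel's definitions, but the jiffies_t typedef and the main() driver are assumptions made only for a self-contained demo, and none of this is part of the patch itself.

/*
 * Illustrative only; not part of the patch. The macro bodies below
 * mirror the kernel's time_after()/time_after_eq() definitions; the
 * jiffies_t typedef and the main() driver are demo assumptions.
 */
#include <stdio.h>

typedef unsigned long jiffies_t;	/* stand-in for the kernel's jiffies counter */

#define time_after(a, b)	((long)(b) - (long)(a) < 0)
#define time_after_eq(a, b)	((long)(a) - (long)(b) >= 0)

int main(void)
{
	jiffies_t now = (jiffies_t)-2;	/* two ticks before the counter wraps */
	jiffies_t pq_key = now + 5;	/* event due five ticks later, i.e. after the wrap */

	/* A plain unsigned comparison treats the wrapped key as long past... */
	printf("naive   pq_key <= now   : %d\n", pq_key <= now ? 1 : 0);

	/* ...while the signed-subtraction tests give the intended answers. */
	printf("time_after(pq_key, now) : %d\n", time_after(pq_key, now) ? 1 : 0);
	printf("time_after_eq(now, key) : %d\n", time_after_eq(now, pq_key) ? 1 : 0);

	return 0;
}

Expected output is 1, 1, 0: the naive comparison wrongly reports the wrapped key as already due, while time_after() correctly places it in the future and time_after_eq() correctly reports it as not yet pending.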