patch-2.1.99 linux/net/sched/sch_red.c


diff -u --recursive --new-file v2.1.98/linux/net/sched/sch_red.c linux/net/sched/sch_red.c
@@ -1,5 +1,5 @@
 /*
- * net/sched/sch_red.c	Random Early Detection scheduler.
+ * net/sched/sch_red.c	Random Early Detection queue.
  *
  *		This program is free software; you can redistribute it and/or
  *		modify it under the terms of the GNU General Public License
@@ -9,6 +9,8 @@
  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  */
 
+#include <linux/config.h>
+#include <linux/module.h>
 #include <asm/uaccess.h>
 #include <asm/system.h>
 #include <asm/bitops.h>
@@ -62,32 +64,42 @@
 
 	and mark (drop) the packet with this probability.
 	Pb changes from 0 (at avg==th_min) to max_P (avg==th_max).
-	max_P should be small (not 1!).
+	max_P should be small (not 1); usually 0.01..0.02 is a good value.
+
+	max_P is chosen so that max_P/(th_max-th_min) is a negative
+	power of two, which keeps the arithmetic to shifts only.
+
+
+	Parameters, settable by user:
+	-----------------------------
+
+	limit		- bytes (must be > qth_max + burst)
+
+	Hard limit on the queue length: it should be chosen >qth_max
+	to allow packet bursts. This parameter does not
+	affect the algorithm's behaviour and can be chosen
+	arbitrarily high (well, less than the RAM size);
+	in fact, this limit will never be reached
+	if RED works correctly.
+
+	qth_min		- bytes (should be < qth_max/2)
+	qth_max		- bytes (should be at least 2*qth_min and less than limit)
+	Wlog		- bits (<32); log(1/W).
+	Plog	       	- bits (<32)
+
+	Plog is related to max_P by the formula:
+
+	max_P = (qth_max-qth_min)/2^Plog;
+
+	E.g. if qth_max=128K and qth_min=32K, then Plog=22
+	corresponds to max_P ~= 0.02 (0.0234, to be exact).
+
+	Scell_log	- log2 of the idle-time cell, used to index Stab
+	Stab
+
+	Lookup table for log((1-W)^(t/t_ave)).
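
A quick user-space illustration of the shift-only arithmetic (an
editorial sketch, not part of the patch): qave and the thresholds
are kept scaled by 2^Wlog, so the drop test that red_enqueue()
performs below needs only shifts, one multiply and a compare.
Wlog=9 is merely a typical value; the thresholds are the example
ones.

	#include <stdio.h>
	#include <stdlib.h>

	int main(void)
	{
		unsigned int Wlog = 9, Plog = 22;
		unsigned long qth_min = (32UL << 10) << Wlog;	/* 32K, scaled */
		unsigned long qth_max = (128UL << 10) << Wlog;	/* 128K, scaled */
		unsigned long Rmask = (1UL << Plog) - 1;
		unsigned long qave = (64UL << 10) << Wlog;	/* sample average */
		unsigned long qR = random() & Rmask;
		int qcount = 1;

		/* max_P implied by Plog, cf. the formula above: ~0.0234 */
		printf("max_P = %f\n",
		       (double)((qth_max - qth_min) >> Wlog) / (1UL << Plog));

		/* the kernel's shift-only drop test */
		if (((qave - qth_min) >> Wlog) * qcount < qR)
			printf("enqueue\n");
		else
			printf("drop\n");
		return 0;
	}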
 
-	NB.	SF&VJ assumed that Pb[avg] is linear function. I think it
-	        is wrong. I'd make:
-		P[th_min] = 0, P[th_max] = 1;
-		dP/davg[th_min] = 0, dP/davg[th_max] = infinity, or a large number.
-
-	I choose max_P as a number between 0.01 and 0.1, so that
-	C1 = max_P/(th_max-th_min) is power of two: C1 = 2^(-C1log)
-
-	Parameters, settable by user (with default values):
-
-	qmaxbytes=256K - hard limit on queue length, should be chosen >qth_max
-	                 to allow packet bursts. This parameter does not
-			 affect algorithm behaviour and can be chosen
-			 arbitrarily high (well, less than ram size)
-			 Really, this limit will never be achieved
-			 if RED works correctly.
-	qth_min=32K
-	qth_max=128K   - qth_max should be at least 2*qth_min
-	Wlog=8	       - log(1/W).
-	Alog=Wlog      - fixed point position in th_min and th_max.
-	Rlog=10
-	C1log=24       - C1log = trueC1log+Alog-Rlog
-	                 so that trueC1log=22 and max_P~0.02
-	
 
 NOTES:
 
@@ -97,10 +109,10 @@
 	If you want to allow bursts of L packets of size S,
 	you should choose W:
 
-	L + 1 -th_min/S < (1-(1-W)^L)/W
-
-	For th_min/S = 32
+	L + 1 - th_min/S < (1-(1-W)^L)/W
 
+	th_min/S = 32         th_min/S = 4
+
 	log(W)	L
 	-1	33
 	-2	35
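
The listed values can be recomputed from the inequality above.
This user-space sketch (editorial, not part of the patch) reports,
for each log(W), the first burst length L at which the inequality
stops holding, which reproduces the table:

	#include <math.h>
	#include <stdio.h>

	int main(void)
	{
		double th_min_over_S = 32;	/* also try 4 */
		int wlog;

		for (wlog = 1; wlog <= 10; wlog++) {
			double W = 1.0 / (1 << wlog);
			int L = 1;

			/* advance while L + 1 - th_min/S < (1-(1-W)^L)/W */
			while (L + 1 - th_min_over_S < (1 - pow(1 - W, L)) / W)
				L++;
			printf("log(W) = -%d\tL = %d\n", wlog, L);
		}
		return 0;
	}
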
@@ -117,33 +129,24 @@
 struct red_sched_data
 {
 /* Parameters */
-	unsigned long	qmaxbytes;	/* HARD maximal queue length	*/
-	unsigned long	qth_min;	/* Min average length threshold: A scaled */
-	unsigned long	qth_max;	/* Max average length threshold: A scaled */
-	char		Alog;		/* Point position in average lengths */
+	u32		limit;		/* HARD maximal queue length	*/
+	u32		qth_min;	/* Min average length threshold (scaled by 2^Wlog) */
+	u32		qth_max;	/* Max average length threshold (scaled by 2^Wlog) */
+	u32		Rmask;
+	u32		Scell_max;
 	char		Wlog;		/* log(W)		*/
-	char		Rlog;		/* random number bits	*/
-	char		C1log;		/* log(1/C1)		*/
-	char		Slog;
-	char		Stab[256];
+	char		Plog;		/* random number bits	*/
+	char		Scell_log;
+	u8		Stab[256];
 
 /* Variables */
-	unsigned long	qbytes;		/* Queue length in bytes	*/
 	unsigned long	qave;		/* Average queue length (scaled by 2^Wlog) */
 	int		qcount;		/* Packets since last random number generation */
-	unsigned	qR;		/* Cached random number [0..1<Rlog) */
+	u32		qR;		/* Cached random number */
+
 	psched_time_t	qidlestart;	/* Start of idle period		*/
 };
 
-/* Stolen from igmp.c. */
-
-static __inline__ unsigned red_random(int log)
-{
-	static unsigned long seed=152L;
-	seed=seed*69069L+1;
-	return (seed^jiffies)&((1<<log)-1);
-}
-
 static int
 red_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 {
@@ -155,17 +158,15 @@
 		long us_idle;
 		PSCHED_SET_PASTPERFECT(q->qidlestart);
 		PSCHED_GET_TIME(now);
-		us_idle = PSCHED_TDIFF_SAFE(now, q->qidlestart, (256<<q->Slog)-1, 0);
-
-/* It is wrong, but I do not think that SF+VJ proposal is reasonable
-   and did not invented anything more clever 8)
+		us_idle = PSCHED_TDIFF_SAFE(now, q->qidlestart, q->Scell_max, 0);
 
+/*
   The problem: ideally, average queue length recalculation should
   be done over constant clock intervals. That is too expensive, so
   the calculation is driven by outgoing packets instead.
   When the queue is idle we have to model this clock by hand.
 
-   SF+VJ proposed to "generate" m = (idletime/bandwidth)*average_pkt_size
+   SF+VJ proposed to "generate" m = idletime/(average_pkt_size/bandwidth)
    dummy packets as a burst after the idle time, i.e.
 
           q->qave *= (1-W)^m
@@ -175,129 +176,193 @@
   I believe that a simpler model may be used here,
   but it is a field for experiments.
 */
-		q->qave >>= q->Stab[(us_idle>>q->Slog)&0xFF];
+		q->qave >>= q->Stab[(us_idle>>q->Scell_log)&0xFF];
 	}
 
-	q->qave += ((q->qbytes<<q->Alog) - q->qave) >> q->Wlog;
+	q->qave += sch->stats.backlog - (q->qave >> q->Wlog);
 
 	if (q->qave < q->qth_min) {
 enqueue:
 		q->qcount = -1;
-		if (q->qbytes <= q->qmaxbytes) {
-			skb_queue_tail(&sch->q, skb);
-			q->qbytes += skb->len;
+		if (sch->stats.backlog <= q->limit) {
+			__skb_queue_tail(&sch->q, skb);
+			sch->stats.backlog += skb->len;
+			sch->stats.bytes += skb->len;
+			sch->stats.packets++;
 			return 1;
 		}
 drop:
 		kfree_skb(skb);
+		sch->stats.drops++;
 		return 0;
 	}
 	if (q->qave >= q->qth_max) {
 		q->qcount = -1;
+		sch->stats.overlimits++;
 		goto drop;
 	}
-	q->qcount++;
-	if (q->qcount++) {
-		if ((((q->qave - q->qth_min)*q->qcount)>>q->C1log) < q->qR)
+	if (++q->qcount) {
+		if (((q->qave - q->qth_min)>>q->Wlog)*q->qcount < q->qR)
 			goto enqueue;
 		q->qcount = 0;
-		q->qR = red_random(q->Rlog);
+		q->qR = net_random()&q->Rmask;
+		sch->stats.overlimits++;
 		goto drop;
 	}
-	q->qR = red_random(q->Rlog);
+	q->qR = net_random()&q->Rmask;
 	goto enqueue;
 }
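
The kernel consults Stab but never fills it: the 256-byte table
arrives from user space through red_init() below. One plausible
construction, following the comment above, is this editorial
sketch (not part of the patch; xmit_time, the time to transmit one
average-sized packet in the same clock units as the Scell_log
cells, is an assumed input, and real configuration tools may round
differently):

	#include <math.h>

	/* Fill stab[] so that qave >>= stab[idle >> Scell_log]
	 * approximates qave *= (1-W)^m with m = idletime/xmit_time,
	 * using (1-W)^m == 2^(m*log2(1-W)) to get a shift count. */
	static void build_stab(unsigned char *stab, int Wlog,
			       int Scell_log, double xmit_time)
	{
		double W = 1.0 / (1 << Wlog);
		int i;

		for (i = 0; i < 256; i++) {
			double m = ((double)i * (1UL << Scell_log)) / xmit_time;
			double shift = -m * log2(1.0 - W) + 0.5;

			stab[i] = shift < 255.0 ? (unsigned char)shift : 255;
		}
	}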
 
+static int
+red_requeue(struct sk_buff *skb, struct Qdisc* sch)
+{
+	struct red_sched_data *q = (struct red_sched_data *)sch->data;
+
+	PSCHED_SET_PASTPERFECT(q->qidlestart);
+
+	__skb_queue_head(&sch->q, skb);
+	sch->stats.backlog += skb->len;
+	return 1;
+}
+
 static struct sk_buff *
 red_dequeue(struct Qdisc* sch)
 {
 	struct sk_buff *skb;
 	struct red_sched_data *q = (struct red_sched_data *)sch->data;
 
-	skb = skb_dequeue(&sch->q);
+	skb = __skb_dequeue(&sch->q);
 	if (skb) {
-		q->qbytes -= skb->len;
+		sch->stats.backlog -= skb->len;
 		return skb;
 	}
 	PSCHED_GET_TIME(q->qidlestart);
 	return NULL;
 }
 
-static void
-red_reset(struct Qdisc* sch)
+static int
+red_drop(struct Qdisc* sch)
 {
-	struct red_sched_data *q = (struct red_sched_data *)sch->data;
 	struct sk_buff *skb;
+	struct red_sched_data *q = (struct red_sched_data *)sch->data;
 
-	while((skb=skb_dequeue(&sch->q))!=NULL) {
-		q->qbytes -= skb->len;
+	skb = __skb_dequeue_tail(&sch->q);
+	if (skb) {
+		sch->stats.backlog -= skb->len;
+		sch->stats.drops++;
 		kfree_skb(skb);
+		return 1;
 	}
-	if (q->qbytes) {
-		printk("red_reset: qbytes=%lu\n", q->qbytes);
-		q->qbytes = 0;
-	}
+	PSCHED_GET_TIME(q->qidlestart);
+	return 0;
+}
+
+static void red_reset(struct Qdisc* sch)
+{
+	struct red_sched_data *q = (struct red_sched_data *)sch->data;
+	struct sk_buff *skb;
+
+	while((skb=__skb_dequeue(&sch->q))!=NULL)
+		kfree_skb(skb);
+	sch->stats.backlog = 0;
 	PSCHED_SET_PASTPERFECT(q->qidlestart);
 	q->qave = 0;
 	q->qcount = -1;
 }
 
-static int red_init(struct Qdisc *sch, struct pschedctl *pctl)
+static int red_init(struct Qdisc *sch, struct rtattr *opt)
 {
-	struct red_sched_data *q;
-	struct redctl *ctl = (struct redctl*)pctl->args;
-
-	q = (struct red_sched_data *)sch->data;
+	struct red_sched_data *q = (struct red_sched_data *)sch->data;
+	struct rtattr *tb[TCA_RED_STAB];
+	struct tc_red_qopt *ctl;
 
-	if (pctl->arglen < sizeof(struct redctl))
+	if (opt == NULL ||
+	    rtattr_parse(tb, TCA_RED_STAB, RTA_DATA(opt), RTA_PAYLOAD(opt)) ||
+	    tb[TCA_RED_PARMS-1] == 0 || tb[TCA_RED_STAB-1] == 0 ||
+	    RTA_PAYLOAD(tb[TCA_RED_PARMS-1]) < sizeof(*ctl) ||
+	    RTA_PAYLOAD(tb[TCA_RED_STAB-1]) < 256)
 		return -EINVAL;
 
+	ctl = RTA_DATA(tb[TCA_RED_PARMS-1]);
+
 	q->Wlog = ctl->Wlog;
-	q->Alog = ctl->Alog;
-	q->Rlog = ctl->Rlog;
-	q->C1log = ctl->C1log;
-	q->Slog = ctl->Slog;
-	q->qth_min = ctl->qth_min;
-	q->qth_max = ctl->qth_max;
-	q->qmaxbytes = ctl->qmaxbytes;
-	memcpy(q->Stab, ctl->Stab, 256);
+	q->Plog = ctl->Plog;
+	q->Rmask = ctl->Plog < 32 ? ((1<<ctl->Plog) - 1) : ~0UL;
+	q->Scell_log = ctl->Scell_log;
+	q->Scell_max = (256<<q->Scell_log)-1;
+	q->qth_min = ctl->qth_min<<ctl->Wlog;
+	q->qth_max = ctl->qth_max<<ctl->Wlog;
+	q->limit = ctl->limit;
+	memcpy(q->Stab, RTA_DATA(tb[TCA_RED_STAB-1]), 256);
 
 	q->qcount = -1;
 	PSCHED_SET_PASTPERFECT(q->qidlestart);
+	MOD_INC_USE_COUNT;
 	return 0;
 }
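
red_init() expects Wlog, Plog, Scell_log and Stab to be computed
already; deriving them from human-friendly numbers is the
configuration utility's job. The sketch below (editorial, not part
of the patch; both helper names are invented) picks the smallest
Wlog whose W still absorbs a burst of `burst' packets of size
avpkt, using the inequality from the NOTES above, and a Plog such
that max_P ~= (qth_max-qth_min)/2^Plog:

	#include <math.h>

	static int eval_wlog(int burst, double qth_min, double avpkt)
	{
		int wlog;

		for (wlog = 1; wlog < 32; wlog++) {
			double W = 1.0 / (1u << wlog);

			if (burst + 1 - qth_min / avpkt <
			    (1 - pow(1 - W, burst)) / W)
				return wlog;
		}
		return -1;
	}

	static int eval_plog(double max_P, double qth_min, double qth_max)
	{
		int plog = (int)(log2((qth_max - qth_min) / max_P) + 0.5);

		return plog < 32 ? plog : -1;
	}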
 
-struct Qdisc_ops red_ops =
+#ifdef CONFIG_RTNETLINK
+static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	struct red_sched_data *q = (struct red_sched_data *)sch->data;
+	unsigned char	 *b = skb->tail;
+	struct rtattr *rta;
+	struct tc_red_qopt opt;
+
+	rta = (struct rtattr*)b;
+	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
+	opt.limit = q->limit;
+	opt.qth_min = q->qth_min>>q->Wlog;
+	opt.qth_max = q->qth_max>>q->Wlog;
+	opt.Wlog = q->Wlog;
+	opt.Plog = q->Plog;
+	opt.Scell_log = q->Scell_log;
+	RTA_PUT(skb, TCA_RED_PARMS, sizeof(opt), &opt);
+	rta->rta_len = skb->tail - b;
+
+	return skb->len;
+
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+#endif
+
+static void red_destroy(struct Qdisc *sch)
+{
+	MOD_DEC_USE_COUNT;
+}
+
+struct Qdisc_ops red_qdisc_ops =
 {
 	NULL,
+	NULL,
 	"red",
-	0,
 	sizeof(struct red_sched_data),
+
 	red_enqueue,
 	red_dequeue,
-	red_reset,
-	NULL,
+	red_requeue,
+	red_drop,
+
 	red_init,
-	NULL
+	red_reset,
+	red_destroy,
+
+#ifdef CONFIG_RTNETLINK
+	red_dump,
+#endif
 };
 
 
 #ifdef MODULE
-#include <linux/module.h>
 int init_module(void)
 {
-	int err;
-
-	/* Load once and never free it. */
-	MOD_INC_USE_COUNT;
-
-	err = register_qdisc(&red_ops);
-	if (err)
-		MOD_DEC_USE_COUNT;
-	return err;
+	return register_qdisc(&red_qdisc_ops);
 }
 
 void cleanup_module(void) 
 {
+	unregister_qdisc(&red_qdisc_ops);
 }
 #endif
