patch-2.1.99 linux/net/sched/sch_api.c

Next file: linux/net/sched/sch_cbq.c
Previous file: linux/net/sched/police.c
Back to the patch index
Back to the overall index

diff -u --recursive --new-file v2.1.98/linux/net/sched/sch_api.c linux/net/sched/sch_api.c
@@ -0,0 +1,994 @@
+/*
+ * net/sched/sch_api.c	Packet scheduler API.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ */
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/bitops.h>
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/init.h>
+#include <linux/proc_fs.h>
+#include <net/sock.h>
+#include <net/pkt_sched.h>
+
+/* Soft assertion: on failure only logs expression, file, line, function. */
+#define BUG_TRAP(x) do { if (!(x)) { printk("Assertion (" #x ") failed at " __FILE__ "(%d):" __FUNCTION__ "\n", __LINE__); } } while (0)
+
+#ifdef CONFIG_RTNETLINK	/* notifiers are defined further down in this file */
+static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n,
+			struct Qdisc *old, struct Qdisc *new);
+static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
+			 struct Qdisc *q, unsigned long cl, int event);
+#endif /* CONFIG_RTNETLINK */
+
+/*
+
+   Short review.
+   -------------
+
+   This file consists of two interrelated parts:
+
+   1. queueing disciplines manager frontend.
+   2. traffic classes manager frontend.
+
+   Generally, queueing discipline ("qdisc") is a black box,
+   which is able to enqueue packets and to dequeue them (when
+   device is ready to send something) in order and at times
+   determined by algorithm hidden in it.
+
+   qdiscs are divided into two categories:
+   - "queues", which have no internal structure visible from outside.
+   - "schedulers", which split all the packets to "traffic classes",
+     using "packet classifiers" (look at cls_api.c)
+
+   In turn, classes may have child qdiscs (as rule, queues)
+   attached to them etc. etc. etc.
+
+   The goal of the routines in this file is to translate
+   information supplied by the user in the form of handles
+   into a form more intelligible to the kernel, to perform sanity
+   checks and the part of the work which is common to all qdiscs,
+   and to provide rtnetlink notifications.
+
+   All real intelligent work is done inside qdisc modules.
+
+
+
+   Every discipline has two major routines: enqueue and dequeue.
+
+   ---dequeue
+
+   dequeue usually returns a skb to send. It is allowed to return NULL,
+   but it does not mean that queue is empty, it just means that
+   discipline does not want to send anything this time.
+   Queue is really empty if q->q.qlen == 0.
+   For complicated disciplines with multiple queues q->q is not
+   real packet queue, but however q->q.qlen must be valid.
+
+   ---enqueue
+
+   enqueue returns number of enqueued packets i.e. this number is 1,
+   if packet was enqueued successfully and <1 if something (not
+   necessarily THIS packet) was dropped.
+
+   Auxiliary routines:
+
+   ---requeue
+
+   requeues once dequeued packet. It is used for non-standard or
+   just buggy devices, which can defer output even if dev->tbusy=0.
+
+   ---reset
+
+   returns qdisc to initial state: purge all buffers, clear all
+   timers, counters (except for statistics) etc.
+
+   ---init
+
+   initializes newly created qdisc.
+
+   ---destroy
+
+   destroys resources allocated by init and during lifetime of qdisc.
+ */
+
+/************************************************
+ *	Queueing disciplines manipulation.	*
+ ************************************************/
+
+
+/* The list of all installed queueing disciplines (linked via ->next). */
+
+static struct Qdisc_ops *qdisc_base = NULL;
+
+/* Append qops to the registry unless its id is already taken (-EEXIST).
+   Hooks left NULL (enqueue/requeue/dequeue) default to noop_qdisc_ops'. */
+int register_qdisc(struct Qdisc_ops *qops)
+{
+	struct Qdisc_ops **link = &qdisc_base;
+
+	while (*link != NULL) {
+		if (!strcmp(qops->id, (*link)->id))
+			return -EEXIST;
+		link = &(*link)->next;
+	}
+	if (!qops->enqueue)
+		qops->enqueue = noop_qdisc_ops.enqueue;
+	if (!qops->requeue)
+		qops->requeue = noop_qdisc_ops.requeue;
+	if (!qops->dequeue)
+		qops->dequeue = noop_qdisc_ops.dequeue;
+	qops->next = NULL;
+	*link = qops;
+	return 0;
+}
+/* Unlink qops from the registry; -ENOENT if it was never registered. */
+int unregister_qdisc(struct Qdisc_ops *qops)
+{
+	struct Qdisc_ops **link;
+	for (link = &qdisc_base; *link != NULL; link = &(*link)->next) {
+		if (*link != qops)
+			continue;
+		*link = qops->next;
+		qops->next = NULL;
+		return 0;
+	}
+	return -ENOENT;
+}
+
+/* We know handle. Search the device's flat qdisc list (it holds the
+   root qdisc, all its children, children of children etc.) for the
+   qdisc carrying that handle. Returns NULL when there is no match.
+ */
+
+struct Qdisc *qdisc_lookup(struct device *dev, u32 handle)
+{
+	struct Qdisc *q = dev->qdisc_list;
+
+	/* Stop at the first match or fall off the end with q == NULL. */
+	while (q != NULL && q->handle != handle)
+		q = q->next;
+	return q;
+}
+
+/* We know classid. Search the device's flat qdisc list (root qdisc,
+   all its children, children of children etc.) for the qdisc whose
+   ->classid equals it. Returns NULL when there is no match.
+ */
+
+struct Qdisc *qdisc_lookup_class(struct device *dev, u32 classid)
+{
+	struct Qdisc *q = dev->qdisc_list;
+
+	/* Stop at the first match or fall off the end with q == NULL. */
+	while (q != NULL && q->classid != classid)
+		q = q->next;
+	return q;
+}
+
+
+/* Find queueing discipline by name: "kind" is compared with each
+   registered ops->id via rtattr_strcmp(). NULL kind yields NULL. */
+struct Qdisc_ops *qdisc_lookup_ops(struct rtattr *kind)
+{
+	struct Qdisc_ops *ops;
+
+	if (kind == NULL)
+		return NULL;
+
+	for (ops = qdisc_base; ops != NULL; ops = ops->next)
+		if (!rtattr_strcmp(kind, ops->id))
+			break;
+	return ops;
+}
+
+static struct qdisc_rate_table *qdisc_rtab_list;	/* refcounted cache */
+/* Reuse a cached table whose ratespec is identical to *r, else build one
+   from the 1024-byte "tab" attribute. NULL on bad input or no memory. */
+struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct rtattr *tab)
+{
+	struct qdisc_rate_table *rtab = qdisc_rtab_list;
+
+	while (rtab != NULL) {
+		if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec))) {
+			rtab->refcnt++;
+			return rtab;
+		}
+		rtab = rtab->next;
+	}
+	if (!tab || !r->rate || !r->cell_log || RTA_PAYLOAD(tab) != 1024)
+		return NULL;
+	rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
+	if (rtab == NULL)
+		return NULL;
+	rtab->rate = *r;
+	rtab->refcnt = 1;
+	memcpy(rtab->data, RTA_DATA(tab), 1024);
+	rtab->next = qdisc_rtab_list;
+	qdisc_rtab_list = rtab;
+	return rtab;
+}
+/* Drop one reference; on the last one unlink the table and free it. */
+void qdisc_put_rtab(struct qdisc_rate_table *tab)
+{
+	struct qdisc_rate_table **link = &qdisc_rtab_list;
+
+	if (tab == NULL)
+		return;
+	if (--tab->refcnt != 0)
+		return;
+	while (*link != NULL && *link != tab)
+		link = &(*link)->next;
+	if (*link == NULL)
+		return;		/* not on the list: nothing is freed */
+	*link = tab->next;
+	kfree(tab);
+}
+
+
+/* Allocate a unique handle from space managed by kernel; 0 on failure. */
+u32 qdisc_alloc_handle(struct device *dev)
+{
+	static u32 autohandle = TC_H_MAKE(0x80000000U, 0);
+	int tries = 0x10000;	/* bound on probes before giving up */
+	for (;;) {
+		autohandle += TC_H_MAKE(0x10000U, 0);
+		if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
+			autohandle = TC_H_MAKE(0x80000000U, 0);
+		if (!qdisc_lookup(dev, autohandle))
+			return autohandle;
+		if (--tries <= 0)
+			return 0;
+	}
+}
+
+/* Graft qdisc "new" to class "classid" of qdisc "parent" or
+   to device "dev" (when parent is NULL).
+
+   Old qdisc is not destroyed but returned in *old.
+   Returns 0 or a negative errno (-EINVAL if the class cannot be found).
+ */
+
+int qdisc_graft(struct device *dev, struct Qdisc *parent, u32 classid,
+		struct Qdisc *new, struct Qdisc **old)
+{
+	struct Qdisc_class_ops *cops;
+	unsigned long cl;
+	int err;
+
+	if (parent == NULL) {
+		BUG_TRAP(classid == TC_H_ROOT);
+		if (new) {
+			new->parent = NULL;
+			new->classid = TC_H_ROOT;
+		}
+		*old = dev_set_scheduler(dev, new);
+		return 0;
+	}
+
+	BUG_TRAP(classid != TC_H_ROOT);
+	cops = parent->ops->cl_ops;
+	if (cops == NULL)
+		return -EINVAL;
+	cl = cops->get(parent, classid);
+	if (cl == 0)
+		return -EINVAL;
+	err = cops->graft(parent, cl, new, old);
+	cops->put(parent, cl);
+	return err;
+}
+
+#ifdef CONFIG_RTNETLINK
+
+/*
+   Allocate and initialize a new qdisc of type "ops" (or, if ops is NULL,
+   of the type named by tca[TCA_KIND-1]) and link it into dev->qdisc_list.
+   Options go to ops->init(). Returns NULL and sets *errp on failure.
+ */
+
+static struct Qdisc *
+qdisc_create(struct device *dev, struct Qdisc_ops *ops, u32 handle,
+	     u32 parentid, struct rtattr **tca, int *errp)
+{
+	int err;
+	struct rtattr *kind = tca[TCA_KIND-1];
+	struct Qdisc *sch = NULL;
+	int size;
+	int new = 0;	/* set when ops had to be looked up by name */
+
+	if (ops == NULL) {
+		ops = qdisc_lookup_ops(kind);
+		err = -EINVAL;
+		if (ops == NULL)
+			goto err_out;
+		new = 1;
+	}
+
+	size = sizeof(*sch) + ops->priv_size;
+
+	sch = kmalloc(size, GFP_KERNEL);
+	err = -ENOBUFS;
+	if (!sch)
+		goto err_out;
+
+	/* Grrr... Resolve race condition with module unload */
+	
+	err = -EINVAL;
+	if (new) {
+		if (ops != qdisc_lookup_ops(kind))
+			goto err_out;
+	} else if (kind) {
+		if (rtattr_strcmp(kind, ops->id))
+			goto err_out;
+	}
+
+	memset(sch, 0, size);
+
+	skb_queue_head_init(&sch->q);
+	sch->ops = ops;
+	sch->enqueue = ops->enqueue;
+	sch->dequeue = ops->dequeue;
+	sch->dev = dev;
+	if (handle == 0) {
+		handle = qdisc_alloc_handle(dev);
+		err = -ENOMEM;
+		if (handle == 0)
+			goto err_out;
+	}
+	sch->handle = handle;
+	sch->classid = parentid;
+	/* A qdisc type without an init hook needs no setup: treat as success. */
+	if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS-1])) == 0) {
+		sch->next = dev->qdisc_list;
+		dev->qdisc_list = sch;
+#ifdef CONFIG_NET_ESTIMATOR
+		if (tca[TCA_RATE-1])
+			qdisc_new_estimator(&sch->stats, tca[TCA_RATE-1]);
+#endif
+		return sch;
+	}
+
+err_out:
+	*errp = err;
+	if (sch)
+		kfree(sch);
+	return NULL;
+}
+
+
+/*
+   Create/delete/change/get qdisc: doit handler for RTM_{NEW,DEL,GET}QDISC.
+ */
+
+static int tc_ctl_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
+{
+	struct tcmsg *tcm = NLMSG_DATA(n);
+	struct rtattr **tca = arg;
+	struct device *dev;
+	u32 clid = tcm->tcm_parent;
+	struct Qdisc *old_q;
+	struct Qdisc *q = NULL;		/* qdisc being operated on */
+	struct Qdisc *p = NULL;		/* parent qdisc selected by tcm_parent */
+	struct Qdisc *leaf = NULL;	/* qdisc currently occupying the slot */
+	struct Qdisc_ops *qops = NULL;
+	int err;
+
+	/* Find device */
+	if ((dev = dev_get_by_index(tcm->tcm_ifindex)) == NULL)
+		return -ENODEV;
+
+	/* If parent is specified, it must exist
+	   and tcm_parent selects a class in parent which
+	   new qdisc will be attached to.
+
+	   The place may be already busy by another qdisc,
+	   remember this fact, if it was not auto-created discipline.
+	 */
+	if (clid) {
+		if (clid != TC_H_ROOT) {
+			p = qdisc_lookup(dev, TC_H_MAJ(clid));
+			if (p == NULL)
+				return -ENOENT;
+			leaf = qdisc_lookup_class(dev, clid);
+		} else
+			leaf = dev->qdisc_sleeping;
+
+		if (leaf && leaf->flags&TCQ_F_DEFAULT && n->nlmsg_type == RTM_NEWQDISC)
+			leaf = NULL;
+
+		/*
+		   Also, leaf may be exactly that qdisc, which we want
+		   to control. Remember this to avoid one more qdisc_lookup.
+		 */
+
+		if (leaf && leaf->handle == tcm->tcm_handle)
+			q = leaf;
+	}
+
+	/* Try to locate the discipline */
+	if (tcm->tcm_handle && q == NULL) {
+		if (TC_H_MIN(tcm->tcm_handle))
+			return -EINVAL;
+		q = qdisc_lookup(dev, tcm->tcm_handle);
+	}
+
+	/* If discipline already exists, check that its real parent
+	   matches to one selected by tcm_parent.
+	 */
+	/* NOTE(review): with tcm_parent == 0, p stays NULL below -- confirm. */
+	if (q) {
+		if (clid && p != q->parent)
+			return -EINVAL;
+		BUG_TRAP(!leaf || leaf == q);
+		if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))
+			return -EINVAL;
+		clid = q->classid;	/* graft point is where q already sits */
+		goto process_existing;
+	}
+
+	/* The discipline is known not to exist.
+	   If parent was not selected too, return error.
+	 */
+	if (clid == 0)
+		return tcm->tcm_handle ? -ENOENT : -EINVAL;
+
+	/* Check for the case when leaf is exactly the thing,
+	   that you want.
+	 */
+
+	if (leaf && tcm->tcm_handle == 0) {
+		q = leaf;
+		if (!tca[TCA_KIND-1] || rtattr_strcmp(tca[TCA_KIND-1], q->ops->id) == 0)
+			goto process_existing;
+	}
+
+	if (n->nlmsg_type != RTM_NEWQDISC || !(n->nlmsg_flags&NLM_F_CREATE))
+		return -ENOENT;
+	if (leaf && n->nlmsg_flags&NLM_F_EXCL)
+		return -EEXIST;
+
+create_and_graft:
+	q = qdisc_create(dev, qops, tcm->tcm_handle, clid, tca, &err);
+	if (q == NULL)
+		return err;
+
+graft:
+	err = qdisc_graft(dev, p, clid, q, &old_q);
+	if (err) {
+		if (q)
+			qdisc_destroy(q);
+		return err;
+	}
+	qdisc_notify(skb, n, old_q, q);
+	if (old_q)
+		qdisc_destroy(old_q);	/* the displaced qdisc is released only after notification */
+	return 0;
+
+process_existing:
+
+	switch (n->nlmsg_type) {
+	case RTM_NEWQDISC:
+		if (n->nlmsg_flags&NLM_F_EXCL)
+			return -EEXIST;
+		qops = q->ops;		/* replace: build a fresh qdisc of the same kind */
+		goto create_and_graft;
+	case RTM_GETQDISC:	/* report the qdisc through qdisc_notify() */
+		qdisc_notify(skb, n, NULL, q);
+		return 0;
+	case RTM_DELQDISC:
+		q = NULL;		/* delete == graft "nothing" in its place */
+		goto graft;
+	default:
+		return -EINVAL;
+	}
+}
+/* Append one netlink message describing q to skb; -1 (skb trimmed) on failure. */
+static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q,
+			 pid_t pid, u32 seq, unsigned flags, int event)
+{
+	struct tcmsg *tcm;
+	struct nlmsghdr  *nlh;
+	unsigned char	 *b = skb->tail;	/* start of our message, for rollback */
+
+	/* NLMSG_PUT/RTA_PUT presumably jump to the *_failure labels when skb is full -- macros not shown here. */
+	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*tcm));
+	nlh->nlmsg_flags = flags;
+	tcm = NLMSG_DATA(nlh);
+	tcm->tcm_family = AF_UNSPEC;
+	tcm->tcm_ifindex = q->dev ? q->dev->ifindex : 0;
+	tcm->tcm_parent = q->classid;
+	tcm->tcm_handle = q->handle;
+	tcm->tcm_info = 0;
+	RTA_PUT(skb, TCA_KIND, IFNAMSIZ, q->ops->id);
+	if (q->ops->dump && q->ops->dump(q, skb) < 0)
+		goto rtattr_failure;
+	q->stats.qlen = q->q.qlen;	/* refresh the snapshot before copying stats */
+	RTA_PUT(skb, TCA_STATS, sizeof(q->stats), &q->stats);
+	nlh->nlmsg_len = skb->tail - b;	/* final length covers all attributes */
+	return skb->len;
+
+nlmsg_failure:
+rtattr_failure:
+	skb_trim(skb, b - skb->data);	/* drop the partial message */
+	return -1;
+}
+/* Send a DELQDISC record for "old" (unless TCQ_F_DEFAULT) and/or a NEWQDISC one for "new". */
+static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n,
+			 struct Qdisc *old, struct Qdisc *new)
+{
+	pid_t pid = 0;
+	struct sk_buff *skb;
+
+	if (oskb)
+		pid = NETLINK_CB(oskb).pid;
+	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (skb == NULL)
+		return -ENOBUFS;
+
+	if (old && !(old->flags&TCQ_F_DEFAULT) &&
+	    tc_fill_qdisc(skb, old, pid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0)
+		goto err_out;
+	if (new &&
+	    tc_fill_qdisc(skb, new, pid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
+		goto err_out;
+
+	if (skb->len)
+		return rtnetlink_send(skb, pid, RTMGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
+
+err_out:
+	kfree_skb(skb);
+	return -EINVAL;
+}
+/* Dump handler: emit every qdisc of every device, resuming from cb->args[0..1]. */
+static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	int idx, q_idx;
+	int s_idx, s_q_idx;
+	struct device *dev;
+	struct Qdisc *q;
+
+	s_idx = cb->args[0];			/* device index to resume at */
+	s_q_idx = q_idx = cb->args[1];		/* qdisc index within that device */
+	for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) {
+		if (idx < s_idx)
+			continue;
+		if (idx > s_idx)
+			s_q_idx = 0;		/* later devices start from their first qdisc */
+		for (q = dev->qdisc_list, q_idx = 0; q;
+		     q = q->next, q_idx++) {
+			if (q_idx < s_q_idx)
+				continue;
+			if (tc_fill_qdisc(skb, q, NETLINK_CB(cb->skb).pid,
+					  cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
+				goto done;	/* fill failed: stop and save position */
+		}
+	}
+
+done:
+	cb->args[0] = idx;	/* remember where to continue on the next call */
+	cb->args[1] = q_idx;
+
+	return skb->len;
+}
+
+
+
+/************************************************
+ *	Traffic classes manipulation.		*
+ ************************************************/
+
+
+/* Create/delete/change/get traffic class: doit handler for RTM_{NEW,DEL,GET}TCLASS. */
+static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
+{
+	struct tcmsg *tcm = NLMSG_DATA(n);
+	struct rtattr **tca = arg;
+	struct device *dev;
+	struct Qdisc *q = NULL;
+	struct Qdisc_class_ops *cops;
+	unsigned long cl = 0;		/* class reference from cops->get(), 0 if none */
+	unsigned long new_cl;
+	u32 pid = tcm->tcm_parent;	/* parent class id (not a process id) */
+	u32 clid = tcm->tcm_handle;
+	u32 qid = TC_H_MAJ(clid);
+	int err;
+
+	if ((dev = dev_get_by_index(tcm->tcm_ifindex)) == NULL)
+		return -ENODEV;
+
+	/*
+	   parent == TC_H_UNSPEC - unspecified parent.
+	   parent == TC_H_ROOT   - class is root, which has no parent.
+	   parent == X:0	 - parent is root class.
+	   parent == X:Y	 - parent is a node in hierarchy.
+	   parent == 0:Y	 - parent is X:Y, where X:0 is qdisc.
+
+	   handle == 0:0	 - generate handle from kernel pool.
+	   handle == 0:Y	 - class is X:Y, where X:0 is qdisc.
+	   handle == X:Y	 - clear.
+	   handle == X:0	 - root class.
+	 */
+
+	/* Step 1. Determine qdisc handle X:0 */
+
+	if (pid != TC_H_ROOT) {
+		u32 qid1 = TC_H_MAJ(pid);
+
+		if (qid && qid1) {
+			/* If both majors are known, they must be identical. */
+			if (qid != qid1)
+				return -EINVAL;
+		} else if (qid1) {
+			qid = qid1;
+		} else if (qid == 0)
+			qid = dev->qdisc_sleeping->handle;
+
+		/* Now qid is genuine qdisc handle consistent
+		   both with parent and child.
+
+		   TC_H_MAJ(pid) still may be unspecified, complete it now.
+		 */
+		if (pid)
+			pid = TC_H_MAKE(qid, pid);
+	} else {
+		if (qid == 0)
+			qid = dev->qdisc_sleeping->handle;
+	}
+
+	/* OK. Locate qdisc */
+	if ((q = qdisc_lookup(dev, qid)) == NULL)
+		return -ENOENT;
+
+	/* And check that it supports classes */
+	cops = q->ops->cl_ops;
+	if (cops == NULL)
+		return -EINVAL;
+
+	/* Now try to get class */
+	if (clid == 0) {
+		if (pid == TC_H_ROOT)
+			clid = qid;
+	} else
+		clid = TC_H_MAKE(qid, clid);
+
+	if (clid)
+		cl = cops->get(q, clid);
+
+	if (cl == 0) {
+		/* No such class: only NEWTCLASS with NLM_F_CREATE may proceed. */
+		err = -ENOENT;
+		if (n->nlmsg_type != RTM_NEWTCLASS || !(n->nlmsg_flags&NLM_F_CREATE))
+			goto out;
+	} else {
+		switch (n->nlmsg_type) {
+		case RTM_NEWTCLASS:	/* existing class: change it unless NLM_F_EXCL */
+			err = -EEXIST;
+			if (n->nlmsg_flags&NLM_F_EXCL)
+				goto out;
+			break;
+		case RTM_DELTCLASS:
+			err = cops->delete(q, cl);
+			if (err == 0)
+				tclass_notify(skb, n, q, cl, RTM_DELTCLASS);
+			goto out;
+		case RTM_GETTCLASS:
+			err = tclass_notify(skb, n, q, cl, RTM_NEWTCLASS);
+			goto out;
+		default:
+			err = -EINVAL;
+			goto out;
+		}
+	}
+
+	new_cl = cl;	/* cops->change() gets &new_cl, presumably to return the resulting class */
+	err = cops->change(q, clid, pid, tca, &new_cl);
+	if (err == 0)
+		tclass_notify(skb, n, q, new_cl, RTM_NEWTCLASS);
+
+out:
+	if (cl)
+		cops->put(q, cl);	/* balance the cops->get() above */
+
+	return err;
+}
+
+/* Append one netlink message describing class cl of q to skb; -1 (skb trimmed) on failure. */
+static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
+			  unsigned long cl,
+			  pid_t pid, u32 seq, unsigned flags, int event)
+{
+	struct tcmsg *tcm;
+	struct nlmsghdr  *nlh;
+	unsigned char	 *b = skb->tail;	/* start of our message, for rollback */
+
+	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*tcm));
+	nlh->nlmsg_flags = flags;
+	tcm = NLMSG_DATA(nlh);
+	tcm->tcm_family = AF_UNSPEC;
+	tcm->tcm_ifindex = q->dev ? q->dev->ifindex : 0;
+	tcm->tcm_parent = q->handle;	/* both start as the qdisc handle; dump() below gets */
+	tcm->tcm_handle = q->handle;	/* tcm, presumably to fill in the class-specific ids */
+	tcm->tcm_info = 0;
+	RTA_PUT(skb, TCA_KIND, IFNAMSIZ, q->ops->id);
+	if (q->ops->cl_ops->dump && q->ops->cl_ops->dump(q, cl, skb, tcm) < 0)
+		goto rtattr_failure;
+	nlh->nlmsg_len = skb->tail - b;	/* final length covers all attributes */
+	return skb->len;
+
+nlmsg_failure:
+rtattr_failure:
+	skb_trim(skb, b - skb->data);	/* drop the partial message */
+	return -1;
+}
+/* Build one tclass message for "cl" and send it through rtnetlink_send(). */
+static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
+			  struct Qdisc *q, unsigned long cl, int event)
+{
+	pid_t pid = 0;
+	struct sk_buff *skb;
+
+	if (oskb)
+		pid = NETLINK_CB(oskb).pid;
+	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (skb == NULL)
+		return -ENOBUFS;
+
+	if (tc_fill_tclass(skb, q, cl, pid, n->nlmsg_seq, 0, event) >= 0)
+		return rtnetlink_send(skb, pid, RTMGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
+	kfree_skb(skb);
+	return -EINVAL;
+}
+/* Walker context for class dumps; "w" must stay first (callback casts). */
+struct qdisc_dump_args
+{
+	struct qdisc_walker w;		/* handed to cl_ops->walk() */
+	struct sk_buff *skb;		/* reply being filled */
+	struct netlink_callback *cb;	/* dump request state */
+};
+/* Walker callback: append one RTM_NEWTCLASS record for class "cl". */
+static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg)
+{
+	struct qdisc_dump_args *args = (struct qdisc_dump_args *)arg;
+	struct netlink_callback *cb = args->cb;
+	return tc_fill_tclass(args->skb, q, cl, NETLINK_CB(cb->skb).pid,
+			      cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS);
+}
+/* Dump handler: walk the classes of each classful qdisc on one device, resumably. */
+static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	int t;
+	int s_t;
+	struct device *dev;
+	struct Qdisc *q;
+	struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh);
+	struct qdisc_dump_args arg;
+
+	if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
+		return 0;
+	if ((dev = dev_get_by_index(tcm->tcm_ifindex)) == NULL)
+		return 0;
+
+	s_t = cb->args[0];	/* index of the qdisc to resume at */
+
+	for (q=dev->qdisc_list, t=0; q; q = q->next, t++) {
+		if (t < s_t) continue;
+		if (!q->ops->cl_ops) continue;
+		if (tcm->tcm_parent && TC_H_MAJ(tcm->tcm_parent) != q->handle
+		    && (tcm->tcm_parent != TC_H_ROOT || q->parent != NULL))
+			continue;
+		if (t > s_t)	/* new qdisc: clear per-qdisc state, sized by the real element type */
+			memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
+		arg.w.fn = qdisc_class_dump;
+		arg.skb = skb;
+		arg.cb = cb;
+		arg.w.stop  = 0;
+		arg.w.skip = cb->args[1];	/* classes already reported earlier */
+		arg.w.count = 0;
+		q->ops->cl_ops->walk(q, &arg.w);
+		cb->args[1] = arg.w.count;
+		if (arg.w.stop)
+			break;
+	}
+
+	cb->args[0] = t;
+
+	return skb->len;
+}
+#endif
+/* Clock conversion factors; psched_read_proc() prints them for /proc/net/psched. */
+int psched_us_per_tick = 1;
+int psched_tick_per_us = 1;
+
+#ifdef CONFIG_PROC_FS
+/* read_proc handler for /proc/net/psched: prints psched_tick_per_us and
+   psched_us_per_tick as two 8-digit hex words. Always sets *eof; returns
+   the number of bytes available at "offset", clamped to [0, length]. */
+static int psched_read_proc(char *buffer, char **start, off_t offset,
+			     int length, int *eof, void *data)
+{
+	/* Whole file is one line; serve the slice starting at offset. */
+	int total = sprintf(buffer, "%08x %08x\n",
+			    psched_tick_per_us, psched_us_per_tick);
+	int avail = total - offset;
+
+	*start = buffer + offset;
+	*eof = 1;
+
+	if (avail > length)
+		avail = length;
+	if (avail < 0)
+		avail = 0;
+	return avail;
+}
+#endif
+/* Clock origin; psched_tick() rebases it when the clock source is not the CPU. */
+psched_time_t psched_time_base;
+
+#if PSCHED_CLOCK_SOURCE == PSCHED_CPU
+psched_tdiff_t psched_clock_per_hz;	/* computed by psched_calibrate_clock() */
+int psched_clock_scale;		/* shift count derived in psched_calibrate_clock() */
+#endif
+/* Self-rearming timer that periodically refreshes the scheduler clock. */
+#ifdef PSCHED_WATCHER
+u32 psched_time_mark;
+
+static void psched_tick(unsigned long);
+
+static struct timer_list psched_timer =
+	{ NULL, NULL, 0, 0L, psched_tick };
+
+static void psched_tick(unsigned long dummy)
+{
+#if PSCHED_CLOCK_SOURCE == PSCHED_CPU
+	psched_time_t dummy_stamp;
+	PSCHED_GET_TIME(dummy_stamp);	/* result unused; presumably read for the macro's side effects */
+	psched_timer.expires = jiffies + 4*HZ;
+#else
+	unsigned long now = jiffies;	/* one snapshot so base, mark and expiry agree */
+	psched_time_base = ((u64)now)<<PSCHED_JSCALE;
+	psched_time_mark = now;
+	psched_timer.expires = now + 60*60*HZ;
+#endif
+	add_timer(&psched_timer);
+}
+#endif
+/* Measure CPU clock ticks per microsecond over ~HZ/10 jiffies; -1 if unusable. */
+#if PSCHED_CLOCK_SOURCE == PSCHED_CPU
+__initfunc(int psched_calibrate_clock(void))
+{
+	psched_time_t stamp, stamp1;
+	struct timeval tv, tv1;
+	psched_tdiff_t delay;	/* elapsed clock ticks, then ticks per usec */
+	long rdelay;		/* elapsed wall time in microseconds */
+	unsigned long stop;
+
+#if CPU == 586 || CPU == 686
+	if (!(boot_cpu_data.x86_capability & 16))	/* bit 4: presumably the TSC flag -- confirm */
+		return -1;
+#endif
+
+	start_bh_atomic();
+#ifdef PSCHED_WATCHER
+	psched_tick(0);
+#endif
+	stop = jiffies + HZ/10;
+	PSCHED_GET_TIME(stamp);
+	do_gettimeofday(&tv);
+	while (jiffies < stop)
+		boundary();	/* NOTE(review): not defined in this file; presumably a barrier -- confirm */
+	PSCHED_GET_TIME(stamp1);
+	do_gettimeofday(&tv1);
+	end_bh_atomic();
+
+	delay = PSCHED_TDIFF(stamp1, stamp);
+	rdelay = tv1.tv_usec - tv.tv_usec;
+	rdelay += (tv1.tv_sec - tv.tv_sec)*1000000;
+	if (rdelay <= 0 || rdelay > delay)	/* also guards the division below */
+		return -1;
+	delay /= rdelay;
+	psched_tick_per_us = delay;
+	while ((delay>>=1) != 0)
+		psched_clock_scale++;
+	psched_us_per_tick = 1<<psched_clock_scale;
+	/* delay is 0 after the loop above, so scale from the saved ticks-per-usec. */
+	psched_clock_per_hz = (psched_tick_per_us*(1000000/HZ))>>psched_clock_scale;
+	return 0;
+}
+#endif
+/* Boot-time setup: clock calibration, rtnetlink hooks, built-in qdiscs, /proc entry. */
+__initfunc(int pktsched_init(void))
+{
+#ifdef CONFIG_PROC_FS
+	struct proc_dir_entry *ent;
+#endif
+#ifdef CONFIG_RTNETLINK
+	struct rtnetlink_link *link_p;	/* declared up front: C89 forbids declarations after statements */
+#endif
+
+#if PSCHED_CLOCK_SOURCE == PSCHED_CPU
+	if (psched_calibrate_clock() < 0)
+		return -1;
+#elif PSCHED_CLOCK_SOURCE == PSCHED_JIFFIES
+	psched_tick_per_us = HZ<<PSCHED_JSCALE;
+	psched_us_per_tick = 1000000;
+#endif
+
+#ifdef CONFIG_RTNETLINK
+	/* Setup rtnetlink links. It is made here to avoid
+	   exporting large number of public symbols. */
+	link_p = rtnetlink_links[AF_UNSPEC];
+	if (link_p) {
+		link_p[RTM_NEWQDISC-RTM_BASE].doit = tc_ctl_qdisc;
+		link_p[RTM_DELQDISC-RTM_BASE].doit = tc_ctl_qdisc;
+		link_p[RTM_GETQDISC-RTM_BASE].doit = tc_ctl_qdisc;
+		link_p[RTM_GETQDISC-RTM_BASE].dumpit = tc_dump_qdisc;
+		link_p[RTM_NEWTCLASS-RTM_BASE].doit = tc_ctl_tclass;
+		link_p[RTM_DELTCLASS-RTM_BASE].doit = tc_ctl_tclass;
+		link_p[RTM_GETTCLASS-RTM_BASE].doit = tc_ctl_tclass;
+		link_p[RTM_GETTCLASS-RTM_BASE].dumpit = tc_dump_tclass;
+	}
+#endif
+
+#define INIT_QDISC(name) { \
+          extern struct Qdisc_ops name##_qdisc_ops; \
+          register_qdisc(&name##_qdisc_ops); \
+	}
+
+	INIT_QDISC(pfifo);
+	INIT_QDISC(bfifo);
+
+#ifdef CONFIG_NET_SCH_CBQ
+	INIT_QDISC(cbq);
+#endif
+#ifdef CONFIG_NET_SCH_CSZ
+	INIT_QDISC(csz);
+#endif
+#ifdef CONFIG_NET_SCH_HPFQ
+	INIT_QDISC(hpfq);
+#endif
+#ifdef CONFIG_NET_SCH_HFSC
+	INIT_QDISC(hfsc);
+#endif
+#ifdef CONFIG_NET_SCH_RED
+	INIT_QDISC(red);
+#endif
+#ifdef CONFIG_NET_SCH_SFQ
+	INIT_QDISC(sfq);
+#endif
+#ifdef CONFIG_NET_SCH_TBF
+	INIT_QDISC(tbf);
+#endif
+#ifdef CONFIG_NET_SCH_TEQL
+	teql_init();
+#endif
+#ifdef CONFIG_NET_SCH_PRIO
+	INIT_QDISC(prio);
+#endif
+#ifdef CONFIG_NET_CLS
+	tc_filter_init();
+#endif
+
+#ifdef CONFIG_PROC_FS
+	ent = create_proc_entry("net/psched", 0, 0);
+	if (ent)	/* skip the hook if the entry could not be created */
+		ent->read_proc = psched_read_proc;
+#endif
+	return 0;
+}

FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen, slshen@lbl.gov