patch-2.4.21 linux-2.4.21/net/ipv4/ip_fragment.c

Next file: linux-2.4.21/net/ipv4/ipip.c
Previous file: linux-2.4.21/net/ipv4/igmp.c
Back to the patch index
Back to the overall index

diff -urN linux-2.4.20/net/ipv4/ip_fragment.c linux-2.4.21/net/ipv4/ip_fragment.c
@@ -19,6 +19,7 @@
  *		Bill Hawes	:	Frag accounting and evictor fixes.
  *		John McDonald	:	0 length frag bug.
  *		Alexey Kuznetsov:	SMP races, threading, cleanup.
+ *		Patrick McHardy :	LRU queue of frag heads for evictor.
  */
 
 #include <linux/config.h>
@@ -26,9 +27,12 @@
 #include <linux/mm.h>
 #include <linux/sched.h>
 #include <linux/skbuff.h>
+#include <linux/list.h>
 #include <linux/ip.h>
 #include <linux/icmp.h>
 #include <linux/netdevice.h>
+#include <linux/jhash.h>
+#include <linux/random.h>
 #include <net/sock.h>
 #include <net/ip.h>
 #include <net/icmp.h>
@@ -67,6 +71,7 @@
 /* Describe an entry in the "incomplete datagrams" queue. */
 struct ipq {
 	struct ipq	*next;		/* linked list pointers			*/
+	struct list_head lru_list;	/* lru list member 			*/
 	u32		saddr;
 	u32		daddr;
 	u16		id;
@@ -94,6 +99,8 @@
 /* Per-bucket lock is easy to add now. */
 static struct ipq *ipq_hash[IPQ_HASHSZ];
 static rwlock_t ipfrag_lock = RW_LOCK_UNLOCKED;
+static u32 ipfrag_hash_rnd;
+static LIST_HEAD(ipq_lru_list);
 int ip_frag_nqueues = 0;
 
 static __inline__ void __ipq_unlink(struct ipq *qp)
@@ -101,6 +108,7 @@
 	if(qp->next)
 		qp->next->pprev = qp->pprev;
 	*qp->pprev = qp->next;
+	list_del(&qp->lru_list);
 	ip_frag_nqueues--;
 }
 
@@ -111,21 +119,51 @@
 	write_unlock(&ipfrag_lock);
 }
 
-/*
- * Was:	((((id) >> 1) ^ (saddr) ^ (daddr) ^ (prot)) & (IPQ_HASHSZ - 1))
- *
- * I see, I see evil hand of bigendian mafia. On Intel all the packets hit
- * one hash bucket with this hash function. 8)
- */
-static __inline__ unsigned int ipqhashfn(u16 id, u32 saddr, u32 daddr, u8 prot)
+static unsigned int ipqhashfn(u16 id, u32 saddr, u32 daddr, u8 prot)
 {
-	unsigned int h = saddr ^ daddr;
-
-	h ^= (h>>16)^id;
-	h ^= (h>>8)^prot;
-	return h & (IPQ_HASHSZ - 1);
+	return jhash_3words((u32)id << 16 | prot, saddr, daddr,
+			    ipfrag_hash_rnd) & (IPQ_HASHSZ - 1);
 }
 
+static struct timer_list ipfrag_secret_timer;
+int sysctl_ipfrag_secret_interval = 10 * 60 * HZ;
+
+static void ipfrag_secret_rebuild(unsigned long dummy)
+{
+	unsigned long now = jiffies;
+	int i;
+
+	write_lock(&ipfrag_lock);
+	get_random_bytes(&ipfrag_hash_rnd, sizeof(u32));
+	for (i = 0; i < IPQ_HASHSZ; i++) {
+		struct ipq *q;
+
+		q = ipq_hash[i];
+		while (q) {
+			struct ipq *next = q->next;
+			unsigned int hval = ipqhashfn(q->id, q->saddr,
+						      q->daddr, q->protocol);
+
+			if (hval != i) {
+				/* Unlink. */
+				if (q->next)
+					q->next->pprev = q->pprev;
+				*q->pprev = q->next;
+
+				/* Relink to new hash chain. */
+				if ((q->next = ipq_hash[hval]) != NULL)
+					q->next->pprev = &q->next;
+				ipq_hash[hval] = q;
+				q->pprev = &ipq_hash[hval];
+			}
+
+			q = next;
+		}
+	}
+	write_unlock(&ipfrag_lock);
+
+	mod_timer(&ipfrag_secret_timer, now + sysctl_ipfrag_secret_interval);
+}
 
 atomic_t ip_frag_mem = ATOMIC_INIT(0);	/* Memory used for fragments */
 
@@ -202,39 +240,30 @@
  */
 static void ip_evictor(void)
 {
-	int i, progress;
+	struct ipq *qp;
+	struct list_head *tmp;
 
-	do {
+	for(;;) {
 		if (atomic_read(&ip_frag_mem) <= sysctl_ipfrag_low_thresh)
 			return;
-		progress = 0;
-		/* FIXME: Make LRU queue of frag heads. -DaveM */
-		for (i = 0; i < IPQ_HASHSZ; i++) {
-			struct ipq *qp;
-			if (ipq_hash[i] == NULL)
-				continue;
-
-			read_lock(&ipfrag_lock);
-			if ((qp = ipq_hash[i]) != NULL) {
-				/* find the oldest queue for this hash bucket */
-				while (qp->next)
-					qp = qp->next;
-				atomic_inc(&qp->refcnt);
-				read_unlock(&ipfrag_lock);
-
-				spin_lock(&qp->lock);
-				if (!(qp->last_in&COMPLETE))
-					ipq_kill(qp);
-				spin_unlock(&qp->lock);
-
-				ipq_put(qp);
-				IP_INC_STATS_BH(IpReasmFails);
-				progress = 1;
-				continue;
-			}
+		read_lock(&ipfrag_lock);
+		if (list_empty(&ipq_lru_list)) {
 			read_unlock(&ipfrag_lock);
+			return;
 		}
-	} while (progress);
+		tmp = ipq_lru_list.next;
+		qp = list_entry(tmp, struct ipq, lru_list);
+		atomic_inc(&qp->refcnt);
+		read_unlock(&ipfrag_lock);
+
+		spin_lock(&qp->lock);
+		if (!(qp->last_in&COMPLETE))
+			ipq_kill(qp);
+		spin_unlock(&qp->lock);
+
+		ipq_put(qp);
+		IP_INC_STATS_BH(IpReasmFails);
+	}
 }
 
 /*
@@ -302,6 +331,8 @@
 		qp->next->pprev = &qp->next;
 	ipq_hash[hash] = qp;
 	qp->pprev = &ipq_hash[hash];
+	INIT_LIST_HEAD(&qp->lru_list);
+	list_add_tail(&qp->lru_list, &ipq_lru_list);
 	ip_frag_nqueues++;
 	write_unlock(&ipfrag_lock);
 	return qp;
@@ -496,6 +527,10 @@
 	if (offset == 0)
 		qp->last_in |= FIRST_IN;
 
+	write_lock(&ipfrag_lock);
+	list_move_tail(&qp->lru_list, &ipq_lru_list);
+	write_unlock(&ipfrag_lock);
+
 	return;
 
 err:
@@ -629,3 +664,14 @@
 	kfree_skb(skb);
 	return NULL;
 }
+
+void ipfrag_init(void)
+{
+	ipfrag_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
+				 (jiffies ^ (jiffies >> 6)));
+
+	init_timer(&ipfrag_secret_timer);
+	ipfrag_secret_timer.function = ipfrag_secret_rebuild;
+	ipfrag_secret_timer.expires = jiffies + sysctl_ipfrag_secret_interval;
+	add_timer(&ipfrag_secret_timer);
+}

FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)