patch-2.1.120 linux/net/ipv6/reassembly.c

Next file: linux/net/ipv6/route.c
Previous file: linux/net/ipv6/raw.c
Back to the patch index
Back to the overall index

diff -u --recursive --new-file v2.1.119/linux/net/ipv6/reassembly.c linux/net/ipv6/reassembly.c
@@ -5,7 +5,7 @@
  *	Authors:
  *	Pedro Roque		<roque@di.fc.ul.pt>	
  *
- *	$Id: reassembly.c,v 1.10 1998/04/30 16:24:32 freitag Exp $
+ *	$Id: reassembly.c,v 1.11 1998/08/26 12:05:16 davem Exp $
  *
  *	Based on: net/ipv4/ip_fragment.c
  *
@@ -41,83 +41,145 @@
 #include <net/ndisc.h>
 #include <net/addrconf.h>
 
+int sysctl_ip6frag_high_thresh = 256*1024;
+int sysctl_ip6frag_low_thresh = 192*1024;
+int sysctl_ip6frag_time = IPV6_FRAG_TIMEOUT;
+
+atomic_t ip6_frag_mem = ATOMIC_INIT(0);
+
+struct ipv6_frag {
+	__u16			offset;
+	__u16			len;
+	struct sk_buff		*skb;
+
+	struct frag_hdr		*fhdr;
+
+	struct ipv6_frag	*next;
+};
+
+/*
+ *	Equivalent of ipv4 struct ipq
+ */
+
+struct frag_queue {
+
+	struct frag_queue	*next;
+	struct frag_queue	*prev;
+
+	__u32			id;		/* fragment id		*/
+	struct in6_addr		saddr;
+	struct in6_addr		daddr;
+	struct timer_list	timer;		/* expire timer		*/
+	struct ipv6_frag	*fragments;
+	struct device		*dev;
+	int			iif;
+	__u8			last_in;	/* has first/last segment arrived? */
+#define FIRST_IN		2
+#define LAST_IN			1
+	__u8			nexthdr;
+	__u16			nhoffset;
+};
 
 static struct frag_queue ipv6_frag_queue = {
 	&ipv6_frag_queue, &ipv6_frag_queue,
 	0, {{{0}}}, {{{0}}},
 	{0}, NULL, NULL,
-	0, 0, NULL
+	0, 0, 0, 0
 };
 
+/* Memory Tracking Functions. */
+extern __inline__ void frag_kfree_skb(struct sk_buff *skb)
+{
+	atomic_sub(skb->truesize, &ip6_frag_mem);
+	kfree_skb(skb);
+}
+
+extern __inline__ void frag_kfree_s(void *ptr, int len)
+{
+	atomic_sub(len, &ip6_frag_mem);
+	kfree(ptr);
+}
+ 
+extern __inline__ void *frag_kmalloc(int size, int pri)
+{
+	void *vp = kmalloc(size, pri);
+
+	if(!vp)
+		return NULL;
+	atomic_add(size, &ip6_frag_mem);
+	return vp;
+}
+
+
 static void			create_frag_entry(struct sk_buff *skb, 
-						  struct device *dev,
 						  __u8 *nhptr,
 						  struct frag_hdr *fhdr);
-static int			reasm_frag_1(struct frag_queue *fq, 
-					     struct sk_buff **skb_in);
+static u8 *			reasm_frag(struct frag_queue *fq, 
+					   struct sk_buff **skb_in);
 
 static void			reasm_queue(struct frag_queue *fq, 
 					    struct sk_buff *skb, 
-					    struct frag_hdr *fhdr);
-
-static int reasm_frag(struct frag_queue *fq, struct sk_buff **skb, 
-		      __u8 *nhptr,
-		      struct frag_hdr *fhdr)
-{
-	__u32	expires = jiffies + IPV6_FRAG_TIMEOUT;
-	int nh;
+					    struct frag_hdr *fhdr,
+					    u8 *nhptr);
 
-	if (del_timer(&fq->timer))
-		expires = fq->timer.expires;
+static void			fq_free(struct frag_queue *fq);
 
-	/*
-	 *	We queue the packet even if it's the last.
-	 *	It's a trade off. This allows the reassembly 
-	 *	code to be simpler (=faster) and of the
-	 *	steps we do for queueing the only unnecessary 
-	 *	one it's the kmalloc for a struct ipv6_frag.
-	 *	Feel free to try other alternatives...
-	 */
-	if ((fhdr->frag_off & __constant_htons(0x0001)) == 0) {
-		fq->last_in = 1;
-		fq->nhptr = nhptr;
-	}
-	reasm_queue(fq, *skb, fhdr);
+static void frag_prune(void)
+{
+	struct frag_queue *fq;
 
-	if (fq->last_in) {
-		if ((nh = reasm_frag_1(fq, skb)))
-			return nh;
+	while ((fq = ipv6_frag_queue.next) != &ipv6_frag_queue) {
+		ipv6_statistics.Ip6ReasmFails++;
+		fq_free(fq);
+		if (atomic_read(&ip6_frag_mem) <= sysctl_ip6frag_low_thresh)
+			return;
 	}
-
-	fq->timer.expires = expires;
-	add_timer(&fq->timer);
-	
-	return 0;
+	if (atomic_read(&ip6_frag_mem))
+		printk(KERN_DEBUG "IPv6 frag_prune: memleak\n");
+	atomic_set(&ip6_frag_mem, 0);
 }
 
-int ipv6_reassembly(struct sk_buff **skbp, struct device *dev, __u8 *nhptr,
-		    struct ipv6_options *opt)
+
+u8* ipv6_reassembly(struct sk_buff **skbp, __u8 *nhptr)
 {
 	struct sk_buff *skb = *skbp; 
 	struct frag_hdr *fhdr = (struct frag_hdr *) (skb->h.raw);
 	struct frag_queue *fq;
 	struct ipv6hdr *hdr;
 
+	hdr = skb->nh.ipv6h;
+
+	ipv6_statistics.Ip6ReasmReqds++;
+
+	/* Jumbo payload inhibits frag. header */
+	if (hdr->payload_len==0) {
+		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb->h.raw);
+		return NULL;
+	}
 	if ((u8 *)(fhdr+1) > skb->tail) {
 		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb->h.raw);
-		return 0;
+		return NULL;
 	}
-	hdr = skb->nh.ipv6h;
+	if (atomic_read(&ip6_frag_mem) > sysctl_ip6frag_high_thresh)
+		frag_prune();
+
 	for (fq = ipv6_frag_queue.next; fq != &ipv6_frag_queue; fq = fq->next) {
 		if (fq->id == fhdr->identification && 
 		    !ipv6_addr_cmp(&hdr->saddr, &fq->saddr) &&
-		    !ipv6_addr_cmp(&hdr->daddr, &fq->daddr))
-			return reasm_frag(fq, skbp, nhptr,fhdr);
+		    !ipv6_addr_cmp(&hdr->daddr, &fq->daddr)) {
+
+			reasm_queue(fq, skb, fhdr, nhptr);
+
+			if (fq->last_in == (FIRST_IN|LAST_IN))
+				return reasm_frag(fq, skbp);
+
+			return NULL;
+		}
 	}
-	
-	create_frag_entry(skb, dev, nhptr, fhdr);
 
-	return 0;
+	create_frag_entry(skb, nhptr, fhdr);
+
+	return NULL;
 }
 
 
@@ -125,11 +187,13 @@
 {
 	struct ipv6_frag *fp, *back;
 
-	for(fp = fq->fragments; fp; ) {
-		kfree_skb(fp->skb);		
+	del_timer(&fq->timer);
+
+	for (fp = fq->fragments; fp; ) {
+		frag_kfree_skb(fp->skb);
 		back = fp;
 		fp=fp->next;
-		kfree(back);
+		frag_kfree_s(back, sizeof(*back));
 	}
 
 	fq->prev->next = fq->next;
@@ -137,7 +201,7 @@
 
 	fq->prev = fq->next = NULL;
 	
-	kfree(fq);
+	frag_kfree_s(fq, sizeof(*fq));
 }
 
 static void frag_expire(unsigned long data)
@@ -147,33 +211,50 @@
 
 	fq = (struct frag_queue *) data;
 
-	del_timer(&fq->timer);
-
 	frag = fq->fragments;
 
+	ipv6_statistics.Ip6ReasmTimeout++;
+	ipv6_statistics.Ip6ReasmFails++;
+
 	if (frag == NULL) {
 		printk(KERN_DEBUG "invalid fragment queue\n");
 		return;
 	}
 
-	icmpv6_send(frag->skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0,
-		    frag->skb->dev);
+	/* Send error only if the first segment arrived.
+	   (fixed --ANK (980728))
+	 */
+	if (fq->last_in&FIRST_IN) {
+		struct device *dev = dev_get_by_index(fq->iif);
+
+		/*
+		   But use as source device on which LAST ARRIVED
+		   segment was received. And do not use fq->dev
+		   pointer directly, device might already disappeared.
+		 */
+		if (dev) {
+			frag->skb->dev = dev;
+			icmpv6_send(frag->skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0,
+				    dev);
+		}
+	}
 	
 	fq_free(fq);
 }
 
 
-static void create_frag_entry(struct sk_buff *skb, struct device *dev, 
+static void create_frag_entry(struct sk_buff *skb,
 			      __u8 *nhptr,
 			      struct frag_hdr *fhdr)
 {
 	struct frag_queue *fq;
 	struct ipv6hdr *hdr; 
 
-	fq = (struct frag_queue *) kmalloc(sizeof(struct frag_queue), 
-					   GFP_ATOMIC);
+	fq = (struct frag_queue *) frag_kmalloc(sizeof(struct frag_queue), 
+						GFP_ATOMIC);
 
 	if (fq == NULL) {
+		ipv6_statistics.Ip6ReasmFails++;
 		kfree_skb(skb);
 		return;
 	}
@@ -186,38 +267,41 @@
 	ipv6_addr_copy(&fq->saddr, &hdr->saddr);
 	ipv6_addr_copy(&fq->daddr, &hdr->daddr);
 
-	fq->dev = dev;
-
 	/* init_timer has been done by the memset */
 	fq->timer.function = frag_expire;
 	fq->timer.data = (long) fq;
-	fq->timer.expires = jiffies + IPV6_FRAG_TIMEOUT;
-
-	fq->nexthdr = fhdr->nexthdr;
-
+	fq->timer.expires = jiffies + sysctl_ip6frag_time;
 
-	if ((fhdr->frag_off & __constant_htons(0x0001)) == 0) {
-		fq->last_in = 1;
-		fq->nhptr = nhptr;
-	}
-	reasm_queue(fq, skb, fhdr);
+	reasm_queue(fq, skb, fhdr, nhptr);
 
-	fq->prev = ipv6_frag_queue.prev;
-	fq->next = &ipv6_frag_queue;
-	fq->prev->next = fq;
-	ipv6_frag_queue.prev = fq;
-	
-	add_timer(&fq->timer);
+	if (fq->fragments) {
+		fq->prev = ipv6_frag_queue.prev;
+		fq->next = &ipv6_frag_queue;
+		fq->prev->next = fq;
+		ipv6_frag_queue.prev = fq;
+
+		add_timer(&fq->timer);
+	} else
+		frag_kfree_s(fq, sizeof(*fq));
 }
 
 
+/*
+ *	We queue the packet even if it's the last.
+ *	It's a trade off. This allows the reassembly 
+ *	code to be simpler (=faster) and of the
+ *	steps we do for queueing the only unnecessary 
+ *	one it's the kmalloc for a struct ipv6_frag.
+ *	Feel free to try other alternatives...
+ */
+
 static void reasm_queue(struct frag_queue *fq, struct sk_buff *skb, 
-				     struct frag_hdr *fhdr)
+				     struct frag_hdr *fhdr, u8 *nhptr)
 {
 	struct ipv6_frag *nfp, *fp, **bptr;
 
-	nfp = (struct ipv6_frag *) kmalloc(sizeof(struct ipv6_frag), 
-					   GFP_ATOMIC);
+	nfp = (struct ipv6_frag *) frag_kmalloc(sizeof(struct ipv6_frag), 
+						GFP_ATOMIC);
 
 	if (nfp == NULL) {		
 		kfree_skb(skb);
@@ -228,24 +312,40 @@
 	nfp->len = (ntohs(skb->nh.ipv6h->payload_len) -
 		    ((u8 *) (fhdr + 1) - (u8 *) (skb->nh.ipv6h + 1)));
 
-	if ((u32)nfp->offset + (u32)nfp->len > 65536) {
+	if ((u32)nfp->offset + (u32)nfp->len >= 65536) {
 		icmpv6_param_prob(skb,ICMPV6_HDR_FIELD, (u8*)&fhdr->frag_off); 
 		goto err;
 	}
+	if (fhdr->frag_off & __constant_htons(0x0001)) {
+		/* Check if the fragment is rounded to 8 bytes.
+		 * Required by the RFC.
+		 * ... and would break our defragmentation algorithm 8)
+		 */
+		if (nfp->len & 0x7) {
+			printk(KERN_DEBUG "fragment not rounded to 8bytes\n");
+
+			/*
+			   It is not in specs, but I see no reasons
+			   to send an error in this case. --ANK
+			 */
+			if (nfp->offset == 0)
+				icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, 
+						  &skb->nh.ipv6h->payload_len);
+			goto err;
+		}
+	}
 
 	nfp->skb  = skb;
 	nfp->fhdr = fhdr;
-
 	nfp->next = NULL;
 
 	bptr = &fq->fragments;
-	
+
 	for (fp = fq->fragments; fp; fp=fp->next) {
 		if (nfp->offset <= fp->offset)
 			break;
 		bptr = &fp->next;
 	}
-	
 	if (fp && fp->offset == nfp->offset) {
 		if (nfp->len != fp->len) {
 			printk(KERN_DEBUG "reasm_queue: dup with wrong len\n");
@@ -254,29 +354,40 @@
 		/* duplicate. discard it. */
 		goto err;
 	}
-	
-	*bptr = nfp;
-	nfp->next = fp;
 
-#ifdef STRICT_RFC
-	if (fhdr->frag_off & __constant_htons(0x0001)) {
-		/* Check if the fragment is rounded to 8 bytes.
-		 * Required by the RFC.
-		 */
-		if (nfp->len & 0x7) {
-			printk(KERN_DEBUG "fragment not rounded to 8bytes\n");
+	atomic_add(skb->truesize, &ip6_frag_mem);
 
-			icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, 
-					  &skb->nh.ipv6h->payload_len);
-			goto err;
-		}
+	/* All the checks are done, fragment is acepted.
+	   Only now we are allowed to update reassembly data!
+	   (fixed --ANK (980728))
+	 */
+
+	/* iif always set to one of the last arrived segment */
+	fq->dev = skb->dev;
+	fq->iif = skb->dev->ifindex;
+
+	/* Last fragment */
+	if ((fhdr->frag_off & __constant_htons(0x0001)) == 0)
+		fq->last_in |= LAST_IN;
+
+	/* First fragment.
+	   nexthdr and nhptr are get from the first fragment.
+	   Moreover, nexthdr is UNDEFINED for all the fragments but the
+	   first one.
+	   (fixed --ANK (980728))
+	 */
+	if (nfp->offset == 0) {
+		fq->nexthdr = fhdr->nexthdr;
+		fq->last_in |= FIRST_IN;
+		fq->nhoffset = nhptr - skb->nh.raw;
 	}
-#endif 
 
+	*bptr = nfp;
+	nfp->next = fp;
 	return;
 
 err:
-	kfree(nfp);
+	frag_kfree_s(nfp, sizeof(*nfp));
 	kfree_skb(skb);
 }
 
@@ -284,20 +395,21 @@
  *	check if this fragment completes the packet
  *	returns true on success
  */
-static int reasm_frag_1(struct frag_queue *fq, struct sk_buff **skb_in)
+static u8* reasm_frag(struct frag_queue *fq, struct sk_buff **skb_in)
 {
 	struct ipv6_frag *fp;
+	struct ipv6_frag *head = fq->fragments;
 	struct ipv6_frag *tail = NULL;
 	struct sk_buff *skb;
 	__u32  offset = 0;
 	__u32  payload_len;
 	__u16  unfrag_len;
 	__u16  copy;
-	int    nh;
+	u8     *nhptr;
 
-	for(fp = fq->fragments; fp; fp=fp->next) {
+	for(fp = head; fp; fp=fp->next) {
 		if (offset != fp->offset)
-			return 0;
+			return NULL;
 
 		offset += fp->len;
 		tail = fp;
@@ -309,31 +421,42 @@
 	 * this means we have all fragments.
 	 */
 
-	unfrag_len = (u8 *) (tail->fhdr) - (u8 *) (tail->skb->nh.ipv6h + 1);
+	/* Unfragmented part is taken from the first segment.
+	   (fixed --ANK (980728))
+	 */
+	unfrag_len = (u8 *) (head->fhdr) - (u8 *) (head->skb->nh.ipv6h + 1);
 
 	payload_len = (unfrag_len + tail->offset + 
 		       (tail->skb->tail - (__u8 *) (tail->fhdr + 1)));
 
-#if 0
-	printk(KERN_DEBUG "reasm: payload len = %d\n", payload_len);
-#endif
+	if (payload_len > 65535) {
+		if (net_ratelimit())
+			printk(KERN_DEBUG "reasm_frag: payload len = %d\n", payload_len);
+		ipv6_statistics.Ip6ReasmFails++;
+		fq_free(fq);
+		return NULL;
+	}
 
 	if ((skb = dev_alloc_skb(sizeof(struct ipv6hdr) + payload_len))==NULL) {
-		printk(KERN_DEBUG "reasm_frag: no memory for reassembly\n");
+		if (net_ratelimit())
+			printk(KERN_DEBUG "reasm_frag: no memory for reassembly\n");
+		ipv6_statistics.Ip6ReasmFails++;
 		fq_free(fq);
-		return 1;
+		return NULL;
 	}
 
 	copy = unfrag_len + sizeof(struct ipv6hdr);
 
 	skb->nh.ipv6h = (struct ipv6hdr *) skb->data;
-
 	skb->dev = fq->dev;
-
-	nh = fq->nexthdr;
-
-	*(fq->nhptr) = nh;
-	memcpy(skb_put(skb, copy), tail->skb->nh.ipv6h, copy);
+	skb->protocol = __constant_htons(ETH_P_IPV6);
+	skb->pkt_type = head->skb->pkt_type;
+	memcpy(skb->cb, head->skb->cb, sizeof(skb->cb));
+	skb->dst = dst_clone(head->skb->dst);
+
+	memcpy(skb_put(skb, copy), head->skb->nh.ipv6h, copy);
+	nhptr = skb->nh.raw + fq->nhoffset;
+	*nhptr = fq->nexthdr;
 
 	skb->h.raw = skb->tail;
 
@@ -351,18 +474,19 @@
 		struct ipv6_frag *back;
 
 		memcpy(skb_put(skb, fp->len), (__u8*)(fp->fhdr + 1), fp->len);
-		kfree_skb(fp->skb);
+		frag_kfree_skb(fp->skb);
 		back = fp;
 		fp=fp->next;
-		kfree(back);
+		frag_kfree_s(back, sizeof(*back));
 	}
-	
+
+	del_timer(&fq->timer);
 	fq->prev->next = fq->next;
 	fq->next->prev = fq->prev;
-
 	fq->prev = fq->next = NULL;
-	
-	kfree(fq);
 
-	return nh;
+	frag_kfree_s(fq, sizeof(*fq));
+
+	ipv6_statistics.Ip6ReasmOKs++;
+	return nhptr;
 }

FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen, slshen@lbl.gov