patch-2.1.102 linux/net/ipv4/ip_fw.c

Next file: linux/net/ipv4/ip_input.c
Previous file: linux/net/ipv4/fib_frontend.c
Back to the patch index
Back to the overall index

diff -u --recursive --new-file v2.1.101/linux/net/ipv4/ip_fw.c linux/net/ipv4/ip_fw.c
@@ -1,96 +1,44 @@
 /*
- *	IP firewalling code. This is taken from 4.4BSD. Please note the 
- *	copyright message below. As per the GPL it must be maintained
- *	and the licenses thus do not conflict. While this port is subject
- *	to the GPL I also place my modifications under the original 
- *	license in recognition of the original copyright. 
- *				-- Alan Cox.
+ * This code is heavily based on the code in ip_fw.c; see that file for
+ * copyrights and attributions.  This code is basically GPL.
  *
- *	$Id: ip_fw.c,v 1.35 1998/04/30 16:29:51 freitag Exp $
- *
- *	Ported from BSD to Linux,
- *		Alan Cox 22/Nov/1994.
- *	Zeroing /proc and other additions
- *		Jos Vos 4/Feb/1995.
- *	Merged and included the FreeBSD-Current changes at Ugen's request
- *	(but hey it's a lot cleaner now). Ugen would prefer in some ways
- *	we waited for his final product but since Linux 1.2.0 is about to
- *	appear it's not practical - Read: It works, it's not clean but please
- *	don't consider it to be his standard of finished work.
- *		Alan Cox 12/Feb/1995
- *	Porting bidirectional entries from BSD, fixing accounting issues,
- *	adding struct ip_fwpkt for checking packets with interface address
- *		Jos Vos 5/Mar/1995.
- *	Established connections (ACK check), ACK check on bidirectional rules,
- *	ICMP type check.
- *		Wilfred Mollenvanger 7/7/1995.
- *	TCP attack protection.
- *		Alan Cox 25/8/95, based on information from bugtraq.
- *	ICMP type printk, IP_FW_F_APPEND
- *		Bernd Eckenfels 1996-01-31
- *	Split blocking chain into input and output chains, add new "insert" and
- *	"append" commands to replace semi-intelligent "add" command, let "delete".
- *	only delete the first matching entry, use 0xFFFF (0xFF) as ports (ICMP
- *	types) when counting packets being 2nd and further fragments.
- *		Jos Vos <jos@xos.nl> 8/2/1996.
- *	Add support for matching on device names.
- *		Jos Vos <jos@xos.nl> 15/2/1996.
- *	Transparent proxying support.
- *		Willy Konynenberg <willy@xos.nl> 10/5/96.
- *	Make separate accounting on incoming and outgoing packets possible.
- *		Jos Vos <jos@xos.nl> 18/5/1996.
- *	Added trap out of bad frames.
- *		Alan Cox <alan@cymru.net> 17/11/1996
- *
- *
- * Masquerading functionality
- *
- * Copyright (c) 1994 Pauline Middelink
- *
- * The pieces which added masquerading functionality are totally
- * my responsibility and have nothing to with the original authors
- * copyright or doing.
- *
- * Parts distributed under GPL.
- *
- * Fixes:
- *	Pauline Middelink	:	Added masquerading.
- *	Alan Cox		:	Fixed an error in the merge.
- *	Thomas Quinot		:	Fixed port spoofing.
- *	Alan Cox		:	Cleaned up retransmits in spoofing.
- *	Alan Cox		:	Cleaned up length setting.
- *	Wouter Gadeyne		:	Fixed masquerading support of ftp PORT commands
- *
- *	Juan Jose Ciarlante	:	Masquerading code moved to ip_masq.c
- *	Andi Kleen :		Print frag_offsets and the ip flags properly.
- *
- *	All the real work was done by .....
- *
- */
-
-
-/*
- * Copyright (c) 1993 Daniel Boulet
- * Copyright (c) 1994 Ugen J.S.Antsilevich
- *
- * Redistribution and use in source forms, with and without modification,
- * are permitted provided that this entire comment appears intact.
- *
- * Redistribution in binary form may occur without any restrictions.
- * Obviously, it would be nice if you gave credit where credit is due
- * but requiring it would be too onerous.
- *
- * This software is provided ``AS IS'' without any warranties of any kind.
+ * 15-Aug-1997: Major changes to allow graphs for firewall rules.
+ *              Paul Russell <Paul.Russell@rustcorp.com.au> and
+ *		Michael Neuling <Michael.Neuling@rustcorp.com.au> 
+ * 24-Aug-1997: Generalised protocol handling (not just TCP/UDP/ICMP).
+ *              Added explicit RETURN from chains.
+ *              Removed TOS mangling (done in ipchains 1.0.1).
+ *              Fixed read & reset bug by reworking proc handling.
+ *              Paul Russell <Paul.Russell@rustcorp.com.au>
+ * 28-Sep-1997: Added packet marking for net sched code.
+ *              Removed fw_via comparisons: all done on device name now,
+ *              similar to changes in ip_fw.c in DaveM's CVS970924 tree.
+ *              Paul Russell <Paul.Russell@rustcorp.com.au>
+ * 2-Nov-1997:  Moved types across to __u16, etc.
+ *              Added inverse flags.
+ *              Fixed fragment bug (in args to port_match).
+ *              Changed mark to only one flag (MARKABS).
+ * 21-Nov-1997: Added ability to test ICMP code.
+ * 19-Jan-1998: Added wildcard interfaces.
+ * 6-Feb-1998:  Merged 2.0 and 2.1 versions.
+ *              Initialised ip_masq for 2.0.x version.
+ *              Added explicit NETLINK option for 2.1.x version.
+ *              Added packet and byte counters for policy matches.
+ * 26-Feb-1998: Fixed race conditions, added SMP support.
+ * 18-Mar-1998: Fix SMP, fix race condition fix.
+ * 1-May-1998:  Remove caching of device pointer, added caching
+ *              for proc output (no longer order n^2).
  */
 
 #include <linux/config.h>
+
 #include <asm/uaccess.h>
 #include <asm/system.h>
 #include <linux/types.h>
-#include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/string.h>
 #include <linux/errno.h>
+#include <linux/config.h>
 
 #include <linux/socket.h>
 #include <linux/sockios.h>
@@ -107,9 +55,9 @@
 #include <net/sock.h>
 #include <net/icmp.h>
 #include <linux/netlink.h>
+#include <linux/init.h>
 #include <linux/firewall.h>
 #include <linux/ip_fw.h>
-#include <linux/init.h>
 
 #ifdef CONFIG_IP_MASQUERADE
 #include <net/ip_masq.h>
@@ -119,223 +67,494 @@
 #include <linux/proc_fs.h>
 #include <linux/stat.h>
 
+/* Understanding locking in this code: (thanks to Alan Cox for using
+ * little words to explain this to me). -- PR
+ *
+ * In UP, there can be two packets traversing the chains:
+ * 1) A packet from the current userspace context
+ * 2) A packet off the bh handlers (timer or net).
+ *
+ * For SMP (kernel v2.1+), multiply this by # CPUs.
+ *
+ * This means counters and backchains can get corrupted if no precautions
+ * are taken.
+ *
+ * To actually alter a chain on UP, we need only do a cli(), as this will
+ * stop a bh handler firing, as we are in the current userspace context
+ * (coming from a setsockopt()).
+ *
+ * On SMP, we need a write_lock_irqsave(), which is a simple cli() in
+ * UP.
+ *
+ * For backchains and counters, we use an array, indexed by
+ * [smp_processor_id()*2 + !in_interrupt()]; the array is of size
+ * [smp_num_cpus*2].  For v2.0, smp_num_cpus is effectively 1.  So,
+ * confident of uniqueness, we modify counters even though we only
+ * have a read lock (to read the counters, you need a write lock,
+ * though).  */
+
+/* Why I didn't use straight locking... -- PR
+ * 
+ * The backchains can be separated out of the ip_chains structure, and
+ * allocated as needed inside ip_fw_check().
+ *
+ * The counters, however, can't.  Trying to lock these means blocking
+ * interrupts every time we want to access them.  This would suck HARD
+ * performance-wise.  Not locking them leads to possible corruption,
+ * made worse on 32-bit machines (counters are 64-bit).  */
+
+/*#define DEBUG_IP_FIREWALL*/
+/*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
+/*#define DEBUG_IP_FIREWALL_USER*/
+/*#define DEBUG_IP_FIREWALL_LOCKING*/
+
+#ifdef CONFIG_IP_FIREWALL_NETLINK
+static struct sock *ipfwsk;
+#endif
+
+#define SLOT_NUMBER() (smp_processor_id()*2 + !in_interrupt())
+#define NUM_SLOTS (smp_num_cpus*2)
+
+#define SIZEOF_STRUCT_IP_CHAIN (sizeof(struct ip_chain) \
+				+ NUM_SLOTS*sizeof(struct ip_reent))
+#define SIZEOF_STRUCT_IP_FW_KERNEL (sizeof(struct ip_fwkernel) \
+				    + NUM_SLOTS*sizeof(struct ip_counters))
+
+#ifdef DEBUG_IP_FIREWALL_LOCKING
+static unsigned int fwc_rlocks, fwc_wlocks;
+#define FWC_DEBUG_LOCK(d)			\
+do {						\
+	FWC_DONT_HAVE_LOCK(d);			\
+	d |= (1 << SLOT_NUMBER());		\
+} while (0)
+
+#define FWC_DEBUG_UNLOCK(d)			\
+do {						\
+	FWC_HAVE_LOCK(d);			\
+	d &= ~(1 << SLOT_NUMBER());		\
+} while (0)
+
+#define FWC_DONT_HAVE_LOCK(d)					\
+do {								\
+	if ((d) & (1 << SLOT_NUMBER()))				\
+		printk("%s:%i: Got lock on %i already!\n", 	\
+		       __FILE__, __LINE__, SLOT_NUMBER());	\
+} while(0)
+
+#define FWC_HAVE_LOCK(d)				\
+do {							\
+	if (!((d) & (1 << SLOT_NUMBER())))		\
+	printk("%s:%i:No lock on %i!\n", 		\
+	       __FILE__, __LINE__, SLOT_NUMBER());	\
+} while (0)
+
+#else
+#define FWC_DEBUG_LOCK(d) do { } while(0)
+#define FWC_DEBUG_UNLOCK(d) do { } while(0)
+#define FWC_DONT_HAVE_LOCK(d) do { } while(0)
+#define FWC_HAVE_LOCK(d) do { } while(0)
+#endif /*DEBUG_IP_FIREWALL_LOCKING*/
+
+#define FWC_READ_LOCK(l) do { FWC_DEBUG_LOCK(fwc_rlocks); read_lock(l); } while (0)
+#define FWC_WRITE_LOCK(l) do { FWC_DEBUG_LOCK(fwc_wlocks); write_lock(l); } while (0)
+#define FWC_READ_LOCK_IRQ(l,f) do { FWC_DEBUG_LOCK(fwc_rlocks); read_lock_irqsave(l,f); } while (0)
+#define FWC_WRITE_LOCK_IRQ(l,f) do { FWC_DEBUG_LOCK(fwc_wlocks); write_lock_irqsave(l,f); } while (0)
+#define FWC_READ_UNLOCK(l) do { FWC_DEBUG_UNLOCK(fwc_rlocks); read_unlock(l); } while (0)
+#define FWC_WRITE_UNLOCK(l) do { FWC_DEBUG_UNLOCK(fwc_wlocks); write_unlock(l); } while (0)
+#define FWC_READ_UNLOCK_IRQ(l,f) do { FWC_DEBUG_UNLOCK(fwc_rlocks); read_unlock_irqrestore(l,f); } while (0)
+#define FWC_WRITE_UNLOCK_IRQ(l,f) do { FWC_DEBUG_UNLOCK(fwc_wlocks); write_unlock_irqrestore(l,f); } while (0)
+
+struct ip_chain;
+
+struct ip_counters
+{
+	__u64 pcnt, bcnt;			/* Packet and byte counters */
+};
+
+struct ip_fwkernel
+{
+	struct ip_fw ipfw;
+	struct ip_fwkernel *next;	/* where to go next if current
+					 * rule doesn't match */
+	struct ip_chain *branch;	/* which branch to jump to if
+					 * current rule matches */
+	int simplebranch;		/* Use this if branch == NULL */
+	struct ip_counters counters[0]; /* Actually several of these */
+};
+
+struct ip_reent 
+{
+	struct ip_chain *prevchain;	/* Pointer to referencing chain */
+	struct ip_fwkernel *prevrule;	/* Pointer to referencing rule */
+	struct ip_counters counters;
+};
+
+struct ip_chain
+{
+	ip_chainlabel label;	    /* Defines the label for each block */
+ 	struct ip_chain *next;	    /* Pointer to next block */
+	struct ip_fwkernel *chain;  /* Pointer to first rule in block */
+	__u32 refcount; 	    /* Number of references to block */
+	int policy;		    /* Default rule for chain.  Only *
+				     * used in built in chains */
+	struct ip_reent reent[0];   /* Actually several of these */
+};
+
 /*
  *	Implement IP packet firewall
  */
 
 #ifdef DEBUG_IP_FIREWALL 
-#define dprintf1(a)		printk(a)
-#define dprintf2(a1,a2)		printk(a1,a2)
-#define dprintf3(a1,a2,a3)	printk(a1,a2,a3)
-#define dprintf4(a1,a2,a3,a4)	printk(a1,a2,a3,a4)
+#define dprintf(format, args...)  printk(format , ## args)
 #else
-#define dprintf1(a)	
-#define dprintf2(a1,a2)
-#define dprintf3(a1,a2,a3)
-#define dprintf4(a1,a2,a3,a4)
+#define dprintf(format, args...)
 #endif
 
-#define print_ip(a)	 printk("%ld.%ld.%ld.%ld",(ntohl(a)>>24)&0xFF,\
-					      (ntohl(a)>>16)&0xFF,\
-					      (ntohl(a)>>8)&0xFF,\
-					      (ntohl(a))&0xFF);
-
-#ifdef DEBUG_IP_FIREWALL
-#define dprint_ip(a)	print_ip(a)
+#ifdef DEBUG_IP_FIREWALL_USER
+#define duprintf(format, args...) printk(format , ## args)
 #else
-#define dprint_ip(a)	
+#define duprintf(format, args...)
 #endif
 
-#if defined(CONFIG_IP_ACCT) || defined(CONFIG_IP_FIREWALL)
+/* Lock around ip_fw_chains linked list structure */
+spinlock_t ip_fw_lock = SPIN_LOCK_UNLOCKED;
 
-struct ip_fw *ip_fw_fwd_chain;
-struct ip_fw *ip_fw_in_chain;
-struct ip_fw *ip_fw_out_chain;
-struct ip_fw *ip_acct_chain;
-struct ip_fw *ip_masq_chain;
-
-static struct ip_fw **chains[] =
-	{&ip_fw_fwd_chain, &ip_fw_in_chain, &ip_fw_out_chain, &ip_acct_chain,
-	 &ip_masq_chain
-	};
-#endif /* CONFIG_IP_ACCT || CONFIG_IP_FIREWALL */
- 
-#ifdef CONFIG_IP_FIREWALL
-int ip_fw_fwd_policy=IP_FW_F_ACCEPT;
-int ip_fw_in_policy=IP_FW_F_ACCEPT;
-int ip_fw_out_policy=IP_FW_F_ACCEPT;
+/* Head of linked list of fw rules */
+static struct ip_chain *ip_fw_chains; 
 
-static int *policies[] =
-	{&ip_fw_fwd_policy, &ip_fw_in_policy, &ip_fw_out_policy};
+#define IP_FW_INPUT_CHAIN ip_fw_chains
+#define IP_FW_FORWARD_CHAIN (ip_fw_chains->next)
+#define IP_FW_OUTPUT_CHAIN (ip_fw_chains->next->next)
 
-#endif
+/* Returns 1 if the port is matched by the range, 0 otherwise */
+extern inline int port_match(__u16 min, __u16 max, __u16 port,
+			     int frag, int invert)
+{
+	if (frag) /* Fragments fail ANY port test. */
+		return (min == 0 && max == 0xFFFF);
+	else return (port >= min && port <= max) ^ invert;
+}
 
-#ifdef CONFIG_IP_FIREWALL_NETLINK
-struct sock *ipfwsk;
-#endif
+/* Returns whether matches rule or not. */
+static int ip_rule_match(struct ip_fwkernel *f, 
+			 const char *ifname, 
+			 struct iphdr *ip, 
+			 char tcpsyn,
+			 __u16 src_port, __u16 dst_port,
+			 char isfrag)
+{
+#define FWINV(bool,invflg) ((bool) ^ !!(f->ipfw.fw_invflg & invflg))
+	/*
+	 *	This is a bit simpler as we don't have to walk
+	 *	an interface chain as you do in BSD - same logic
+	 *	however.
+	 */
 
-/*
- *	Returns 1 if the port is matched by the vector, 0 otherwise
- */
+	if (FWINV((ip->saddr&f->ipfw.fw_smsk.s_addr) != f->ipfw.fw_src.s_addr,
+		  IP_FW_INV_SRCIP)
+	    || FWINV((ip->daddr&f->ipfw.fw_dmsk.s_addr)!=f->ipfw.fw_dst.s_addr,
+		     IP_FW_INV_DSTIP)) {
+		dprintf("Source or dest mismatch.\n");
+
+		dprintf("SRC: %u. Mask: %u. Target: %u.%s\n", ip->saddr,
+			f->ipfw.fw_smsk.s_addr, f->ipfw.fw_src.s_addr,
+			f->ipfw.fw_invflg & IP_FW_INV_SRCIP ? " (INV)" : "");
+		dprintf("DST: %u. Mask: %u. Target: %u.%s\n", ip->daddr,
+			f->ipfw.fw_dmsk.s_addr, f->ipfw.fw_dst.s_addr,
+			f->ipfw.fw_invflg & IP_FW_INV_DSTIP ? " (INV)" : "");
+		return 0;
+	}
 
-extern inline int port_match(unsigned short *portptr,int nports,unsigned short port,int range_flag)
-{
-	if (!nports)
-		return 1;
-	if ( range_flag ) 
-	{
-		if ( portptr[0] <= port && port <= portptr[1] ) 
-		{
-			return( 1 );
-		}
-		nports -= 2;
-		portptr += 2;
+	/*
+	 *	Look for a VIA device match 
+	 */
+	if (f->ipfw.fw_flg & IP_FW_F_WILDIF) {
+	    if (FWINV(strncmp(ifname, f->ipfw.fw_vianame,
+			      strlen(f->ipfw.fw_vianame)) != 0,
+		      IP_FW_INV_VIA)) {	
+		dprintf("Wildcard interface mismatch.%s\n",
+			f->ipfw.fw_invflg & IP_FW_INV_VIA ? " (INV)" : "");
+		return 0;	/* Mismatch */
+	    }
+	}
+	else if (FWINV(strcmp(ifname, f->ipfw.fw_vianame) != 0,
+		       IP_FW_INV_VIA)) {
+	    dprintf("Interface name does not match.%s\n",
+		    f->ipfw.fw_invflg & IP_FW_INV_VIA
+		    ? " (INV)" : "");
+	    return 0;	/* Mismatch */
 	}
-	while ( nports-- > 0 ) 
-	{
-		if ( *portptr++ == port ) 
-		{
-			return( 1 );
-		}
+
+	/*
+	 *	Ok the chain addresses match.
+	 */
+	
+	/* If we have a fragment rule but the packet is not a fragment
+	 * then we return zero */
+	if (FWINV((f->ipfw.fw_flg&IP_FW_F_FRAG) && !isfrag, IP_FW_INV_FRAG)) { 
+		dprintf("Fragment rule but not fragment.%s\n",
+			f->ipfw.fw_invflg & IP_FW_INV_FRAG ? " (INV)" : "");
+		return 0;
 	}
-	return(0);
-}
 
-#if defined(CONFIG_IP_ACCT) || defined(CONFIG_IP_FIREWALL)
+	/* Fragment NEVER passes a SYN test, even an inverted one. */
+	if (FWINV((f->ipfw.fw_flg&IP_FW_F_TCPSYN) && !tcpsyn, IP_FW_INV_SYN)
+	    || (isfrag && (f->ipfw.fw_flg&IP_FW_F_TCPSYN))) {
+		dprintf("Rule requires SYN and packet has no SYN.%s\n",
+			f->ipfw.fw_invflg & IP_FW_INV_SYN ? " (INV)" : "");
+		return 0;
+	}
 
-#ifdef CONFIG_IP_FIREWALL_VERBOSE
+	if (f->ipfw.fw_proto) {
+		/*
+		 *	Specific firewall - packet's protocol
+		 *	must match firewall's.
+		 */
 
-/* 
- *	VERY ugly piece of code which actually makes kernel printf for
- * 	matching packets. 
- */
+		if (FWINV(ip->protocol!=f->ipfw.fw_proto, IP_FW_INV_PROTO)) {
+			dprintf("Packet protocol %hi does not match %hi.%s\n",
+				ip->protocol, f->ipfw.fw_proto,
+				f->ipfw.fw_invflg&IP_FW_INV_PROTO ? " (INV)":"");
+			return 0;
+		}
+
+		/* For non TCP/UDP/ICMP, port range is max anyway. */
+		if (!port_match(f->ipfw.fw_spts[0], 
+				f->ipfw.fw_spts[1],
+				src_port, isfrag, 
+				!!(f->ipfw.fw_invflg&IP_FW_INV_SRCPT))
+		    || !port_match(f->ipfw.fw_dpts[0], 
+				   f->ipfw.fw_dpts[1],
+				   dst_port, isfrag, 
+				   !!(f->ipfw.fw_invflg
+				      &IP_FW_INV_DSTPT))) {
+		    dprintf("Port match failed.\n");
+		    return 0;
+		}
+	}
 
-static char *chain_name(struct ip_fw *chain, int mode)
-{
-	switch (mode) { 
-	case IP_FW_MODE_ACCT_IN: return "acct in";
-	case IP_FW_MODE_ACCT_OUT: return "acct out";
+	dprintf("Match succeeded.\n");
+	return 1;
+}
+
+static const char *branchname(struct ip_chain *branch,int simplebranch)
+{
+	if (branch)
+		return branch->label;
+	switch (simplebranch)
+	{
+	case FW_BLOCK: return IP_FW_LABEL_BLOCK;
+	case FW_ACCEPT: return IP_FW_LABEL_ACCEPT;
+	case FW_REJECT: return IP_FW_LABEL_REJECT;
+	case FW_REDIRECT: return IP_FW_LABEL_REDIRECT;
+	case FW_MASQUERADE: return IP_FW_LABEL_MASQUERADE;
+	case FW_SKIP: return "-";
+	case FW_SKIP+1: return IP_FW_LABEL_RETURN;
 	default:
-		if (chain == ip_fw_fwd_chain) 
-			return "fw-fwd";
-		else if (chain == ip_fw_in_chain)
-			return "fw-in";
-		else
-			return "fw-out"; 
+		return "UNKNOWN";
 	}
 }
 
-static char *rule_name(struct ip_fw *f, int mode, char *buf)
+/*
+ * VERY ugly piece of code which actually
+ * makes kernel printf for matching packets...
+ */
+static void dump_packet(const struct iphdr *ip, 
+			const char *ifname,
+			struct ip_fwkernel *f, 
+			const ip_chainlabel chainlabel,
+			__u16 src_port, 
+			__u16 dst_port)
 {
-	if (mode == IP_FW_MODE_ACCT_IN || mode == IP_FW_MODE_ACCT_OUT)
-		return "";
+	__u32 *opt = (__u32 *) (ip + 1);
+	int opti;
+	
+	if (f)
+	{
+		printk(KERN_INFO "Packet log: %s ",chainlabel);
+		
+		printk("%s ",branchname(f->branch,f->simplebranch));
+		if (f->simplebranch==FW_REDIRECT)
+			printk("%d ",f->ipfw.fw_redirpt);
+	}
+
+	printk("%s PROTO=%d %ld.%ld.%ld.%ld:%hu %ld.%ld.%ld.%ld:%hu"
+	       " L=%hu S=0x%2.2hX I=%hu F=0x%4.4hX T=%hu",
+	       ifname, ip->protocol,
+	       (ntohl(ip->saddr)>>24)&0xFF,
+	       (ntohl(ip->saddr)>>16)&0xFF,
+	       (ntohl(ip->saddr)>>8)&0xFF,
+	       (ntohl(ip->saddr))&0xFF,
+	       src_port,
+	       (ntohl(ip->daddr)>>24)&0xFF,
+	       (ntohl(ip->daddr)>>16)&0xFF,
+	       (ntohl(ip->daddr)>>8)&0xFF,
+	       (ntohl(ip->daddr))&0xFF,
+	       dst_port,
+	       ntohs(ip->tot_len), ip->tos, ntohs(ip->id),
+	       ntohs(ip->frag_off), ip->ttl);
 
-	if(f->fw_flg&IP_FW_F_ACCEPT) {
-		if(f->fw_flg&IP_FW_F_REDIR) {
-			sprintf(buf, "acc/r%d ", f->fw_pts[f->fw_nsp+f->fw_ndp]);
-			return buf;
-		} else if(f->fw_flg&IP_FW_F_MASQ)
-			return "acc/masq ";
-		else
-			return "acc ";
-	} else if(f->fw_flg&IP_FW_F_ICMPRPL) {
-		return "rej ";
-	} else {
-		return "deny ";
-	}
+	for (opti = 0; opti < (ip->ihl - sizeof(struct iphdr) / 4); opti++)
+		printk(" O=0x%8.8X", *opt++);
+	printk("\n");
 }
 
-static void print_packet(struct iphdr *ip, 
-			 u16 src_port, u16 dst_port, u16 icmp_type,
-			 char *chain, char *rule, char *devname)
+/* function for checking chain labels for user space.  Makes sure that
+ * there are no special characters in the string */
+static int check_label(ip_chainlabel label)
 {
-	__u32 *opt = (__u32 *) (ip + 1);
-	int opti;
-	__u16 foff = ntohs(ip->frag_off); 
+	unsigned int i;
+	
+	for (i = 0; i < IP_FW_MAX_LABEL_LENGTH + 1 && label[i]; i++)
+		if (label[i] <= ' ') 
+			return 0;
+	if (i == IP_FW_MAX_LABEL_LENGTH+1)
+		return 0;
+	return 1;
+}	
 
-	printk(KERN_INFO "IP %s %s%s", chain, rule, devname); 
+/*	This function returns a pointer to the first chain with a label
+ *	that matches the one given. */
+static struct ip_chain *find_label(ip_chainlabel label)
+{
+	struct ip_chain *tmp;
+	FWC_HAVE_LOCK(fwc_rlocks | fwc_wlocks);
+	for (tmp = ip_fw_chains; tmp; tmp = tmp->next)
+		if (strcmp(tmp->label,label) == 0)
+			break;
+	return tmp;
+}
 
-	switch(ip->protocol)
-	{
-	case IPPROTO_TCP:
-		printk(" TCP ");
-		break;
-	case IPPROTO_UDP:
-		printk(" UDP ");
-		break;
-	case IPPROTO_ICMP:
-		printk(" ICMP/%d ", icmp_type);
-		break;
-	default:
-		printk(" PROTO=%d ", ip->protocol);
-		break;
+/* This function returns a boolean which when true sets answer to one
+   of the FW_*. */
+static int find_special(ip_chainlabel label, int *answer)
+{
+	if (label[0] == '\0') {
+		*answer = FW_SKIP; /* => pass-through rule */
+		return 1;
+	} else if (strcmp(label,IP_FW_LABEL_ACCEPT) == 0) {
+		*answer = FW_ACCEPT;
+		return 1;
+	} else if (strcmp(label,IP_FW_LABEL_BLOCK) == 0) {
+		*answer = FW_BLOCK;
+		return 1;
+	} else if (strcmp(label,IP_FW_LABEL_REJECT) == 0) {
+		*answer = FW_REJECT;
+		return 1;
+#ifdef CONFIG_IP_TRANSPARENT_PROXY
+	} else if (strcmp(label,IP_FW_LABEL_REDIRECT) == 0) {
+		*answer = FW_REDIRECT;
+		return 1;
+#endif
+#ifdef CONFIG_IP_MASQUERADE
+	} else if (strcmp(label,IP_FW_LABEL_MASQUERADE) == 0) {
+		*answer = FW_MASQUERADE;
+		return 1;
+#endif
+	} else if (strcmp(label, IP_FW_LABEL_RETURN) == 0) {
+		*answer = FW_SKIP+1;
+		return 1;
+	} else {
+		return 0;
 	}
-	print_ip(ip->saddr);
-	if(ip->protocol == IPPROTO_TCP || ip->protocol == IPPROTO_UDP)
-		printk(":%hu", src_port);
-	printk(" ");
-	print_ip(ip->daddr);
-	if(ip->protocol == IPPROTO_TCP || ip->protocol == IPPROTO_UDP)
-		printk(":%hu", dst_port);
-	printk(" L=%hu S=0x%2.2hX I=%hu FO=0x%4.4hX T=%hu",
-	       ntohs(ip->tot_len), ip->tos, ntohs(ip->id),
-	       foff & IP_OFFSET, ip->ttl); 
-	if (foff & IP_DF) printk(" DF=1");
-	if (foff & IP_MF) printk(" MF=1"); 
-	for (opti = 0; opti < (ip->ihl - sizeof(struct iphdr) / 4); opti++)
-		printk(" O=0x%8.8X", *opt++);
-	printk("\n");	
 }
+
+/* This function cleans up the prevchain and prevrule.  If the verbose
+ * flag is set then the names of the chains will be printed as it
+ * cleans up.  */
+static void cleanup(struct ip_chain *chain, 
+		    const int verbose, 
+		    unsigned int slot)
+{ 
+	struct ip_chain *tmpchain = chain->reent[slot].prevchain;
+	if (verbose)
+		printk(KERN_ERR "Chain backtrace: ");
+	while (tmpchain) {
+		if (verbose)
+			printk("%s<-",chain->label);
+		chain->reent[slot].prevchain = NULL;
+		chain = tmpchain;
+		tmpchain = chain->reent[slot].prevchain;
+	}
+	if (verbose)
+		printk("%s\n",chain->label);
+}
+
+/* Account a matched packet against rule f (counters of the caller's
+ * slot), log it if IP_FW_F_PRN is set, update skb->fwmark and, for
+ * IP_FW_F_NETLINK rules, broadcast mark + interface + packet head. */
+static inline void
+ip_fw_domatch(struct ip_fwkernel *f, const struct iphdr *ip,
+	      const char *rif, const ip_chainlabel label,
+	      struct sk_buff *skb, unsigned int slot,
+	      __u16 src_port, __u16 dst_port)
+{
+	f->counters[slot].bcnt+=ntohs(ip->tot_len);
+	f->counters[slot].pcnt++;
+	if (f->ipfw.fw_flg & IP_FW_F_PRN)
+		dump_packet(ip,rif,f,label,src_port,dst_port);
+/* This functionality is useless in stock 2.0.x series, but we don't
+ * discard the mark thing altogether, to avoid breaking ipchains (and,
+ * more importantly, the ipfwadm wrapper) --PR */
+	if (f->ipfw.fw_flg & IP_FW_F_MARKABS)
+		skb->fwmark = f->ipfw.fw_mark;
+	else
+		skb->fwmark+=f->ipfw.fw_mark;
+#ifdef CONFIG_IP_FIREWALL_NETLINK
+	if (f->ipfw.fw_flg & IP_FW_F_NETLINK) {
+		size_t len = min(f->ipfw.fw_outputsize, ntohs(ip->tot_len))
+			+ sizeof(skb->fwmark) + IFNAMSIZ;
+		struct sk_buff *outskb=alloc_skb(len, GFP_ATOMIC);
+
+		duprintf("Sending packet out NETLINK (length = %u).\n",
+			 (unsigned int)len);
+		if (outskb) {
+			/* Prepend mark & interface */
+			skb_put(outskb, len);
+			*((__u32 *)outskb->data) = skb->fwmark;
+			strcpy(outskb->data+sizeof(__u32), rif);
+			memcpy(outskb->data+sizeof(__u32)+IFNAMSIZ, ip,
+			       len-(sizeof(__u32)+IFNAMSIZ));
+			/* Chains are walked from bh handlers: never sleep */
+			netlink_broadcast(ipfwsk, outskb, 0, ~0, GFP_ATOMIC);
+		}
+		else duprintf("netlink post failed - alloc_skb failed!\n");
+	}
 #endif
+}
 
 /*
  *	Returns one of the generic firewall policies, like FW_ACCEPT.
- *	Also does accounting so you can feed it the accounting chain.
  *
- *	The modes is either IP_FW_MODE_FW (normal firewall mode),
- *	IP_FW_MODE_ACCT_IN or IP_FW_MODE_ACCT_OUT (accounting mode,
- *	steps through the entire chain and handles fragments
- *	differently), or IP_FW_MODE_CHK (handles user-level check,
- *	counters are not updated).
+ *	The testing is either false for normal firewall mode or true for
+ *	user checking mode (counters are not updated, TOS & mark not done).
  */
-
-
-int ip_fw_chk(struct iphdr *ip, struct device *rif, __u16 *redirport, struct ip_fw *chain, int policy, int mode)
+static int 
+ip_fw_check(struct iphdr *ip, 
+	    const char *rif,
+	    __u16 *redirport,
+	    struct ip_chain *chain,
+	    struct sk_buff *skb,
+	    unsigned int slot,
+	    int testing)
 {
-	struct ip_fw *f;
 	struct tcphdr		*tcp=(struct tcphdr *)((__u32 *)ip+ip->ihl);
 	struct udphdr		*udp=(struct udphdr *)((__u32 *)ip+ip->ihl);
 	struct icmphdr		*icmp=(struct icmphdr *)((__u32 *)ip+ip->ihl);
 	__u32			src, dst;
-	__u16			src_port=0xFFFF, dst_port=0xFFFF, icmp_type=0xFF;
-	unsigned short		f_prt=0, prt;
-	char			notcpsyn=0, notcpack=0, match;
-	unsigned short		offset;
-	int			answer;
-	unsigned char		tosand, tosxor;
-
-	/*
-	 *	If the chain is empty follow policy. The BSD one
-	 *	accepts anything giving you a time window while
-	 *	flushing and rebuilding the tables.
-	 */
-	 
-	src = ip->saddr;
-	dst = ip->daddr;
-
-	/* 
-	 *	This way we handle fragmented packets.
-	 *	we ignore all fragments but the first one
-	 *	so the whole packet can't be reassembled.
-	 *	This way we relay on the full info which
-	 *	stored only in first packet.
-	 *
-	 *	Note that this theoretically allows partial packet
-	 *	spoofing. Not very dangerous but paranoid people may
-	 *	wish to play with this. It also allows the so called
-	 *	"fragment bomb" denial of service attack on some types
-	 *	of system.
-	 */
+	__u16			src_port = 0xFFFF, dst_port = 0xFFFF;
+	char			tcpsyn=0;
+	__u16			offset;
+	unsigned char		oldtos;
+	struct ip_fwkernel	*f;	
+	int			ret = FW_SKIP+2;
+
+	/* We handle fragments by dealing with the first fragment as
+	 * if it was a normal packet.  All other fragments are treated
+	 * normally, except that they will NEVER match rules that ask
+	 * things we don't know (ie. tcp syn flag or ports).  If the
+	 * rule is also a fragment-specific rule, non-fragments won't
+	 * match it. */
 
 	offset = ntohs(ip->frag_off) & IP_OFFSET;
 	
@@ -346,33 +565,52 @@
 	 *	checks.
 	 */
 	 
-	if (offset == 1 && ip->protocol == IPPROTO_TCP)
-		return FW_BLOCK;
-		
-	if (offset!=0 && !(mode & (IP_FW_MODE_ACCT_IN|IP_FW_MODE_ACCT_OUT)) &&
-		(ip->protocol == IPPROTO_TCP || ip->protocol == IPPROTO_UDP ||
-			ip->protocol == IPPROTO_ICMP))
-		return FW_ACCEPT;
-		
-	/*
-	 *	 Header fragment for TCP is too small to check the bits.
-	 */
-	 
-	if(ip->protocol==IPPROTO_TCP && (ip->ihl<<2)+16 > ntohs(ip->tot_len))
+	if (offset == 1 && ip->protocol == IPPROTO_TCP)	{
+		if (!testing && net_ratelimit()) {
+			printk("Suspect TCP fragment.\n");
+			dump_packet(ip,rif,NULL,NULL,0,0);
+		}
 		return FW_BLOCK;
-	
-	/*
-	 *	Too short.
-	 *
-	 *	But only too short for a packet with ports...
+	}
+
+	/* Check for too-small packets (not non-first fragments).
+	 * For each protocol, we assume that we can get the required
+	 * information, eg. port number or ICMP type.  If this fails,
+	 * reject it. 
+	 * 
+	 * Sizes might as well be rounded up to 8 here, since either
+	 * there are more fragments to come (which must be on 8-byte
+	 * boundaries), or this is a bogus packet anyway.
 	 */
-	 
-	else if((ntohs(ip->tot_len)<8+(ip->ihl<<2))&&(ip->protocol==IPPROTO_TCP || ip->protocol==IPPROTO_UDP))
-		return FW_BLOCK;
-		
+	if (offset == 0) {
+		unsigned int size_req;
+		switch (ip->protocol) {
+		case IPPROTO_TCP:
+			/* Don't care about things past flags word */
+			size_req = 16; 
+			break;
+
+		case IPPROTO_UDP:
+		case IPPROTO_ICMP:
+			size_req = 8;
+			break;
+
+		default:
+			size_req = 0;
+		}
+		if (ntohs(ip->tot_len) < (ip->ihl<<2)+size_req) {
+			if (!testing && net_ratelimit()) {
+				printk("Packet too short.\n");
+				dump_packet(ip,rif,NULL,NULL,0,0);
+			}
+			return FW_BLOCK;
+		}
+	}
+
 	src = ip->saddr;
 	dst = ip->daddr;
-
+	oldtos = ip->tos;
+	
 	/*
 	 *	If we got interface from which packet came
 	 *	we can use the address directly. This is unlike
@@ -381,898 +619,1041 @@
 	 *	devices instead.
 	 */
 	 
-	dprintf1("Packet ");
+	dprintf("Packet ");
 	switch(ip->protocol) 
 	{
 		case IPPROTO_TCP:
-			dprintf1("TCP ");
-			/* ports stay 0xFFFF if it is not the first fragment */
+			dprintf("TCP ");
 			if (!offset) {
 				src_port=ntohs(tcp->source);
 				dst_port=ntohs(tcp->dest);
-				if(!tcp->ack && !tcp->rst)
-					/* We do NOT have ACK, value TRUE */
-					notcpack=1;
-				if(!tcp->syn || !notcpack)
-					/* We do NOT have SYN, value TRUE */
-					notcpsyn=1;
+
+				/* Connection initialisation can only
+				 * be made when the syn bit is set and
+				 * neither the ack nor the reset bit is
+				 * set. */
+				if(tcp->syn && !(tcp->ack || tcp->rst))
+					tcpsyn=1;
 			}
-			prt=IP_FW_F_TCP;
 			break;
 		case IPPROTO_UDP:
-			dprintf1("UDP ");
-			/* ports stay 0xFFFF if it is not the first fragment */
+			dprintf("UDP ");
 			if (!offset) {
 				src_port=ntohs(udp->source);
 				dst_port=ntohs(udp->dest);
 			}
-			prt=IP_FW_F_UDP;
 			break;
 		case IPPROTO_ICMP:
-			/* icmp_type stays 255 if it is not the first fragment */
-			if (!offset)
-				icmp_type=(__u16)(icmp->type);
-			dprintf2("ICMP:%d ",icmp_type);
-			prt=IP_FW_F_ICMP;
+			if (!offset) {
+				src_port=(__u16)icmp->type;
+				dst_port=(__u16)icmp->code;
+			}
+			dprintf("ICMP ");
 			break;
 		default:
-			dprintf2("p=%d ",ip->protocol);
-			prt=IP_FW_F_ALL;
+			dprintf("p=%d ",ip->protocol);
 			break;
 	}
 #ifdef DEBUG_IP_FIREWALL
-	dprint_ip(ip->saddr);
+	print_ip(ip->saddr);
 	
-	if (ip->protocol==IPPROTO_TCP || ip->protocol==IPPROTO_UDP)
-		/* This will print 65535 when it is not the first fragment! */
-		dprintf2(":%d ", src_port);
-	dprint_ip(ip->daddr);
-	if (ip->protocol==IPPROTO_TCP || ip->protocol==IPPROTO_UDP)
-		/* This will print 65535 when it is not the first fragment! */
-		dprintf2(":%d ",dst_port);
-	dprintf1("\n");
-#endif	
-
-	for (f=chain;f;f=f->fw_next) 
-	{
-		/*
-		 *	This is a bit simpler as we don't have to walk
-		 *	an interface chain as you do in BSD - same logic
-		 *	however.
-		 */
-
-		/*
-		 *	Match can become 0x01 (a "normal" match was found),
-		 *	0x02 (a reverse match was found), and 0x03 (the
-		 *	IP addresses match in both directions).
-		 *	Now we know in which direction(s) we should look
-		 *	for a match for the TCP/UDP ports.  Both directions
-		 *	might match (e.g., when both addresses are on the
-		 *	same network for which an address/mask is given), but
-		 *	the ports might only match in one direction.
-		 *	This was obviously wrong in the original BSD code.
-		 */
-		match = 0x00;
-
-		if ((src&f->fw_smsk.s_addr)==f->fw_src.s_addr
-		&&  (dst&f->fw_dmsk.s_addr)==f->fw_dst.s_addr)
-			/* normal direction */
-			match |= 0x01;
-
-		if ((f->fw_flg & IP_FW_F_BIDIR) &&
-		    (dst&f->fw_smsk.s_addr)==f->fw_src.s_addr
-		&&  (src&f->fw_dmsk.s_addr)==f->fw_dst.s_addr)
-			/* reverse direction */
-			match |= 0x02;
-
-		if (!match)
-			continue;
-
-		/*
-		 *	Look for a VIA device match 
-		 */
-		if(f->fw_viadev)
-		{
-			if(rif!=f->fw_viadev)
-				continue;	/* Mismatch */
-		}
-
-		/* This looks stupid, because we scan almost static
-		   list, searching for static key. However, this way seems
-		   to be only reasonable way of handling fw_via rules
-		   (btw bsd makes the same thing).
-
-		   It will not affect performance if you will follow
-		   the following simple rules:
-
-		   - if inteface is aliased, ALWAYS specify fw_viadev,
-		     so that previous check will guarantee, that we will
-		     not waste time when packet arrive on another interface.
-
-		   - avoid using fw_via.s_addr if fw_via.s_addr is owned
-		     by an aliased interface.
-
-		                                                       --ANK
-		 */
-		if (f->fw_via.s_addr && rif) {
-			struct in_ifaddr *ifa;
-
-			if (rif->ip_ptr == NULL)
-				continue;	/* Mismatch */
-
-			for (ifa = ((struct in_device*)(rif->ip_ptr))->ifa_list;
-			     ifa; ifa = ifa->ifa_next) {
-				if (ifa->ifa_local == f->fw_via.s_addr)
-					goto ifa_ok;
+	if (offset) 
+		dprintf(":fragment (%i) ", ((int)offset)<<2);
+	else if (ip->protocol==IPPROTO_TCP || ip->protocol==IPPROTO_UDP 
+		 || ip->protocol==IPPROTO_ICMP)
+		dprintf(":%hu:%hu", src_port, dst_port);
+	dprintf("\n");
+#endif
+
+	if (!testing) FWC_READ_LOCK(&ip_fw_lock);
+	else FWC_HAVE_LOCK(fwc_rlocks);
+
+	f = chain->chain;
+	do {
+		for (; f; f = f->next) {
+			if (ip_rule_match(f,rif,ip,
+					  tcpsyn,src_port,dst_port,offset)) {
+				if (!testing)
+					ip_fw_domatch(f, ip, rif, chain->label, skb,
+						      slot, src_port,dst_port);
+				break;
 			}
-			continue;	/* Mismatch */
-
-		ifa_ok:
 		}
-
-		/*
-		 *	Ok the chain addresses match.
-		 */
-
-#ifdef CONFIG_IP_ACCT
-		/*
-		 *	See if we're in accounting mode and only want to
-		 *	count incoming or outgoing packets.
-		 */
-
-		if (mode & (IP_FW_MODE_ACCT_IN|IP_FW_MODE_ACCT_OUT) &&
-		   ((mode == IP_FW_MODE_ACCT_IN && f->fw_flg&IP_FW_F_ACCTOUT) ||
-		    (mode == IP_FW_MODE_ACCT_OUT && f->fw_flg&IP_FW_F_ACCTIN)))
-			continue;
-
-#endif
-		/*
-		 * For all non-TCP packets and/or non-first fragments,
-		 * notcpsyn and notcpack will always be FALSE,
-		 * so the IP_FW_F_TCPSYN and IP_FW_F_TCPACK flags
-		 * are actually ignored for these packets.
-		 */
-		 
-		if((f->fw_flg&IP_FW_F_TCPSYN) && notcpsyn)
-		 	continue;
-
-		if((f->fw_flg&IP_FW_F_TCPACK) && notcpack)
-		 	continue;
-
-		f_prt=f->fw_flg&IP_FW_F_KIND;
-		if (f_prt!=IP_FW_F_ALL) 
-		{
-			/*
-			 *	Specific firewall - packet's protocol
-			 *	must match firewall's.
-			 */
-
-			if(prt!=f_prt)
-				continue;
-				
-			if((prt==IP_FW_F_ICMP &&
-				! port_match(&f->fw_pts[0], f->fw_nsp,
-					icmp_type,f->fw_flg&IP_FW_F_SRNG)) ||
-			    !(prt==IP_FW_F_ICMP || ((match & 0x01) &&
-				port_match(&f->fw_pts[0], f->fw_nsp, src_port,
-					f->fw_flg&IP_FW_F_SRNG) &&
-				port_match(&f->fw_pts[f->fw_nsp], f->fw_ndp, dst_port,
-					f->fw_flg&IP_FW_F_DRNG)) || ((match & 0x02) &&
-				port_match(&f->fw_pts[0], f->fw_nsp, dst_port,
-					f->fw_flg&IP_FW_F_SRNG) &&
-				port_match(&f->fw_pts[f->fw_nsp], f->fw_ndp, src_port,
-					f->fw_flg&IP_FW_F_DRNG))))
-			{
-				continue;
+		if (f) {
+			if (f->branch) {
+				/* Do sanity check to see if we have
+                                 * already set prevchain and if so we
+                                 * must be in a loop */
+				if (f->branch->reent[slot].prevchain) {
+					if (!testing) {
+						printk(KERN_ERR 
+						       "IP firewall: "
+						       "Loop detected "
+						       "at `%s'.\n",
+						       f->branch->label);
+						cleanup(chain, 1, slot);
+						ret = FW_BLOCK;
+					} else {
+						cleanup(chain, 0, slot);
+						ret = FW_SKIP+1;
+					}
+				}
+				else {
+					f->branch->reent[slot].prevchain 
+						= chain;
+					f->branch->reent[slot].prevrule 
+						= f->next;
+					chain = f->branch;
+					f = chain->chain;
+				}
+			}
+			else if (f->simplebranch == FW_SKIP) 
+				f = f->next;
+			else if (f->simplebranch == FW_SKIP+1) {
+				/* Just like falling off the chain */
+				goto fall_off_chain;
+			}
+			else {	
+				cleanup(chain, 0, slot);
+				ret = f->simplebranch;
+			}
+		} /* f == NULL */
+		else {
+		fall_off_chain:
+			if (chain->reent[slot].prevchain) {
+				struct ip_chain *tmp = chain;
+				f = chain->reent[slot].prevrule;
+				chain = chain->reent[slot].prevchain;
+				tmp->reent[slot].prevchain = NULL;
+			}
+			else {
+				ret = chain->policy;
+				if (!testing) {
+					chain->reent[slot].counters.pcnt++;
+					chain->reent[slot].counters.bcnt
+						+= ntohs(ip->tot_len);
+				}
 			}
 		}
+	} while (ret == FW_SKIP+2);
 
-#ifdef CONFIG_IP_FIREWALL_VERBOSE
-		if (f->fw_flg & IP_FW_F_PRN)
-		{
-			char buf[16]; 
-
-			print_packet(ip, src_port, dst_port, icmp_type,
-				     chain_name(chain, mode), 
-				     rule_name(f, mode, buf), 
-				     rif ? rif->name : "-");
-		}
-#endif		
-		if (mode != IP_FW_MODE_CHK) {
-			f->fw_bcnt+=ntohs(ip->tot_len);
-			f->fw_pcnt++;
-		}
-		if (!(mode & (IP_FW_MODE_ACCT_IN|IP_FW_MODE_ACCT_OUT)))
-			break;
-	} /* Loop */
-	
-	if (!(mode & (IP_FW_MODE_ACCT_IN|IP_FW_MODE_ACCT_OUT))) {
-
-		/*
-		 * We rely on policy defined in the rejecting entry or, if no match
-		 * was found, we rely on the general policy variable for this type
-		 * of firewall.
-		 */
+	if (!testing) FWC_READ_UNLOCK(&ip_fw_lock);
 
-		if (f!=NULL) {
-			policy=f->fw_flg;
-			tosand=f->fw_tosand;
-			tosxor=f->fw_tosxor;
-		} else {
-			tosand=0xFF;
-			tosxor=0x00;
-		}
+	/* Recalculate checksum if not going to reject, and TOS changed. */
+	if (ip->tos != oldtos 
+	    && ret != FW_REJECT && ret != FW_BLOCK 
+	    && !testing)
+		ip_send_check(ip);
 
-		if (policy&IP_FW_F_ACCEPT) {
-			/* Adjust priority and recompute checksum */
-			__u8 old_tos = ip->tos;
-			ip->tos = (old_tos & tosand) ^ tosxor;
-			if (ip->tos != old_tos)
-		 		ip_send_check(ip);
 #ifdef CONFIG_IP_TRANSPARENT_PROXY
-			if (policy&IP_FW_F_REDIR) {
-				if (redirport)
-					if ((*redirport = htons(f->fw_pts[f->fw_nsp+f->fw_ndp])) == 0) {
-						/* Wildcard redirection.
-						 * Note that redirport will become
-						 * 0xFFFF for non-TCP/UDP packets.
-						 */
-						*redirport = htons(dst_port);
-					}
-				answer = FW_REDIRECT;
-			} else
-#endif
-#ifdef CONFIG_IP_MASQUERADE
-			if (policy&IP_FW_F_MASQ)
-				answer = FW_MASQUERADE;
-			else
+	if (ret == FW_REDIRECT && redirport) {
+		if ((*redirport = htons(f->ipfw.fw_redirpt)) == 0) {
+			/* Wildcard redirection.
+			 * Note that redirport will become
+			 * 0xFFFF for non-TCP/UDP packets.
+			 */
+			*redirport = htons(dst_port);
+		}
+	}
 #endif
-				answer = FW_ACCEPT;
-			
-		} else if(policy&IP_FW_F_ICMPRPL)
-			answer = FW_REJECT;
-		else
-			answer = FW_BLOCK;
 
-#ifdef CONFIG_IP_FIREWALL_NETLINK
-		if((policy&IP_FW_F_PRN) && (answer == FW_REJECT || answer == FW_BLOCK))
-		{
-			struct sk_buff *skb=alloc_skb(128, GFP_ATOMIC);
-			if(skb)
-			{
-				int len=min(128,ntohs(ip->tot_len));
-				skb_put(skb,len);
-				memcpy(skb->data,ip,len);
-				if(netlink_post(NETLINK_FIREWALL, skb))
-					kfree_skb(skb);
-			}
-		}
-#endif		
-		return answer;
-	} else
-		/* we're doing accounting, always ok */
-		return 0;
+#ifdef DEBUG_ALLOW_ALL
+	return (testing ? ret : FW_ACCEPT);
+#else
+	return ret;
+#endif
 }
 
+/* Must have write lock & interrupts off for any of these */
 
-static void zero_fw_chain(struct ip_fw *chainptr)
+/* This function zeroes the counters (all NUM_SLOTS entries) of every
+ * rule in a chain.  The input is a pointer to the chain to be zeroed. */
+static int zero_fw_chain(struct ip_chain *chainptr)
 {
-	struct ip_fw *ctmp=chainptr;
-	while(ctmp) 
-	{
-		ctmp->fw_pcnt=0L;
-		ctmp->fw_bcnt=0L;
-		ctmp=ctmp->fw_next;
-	}
-}
+	struct ip_fwkernel *i;
 
-static void free_fw_chain(struct ip_fw *volatile* chainptr)
-{
-	unsigned long flags;
-	save_flags(flags);
-	cli();
-	while ( *chainptr != NULL ) 
-	{
-		struct ip_fw *ftmp;
-		ftmp = *chainptr;
-		*chainptr = ftmp->fw_next;
-		kfree_s(ftmp,sizeof(*ftmp));
-	}
-	restore_flags(flags);
+	FWC_HAVE_LOCK(fwc_wlocks);
+	for (i = chainptr->chain; i; i = i->next)
+		memset(i->counters, 0, sizeof(struct ip_counters)*NUM_SLOTS);
+	return 0;
 }
 
-/* Volatiles to keep some of the compiler versions amused */
-
-static int insert_in_chain(struct ip_fw *volatile* chainptr, struct ip_fw *frwl,int len)
+static int clear_fw_chain(struct ip_chain *chainptr)
 {
-	struct ip_fw *ftmp;
-	unsigned long flags;
+	struct ip_fwkernel *i= chainptr->chain;
 
-	save_flags(flags);
+	FWC_HAVE_LOCK(fwc_wlocks);
+	chainptr->chain=NULL;
 
-	ftmp = kmalloc( sizeof(struct ip_fw), GFP_ATOMIC );
-	if ( ftmp == NULL ) 
-	{
-#ifdef DEBUG_IP_FIREWALL
-		printk("ip_fw_ctl:  malloc said no\n");
-#endif
-		return( ENOMEM );
+	while (i) {
+		struct ip_fwkernel *tmp = i->next;
+		if (i->branch)
+			i->branch->refcount--;
+		kfree(i);
+		i = tmp;
 	}
+	return 0;
+}
 
-	memcpy(ftmp, frwl, len);
-	/*
-	 *	Allow the more recent "minimise cost" flag to be
-	 *	set. [Rob van Nieuwkerk]
-	 */
-	ftmp->fw_tosand |= 0x01;
-	ftmp->fw_tosxor &= 0xFE;
-	ftmp->fw_pcnt=0L;
-	ftmp->fw_bcnt=0L;
-
-	cli();
-
-	if ((ftmp->fw_vianame)[0]) {
-		if (!(ftmp->fw_viadev = dev_get(ftmp->fw_vianame)))
-			ftmp->fw_viadev = (struct device *) -1;
-	} else
-		ftmp->fw_viadev = NULL;
-
-	ftmp->fw_next = *chainptr;
-       	*chainptr=ftmp;
-	restore_flags(flags);
-	return(0);
-}
-
-static int append_to_chain(struct ip_fw *volatile* chainptr, struct ip_fw *frwl,int len)
-{
-	struct ip_fw *ftmp;
-	struct ip_fw *chtmp=NULL;
-	struct ip_fw *volatile chtmp_prev=NULL;
-	unsigned long flags;
-
-	save_flags(flags);
+static int replace_in_chain(struct ip_chain *chainptr, 
+			    struct ip_fwkernel *frwl,
+			    __u32 position)
+{
+	struct ip_fwkernel *f = chainptr->chain;
+	
+	FWC_HAVE_LOCK(fwc_wlocks);
 
-	ftmp = kmalloc( sizeof(struct ip_fw), GFP_ATOMIC );
-	if ( ftmp == NULL ) 
-	{
-#ifdef DEBUG_IP_FIREWALL
-		printk("ip_fw_ctl:  malloc said no\n");
-#endif
-		return( ENOMEM );
-	}
+	while (--position && f != NULL) f = f->next;
+	if (f == NULL)
+		return EINVAL;
+		
+	if (f->branch) f->branch->refcount--;
+	if (frwl->branch) frwl->branch->refcount++;
 
-	memcpy(ftmp, frwl, len);
-	/*
-	 *	Allow the more recent "minimise cost" flag to be
-	 *	set. [Rob van Nieuwkerk]
-	 */
-	ftmp->fw_tosand |= 0x01;
-	ftmp->fw_tosxor &= 0xFE;
-	ftmp->fw_pcnt=0L;
-	ftmp->fw_bcnt=0L;
-
-	ftmp->fw_next = NULL;
-
-	cli();
-
-	if ((ftmp->fw_vianame)[0]) {
-		if (!(ftmp->fw_viadev = dev_get(ftmp->fw_vianame)))
-			ftmp->fw_viadev = (struct device *) -1;
-	} else
-		ftmp->fw_viadev = NULL;
-
-	chtmp_prev=NULL;
-	for (chtmp=*chainptr;chtmp!=NULL;chtmp=chtmp->fw_next) 
-		chtmp_prev=chtmp;
-	
-	if (chtmp_prev)
-		chtmp_prev->fw_next=ftmp;
-	else
-        	*chainptr=ftmp;
-	restore_flags(flags);
-	return(0);
+	frwl->next = f->next;
+	memcpy(f,frwl,sizeof(struct ip_fwkernel));
+	kfree(frwl);
+	return 0;
 }
 
-static int del_from_chain(struct ip_fw *volatile*chainptr, struct ip_fw *frwl)
+static int append_to_chain(struct ip_chain *chainptr, struct ip_fwkernel *rule)
 {
-	struct ip_fw 	*ftmp,*ltmp;
-	unsigned short	tport1,tport2,tmpnum;
-	char		matches,was_found;
-	unsigned long 	flags;
+	struct ip_fwkernel *i;
 
-	save_flags(flags);
-	cli();
+	FWC_HAVE_LOCK(fwc_wlocks);
+	/* Special case if no rules already present */
+	if (chainptr->chain == NULL) {
 
-	ftmp=*chainptr;
-
-	if ( ftmp == NULL ) 
-	{
-#ifdef DEBUG_IP_FIREWALL
-		printk("ip_fw_ctl:  chain is empty\n");
-#endif
-		restore_flags(flags);
-		return( EINVAL );
+		/* If pointer writes are atomic then turning off
+		 * interrupts is not necessary. */
+		chainptr->chain = rule;
+		if (rule->branch) rule->branch->refcount++;
+		return 0;
 	}
 
-	ltmp=NULL;
-	was_found=0;
+	/* Find the last rule in the chain */
+	for (i = chainptr->chain; i->next; i = i->next); 
+	i->next = rule;
+	if (rule->branch) rule->branch->refcount++;
+	return 0;
+}
 
-	while( !was_found && ftmp != NULL )
-	{
-		matches=1;
-		if (ftmp->fw_src.s_addr!=frwl->fw_src.s_addr 
-		     ||  ftmp->fw_dst.s_addr!=frwl->fw_dst.s_addr
-		     ||  ftmp->fw_smsk.s_addr!=frwl->fw_smsk.s_addr
-		     ||  ftmp->fw_dmsk.s_addr!=frwl->fw_dmsk.s_addr
-		     ||  ftmp->fw_via.s_addr!=frwl->fw_via.s_addr
-		     ||  ftmp->fw_flg!=frwl->fw_flg)
-        		matches=0;
-
-		tport1=ftmp->fw_nsp+ftmp->fw_ndp;
-		tport2=frwl->fw_nsp+frwl->fw_ndp;
-		if (tport1!=tport2)
-		        matches=0;
-		else if (tport1!=0)
-		{
-			for (tmpnum=0;tmpnum < tport1 && tmpnum < IP_FW_MAX_PORTS;tmpnum++)
-        		if (ftmp->fw_pts[tmpnum]!=frwl->fw_pts[tmpnum])
-				matches=0;
-		}
-		if (strncmp(ftmp->fw_vianame, frwl->fw_vianame, IFNAMSIZ))
-		        matches=0;
-		if(matches)
-		{
-			was_found=1;
-			if (ltmp)
-			{
-				ltmp->fw_next=ftmp->fw_next;
-				kfree_s(ftmp,sizeof(*ftmp));
-				ftmp=ltmp->fw_next;
-        		}
-      			else
-      			{
-      				*chainptr=ftmp->fw_next; 
-	 			kfree_s(ftmp,sizeof(*ftmp));
-				ftmp=*chainptr;
-			}       
-		}
-		else
-		{
-			ltmp = ftmp;
-			ftmp = ftmp->fw_next;
-		 }
-	}
-	restore_flags(flags);
-	if (was_found)
+/* This function inserts a rule at the given position in the chain
+ * referenced by chainptr.  If position is 1 then this rule will
+ * become the new rule one. */
+static int insert_in_chain(struct ip_chain *chainptr, 
+			   struct ip_fwkernel *frwl,
+			   __u32 position)
+{
+	struct ip_fwkernel *f = chainptr->chain;
+	
+	FWC_HAVE_LOCK(fwc_wlocks);
+	/* special case if the position is number 1 */
+	if (position == 1) {
+		frwl->next = chainptr->chain;
+		if (frwl->branch) frwl->branch->refcount++; 
+		chainptr->chain = frwl;
 		return 0;
-	else
-		return(EINVAL);
+	}
+	position--;
+	while (--position && f != NULL) f = f->next;
+	if (f == NULL)
+		return EINVAL;
+	if (frwl->branch) frwl->branch->refcount++;
+	frwl->next = f->next;
+	
+	f->next = frwl;
+	return 0;
 }
 
-#endif  /* CONFIG_IP_ACCT || CONFIG_IP_FIREWALL */
+/* This function deletes the rule at position rulenum from a chain.
+ * With rulenum = 1 the first rule is deleted. */
 
-struct ip_fw *check_ipfw_struct(struct ip_fw *frwl, int len)
+static int del_num_from_chain(struct ip_chain *chainptr, __u32 rulenum)
 {
+	struct ip_fwkernel *i=chainptr->chain,*tmp;
+	
+	FWC_HAVE_LOCK(fwc_wlocks);
 
-	if ( len != sizeof(struct ip_fw) )
-	{
-#ifdef DEBUG_IP_FIREWALL
-		printk("ip_fw_ctl: len=%d, want %d\n",len, sizeof(struct ip_fw));
-#endif
-		return(NULL);
-	}
+	if (!chainptr->chain)
+		return ENOENT;
 
-	if ( (frwl->fw_flg & ~IP_FW_F_MASK) != 0 )
-	{
-#ifdef DEBUG_IP_FIREWALL
-		printk("ip_fw_ctl: undefined flag bits set (flags=%x)\n",
-			frwl->fw_flg);
-#endif
-		return(NULL);
-	}
+	/* Need a special case for the first rule */
+	if (rulenum == 1) {
+		/* store temp to allow for freeing up of memory */
+		tmp = chainptr->chain;
+	        if (chainptr->chain->branch) chainptr->chain->branch->refcount--;
+		chainptr->chain = chainptr->chain->next;
+		kfree(tmp); /* free memory that is now unused */
+	} else {  
+		rulenum--;
+		while (--rulenum && i->next ) i = i->next;
+		if (!i->next)
+			return ENOENT;
+		tmp = i->next;
+		if (i->next->branch)
+			i->next->branch->refcount--;
+		i->next = i->next->next;
+		kfree(tmp);
+	}
+	return 0;
+}
 
-#ifndef CONFIG_IP_TRANSPARENT_PROXY
-	if (frwl->fw_flg & IP_FW_F_REDIR) {
-#ifdef DEBUG_IP_FIREWALL
-		printk("ip_fw_ctl: unsupported flag IP_FW_F_REDIR\n");
-#endif
-		return(NULL);
-	}
-#endif
 
-#ifndef CONFIG_IP_MASQUERADE
-	if (frwl->fw_flg & IP_FW_F_MASQ) {
-#ifdef DEBUG_IP_FIREWALL
-		printk("ip_fw_ctl: unsupported flag IP_FW_F_MASQ\n");
-#endif
-		return(NULL);
-	}
-#endif
+/* This function deletes from a chain the rule matching the given rule.
+ * The rule that is deleted is the first occurrence of that rule. */
+static int del_rule_from_chain(struct ip_chain *chainptr, 
+			       struct ip_fwkernel *frwl)
+{
+	struct ip_fwkernel *ltmp,*ftmp = chainptr->chain ;
+	int was_found;
 
-	if ( (frwl->fw_flg & IP_FW_F_SRNG) && frwl->fw_nsp < 2 ) 
-	{
-#ifdef DEBUG_IP_FIREWALL
-		printk("ip_fw_ctl: src range set but fw_nsp=%d\n",
-			frwl->fw_nsp);
+	FWC_HAVE_LOCK(fwc_wlocks);
+	
+	/* Sure, we should compare marks, but since the `ipfwadm'
+	 * script uses it for an unholy hack... well, life is easier
+	 * this way.  We also mask it out of the flags word. --PR */
+	for (ltmp=NULL, was_found=0; 
+	     !was_found && ftmp != NULL;
+	     ltmp = ftmp,ftmp = ftmp->next) {	
+		if (ftmp->ipfw.fw_src.s_addr!=frwl->ipfw.fw_src.s_addr 
+		    || ftmp->ipfw.fw_dst.s_addr!=frwl->ipfw.fw_dst.s_addr
+		    || ftmp->ipfw.fw_smsk.s_addr!=frwl->ipfw.fw_smsk.s_addr
+		    || ftmp->ipfw.fw_dmsk.s_addr!=frwl->ipfw.fw_dmsk.s_addr
+#if 0
+		    || ftmp->ipfw.fw_flg!=frwl->ipfw.fw_flg
+#else
+		    || ((ftmp->ipfw.fw_flg & ~IP_FW_F_MARKABS) 
+			!= (frwl->ipfw.fw_flg & ~IP_FW_F_MARKABS))
 #endif
-		return(NULL);
-	}
+		    || ftmp->ipfw.fw_invflg!=frwl->ipfw.fw_invflg
+		    || ftmp->ipfw.fw_proto!=frwl->ipfw.fw_proto
+#if 0
+		    || ftmp->ipfw.fw_mark!=frwl->ipfw.fw_mark
+#endif
+		    || ftmp->ipfw.fw_redirpt!=frwl->ipfw.fw_redirpt
+		    || ftmp->ipfw.fw_spts[0]!=frwl->ipfw.fw_spts[0]
+		    || ftmp->ipfw.fw_spts[1]!=frwl->ipfw.fw_spts[1]
+		    || ftmp->ipfw.fw_dpts[0]!=frwl->ipfw.fw_dpts[0]
+		    || ftmp->ipfw.fw_dpts[1]!=frwl->ipfw.fw_dpts[1]
+		    || ftmp->ipfw.fw_outputsize!=frwl->ipfw.fw_outputsize) {
+			duprintf("del_rule_from_chain: mismatch:"
+				 "src:%u/%u dst:%u/%u smsk:%u/%u dmsk:%u/%u "
+				 "flg:%hX/%hX invflg:%hX/%hX proto:%u/%u "
+				 "mark:%u/%u "
+				 "ports:%hu-%hu/%hu-%hu %hu-%hu/%hu-%hu "
+				 "outputsize:%hu-%hu\n",
+				 ftmp->ipfw.fw_src.s_addr,
+				 frwl->ipfw.fw_src.s_addr,
+				 ftmp->ipfw.fw_dst.s_addr,
+				 frwl->ipfw.fw_dst.s_addr,
+				 ftmp->ipfw.fw_smsk.s_addr,
+				 frwl->ipfw.fw_smsk.s_addr,
+				 ftmp->ipfw.fw_dmsk.s_addr,
+				 frwl->ipfw.fw_dmsk.s_addr,
+				 ftmp->ipfw.fw_flg,
+				 frwl->ipfw.fw_flg,
+				 ftmp->ipfw.fw_invflg,
+				 frwl->ipfw.fw_invflg,
+				 ftmp->ipfw.fw_proto,
+				 frwl->ipfw.fw_proto,
+				 ftmp->ipfw.fw_mark,
+				 frwl->ipfw.fw_mark,
+				 ftmp->ipfw.fw_spts[0],
+				 frwl->ipfw.fw_spts[0],
+				 ftmp->ipfw.fw_spts[1],
+				 frwl->ipfw.fw_spts[1],
+				 ftmp->ipfw.fw_dpts[0],
+				 frwl->ipfw.fw_dpts[0],
+				 ftmp->ipfw.fw_dpts[1],
+				 frwl->ipfw.fw_dpts[1],
+				 ftmp->ipfw.fw_outputsize,
+				 frwl->ipfw.fw_outputsize);
+			continue;
+		}
 
-	if ( (frwl->fw_flg & IP_FW_F_DRNG) && frwl->fw_ndp < 2 ) 
-	{
-#ifdef DEBUG_IP_FIREWALL
-		printk("ip_fw_ctl: dst range set but fw_ndp=%d\n",
-			frwl->fw_ndp);
-#endif
-		return(NULL);
+		if (strncmp(ftmp->ipfw.fw_vianame, 
+			    frwl->ipfw.fw_vianame, 
+			    IFNAMSIZ)) {
+			duprintf("del_rule_from_chain: if mismatch: %s/%s\n",
+				 ftmp->ipfw.fw_vianame, 
+				 frwl->ipfw.fw_vianame);
+		        continue;
+		}
+		if (ftmp->branch != frwl->branch) {
+			duprintf("del_rule_from_chain: branch mismatch: "
+				 "%s/%s\n", 
+				 ftmp->branch?ftmp->branch->label:"(null)",
+				 frwl->branch?frwl->branch->label:"(null)");
+			continue;
+		}
+		if (ftmp->branch == NULL 
+		    && ftmp->simplebranch != frwl->simplebranch) {
+			duprintf("del_rule_from_chain: simplebranch mismatch: "
+				 "%i/%i\n", 
+				 ftmp->simplebranch, frwl->simplebranch);
+			continue;
+		}
+		was_found = 1;
+		if (ftmp->branch)
+			ftmp->branch->refcount--;
+		if (ltmp)
+			ltmp->next = ftmp->next;
+		else
+			chainptr->chain = ftmp->next; 
+		kfree(ftmp);
+		break;
 	}
-
-	if ( frwl->fw_nsp + frwl->fw_ndp > (frwl->fw_flg & IP_FW_F_REDIR ? IP_FW_MAX_PORTS - 1 : IP_FW_MAX_PORTS) ) 
-	{
-#ifdef DEBUG_IP_FIREWALL
-		printk("ip_fw_ctl: too many ports (%d+%d)\n",
-			frwl->fw_nsp,frwl->fw_ndp);
-#endif
-		return(NULL);
+	
+	if (was_found)
+		return 0;
+	else {
+		duprintf("del_rule_from_chain: no matching rule found\n");
+		return EINVAL;
 	}
-
-	return frwl;
 }
 
+/* This function takes the label of a chain and deletes the first
+ * chain with that name.  No special cases required for the built-in
+ * chains as they have their refcount initialised to 1 so that they are
+ * never deleted.  */
+static int del_chain(ip_chainlabel label) 
+{
+	struct ip_chain *tmp,*tmp2;
 
+	FWC_HAVE_LOCK(fwc_wlocks);
+	/* Corner case: return EBUSY not ENOENT for first elem ("input") */
+	if (strcmp(label, ip_fw_chains->label) == 0) 
+		return EBUSY;
 
+	for (tmp = ip_fw_chains; tmp->next; tmp = tmp->next)
+		if(strcmp(tmp->next->label,label) == 0)
+			break;
 
-#ifdef CONFIG_IP_ACCT
+	tmp2 = tmp->next;
+	if (!tmp2)
+		return ENOENT;
 
-int ip_acct_ctl(int stage, void *m, int len)
-{
-	if ( stage == IP_ACCT_FLUSH )
-	{
-		free_fw_chain(&ip_acct_chain);
-		return(0);
-	}  
-	if ( stage == IP_ACCT_ZERO )
-	{
-		zero_fw_chain(ip_acct_chain);
-		return(0);
-	}
-	if ( stage == IP_ACCT_INSERT || stage == IP_ACCT_APPEND ||
-	  				stage == IP_ACCT_DELETE )
-	{
-		struct ip_fw *frwl;
+	if (tmp2->refcount)
+		return EBUSY;
 
-		if (!(frwl=check_ipfw_struct(m,len)))
-			return (EINVAL);
+	if (tmp2->chain)
+		return ENOTEMPTY;
+	
+	tmp->next = tmp2->next;
+	kfree(tmp2);
+	return 0;
+}
+
+/* This is a function to initialise a chain.  Built-in chains start with
+ * refcount = 1 so that they cannot be deleted.  User-defined chains
+ * start with refcount = 0 so they can be deleted. */
+static struct ip_chain *ip_init_chain(ip_chainlabel name, 
+				      __u32 ref, 
+				      int policy)
+{
+	unsigned int i;
+	struct ip_chain *label 
+		= kmalloc(SIZEOF_STRUCT_IP_CHAIN, GFP_KERNEL);
+	if (label == NULL)
+		panic("Can't kmalloc for firewall chains.\n");
+	strcpy(label->label,name);
+	label->next = NULL;
+	label->chain = NULL;
+	label->refcount = ref;
+	label->policy = policy;
+	for (i = 0; i < smp_num_cpus*2; i++) {
+		label->reent[i].counters.pcnt = label->reent[i].counters.bcnt 
+			= 0;
+		label->reent[i].prevchain = NULL;
+		label->reent[i].prevrule = NULL;
+	}
+
+	return label;
+}
+
+/* Create a new, empty user-defined chain named `label' and append it to
+ * ip_fw_chains.  Returns EEXIST if a chain of that name already exists. */
+static int create_chain(ip_chainlabel label) 
+{
+	struct ip_chain *tmp;
+
+	FWC_HAVE_LOCK(fwc_wlocks);
+	for (tmp = ip_fw_chains; tmp->next; tmp = tmp->next)
+		if (strcmp(tmp->label,label) == 0)
+			return EEXIST;
+	
+	if (strcmp(tmp->label,label) == 0)
+		return EEXIST;
+	
+	tmp->next = ip_init_chain(label, 0, FW_SKIP); /* refcount is
+					      * zero since this is a
+					      * user-defined chain
+					      * and therefore can be
+					      * deleted */
+	return 0;
+}
+
+/* This function simply changes the policy on one of the built-in
+ * chains.  Checking must be done before this is called to ensure that
+ * chainptr is pointing to one of the three possible chains. */
+static int change_policy(struct ip_chain *chainptr, int policy)
+{ 
+	FWC_HAVE_LOCK(fwc_wlocks);
+	chainptr->policy = policy;
+	return 0;
+}
+
+/* This function takes an ip_fwuser and converts it to an ip_fwkernel.  It
+ * also performs some checks on the structure. */
+static struct ip_fwkernel *convert_ipfw(struct ip_fwuser *fwuser, int *errno)
+{
+	struct ip_fwkernel *fwkern;
+
+	if ( (fwuser->ipfw.fw_flg & ~IP_FW_F_MASK) != 0 ) {
+		duprintf("convert_ipfw: undefined flag bits set (flags=%x)\n",
+			 fwuser->ipfw.fw_flg);
+		*errno = EINVAL;
+		return NULL;
+	}
+
+#if DEBUG_IP_FIREWALL_USER
+	/* These are sanity checks that don't really matter.
+	 * We can get rid of these once testing is complete. 
+	 */
+	if ((fwuser->ipfw.fw_flg & IP_FW_F_TCPSYN)
+	    && ((fwuser->ipfw.fw_invflg & IP_FW_INV_PROTO)
+		|| fwuser->ipfw.fw_proto != IPPROTO_TCP)) {
+		duprintf("convert_ipfw: TCP SYN flag set but proto != TCP!\n");
+		*errno = EINVAL;
+		return NULL;
+	}
+
+	if (strcmp(fwuser->label, IP_FW_LABEL_REDIRECT) != 0
+	    && fwuser->ipfw.fw_redirpt != 0) {
+		duprintf("convert_ipfw: Target not REDIR but redirpt != 0!\n");
+		*errno = EINVAL;
+		return NULL;
+	}
+
+	if ((!(fwuser->ipfw.fw_flg & IP_FW_F_FRAG) 
+	     && (fwuser->ipfw.fw_invflg & IP_FW_INV_FRAG))
+	    || (!(fwuser->ipfw.fw_flg & IP_FW_F_TCPSYN) 
+		&& (fwuser->ipfw.fw_invflg & IP_FW_INV_SYN))) {
+		duprintf("convert_ipfw: Can't have INV flag if flag unset!\n");
+		*errno = EINVAL;
+		return NULL;
+	}
+
+	if (((fwuser->ipfw.fw_invflg & IP_FW_INV_SRCPT) 
+	     && fwuser->ipfw.fw_spts[0] == 0 
+	     && fwuser->ipfw.fw_spts[1] == 0xFFFF)
+	    || ((fwuser->ipfw.fw_invflg & IP_FW_INV_DSTPT) 
+		&& fwuser->ipfw.fw_dpts[0] == 0 
+		&& fwuser->ipfw.fw_dpts[1] == 0xFFFF)
+	    || ((fwuser->ipfw.fw_invflg & IP_FW_INV_VIA) 
+		&& (fwuser->ipfw.fw_vianame)[0] == '\0')
+	    || ((fwuser->ipfw.fw_invflg & IP_FW_INV_SRCIP)
+		&& fwuser->ipfw.fw_smsk.s_addr == 0)
+	    || ((fwuser->ipfw.fw_invflg & IP_FW_INV_DSTIP)
+		&& fwuser->ipfw.fw_dmsk.s_addr == 0)) {
+		duprintf("convert_ipfw: INV flag makes rule unmatchable!\n");
+		*errno = EINVAL;
+		return NULL;
+	}
+
+	if ((fwuser->ipfw.fw_flg & IP_FW_F_FRAG)
+	    && !(fwuser->ipfw.fw_invflg & IP_FW_INV_FRAG)
+	    && (fwuser->ipfw.fw_spts[0] != 0
+		|| fwuser->ipfw.fw_spts[1] != 0xFFFF
+		|| fwuser->ipfw.fw_dpts[0] != 0
+		|| fwuser->ipfw.fw_dpts[1] != 0xFFFF
+		|| (fwuser->ipfw.fw_flg & IP_FW_F_TCPSYN))) {
+		duprintf("convert_ipfw: Can't test ports or SYN with frag!\n");
+		*errno = EINVAL;
+		return NULL;
+	}
+#endif
+
+	if ((fwuser->ipfw.fw_spts[0] != 0
+	     || fwuser->ipfw.fw_spts[1] != 0xFFFF
+	     || fwuser->ipfw.fw_dpts[0] != 0
+	     || fwuser->ipfw.fw_dpts[1] != 0xFFFF)
+	    && ((fwuser->ipfw.fw_invflg & IP_FW_INV_PROTO)
+		|| (fwuser->ipfw.fw_proto != IPPROTO_TCP
+		    && fwuser->ipfw.fw_proto != IPPROTO_UDP
+		    && fwuser->ipfw.fw_proto != IPPROTO_ICMP))) {
+		duprintf("convert_ipfw: Can only test ports for TCP/UDP/ICMP!\n");
+		*errno = EINVAL;
+		return NULL;
+	}
+
+	fwkern = kmalloc(SIZEOF_STRUCT_IP_FW_KERNEL, GFP_KERNEL);
+	if (!fwkern) {
+		duprintf("convert_ipfw: kmalloc failed!\n");
+		*errno = ENOMEM;
+		return NULL;
+	}
+	memcpy(&fwkern->ipfw,&fwuser->ipfw,sizeof(struct ip_fw));
+
+	if (!find_special(fwuser->label, &fwkern->simplebranch)) {
+		fwkern->branch = find_label(fwuser->label);
+		if (!fwkern->branch) { 
+			duprintf("convert_ipfw: chain doesn't exist `%s'.\n",
+				 fwuser->label);
+			kfree(fwkern);
+			*errno = ENOENT;
+			return NULL;
+		} else if (fwkern->branch == IP_FW_INPUT_CHAIN 
+			   || fwkern->branch == IP_FW_FORWARD_CHAIN
+			   || fwkern->branch == IP_FW_OUTPUT_CHAIN) {
+			duprintf("convert_ipfw: Can't branch to builtin chain `%s'.\n",
+				 fwuser->label);
+			kfree(fwkern);
+			*errno = ENOENT;
+			return NULL; 
+		}
+	} else 
+		fwkern->branch = NULL;
+	memset(fwkern->counters, 0, sizeof(struct ip_counters)*NUM_SLOTS);
+
+	/* Handle empty vianame by making it a wildcard */
+	if ((fwkern->ipfw.fw_vianame)[0] == '\0')
+	    fwkern->ipfw.fw_flg |= IP_FW_F_WILDIF;
 
-		switch (stage) 
-		{
-			case IP_ACCT_INSERT:
-				return( insert_in_chain(&ip_acct_chain,frwl,len));
-			case IP_ACCT_APPEND:
-				return( append_to_chain(&ip_acct_chain,frwl,len));
-		    	case IP_ACCT_DELETE:
-				return( del_from_chain(&ip_acct_chain,frwl));
-			default:
-				/*
- 				 *	Should be panic but... (Why ??? - AC)
-				 */
-#ifdef DEBUG_IP_FIREWALL
-				printk("ip_acct_ctl:  unknown request %d\n",stage);
-#endif
-				return(EINVAL);
-		}
-	}
-#ifdef DEBUG_IP_FIREWALL
-	printk("ip_acct_ctl:  unknown request %d\n",stage);
-#endif
-	return(EINVAL);
+	fwkern->next = NULL;
+	return fwkern;
 }
-#endif
 
-#ifdef CONFIG_IP_FIREWALL
-int ip_fw_ctl(int stage, void *m, int len)
+int ip_fw_ctl(int cmd, void *m, int len)
 {
-	int cmd, fwtype;
-
-	cmd = stage & IP_FW_COMMAND;
-	fwtype = (stage & IP_FW_TYPE) >> IP_FW_SHIFT;
+	int ret;
+	struct ip_chain *chain;
+	unsigned long flags;
 
-	if ( cmd == IP_FW_FLUSH )
-	{
-		free_fw_chain(chains[fwtype]);
-		return(0);
-	}  
+	FWC_WRITE_LOCK_IRQ(&ip_fw_lock, flags);
 
-	if ( cmd == IP_FW_ZERO )
-	{
-		zero_fw_chain(*chains[fwtype]);
-		return(0);
-	}  
+	switch (cmd) {
+	case IP_FW_FLUSH:
+		if (len != sizeof(ip_chainlabel) || !check_label(m))
+			ret = EINVAL;
+		else if ((chain = find_label(m)) == NULL)
+			ret = ENOENT;		
+		else ret = clear_fw_chain(chain);
+		break;
 
-	if ( cmd == IP_FW_POLICY )
-	{
-		int *tmp_policy_ptr;
-		tmp_policy_ptr=(int *)m;
-		*policies[fwtype] = *tmp_policy_ptr;
-		return 0;
-	}
+	case IP_FW_ZERO:
+		if (len != sizeof(ip_chainlabel) || !check_label(m))
+			ret = EINVAL;
+		else if ((chain = find_label(m)) == NULL)
+			ret = ENOENT;
+		else ret = zero_fw_chain(chain);
+		break;
 
-	if ( cmd == IP_FW_CHECK )
-	{
-		struct device *viadev;
-		struct ip_fwpkt *ipfwp;
+	case IP_FW_CHECK: {
+		struct ip_fwtest *new = m;
 		struct iphdr *ip;
 
-		if ( len != sizeof(struct ip_fwpkt) )
-		{
-#ifdef DEBUG_IP_FIREWALL
-			printk("ip_fw_ctl: length=%d, expected %d\n",
-				len, sizeof(struct ip_fwpkt));
-#endif
-			return( EINVAL );
-		}
+		/* Don't need write lock. */
+		FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags);
+		
+		if (len != sizeof(struct ip_fwtest) || !check_label(m))
+			return EINVAL;
 
-	 	ipfwp = (struct ip_fwpkt *)m;
-	 	ip = &(ipfwp->fwp_iph);
+		/* Need readlock to do find_label */
+		FWC_READ_LOCK(&ip_fw_lock);
 
-		if ( !(viadev = dev_get(ipfwp->fwp_vianame)) ) {
-#ifdef DEBUG_IP_FIREWALL
-			printk("ip_fw_ctl: invalid device \"%s\"\n", ipfwp->fwp_vianame);
-#endif
-			return(EINVAL);
-		} else if ( ip->ihl != sizeof(struct iphdr) / sizeof(int)) {
-#ifdef DEBUG_IP_FIREWALL
-			printk("ip_fw_ctl: ip->ihl=%d, want %d\n",ip->ihl,
-					sizeof(struct iphdr)/sizeof(int));
-#endif
-			return(EINVAL);
-		}
-
-		switch (ip_fw_chk(ip, viadev, NULL, *chains[fwtype],
-				*policies[fwtype], IP_FW_MODE_CHK))
-		{
-			case FW_ACCEPT:
-				return(0);
-	    		case FW_REDIRECT:
-				return(ECONNABORTED);
-	    		case FW_MASQUERADE:
-				return(ECONNRESET);
-	    		case FW_REJECT:
-				return(ECONNREFUSED);
-			default: /* FW_BLOCK */
-				return(ETIMEDOUT);
+		if ((chain = find_label(new->fwt_label)) == NULL)
+			ret = ENOENT;
+		else {
+			ip = &(new->fwt_packet.fwp_iph);
+
+			if (ip->ihl != sizeof(struct iphdr) / sizeof(int)) {
+			    duprintf("ip_fw_ctl: ip->ihl=%d, want %d\n",
+				     ip->ihl,
+				     sizeof(struct iphdr) / sizeof(int));
+			    ret = EINVAL;
+			}
+			else {
+				ret = ip_fw_check(ip, new->fwt_packet.fwp_vianame,
+						  NULL, chain,
+						  NULL, SLOT_NUMBER(), 1);
+				switch (ret) {
+				case FW_ACCEPT:
+					ret = 0; break;
+				case FW_REDIRECT:
+					ret = ECONNABORTED; break;
+				case FW_MASQUERADE:
+					ret = ECONNRESET; break;
+				case FW_REJECT:
+					ret = ECONNREFUSED; break;
+					/* Hack to help diag; these only get
+					   returned when testing. */
+				case FW_SKIP+1:
+					ret = ELOOP; break;
+				case FW_SKIP:
+					ret = ENFILE; break;
+				default: /* FW_BLOCK */
+					ret = ETIMEDOUT; break;
+				}
+			}
 		}
+		FWC_READ_UNLOCK(&ip_fw_lock);
+		return ret;
 	}
 
-	if ( cmd == IP_FW_MASQ_TIMEOUTS )
-	{
+	case IP_FW_MASQ_TIMEOUTS: {
 #ifdef CONFIG_IP_MASQUERADE
 		struct ip_fw_masq *masq;
 
-		if ( len != sizeof(struct ip_fw_masq) )
-		{
-#ifdef DEBUG_IP_FIREWALL
-			printk("ip_fw_ctl (masq): length %d, expected %d\n",
+		if (len != sizeof(struct ip_fw_masq)) {
+			duprintf("ip_fw_ctl (masq): length %d, expected %d\n",
 				len, sizeof(struct ip_fw_masq));
-
-#endif
-			return( EINVAL );
-		}
-
-		masq = (struct ip_fw_masq *) m;
-
-		if (masq->tcp_timeout)
-		{
-			ip_masq_expire->tcp_timeout = masq->tcp_timeout;
+			ret = EINVAL;
 		}
-
-		if (masq->tcp_fin_timeout)
-		{
-			ip_masq_expire->tcp_fin_timeout = masq->tcp_fin_timeout;
-		}
-
-		if (masq->udp_timeout)
-		{
-			ip_masq_expire->udp_timeout = masq->udp_timeout;
+		else {
+			masq = (struct ip_fw_masq *)m;
+			if (masq->tcp_timeout)
+				ip_masq_expire->tcp_timeout 
+					= masq->tcp_timeout;
+
+			if (masq->tcp_fin_timeout)
+				ip_masq_expire->tcp_fin_timeout 
+					= masq->tcp_fin_timeout;
+
+			if (masq->udp_timeout)
+				ip_masq_expire->udp_timeout 
+					= masq->udp_timeout;
+			ret = 0;
 		}
-
-		return 0;
 #else
-		return( EINVAL );
+		ret = EINVAL;
 #endif
 	}
+	break;
 
-/*
- *	Here we really working hard-adding new elements
- *	to blocking/forwarding chains or deleting 'em
- */
-
-	if ( cmd == IP_FW_INSERT || cmd == IP_FW_APPEND || cmd == IP_FW_DELETE )
-	{
-		struct ip_fw *frwl;
-		int fwtype;
-
-		frwl=check_ipfw_struct(m,len);
-		if (frwl==NULL)
-			return (EINVAL);
-		fwtype = (stage & IP_FW_TYPE) >> IP_FW_SHIFT;
-		
-		switch (cmd) 
-		{
-			case IP_FW_INSERT:
-				return(insert_in_chain(chains[fwtype],frwl,len));
-			case IP_FW_APPEND:
-				return(append_to_chain(chains[fwtype],frwl,len));
-			case IP_FW_DELETE:
-				return(del_from_chain(chains[fwtype],frwl));
+	case IP_FW_REPLACE: {
+		struct ip_fwkernel *ip_fwkern;
+		struct ip_fwnew *new = m;
+
+		if (len != sizeof(struct ip_fwnew) 
+		    || !check_label(new->fwn_label))
+			ret = EINVAL;
+		else if ((chain = find_label(new->fwn_label)) == NULL)
+			ret = ENOENT;
+		else if ((ip_fwkern = convert_ipfw(&new->fwn_rule, &ret))
+			 != NULL)
+			ret = replace_in_chain(chain, ip_fwkern, 
+					       new->fwn_rulenum);
+	}
+	break;
+
+	case IP_FW_APPEND: {
+		struct ip_fwchange *new = m;
+		struct ip_fwkernel *ip_fwkern;
+
+		if (len != sizeof(struct ip_fwchange)
+		    || !check_label(new->fwc_label))
+			ret = EINVAL;
+		else if ((chain = find_label(new->fwc_label)) == NULL)
+			ret = ENOENT;
+		else if ((ip_fwkern = convert_ipfw(&new->fwc_rule, &ret))
+			 != NULL)
+			ret = append_to_chain(chain, ip_fwkern);
+	}
+	break;
+
+	case IP_FW_INSERT: {
+		struct ip_fwkernel *ip_fwkern;
+		struct ip_fwnew *new = m;
+
+		if (len != sizeof(struct ip_fwnew)
+		    || !check_label(new->fwn_label))
+			ret = EINVAL;
+		else if ((chain = find_label(new->fwn_label)) == NULL)
+			ret = ENOENT;
+		else if ((ip_fwkern = convert_ipfw(&new->fwn_rule, &ret))
+			 != NULL)
+			ret = insert_in_chain(chain, ip_fwkern,
+					      new->fwn_rulenum);
+	}
+	break;
+
+	case IP_FW_DELETE: {
+		struct ip_fwchange *new = m;
+		struct ip_fwkernel *ip_fwkern;
+
+		if (len != sizeof(struct ip_fwchange)
+		    || !check_label(new->fwc_label))
+			ret = EINVAL;
+		else if ((chain = find_label(new->fwc_label)) == NULL)
+			ret = ENOENT;
+		else if ((ip_fwkern = convert_ipfw(&new->fwc_rule, &ret))
+			 != NULL)
+			ret = del_rule_from_chain(chain, ip_fwkern);
+	}
+	break;
+
+	case IP_FW_DELETE_NUM: {
+		struct ip_fwdelnum *new = m;
+
+		if (len != sizeof(struct ip_fwdelnum)
+		    || !check_label(new->fwd_label))
+			ret = EINVAL;
+		else if ((chain = find_label(new->fwd_label)) == NULL)
+			ret = ENOENT;		
+		else ret = del_num_from_chain(chain, new->fwd_rulenum);
+	}
+	break;
+
+	case IP_FW_CREATECHAIN: {
+		if (len != sizeof(ip_chainlabel)) {
+			duprintf("create_chain: bad size %i\n", len);
+			ret = EINVAL;
+		}
+		else ret = create_chain(m);
+	}
+	break;
+
+	case IP_FW_DELETECHAIN: {
+		if (len != sizeof(ip_chainlabel)) {
+			duprintf("delete_chain: bad size %i\n", len);
+			ret = EINVAL;
+		}
+		else ret = del_chain(m);
+	}
+	break;
+
+	case IP_FW_POLICY: {
+		struct ip_fwpolicy *new = m;
+
+		if (len != sizeof(struct ip_fwpolicy)
+		    || !check_label(new->fwp_label))
+			ret = EINVAL;
+		else if ((chain = find_label(new->fwp_label)) == NULL)
+			ret = ENOENT;
+		else if (chain != IP_FW_INPUT_CHAIN
+			 && chain != IP_FW_FORWARD_CHAIN
+			 && chain != IP_FW_OUTPUT_CHAIN) {
+			duprintf("change_policy: can't change policy on user" 
+				 " defined chain.\n");
+			ret = EINVAL;
+		}
+		else {
+		        int pol = FW_SKIP;
+			find_special(new->fwp_policy, &pol);
+
+			switch(pol) {
+			case FW_MASQUERADE:
+				if (chain != IP_FW_FORWARD_CHAIN) {
+					ret = EINVAL;
+					break;
+				}
+				/* Fall thru... */
+			case FW_BLOCK:
+			case FW_ACCEPT:
+			case FW_REJECT:
+				ret = change_policy(chain, pol);
+				break;
 			default:
-			/*
-	 		 *	Should be panic but... (Why are BSD people panic obsessed ??)
-			 */
-#ifdef DEBUG_IP_FIREWALL
-				printk("ip_fw_ctl:  unknown request %d\n",stage);
-#endif
-				return(EINVAL);
+			        duprintf("change_policy: bad policy `%s'\n",
+					 new->fwp_policy);
+				ret = EINVAL;
+			}
 		}
-	} 
+		break;
+		
+	}
+	default:
+		duprintf("ip_fw_ctl:  unknown request %d\n",cmd);
+		ret = EINVAL;
+	}
 
-#ifdef DEBUG_IP_FIREWALL
-	printk("ip_fw_ctl:  unknown request %d\n",stage);
-#endif
-	return(EINVAL);
+	FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags);
+	return ret;
 }
-#endif /* CONFIG_IP_FIREWALL */
 
-#ifdef CONFIG_PROC_FS
-#if defined(CONFIG_IP_FIREWALL) || defined(CONFIG_IP_ACCT)
+/* Returns bytes used - doesn't NUL terminate */
+static int dump_rule(char *buffer, 
+		     const char *chainlabel, 
+		     const struct ip_fwkernel *rule)
+{
+	int len;
+	unsigned int i;
+	__u64 packets = 0, bytes = 0;
+
+	FWC_HAVE_LOCK(fwc_wlocks);
+	for (i = 0; i < NUM_SLOTS; i++) {
+		packets += rule->counters[i].pcnt;
+		bytes += rule->counters[i].bcnt;
+	}
+
+	len=sprintf(buffer,
+		    "%9s "			/* Chain name */
+		    "%08lX/%08lX->%08lX/%08lX "	/* Source & Destination IPs */
+		    "%.16s "			/* Interface */
+		    "%hX %hX "			/* fw_flg and fw_invflg fields */
+		    "%hu "			/* Protocol */
+		    "%-9u %-9u %-9u %-9u "	/* Packet & byte counters */
+		    "%hu-%hu %hu-%hu "		/* Source & Dest port ranges */
+		    "A%02X X%02X "		/* TOS and and xor masks */
+		    "%08X "			/* Redirection port */
+		    "%u "			/* fw_mark field */
+		    "%hu "			/* output size */
+		    "%9s\n",			/* Target */
+		    chainlabel,
+		    ntohl(rule->ipfw.fw_src.s_addr),
+		    ntohl(rule->ipfw.fw_smsk.s_addr),
+		    ntohl(rule->ipfw.fw_dst.s_addr),
+		    ntohl(rule->ipfw.fw_dmsk.s_addr),
+		    (rule->ipfw.fw_vianame)[0] ? rule->ipfw.fw_vianame : "-",
+		    rule->ipfw.fw_flg,
+		    rule->ipfw.fw_invflg,
+		    rule->ipfw.fw_proto,
+		    (__u32)(packets >> 32), (__u32)packets,
+		    (__u32)(bytes >> 32), (__u32)bytes,
+		    rule->ipfw.fw_spts[0], rule->ipfw.fw_spts[1],
+		    rule->ipfw.fw_dpts[0], rule->ipfw.fw_dpts[1], 
+		    rule->ipfw.fw_tosand, rule->ipfw.fw_tosxor, 
+		    rule->ipfw.fw_redirpt, 
+		    rule->ipfw.fw_mark, 
+		    rule->ipfw.fw_outputsize,
+		    branchname(rule->branch,rule->simplebranch));
+
+	duprintf("dump_rule: %i bytes done.\n", len);
+	return len;
+}
 
-static int ip_chain_procinfo(int stage, char *buffer, char **start,
+/* File offset is actually in records, not bytes. */
+static int ip_chain_procinfo(char *buffer, char **start,
 			     off_t offset, int length, int reset)
 {
-	off_t pos=0, begin=0;
-	struct ip_fw *i;
+	struct ip_chain *i;
+	struct ip_fwkernel *j = ip_fw_chains->chain;
 	unsigned long flags;
-	int len, p;
+	int len = 0;
 	int last_len = 0;
-	
+	off_t upto = 0;
 
-	switch(stage)
-	{
-#ifdef CONFIG_IP_FIREWALL
-		case IP_FW_IN:
-			i = ip_fw_in_chain;
-			len=sprintf(buffer, "IP firewall input rules, default %d\n",
-				ip_fw_in_policy);
-			break;
-		case IP_FW_OUT:
-			i = ip_fw_out_chain;
-			len=sprintf(buffer, "IP firewall output rules, default %d\n",
-				ip_fw_out_policy);
-			break;
-		case IP_FW_FWD:
-			i = ip_fw_fwd_chain;
-			len=sprintf(buffer, "IP firewall forward rules, default %d\n",
-				ip_fw_fwd_policy);
-			break;
-#endif
-#ifdef CONFIG_IP_ACCT
-		case IP_FW_ACCT:
-			i = ip_acct_chain;
-			len=sprintf(buffer,"IP accounting rules\n");
-			break;
-#endif
-		default:
-			/* this should never be reached, but safety first... */
-			i = NULL;
-			len=0;
-			break;
+	duprintf("Offset starts at %lu\n", offset);
+	duprintf("ip_fw_chains is 0x%0lX\n", (unsigned long int)ip_fw_chains);
+
+	/* Need a write lock to lock out ``readers'' which update counters. */
+	FWC_WRITE_LOCK_IRQ(&ip_fw_lock, flags);
+
+	for (i = ip_fw_chains; i; i = i->next) {
+	    for (j = i->chain; j; j = j->next) {
+		if (upto == offset) break;
+		duprintf("Skipping rule in chain `%s'\n", 
+			 i->label);
+		upto++;
+	    }
+	    if (upto == offset) break;
+	}
+
+	/* Don't init j first time, or once i = NULL */
+	for (; i; (void)((i = i->next) && (j = i->chain))) {
+		duprintf("Dumping chain `%s'\n", i->label);
+		for (; j; j = j->next, upto++, last_len = len)
+		{
+			len += dump_rule(buffer+len, i->label, j);
+			if (len > length) {
+				duprintf("Dumped to %i (past %i).  "
+					 "Moving back to %i.\n",
+					 len, length, last_len);
+				len = last_len;
+				goto outside;
+			}
+			else if (reset)
+				memset(j->counters, 0,
+				       sizeof(struct ip_counters)*NUM_SLOTS);
+		}
 	}
+outside:
+	FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags);
+	buffer[len] = '\0';
+
+	duprintf("ip_chain_procinfo: Length = %i (of %i).  Offset = %li.\n",
+		 len, length, upto);
+	/* `start' hack - see fs/proc/generic.c line ~165 */
+	*start=(char *)((unsigned int)upto-offset);
+	return len;
+}
 
-	save_flags(flags);
-	cli();
-	
-	while(i!=NULL)
+static int ip_chain_name_procinfo(char *buffer, char **start,
+				  off_t offset, int length, int reset)
+{
+	struct ip_chain *i;
+	int len = 0,last_len = 0;
+	off_t pos = 0,begin = 0;
+	unsigned long flags;
+
+	/* Need a write lock to lock out ``readers'' which update counters. */
+	FWC_WRITE_LOCK_IRQ(&ip_fw_lock, flags);
+
+	for (i = ip_fw_chains; i; i = i->next)
 	{
-		len+=sprintf(buffer+len,"%08lX/%08lX->%08lX/%08lX %.16s %08lX %X ",
-			ntohl(i->fw_src.s_addr),ntohl(i->fw_smsk.s_addr),
-			ntohl(i->fw_dst.s_addr),ntohl(i->fw_dmsk.s_addr),
-			(i->fw_vianame)[0] ? i->fw_vianame : "-",
-			ntohl(i->fw_via.s_addr),i->fw_flg);
-		/* 10 is enough for a 32 bit box but the counters are 64bit on
-		   the Alpha and Ultrapenguin */
-		len+=sprintf(buffer+len,"%u %u %-20lu %-20lu",
-			i->fw_nsp,i->fw_ndp, i->fw_pcnt,i->fw_bcnt);
-		for (p = 0; p < IP_FW_MAX_PORTS; p++)
-			len+=sprintf(buffer+len, " %u", i->fw_pts[p]);
-		len+=sprintf(buffer+len, " A%02X X%02X", i->fw_tosand, i->fw_tosxor);
-		buffer[len++]='\n';
-		buffer[len]='\0';
+		unsigned int j;
+		__u32 packetsHi = 0, packetsLo = 0, bytesHi = 0, bytesLo = 0;
+
+		for (j = 0; j < NUM_SLOTS; j++) {
+			packetsLo += i->reent[j].counters.pcnt & 0xFFFFFFFF;
+			packetsHi += ((i->reent[j].counters.pcnt >> 32) 
+				      & 0xFFFFFFFF);
+			bytesLo += i->reent[j].counters.bcnt & 0xFFFFFFFF;
+			bytesHi += ((i->reent[j].counters.bcnt >> 32) 
+				    & 0xFFFFFFFF);
+		}
+
+		/* print the label and the policy */
+		len+=sprintf(buffer+len,"%s %s %i %u %u %u %u\n",
+			     i->label,branchname(NULL, i->policy),i->refcount,
+			     packetsHi, packetsLo, bytesHi, bytesLo);
 		pos=begin+len;
-		if(pos<offset)
-		{
+		if(pos<offset) {
 			len=0;
 			begin=pos;
 		}
-		else if(pos>offset+length)
-		{
+		else if(pos>offset+length) {
 			len = last_len;
 			break;		
 		}
-		else if(reset)
-		{
-			/* This needs to be done at this specific place! */
-			i->fw_pcnt=0L;
-			i->fw_bcnt=0L;
-		}
+		
 		last_len = len;
-		i=i->fw_next;
 	}
-	restore_flags(flags);
-	*start=buffer+(offset-begin);
+	FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags);
+
+	*start = buffer+(offset-begin);
 	len-=(offset-begin);
 	if(len>length)
-		len=length;	
+		len=length;
 	return len;
 }
-#endif
-
-#ifdef CONFIG_IP_ACCT
-
-static int ip_acct_procinfo(char *buffer, char **start, off_t offset,
-			    int length, int reset)
-{
-	return ip_chain_procinfo(IP_FW_ACCT, buffer,start, offset,length,
-				 reset);
-}
-
-#endif
-
-#ifdef CONFIG_IP_FIREWALL
-
-static int ip_fw_in_procinfo(char *buffer, char **start, off_t offset,
-			      int length, int reset)
-{
-	return ip_chain_procinfo(IP_FW_IN, buffer,start,offset,length,
-				 reset);
-}
-
-static int ip_fw_out_procinfo(char *buffer, char **start, off_t offset,
-			      int length, int reset)
-{
-	return ip_chain_procinfo(IP_FW_OUT, buffer,start,offset,length,
-				 reset);
-}
-
-static int ip_fw_fwd_procinfo(char *buffer, char **start, off_t offset,
-			      int length, int reset)
-{
-	return ip_chain_procinfo(IP_FW_FWD, buffer,start,offset,length,
-				 reset);
-}
-#endif
-#endif
 
-
-#ifdef CONFIG_IP_FIREWALL
 /*
  *	Interface to the generic firewall chains.
  */
- 
-int ipfw_input_check(struct firewall_ops *this, int pf, struct device *dev, void *phdr, void *arg, struct sk_buff **pskb)
+int ipfw_input_check(struct firewall_ops *this, int pf, struct device *dev, 
+		     void *phdr, void *arg, struct sk_buff **pskb)
 {
-	return ip_fw_chk(phdr, dev, arg, ip_fw_in_chain, ip_fw_in_policy, IP_FW_MODE_FW);
+	return ip_fw_check(phdr, dev->name,
+			   arg, IP_FW_INPUT_CHAIN, *pskb, SLOT_NUMBER(), 0);
 }
 
-int ipfw_output_check(struct firewall_ops *this, int pf, struct device *dev, void *phdr, void *arg, struct sk_buff **pskb)
+int ipfw_output_check(struct firewall_ops *this, int pf, struct device *dev, 
+		      void *phdr, void *arg, struct sk_buff **pskb)
 {
-	return ip_fw_chk(phdr, dev, arg, ip_fw_out_chain, ip_fw_out_policy, IP_FW_MODE_FW);
+	return ip_fw_check(phdr, dev->name,
+			   arg, IP_FW_OUTPUT_CHAIN, *pskb, SLOT_NUMBER(), 0);
 }
 
-int ipfw_forward_check(struct firewall_ops *this, int pf, struct device *dev, void *phdr, void *arg, struct sk_buff **pskb)
+int ipfw_forward_check(struct firewall_ops *this, int pf, struct device *dev, 
+		       void *phdr, void *arg, struct sk_buff **pskb)
 {
-	return ip_fw_chk(phdr, dev, arg, ip_fw_fwd_chain, ip_fw_fwd_policy, IP_FW_MODE_FW);
+	return ip_fw_check(phdr, dev->name,
+			   arg, IP_FW_FORWARD_CHAIN, *pskb, SLOT_NUMBER(), 0);
 }
- 
+
 struct firewall_ops ipfw_ops=
 {
 	NULL,
@@ -1283,106 +1664,45 @@
 	0	/* We don't even allow a fall through so we are last */
 };
 
-#endif
-
-#if defined(CONFIG_IP_ACCT) || defined(CONFIG_IP_FIREWALL)
-
-int ipfw_device_event(struct notifier_block *this, unsigned long event, void *ptr)
-{
-	struct device *dev=ptr;
-	char *devname = dev->name;
-	unsigned long flags;
-	struct ip_fw *fw;
-	int chn;
-
-	save_flags(flags);
-	cli();
-	
-	if (event == NETDEV_UP) {
-		for (chn = 0; chn < IP_FW_CHAINS; chn++)
-			for (fw = *chains[chn]; fw; fw = fw->fw_next)
-				if ((fw->fw_vianame)[0] && !strncmp(devname,
-						fw->fw_vianame, IFNAMSIZ))
-					fw->fw_viadev = dev;
-	} else if (event == NETDEV_DOWN) {
-		for (chn = 0; chn < IP_FW_CHAINS; chn++)
-			for (fw = *chains[chn]; fw; fw = fw->fw_next)
-				/* we could compare just the pointers ... */
-				if ((fw->fw_vianame)[0] && !strncmp(devname,
-						fw->fw_vianame, IFNAMSIZ))
-					fw->fw_viadev = (struct device *) -1;
-	}
-
-	restore_flags(flags);
-	return NOTIFY_DONE;
-}
-
-static struct notifier_block ipfw_dev_notifier={
-	ipfw_device_event,
-	NULL,
-	0
+#ifdef CONFIG_PROC_FS		
+static struct proc_dir_entry proc_net_ipfwchains_chain = {
+	PROC_NET_IPFW_CHAINS, sizeof(IP_FW_PROC_CHAINS)-1, 
+	IP_FW_PROC_CHAINS, S_IFREG | S_IRUGO | S_IWUSR, 1, 0, 0,
+	0, &proc_net_inode_operations, ip_chain_procinfo
 };
 
-#endif
-
-#ifdef CONFIG_PROC_FS
-#ifdef CONFIG_IP_ACCT
-static struct proc_dir_entry proc_net_ipacct = {
-	PROC_NET_IPACCT, 7, "ip_acct",
-	S_IFREG | S_IRUGO | S_IWUSR, 1, 0, 0,
-	0, &proc_net_inode_operations,
-	ip_acct_procinfo
+static struct proc_dir_entry proc_net_ipfwchains_chainnames = {
+	PROC_NET_IPFW_CHAIN_NAMES, sizeof(IP_FW_PROC_CHAIN_NAMES)-1, 
+	IP_FW_PROC_CHAIN_NAMES, S_IFREG | S_IRUGO | S_IWUSR, 1, 0, 0,
+	0, &proc_net_inode_operations, ip_chain_name_procinfo
 };
-#endif
-#endif
 
-#ifdef CONFIG_IP_FIREWALL
-#ifdef CONFIG_PROC_FS		
-static struct proc_dir_entry proc_net_ipfwin = {
-	PROC_NET_IPFWIN, 8, "ip_input",
-	S_IFREG | S_IRUGO | S_IWUSR, 1, 0, 0,
-	0, &proc_net_inode_operations,
-	ip_fw_in_procinfo
-};
-static struct proc_dir_entry proc_net_ipfwout = {
-	PROC_NET_IPFWOUT, 9, "ip_output",
-	S_IFREG | S_IRUGO | S_IWUSR, 1, 0, 0,
-	0, &proc_net_inode_operations,
-	ip_fw_out_procinfo
-};
-static struct proc_dir_entry proc_net_ipfwfwd = {
-	PROC_NET_IPFWFWD, 10, "ip_forward",
-	S_IFREG | S_IRUGO | S_IWUSR, 1, 0, 0,
-	0, &proc_net_inode_operations,
-	ip_fw_fwd_procinfo
-};
-#endif
 #endif
 
-
 __initfunc(void ip_fw_init(void))
 {
-#ifdef CONFIG_PROC_FS
-#ifdef CONFIG_IP_ACCT
-	proc_net_register(&proc_net_ipacct);
-#endif
+#ifdef DEBUG_IP_FIREWALL_LOCKING
+	fwc_wlocks = fwc_rlocks = 0;
 #endif
-#ifdef CONFIG_IP_FIREWALL
+	/* Create the built-in chains; FW_ACCEPT is presumably their initial policy. */
+	IP_FW_INPUT_CHAIN = ip_init_chain(IP_FW_LABEL_INPUT, 1, FW_ACCEPT);
+	IP_FW_FORWARD_CHAIN = ip_init_chain(IP_FW_LABEL_FORWARD, 1, FW_ACCEPT);
+	IP_FW_OUTPUT_CHAIN = ip_init_chain(IP_FW_LABEL_OUTPUT, 1, FW_ACCEPT);
 
 	if(register_firewall(PF_INET,&ipfw_ops)<0)
 		panic("Unable to register IP firewall.\n");
+	/* Register the proc entries for the rule dump and the chain-name listing. */
 #ifdef CONFIG_PROC_FS		
-	proc_net_register(&proc_net_ipfwin);
-	proc_net_register(&proc_net_ipfwout);
-	proc_net_register(&proc_net_ipfwfwd);
-#endif
+	proc_net_register(&proc_net_ipfwchains_chain);
+	proc_net_register(&proc_net_ipfwchains_chainnames);
 #endif
 
-#if defined(CONFIG_IP_ACCT) || defined(CONFIG_IP_FIREWALL)
-	/* Register for device up/down reports */
-	register_netdevice_notifier(&ipfw_dev_notifier);
-#endif
 #ifdef CONFIG_IP_FIREWALL_NETLINK
 	ipfwsk = netlink_kernel_create(NETLINK_FIREWALL, NULL);
+	if (ipfwsk == NULL)
+		panic("ip_fw_init: cannot initialize netlink\n");
+#endif
+#if defined(DEBUG_IP_FIREWALL) || defined(DEBUG_IP_FIREWALL_USER)
+	printk("Firewall graphs enabled! Untested kernel coming thru. \n");
 #endif
 }

FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen, slshen@lbl.gov