From: Davide Libenzi <davidel@xmailserver.org>

The following patch implements a lazy TSS's I/O bitmap copy for the i386
architecture.  Instead of copying the bitmap at every context switch, the
TSS's I/O bitmap offset is set to an invalid offset, so that an attempt to
access the bitmap from the CPU will trigger a GP fault.  The bitmap is
then updated lazily at that stage, thereby avoiding bitmap copies in
cases where the switched-in task does not perform any I/O operation.

Signed-off-by: Davide Libenzi <davidel@xmailserver.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
---

 25-akpm/arch/i386/kernel/ioport.c    |    7 +++++--
 25-akpm/arch/i386/kernel/process.c   |   26 ++++++++++++--------------
 25-akpm/arch/i386/kernel/traps.c     |   29 +++++++++++++++++++++++++++++
 25-akpm/include/asm-i386/processor.h |    7 ++++++-
 4 files changed, 52 insertions(+), 17 deletions(-)

diff -puN arch/i386/kernel/ioport.c~lazy-tsss-i-o-bitmap-copy-for-i386 arch/i386/kernel/ioport.c
--- 25/arch/i386/kernel/ioport.c~lazy-tsss-i-o-bitmap-copy-for-i386	2004-09-03 22:54:46.582091752 -0700
+++ 25-akpm/arch/i386/kernel/ioport.c	2004-09-03 22:54:46.590090536 -0700
@@ -105,8 +105,11 @@ asmlinkage long sys_ioperm(unsigned long
 
 	t->io_bitmap_max = bytes;
 
-	/* Update the TSS: */
-	memcpy(tss->io_bitmap, t->io_bitmap_ptr, bytes_updated);
+	/*
+	 * Sets the lazy trigger so that the next I/O operation will
+	 * reload the correct bitmap.
+	 */
+	tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY;
 
 	put_cpu();
 
diff -puN arch/i386/kernel/process.c~lazy-tsss-i-o-bitmap-copy-for-i386 arch/i386/kernel/process.c
--- 25/arch/i386/kernel/process.c~lazy-tsss-i-o-bitmap-copy-for-i386	2004-09-03 22:54:46.583091600 -0700
+++ 25-akpm/arch/i386/kernel/process.c	2004-09-03 22:56:21.932596272 -0700
@@ -596,20 +596,18 @@ struct task_struct fastcall * __switch_t
 		loaddebug(next, 7);
 	}
 
-	if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr)) {
-		if (next->io_bitmap_ptr)
-			/*
-			 * Copy the relevant range of the IO bitmap.
-			 * Normally this is 128 bytes or less:
-			 */
-			memcpy(tss->io_bitmap, next->io_bitmap_ptr,
-				max(prev->io_bitmap_max, next->io_bitmap_max));
-		else
-			/*
-			 * Clear any possible leftover bits:
-			 */
-			memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
-	}
+	/*
+	 * Lazy TSS's I/O bitmap copy. We set an invalid offset here and we let
+	 * the task get a GPF in case an I/O instruction is performed.
+	 * The handler of the GPF will verify that the faulting task has a valid
+	 * I/O bitmap and, if true, does the real copy and restarts the
+	 * instruction.  This saves us from redundant copies when the
+	 * currently switched-in task does not perform any I/O during its
+	 * timeslice.
+	 */
+	tss->io_bitmap_base = next->io_bitmap_ptr ?
+			INVALID_IO_BITMAP_OFFSET_LAZY :
+			INVALID_IO_BITMAP_OFFSET;
 
 	perfctr_resume_thread(next);
 
diff -puN arch/i386/kernel/traps.c~lazy-tsss-i-o-bitmap-copy-for-i386 arch/i386/kernel/traps.c
--- 25/arch/i386/kernel/traps.c~lazy-tsss-i-o-bitmap-copy-for-i386	2004-09-03 22:54:46.585091296 -0700
+++ 25-akpm/arch/i386/kernel/traps.c	2004-09-03 22:54:46.593090080 -0700
@@ -524,6 +524,35 @@ DO_ERROR_INFO(17, SIGBUS, "alignment che
 
 asmlinkage void do_general_protection(struct pt_regs * regs, long error_code)
 {
+	int cpu = get_cpu();
+	struct tss_struct *tss = &per_cpu(init_tss, cpu);
+	struct thread_struct *tsk_th = &current->thread;
+
+	/*
+	 * Perform the lazy TSS's I/O bitmap copy. If the TSS has an
+	 * invalid offset set (the LAZY one) and the faulting thread has
+	 * a valid I/O bitmap pointer, we copy the I/O bitmap in the TSS
+	 * and we set the offset field correctly. Then we let the CPU
+	 * restart the faulting instruction.
+	 */
+	if (tss->io_bitmap_base == INVALID_IO_BITMAP_OFFSET_LAZY &&
+	    tsk_th->io_bitmap_ptr) {
+		memcpy(tss->io_bitmap, tsk_th->io_bitmap_ptr,
+		       tsk_th->io_bitmap_max);
+		/*
+		 * If the previously set map was extending to higher ports
+		 * than the current one, pad extra space with 0xff (no access).
+		 */
+		if (tsk_th->io_bitmap_max < tss->map_size)
+			memset((char *) tss->io_bitmap + tsk_th->io_bitmap_max, 0xff,
+			       tss->map_size - tsk_th->io_bitmap_max);
+		tss->map_size = tsk_th->io_bitmap_max;
+		tss->io_bitmap_base = IO_BITMAP_OFFSET;
+		put_cpu();
+		return;
+	}
+	put_cpu();
+
 	if (regs->eflags & VM_MASK)
 		goto gp_in_vm86;
 
diff -puN include/asm-i386/processor.h~lazy-tsss-i-o-bitmap-copy-for-i386 include/asm-i386/processor.h
--- 25/include/asm-i386/processor.h~lazy-tsss-i-o-bitmap-copy-for-i386	2004-09-03 22:54:46.586091144 -0700
+++ 25-akpm/include/asm-i386/processor.h	2004-09-03 22:54:46.594089928 -0700
@@ -307,6 +307,7 @@ extern unsigned int mca_pentium_flag;
 #define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long))
 #define IO_BITMAP_OFFSET offsetof(struct tss_struct,io_bitmap)
 #define INVALID_IO_BITMAP_OFFSET 0x8000
+#define INVALID_IO_BITMAP_OFFSET_LAZY 0x9000
 
 struct i387_fsave_struct {
 	long	cwd;
@@ -392,9 +393,13 @@ struct tss_struct {
 	 */
 	unsigned long	io_bitmap[IO_BITMAP_LONGS + 1];
 	/*
+	 * Effective size of the currently set I/O bitmap.
+	 */
+	unsigned long	map_size;
+	/*
 	 * pads the TSS to be cacheline-aligned (size is 0x100)
 	 */
-	unsigned long __cacheline_filler[37];
+	unsigned long __cacheline_filler[36];
 	/*
 	 * .. and then another 0x100 bytes for emergency kernel stack
 	 */
_