From: Davide Libenzi <davidel@xmailserver.org>

The following patch implements a lazy copy of the TSS's I/O bitmap for the
x86-64 architecture.  Instead of copying the bitmap at every context switch,
the TSS's I/O bitmap offset is set to an invalid offset, so that an attempt
by the CPU to access the bitmap will trigger a GP fault.  The bitmap is
updated lazily at that stage, thereby avoiding bitmap copies in cases where
the switched-in task does not perform any I/O operations.

Signed-off-by: Davide Libenzi <davidel@xmailserver.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
---

 25-power4-akpm/arch/x86_64/kernel/ioport.c    |   13 +++++-----
 25-power4-akpm/arch/x86_64/kernel/process.c   |   33 +++++++-------------------
 25-power4-akpm/arch/x86_64/kernel/traps.c     |   21 ++++++++++++++++
 25-power4-akpm/include/asm-x86_64/processor.h |    4 ++-
 4 files changed, 41 insertions(+), 30 deletions(-)

diff -puN arch/x86_64/kernel/ioport.c~lazy-tsss-i-o-bitmap-copy-for-x86-64 arch/x86_64/kernel/ioport.c
--- 25-power4/arch/x86_64/kernel/ioport.c~lazy-tsss-i-o-bitmap-copy-for-x86-64	2004-09-07 22:47:48.018555672 -0700
+++ 25-power4-akpm/arch/x86_64/kernel/ioport.c	2004-09-07 22:47:48.027554304 -0700
@@ -62,12 +62,13 @@ asmlinkage long sys_ioperm(unsigned long
 	 */
 	set_bitmap(t->io_bitmap_ptr, from, num, !turn_on);
 	tss = init_tss + get_cpu();
-	if (tss->io_bitmap_base == IO_BITMAP_OFFSET) { /* already active? */
-		set_bitmap(tss->io_bitmap, from, num, !turn_on);
-	} else {
-		memcpy(tss->io_bitmap, t->io_bitmap_ptr, IO_BITMAP_BYTES);
-		tss->io_bitmap_base = IO_BITMAP_OFFSET; /* Activate it in the TSS */
-	}
+
+	/*
+	 * Sets the lazy trigger so that the next I/O operation will
+	 * reload the correct bitmap.
+	 */
+	tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY;
+
 	put_cpu();
 	return 0;
 }
diff -puN arch/x86_64/kernel/process.c~lazy-tsss-i-o-bitmap-copy-for-x86-64 arch/x86_64/kernel/process.c
--- 25-power4/arch/x86_64/kernel/process.c~lazy-tsss-i-o-bitmap-copy-for-x86-64	2004-09-07 22:47:48.020555368 -0700
+++ 25-power4-akpm/arch/x86_64/kernel/process.c	2004-09-07 22:47:48.028554152 -0700
@@ -485,29 +485,16 @@ struct task_struct *__switch_to(struct t
 		loaddebug(next, 7);
 	}
 
-
-	/* 
-	 * Handle the IO bitmap 
-	 */ 
-	if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr)) {
-		if (next->io_bitmap_ptr) {
-			/*
-			 * 2 cachelines copy ... not good, but not that
-			 * bad either. Anyone got something better?
-			 * This only affects processes which use ioperm().
-			 */
-			memcpy(tss->io_bitmap, next->io_bitmap_ptr, IO_BITMAP_BYTES);
-			tss->io_bitmap_base = IO_BITMAP_OFFSET;
-		} else {
-			/*
-			 * a bitmap offset pointing outside of the TSS limit
-			 * causes a nicely controllable SIGSEGV if a process
-			 * tries to use a port IO instruction. The first
-			 * sys_ioperm() call sets up the bitmap properly.
-			 */
-			tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
-		}
-	}
+	/*
+	 * Lazy TSS's I/O bitmap copy. We set an invalid offset here and
+	 * we let the task get a GPF in case an I/O instruction is performed.
+	 * The handler of the GPF will verify that the faulting task has a valid
+	 * I/O bitmap and, if true, do the real copy and restart the instruction.
+	 * This will save us from redundant copies when the currently switched task
+	 * does not perform any I/O during its timeslice.
+	 */
+	tss->io_bitmap_base = next->io_bitmap_ptr ? INVALID_IO_BITMAP_OFFSET_LAZY:
+		INVALID_IO_BITMAP_OFFSET;
 
 	return prev_p;
 }
diff -puN arch/x86_64/kernel/traps.c~lazy-tsss-i-o-bitmap-copy-for-x86-64 arch/x86_64/kernel/traps.c
--- 25-power4/arch/x86_64/kernel/traps.c~lazy-tsss-i-o-bitmap-copy-for-x86-64	2004-09-07 22:47:48.021555216 -0700
+++ 25-power4-akpm/arch/x86_64/kernel/traps.c	2004-09-07 22:47:48.029554000 -0700
@@ -488,6 +488,27 @@ DO_ERROR_STACK( 8, SIGSEGV, "double faul
 
 asmlinkage void do_general_protection(struct pt_regs * regs, long error_code)
 {
+	int cpu = get_cpu();
+	struct tss_struct *tss = &per_cpu(init_tss, cpu);
+	struct thread_struct *tsk_th = &current->thread;
+
+	/*
+	 * Perform the lazy TSS's I/O bitmap copy. If the TSS has an
+	 * invalid offset set (the LAZY one) and the faulting thread has
+	 * a valid I/O bitmap pointer, we copy the I/O bitmap into the TSS
+	 * and we set the offset field correctly. Then we let the CPU
+	 * restart the faulting instruction.
+	 */
+	if (tss->io_bitmap_base == INVALID_IO_BITMAP_OFFSET_LAZY &&
+	    tsk_th->io_bitmap_ptr) {
+		memcpy(tss->io_bitmap, tsk_th->io_bitmap_ptr,
+		       IO_BITMAP_BYTES);
+		tss->io_bitmap_base = IO_BITMAP_OFFSET;
+		put_cpu();
+		return;
+	}
+	put_cpu();
+
 	conditional_sti(regs);
 
 #ifdef CONFIG_CHECKING
diff -puN include/asm-x86_64/processor.h~lazy-tsss-i-o-bitmap-copy-for-x86-64 include/asm-x86_64/processor.h
--- 25-power4/include/asm-x86_64/processor.h~lazy-tsss-i-o-bitmap-copy-for-x86-64	2004-09-07 22:47:48.023554912 -0700
+++ 25-power4-akpm/include/asm-x86_64/processor.h	2004-09-07 22:47:48.030553848 -0700
@@ -187,6 +187,7 @@ static inline void clear_in_cr4 (unsigne
 #define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long))
 #define IO_BITMAP_OFFSET offsetof(struct tss_struct,io_bitmap)
 #define INVALID_IO_BITMAP_OFFSET 0x8000
+#define INVALID_IO_BITMAP_OFFSET_LAZY 0x9000
 
 struct i387_fxsave_struct {
 	u16	cwd;
@@ -217,13 +218,14 @@ struct tss_struct {
 	u32 reserved4;
 	u16 reserved5;
 	u16 io_bitmap_base;
+
 	/*
 	 * The extra 1 is there because the CPU will access an
 	 * additional byte beyond the end of the IO permission
 	 * bitmap. The extra byte must be all 1 bits, and must
 	 * be within the limit. Thus we have:
 	 *
-	 * 128 bytes, the bitmap itself, for ports 0..0x3ff
+	 * 8192 bytes, the bitmap itself, for ports 0..65535
 	 * 8 bytes, for an extra "long" of ~0UL
 	 */
 	unsigned long io_bitmap[IO_BITMAP_LONGS + 1];
_