From: Jack Steiner <steiner@sgi.com>

Here is an updated patch for flushing the TLB on SN2 after a process
migration.

Signed-off-by: Andrew Morton <akpm@osdl.org>
---

 25-akpm/arch/ia64/kernel/machvec.c        |    6 ++++++
 25-akpm/arch/ia64/sn/kernel/sn2/sn2_smp.c |   15 +++++++++++++++
 25-akpm/include/asm-generic/tlb.h         |    2 ++
 25-akpm/include/asm-ia64/machvec.h        |    9 +++++++++
 25-akpm/include/asm-ia64/machvec_sn2.h    |    2 ++
 25-akpm/include/asm-ia64/tlb.h            |    3 +++
 25-akpm/kernel/sched.c                    |    3 +++
 7 files changed, 40 insertions(+)

diff -puN arch/ia64/kernel/machvec.c~reduce-tlb-flushing-during-process-migration arch/ia64/kernel/machvec.c
--- 25/arch/ia64/kernel/machvec.c~reduce-tlb-flushing-during-process-migration	2004-06-02 18:02:49.262149704 -0700
+++ 25-akpm/arch/ia64/kernel/machvec.c	2004-06-02 18:02:49.274147880 -0700
@@ -62,6 +62,12 @@ machvec_timer_interrupt (int irq, void *
 EXPORT_SYMBOL(machvec_timer_interrupt);
 
 void
+machvec_tlb_migrate_finish (struct mm_struct *mm)
+{
+}
+EXPORT_SYMBOL(machvec_tlb_migrate_finish);
+
+void
 machvec_dma_sync_single (struct device *hwdev, dma_addr_t dma_handle, size_t size, int dir)
 {
 	mb();
diff -puN arch/ia64/sn/kernel/sn2/sn2_smp.c~reduce-tlb-flushing-during-process-migration arch/ia64/sn/kernel/sn2/sn2_smp.c
--- 25/arch/ia64/sn/kernel/sn2/sn2_smp.c~reduce-tlb-flushing-during-process-migration	2004-06-02 18:02:49.264149400 -0700
+++ 25-akpm/arch/ia64/sn/kernel/sn2/sn2_smp.c	2004-06-02 18:02:49.274147880 -0700
@@ -27,6 +27,7 @@
 #include <asm/delay.h>
 #include <asm/io.h>
 #include <asm/smp.h>
+#include <asm/tlb.h>
 #include <asm/numa.h>
 #include <asm/bitops.h>
 #include <asm/hw_irq.h>
@@ -60,6 +61,13 @@ wait_piowc(void)
 }
 
 
+void
+sn_tlb_migrate_finish(struct mm_struct *mm)
+{
+	if (mm == current->mm)
+		flush_tlb_mm(mm);
+}
+
 
 /**
  * sn2_global_tlb_purge - globally purge translation cache of virtual address range
@@ -114,6 +122,13 @@ sn2_global_tlb_purge (unsigned long star
 		return;
 	}
 
+	if (atomic_read(&mm->mm_users) == 1) {
+		flush_tlb_mm(mm);
+		preempt_enable();
+		return;
+	}
+
+
 	nix = 0;
 	for (cnode=find_first_bit(&nodes_flushed, NR_NODES); cnode < NR_NODES; 
 			cnode=find_next_bit(&nodes_flushed, NR_NODES, ++cnode))
diff -puN include/asm-generic/tlb.h~reduce-tlb-flushing-during-process-migration include/asm-generic/tlb.h
--- 25/include/asm-generic/tlb.h~reduce-tlb-flushing-during-process-migration	2004-06-02 18:02:49.265149248 -0700
+++ 25-akpm/include/asm-generic/tlb.h	2004-06-02 18:02:49.275147728 -0700
@@ -147,4 +147,6 @@ static inline void tlb_remove_page(struc
 		__pmd_free_tlb(tlb, pmdp);			\
 	} while (0)
 
+#define tlb_migrate_finish(mm) flush_tlb_mm(mm)
+
 #endif /* _ASM_GENERIC__TLB_H */
diff -puN include/asm-ia64/machvec.h~reduce-tlb-flushing-during-process-migration include/asm-ia64/machvec.h
--- 25/include/asm-ia64/machvec.h~reduce-tlb-flushing-during-process-migration	2004-06-02 18:02:49.266149096 -0700
+++ 25-akpm/include/asm-ia64/machvec.h	2004-06-02 18:02:49.275147728 -0700
@@ -19,6 +19,7 @@ struct pt_regs;
 struct scatterlist;
 struct irq_desc;
 struct page;
+struct mm_struct;
 
 typedef void ia64_mv_setup_t (char **);
 typedef void ia64_mv_cpu_init_t (void);
@@ -26,6 +27,7 @@ typedef void ia64_mv_irq_init_t (void);
 typedef void ia64_mv_send_ipi_t (int, int, int, int);
 typedef void ia64_mv_timer_interrupt_t (int, void *, struct pt_regs *);
 typedef void ia64_mv_global_tlb_purge_t (unsigned long, unsigned long, unsigned long);
+typedef void ia64_mv_tlb_migrate_finish_t (struct mm_struct *);
 typedef struct irq_desc *ia64_mv_irq_desc (unsigned int);
 typedef u8 ia64_mv_irq_to_vector (u8);
 typedef unsigned int ia64_mv_local_vector_to_irq (u8 vector);
@@ -72,6 +74,7 @@ typedef unsigned long ia64_mv_readq_rela
 extern void machvec_noop (void);
 extern void machvec_setup (char **);
 extern void machvec_timer_interrupt (int, void *, struct pt_regs *);
+extern void machvec_tlb_migrate_finish (struct mm_struct *);
 extern void machvec_dma_sync_single (struct device *, dma_addr_t, size_t, int);
 extern void machvec_dma_sync_sg (struct device *, struct scatterlist *, int, int);
 
@@ -95,6 +98,7 @@ extern void machvec_dma_sync_sg (struct 
 #  define platform_send_ipi	ia64_mv.send_ipi
 #  define platform_timer_interrupt	ia64_mv.timer_interrupt
 #  define platform_global_tlb_purge	ia64_mv.global_tlb_purge
+#  define platform_tlb_migrate_finish	ia64_mv.tlb_migrate_finish
 #  define platform_dma_init		ia64_mv.dma_init
 #  define platform_dma_alloc_coherent	ia64_mv.dma_alloc_coherent
 #  define platform_dma_free_coherent	ia64_mv.dma_free_coherent
@@ -140,6 +144,7 @@ struct ia64_machine_vector {
 	ia64_mv_send_ipi_t *send_ipi;
 	ia64_mv_timer_interrupt_t *timer_interrupt;
 	ia64_mv_global_tlb_purge_t *global_tlb_purge;
+	ia64_mv_tlb_migrate_finish_t *tlb_migrate_finish;
 	ia64_mv_dma_init *dma_init;
 	ia64_mv_dma_alloc_coherent *dma_alloc_coherent;
 	ia64_mv_dma_free_coherent *dma_free_coherent;
@@ -181,6 +186,7 @@ struct ia64_machine_vector {
 	platform_send_ipi,			\
 	platform_timer_interrupt,		\
 	platform_global_tlb_purge,		\
+	platform_tlb_migrate_finish,		\
 	platform_dma_init,			\
 	platform_dma_alloc_coherent,		\
 	platform_dma_free_coherent,		\
@@ -260,6 +266,9 @@ extern ia64_mv_dma_supported		swiotlb_dm
 #ifndef platform_global_tlb_purge
 # define platform_global_tlb_purge	ia64_global_tlb_purge /* default to architected version */
 #endif
+#ifndef platform_tlb_migrate_finish
+# define platform_tlb_migrate_finish machvec_tlb_migrate_finish
+#endif
 #ifndef platform_dma_init
 # define platform_dma_init		swiotlb_init
 #endif
diff -puN include/asm-ia64/machvec_sn2.h~reduce-tlb-flushing-during-process-migration include/asm-ia64/machvec_sn2.h
--- 25/include/asm-ia64/machvec_sn2.h~reduce-tlb-flushing-during-process-migration	2004-06-02 18:02:49.268148792 -0700
+++ 25-akpm/include/asm-ia64/machvec_sn2.h	2004-06-02 18:02:49.276147576 -0700
@@ -39,6 +39,7 @@ extern ia64_mv_irq_init_t sn_irq_init;
 extern ia64_mv_send_ipi_t sn2_send_IPI;
 extern ia64_mv_timer_interrupt_t sn_timer_interrupt;
 extern ia64_mv_global_tlb_purge_t sn2_global_tlb_purge;
+extern ia64_mv_tlb_migrate_finish_t	sn_tlb_migrate_finish;
 extern ia64_mv_irq_desc sn_irq_desc;
 extern ia64_mv_irq_to_vector sn_irq_to_vector;
 extern ia64_mv_local_vector_to_irq sn_local_vector_to_irq;
@@ -83,6 +84,7 @@ extern ia64_mv_dma_supported		sn_dma_sup
 #define platform_send_ipi		sn2_send_IPI
 #define platform_timer_interrupt	sn_timer_interrupt
 #define platform_global_tlb_purge       sn2_global_tlb_purge
+#define platform_tlb_migrate_finish	sn_tlb_migrate_finish
 #define platform_pci_fixup		sn_pci_fixup
 #define platform_inb			__sn_inb
 #define platform_inw			__sn_inw
diff -puN include/asm-ia64/tlb.h~reduce-tlb-flushing-during-process-migration include/asm-ia64/tlb.h
--- 25/include/asm-ia64/tlb.h~reduce-tlb-flushing-during-process-migration	2004-06-02 18:02:49.269148640 -0700
+++ 25-akpm/include/asm-ia64/tlb.h	2004-06-02 18:02:49.276147576 -0700
@@ -44,6 +44,7 @@
 #include <asm/pgalloc.h>
 #include <asm/processor.h>
 #include <asm/tlbflush.h>
+#include <asm/machvec.h>
 
 #ifdef CONFIG_SMP
 # define FREE_PTE_NR		2048
@@ -211,6 +212,8 @@ __tlb_remove_tlb_entry (struct mmu_gathe
 	tlb->end_addr = address + PAGE_SIZE;
 }
 
+#define tlb_migrate_finish(mm)	platform_tlb_migrate_finish(mm)
+
 #define tlb_start_vma(tlb, vma)			do { } while (0)
 #define tlb_end_vma(tlb, vma)			do { } while (0)
 
diff -puN kernel/sched.c~reduce-tlb-flushing-during-process-migration kernel/sched.c
--- 25/kernel/sched.c~reduce-tlb-flushing-during-process-migration	2004-06-02 18:02:49.271148336 -0700
+++ 25-akpm/kernel/sched.c	2004-06-02 18:02:49.279147120 -0700
@@ -26,6 +26,7 @@
 #include <linux/highmem.h>
 #include <linux/smp_lock.h>
 #include <asm/mmu_context.h>
+#include <asm/tlb.h>
 #include <linux/interrupt.h>
 #include <linux/completion.h>
 #include <linux/kernel_stat.h>
@@ -1444,6 +1445,7 @@ static void sched_migrate_task(task_t *p
 		wake_up_process(mt);
 		put_task_struct(mt);
 		wait_for_completion(&req.done);
+		tlb_migrate_finish(p->mm);
 		return;
 	}
 out:
@@ -3501,6 +3503,7 @@ int set_cpus_allowed(task_t *p, cpumask_
 		task_rq_unlock(rq, &flags);
 		wake_up_process(rq->migration_thread);
 		wait_for_completion(&req.done);
+		tlb_migrate_finish(p->mm);
 		return 0;
 	}
 out:
_