From: "Andi Kleen" <ak@suse.de>

Collected ioremap fixes

- Call change_page_attr correctly
- Fix a race during iounmap: the area is now unlinked from vmlist and
  its direct-mapping attributes reverted under vmlist_lock
- Handle MMIO holes without a mem_map entry correctly (needed for the
  SRAT patch); see the usage sketch below
- Some cleanups
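
For reference, a minimal usage sketch of the new address-based
interface (not part of the patch; the physical address below is made
up):

	/*
	 * Make one page of the kernel direct mapping uncacheable by
	 * address rather than by struct page.  This also works for a
	 * physical address in a hole between nodes, where
	 * virt_to_page() would have no mem_map entry to return.
	 */
	unsigned long phys  = 0x40000000UL;	/* hypothetical address */
	unsigned long vaddr = (unsigned long)__va(phys);

	if (change_page_attr_addr(vaddr, 1, PAGE_KERNEL_NOCACHE) == 0)
		global_flush_tlb();	/* caller must flush afterwards */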

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
---

 25-akpm/arch/x86_64/mm/ioremap.c        |   76 ++++++++++++++++++++------------
 25-akpm/arch/x86_64/mm/pageattr.c       |   36 ++++++++-------
 25-akpm/include/asm-x86_64/cacheflush.h |    1 
 3 files changed, 70 insertions(+), 43 deletions(-)

diff -puN arch/x86_64/mm/ioremap.c~x86_64-collected-ioremap-fixes arch/x86_64/mm/ioremap.c
--- 25/arch/x86_64/mm/ioremap.c~x86_64-collected-ioremap-fixes	2004-12-15 03:45:53.252252392 -0800
+++ 25-akpm/arch/x86_64/mm/ioremap.c	2004-12-15 03:45:53.259251328 -0800
@@ -16,7 +16,7 @@
 #include <asm/fixmap.h>
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
-
+#include <asm/proto.h>
 
 static inline void remap_area_pte(pte_t * pte, unsigned long address, unsigned long size,
 	unsigned long phys_addr, unsigned long flags)
@@ -122,7 +122,31 @@ static int remap_area_pages(unsigned lon
 }
 
 /*
- * Generic mapping function (not visible outside):
+ * Fix up the linear direct mapping of the kernel to avoid cache attribute
+ * conflicts.
+ */
+static int
+ioremap_change_attr(unsigned long phys_addr, unsigned long size,
+					unsigned long flags)
+{
+	int err = 0;
+	if (flags && phys_addr + size - 1 < (end_pfn_map << PAGE_SHIFT)) {
+		unsigned long npages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
+		unsigned long vaddr = (unsigned long) __va(phys_addr);
+
+	/*
+	 * Must use an address here, not a struct page: the physical address
+	 * can be in a hole between nodes and then has no mem_map entry.
+	 */
+	err = change_page_attr_addr(vaddr, npages, __pgprot(__PAGE_KERNEL|flags));
+		if (!err)
+			global_flush_tlb();
+	}
+	return err;
+}
+
+/*
+ * Generic mapping function
  */
 
 /*
@@ -178,12 +202,17 @@ void __iomem * __ioremap(unsigned long p
 	/*
 	 * Ok, go for it..
 	 */
-	area = get_vm_area(size, VM_IOREMAP);
+	area = get_vm_area(size, VM_IOREMAP | (flags << 24));
 	if (!area)
 		return NULL;
 	area->phys_addr = phys_addr;
 	addr = area->addr;
 	if (remap_area_pages((unsigned long) addr, phys_addr, size, flags)) {
+		remove_vm_area((void *)(PAGE_MASK & (unsigned long) addr));
+		return NULL;
+	}
+	if (ioremap_change_attr(phys_addr, size, flags) < 0) {
+		area->flags &= 0xffffff;
 		vunmap(addr);
 		return NULL;
 	}
@@ -214,43 +243,34 @@ void __iomem * __ioremap(unsigned long p
 
 void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size)
 {
-	void __iomem *p = __ioremap(phys_addr, size, _PAGE_PCD);
-	if (!p) 
-		return p; 
-
-	if (phys_addr + size < virt_to_phys(high_memory)) { 
-		struct page *ppage = virt_to_page(__va(phys_addr));		
-		unsigned long npages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
-
-		BUG_ON(phys_addr+size > (unsigned long)high_memory);
-		BUG_ON(phys_addr + size < phys_addr);
-
-		if (change_page_attr(ppage, npages, PAGE_KERNEL_NOCACHE) < 0) { 
-			iounmap(p); 
-			p = NULL;
-		}
-		global_flush_tlb();
-	} 
-
-	return p;					
+	return __ioremap(phys_addr, size, _PAGE_PCD);
 }
 
 void iounmap(volatile void __iomem *addr)
 {
-	struct vm_struct *p;
+	struct vm_struct *p, **pprev;
+
 	if (addr <= high_memory) 
 		return; 
-	p = remove_vm_area((void *)(PAGE_MASK & (unsigned long) addr)); 
+
+	write_lock(&vmlist_lock);
+	for (p = vmlist, pprev = &vmlist; p != NULL; pprev = &p->next, p = *pprev)
+		if (p->addr == (void *)(PAGE_MASK & (unsigned long)addr))
+			break;
 	if (!p) { 
 		printk("__iounmap: bad address %p\n", addr);
-		return;
-	} 
-
-	if (p->flags && p->phys_addr < virt_to_phys(high_memory)) { 
+		goto out_unlock;
+	}
+	*pprev = p->next;
+	unmap_vm_area(p);
+	if ((p->flags >> 24) &&
+		p->phys_addr + p->size - 1 < virt_to_phys(high_memory)) {
 		change_page_attr(virt_to_page(__va(p->phys_addr)),
 				 p->size >> PAGE_SHIFT,
 				 PAGE_KERNEL); 				 
 		global_flush_tlb();
 	} 
+out_unlock:
+	write_unlock(&vmlist_lock);
 	kfree(p); 
 }
diff -puN arch/x86_64/mm/pageattr.c~x86_64-collected-ioremap-fixes arch/x86_64/mm/pageattr.c
--- 25/arch/x86_64/mm/pageattr.c~x86_64-collected-ioremap-fixes	2004-12-15 03:45:53.254252088 -0800
+++ 25-akpm/arch/x86_64/mm/pageattr.c	2004-12-15 03:45:53.260251176 -0800
@@ -119,13 +119,12 @@ static void revert_page(unsigned long ad
 }      
 
 static int
-__change_page_attr(unsigned long address, struct page *page, pgprot_t prot, 
-		   pgprot_t ref_prot)
+__change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot,
+				   pgprot_t ref_prot)
 { 
 	pte_t *kpte; 
 	struct page *kpte_page;
 	unsigned kpte_flags;
-
 	kpte = lookup_address(address);
 	if (!kpte) return 0;
 	kpte_page = virt_to_page(((unsigned long)kpte) & PAGE_MASK);
@@ -133,20 +132,20 @@ __change_page_attr(unsigned long address
 	if (pgprot_val(prot) != pgprot_val(ref_prot)) { 
 		if ((kpte_flags & _PAGE_PSE) == 0) { 
 			pte_t old = *kpte;
-			pte_t standard = mk_pte(page, ref_prot); 
+			pte_t standard = pfn_pte(pfn, ref_prot);
 
-			set_pte(kpte, mk_pte(page, prot)); 
+			set_pte(kpte, pfn_pte(pfn, prot));
 			if (pte_same(old,standard))
 				get_page(kpte_page);
 		} else {
 			struct page *split = split_large_page(address, prot, ref_prot); 
 			if (!split)
 				return -ENOMEM;
-			get_page(kpte_page);
+			get_page(split);
 			set_pte(kpte,mk_pte(split, ref_prot));
 		}	
 	} else if ((kpte_flags & _PAGE_PSE) == 0) { 
-		set_pte(kpte, mk_pte(page, ref_prot));
+		set_pte(kpte, pfn_pte(pfn, ref_prot));
 		__put_page(kpte_page);
 	}
 
@@ -170,31 +169,38 @@ __change_page_attr(unsigned long address
  * 
  * Caller must call global_flush_tlb() after this.
  */
-int change_page_attr(struct page *page, int numpages, pgprot_t prot)
+int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot)
 {
 	int err = 0; 
 	int i; 
 
 	down_write(&init_mm.mmap_sem);
-	for (i = 0; i < numpages; !err && i++, page++) { 
-		unsigned long address = (unsigned long)page_address(page); 
-		err = __change_page_attr(address, page, prot, PAGE_KERNEL); 
+	for (i = 0; i < numpages; i++, address += PAGE_SIZE) {
+		unsigned long pfn = __pa(address) >> PAGE_SHIFT;
+
+		err = __change_page_attr(address, pfn, prot, PAGE_KERNEL);
 		if (err) 
 			break; 
 		/* Handle kernel mapping too which aliases part of the
 		 * lowmem */
 		/* Disabled right now. Fixme */ 
-		if (0 && page_to_phys(page) < KERNEL_TEXT_SIZE) {		
+		if (0 && __pa(address) < KERNEL_TEXT_SIZE) {
 			unsigned long addr2;
-			addr2 = __START_KERNEL_map + page_to_phys(page);
-			err = __change_page_attr(addr2, page, prot, 
-						 PAGE_KERNEL_EXEC);
+			addr2 = __START_KERNEL_map + __pa(address);
+			err = __change_page_attr(addr2, pfn, prot, PAGE_KERNEL_EXEC);
 		} 
 	} 	
 	up_write(&init_mm.mmap_sem); 
 	return err;
 }
 
+/* Don't call this for MMIO areas that may not have a mem_map entry */
+int change_page_attr(struct page *page, int numpages, pgprot_t prot)
+{
+	unsigned long addr = (unsigned long)page_address(page);
+	return change_page_attr_addr(addr, numpages, prot);
+}
+
 void global_flush_tlb(void)
 { 
 	struct deferred_page *df, *next_df;
diff -puN include/asm-x86_64/cacheflush.h~x86_64-collected-ioremap-fixes include/asm-x86_64/cacheflush.h
--- 25/include/asm-x86_64/cacheflush.h~x86_64-collected-ioremap-fixes	2004-12-15 03:45:53.255251936 -0800
+++ 25-akpm/include/asm-x86_64/cacheflush.h	2004-12-15 03:45:53.260251176 -0800
@@ -25,5 +25,6 @@
 
 void global_flush_tlb(void); 
 int change_page_attr(struct page *page, int numpages, pgprot_t prot);
+int change_page_attr_addr(unsigned long addr, int numpages, pgprot_t prot);
 
 #endif /* _X8664_CACHEFLUSH_H */
_