From: "David S. Miller" <davem@redhat.com>

I audited 99% of the drivers, finding crap like:

	pci_dma_sync_*();
	pci_unmap_*();

i.e. a DMA sync immediately followed by the unmap, which makes the sync a
total NOP of course (the unmap already performs any sync needed).  Good
cleanup and sanitization overall, so it was worth all the leaf raking :-)
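
The rule after this patch: every sync point is split into a *_for_cpu
half (call it before the processor reads or writes a streaming buffer)
and a *_for_device half (call it before handing the buffer back to the
hardware).  A minimal sketch of the receive-side usage, reusing the
cp->rx_* names from the updated Documentation/DMA-mapping.txt example
below (examine_buffer() is just an illustrative placeholder):

	/* Device -> CPU: make the device's DMA writes visible. */
	pci_dma_sync_single_for_cpu(cp->pdev, cp->rx_dma, cp->rx_len,
				    PCI_DMA_FROMDEVICE);

	examine_buffer(cp->rx_buf);	/* cpu may now look at the data */

	/* CPU -> device: hand the buffer back before it DMAs again. */
	pci_dma_sync_single_for_device(cp->pdev, cp->rx_dma, cp->rx_len,
				       PCI_DMA_FROMDEVICE);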



---

 Documentation/DMA-mapping.txt        |   56 ++++++++++++++---
 arch/arm/common/sa1111-pcibuf.c      |   50 ++++++++++++++--
 arch/ia64/lib/swiotlb.c              |   44 +++++++++++---
 arch/ia64/sn/io/machvec/pci_dma.c    |   56 ++++++++++++++---
 arch/mips/mm/dma-coherent.c          |   49 +++++++++++++--
 arch/mips/mm/dma-ip27.c              |   49 +++++++++++++--
 arch/mips/mm/dma-noncoherent.c       |   67 ++++++++++++++++++---
 arch/parisc/kernel/pci-dma.c         |   36 +++++++++--
 arch/sparc/kernel/ioport.c           |   69 +++++++++++++++++++---
 arch/sparc/kernel/sparc_ksyms.c      |   11 ++-
 arch/sparc64/kernel/pci_iommu.c      |    4 -
 arch/sparc64/kernel/sbus.c           |   12 +++
 arch/sparc64/kernel/sparc64_ksyms.c  |    8 +-
 arch/v850/kernel/rte_mb_a_pci.c      |   37 +++++++++--
 drivers/atm/fore200e.c               |   42 ++++++++++---
 drivers/atm/fore200e.h               |    3 
 drivers/atm/idt77252.c               |    7 +-
 drivers/ieee1394/dma.c               |   18 +++++
 drivers/ieee1394/dma.h               |    6 +
 drivers/ieee1394/dv1394.c            |    8 +-
 drivers/ieee1394/ieee1394_core.c     |    3 
 drivers/ieee1394/ohci1394.c          |    6 -
 drivers/ieee1394/sbp2.c              |   58 +++++++++---------
 drivers/media/video/video-buf.c      |    2 
 drivers/message/fusion/mptlan.c      |   30 +++++++--
 drivers/message/i2o/i2o_core.c       |    2 
 drivers/net/3c59x.c                  |    3 
 drivers/net/b44.c                    |   10 ++-
 drivers/net/dl2k.c                   |   10 ++-
 drivers/net/e100.c                   |   13 +---
 drivers/net/e1000/e1000_ethtool.c    |   10 +--
 drivers/net/eepro100.c               |   39 +++++++-----
 drivers/net/epic100.c                |   10 ++-
 drivers/net/fealnx.c                 |   22 +++++--
 drivers/net/hamachi.c                |   18 ++++-
 drivers/net/irda/vlsi_ir.c           |   19 ++----
 drivers/net/lasi_82596.c             |    3 
 drivers/net/myri_sbus.c              |   14 +++-
 drivers/net/natsemi.c                |    6 +
 drivers/net/pcnet32.c                |   12 ++-
 drivers/net/rrunner.c                |   16 +++--
 drivers/net/sis190.c                 |   10 ++-
 drivers/net/sis900.c                 |    3 
 drivers/net/sk98lin/skge.c           |   18 ++---
 drivers/net/starfire.c               |    9 +-
 drivers/net/sunbmac.c                |    8 +-
 drivers/net/sundance.c               |   12 ++-
 drivers/net/sungem.c                 |    3 
 drivers/net/sunhme.c                 |   31 +++++++--
 drivers/net/sunhme.h                 |    3 
 drivers/net/tg3.c                    |    3 
 drivers/net/tokenring/3c359.c        |    6 +
 drivers/net/tokenring/olympic.c      |   10 ++-
 drivers/net/tulip/de2104x.c          |    4 -
 drivers/net/tulip/interrupt.c        |   18 +++--
 drivers/net/tulip/winbond-840.c      |    9 +-
 drivers/net/typhoon.c                |    8 +-
 drivers/net/via-rhine.c              |    4 -
 drivers/net/wan/dscc4.c              |    1 
 drivers/net/yellowfin.c              |    5 +
 drivers/parisc/ccio-dma.c            |    6 +
 drivers/parisc/ccio-rm-dma.c         |    6 +
 drivers/parisc/sba_iommu.c           |    6 +
 drivers/scsi/53c700.c                |    8 +-
 drivers/scsi/dc395x.c                |   18 ++---
 drivers/scsi/eata.c                  |    6 -
 drivers/scsi/megaraid.c              |   32 ----------
 drivers/scsi/ncr53c8xx.c             |    3 
 drivers/scsi/sym53c8xx_2/sym_glue.c  |   32 ++++++++--
 drivers/scsi/sym53c8xx_comm.h        |   30 +++++++--
 drivers/scsi/u14-34f.c               |    6 -
 drivers/usb/core/usb.c               |   20 ++++++
 include/asm-alpha/pci.h              |   46 ++++++++++----
 include/asm-arm/dma-mapping.h        |   70 ++++++++++++++++------
 include/asm-generic/dma-mapping.h    |   49 ++++++++++++---
 include/asm-generic/pci-dma-compat.h |   22 +++++--
 include/asm-i386/dma-mapping.h       |   32 ++++++++--
 include/asm-i386/pci.h               |   15 +++-
 include/asm-ia64/dma-mapping.h       |   12 ++-
 include/asm-ia64/machvec.h           |   44 +++++++++-----
 include/asm-ia64/machvec_hpzx1.h     |    6 +
 include/asm-ia64/machvec_sn2.h       |   12 ++-
 include/asm-ia64/pci.h               |    3 
 include/asm-mips/dma-mapping.h       |   12 ++-
 include/asm-mips/pci.h               |    4 -
 include/asm-parisc/dma-mapping.h     |   53 ++++++++++++-----
 include/asm-ppc/pci.h                |   65 +++++++++++++++-----
 include/asm-ppc64/pci.h              |   28 +++++++-
 include/asm-sh/pci.h                 |   39 ++++++++----
 include/asm-sparc/pci.h              |   15 ++--
 include/asm-sparc/sbus.h             |    8 +-
 include/asm-sparc64/pci.h            |   47 +++++++++++----
 include/asm-sparc64/sbus.h           |    8 +-
 include/asm-v850/pci.h               |   13 ++--
 include/asm-x86_64/pci.h             |  109 ++++++++++++++++++++++++++---------
 include/linux/dma-mapping.h          |    4 +
 include/linux/pci.h                  |    4 +
 include/linux/usb.h                  |    4 +
 include/linux/usb_gadget.h           |    2 
 include/net/irda/vlsi_ir.h           |   13 ----
 100 files changed, 1464 insertions(+), 551 deletions(-)

diff -puN arch/arm/common/sa1111-pcibuf.c~dma_sync_for_device-cpu arch/arm/common/sa1111-pcibuf.c
--- 25/arch/arm/common/sa1111-pcibuf.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/arch/arm/common/sa1111-pcibuf.c	2004-02-28 18:08:54.000000000 -0800
@@ -457,8 +457,8 @@ void sa1111_unmap_sg(struct device *dev,
 	local_irq_restore(flags);
 }
 
-void sa1111_dma_sync_single(struct device *dev, dma_addr_t dma_addr,
-			    size_t size, enum dma_data_direction dir)
+void sa1111_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_addr,
+				    size_t size, enum dma_data_direction dir)
 {
 	unsigned long flags;
 
@@ -472,8 +472,44 @@ void sa1111_dma_sync_single(struct devic
 	local_irq_restore(flags);
 }
 
-void sa1111_dma_sync_sg(struct device *dev, struct scatterlist *sg,
-			int nents, enum dma_data_direction dir)
+void sa1111_dma_sync_single_for_device(struct device *dev, dma_addr_t dma_addr,
+				       size_t size, enum dma_data_direction dir)
+{
+	unsigned long flags;
+
+	dev_dbg(dev, "%s(ptr=%08lx,size=%d,dir=%x)\n",
+		__func__, dma_addr, size, dir);
+
+	local_irq_save(flags);
+
+	sync_single(dev, dma_addr, size, dir);
+
+	local_irq_restore(flags);
+}
+
+void sa1111_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
+				int nents, enum dma_data_direction dir)
+{
+	unsigned long flags;
+	int i;
+
+	dev_dbg(dev, "%s(sg=%p,nents=%d,dir=%x)\n",
+		__func__, sg, nents, dir);
+
+	local_irq_save(flags);
+
+	for (i = 0; i < nents; i++, sg++) {
+		dma_addr_t dma_addr = sg->dma_address;
+		unsigned int length = sg->length;
+
+		sync_single(dev, dma_addr, length, dir);
+	}
+
+	local_irq_restore(flags);
+}
+
+void sa1111_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
+				   int nents, enum dma_data_direction dir)
 {
 	unsigned long flags;
 	int i;
@@ -497,8 +533,10 @@ EXPORT_SYMBOL(sa1111_map_single);
 EXPORT_SYMBOL(sa1111_unmap_single);
 EXPORT_SYMBOL(sa1111_map_sg);
 EXPORT_SYMBOL(sa1111_unmap_sg);
-EXPORT_SYMBOL(sa1111_dma_sync_single);
-EXPORT_SYMBOL(sa1111_dma_sync_sg);
+EXPORT_SYMBOL(sa1111_dma_sync_single_for_cpu);
+EXPORT_SYMBOL(sa1111_dma_sync_single_for_device);
+EXPORT_SYMBOL(sa1111_dma_sync_sg_for_cpu);
+EXPORT_SYMBOL(sa1111_dma_sync_sg_for_device);
 
 /* **************************************** */
 
diff -puN arch/ia64/lib/swiotlb.c~dma_sync_for_device-cpu arch/ia64/lib/swiotlb.c
--- 25/arch/ia64/lib/swiotlb.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/arch/ia64/lib/swiotlb.c	2004-02-28 18:08:54.000000000 -0800
@@ -47,7 +47,7 @@
 #define IO_TLB_SHIFT 11
 
 /*
- * Used to do a quick range check in swiotlb_unmap_single and swiotlb_sync_single, to see
+ * Used to do a quick range check in swiotlb_unmap_single and swiotlb_sync_single_*, to see
  * if the memory was in fact allocated by this API.
  */
 static char *io_tlb_start, *io_tlb_end;
@@ -381,11 +381,24 @@ swiotlb_unmap_single (struct device *hwd
  *
  * If you perform a swiotlb_map_single() but wish to interrogate the buffer using the cpu,
  * yet do not wish to teardown the PCI dma mapping, you must call this function before
- * doing so.  At the next point you give the PCI dma address back to the card, the device
- * again owns the buffer.
+ * doing so.  At the next point you give the PCI dma address back to the card, you must
+ * first perform a swiotlb_sync_single_for_device, and then the device again owns the buffer.
  */
 void
-swiotlb_sync_single (struct device *hwdev, dma_addr_t dev_addr, size_t size, int dir)
+swiotlb_sync_single_for_cpu (struct device *hwdev, dma_addr_t dev_addr, size_t size, int dir)
+{
+	char *dma_addr = phys_to_virt(dev_addr);
+
+	if (dir == DMA_NONE)
+		BUG();
+	if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end)
+		sync_single(hwdev, dma_addr, size, dir);
+	else if (dir == DMA_FROM_DEVICE)
+		mark_clean(dma_addr, size);
+}
+
+void
+swiotlb_sync_single_for_device (struct device *hwdev, dma_addr_t dev_addr, size_t size, int dir)
 {
 	char *dma_addr = phys_to_virt(dev_addr);
 
@@ -456,11 +469,24 @@ swiotlb_unmap_sg (struct device *hwdev, 
  * Make physical memory consistent for a set of streaming mode DMA translations after a
  * transfer.
  *
- * The same as swiotlb_dma_sync_single but for a scatter-gather list, same rules and
+ * The same as swiotlb_sync_single_* but for a scatter-gather list, same rules and
  * usage.
  */
 void
-swiotlb_sync_sg (struct device *hwdev, struct scatterlist *sg, int nelems, int dir)
+swiotlb_sync_sg_for_cpu (struct device *hwdev, struct scatterlist *sg, int nelems, int dir)
+{
+	int i;
+
+	if (dir == DMA_NONE)
+		BUG();
+
+	for (i = 0; i < nelems; i++, sg++)
+		if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
+			sync_single(hwdev, (void *) sg->dma_address, sg->dma_length, dir);
+}
+
+void
+swiotlb_sync_sg_for_device (struct device *hwdev, struct scatterlist *sg, int nelems, int dir)
 {
 	int i;
 
@@ -488,8 +514,10 @@ EXPORT_SYMBOL(swiotlb_map_single);
 EXPORT_SYMBOL(swiotlb_unmap_single);
 EXPORT_SYMBOL(swiotlb_map_sg);
 EXPORT_SYMBOL(swiotlb_unmap_sg);
-EXPORT_SYMBOL(swiotlb_sync_single);
-EXPORT_SYMBOL(swiotlb_sync_sg);
+EXPORT_SYMBOL(swiotlb_sync_single_for_cpu);
+EXPORT_SYMBOL(swiotlb_sync_single_for_device);
+EXPORT_SYMBOL(swiotlb_sync_sg_for_cpu);
+EXPORT_SYMBOL(swiotlb_sync_sg_for_device);
 EXPORT_SYMBOL(swiotlb_alloc_coherent);
 EXPORT_SYMBOL(swiotlb_free_coherent);
 EXPORT_SYMBOL(swiotlb_dma_supported);
diff -puN arch/ia64/sn/io/machvec/pci_dma.c~dma_sync_for_device-cpu arch/ia64/sn/io/machvec/pci_dma.c
--- 25/arch/ia64/sn/io/machvec/pci_dma.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/arch/ia64/sn/io/machvec/pci_dma.c	2004-02-28 18:08:54.000000000 -0800
@@ -437,7 +437,8 @@ sn_pci_unmap_single(struct pci_dev *hwde
 }
 
 /**
- * sn_pci_dma_sync_single - make sure all DMAs have completed
+ * sn_pci_dma_sync_single_* - make sure all DMAs or CPU accesses
+ * have completed
  * @hwdev: device to sync
  * @dma_handle: DMA address to sync
  * @size: size of region
@@ -448,14 +449,19 @@ sn_pci_unmap_single(struct pci_dev *hwde
  * anything on our platform.
  */
 void
-sn_pci_dma_sync_single(struct pci_dev *hwdev, dma_addr_t dma_handle, size_t size, int direction)
+sn_pci_dma_sync_single_for_cpu(struct pci_dev *hwdev, dma_addr_t dma_handle, size_t size, int direction)
 {
 	return;
+}
 
+void
+sn_pci_dma_sync_single_for_device(struct pci_dev *hwdev, dma_addr_t dma_handle, size_t size, int direction)
+{
+	return;
 }
 
 /**
- * sn_pci_dma_sync_sg - make sure all DMAs have completed
+ * sn_pci_dma_sync_sg_* - make sure all DMAs or CPU accesses have completed
  * @hwdev: device to sync
  * @sg: scatterlist to sync
  * @nents: number of entries in the scatterlist
@@ -466,10 +472,15 @@ sn_pci_dma_sync_single(struct pci_dev *h
  * on our platform.
  */
 void
-sn_pci_dma_sync_sg(struct pci_dev *hwdev, struct scatterlist *sg, int nents, int direction)
+sn_pci_dma_sync_sg_for_cpu(struct pci_dev *hwdev, struct scatterlist *sg, int nents, int direction)
 {
 	return;
+}
 
+void
+sn_pci_dma_sync_sg_for_device(struct pci_dev *hwdev, struct scatterlist *sg, int nents, int direction)
+{
+	return;
 }
 
 /**
@@ -602,28 +613,51 @@ sn_dma_unmap_sg(struct device *dev, stru
 EXPORT_SYMBOL(sn_dma_unmap_sg);
 
 void
-sn_dma_sync_single(struct device *dev, dma_addr_t dma_handle, size_t size,
+sn_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size,
+			   int direction)
+{
+	BUG_ON(dev->bus != &pci_bus_type);
+
+	sn_pci_dma_sync_single_for_cpu(to_pci_dev(dev), dma_handle, size, (int)direction);
+}
+EXPORT_SYMBOL(sn_dma_sync_single_for_cpu);
+
+void
+sn_dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size,
 		int direction)
 {
 	BUG_ON(dev->bus != &pci_bus_type);
 
-	sn_pci_dma_sync_single(to_pci_dev(dev), dma_handle, size, (int)direction);
+	sn_pci_dma_sync_single_for_device(to_pci_dev(dev), dma_handle, size, (int)direction);
+}
+EXPORT_SYMBOL(sn_dma_sync_single_for_device);
+
+void
+sn_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems,
+	    int direction)
+{
+	BUG_ON(dev->bus != &pci_bus_type);
+
+	sn_pci_dma_sync_sg_for_cpu(to_pci_dev(dev), sg, nelems, (int)direction);
 }
-EXPORT_SYMBOL(sn_dma_sync_single);
+EXPORT_SYMBOL(sn_dma_sync_sg_for_cpu);
 
 void
-sn_dma_sync_sg(struct device *dev, struct scatterlist *sg, int nelems,
+sn_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems,
 	    int direction)
 {
 	BUG_ON(dev->bus != &pci_bus_type);
 
-	sn_pci_dma_sync_sg(to_pci_dev(dev), sg, nelems, (int)direction);
+	sn_pci_dma_sync_sg_for_device(to_pci_dev(dev), sg, nelems, (int)direction);
 }
-EXPORT_SYMBOL(sn_dma_sync_sg);
+EXPORT_SYMBOL(sn_dma_sync_sg_for_device);
 
 EXPORT_SYMBOL(sn_pci_unmap_single);
 EXPORT_SYMBOL(sn_pci_map_single);
-EXPORT_SYMBOL(sn_pci_dma_sync_single);
+EXPORT_SYMBOL(sn_pci_dma_sync_single_for_cpu);
+EXPORT_SYMBOL(sn_pci_dma_sync_single_for_device);
+EXPORT_SYMBOL(sn_pci_dma_sync_sg_for_cpu);
+EXPORT_SYMBOL(sn_pci_dma_sync_sg_for_device);
 EXPORT_SYMBOL(sn_pci_map_sg);
 EXPORT_SYMBOL(sn_pci_unmap_sg);
 EXPORT_SYMBOL(sn_pci_alloc_consistent);
diff -puN arch/mips/mm/dma-coherent.c~dma_sync_for_device-cpu arch/mips/mm/dma-coherent.c
--- 25/arch/mips/mm/dma-coherent.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/arch/mips/mm/dma-coherent.c	2004-02-28 18:08:54.000000000 -0800
@@ -119,30 +119,55 @@ void dma_unmap_sg(struct device *dev, st
 
 EXPORT_SYMBOL(dma_unmap_sg);
 
-void dma_sync_single(struct device *dev, dma_addr_t dma_handle, size_t size,
+void dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size,
 		enum dma_data_direction direction)
 {
 	BUG_ON(direction == DMA_NONE);
 }
 
-EXPORT_SYMBOL(dma_sync_single);
+EXPORT_SYMBOL(dma_sync_single_for_cpu);
 
-void dma_sync_single_range(struct device *dev, dma_addr_t dma_handle,
+void dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size,
+		enum dma_data_direction direction)
+{
+	BUG_ON(direction == DMA_NONE);
+}
+
+EXPORT_SYMBOL(dma_sync_single_for_device);
+
+void dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t dma_handle,
+		      unsigned long offset, size_t size,
+		      enum dma_data_direction direction)
+{
+	BUG_ON(direction == DMA_NONE);
+}
+
+EXPORT_SYMBOL(dma_sync_single_range_for_cpu);
+
+void dma_sync_single_range_for_device(struct device *dev, dma_addr_t dma_handle,
 		      unsigned long offset, size_t size,
 		      enum dma_data_direction direction)
 {
 	BUG_ON(direction == DMA_NONE);
 }
 
-EXPORT_SYMBOL(dma_sync_single_range);
+EXPORT_SYMBOL(dma_sync_single_range_for_device);
 
-void dma_sync_sg(struct device *dev, struct scatterlist *sg, int nelems,
+void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems,
 		 enum dma_data_direction direction)
 {
 	BUG_ON(direction == DMA_NONE);
 }
 
-EXPORT_SYMBOL(dma_sync_sg);
+EXPORT_SYMBOL(dma_sync_sg_for_cpu);
+
+void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems,
+		 enum dma_data_direction direction)
+{
+	BUG_ON(direction == DMA_NONE);
+}
+
+EXPORT_SYMBOL(dma_sync_sg_for_device);
 
 int dma_supported(struct device *dev, u64 mask)
 {
@@ -204,12 +229,20 @@ unsigned long pci_dac_dma_to_offset(stru
 
 EXPORT_SYMBOL(pci_dac_dma_to_offset);
 
-void pci_dac_dma_sync_single(struct pci_dev *pdev,
+void pci_dac_dma_sync_single_for_cpu(struct pci_dev *pdev,
+	dma64_addr_t dma_addr, size_t len, int direction)
+{
+	BUG_ON(direction == PCI_DMA_NONE);
+}
+
+EXPORT_SYMBOL(pci_dac_dma_sync_single_for_cpu);
+
+void pci_dac_dma_sync_single_for_device(struct pci_dev *pdev,
 	dma64_addr_t dma_addr, size_t len, int direction)
 {
 	BUG_ON(direction == PCI_DMA_NONE);
 }
 
-EXPORT_SYMBOL(pci_dac_dma_sync_single);
+EXPORT_SYMBOL(pci_dac_dma_sync_single_for_device);
 
 #endif /* CONFIG_PCI */
diff -puN arch/mips/mm/dma-ip27.c~dma_sync_for_device-cpu arch/mips/mm/dma-ip27.c
--- 25/arch/mips/mm/dma-ip27.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/arch/mips/mm/dma-ip27.c	2004-02-28 18:08:54.000000000 -0800
@@ -125,30 +125,55 @@ void dma_unmap_sg(struct device *dev, st
 
 EXPORT_SYMBOL(dma_unmap_sg);
 
-void dma_sync_single(struct device *dev, dma_addr_t dma_handle, size_t size,
+void dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size,
 		enum dma_data_direction direction)
 {
 	BUG_ON(direction == DMA_NONE);
 }
 
-EXPORT_SYMBOL(dma_sync_single);
+EXPORT_SYMBOL(dma_sync_single_for_cpu);
 
-void dma_sync_single_range(struct device *dev, dma_addr_t dma_handle,
+void dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size,
+		enum dma_data_direction direction)
+{
+	BUG_ON(direction == DMA_NONE);
+}
+
+EXPORT_SYMBOL(dma_sync_single_for_device);
+
+void dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t dma_handle,
+		      unsigned long offset, size_t size,
+		      enum dma_data_direction direction)
+{
+	BUG_ON(direction == DMA_NONE);
+}
+
+EXPORT_SYMBOL(dma_sync_single_range_for_cpu);
+
+void dma_sync_single_range_for_device(struct device *dev, dma_addr_t dma_handle,
 		      unsigned long offset, size_t size,
 		      enum dma_data_direction direction)
 {
 	BUG_ON(direction == DMA_NONE);
 }
 
-EXPORT_SYMBOL(dma_sync_single_range);
+EXPORT_SYMBOL(dma_sync_single_range_for_device);
 
-void dma_sync_sg(struct device *dev, struct scatterlist *sg, int nelems,
+void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems,
 		 enum dma_data_direction direction)
 {
 	BUG_ON(direction == DMA_NONE);
 }
 
-EXPORT_SYMBOL(dma_sync_sg);
+EXPORT_SYMBOL(dma_sync_sg_for_cpu);
+
+void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems,
+		 enum dma_data_direction direction)
+{
+	BUG_ON(direction == DMA_NONE);
+}
+
+EXPORT_SYMBOL(dma_sync_sg_for_device);
 
 int dma_supported(struct device *dev, u64 mask)
 {
@@ -208,10 +233,18 @@ unsigned long pci_dac_dma_to_offset(stru
 
 EXPORT_SYMBOL(pci_dac_dma_to_offset);
 
-void pci_dac_dma_sync_single(struct pci_dev *pdev,
+void pci_dac_dma_sync_single_for_cpu(struct pci_dev *pdev,
+	dma64_addr_t dma_addr, size_t len, int direction)
+{
+	BUG_ON(direction == PCI_DMA_NONE);
+}
+
+EXPORT_SYMBOL(pci_dac_dma_sync_single_for_cpu);
+
+void pci_dac_dma_sync_single_for_device(struct pci_dev *pdev,
 	dma64_addr_t dma_addr, size_t len, int direction)
 {
 	BUG_ON(direction == PCI_DMA_NONE);
 }
 
-EXPORT_SYMBOL(pci_dac_dma_sync_single);
+EXPORT_SYMBOL(pci_dac_dma_sync_single_for_device);
diff -puN arch/mips/mm/dma-noncoherent.c~dma_sync_for_device-cpu arch/mips/mm/dma-noncoherent.c
--- 25/arch/mips/mm/dma-noncoherent.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/arch/mips/mm/dma-noncoherent.c	2004-02-28 18:08:54.000000000 -0800
@@ -226,7 +226,7 @@ void dma_unmap_sg(struct device *dev, st
 
 EXPORT_SYMBOL(dma_unmap_sg);
 
-void dma_sync_single(struct device *dev, dma_addr_t dma_handle, size_t size,
+void dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size,
 	enum dma_data_direction direction)
 {
 	unsigned long addr;
@@ -237,9 +237,35 @@ void dma_sync_single(struct device *dev,
 	__dma_sync(addr, size, direction);
 }
 
-EXPORT_SYMBOL(dma_sync_single);
+EXPORT_SYMBOL(dma_sync_single_for_cpu);
 
-void dma_sync_single_range(struct device *dev, dma_addr_t dma_handle,
+void dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size,
+	enum dma_data_direction direction)
+{
+	unsigned long addr;
+
+	BUG_ON(direction == DMA_NONE);
+
+	addr = dma_handle + PAGE_OFFSET;
+	__dma_sync(addr, size, direction);
+}
+
+EXPORT_SYMBOL(dma_sync_single_for_device);
+
+void dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t dma_handle,
+	unsigned long offset, size_t size, enum dma_data_direction direction)
+{
+	unsigned long addr;
+
+	BUG_ON(direction == DMA_NONE);
+
+	addr = dma_handle + offset + PAGE_OFFSET;
+	__dma_sync(addr, size, direction);
+}
+
+EXPORT_SYMBOL(dma_sync_single_range_for_cpu);
+
+void dma_sync_single_range_for_device(struct device *dev, dma_addr_t dma_handle,
 	unsigned long offset, size_t size, enum dma_data_direction direction)
 {
 	unsigned long addr;
@@ -250,9 +276,9 @@ void dma_sync_single_range(struct device
 	__dma_sync(addr, size, direction);
 }
 
-EXPORT_SYMBOL(dma_sync_single_range);
+EXPORT_SYMBOL(dma_sync_single_range_for_device);
 
-void dma_sync_sg(struct device *dev, struct scatterlist *sg, int nelems,
+void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems,
 	enum dma_data_direction direction)
 {
 	int i;
@@ -265,7 +291,22 @@ void dma_sync_sg(struct device *dev, str
 		           sg->length, direction);
 }
 
-EXPORT_SYMBOL(dma_sync_sg);
+EXPORT_SYMBOL(dma_sync_sg_for_cpu);
+
+void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems,
+	enum dma_data_direction direction)
+{
+	int i;
+
+	BUG_ON(direction == DMA_NONE);
+
+	/* Make sure that gcc doesn't leave the empty loop body.  */
+	for (i = 0; i < nelems; i++, sg++)
+		__dma_sync((unsigned long)page_address(sg->page),
+		           sg->length, direction);
+}
+
+EXPORT_SYMBOL(dma_sync_sg_for_device);
 
 int dma_supported(struct device *dev, u64 mask)
 {
@@ -329,7 +370,17 @@ unsigned long pci_dac_dma_to_offset(stru
 
 EXPORT_SYMBOL(pci_dac_dma_to_offset);
 
-void pci_dac_dma_sync_single(struct pci_dev *pdev,
+void pci_dac_dma_sync_single_for_cpu(struct pci_dev *pdev,
+	dma64_addr_t dma_addr, size_t len, int direction)
+{
+	BUG_ON(direction == PCI_DMA_NONE);
+
+	dma_cache_wback_inv(dma_addr + PAGE_OFFSET, len);
+}
+
+EXPORT_SYMBOL(pci_dac_dma_sync_single_for_cpu);
+
+void pci_dac_dma_sync_single_for_device(struct pci_dev *pdev,
 	dma64_addr_t dma_addr, size_t len, int direction)
 {
 	BUG_ON(direction == PCI_DMA_NONE);
@@ -337,6 +388,6 @@ void pci_dac_dma_sync_single(struct pci_
 	dma_cache_wback_inv(dma_addr + PAGE_OFFSET, len);
 }
 
-EXPORT_SYMBOL(pci_dac_dma_sync_single);
+EXPORT_SYMBOL(pci_dac_dma_sync_single_for_device);
 
 #endif /* CONFIG_PCI */
diff -puN arch/parisc/kernel/pci-dma.c~dma_sync_for_device-cpu arch/parisc/kernel/pci-dma.c
--- 25/arch/parisc/kernel/pci-dma.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/arch/parisc/kernel/pci-dma.c	2004-02-28 18:08:54.000000000 -0800
@@ -413,7 +413,7 @@ static void pa11_dma_unmap_single(struct
 	/*
 	 * For PCI_DMA_FROMDEVICE this flush is not necessary for the
	 * simple map/unmap case. However, it IS necessary if
-	 * pci_dma_sync_single has been called and the buffer reused.
+	 * pci_dma_sync_single_* has been called and the buffer reused.
 	 */
 
 	flush_kernel_dcache_range((unsigned long) phys_to_virt(dma_handle), size);
@@ -453,7 +453,7 @@ static void pa11_dma_unmap_sg(struct dev
 	return;
 }
 
-static void pa11_dma_sync_single(struct device *dev, dma_addr_t dma_handle, unsigned long offset, size_t size, enum dma_data_direction direction)
+static void pa11_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, unsigned long offset, size_t size, enum dma_data_direction direction)
 {
 	if (direction == DMA_NONE)
 	    BUG();
@@ -461,7 +461,25 @@ static void pa11_dma_sync_single(struct 
 	flush_kernel_dcache_range((unsigned long) phys_to_virt(dma_handle) + offset, size);
 }
 
-static void pa11_dma_sync_sg(struct device *dev, struct scatterlist *sglist, int nents, enum dma_data_direction direction)
+static void pa11_dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, unsigned long offset, size_t size, enum dma_data_direction direction)
+{
+	if (direction == DMA_NONE)
+	    BUG();
+
+	flush_kernel_dcache_range((unsigned long) phys_to_virt(dma_handle) + offset, size);
+}
+
+static void pa11_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sglist, int nents, enum dma_data_direction direction)
+{
+	int i;
+
+	/* once we do combining we'll need to use phys_to_virt(sg_dma_address(sglist)) */
+
+	for (i = 0; i < nents; i++, sglist++ )
+		flush_kernel_dcache_range(sg_virt_addr(sglist), sglist->length);
+}
+
+static void pa11_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sglist, int nents, enum dma_data_direction direction)
 {
 	int i;
 
@@ -480,8 +498,10 @@ struct hppa_dma_ops pcxl_dma_ops = {
 	.unmap_single =		pa11_dma_unmap_single,
 	.map_sg =		pa11_dma_map_sg,
 	.unmap_sg =		pa11_dma_unmap_sg,
-	.dma_sync_single =	pa11_dma_sync_single,
-	.dma_sync_sg =		pa11_dma_sync_sg,
+	.dma_sync_single_for_cpu = pa11_dma_sync_single_for_cpu,
+	.dma_sync_single_for_device = pa11_dma_sync_single_for_device,
+	.dma_sync_sg_for_cpu = pa11_dma_sync_sg_for_cpu,
+	.dma_sync_sg_for_device = pa11_dma_sync_sg_for_device,
 };
 
 static void *fail_alloc_consistent(struct device *dev, size_t size,
@@ -519,8 +539,10 @@ struct hppa_dma_ops pcx_dma_ops = {
 	.unmap_single =		pa11_dma_unmap_single,
 	.map_sg =		pa11_dma_map_sg,
 	.unmap_sg =		pa11_dma_unmap_sg,
-	.dma_sync_single =	pa11_dma_sync_single,
-	.dma_sync_sg =		pa11_dma_sync_sg,
+	.dma_sync_single_for_cpu = pa11_dma_sync_single_for_cpu,
+	.dma_sync_single_for_device = pa11_dma_sync_single_for_device,
+	.dma_sync_sg_for_cpu = pa11_dma_sync_sg_for_cpu,
+	.dma_sync_sg_for_device = pa11_dma_sync_sg_for_device,
 };
 
 
diff -puN arch/sparc64/kernel/pci_iommu.c~dma_sync_for_device-cpu arch/sparc64/kernel/pci_iommu.c
--- 25/arch/sparc64/kernel/pci_iommu.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/arch/sparc64/kernel/pci_iommu.c	2004-02-28 18:08:54.000000000 -0800
@@ -661,7 +661,7 @@ void pci_unmap_sg(struct pci_dev *pdev, 
 /* Make physical memory consistent for a single
  * streaming mode DMA translation after a transfer.
  */
-void pci_dma_sync_single(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int direction)
+void pci_dma_sync_single_for_cpu(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int direction)
 {
 	struct pcidev_cookie *pcp;
 	struct pci_iommu *iommu;
@@ -722,7 +722,7 @@ void pci_dma_sync_single(struct pci_dev 
 /* Make physical memory consistent for a set of streaming
  * mode DMA translations after a transfer.
  */
-void pci_dma_sync_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int direction)
+void pci_dma_sync_sg_for_cpu(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int direction)
 {
 	struct pcidev_cookie *pcp;
 	struct pci_iommu *iommu;
diff -puN arch/sparc64/kernel/sbus.c~dma_sync_for_device-cpu arch/sparc64/kernel/sbus.c
--- 25/arch/sparc64/kernel/sbus.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/arch/sparc64/kernel/sbus.c	2004-02-28 18:08:54.000000000 -0800
@@ -540,7 +540,7 @@ void sbus_unmap_sg(struct sbus_dev *sdev
 	spin_unlock_irqrestore(&iommu->lock, flags);
 }
 
-void sbus_dma_sync_single(struct sbus_dev *sdev, dma_addr_t base, size_t size, int direction)
+void sbus_dma_sync_single_for_cpu(struct sbus_dev *sdev, dma_addr_t base, size_t size, int direction)
 {
 	struct sbus_iommu *iommu = sdev->bus->iommu;
 	unsigned long flags;
@@ -552,7 +552,11 @@ void sbus_dma_sync_single(struct sbus_de
 	spin_unlock_irqrestore(&iommu->lock, flags);
 }
 
-void sbus_dma_sync_sg(struct sbus_dev *sdev, struct scatterlist *sg, int nents, int direction)
+void sbus_dma_sync_single_for_device(struct sbus_dev *sdev, dma_addr_t base, size_t size, int direction)
+{
+}
+
+void sbus_dma_sync_sg_for_cpu(struct sbus_dev *sdev, struct scatterlist *sg, int nents, int direction)
 {
 	struct sbus_iommu *iommu = sdev->bus->iommu;
 	unsigned long flags, size;
@@ -572,6 +576,10 @@ void sbus_dma_sync_sg(struct sbus_dev *s
 	spin_unlock_irqrestore(&iommu->lock, flags);
 }
 
+void sbus_dma_sync_sg_for_device(struct sbus_dev *sdev, struct scatterlist *sg, int nents, int direction)
+{
+}
+
 /* Enable 64-bit DVMA mode for the given device. */
 void sbus_set_sbus64(struct sbus_dev *sdev, int bursts)
 {
diff -puN arch/sparc64/kernel/sparc64_ksyms.c~dma_sync_for_device-cpu arch/sparc64/kernel/sparc64_ksyms.c
--- 25/arch/sparc64/kernel/sparc64_ksyms.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/arch/sparc64/kernel/sparc64_ksyms.c	2004-02-28 18:08:54.000000000 -0800
@@ -214,8 +214,8 @@ EXPORT_SYMBOL(sbus_map_single);
 EXPORT_SYMBOL(sbus_unmap_single);
 EXPORT_SYMBOL(sbus_map_sg);
 EXPORT_SYMBOL(sbus_unmap_sg);
-EXPORT_SYMBOL(sbus_dma_sync_single);
-EXPORT_SYMBOL(sbus_dma_sync_sg);
+EXPORT_SYMBOL(sbus_dma_sync_single_for_cpu);
+EXPORT_SYMBOL(sbus_dma_sync_sg_for_cpu);
 #endif
 EXPORT_SYMBOL(outsb);
 EXPORT_SYMBOL(outsw);
@@ -233,8 +233,8 @@ EXPORT_SYMBOL(pci_map_single);
 EXPORT_SYMBOL(pci_unmap_single);
 EXPORT_SYMBOL(pci_map_sg);
 EXPORT_SYMBOL(pci_unmap_sg);
-EXPORT_SYMBOL(pci_dma_sync_single);
-EXPORT_SYMBOL(pci_dma_sync_sg);
+EXPORT_SYMBOL(pci_dma_sync_single_for_cpu);
+EXPORT_SYMBOL(pci_dma_sync_sg_for_cpu);
 EXPORT_SYMBOL(pci_dma_supported);
 #endif
 
diff -puN arch/sparc/kernel/ioport.c~dma_sync_for_device-cpu arch/sparc/kernel/ioport.c
--- 25/arch/sparc/kernel/ioport.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/arch/sparc/kernel/ioport.c	2004-02-28 18:08:54.000000000 -0800
@@ -360,7 +360,7 @@ void sbus_unmap_sg(struct sbus_dev *sdev
 
 /*
  */
-void sbus_dma_sync_single(struct sbus_dev *sdev, dma_addr_t ba, size_t size, int direction)
+void sbus_dma_sync_single_for_cpu(struct sbus_dev *sdev, dma_addr_t ba, size_t size, int direction)
 {
 #if 0
 	unsigned long va;
@@ -380,9 +380,34 @@ void sbus_dma_sync_single(struct sbus_de
 #endif
 }
 
-void sbus_dma_sync_sg(struct sbus_dev *sdev, struct scatterlist *sg, int n, int direction)
+void sbus_dma_sync_single_for_device(struct sbus_dev *sdev, dma_addr_t ba, size_t size, int direction)
 {
-	printk("sbus_dma_sync_sg: not implemented yet\n");
+#if 0
+	unsigned long va;
+	struct resource *res;
+
+	/* We do not need the resource, just print a message if invalid. */
+	res = _sparc_find_resource(&_sparc_dvma, ba);
+	if (res == NULL)
+		panic("sbus_dma_sync_single_for_device: 0x%x\n", ba);
+
+	va = page_address(mmu_translate_dvma(ba)); /* XXX higmem */
+	/*
+	 * XXX This bogosity will be fixed with the iommu rewrite coming soon
+	 * to a kernel near you. - Anton
+	 */
+	/* mmu_inval_dma_area(va, (size + PAGE_SIZE-1) & PAGE_MASK); */
+#endif
+}
+
+void sbus_dma_sync_sg_for_cpu(struct sbus_dev *sdev, struct scatterlist *sg, int n, int direction)
+{
+	printk("sbus_dma_sync_sg_for_cpu: not implemented yet\n");
+}
+
+void sbus_dma_sync_sg_for_device(struct sbus_dev *sdev, struct scatterlist *sg, int n, int direction)
+{
+	printk("sbus_dma_sync_sg_for_device: not implemented yet\n");
 }
 #endif /* CONFIG_SBUS */
 
@@ -482,7 +507,7 @@ void pci_free_consistent(struct pci_dev 
  * The 32-bit bus address to use is returned.
  *
  * Once the device is given the dma address, the device owns this memory
- * until either pci_unmap_single or pci_dma_sync_single is performed.
+ * until either pci_unmap_single or pci_dma_sync_single_* is performed.
  */
 dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr, size_t size,
     int direction)
@@ -591,10 +616,21 @@ void pci_unmap_sg(struct pci_dev *hwdev,
  * If you perform a pci_map_single() but wish to interrogate the
  * buffer using the cpu, yet do not wish to teardown the PCI dma
  * mapping, you must call this function before doing so.  At the
- * next point you give the PCI dma address back to the card, the
+ * next point you give the PCI dma address back to the card, you
+ * must first perform a pci_dma_sync_single_for_device, and then the
  * device again owns the buffer.
  */
-void pci_dma_sync_single(struct pci_dev *hwdev, dma_addr_t ba, size_t size, int direction)
+void pci_dma_sync_single_for_cpu(struct pci_dev *hwdev, dma_addr_t ba, size_t size, int direction)
+{
+	if (direction == PCI_DMA_NONE)
+		BUG();
+	if (direction != PCI_DMA_TODEVICE) {
+		mmu_inval_dma_area((unsigned long)phys_to_virt(ba),
+		    (size + PAGE_SIZE-1) & PAGE_MASK);
+	}
+}
+
+void pci_dma_sync_single_for_device(struct pci_dev *hwdev, dma_addr_t ba, size_t size, int direction)
 {
 	if (direction == PCI_DMA_NONE)
 		BUG();
@@ -607,10 +643,27 @@ void pci_dma_sync_single(struct pci_dev 
 /* Make physical memory consistent for a set of streaming
  * mode DMA translations after a transfer.
  *
- * The same as pci_dma_sync_single but for a scatter-gather list,
+ * The same as pci_dma_sync_single_* but for a scatter-gather list,
  * same rules and usage.
  */
-void pci_dma_sync_sg(struct pci_dev *hwdev, struct scatterlist *sg, int nents, int direction)
+void pci_dma_sync_sg_for_cpu(struct pci_dev *hwdev, struct scatterlist *sg, int nents, int direction)
+{
+	int n;
+
+	if (direction == PCI_DMA_NONE)
+		BUG();
+	if (direction != PCI_DMA_TODEVICE) {
+		for (n = 0; n < nents; n++) {
+			if (page_address(sg->page) == NULL) BUG();
+			mmu_inval_dma_area(
+			    (unsigned long) page_address(sg->page),
+			    (sg->length + PAGE_SIZE-1) & PAGE_MASK);
+			sg++;
+		}
+	}
+}
+
+void pci_dma_sync_sg_for_device(struct pci_dev *hwdev, struct scatterlist *sg, int nents, int direction)
 {
 	int n;
 
diff -puN arch/sparc/kernel/sparc_ksyms.c~dma_sync_for_device-cpu arch/sparc/kernel/sparc_ksyms.c
--- 25/arch/sparc/kernel/sparc_ksyms.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/arch/sparc/kernel/sparc_ksyms.c	2004-02-28 18:08:54.000000000 -0800
@@ -206,8 +206,10 @@ EXPORT_SYMBOL(sbus_map_single);
 EXPORT_SYMBOL(sbus_unmap_single);
 EXPORT_SYMBOL(sbus_map_sg);
 EXPORT_SYMBOL(sbus_unmap_sg);
-EXPORT_SYMBOL(sbus_dma_sync_single);
-EXPORT_SYMBOL(sbus_dma_sync_sg);
+EXPORT_SYMBOL(sbus_dma_sync_single_for_cpu);
+EXPORT_SYMBOL(sbus_dma_sync_single_for_device);
+EXPORT_SYMBOL(sbus_dma_sync_sg_for_cpu);
+EXPORT_SYMBOL(sbus_dma_sync_sg_for_device);
 EXPORT_SYMBOL(sbus_iounmap);
 EXPORT_SYMBOL(sbus_ioremap);
 #endif
@@ -219,7 +221,10 @@ EXPORT_SYMBOL(pci_alloc_consistent);
 EXPORT_SYMBOL(pci_free_consistent);
 EXPORT_SYMBOL(pci_map_single);
 EXPORT_SYMBOL(pci_unmap_single);
-EXPORT_SYMBOL(pci_dma_sync_single);
+EXPORT_SYMBOL(pci_dma_sync_single_for_cpu);
+EXPORT_SYMBOL(pci_dma_sync_single_for_device);
+EXPORT_SYMBOL(pci_dma_sync_sg_for_cpu);
+EXPORT_SYMBOL(pci_dma_sync_sg_for_device);
 /* Actually, ioremap/iounmap are not PCI specific. But it is ok for drivers. */
 EXPORT_SYMBOL(ioremap);
 EXPORT_SYMBOL(iounmap);
diff -puN arch/v850/kernel/rte_mb_a_pci.c~dma_sync_for_device-cpu arch/v850/kernel/rte_mb_a_pci.c
--- 25/arch/v850/kernel/rte_mb_a_pci.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/arch/v850/kernel/rte_mb_a_pci.c	2004-02-28 18:08:54.000000000 -0800
@@ -687,10 +687,11 @@ void pci_unmap_single (struct pci_dev *p
    If you perform a pci_map_single() but wish to interrogate the
    buffer using the cpu, yet do not wish to teardown the PCI dma
    mapping, you must call this function before doing so.  At the next
-   point you give the PCI dma address back to the card, the device
-   again owns the buffer.  */
+   point you give the PCI dma address back to the card, you must first
+   perform a pci_dma_sync_single_for_device, and then the device again owns
+   the buffer.  */
 void
-pci_dma_sync_single (struct pci_dev *pdev, dma_addr_t dma_addr, size_t size,
+pci_dma_sync_single_for_cpu (struct pci_dev *pdev, dma_addr_t dma_addr, size_t size,
 		     int dir)
 {
 	void *mb_sram_addr = PCI_TO_MB_SRAM (dma_addr);
@@ -700,6 +701,22 @@ pci_dma_sync_single (struct pci_dev *pde
 	if (dir == PCI_DMA_FROMDEVICE)
 		memcpy (mapping->cpu_addr, mb_sram_addr, size);
 	else if (dir == PCI_DMA_TODEVICE)
+		; /* nothing to do */
+	else
+		panic("pci_dma_sync_single_for_cpu: unsupported sync dir: %d", dir);
+}
+
+void
+pci_dma_sync_single_for_device (struct pci_dev *pdev, dma_addr_t dma_addr, size_t size,
+				int dir)
+{
+	void *mb_sram_addr = PCI_TO_MB_SRAM (dma_addr);
+	struct dma_mapping *mapping = find_dma_mapping (mb_sram_addr);
+
+	/* Synchronize the DMA buffer with the CPU buffer if necessary.  */
+	if (dir == PCI_DMA_FROMDEVICE)
+		; /* nothing to do */
+	else if (dir == PCI_DMA_TODEVICE)
 		memcpy (mb_sram_addr, mapping->cpu_addr, size);
 	else
 		panic("pci_dma_sync_single: unsupported sync dir: %d", dir);
@@ -724,11 +741,18 @@ pci_unmap_sg (struct pci_dev *pdev, stru
 }
 
 /* Make physical memory consistent for a set of streaming mode DMA
-   translations after a transfer.  The same as pci_dma_sync_single but
+   translations after a transfer.  The same as pci_dma_sync_single_* but
    for a scatter-gather list, same rules and usage.  */
 
 void
-pci_dma_sync_sg (struct pci_dev *dev, struct scatterlist *sg, int sg_len,
+pci_dma_sync_sg_for_cpu (struct pci_dev *dev, struct scatterlist *sg, int sg_len,
+		 int dir)
+{
+	BUG ();
+}
+
+void
+pci_dma_sync_sg_for_device (struct pci_dev *dev, struct scatterlist *sg, int sg_len,
 		 int dir)
 {
 	BUG ();
@@ -770,4 +794,5 @@ EXPORT_SYMBOL (pci_map_single);
 EXPORT_SYMBOL (pci_unmap_single);
 EXPORT_SYMBOL (pci_alloc_consistent);
 EXPORT_SYMBOL (pci_free_consistent);
-EXPORT_SYMBOL (pci_dma_sync_single);
+EXPORT_SYMBOL (pci_dma_sync_single_for_cpu);
+EXPORT_SYMBOL (pci_dma_sync_single_for_device);
diff -puN Documentation/DMA-mapping.txt~dma_sync_for_device-cpu Documentation/DMA-mapping.txt
--- 25/Documentation/DMA-mapping.txt~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/Documentation/DMA-mapping.txt	2004-02-28 18:08:54.000000000 -0800
@@ -283,7 +283,7 @@ There are two types of DMA mappings:
              in order to get correct behavior on all platforms.
 
 - Streaming DMA mappings which are usually mapped for one DMA transfer,
-  unmapped right after it (unless you use pci_dma_sync below) and for which
+  unmapped right after it (unless you use pci_dma_sync_* below) and for which
   hardware can optimize for sequential accesses.
 
  Think of "streaming" as "asynchronous" or "outside the coherency
@@ -543,14 +543,30 @@ same bus address space) and you could re
 all bus addresses.
 
 If you need to use the same streaming DMA region multiple times and touch
-the data in between the DMA transfers, just map it with
-pci_map_{single,sg}, and after each DMA transfer call either:
+the data in between the DMA transfers, the buffer needs to be synced
+properly in order for the cpu and device to see the most up-to-date and
+correct copy of the DMA buffer.
 
-	pci_dma_sync_single(dev, dma_handle, size, direction);
+So, firstly, just map it with pci_map_{single,sg}, and after each DMA
+transfer call either:
+
+	pci_dma_sync_single_for_cpu(dev, dma_handle, size, direction);
 
 or:
 
-	pci_dma_sync_sg(dev, sglist, nents, direction);
+	pci_dma_sync_sg_for_cpu(dev, sglist, nents, direction);
+
+as appropriate.
+
+Then, if you wish to let the device get at the DMA area again,
+finish accessing the data with the cpu, and then before actually
+giving the buffer to the hardware call either:
+
+	pci_dma_sync_single_for_device(dev, dma_handle, size, direction);
+
+or:
+
+	pci_dma_sync_sg_for_device(dev, sglist, nents, direction);
 
 as appropriate.
 
@@ -590,8 +606,9 @@ to use the pci_dma_sync_*() interfaces.
 			 * the DMA transfer with the CPU first
 			 * so that we see updated contents.
 			 */
-			pci_dma_sync_single(cp->pdev, cp->rx_dma, cp->rx_len,
-					    PCI_DMA_FROMDEVICE);
+			pci_dma_sync_single_for_cpu(cp->pdev, cp->rx_dma,
+						    cp->rx_len,
+						    PCI_DMA_FROMDEVICE);
 
 			/* Now it is safe to examine the buffer. */
 			hp = (struct my_card_header *) cp->rx_buf;
@@ -601,7 +618,13 @@ to use the pci_dma_sync_*() interfaces.
 				pass_to_upper_layers(cp->rx_buf);
 				make_and_setup_new_rx_buf(cp);
 			} else {
-				/* Just give the buffer back to the card. */
+				/* Just sync the buffer and give it back
+				 * to the card.
+				 */
+				pci_dma_sync_single_for_device(cp->pdev,
+							       cp->rx_dma,
+							       cp->rx_len,
+							       PCI_DMA_FROMDEVICE);
 				give_rx_buf_to_card(cp);
 			}
 		}
@@ -709,12 +732,21 @@ interfaces.  To reiterate:
 
 When the DMA transfer is complete, invoke:
 
-	void pci_dac_dma_sync_single(struct pci_dev *pdev,
-				     dma64_addr_t dma_addr,
-				     size_t len, int direction);
+	void pci_dac_dma_sync_single_for_cpu(struct pci_dev *pdev,
+					     dma64_addr_t dma_addr,
+					     size_t len, int direction);
 
 This must be done before the CPU looks at the buffer again.
-This interface behaves identically to pci_dma_sync_{single,sg}().
+This interface behaves identically to pci_dma_sync_{single,sg}_for_cpu().
+
+And likewise, if you wish to let the device get back at the buffer after
+the cpu has read/written it, invoke:
+
+	void pci_dac_dma_sync_single_for_device(struct pci_dev *pdev,
+						dma64_addr_t dma_addr,
+						size_t len, int direction);
+
+before letting the device access the DMA area again.
 
 If you need to get back to the PAGE/OFFSET tuple from a dma64_addr_t
 the following interfaces are provided:
diff -puN drivers/atm/fore200e.c~dma_sync_for_device-cpu drivers/atm/fore200e.c
--- 25/drivers/atm/fore200e.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/drivers/atm/fore200e.c	2004-02-28 18:08:54.000000000 -0800
@@ -482,11 +482,19 @@ fore200e_pca_dma_unmap(struct fore200e* 
 
 
 static void
-fore200e_pca_dma_sync(struct fore200e* fore200e, u32 dma_addr, int size, int direction)
+fore200e_pca_dma_sync_for_cpu(struct fore200e* fore200e, u32 dma_addr, int size, int direction)
 {
     DPRINTK(3, "PCI DVMA sync: dma_addr = 0x%08x, size = %d, direction = %d\n", dma_addr, size, direction);
 
-    pci_dma_sync_single((struct pci_dev*)fore200e->bus_dev, dma_addr, size, direction);
+    pci_dma_sync_single_for_cpu((struct pci_dev*)fore200e->bus_dev, dma_addr, size, direction);
+}
+
+static void
+fore200e_pca_dma_sync_for_device(struct fore200e* fore200e, u32 dma_addr, int size, int direction)
+{
+    DPRINTK(3, "PCI DVMA sync: dma_addr = 0x%08x, size = %d, direction = %d\n", dma_addr, size, direction);
+
+    pci_dma_sync_single_for_device((struct pci_dev*)fore200e->bus_dev, dma_addr, size, direction);
 }
 
 
@@ -761,11 +769,19 @@ fore200e_sba_dma_unmap(struct fore200e* 
 
 
 static void
-fore200e_sba_dma_sync(struct fore200e* fore200e, u32 dma_addr, int size, int direction)
+fore200e_sba_dma_sync_for_cpu(struct fore200e* fore200e, u32 dma_addr, int size, int direction)
 {
     DPRINTK(3, "SBUS DVMA sync: dma_addr = 0x%08x, size = %d, direction = %d\n", dma_addr, size, direction);
     
-    sbus_dma_sync_single((struct sbus_dev*)fore200e->bus_dev, dma_addr, size, direction);
+    sbus_dma_sync_single_for_cpu((struct sbus_dev*)fore200e->bus_dev, dma_addr, size, direction);
+}
+
+static void
+fore200e_sba_dma_sync_for_device(struct fore200e* fore200e, u32 dma_addr, int size, int direction)
+{
+    DPRINTK(3, "SBUS DVMA sync: dma_addr = 0x%08x, size = %d, direction = %d\n", dma_addr, size, direction);
+
+    sbus_dma_sync_single_for_device((struct sbus_dev*)fore200e->bus_dev, dma_addr, size, direction);
 }
 
 
@@ -1149,10 +1165,13 @@ fore200e_push_rpd(struct fore200e* fore2
 	/* rebuild rx buffer address from rsd handle */
 	buffer = FORE200E_HDL2BUF(rpd->rsd[ i ].handle);
 	
-	/* ensure DMA synchronisation */
-	fore200e->bus->dma_sync(fore200e, buffer->data.dma_addr, rpd->rsd[ i ].length, FORE200E_DMA_FROMDEVICE);
+	/* Make device DMA transfer visible to CPU.  */
+	fore200e->bus->dma_sync_for_cpu(fore200e, buffer->data.dma_addr, rpd->rsd[ i ].length, FORE200E_DMA_FROMDEVICE);
 	
 	memcpy(skb_put(skb, rpd->rsd[ i ].length), buffer->data.align_addr, rpd->rsd[ i ].length);
+
+	/* Now let the device get at it again.  */
+	fore200e->bus->dma_sync_for_device(fore200e, buffer->data.dma_addr, rpd->rsd[ i ].length, FORE200E_DMA_FROMDEVICE);
     }
     
     DPRINTK(3, "rx skb: len = %d, truesize = %d\n", skb->len, skb->truesize);
@@ -1584,8 +1603,9 @@ fore200e_send(struct atm_vcc *vcc, struc
 
     tasklet_enable(&fore200e->tasklet);
 
-    /* ensure DMA synchronisation */
-    fore200e->bus->dma_sync(fore200e, tpd->tsd[ 0 ].buffer, tpd->tsd[ 0 ].length, FORE200E_DMA_TODEVICE);
+    /* The dma_map call above implies a dma_sync so the device can use it,
+     * thus no explicit dma_sync call is necessary here.
+     */
     
     DPRINTK(3, "tx on %d.%d.%d:%d, len = %u (%u)\n", 
 	    vcc->itf, vcc->vpi, vcc->vci, fore200e_atm2fore_aal(vcc->qos.aal),
@@ -2918,7 +2938,8 @@ static const struct fore200e_bus fore200
       fore200e_pca_write,
       fore200e_pca_dma_map,
       fore200e_pca_dma_unmap,
-      fore200e_pca_dma_sync,
+      fore200e_pca_dma_sync_for_cpu,
+      fore200e_pca_dma_sync_for_device,
       fore200e_pca_dma_chunk_alloc,
       fore200e_pca_dma_chunk_free,
       fore200e_pca_detect,
@@ -2940,7 +2961,8 @@ static const struct fore200e_bus fore200
       fore200e_sba_write,
       fore200e_sba_dma_map,
       fore200e_sba_dma_unmap,
-      fore200e_sba_dma_sync,
+      fore200e_sba_dma_sync_for_cpu,
+      fore200e_sba_dma_sync_for_device,
       fore200e_sba_dma_chunk_alloc,
       fore200e_sba_dma_chunk_free,
       fore200e_sba_detect, 
diff -puN drivers/atm/fore200e.h~dma_sync_for_device-cpu drivers/atm/fore200e.h
--- 25/drivers/atm/fore200e.h~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/drivers/atm/fore200e.h	2004-02-28 18:08:54.000000000 -0800
@@ -801,7 +801,8 @@ typedef struct fore200e_bus {
     void                 (*write)(u32, volatile u32*);
     u32                  (*dma_map)(struct fore200e*, void*, int, int);
     void                 (*dma_unmap)(struct fore200e*, u32, int, int);
-    void                 (*dma_sync)(struct fore200e*, u32, int, int);
+    void                 (*dma_sync_for_cpu)(struct fore200e*, u32, int, int);
+    void                 (*dma_sync_for_device)(struct fore200e*, u32, int, int);
     int                  (*dma_chunk_alloc)(struct fore200e*, struct chunk*, int, int, int);
     void                 (*dma_chunk_free)(struct fore200e*, struct chunk*);
     struct fore200e*     (*detect)(const struct fore200e_bus*, int);
diff -puN drivers/atm/idt77252.c~dma_sync_for_device-cpu drivers/atm/idt77252.c
--- 25/drivers/atm/idt77252.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/drivers/atm/idt77252.c	2004-02-28 18:08:54.000000000 -0800
@@ -1064,8 +1064,8 @@ dequeue_rx(struct idt77252_dev *card, st
 
 	vcc = vc->rx_vcc;
 
-	pci_dma_sync_single(card->pcidev, IDT77252_PRV_PADDR(skb),
-			    skb->end - skb->data, PCI_DMA_FROMDEVICE);
+	pci_dma_sync_single_for_cpu(card->pcidev, IDT77252_PRV_PADDR(skb),
+				    skb->end - skb->data, PCI_DMA_FROMDEVICE);
 
 	if ((vcc->qos.aal == ATM_AAL0) ||
 	    (vcc->qos.aal == ATM_AAL34)) {
@@ -1903,6 +1903,9 @@ recycle_rx_skb(struct idt77252_dev *card
 	u32 handle = IDT77252_PRV_POOL(skb);
 	int err;
 
+	pci_dma_sync_single_for_device(card->pcidev, IDT77252_PRV_PADDR(skb),
+				       skb->end - skb->data, PCI_DMA_FROMDEVICE);
+
 	err = push_rx_skb(card, skb, POOL_QUEUE(handle));
 	if (err) {
 		pci_unmap_single(card->pcidev, IDT77252_PRV_PADDR(skb),
diff -puN drivers/ieee1394/dma.c~dma_sync_for_device-cpu drivers/ieee1394/dma.c
--- 25/drivers/ieee1394/dma.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/drivers/ieee1394/dma.c	2004-02-28 18:08:54.000000000 -0800
@@ -168,7 +168,7 @@ dma_addr_t dma_region_offset_to_bus(stru
 	return sg_dma_address(sg) + rem;
 }
 
-void dma_region_sync(struct dma_region *dma, unsigned long offset, unsigned long len)
+void dma_region_sync_for_cpu(struct dma_region *dma, unsigned long offset, unsigned long len)
 {
 	int first, last;
 	unsigned long rem;
@@ -179,7 +179,21 @@ void dma_region_sync(struct dma_region *
 	first = dma_region_find(dma, offset, &rem);
 	last = dma_region_find(dma, offset + len - 1, &rem);
 
-	pci_dma_sync_sg(dma->dev, &dma->sglist[first], last - first + 1, dma->direction);
+	pci_dma_sync_sg_for_cpu(dma->dev, &dma->sglist[first], last - first + 1, dma->direction);
+}
+
+void dma_region_sync_for_device(struct dma_region *dma, unsigned long offset, unsigned long len)
+{
+	int first, last;
+	unsigned long rem;
+
+	if (!len)
+		len = 1;
+
+	first = dma_region_find(dma, offset, &rem);
+	last = dma_region_find(dma, offset + len - 1, &rem);
+
+	pci_dma_sync_sg_for_device(dma->dev, &dma->sglist[first], last - first + 1, dma->direction);
 }
 
 /* nopage() handler for mmap access */
diff -puN drivers/ieee1394/dma.h~dma_sync_for_device-cpu drivers/ieee1394/dma.h
--- 25/drivers/ieee1394/dma.h~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/drivers/ieee1394/dma.h	2004-02-28 18:08:54.000000000 -0800
@@ -60,8 +60,10 @@ int  dma_region_alloc(struct dma_region 
 /* unmap and free the buffer */
 void dma_region_free(struct dma_region *dma);
 
-/* sync the IO bus' view of the buffer with the CPU's view */
-void dma_region_sync(struct dma_region *dma, unsigned long offset, unsigned long len);
+/* sync the CPU's view of the buffer */
+void dma_region_sync_for_cpu(struct dma_region *dma, unsigned long offset, unsigned long len);
+/* sync the IO bus' view of the buffer */
+void dma_region_sync_for_device(struct dma_region *dma, unsigned long offset, unsigned long len);
 
 /* map the buffer into a user space process */
 int  dma_region_mmap(struct dma_region *dma, struct file *file, struct vm_area_struct *vma);
diff -puN drivers/ieee1394/dv1394.c~dma_sync_for_device-cpu drivers/ieee1394/dv1394.c
--- 25/drivers/ieee1394/dv1394.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/drivers/ieee1394/dv1394.c	2004-02-28 18:08:54.000000000 -0800
@@ -553,7 +553,7 @@ static void frame_prepare(struct video_c
 	*(f->frame_end_branch) = cpu_to_le32(f->descriptor_pool_dma | f->first_n_descriptors);
 
 	/* make the latest version of this frame visible to the PCI card */
-	dma_region_sync(&video->dv_buf, f->data - (unsigned long) video->dv_buf.kvirt, video->frame_size);
+	dma_region_sync_for_device(&video->dv_buf, f->data - (unsigned long) video->dv_buf.kvirt, video->frame_size);
 
 	/* lock against DMA interrupt */
 	spin_lock_irqsave(&video->spinlock, irq_flags);
@@ -2033,9 +2033,9 @@ static void ir_tasklet_func(unsigned lon
 			struct packet *p = dma_region_i(&video->packet_buf, struct packet, video->current_packet);
 
 			/* make sure we are seeing the latest changes to p */
-			dma_region_sync(&video->packet_buf,
-					(unsigned long) p - (unsigned long) video->packet_buf.kvirt,
-					sizeof(struct packet));
+			dma_region_sync_for_cpu(&video->packet_buf,
+						(unsigned long) p - (unsigned long) video->packet_buf.kvirt,
+						sizeof(struct packet));
 					
 			packet_length = le16_to_cpu(p->data_length);
 			packet_time   = le16_to_cpu(p->timestamp);
diff -puN drivers/ieee1394/ieee1394_core.c~dma_sync_for_device-cpu drivers/ieee1394/ieee1394_core.c
--- 25/drivers/ieee1394/ieee1394_core.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/drivers/ieee1394/ieee1394_core.c	2004-02-28 18:08:54.000000000 -0800
@@ -1216,7 +1216,8 @@ EXPORT_SYMBOL(dma_prog_region_free);
 EXPORT_SYMBOL(dma_region_init);
 EXPORT_SYMBOL(dma_region_alloc);
 EXPORT_SYMBOL(dma_region_free);
-EXPORT_SYMBOL(dma_region_sync);
+EXPORT_SYMBOL(dma_region_sync_for_cpu);
+EXPORT_SYMBOL(dma_region_sync_for_device);
 EXPORT_SYMBOL(dma_region_mmap);
 EXPORT_SYMBOL(dma_region_offset_to_bus);
 
diff -puN drivers/ieee1394/ohci1394.c~dma_sync_for_device-cpu drivers/ieee1394/ohci1394.c
--- 25/drivers/ieee1394/ohci1394.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/drivers/ieee1394/ohci1394.c	2004-02-28 18:08:54.000000000 -0800
@@ -1729,7 +1729,7 @@ static void ohci_iso_recv_bufferfill_tas
 		/* OK, the block is finished... */
 		
 		/* sync our view of the block */
-		dma_region_sync(&iso->data_buf, recv->block_dma*recv->buf_stride, recv->buf_stride);
+		dma_region_sync_for_cpu(&iso->data_buf, recv->block_dma*recv->buf_stride, recv->buf_stride);
 		
 		/* reset the DMA descriptor */
 		im->status = recv->buf_stride;
@@ -1785,7 +1785,7 @@ static void ohci_iso_recv_packetperbuf_t
 		}
 
 		/* sync our view of the buffer */
-		dma_region_sync(&iso->data_buf, iso->pkt_dma * recv->buf_stride, recv->buf_stride);
+		dma_region_sync_for_cpu(&iso->data_buf, iso->pkt_dma * recv->buf_stride, recv->buf_stride);
 			
 		/* record the per-packet info */
 		{
@@ -2009,7 +2009,7 @@ static int ohci_iso_xmit_queue(struct hp
 	sy = info->sy;
 
 	/* sync up the card's view of the buffer */
-	dma_region_sync(&iso->data_buf, offset, len);
+	dma_region_sync_for_device(&iso->data_buf, offset, len);
 
 	/* append first_packet to the DMA chain */
 	/* by linking the previous descriptor to it */
diff -puN drivers/ieee1394/sbp2.c~dma_sync_for_device-cpu drivers/ieee1394/sbp2.c
--- 25/drivers/ieee1394/sbp2.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/drivers/ieee1394/sbp2.c	2004-02-28 18:08:54.000000000 -0800
@@ -1948,12 +1948,12 @@ static int sbp2_link_orb_command(struct 
 	SBP2_ORB_DEBUG("sending command orb %p, total orbs = %x",
 			command_orb, global_outstanding_command_orbs);
 
-	pci_dma_sync_single(hi->host->pdev, command->command_orb_dma,
-			    sizeof(struct sbp2_command_orb),
-			    PCI_DMA_BIDIRECTIONAL);
-	pci_dma_sync_single(hi->host->pdev, command->sge_dma,
-			    sizeof(command->scatter_gather_element),
-			    PCI_DMA_BIDIRECTIONAL);
+	pci_dma_sync_single_for_device(hi->host->pdev, command->command_orb_dma,
+				       sizeof(struct sbp2_command_orb),
+				       PCI_DMA_BIDIRECTIONAL);
+	pci_dma_sync_single_for_device(hi->host->pdev, command->sge_dma,
+				       sizeof(command->scatter_gather_element),
+				       PCI_DMA_BIDIRECTIONAL);
 	/*
 	 * Check to see if there are any previous orbs to use
 	 */
@@ -1994,9 +1994,9 @@ static int sbp2_link_orb_command(struct 
 			cpu_to_be32(command->command_orb_dma);
 		/* Tells hardware that this pointer is valid */
 		scsi_id->last_orb->next_ORB_hi = 0x0;
-		pci_dma_sync_single(hi->host->pdev, scsi_id->last_orb_dma,
-				    sizeof(struct sbp2_command_orb),
-				    PCI_DMA_BIDIRECTIONAL);
+		pci_dma_sync_single_for_device(hi->host->pdev, scsi_id->last_orb_dma,
+					       sizeof(struct sbp2_command_orb),
+					       PCI_DMA_BIDIRECTIONAL);
 
 		/*
 		 * Ring the doorbell
@@ -2358,12 +2358,12 @@ static int sbp2_handle_status_write(stru
 	if (command) {
 
 		SBP2_DEBUG("Found status for command ORB");
-		pci_dma_sync_single(hi->host->pdev, command->command_orb_dma,
-				    sizeof(struct sbp2_command_orb),
-				    PCI_DMA_BIDIRECTIONAL);
-		pci_dma_sync_single(hi->host->pdev, command->sge_dma,
-				    sizeof(command->scatter_gather_element),
-				    PCI_DMA_BIDIRECTIONAL);
+		pci_dma_sync_single_for_cpu(hi->host->pdev, command->command_orb_dma,
+					    sizeof(struct sbp2_command_orb),
+					    PCI_DMA_BIDIRECTIONAL);
+		pci_dma_sync_single_for_cpu(hi->host->pdev, command->sge_dma,
+					    sizeof(command->scatter_gather_element),
+					    PCI_DMA_BIDIRECTIONAL);
 
 		SBP2_ORB_DEBUG("matched command orb %p", &command->command_orb);
 		outstanding_orb_decr;
@@ -2534,12 +2534,12 @@ static void sbp2scsi_complete_all_comman
 		SBP2_DEBUG("Found pending command to complete");
 		lh = scsi_id->sbp2_command_orb_inuse.next;
 		command = list_entry(lh, struct sbp2_command_info, list);
-		pci_dma_sync_single(hi->host->pdev, command->command_orb_dma,
-				    sizeof(struct sbp2_command_orb),
-				    PCI_DMA_BIDIRECTIONAL);
-		pci_dma_sync_single(hi->host->pdev, command->sge_dma,
-				    sizeof(command->scatter_gather_element),
-				    PCI_DMA_BIDIRECTIONAL);
+		pci_dma_sync_single_for_cpu(hi->host->pdev, command->command_orb_dma,
+					    sizeof(struct sbp2_command_orb),
+					    PCI_DMA_BIDIRECTIONAL);
+		pci_dma_sync_single_for_cpu(hi->host->pdev, command->sge_dma,
+					    sizeof(command->scatter_gather_element),
+					    PCI_DMA_BIDIRECTIONAL);
 		sbp2util_mark_command_completed(scsi_id, command);
 		if (command->Current_SCpnt) {
 			void (*done)(Scsi_Cmnd *) = command->Current_done;
@@ -2699,14 +2699,14 @@ static int sbp2scsi_abort (Scsi_Cmnd *SC
 		command = sbp2util_find_command_for_SCpnt(scsi_id, SCpnt);
 		if (command) {
 			SBP2_DEBUG("Found command to abort");
-			pci_dma_sync_single(hi->host->pdev,
-					    command->command_orb_dma,
-					    sizeof(struct sbp2_command_orb),
-					    PCI_DMA_BIDIRECTIONAL);
-			pci_dma_sync_single(hi->host->pdev,
-					    command->sge_dma,
-					    sizeof(command->scatter_gather_element),
-					    PCI_DMA_BIDIRECTIONAL);
+			pci_dma_sync_single_for_cpu(hi->host->pdev,
+						    command->command_orb_dma,
+						    sizeof(struct sbp2_command_orb),
+						    PCI_DMA_BIDIRECTIONAL);
+			pci_dma_sync_single_for_cpu(hi->host->pdev,
+						    command->sge_dma,
+						    sizeof(command->scatter_gather_element),
+						    PCI_DMA_BIDIRECTIONAL);
 			sbp2util_mark_command_completed(scsi_id, command);
 			if (command->Current_SCpnt) {
 				void (*done)(Scsi_Cmnd *) = command->Current_done;
diff -puN drivers/media/video/video-buf.c~dma_sync_for_device-cpu drivers/media/video/video-buf.c
--- 25/drivers/media/video/video-buf.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/drivers/media/video/video-buf.c	2004-02-28 18:08:54.000000000 -0800
@@ -215,7 +215,7 @@ int videobuf_dma_pci_sync(struct pci_dev
 		BUG();
 
 	if (!dma->bus_addr)
-		pci_dma_sync_sg(dev,dma->sglist,dma->nr_pages,dma->direction);
+		pci_dma_sync_sg_for_cpu(dev,dma->sglist,dma->nr_pages,dma->direction);
 	return 0;
 }
 
diff -puN drivers/message/fusion/mptlan.c~dma_sync_for_device-cpu drivers/message/fusion/mptlan.c
--- 25/drivers/message/fusion/mptlan.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/drivers/message/fusion/mptlan.c	2004-02-28 18:08:54.000000000 -0800
@@ -955,11 +955,13 @@ mpt_lan_receive_post_turbo(struct net_de
 			return -ENOMEM;
 		}
 
-		pci_dma_sync_single(mpt_dev->pcidev, priv->RcvCtl[ctx].dma,
-				    priv->RcvCtl[ctx].len, PCI_DMA_FROMDEVICE);
+		pci_dma_sync_single_for_cpu(mpt_dev->pcidev, priv->RcvCtl[ctx].dma,
+					    priv->RcvCtl[ctx].len, PCI_DMA_FROMDEVICE);
 
 		memcpy(skb_put(skb, len), old_skb->data, len);
 
+		pci_dma_sync_single_for_device(mpt_dev->pcidev, priv->RcvCtl[ctx].dma,
+					       priv->RcvCtl[ctx].len, PCI_DMA_FROMDEVICE);
 		goto out;
 	}
 
@@ -1113,12 +1115,17 @@ mpt_lan_receive_post_reply(struct net_de
 //					IOC_AND_NETDEV_NAMES_s_s(dev),
 //					i, l));
 
-			pci_dma_sync_single(mpt_dev->pcidev,
-					    priv->RcvCtl[ctx].dma,
-					    priv->RcvCtl[ctx].len,
-					    PCI_DMA_FROMDEVICE);
+			pci_dma_sync_single_for_cpu(mpt_dev->pcidev,
+						    priv->RcvCtl[ctx].dma,
+						    priv->RcvCtl[ctx].len,
+						    PCI_DMA_FROMDEVICE);
 			memcpy(skb_put(skb, l), old_skb->data, l);
 
+			pci_dma_sync_single_for_device(mpt_dev->pcidev,
+						       priv->RcvCtl[ctx].dma,
+						       priv->RcvCtl[ctx].len,
+						       PCI_DMA_FROMDEVICE);
+
 			priv->mpt_rxfidx[++priv->mpt_rxfidx_tail] = ctx;
 			szrem -= l;
 		}
@@ -1136,11 +1143,18 @@ mpt_lan_receive_post_reply(struct net_de
 			return -ENOMEM;
 		}
 
-		pci_dma_sync_single(mpt_dev->pcidev, priv->RcvCtl[ctx].dma,
-				    priv->RcvCtl[ctx].len, PCI_DMA_FROMDEVICE);
+		pci_dma_sync_single_for_cpu(mpt_dev->pcidev,
+					    priv->RcvCtl[ctx].dma,
+					    priv->RcvCtl[ctx].len,
+					    PCI_DMA_FROMDEVICE);
 
 		memcpy(skb_put(skb, len), old_skb->data, len);
 
+		pci_dma_sync_single_for_device(mpt_dev->pcidev,
+					       priv->RcvCtl[ctx].dma,
+					       priv->RcvCtl[ctx].len,
+					       PCI_DMA_FROMDEVICE);
+
 		spin_lock_irqsave(&priv->rxfidx_lock, flags);
 		priv->mpt_rxfidx[++priv->mpt_rxfidx_tail] = ctx;
 		spin_unlock_irqrestore(&priv->rxfidx_lock, flags);
diff -puN drivers/message/i2o/i2o_core.c~dma_sync_for_device-cpu drivers/message/i2o/i2o_core.c
--- 25/drivers/message/i2o/i2o_core.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/drivers/message/i2o/i2o_core.c	2004-02-28 18:08:54.000000000 -0800
@@ -1177,7 +1177,7 @@ void i2o_run_queue(struct i2o_controller
 		 *	the processor 
 	 	 */
 
-		pci_dma_sync_single(c->pdev, c->page_frame_map, MSG_FRAME_SIZE, PCI_DMA_FROMDEVICE);
+		pci_dma_sync_single_for_cpu(c->pdev, c->page_frame_map, MSG_FRAME_SIZE, PCI_DMA_FROMDEVICE);
 	
 		/*
 		 *	Despatch it
diff -puN drivers/net/3c59x.c~dma_sync_for_device-cpu drivers/net/3c59x.c
--- 25/drivers/net/3c59x.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/drivers/net/3c59x.c	2004-02-28 18:08:54.000000000 -0800
@@ -2577,11 +2577,12 @@ boomerang_rx(struct net_device *dev)
 			if (pkt_len < rx_copybreak && (skb = dev_alloc_skb(pkt_len + 2)) != 0) {
 				skb->dev = dev;
 				skb_reserve(skb, 2);	/* Align IP on 16 byte boundaries */
-				pci_dma_sync_single(VORTEX_PCI(vp), dma, PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
+				pci_dma_sync_single_for_cpu(VORTEX_PCI(vp), dma, PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
 				/* 'skb_put()' points to the start of sk_buff data area. */
 				memcpy(skb_put(skb, pkt_len),
 					   vp->rx_skbuff[entry]->tail,
 					   pkt_len);
+				pci_dma_sync_single_for_device(VORTEX_PCI(vp), dma, PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
 				vp->rx_copy++;
 			} else {
 				/* Pass up the skbuff already on the Rx ring. */
diff -puN drivers/net/b44.c~dma_sync_for_device-cpu drivers/net/b44.c
--- 25/drivers/net/b44.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/drivers/net/b44.c	2004-02-28 18:08:54.000000000 -0800
@@ -667,6 +667,10 @@ static void b44_recycle_rx(struct b44 *b
 	dest_desc->ctrl = ctrl;
 	dest_desc->addr = src_desc->addr;
 	src_map->skb = NULL;
+
+	pci_dma_sync_single_for_device(bp->pdev, src_desc->addr,
+				       RX_PKT_BUF_SZ,
+				       PCI_DMA_FROMDEVICE);
 }
 
 static int b44_rx(struct b44 *bp, int budget)
@@ -686,9 +690,9 @@ static int b44_rx(struct b44 *bp, int bu
 		struct rx_header *rh;
 		u16 len;
 
-		pci_dma_sync_single(bp->pdev, map,
-				    RX_PKT_BUF_SZ,
-				    PCI_DMA_FROMDEVICE);
+		pci_dma_sync_single_for_cpu(bp->pdev, map,
+					    RX_PKT_BUF_SZ,
+					    PCI_DMA_FROMDEVICE);
 		rh = (struct rx_header *) skb->data;
 		len = cpu_to_le16(rh->len);
 		if ((len > (RX_PKT_BUF_SZ - bp->rx_offset)) ||
diff -puN drivers/net/dl2k.c~dma_sync_for_device-cpu drivers/net/dl2k.c
--- 25/drivers/net/dl2k.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/drivers/net/dl2k.c	2004-02-28 18:08:54.000000000 -0800
@@ -874,8 +874,6 @@ receive_packet (struct net_device *dev)
 		frame_status = le64_to_cpu (desc->status);
 		if (--cnt < 0)
 			break;
-		pci_dma_sync_single (np->pdev, desc->fraginfo, np->rx_buf_sz,
-				     PCI_DMA_FROMDEVICE);
 		/* Update rx error statistics, drop packet. */
 		if (frame_status & RFS_Errors) {
 			np->stats.rx_errors++;
@@ -898,6 +896,10 @@ receive_packet (struct net_device *dev)
 				skb_put (skb = np->rx_skbuff[entry], pkt_len);
 				np->rx_skbuff[entry] = NULL;
 			} else if ((skb = dev_alloc_skb (pkt_len + 2)) != NULL) {
+				pci_dma_sync_single_for_cpu(np->pdev,
+							    desc->fraginfo,
+							    np->rx_buf_sz,
+							    PCI_DMA_FROMDEVICE);
 				skb->dev = dev;
 				/* 16 byte align the IP header */
 				skb_reserve (skb, 2);
@@ -905,6 +907,10 @@ receive_packet (struct net_device *dev)
 						  np->rx_skbuff[entry]->tail,
 						  pkt_len, 0);
 				skb_put (skb, pkt_len);
+				pci_dma_sync_single_for_device(np->pdev,
+							       desc->fraginfo,
+							       np->rx_buf_sz,
+							       PCI_DMA_FROMDEVICE);
 			}
 			skb->protocol = eth_type_trans (skb, dev);
 #if 0			
diff -puN drivers/net/e1000/e1000_ethtool.c~dma_sync_for_device-cpu drivers/net/e1000/e1000_ethtool.c
--- 25/drivers/net/e1000/e1000_ethtool.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/drivers/net/e1000/e1000_ethtool.c	2004-02-28 18:08:54.000000000 -0800
@@ -1191,16 +1191,16 @@ e1000_run_loopback_test(struct e1000_ada
 
 	for(i = 0; i < 64; i++) {
 		e1000_create_lbtest_frame(txdr->buffer_info[i].skb, 1024);
-		pci_dma_sync_single(pdev, txdr->buffer_info[i].dma,
-				    txdr->buffer_info[i].length,
-				    PCI_DMA_TODEVICE);
+		pci_dma_sync_single_for_device(pdev, txdr->buffer_info[i].dma,
+					       txdr->buffer_info[i].length,
+					       PCI_DMA_TODEVICE);
 	}
 	E1000_WRITE_REG(&adapter->hw, TDT, i);
 
 	msec_delay(200);
 
-	pci_dma_sync_single(pdev, rxdr->buffer_info[0].dma,
-			    rxdr->buffer_info[0].length, PCI_DMA_FROMDEVICE);
+	pci_dma_sync_single_for_cpu(pdev, rxdr->buffer_info[0].dma,
+				    rxdr->buffer_info[0].length, PCI_DMA_FROMDEVICE);
 
 	return e1000_check_lbtest_frame(rxdr->buffer_info[0].skb, 1024);
 }
diff -puN drivers/net/e100.c~dma_sync_for_device-cpu drivers/net/e100.c
--- 25/drivers/net/e100.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/drivers/net/e100.c	2004-02-28 18:08:54.000000000 -0800
@@ -1386,8 +1386,8 @@ static inline int e100_rx_alloc_skb(stru
 			(u32 *)&prev_rfd->link);
 		wmb();
 		prev_rfd->command &= ~cpu_to_le16(cb_el);
-		pci_dma_sync_single(nic->pdev, rx->prev->dma_addr,
-			sizeof(struct rfd), PCI_DMA_TODEVICE);
+		pci_dma_sync_single_for_device(nic->pdev, rx->prev->dma_addr,
+					       sizeof(struct rfd), PCI_DMA_TODEVICE);
 	}
 
 	return 0;
@@ -1404,8 +1404,8 @@ static inline int e100_rx_indicate(struc
 		return -EAGAIN;
 
 	/* Need to sync before taking a peek at cb_complete bit */
-	pci_dma_sync_single(nic->pdev, rx->dma_addr,
-		sizeof(struct rfd), PCI_DMA_FROMDEVICE);
+	pci_dma_sync_single_for_cpu(nic->pdev, rx->dma_addr,
+				    sizeof(struct rfd), PCI_DMA_FROMDEVICE);
 	rfd_status = le16_to_cpu(rfd->status);
 
 	DPRINTK(RX_STATUS, DEBUG, "status=0x%04X\n", rfd_status);
@@ -1420,11 +1420,8 @@ static inline int e100_rx_indicate(struc
 		actual_size = RFD_BUF_LEN - sizeof(struct rfd);
 
 	/* Get data */
-	pci_dma_sync_single(nic->pdev, rx->dma_addr,
-		sizeof(struct rfd) + actual_size,
-		PCI_DMA_FROMDEVICE);
 	pci_unmap_single(nic->pdev, rx->dma_addr,
-		RFD_BUF_LEN, PCI_DMA_FROMDEVICE);
+			 RFD_BUF_LEN, PCI_DMA_FROMDEVICE);
 
 	/* Pull off the RFD and put the actual data (minus eth hdr) */
 	skb_reserve(skb, sizeof(struct rfd));
diff -puN drivers/net/eepro100.c~dma_sync_for_device-cpu drivers/net/eepro100.c
--- 25/drivers/net/eepro100.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/drivers/net/eepro100.c	2004-02-28 18:08:54.000000000 -0800
@@ -1326,8 +1326,8 @@ speedo_init_rx_ring(struct net_device *d
 		skb_reserve(skb, sizeof(struct RxFD));
 		if (last_rxf) {
 			last_rxf->link = cpu_to_le32(sp->rx_ring_dma[i]);
-			pci_dma_sync_single(sp->pdev, last_rxf_dma,
-					sizeof(struct RxFD), PCI_DMA_TODEVICE);
+			pci_dma_sync_single_for_device(sp->pdev, last_rxf_dma,
+										   sizeof(struct RxFD), PCI_DMA_TODEVICE);
 		}
 		last_rxf = rxf;
 		last_rxf_dma = sp->rx_ring_dma[i];
@@ -1336,14 +1336,14 @@ speedo_init_rx_ring(struct net_device *d
 		/* This field unused by i82557. */
 		rxf->rx_buf_addr = 0xffffffff;
 		rxf->count = cpu_to_le32(PKT_BUF_SZ << 16);
-		pci_dma_sync_single(sp->pdev, sp->rx_ring_dma[i],
-				sizeof(struct RxFD), PCI_DMA_TODEVICE);
+		pci_dma_sync_single_for_device(sp->pdev, sp->rx_ring_dma[i],
+									   sizeof(struct RxFD), PCI_DMA_TODEVICE);
 	}
 	sp->dirty_rx = (unsigned int)(i - RX_RING_SIZE);
 	/* Mark the last entry as end-of-list. */
 	last_rxf->status = cpu_to_le32(0xC0000002);	/* '2' is flag value only. */
-	pci_dma_sync_single(sp->pdev, sp->rx_ring_dma[RX_RING_SIZE-1],
-			sizeof(struct RxFD), PCI_DMA_TODEVICE);
+	pci_dma_sync_single_for_device(sp->pdev, sp->rx_ring_dma[RX_RING_SIZE-1],
+								   sizeof(struct RxFD), PCI_DMA_TODEVICE);
 	sp->last_rxf = last_rxf;
 	sp->last_rxf_dma = last_rxf_dma;
 }
@@ -1716,8 +1716,8 @@ static inline struct RxFD *speedo_rx_all
 	skb->dev = dev;
 	skb_reserve(skb, sizeof(struct RxFD));
 	rxf->rx_buf_addr = 0xffffffff;
-	pci_dma_sync_single(sp->pdev, sp->rx_ring_dma[entry],
-			sizeof(struct RxFD), PCI_DMA_TODEVICE);
+	pci_dma_sync_single_for_device(sp->pdev, sp->rx_ring_dma[entry],
+								   sizeof(struct RxFD), PCI_DMA_TODEVICE);
 	return rxf;
 }
 
@@ -1730,8 +1730,8 @@ static inline void speedo_rx_link(struct
 	rxf->count = cpu_to_le32(PKT_BUF_SZ << 16);
 	sp->last_rxf->link = cpu_to_le32(rxf_dma);
 	sp->last_rxf->status &= cpu_to_le32(~0xC0000000);
-	pci_dma_sync_single(sp->pdev, sp->last_rxf_dma,
-			sizeof(struct RxFD), PCI_DMA_TODEVICE);
+	pci_dma_sync_single_for_device(sp->pdev, sp->last_rxf_dma,
+								   sizeof(struct RxFD), PCI_DMA_TODEVICE);
 	sp->last_rxf = rxf;
 	sp->last_rxf_dma = rxf_dma;
 }
@@ -1803,8 +1803,8 @@ speedo_rx(struct net_device *dev)
 		int status;
 		int pkt_len;
 
-		pci_dma_sync_single(sp->pdev, sp->rx_ring_dma[entry],
-			sizeof(struct RxFD), PCI_DMA_FROMDEVICE);
+		pci_dma_sync_single_for_cpu(sp->pdev, sp->rx_ring_dma[entry],
+									sizeof(struct RxFD), PCI_DMA_FROMDEVICE);
 		status = le32_to_cpu(sp->rx_ringp[entry]->status);
 		pkt_len = le32_to_cpu(sp->rx_ringp[entry]->count) & 0x3fff;
 
@@ -1850,8 +1850,9 @@ speedo_rx(struct net_device *dev)
 				skb->dev = dev;
 				skb_reserve(skb, 2);	/* Align IP on 16 byte boundaries */
 				/* 'skb_put()' points to the start of sk_buff data area. */
-				pci_dma_sync_single(sp->pdev, sp->rx_ring_dma[entry],
-					sizeof(struct RxFD) + pkt_len, PCI_DMA_FROMDEVICE);
+				pci_dma_sync_single_for_cpu(sp->pdev, sp->rx_ring_dma[entry],
+											sizeof(struct RxFD) + pkt_len,
+											PCI_DMA_FROMDEVICE);
 
 #if 1 || USE_IP_CSUM
 				/* Packet is in one chunk -- we can copy + cksum. */
@@ -1861,6 +1862,9 @@ speedo_rx(struct net_device *dev)
 				memcpy(skb_put(skb, pkt_len), sp->rx_skbuff[entry]->tail,
 					   pkt_len);
 #endif
+				pci_dma_sync_single_for_device(sp->pdev, sp->rx_ring_dma[entry],
+											   sizeof(struct RxFD) + pkt_len,
+											   PCI_DMA_FROMDEVICE);
 				npkts++;
 			} else {
 				/* Pass up the already-filled skbuff. */
@@ -1875,7 +1879,8 @@ speedo_rx(struct net_device *dev)
 				npkts++;
 				sp->rx_ringp[entry] = NULL;
 				pci_unmap_single(sp->pdev, sp->rx_ring_dma[entry],
-						PKT_BUF_SZ + sizeof(struct RxFD), PCI_DMA_FROMDEVICE);
+								 PKT_BUF_SZ + sizeof(struct RxFD),
+								 PCI_DMA_FROMDEVICE);
 			}
 			skb->protocol = eth_type_trans(skb, dev);
 			netif_rx(skb);
@@ -2307,8 +2312,8 @@ static void set_rx_mode(struct net_devic
 		mc_setup_frm->link =
 			cpu_to_le32(TX_RING_ELEM_DMA(sp, (entry + 1) % TX_RING_SIZE));
 
-		pci_dma_sync_single(sp->pdev, mc_blk->frame_dma,
-				mc_blk->len, PCI_DMA_TODEVICE);
+		pci_dma_sync_single_for_device(sp->pdev, mc_blk->frame_dma,
+									   mc_blk->len, PCI_DMA_TODEVICE);
 
 		wait_for_cmd_done(dev);
 		clear_suspend(last_cmd);
diff -puN drivers/net/epic100.c~dma_sync_for_device-cpu drivers/net/epic100.c
--- 25/drivers/net/epic100.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/drivers/net/epic100.c	2004-02-28 18:08:54.000000000 -0800
@@ -1199,8 +1199,6 @@ static int epic_rx(struct net_device *de
 			short pkt_len = (status >> 16) - 4;
 			struct sk_buff *skb;
 
-			pci_dma_sync_single(ep->pci_dev, ep->rx_ring[entry].bufaddr, 
-					    ep->rx_buf_sz, PCI_DMA_FROMDEVICE);
 			if (pkt_len > PKT_BUF_SZ - 4) {
 				printk(KERN_ERR "%s: Oversized Ethernet frame, status %x "
 					   "%d bytes.\n",
@@ -1213,6 +1211,10 @@ static int epic_rx(struct net_device *de
 				&& (skb = dev_alloc_skb(pkt_len + 2)) != NULL) {
 				skb->dev = dev;
 				skb_reserve(skb, 2);	/* 16 byte align the IP header */
+				pci_dma_sync_single_for_cpu(ep->pci_dev,
+							    ep->rx_ring[entry].bufaddr,
+							    ep->rx_buf_sz,
+							    PCI_DMA_FROMDEVICE);
 #if 1 /* HAS_IP_COPYSUM */
 				eth_copy_and_sum(skb, ep->rx_skbuff[entry]->tail, pkt_len, 0);
 				skb_put(skb, pkt_len);
@@ -1220,6 +1222,10 @@ static int epic_rx(struct net_device *de
 				memcpy(skb_put(skb, pkt_len), ep->rx_skbuff[entry]->tail,
 					   pkt_len);
 #endif
+				pci_dma_sync_single_for_device(ep->pci_dev,
+							       ep->rx_ring[entry].bufaddr,
+							       ep->rx_buf_sz,
+							       PCI_DMA_FROMDEVICE);
 			} else {
 				pci_unmap_single(ep->pci_dev, 
 					ep->rx_ring[entry].bufaddr, 
diff -puN drivers/net/fealnx.c~dma_sync_for_device-cpu drivers/net/fealnx.c
--- 25/drivers/net/fealnx.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/drivers/net/fealnx.c	2004-02-28 18:08:54.000000000 -0800
@@ -1647,10 +1647,6 @@ static int netdev_rx(struct net_device *
 				printk(KERN_DEBUG "  netdev_rx() normal Rx pkt length %d"
 				       " status %x.\n", pkt_len, rx_status);
 #endif
-			pci_dma_sync_single(np->pci_dev, np->cur_rx->buffer,
-				np->rx_buf_sz, PCI_DMA_FROMDEVICE);
-			pci_unmap_single(np->pci_dev, np->cur_rx->buffer,
-				np->rx_buf_sz, PCI_DMA_FROMDEVICE);
 
 			/* Check if the packet is long enough to accept without copying
 			   to a minimally-sized skbuff. */
@@ -1658,6 +1654,10 @@ static int netdev_rx(struct net_device *
 			    (skb = dev_alloc_skb(pkt_len + 2)) != NULL) {
 				skb->dev = dev;
 				skb_reserve(skb, 2);	/* 16 byte align the IP header */
+				pci_dma_sync_single_for_cpu(np->pci_dev,
+							    np->cur_rx->buffer,
+							    np->rx_buf_sz,
+							    PCI_DMA_FROMDEVICE);
 				/* Call copy + cksum if available. */
 
 #if ! defined(__alpha__)
@@ -1668,7 +1668,15 @@ static int netdev_rx(struct net_device *
 				memcpy(skb_put(skb, pkt_len),
 					np->cur_rx->skbuff->tail, pkt_len);
 #endif
+				pci_dma_sync_single_for_device(np->pci_dev,
+							       np->cur_rx->buffer,
+							       np->rx_buf_sz,
+							       PCI_DMA_FROMDEVICE);
 			} else {
+				pci_unmap_single(np->pci_dev,
+						 np->cur_rx->buffer,
+						 np->rx_buf_sz,
+						 PCI_DMA_FROMDEVICE);
 				skb_put(skb = np->cur_rx->skbuff, pkt_len);
 				np->cur_rx->skbuff = NULL;
 				if (np->really_rx_count == RX_RING_SIZE)
@@ -1689,8 +1697,10 @@ static int netdev_rx(struct net_device *
 
 			if (skb != NULL) {
 				skb->dev = dev;	/* Mark as being used by this device. */
-				np->cur_rx->buffer = pci_map_single(np->pci_dev, skb->tail,
-					np->rx_buf_sz, PCI_DMA_FROMDEVICE);
+				np->cur_rx->buffer = pci_map_single(np->pci_dev,
+								    skb->tail,
+								    np->rx_buf_sz,
+								    PCI_DMA_FROMDEVICE);
 				np->cur_rx->skbuff = skb;
 				++np->really_rx_count;
 			}
diff -puN drivers/net/hamachi.c~dma_sync_for_device-cpu drivers/net/hamachi.c
--- 25/drivers/net/hamachi.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/drivers/net/hamachi.c	2004-02-28 18:08:54.000000000 -0800
@@ -1498,8 +1498,10 @@ static int hamachi_rx(struct net_device 
 		
 		if (desc_status & DescOwn)
 			break;
-		pci_dma_sync_single(hmp->pci_dev, desc->addr, hmp->rx_buf_sz, 
-			PCI_DMA_FROMDEVICE);
+		pci_dma_sync_single_for_cpu(hmp->pci_dev,
+					    desc->addr,
+					    hmp->rx_buf_sz,
+					    PCI_DMA_FROMDEVICE);
 		buf_addr = desc_to_virt(desc->addr);
 		frame_status = le32_to_cpu(get_unaligned((s32*)&(buf_addr[data_size - 12])));
 		if (hamachi_debug > 4)
@@ -1563,6 +1565,10 @@ static int hamachi_rx(struct net_device 
 #endif
 				skb->dev = dev;
 				skb_reserve(skb, 2);	/* 16 byte align the IP header */
+				pci_dma_sync_single_for_cpu(hmp->pci_dev,
+							    hmp->rx_ring[entry].addr,
+							    hmp->rx_buf_sz,
+							    PCI_DMA_FROMDEVICE);
 				/* Call copy + cksum if available. */
 #if 1 || USE_IP_COPYSUM
 				eth_copy_and_sum(skb, 
@@ -1572,10 +1578,14 @@ static int hamachi_rx(struct net_device 
 				memcpy(skb_put(skb, pkt_len), hmp->rx_ring_dma
 					+ entry*sizeof(*desc), pkt_len);
 #endif
+				pci_dma_sync_single_for_device(hmp->pci_dev,
+							       hmp->rx_ring[entry].addr,
+							       hmp->rx_buf_sz,
+							       PCI_DMA_FROMDEVICE);
 			} else {
 				pci_unmap_single(hmp->pci_dev, 
-					hmp->rx_ring[entry].addr, 
-					hmp->rx_buf_sz, PCI_DMA_FROMDEVICE);
+						 hmp->rx_ring[entry].addr,
+						 hmp->rx_buf_sz, PCI_DMA_FROMDEVICE);
 				skb_put(skb = hmp->rx_skbuff[entry], pkt_len);
 				hmp->rx_skbuff[entry] = NULL;
 			}
diff -puN drivers/net/irda/vlsi_ir.c~dma_sync_for_device-cpu drivers/net/irda/vlsi_ir.c
--- 25/drivers/net/irda/vlsi_ir.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/drivers/net/irda/vlsi_ir.c	2004-02-28 18:08:54.000000000 -0800
@@ -173,7 +173,7 @@ static int vlsi_proc_pdev(struct pci_dev
 			PCIDEV_NAME(pdev), (int)pdev->vendor, (int)pdev->device);
 	out += sprintf(out, "pci-power-state: %u\n", (unsigned) pdev->current_state);
 	out += sprintf(out, "resources: irq=%u / io=0x%04x / dma_mask=0x%016Lx\n",
-			pdev->irq, (unsigned)pci_resource_start(pdev, 0), (u64)pdev->dma_mask);
+			pdev->irq, (unsigned)pci_resource_start(pdev, 0), (unsigned long long)pdev->dma_mask);
 	out += sprintf(out, "hw registers: ");
 	for (i = 0; i < 0x20; i++)
 		out += sprintf(out, "%02x", (unsigned)inb((iobase+i)));
@@ -566,7 +566,6 @@ static struct vlsi_ring *vlsi_alloc_ring
 			return NULL;
 		}
 		rd_set_addr_status(rd, busaddr, 0);
-		pci_dma_sync_single(pdev, busaddr, len, dir);
 		/* initially, the dma buffer is owned by the CPU */
 		rd->skb = NULL;
 	}
@@ -660,7 +659,7 @@ static int vlsi_process_rx(struct vlsi_r
 	struct net_device *ndev = (struct net_device *)pci_get_drvdata(r->pdev);
 	vlsi_irda_dev_t *idev = ndev->priv;
 
-	pci_dma_sync_single(r->pdev, rd_get_addr(rd), r->len, r->dir);
+	pci_dma_sync_single_for_cpu(r->pdev, rd_get_addr(rd), r->len, r->dir);
 	/* dma buffer now owned by the CPU */
 	status = rd_get_status(rd);
 	if (status & RD_RX_ERROR) {
@@ -746,7 +745,7 @@ static void vlsi_fill_rx(struct vlsi_rin
 				break;	/* probably not worth logging? */
 		}
 		/* give dma buffer back to busmaster */
-		pci_dma_prep_single(r->pdev, rd_get_addr(rd), r->len, r->dir);
+		pci_dma_sync_single_for_device(r->pdev, rd_get_addr(rd), r->len, r->dir);
 		rd_activate(rd);
 	}
 }
@@ -816,7 +815,7 @@ static void vlsi_unarm_rx(vlsi_irda_dev_
 				ret = -VLSI_RX_DROP;
 			}
 			rd_set_count(rd, 0);
-			pci_dma_sync_single(r->pdev, rd_get_addr(rd), r->len, r->dir);
+			pci_dma_sync_single_for_cpu(r->pdev, rd_get_addr(rd), r->len, r->dir);
 			if (rd->skb) {
 				dev_kfree_skb_any(rd->skb);
 				rd->skb = NULL;
@@ -854,7 +853,7 @@ static int vlsi_process_tx(struct vlsi_r
 	int		len;
 	int		ret;
 
-	pci_dma_sync_single(r->pdev, rd_get_addr(rd), r->len, r->dir);
+	pci_dma_sync_single_for_cpu(r->pdev, rd_get_addr(rd), r->len, r->dir);
 	/* dma buffer now owned by the CPU */
 	status = rd_get_status(rd);
 	if (status & RD_TX_UNDRN)
@@ -1077,8 +1076,8 @@ static int vlsi_hard_start_xmit(struct s
 		}
 	}
 
-	/* tx buffer already owned by CPU due to pci_dma_sync_single() either
-	 * after initial pci_map_single or after subsequent tx-completion
+	/* tx buffer already owned by CPU due to pci_dma_sync_single_for_cpu()
+	 * at the preceding tx-completion
 	 */
 
 	if (idev->mode == IFF_SIR) {
@@ -1120,7 +1119,7 @@ static int vlsi_hard_start_xmit(struct s
 	 * CPU-driven changes visible from the pci bus).
 	 */
 
-	pci_dma_prep_single(r->pdev, rd_get_addr(rd), r->len, r->dir);
+	pci_dma_sync_single_for_device(r->pdev, rd_get_addr(rd), r->len, r->dir);
 
 /*	Switching to TX mode here races with the controller
  *	which may stop TX at any time when fetching an inactive descriptor
@@ -1248,7 +1247,7 @@ static void vlsi_unarm_tx(vlsi_irda_dev_
 		if (rd_is_active(rd)) {
 			rd_set_status(rd, 0);
 			rd_set_count(rd, 0);
-			pci_dma_sync_single(r->pdev, rd_get_addr(rd), r->len, r->dir);
+			pci_dma_sync_single_for_cpu(r->pdev, rd_get_addr(rd), r->len, r->dir);
 			if (rd->skb) {
 				dev_kfree_skb_any(rd->skb);
 				rd->skb = NULL;
diff -puN drivers/net/lasi_82596.c~dma_sync_for_device-cpu drivers/net/lasi_82596.c
--- 25/drivers/net/lasi_82596.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/drivers/net/lasi_82596.c	2004-02-28 18:08:54.000000000 -0800
@@ -802,9 +802,10 @@ memory_squeeze:
 				skb->dev = dev;
 				if (!rx_in_place) {
 					/* 16 byte align the data fields */
-					dma_sync_single(lp->dev, (dma_addr_t)WSWAPchar(rbd->b_data), PKT_BUF_SZ, DMA_FROM_DEVICE);
+					dma_sync_single_for_cpu(lp->dev, (dma_addr_t)WSWAPchar(rbd->b_data), PKT_BUF_SZ, DMA_FROM_DEVICE);
 					skb_reserve(skb, 2);
 					memcpy(skb_put(skb,pkt_len), rbd->v_data, pkt_len);
+					dma_sync_single_for_device(lp->dev, (dma_addr_t)WSWAPchar(rbd->b_data), PKT_BUF_SZ, DMA_FROM_DEVICE);
 				}
 				skb->len = pkt_len;
 				skb->protocol=eth_type_trans(skb,dev);
diff -puN drivers/net/myri_sbus.c~dma_sync_for_device-cpu drivers/net/myri_sbus.c
--- 25/drivers/net/myri_sbus.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/drivers/net/myri_sbus.c	2004-02-28 18:08:54.000000000 -0800
@@ -435,9 +435,9 @@ static void myri_rx(struct myri_eth *mp,
 
 		/* Check for errors. */
 		DRX(("rxd[%d]: %p len[%d] csum[%08x] ", entry, rxd, len, csum));
-		sbus_dma_sync_single(mp->myri_sdev,
-				     sbus_readl(&rxd->myri_scatters[0].addr),
-				     RX_ALLOC_SIZE, SBUS_DMA_FROMDEVICE);
+		sbus_dma_sync_single_for_cpu(mp->myri_sdev,
+					     sbus_readl(&rxd->myri_scatters[0].addr),
+					     RX_ALLOC_SIZE, SBUS_DMA_FROMDEVICE);
 		if (len < (ETH_HLEN + MYRI_PAD_LEN) || (skb->data[0] != MYRI_PAD_LEN)) {
 			DRX(("ERROR["));
 			mp->enet_stats.rx_errors++;
@@ -454,6 +454,10 @@ static void myri_rx(struct myri_eth *mp,
 			drops++;
 			DRX(("DROP "));
 			mp->enet_stats.rx_dropped++;
+			sbus_dma_sync_single_for_device(mp->myri_sdev,
+							sbus_readl(&rxd->myri_scatters[0].addr),
+							RX_ALLOC_SIZE,
+							SBUS_DMA_FROMDEVICE);
 			sbus_writel(RX_ALLOC_SIZE, &rxd->myri_scatters[0].len);
 			sbus_writel(index, &rxd->ctx);
 			sbus_writel(1, &rxd->num_sg);
@@ -508,6 +512,10 @@ static void myri_rx(struct myri_eth *mp,
 
 			/* Reuse original ring buffer. */
 			DRX(("reuse "));
+			sbus_dma_sync_single_for_device(mp->myri_sdev,
+							sbus_readl(&rxd->myri_scatters[0].addr),
+							RX_ALLOC_SIZE,
+							SBUS_DMA_FROMDEVICE);
 			sbus_writel(RX_ALLOC_SIZE, &rxd->myri_scatters[0].len);
 			sbus_writel(index, &rxd->ctx);
 			sbus_writel(1, &rxd->num_sg);
diff -puN drivers/net/natsemi.c~dma_sync_for_device-cpu drivers/net/natsemi.c
--- 25/drivers/net/natsemi.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/drivers/net/natsemi.c	2004-02-28 18:08:54.000000000 -0800
@@ -1789,7 +1789,7 @@ static void netdev_rx(struct net_device 
 				skb->dev = dev;
 				/* 16 byte align the IP header */
 				skb_reserve(skb, 2);
-				pci_dma_sync_single(np->pci_dev,
+				pci_dma_sync_single_for_cpu(np->pci_dev,
 					np->rx_dma[entry],
 					np->rx_skbuff[entry]->len,
 					PCI_DMA_FROMDEVICE);
@@ -1801,6 +1801,10 @@ static void netdev_rx(struct net_device 
 				memcpy(skb_put(skb, pkt_len),
 					np->rx_skbuff[entry]->tail, pkt_len);
 #endif
+				pci_dma_sync_single_for_device(np->pci_dev,
+					np->rx_dma[entry],
+					np->rx_skbuff[entry]->len,
+					PCI_DMA_FROMDEVICE);
 			} else {
 				pci_unmap_single(np->pci_dev, np->rx_dma[entry],
 					np->rx_skbuff[entry]->len,
diff -puN drivers/net/pcnet32.c~dma_sync_for_device-cpu drivers/net/pcnet32.c
--- 25/drivers/net/pcnet32.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/drivers/net/pcnet32.c	2004-02-28 18:08:54.000000000 -0800
@@ -1744,13 +1744,17 @@ pcnet32_rx(struct net_device *dev)
 		if (!rx_in_place) {
 		    skb_reserve(skb,2); /* 16 byte align */
 		    skb_put(skb,pkt_len);	/* Make room */
-		    pci_dma_sync_single(lp->pci_dev,
-		                        lp->rx_dma_addr[entry],
-		                        PKT_BUF_SZ-2,
-		                        PCI_DMA_FROMDEVICE);
+		    pci_dma_sync_single_for_cpu(lp->pci_dev,
+						lp->rx_dma_addr[entry],
+						PKT_BUF_SZ-2,
+						PCI_DMA_FROMDEVICE);
 		    eth_copy_and_sum(skb,
 			    (unsigned char *)(lp->rx_skbuff[entry]->tail),
 			    pkt_len,0);
+		    pci_dma_sync_single_for_device(lp->pci_dev,
+						   lp->rx_dma_addr[entry],
+						   PKT_BUF_SZ-2,
+						   PCI_DMA_FROMDEVICE);
 		}
 		lp->stats.rx_bytes += skb->len;
 		skb->protocol=eth_type_trans(skb,dev);
diff -puN drivers/net/rrunner.c~dma_sync_for_device-cpu drivers/net/rrunner.c
--- 25/drivers/net/rrunner.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/drivers/net/rrunner.c	2004-02-28 18:08:54.000000000 -0800
@@ -983,18 +983,26 @@ static void rx_int(struct net_device *de
 
 			rx_skb = rrpriv->rx_skbuff[index];
 
-	        	pci_dma_sync_single(rrpriv->pci_dev, desc->addr.addrlo,
-				pkt_len, PCI_DMA_FROMDEVICE);
-
 			if (pkt_len < PKT_COPY_THRESHOLD) {
 				skb = alloc_skb(pkt_len, GFP_ATOMIC);
 				if (skb == NULL){
 					printk(KERN_WARNING "%s: Unable to allocate skb (%i bytes), deferring packet\n", dev->name, pkt_len);
 					rrpriv->stats.rx_dropped++;
 					goto defer;
-				}else
+				} else {
+					pci_dma_sync_single_for_cpu(rrpriv->pci_dev,
+								    desc->addr.addrlo,
+								    pkt_len,
+								    PCI_DMA_FROMDEVICE);
+
 					memcpy(skb_put(skb, pkt_len),
 					       rx_skb->data, pkt_len);
+
+					pci_dma_sync_single_for_device(rrpriv->pci_dev,
+								       desc->addr.addrlo,
+								       pkt_len,
+								       PCI_DMA_FROMDEVICE);
+				}
 			}else{
 				struct sk_buff *newskb;
 
diff -puN drivers/net/sis190.c~dma_sync_for_device-cpu drivers/net/sis190.c
--- 25/drivers/net/sis190.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/drivers/net/sis190.c	2004-02-28 18:08:54.000000000 -0800
@@ -1016,14 +1016,20 @@ SiS190_rx_interrupt(struct net_device *d
 			int pkt_size;
 
 			pkt_size = (int) (desc->PSize & 0x0000FFFF) - 4;
-			pci_dma_sync_single(tp->pci_dev, desc->buf_addr,
-				RX_BUF_SIZE, PCI_DMA_FROMDEVICE);
 			skb = dev_alloc_skb(pkt_size + 2);
 			if (skb != NULL) {
 				skb->dev = dev;
 				skb_reserve(skb, 2);	// 16 byte align the IP fields. //
+				pci_dma_sync_single_for_cpu(tp->pci_dev,
+							    desc->buf_addr,
+							    RX_BUF_SIZE,
+							    PCI_DMA_FROMDEVICE);
 				eth_copy_and_sum(skb, tp->RxBufferRing[cur_rx],
 						 pkt_size, 0);
+				pci_dma_sync_single_for_device(tp->pci_dev,
+							       desc->buf_addr,
+							       RX_BUF_SIZE,
+							       PCI_DMA_FROMDEVICE);
 				skb_put(skb, pkt_size);
 				skb->protocol = eth_type_trans(skb, dev);
 				netif_rx(skb);
diff -puN drivers/net/sis900.c~dma_sync_for_device-cpu drivers/net/sis900.c
--- 25/drivers/net/sis900.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/drivers/net/sis900.c	2004-02-28 18:08:54.000000000 -0800
@@ -1650,9 +1650,6 @@ static int sis900_rx(struct net_device *
 				break;
 			}
 
-			pci_dma_sync_single(sis_priv->pci_dev, 
-				sis_priv->rx_ring[entry].bufptr, RX_BUF_SIZE, 
-				PCI_DMA_FROMDEVICE);
 			pci_unmap_single(sis_priv->pci_dev, 
 				sis_priv->rx_ring[entry].bufptr, RX_BUF_SIZE, 
 				PCI_DMA_FROMDEVICE);
diff -puN drivers/net/sk98lin/skge.c~dma_sync_for_device-cpu drivers/net/sk98lin/skge.c
--- 25/drivers/net/sk98lin/skge.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/drivers/net/sk98lin/skge.c	2004-02-28 18:08:54.000000000 -0800
@@ -2533,12 +2533,6 @@ rx_start:	
 				"Control: %x\nRxStat: %x\n",
 				Control, FrameStat));
 
-			PhysAddr = ((SK_U64) pRxd->VDataHigh) << (SK_U64)32;
-			PhysAddr |= (SK_U64) pRxd->VDataLow;
-			pci_dma_sync_single(pAC->PciDev,
-						(dma_addr_t) PhysAddr,
-						FrameLength,
-						PCI_DMA_FROMDEVICE);
 			ReQueueRxBuffer(pAC, pRxPort, pMsg,
 				pRxd->VDataHigh, pRxd->VDataLow);
 
@@ -2559,12 +2553,16 @@ rx_start:	
 			PhysAddr = ((SK_U64) pRxd->VDataHigh) << (SK_U64)32;
 			PhysAddr |= (SK_U64) pRxd->VDataLow;
 
-			pci_dma_sync_single(pAC->PciDev,
-						(dma_addr_t) PhysAddr,
-						FrameLength,
-						PCI_DMA_FROMDEVICE);
+			pci_dma_sync_single_for_cpu(pAC->PciDev,
+						    (dma_addr_t) PhysAddr,
+						    FrameLength,
+						    PCI_DMA_FROMDEVICE);
 			eth_copy_and_sum(pNewMsg, pMsg->data,
 				FrameLength, 0);
+			pci_dma_sync_single_for_device(pAC->PciDev,
+						       (dma_addr_t) PhysAddr,
+						       FrameLength,
+						       PCI_DMA_FROMDEVICE);
 			ReQueueRxBuffer(pAC, pRxPort, pMsg,
 				pRxd->VDataHigh, pRxd->VDataLow);
 
diff -puN drivers/net/starfire.c~dma_sync_for_device-cpu drivers/net/starfire.c
--- 25/drivers/net/starfire.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/drivers/net/starfire.c	2004-02-28 18:08:54.000000000 -0800
@@ -1637,10 +1637,13 @@ static int __netdev_rx(struct net_device
 		    && (skb = dev_alloc_skb(pkt_len + 2)) != NULL) {
 			skb->dev = dev;
 			skb_reserve(skb, 2);	/* 16 byte align the IP header */
-			pci_dma_sync_single(np->pci_dev,
-					    np->rx_info[entry].mapping,
-					    pkt_len, PCI_DMA_FROMDEVICE);
+			pci_dma_sync_single_for_cpu(np->pci_dev,
+						    np->rx_info[entry].mapping,
+						    pkt_len, PCI_DMA_FROMDEVICE);
 			eth_copy_and_sum(skb, np->rx_info[entry].skb->tail, pkt_len, 0);
+			pci_dma_sync_single_for_device(np->pci_dev,
+						       np->rx_info[entry].mapping,
+						       pkt_len, PCI_DMA_FROMDEVICE);
 			skb_put(skb, pkt_len);
 		} else {
 			pci_unmap_single(np->pci_dev, np->rx_info[entry].mapping, np->rx_buf_sz, PCI_DMA_FROMDEVICE);
diff -puN drivers/net/sunbmac.c~dma_sync_for_device-cpu drivers/net/sunbmac.c
--- 25/drivers/net/sunbmac.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/drivers/net/sunbmac.c	2004-02-28 18:08:54.000000000 -0800
@@ -849,9 +849,13 @@ static void bigmac_rx(struct bigmac *bp)
 			copy_skb->dev = bp->dev;
 			skb_reserve(copy_skb, 2);
 			skb_put(copy_skb, len);
-			sbus_dma_sync_single(bp->bigmac_sdev,
-					     this->rx_addr, len, SBUS_DMA_FROMDEVICE);
+			sbus_dma_sync_single_for_cpu(bp->bigmac_sdev,
+						     this->rx_addr, len,
+						     SBUS_DMA_FROMDEVICE);
 			eth_copy_and_sum(copy_skb, (unsigned char *)skb->data, len, 0);
+			sbus_dma_sync_single_for_device(bp->bigmac_sdev,
+							this->rx_addr, len,
+							SBUS_DMA_FROMDEVICE);
 
 			/* Reuse original ring buffer. */
 			this->rx_flags =
diff -puN drivers/net/sundance.c~dma_sync_for_device-cpu drivers/net/sundance.c
--- 25/drivers/net/sundance.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/drivers/net/sundance.c	2004-02-28 18:08:54.000000000 -0800
@@ -1331,9 +1331,6 @@ static void rx_poll(unsigned long data)
 		if (netif_msg_rx_status(np))
 			printk(KERN_DEBUG "  netdev_rx() status was %8.8x.\n",
 				   frame_status);
-		pci_dma_sync_single(np->pci_dev, desc->frag[0].addr,
-			np->rx_buf_sz, PCI_DMA_FROMDEVICE);
-
 		if (frame_status & 0x001f4000) {
 			/* There was a error. */
 			if (netif_msg_rx_err(np))
@@ -1363,7 +1360,16 @@ static void rx_poll(unsigned long data)
 				&& (skb = dev_alloc_skb(pkt_len + 2)) != NULL) {
 				skb->dev = dev;
 				skb_reserve(skb, 2);	/* 16 byte align the IP header */
+				pci_dma_sync_single_for_cpu(np->pci_dev,
+							    desc->frag[0].addr,
+							    np->rx_buf_sz,
+							    PCI_DMA_FROMDEVICE);
+
 				eth_copy_and_sum(skb, np->rx_skbuff[entry]->tail, pkt_len, 0);
+				pci_dma_sync_single_for_device(np->pci_dev,
+							       desc->frag[0].addr,
+							       np->rx_buf_sz,
+							       PCI_DMA_FROMDEVICE);
 				skb_put(skb, pkt_len);
 			} else {
 				pci_unmap_single(np->pci_dev,
diff -puN drivers/net/sungem.c~dma_sync_for_device-cpu drivers/net/sungem.c
--- 25/drivers/net/sungem.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/drivers/net/sungem.c	2004-02-28 18:08:55.000000000 -0800
@@ -763,8 +763,9 @@ static void gem_rx(struct gem *gp)
 			copy_skb->dev = gp->dev;
 			skb_reserve(copy_skb, 2);
 			skb_put(copy_skb, len);
-			pci_dma_sync_single(gp->pdev, dma_addr, len, PCI_DMA_FROMDEVICE);
+			pci_dma_sync_single_for_cpu(gp->pdev, dma_addr, len, PCI_DMA_FROMDEVICE);
 			memcpy(copy_skb->data, skb->data, len);
+			pci_dma_sync_single_for_device(gp->pdev, dma_addr, len, PCI_DMA_FROMDEVICE);
 
 			/* We'll reuse the original ring buffer. */
 			skb = copy_skb;
diff -puN drivers/net/sunhme.c~dma_sync_for_device-cpu drivers/net/sunhme.c
--- 25/drivers/net/sunhme.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/drivers/net/sunhme.c	2004-02-28 18:08:55.000000000 -0800
@@ -273,8 +273,10 @@ static u32 pci_hme_read_desc32(u32 *p)
 	((__hp)->dma_map((__hp)->happy_dev, (__ptr), (__size), (__dir)))
 #define hme_dma_unmap(__hp, __addr, __size, __dir) \
 	((__hp)->dma_unmap((__hp)->happy_dev, (__addr), (__size), (__dir)))
-#define hme_dma_sync(__hp, __addr, __size, __dir) \
-	((__hp)->dma_sync((__hp)->happy_dev, (__addr), (__size), (__dir)))
+#define hme_dma_sync_for_cpu(__hp, __addr, __size, __dir) \
+	((__hp)->dma_sync_for_cpu((__hp)->happy_dev, (__addr), (__size), (__dir)))
+#define hme_dma_sync_for_device(__hp, __addr, __size, __dir) \
+	((__hp)->dma_sync_for_device((__hp)->happy_dev, (__addr), (__size), (__dir)))
 #else
 #ifdef CONFIG_SBUS
 /* SBUS only compilation */
@@ -297,8 +299,10 @@ do {	(__txd)->tx_addr = (__addr); \
 	sbus_map_single((__hp)->happy_dev, (__ptr), (__size), (__dir))
 #define hme_dma_unmap(__hp, __addr, __size, __dir) \
 	sbus_unmap_single((__hp)->happy_dev, (__addr), (__size), (__dir))
-#define hme_dma_sync(__hp, __addr, __size, __dir) \
-	sbus_dma_sync_single((__hp)->happy_dev, (__addr), (__size), (__dir))
+#define hme_dma_sync_for_cpu(__hp, __addr, __size, __dir) \
+	sbus_dma_sync_single_for_cpu((__hp)->happy_dev, (__addr), (__size), (__dir))
+#define hme_dma_sync_for_device(__hp, __addr, __size, __dir) \
+	sbus_dma_sync_single_for_device((__hp)->happy_dev, (__addr), (__size), (__dir))
 #else
 /* PCI only compilation */
 #define hme_write32(__hp, __reg, __val) \
@@ -320,8 +324,10 @@ do {	(__txd)->tx_addr = cpu_to_le32(__ad
 	pci_map_single((__hp)->happy_dev, (__ptr), (__size), (__dir))
 #define hme_dma_unmap(__hp, __addr, __size, __dir) \
 	pci_unmap_single((__hp)->happy_dev, (__addr), (__size), (__dir))
-#define hme_dma_sync(__hp, __addr, __size, __dir) \
-	pci_dma_sync_single((__hp)->happy_dev, (__addr), (__size), (__dir))
+#define hme_dma_sync_for_cpu(__hp, __addr, __size, __dir) \
+	pci_dma_sync_single_for_cpu((__hp)->happy_dev, (__addr), (__size), (__dir))
+#define hme_dma_sync_for_device(__hp, __addr, __size, __dir) \
+	pci_dma_sync_single_for_device((__hp)->happy_dev, (__addr), (__size), (__dir))
 #endif
 #endif
 
@@ -2069,8 +2075,9 @@ static void happy_meal_rx(struct happy_m
 			copy_skb->dev = dev;
 			skb_reserve(copy_skb, 2);
 			skb_put(copy_skb, len);
-			hme_dma_sync(hp, dma_addr, len, DMA_FROMDEVICE);
+			hme_dma_sync_for_cpu(hp, dma_addr, len, DMA_FROMDEVICE);
 			memcpy(copy_skb->data, skb->data, len);
+			hme_dma_sync_for_device(hp, dma_addr, len, DMA_FROMDEVICE);
 
 			/* Reuse original ring buffer. */
 			hme_write_rxd(hp, this,
@@ -2838,7 +2845,10 @@ static int __init happy_meal_sbus_init(s
 	hp->write_rxd = sbus_hme_write_rxd;
 	hp->dma_map = (u32 (*)(void *, void *, long, int))sbus_map_single;
 	hp->dma_unmap = (void (*)(void *, u32, long, int))sbus_unmap_single;
-	hp->dma_sync = (void (*)(void *, u32, long, int))sbus_dma_sync_single;
+	hp->dma_sync_for_cpu = (void (*)(void *, u32, long, int))
+		sbus_dma_sync_single_for_cpu;
+	hp->dma_sync_for_device = (void (*)(void *, u32, long, int))
+		sbus_dma_sync_single_for_device;
 	hp->read32 = sbus_hme_read32;
 	hp->write32 = sbus_hme_write32;
 #endif
@@ -3182,7 +3192,10 @@ static int __init happy_meal_pci_init(st
 	hp->write_rxd = pci_hme_write_rxd;
 	hp->dma_map = (u32 (*)(void *, void *, long, int))pci_map_single;
 	hp->dma_unmap = (void (*)(void *, u32, long, int))pci_unmap_single;
-	hp->dma_sync = (void (*)(void *, u32, long, int))pci_dma_sync_single;
+	hp->dma_sync_for_cpu = (void (*)(void *, u32, long, int))
+		pci_dma_sync_single_for_cpu;
+	hp->dma_sync_for_device = (void (*)(void *, u32, long, int))
+		pci_dma_sync_single_for_device;
 	hp->read32 = pci_hme_read32;
 	hp->write32 = pci_hme_write32;
 #endif
diff -puN drivers/net/sunhme.h~dma_sync_for_device-cpu drivers/net/sunhme.h
--- 25/drivers/net/sunhme.h~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/drivers/net/sunhme.h	2004-02-28 18:08:55.000000000 -0800
@@ -406,7 +406,8 @@ struct happy_meal {
 	void (*write_rxd)(struct happy_meal_rxd *, u32, u32);
 	u32 (*dma_map)(void *, void *, long, int);
 	void (*dma_unmap)(void *, u32, long, int);
-	void (*dma_sync)(void *, u32, long, int);
+	void (*dma_sync_for_cpu)(void *, u32, long, int);
+	void (*dma_sync_for_device)(void *, u32, long, int);
 #endif
 
 	/* This is either a sbus_dev or a pci_dev. */
diff -puN drivers/net/tg3.c~dma_sync_for_device-cpu drivers/net/tg3.c
--- 25/drivers/net/tg3.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/drivers/net/tg3.c	2004-02-28 18:08:55.000000000 -0800
@@ -2327,8 +2327,9 @@ static int tg3_rx(struct tg3 *tp, int bu
 			copy_skb->dev = tp->dev;
 			skb_reserve(copy_skb, 2);
 			skb_put(copy_skb, len);
-			pci_dma_sync_single(tp->pdev, dma_addr, len, PCI_DMA_FROMDEVICE);
+			pci_dma_sync_single_for_cpu(tp->pdev, dma_addr, len, PCI_DMA_FROMDEVICE);
 			memcpy(copy_skb->data, skb->data, len);
+			pci_dma_sync_single_for_device(tp->pdev, dma_addr, len, PCI_DMA_FROMDEVICE);
 
 			/* We'll reuse the original ring buffer. */
 			skb = copy_skb;
diff -puN drivers/net/tokenring/3c359.c~dma_sync_for_device-cpu drivers/net/tokenring/3c359.c
--- 25/drivers/net/tokenring/3c359.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/drivers/net/tokenring/3c359.c	2004-02-28 18:08:55.000000000 -0800
@@ -937,15 +937,17 @@ static void xl_rx(struct net_device *dev
 			while (xl_priv->rx_ring_tail != temp_ring_loc) { 
 				copy_len = xl_priv->xl_rx_ring[xl_priv->rx_ring_tail].upfraglen & 0x7FFF ; 
 				frame_length -= copy_len ;  
-				pci_dma_sync_single(xl_priv->pdev,xl_priv->xl_rx_ring[xl_priv->rx_ring_tail].upfragaddr,xl_priv->pkt_buf_sz,PCI_DMA_FROMDEVICE) ; 
+				pci_dma_sync_single_for_cpu(xl_priv->pdev,xl_priv->xl_rx_ring[xl_priv->rx_ring_tail].upfragaddr,xl_priv->pkt_buf_sz,PCI_DMA_FROMDEVICE) ;
 				memcpy(skb_put(skb,copy_len), xl_priv->rx_ring_skb[xl_priv->rx_ring_tail]->data, copy_len) ; 
+				pci_dma_sync_single_for_device(xl_priv->pdev,xl_priv->xl_rx_ring[xl_priv->rx_ring_tail].upfragaddr,xl_priv->pkt_buf_sz,PCI_DMA_FROMDEVICE) ;
 				adv_rx_ring(dev) ; 
 			} 
 
 			/* Now we have found the last fragment */
-			pci_dma_sync_single(xl_priv->pdev,xl_priv->xl_rx_ring[xl_priv->rx_ring_tail].upfragaddr,xl_priv->pkt_buf_sz,PCI_DMA_FROMDEVICE) ; 
+			pci_dma_sync_single_for_cpu(xl_priv->pdev,xl_priv->xl_rx_ring[xl_priv->rx_ring_tail].upfragaddr,xl_priv->pkt_buf_sz,PCI_DMA_FROMDEVICE) ;
 			memcpy(skb_put(skb,copy_len), xl_priv->rx_ring_skb[xl_priv->rx_ring_tail]->data, frame_length) ; 
 /*			memcpy(skb_put(skb,frame_length), bus_to_virt(xl_priv->xl_rx_ring[xl_priv->rx_ring_tail].upfragaddr), frame_length) ; */
+			pci_dma_sync_single_for_device(xl_priv->pdev,xl_priv->xl_rx_ring[xl_priv->rx_ring_tail].upfragaddr,xl_priv->pkt_buf_sz,PCI_DMA_FROMDEVICE) ;
 			adv_rx_ring(dev) ; 
 			skb->protocol = tr_type_trans(skb,dev) ; 
 			netif_rx(skb) ; 
diff -puN drivers/net/tokenring/olympic.c~dma_sync_for_device-cpu drivers/net/tokenring/olympic.c
--- 25/drivers/net/tokenring/olympic.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/drivers/net/tokenring/olympic.c	2004-02-28 18:08:55.000000000 -0800
@@ -842,10 +842,13 @@ static void olympic_rx(struct net_device
 							olympic_priv->rx_ring_skb[rx_ring_last_received] = skb ; 
 							netif_rx(skb2) ; 
 						} else { 
-							pci_dma_sync_single(olympic_priv->pdev,
+							pci_dma_sync_single_for_cpu(olympic_priv->pdev,
 								le32_to_cpu(olympic_priv->olympic_rx_ring[rx_ring_last_received].buffer),
 								olympic_priv->pkt_buf_sz,PCI_DMA_FROMDEVICE) ; 
 							memcpy(skb_put(skb,length-4),olympic_priv->rx_ring_skb[rx_ring_last_received]->data,length-4) ; 
+							pci_dma_sync_single_for_device(olympic_priv->pdev,
+								le32_to_cpu(olympic_priv->olympic_rx_ring[rx_ring_last_received].buffer),
+								olympic_priv->pkt_buf_sz,PCI_DMA_FROMDEVICE) ;
 							skb->protocol = tr_type_trans(skb,dev) ; 
 							netif_rx(skb) ; 
 						} 
@@ -854,12 +857,15 @@ static void olympic_rx(struct net_device
 							olympic_priv->rx_ring_last_received++ ; 
 							olympic_priv->rx_ring_last_received &= (OLYMPIC_RX_RING_SIZE -1);
 							rx_ring_last_received = olympic_priv->rx_ring_last_received ; 
-							pci_dma_sync_single(olympic_priv->pdev,
+							pci_dma_sync_single_for_cpu(olympic_priv->pdev,
 								le32_to_cpu(olympic_priv->olympic_rx_ring[rx_ring_last_received].buffer),
 								olympic_priv->pkt_buf_sz,PCI_DMA_FROMDEVICE) ; 
 							rx_desc = &(olympic_priv->olympic_rx_ring[rx_ring_last_received]);
 							cpy_length = (i == 1 ? frag_len : le32_to_cpu(rx_desc->res_length)); 
 							memcpy(skb_put(skb, cpy_length), olympic_priv->rx_ring_skb[rx_ring_last_received]->data, cpy_length) ;
+							pci_dma_sync_single_for_device(olympic_priv->pdev,
+								le32_to_cpu(olympic_priv->olympic_rx_ring[rx_ring_last_received].buffer),
+								olympic_priv->pkt_buf_sz,PCI_DMA_FROMDEVICE) ;
 						} while (--i) ; 
 						skb_trim(skb,skb->len-4) ; 
 						skb->protocol = tr_type_trans(skb,dev);
diff -puN drivers/net/tulip/de2104x.c~dma_sync_for_device-cpu drivers/net/tulip/de2104x.c
--- 25/drivers/net/tulip/de2104x.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/drivers/net/tulip/de2104x.c	2004-02-28 18:08:55.000000000 -0800
@@ -457,10 +457,12 @@ static void de_rx (struct de_private *de
 					       buflen, PCI_DMA_FROMDEVICE);
 			de->rx_skb[rx_tail].skb = copy_skb;
 		} else {
-			pci_dma_sync_single(de->pdev, mapping, len, PCI_DMA_FROMDEVICE);
+			pci_dma_sync_single_for_cpu(de->pdev, mapping, len, PCI_DMA_FROMDEVICE);
 			skb_reserve(copy_skb, RX_OFFSET);
 			memcpy(skb_put(copy_skb, len), skb->tail, len);
 
+			pci_dma_sync_single_for_device(de->pdev, mapping, len, PCI_DMA_FROMDEVICE);
+
 			/* We'll reuse the original ring buffer. */
 			skb = copy_skb;
 		}
diff -puN drivers/net/tulip/interrupt.c~dma_sync_for_device-cpu drivers/net/tulip/interrupt.c
--- 25/drivers/net/tulip/interrupt.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/drivers/net/tulip/interrupt.c	2004-02-28 18:08:55.000000000 -0800
@@ -191,9 +191,9 @@ int tulip_poll(struct net_device *dev, i
                                    && (skb = dev_alloc_skb(pkt_len + 2)) != NULL) {
                                        skb->dev = dev;
                                        skb_reserve(skb, 2);    /* 16 byte align the IP header */
-                                       pci_dma_sync_single(tp->pdev,
-                                                           tp->rx_buffers[entry].mapping,
-                                                           pkt_len, PCI_DMA_FROMDEVICE);
+                                       pci_dma_sync_single_for_cpu(tp->pdev,
+								   tp->rx_buffers[entry].mapping,
+								   pkt_len, PCI_DMA_FROMDEVICE);
 #if ! defined(__alpha__)
                                        eth_copy_and_sum(skb, tp->rx_buffers[entry].skb->tail,
                                                         pkt_len, 0);
@@ -203,6 +203,9 @@ int tulip_poll(struct net_device *dev, i
                                               tp->rx_buffers[entry].skb->tail,
                                               pkt_len);
 #endif
+                                       pci_dma_sync_single_for_device(tp->pdev,
+								      tp->rx_buffers[entry].mapping,
+								      pkt_len, PCI_DMA_FROMDEVICE);
                                } else {        /* Pass up the skb already on the Rx ring. */
                                        char *temp = skb_put(skb = tp->rx_buffers[entry].skb,
                                                             pkt_len);
@@ -412,9 +415,9 @@ static int tulip_rx(struct net_device *d
 				&& (skb = dev_alloc_skb(pkt_len + 2)) != NULL) {
 				skb->dev = dev;
 				skb_reserve(skb, 2);	/* 16 byte align the IP header */
-				pci_dma_sync_single(tp->pdev,
-						    tp->rx_buffers[entry].mapping,
-						    pkt_len, PCI_DMA_FROMDEVICE);
+				pci_dma_sync_single_for_cpu(tp->pdev,
+							    tp->rx_buffers[entry].mapping,
+							    pkt_len, PCI_DMA_FROMDEVICE);
 #if ! defined(__alpha__)
 				eth_copy_and_sum(skb, tp->rx_buffers[entry].skb->tail,
 						 pkt_len, 0);
@@ -424,6 +427,9 @@ static int tulip_rx(struct net_device *d
 				       tp->rx_buffers[entry].skb->tail,
 				       pkt_len);
 #endif
+				pci_dma_sync_single_for_device(tp->pdev,
+							       tp->rx_buffers[entry].mapping,
+							       pkt_len, PCI_DMA_FROMDEVICE);
 			} else { 	/* Pass up the skb already on the Rx ring. */
 				char *temp = skb_put(skb = tp->rx_buffers[entry].skb,
 						     pkt_len);
diff -puN drivers/net/tulip/winbond-840.c~dma_sync_for_device-cpu drivers/net/tulip/winbond-840.c
--- 25/drivers/net/tulip/winbond-840.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/drivers/net/tulip/winbond-840.c	2004-02-28 18:08:55.000000000 -0800
@@ -1289,9 +1289,9 @@ static int netdev_rx(struct net_device *
 				&& (skb = dev_alloc_skb(pkt_len + 2)) != NULL) {
 				skb->dev = dev;
 				skb_reserve(skb, 2);	/* 16 byte align the IP header */
-				pci_dma_sync_single(np->pci_dev,np->rx_addr[entry],
-							np->rx_skbuff[entry]->len,
-							PCI_DMA_FROMDEVICE);
+				pci_dma_sync_single_for_cpu(np->pci_dev,np->rx_addr[entry],
+							    np->rx_skbuff[entry]->len,
+							    PCI_DMA_FROMDEVICE);
 				/* Call copy + cksum if available. */
 #if HAS_IP_COPYSUM
 				eth_copy_and_sum(skb, np->rx_skbuff[entry]->tail, pkt_len, 0);
@@ -1300,6 +1300,9 @@ static int netdev_rx(struct net_device *
 				memcpy(skb_put(skb, pkt_len), np->rx_skbuff[entry]->tail,
 					   pkt_len);
 #endif
+				pci_dma_sync_single_for_device(np->pci_dev,np->rx_addr[entry],
+							       np->rx_skbuff[entry]->len,
+							       PCI_DMA_FROMDEVICE);
 			} else {
 				pci_unmap_single(np->pci_dev,np->rx_addr[entry],
 							np->rx_skbuff[entry]->len,
diff -puN drivers/net/typhoon.c~dma_sync_for_device-cpu drivers/net/typhoon.c
--- 25/drivers/net/typhoon.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/drivers/net/typhoon.c	2004-02-28 18:08:55.000000000 -0800
@@ -1701,9 +1701,13 @@ typhoon_rx(struct typhoon *tp, struct ba
 		   (new_skb = dev_alloc_skb(pkt_len + 2)) != NULL) {
 			new_skb->dev = tp->dev;
 			skb_reserve(new_skb, 2);
-			pci_dma_sync_single(tp->pdev, dma_addr, PKT_BUF_SZ,
-					    PCI_DMA_FROMDEVICE);
+			pci_dma_sync_single_for_cpu(tp->pdev, dma_addr,
+						    PKT_BUF_SZ,
+						    PCI_DMA_FROMDEVICE);
 			eth_copy_and_sum(new_skb, skb->tail, pkt_len, 0);
+			pci_dma_sync_single_for_device(tp->pdev, dma_addr,
+						       PKT_BUF_SZ,
+						       PCI_DMA_FROMDEVICE);
 			skb_put(new_skb, pkt_len);
 			typhoon_recycle_rx_skb(tp, idx);
 		} else {
diff -puN drivers/net/via-rhine.c~dma_sync_for_device-cpu drivers/net/via-rhine.c
--- 25/drivers/net/via-rhine.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/drivers/net/via-rhine.c	2004-02-28 18:08:55.000000000 -0800
@@ -1536,7 +1536,7 @@ static void via_rhine_rx(struct net_devi
 				(skb = dev_alloc_skb(pkt_len + 2)) != NULL) {
 				skb->dev = dev;
 				skb_reserve(skb, 2);	/* 16 byte align the IP header */
-				pci_dma_sync_single(np->pdev, np->rx_skbuff_dma[entry],
+				pci_dma_sync_single_for_cpu(np->pdev, np->rx_skbuff_dma[entry],
 						    np->rx_buf_sz, PCI_DMA_FROMDEVICE);
 
 				/* *_IP_COPYSUM isn't defined anywhere and eth_copy_and_sum
@@ -1549,6 +1549,8 @@ static void via_rhine_rx(struct net_devi
 				memcpy(skb_put(skb, pkt_len), np->rx_skbuff[entry]->tail,
 					   pkt_len);
 #endif
+				pci_dma_sync_single_for_device(np->pdev, np->rx_skbuff_dma[entry],
+						    np->rx_buf_sz, PCI_DMA_FROMDEVICE);
 			} else {
 				skb = np->rx_skbuff[entry];
 				if (skb == NULL) {
diff -puN drivers/net/wan/dscc4.c~dma_sync_for_device-cpu drivers/net/wan/dscc4.c
--- 25/drivers/net/wan/dscc4.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/drivers/net/wan/dscc4.c	2004-02-28 18:08:55.000000000 -0800
@@ -652,7 +652,6 @@ static inline void dscc4_rx_skb(struct d
 		goto refill;
 	}
 	pkt_len = TO_SIZE(rx_fd->state2);
-	pci_dma_sync_single(pdev, rx_fd->data, pkt_len, PCI_DMA_FROMDEVICE);
 	pci_unmap_single(pdev, rx_fd->data, RX_MAX(HDLC_MAX_MRU), PCI_DMA_FROMDEVICE);
 	if ((skb->data[--pkt_len] & FrameOk) == FrameOk) {
 		stats->rx_packets++;
diff -puN drivers/net/yellowfin.c~dma_sync_for_device-cpu drivers/net/yellowfin.c
--- 25/drivers/net/yellowfin.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/drivers/net/yellowfin.c	2004-02-28 18:08:55.000000000 -0800
@@ -1124,7 +1124,7 @@ static int yellowfin_rx(struct net_devic
 
 		if(!desc->result_status)
 			break;
-		pci_dma_sync_single(yp->pci_dev, desc->addr, 
+		pci_dma_sync_single_for_cpu(yp->pci_dev, desc->addr,
 			yp->rx_buf_sz, PCI_DMA_FROMDEVICE);
 		desc_status = le32_to_cpu(desc->result_status) >> 16;
 		buf_addr = rx_skb->tail;
@@ -1208,6 +1208,9 @@ static int yellowfin_rx(struct net_devic
 				memcpy(skb_put(skb, pkt_len), 
 					rx_skb->tail, pkt_len);
 #endif
+				pci_dma_sync_single_for_device(yp->pci_dev, desc->addr,
+											   yp->rx_buf_sz,
+											   PCI_DMA_FROMDEVICE);
 			}
 			skb->protocol = eth_type_trans(skb, dev);
 			netif_rx(skb);
diff -puN drivers/parisc/ccio-dma.c~dma_sync_for_device-cpu drivers/parisc/ccio-dma.c
--- 25/drivers/parisc/ccio-dma.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/drivers/parisc/ccio-dma.c	2004-02-28 18:08:55.000000000 -0800
@@ -1149,8 +1149,10 @@ static struct hppa_dma_ops ccio_ops = {
 	.unmap_single =		ccio_unmap_single,
 	.map_sg = 		ccio_map_sg,
 	.unmap_sg = 		ccio_unmap_sg,
-	.dma_sync_single =	NULL,	/* NOP for U2/Uturn */
-	.dma_sync_sg =		NULL,	/* ditto */
+	.dma_sync_single_for_cpu =	NULL,	/* NOP for U2/Uturn */
+	.dma_sync_single_for_device =	NULL,	/* NOP for U2/Uturn */
+	.dma_sync_sg_for_cpu =		NULL,	/* ditto */
+	.dma_sync_sg_for_device =	NULL,	/* ditto */
 };
 
 #ifdef CONFIG_PROC_FS
diff -puN drivers/parisc/ccio-rm-dma.c~dma_sync_for_device-cpu drivers/parisc/ccio-rm-dma.c
--- 25/drivers/parisc/ccio-rm-dma.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/drivers/parisc/ccio-rm-dma.c	2004-02-28 18:08:55.000000000 -0800
@@ -151,8 +151,10 @@ static struct pci_dma_ops ccio_ops = {
 	ccio_unmap_single,
 	ccio_map_sg,
 	ccio_unmap_sg,
-	NULL,                   /* dma_sync_single : NOP for U2 */
-	NULL,                   /* dma_sync_sg     : ditto */
+	NULL,                   /* dma_sync_single_for_cpu    : NOP for U2 */
+	NULL,                   /* dma_sync_single_for_device : NOP for U2 */
+	NULL,                   /* dma_sync_sg_for_cpu        : ditto */
+	NULL,                   /* dma_sync_sg_for_device     : ditto */
 };
 
 
diff -puN drivers/parisc/sba_iommu.c~dma_sync_for_device-cpu drivers/parisc/sba_iommu.c
--- 25/drivers/parisc/sba_iommu.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/drivers/parisc/sba_iommu.c	2004-02-28 18:08:55.000000000 -0800
@@ -1410,8 +1410,10 @@ static struct hppa_dma_ops sba_ops = {
 	.unmap_single =		sba_unmap_single,
 	.map_sg =		sba_map_sg,
 	.unmap_sg =		sba_unmap_sg,
-	.dma_sync_single =	NULL,
-	.dma_sync_sg =		NULL,
+	.dma_sync_single_for_cpu =	NULL,
+	.dma_sync_single_for_device =	NULL,
+	.dma_sync_sg_for_cpu =		NULL,
+	.dma_sync_sg_for_device =	NULL,
 };
 
 
diff -puN drivers/scsi/53c700.c~dma_sync_for_device-cpu drivers/scsi/53c700.c
--- 25/drivers/scsi/53c700.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/drivers/scsi/53c700.c	2004-02-28 18:08:55.000000000 -0800
@@ -321,7 +321,7 @@ NCR_700_detect(Scsi_Host_Template *tpnt,
 
 	hostdata->script = script;
 	hostdata->pScript = pScript;
-	dma_sync_single(hostdata->dev, pScript, sizeof(SCRIPT), DMA_TO_DEVICE);
+	dma_sync_single_for_device(hostdata->dev, pScript, sizeof(SCRIPT), DMA_TO_DEVICE);
 	hostdata->state = NCR_700_HOST_FREE;
 	hostdata->cmd = NULL;
 	host->max_id = 7;
@@ -982,8 +982,8 @@ process_script_interrupt(__u32 dsps, __u
 				SCp->cmnd[7] = hostdata->status[0];
 				SCp->use_sg = 0;
 				SCp->sc_data_direction = SCSI_DATA_READ;
-				dma_sync_single(hostdata->dev, slot->pCmd,
-						SCp->cmd_len, DMA_TO_DEVICE);
+				dma_sync_single_for_device(hostdata->dev, slot->pCmd,
+							   SCp->cmd_len, DMA_TO_DEVICE);
 				SCp->request_bufflen = sizeof(SCp->sense_buffer);
 				slot->dma_handle = dma_map_single(hostdata->dev, SCp->sense_buffer, sizeof(SCp->sense_buffer), DMA_FROM_DEVICE);
 				slot->SG[0].ins = bS_to_host(SCRIPT_MOVE_DATA_IN | sizeof(SCp->sense_buffer));
@@ -1007,7 +1007,7 @@ process_script_interrupt(__u32 dsps, __u
 			//   SCp->cmnd[0] == INQUIRY && SCp->use_sg == 0) {
 			//	/* Piggy back the tag queueing support
 			//	 * on this command */
-			//	dma_sync_single(hostdata->dev,
+			//	dma_sync_single_for_cpu(hostdata->dev,
 			//			    slot->dma_handle,
 			//			    SCp->request_bufflen,
 			//			    DMA_FROM_DEVICE);
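
The 53c700 conversions show how to pick a direction.  NCR_700_detect()
has just written the SCRIPT into a mapped buffer, so CPU stores must be
pushed out before the chip fetches them, hence _for_device; the
commented-out INQUIRY snoop reads data the device produced, so it
becomes _for_cpu.  Reduced to essentials:

	/* CPU wrote it, device will read it: */
	dma_sync_single_for_device(hostdata->dev, pScript,
				   sizeof(SCRIPT), DMA_TO_DEVICE);
	/* Device wrote it, CPU will read it: */
	dma_sync_single_for_cpu(hostdata->dev, slot->dma_handle,
				SCp->request_bufflen, DMA_FROM_DEVICE);
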
diff -puN drivers/scsi/dc395x.c~dma_sync_for_device-cpu drivers/scsi/dc395x.c
--- 25/drivers/scsi/dc395x.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/drivers/scsi/dc395x.c	2004-02-28 18:08:55.000000000 -0800
@@ -2419,13 +2419,13 @@ static void update_sg_list(struct ScsiRe
 				psge->length -= xferred;	/* residue data length  */
 				psge->address += xferred;	/* residue data pointer */
 				srb->sg_index = idx;
-				pci_dma_sync_single(srb->dcb->
-						    acb->dev,
-						    srb->sg_bus_addr,
-						    sizeof(struct SGentry)
-						    *
-						    DC395x_MAX_SG_LISTENTRY,
-						    PCI_DMA_TODEVICE);
+				pci_dma_sync_single_for_device(srb->dcb->
+							       acb->dev,
+							       srb->sg_bus_addr,
+							       sizeof(struct SGentry)
+							       *
+							       DC395x_MAX_SG_LISTENTRY,
+							       PCI_DMA_TODEVICE);
 				break;
 			}
 			psge++;
@@ -4298,11 +4298,11 @@ void srb_done(struct AdapterCtlBlk *acb,
 
 	if (dir != PCI_DMA_NONE) {
 		if (cmd->use_sg)
-			pci_dma_sync_sg(acb->dev,
+			pci_dma_sync_sg_for_cpu(acb->dev,
 					(struct scatterlist *) cmd->
 					request_buffer, cmd->use_sg, dir);
 		else if (cmd->request_buffer)
-			pci_dma_sync_single(acb->dev,
+			pci_dma_sync_single_for_cpu(acb->dev,
 					    srb->segment_x[0].address,
 					    cmd->request_bufflen, dir);
 	}
diff -puN drivers/scsi/eata.c~dma_sync_for_device-cpu drivers/scsi/eata.c
--- 25/drivers/scsi/eata.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/drivers/scsi/eata.c	2004-02-28 18:08:55.000000000 -0800
@@ -1598,17 +1598,17 @@ static void sync_dma(unsigned int i, uns
    pci_dir = scsi_to_pci_dma_dir(SCpnt->sc_data_direction);
 
    if (DEV2H(cpp->sense_addr))
-      pci_dma_sync_single(HD(j)->pdev, DEV2H(cpp->sense_addr),
+      pci_dma_sync_single_for_cpu(HD(j)->pdev, DEV2H(cpp->sense_addr),
                           DEV2H(cpp->sense_len), PCI_DMA_FROMDEVICE);
 
    if (SCpnt->use_sg)
-      pci_dma_sync_sg(HD(j)->pdev, SCpnt->request_buffer,
+      pci_dma_sync_sg_for_cpu(HD(j)->pdev, SCpnt->request_buffer,
                          SCpnt->use_sg, pci_dir);
 
    if (!DEV2H(cpp->data_len)) pci_dir = PCI_DMA_BIDIRECTIONAL;
 
    if (DEV2H(cpp->data_address))
-      pci_dma_sync_single(HD(j)->pdev, DEV2H(cpp->data_address),
+      pci_dma_sync_single_for_cpu(HD(j)->pdev, DEV2H(cpp->data_address),
                        DEV2H(cpp->data_len), pci_dir);
 }
 
diff -puN drivers/scsi/megaraid.c~dma_sync_for_device-cpu drivers/scsi/megaraid.c
--- 25/drivers/scsi/megaraid.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/drivers/scsi/megaraid.c	2004-02-28 18:08:55.000000000 -0800
@@ -261,10 +261,6 @@ mega_query_adapter(adapter_t *adapter)
 			"megaraid: Product_info cmd failed with error: %d\n",
 				retval);
 
-		pci_dma_sync_single(adapter->dev, prod_info_dma_handle,
-				sizeof(mega_product_info),
-				PCI_DMA_FROMDEVICE);
-
 		pci_unmap_single(adapter->dev, prod_info_dma_handle,
 				sizeof(mega_product_info), PCI_DMA_FROMDEVICE);
 	}
@@ -1651,26 +1647,11 @@ mega_free_scb(adapter_t *adapter, scb_t 
 	case MEGA_BULK_DATA:
 		pci_unmap_page(adapter->dev, scb->dma_h_bulkdata,
 			scb->cmd->request_bufflen, scb->dma_direction);
-
-		if( scb->dma_direction == PCI_DMA_FROMDEVICE ) {
-			pci_dma_sync_single(adapter->dev,
-					scb->dma_h_bulkdata,
-					scb->cmd->request_bufflen,
-					PCI_DMA_FROMDEVICE);
-		}
-
 		break;
 
 	case MEGA_SGLIST:
 		pci_unmap_sg(adapter->dev, scb->cmd->request_buffer,
 			scb->cmd->use_sg, scb->dma_direction);
-
-		if( scb->dma_direction == PCI_DMA_FROMDEVICE ) {
-			pci_dma_sync_sg(adapter->dev,
-					scb->cmd->request_buffer,
-					scb->cmd->use_sg, PCI_DMA_FROMDEVICE);
-		}
-
 		break;
 
 	default:
@@ -1758,14 +1739,6 @@ mega_build_sglist(adapter_t *adapter, sc
 			*buf = (u32)scb->dma_h_bulkdata;
 			*len = (u32)cmd->request_bufflen;
 		}
-
-		if( scb->dma_direction == PCI_DMA_TODEVICE ) {
-			pci_dma_sync_single(adapter->dev,
-					scb->dma_h_bulkdata,
-					cmd->request_bufflen,
-					PCI_DMA_TODEVICE);
-		}
-
 		return 0;
 	}
 
@@ -1804,11 +1777,6 @@ mega_build_sglist(adapter_t *adapter, sc
 	 */
 	*len = (u32)cmd->request_bufflen;
 
-	if( scb->dma_direction == PCI_DMA_TODEVICE ) {
-		pci_dma_sync_sg(adapter->dev, sgl, cmd->use_sg,
-				PCI_DMA_TODEVICE);
-	}
-
 	/* Return count of SG requests */
 	return sgcnt;
 }
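
The megaraid hunks delete syncs instead of converting them, and that is
the point of the audit: pci_unmap_single()/pci_unmap_sg() already hand
the buffer back to the CPU, so a sync immediately before the unmap is
redundant, and a freshly created mapping is already owned by the
device, so a sync right after pci_map_single() buys nothing either.
When the CPU never touches the buffer mid-flight, the minimal correct
lifetime is just (generic sketch):

	dma = pci_map_single(pdev, buf, len, dir);	/* device owns buf */
	/* ... adapter performs the transfer ... */
	pci_unmap_single(pdev, dma, len, dir);		/* CPU owns buf    */
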
diff -puN drivers/scsi/ncr53c8xx.c~dma_sync_for_device-cpu drivers/scsi/ncr53c8xx.c
--- 25/drivers/scsi/ncr53c8xx.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/drivers/scsi/ncr53c8xx.c	2004-02-28 18:08:55.000000000 -0800
@@ -5140,9 +5140,10 @@ void ncr_complete (ncb_p np, ccb_p cp)
 		*/
 		if (cmd->cmnd[0] == 0x12 && !(cmd->cmnd[1] & 0x3) &&
 		    cmd->cmnd[4] >= 7 && !cmd->use_sg) {
-			sync_scsi_data(np, cmd);	/* SYNC the data */
+			sync_scsi_data_for_cpu(np, cmd);	/* SYNC the data */
 			ncr_setup_lcb (np, cmd->device->id, cmd->device->lun,
 				       (char *) cmd->request_buffer);
+			sync_scsi_data_for_device(np, cmd);	/* SYNC the data */
 		}
 
 		tp->bytes     += cp->data_len;
diff -puN drivers/scsi/sym53c8xx_2/sym_glue.c~dma_sync_for_device-cpu drivers/scsi/sym53c8xx_2/sym_glue.c
--- 25/drivers/scsi/sym53c8xx_2/sym_glue.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/drivers/scsi/sym53c8xx_2/sym_glue.c	2004-02-28 18:08:55.000000000 -0800
@@ -212,17 +212,32 @@ static int __map_scsi_sg_data(struct pci
 	return use_sg;
 }
 
-static void __sync_scsi_data(struct pci_dev *pdev, struct scsi_cmnd *cmd)
+static void __sync_scsi_data_for_cpu(struct pci_dev *pdev, struct scsi_cmnd *cmd)
 {
 	int dma_dir = scsi_to_pci_dma_dir(cmd->sc_data_direction);
 
 	switch(SYM_UCMD_PTR(cmd)->data_mapped) {
 	case 2:
-		pci_dma_sync_sg(pdev, cmd->buffer, cmd->use_sg, dma_dir);
+		pci_dma_sync_sg_for_cpu(pdev, cmd->buffer, cmd->use_sg, dma_dir);
 		break;
 	case 1:
-		pci_dma_sync_single(pdev, SYM_UCMD_PTR(cmd)->data_mapping,
-				    cmd->request_bufflen, dma_dir);
+		pci_dma_sync_single_for_cpu(pdev, SYM_UCMD_PTR(cmd)->data_mapping,
+					    cmd->request_bufflen, dma_dir);
+		break;
+	}
+}
+
+static void __sync_scsi_data_for_device(struct pci_dev *pdev, struct scsi_cmnd *cmd)
+{
+	int dma_dir = scsi_to_pci_dma_dir(cmd->sc_data_direction);
+
+	switch(SYM_UCMD_PTR(cmd)->data_mapped) {
+	case 2:
+		pci_dma_sync_sg_for_device(pdev, cmd->buffer, cmd->use_sg, dma_dir);
+		break;
+	case 1:
+		pci_dma_sync_single_for_device(pdev, SYM_UCMD_PTR(cmd)->data_mapping,
+					       cmd->request_bufflen, dma_dir);
 		break;
 	}
 }
@@ -233,8 +248,10 @@ static void __sync_scsi_data(struct pci_
 		__map_scsi_single_data(np->s.device, cmd)
 #define map_scsi_sg_data(np, cmd)	\
 		__map_scsi_sg_data(np->s.device, cmd)
-#define sync_scsi_data(np, cmd)		\
-		__sync_scsi_data(np->s.device, cmd)
+#define sync_scsi_data_for_cpu(np, cmd)		\
+		__sync_scsi_data_for_cpu(np->s.device, cmd)
+#define sync_scsi_data_for_device(np, cmd)		\
+		__sync_scsi_data_for_device(np->s.device, cmd)
 
 /*
  *  Complete a pending CAM CCB.
@@ -394,10 +411,11 @@ void sym_sniff_inquiry(struct sym_hcb *n
 	if (!cmd || cmd->use_sg)
 		return;
 
-	sync_scsi_data(np, cmd);
+	sync_scsi_data_for_cpu(np, cmd);
 	retv = __sym_sniff_inquiry(np, cmd->device->id, cmd->device->lun,
 				   (u_char *) cmd->request_buffer,
 				   cmd->request_bufflen - resid);
+	sync_scsi_data_for_device(np, cmd);
 	if (retv < 0)
 		return;
 	else if (retv)
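
sym_sniff_inquiry(), like ncr_complete() above, peeks at INQUIRY data
through a mapping that stays live afterwards, so the single sync
becomes a bracket: _for_cpu before the read, _for_device after it.  For
a command mapped as one buffer (data_mapped == 1) the new helper pair
expands, roughly, to:

	pci_dma_sync_single_for_cpu(pdev, SYM_UCMD_PTR(cmd)->data_mapping,
				    cmd->request_bufflen, dma_dir);
	/* ... CPU examines cmd->request_buffer ... */
	pci_dma_sync_single_for_device(pdev, SYM_UCMD_PTR(cmd)->data_mapping,
				       cmd->request_bufflen, dma_dir);
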
diff -puN drivers/scsi/sym53c8xx_comm.h~dma_sync_for_device-cpu drivers/scsi/sym53c8xx_comm.h
--- 25/drivers/scsi/sym53c8xx_comm.h~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/drivers/scsi/sym53c8xx_comm.h	2004-02-28 18:08:55.000000000 -0800
@@ -703,7 +703,8 @@ static m_addr_t __vtobus(m_bush_t bush, 
 #define __unmap_scsi_data(dev, cmd)	do {; } while (0)
 #define __map_scsi_single_data(dev, cmd) (__vtobus(dev,(cmd)->request_buffer))
 #define __map_scsi_sg_data(dev, cmd)	((cmd)->use_sg)
-#define __sync_scsi_data(dev, cmd)	do {; } while (0)
+#define __sync_scsi_data_for_cpu(dev, cmd)	do {; } while (0)
+#define __sync_scsi_data_for_device(dev, cmd)	do {; } while (0)
 
 #define scsi_sg_dma_address(sc)		vtobus((sc)->address)
 #define scsi_sg_dma_len(sc)		((sc)->length)
@@ -767,18 +768,34 @@ static int __map_scsi_sg_data(struct dev
 	return use_sg;
 }
 
-static void __sync_scsi_data(struct device *dev, Scsi_Cmnd *cmd)
+static void __sync_scsi_data_for_cpu(struct device *dev, Scsi_Cmnd *cmd)
 {
 	enum dma_data_direction dma_dir = 
 		(enum dma_data_direction)scsi_to_pci_dma_dir(cmd->sc_data_direction);
 
 	switch(cmd->__data_mapped) {
 	case 2:
-		dma_sync_sg(dev, cmd->buffer, cmd->use_sg, dma_dir);
+		dma_sync_sg_for_cpu(dev, cmd->buffer, cmd->use_sg, dma_dir);
 		break;
 	case 1:
-		dma_sync_single(dev, cmd->__data_mapping,
-				cmd->request_bufflen, dma_dir);
+		dma_sync_single_for_cpu(dev, cmd->__data_mapping,
+					cmd->request_bufflen, dma_dir);
+		break;
+	}
+}
+
+static void __sync_scsi_data_for_device(struct device *dev, Scsi_Cmnd *cmd)
+{
+	enum dma_data_direction dma_dir =
+		(enum dma_data_direction)scsi_to_pci_dma_dir(cmd->sc_data_direction);
+
+	switch(cmd->__data_mapped) {
+	case 2:
+		dma_sync_sg_for_device(dev, cmd->buffer, cmd->use_sg, dma_dir);
+		break;
+	case 1:
+		dma_sync_single_for_device(dev, cmd->__data_mapping,
+					   cmd->request_bufflen, dma_dir);
 		break;
 	}
 }
@@ -791,7 +808,8 @@ static void __sync_scsi_data(struct devi
 #define unmap_scsi_data(np, cmd)	__unmap_scsi_data(np->dev, cmd)
 #define map_scsi_single_data(np, cmd)	__map_scsi_single_data(np->dev, cmd)
 #define map_scsi_sg_data(np, cmd)	__map_scsi_sg_data(np->dev, cmd)
-#define sync_scsi_data(np, cmd)		__sync_scsi_data(np->dev, cmd)
+#define sync_scsi_data_for_cpu(np, cmd)	__sync_scsi_data_for_cpu(np->dev, cmd)
+#define sync_scsi_data_for_device(np, cmd) __sync_scsi_data_for_device(np->dev, cmd)
 
 /*==========================================================
 **
diff -puN drivers/scsi/u14-34f.c~dma_sync_for_device-cpu drivers/scsi/u14-34f.c
--- 25/drivers/scsi/u14-34f.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/drivers/scsi/u14-34f.c	2004-02-28 18:08:55.000000000 -0800
@@ -1184,17 +1184,17 @@ static void sync_dma(unsigned int i, uns
    pci_dir = scsi_to_pci_dma_dir(SCpnt->sc_data_direction);
 
    if (DEV2H(cpp->sense_addr))
-      pci_dma_sync_single(HD(j)->pdev, DEV2H(cpp->sense_addr),
+      pci_dma_sync_single_for_cpu(HD(j)->pdev, DEV2H(cpp->sense_addr),
                           DEV2H(cpp->sense_len), PCI_DMA_FROMDEVICE);
 
    if (SCpnt->use_sg)
-      pci_dma_sync_sg(HD(j)->pdev, SCpnt->request_buffer,
+      pci_dma_sync_sg_for_cpu(HD(j)->pdev, SCpnt->request_buffer,
                          SCpnt->use_sg, pci_dir);
 
    if (!DEV2H(cpp->data_len)) pci_dir = PCI_DMA_BIDIRECTIONAL;
 
    if (DEV2H(cpp->data_address))
-      pci_dma_sync_single(HD(j)->pdev, DEV2H(cpp->data_address),
+      pci_dma_sync_single_for_cpu(HD(j)->pdev, DEV2H(cpp->data_address),
                        DEV2H(cpp->data_len), pci_dir);
 }
 
diff -puN drivers/usb/core/usb.c~dma_sync_for_device-cpu drivers/usb/core/usb.c
--- 25/drivers/usb/core/usb.c~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/drivers/usb/core/usb.c	2004-02-28 18:08:55.000000000 -0800
@@ -1297,6 +1297,13 @@ struct urb *usb_buffer_map (struct urb *
 	return urb;
 }
 
+/* XXX DISABLED, no users currently.  If you wish to re-enable this
+ * XXX please determine whether the sync is to transfer ownership of
+ * XXX the buffer from device to cpu or vice versa, and then use the
+ * XXX appropriate _for_{cpu,device}() method.  -DaveM
+ */
+#if 0
+
 /**
  * usb_buffer_dmasync - synchronize DMA and CPU view of buffer(s)
  * @urb: urb whose transfer_buffer/setup_packet will be synchronized
@@ -1325,6 +1332,7 @@ void usb_buffer_dmasync (struct urb *urb
 					DMA_TO_DEVICE);
 	}
 }
+#endif
 
 /**
  * usb_buffer_unmap - free DMA mapping(s) for an urb
@@ -1403,6 +1411,13 @@ int usb_buffer_map_sg (struct usb_device
 			usb_pipein (pipe) ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
 }
 
+/* XXX DISABLED, no users currently.  If you wish to re-enable this
+ * XXX please determine whether the sync is to transfer ownership of
+ * XXX the buffer from device to cpu or vice versa, and then use the
+ * XXX appropriate _for_{cpu,device}() method.  -DaveM
+ */
+#if 0
+
 /**
  * usb_buffer_dmasync_sg - synchronize DMA and CPU view of scatterlist buffer(s)
  * @dev: device to which the scatterlist will be mapped
@@ -1428,6 +1443,7 @@ void usb_buffer_dmasync_sg (struct usb_d
 	dma_sync_sg (controller, sg, n_hw_ents,
 			usb_pipein (pipe) ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
 }
+#endif
 
 /**
  * usb_buffer_unmap_sg - free DMA mapping(s) for a scatterlist
@@ -1595,11 +1611,15 @@ EXPORT_SYMBOL (usb_buffer_alloc);
 EXPORT_SYMBOL (usb_buffer_free);
 
 EXPORT_SYMBOL (usb_buffer_map);
+#if 0
 EXPORT_SYMBOL (usb_buffer_dmasync);
+#endif
 EXPORT_SYMBOL (usb_buffer_unmap);
 
 EXPORT_SYMBOL (usb_buffer_map_sg);
+#if 0
 EXPORT_SYMBOL (usb_buffer_dmasync_sg);
+#endif
 EXPORT_SYMBOL (usb_buffer_unmap_sg);
 
 MODULE_LICENSE("GPL");
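
usb_buffer_dmasync() and usb_buffer_dmasync_sg() get fenced off with
#if 0 rather than converted: with no in-tree callers there is nothing
to say which way ownership should move, and guessing a direction would
plant exactly the kind of silent bug this patch is hunting.  A
re-enabled version would have to exist as a pair, something like
(hypothetical, not part of this patch):

	void usb_buffer_dmasync_for_cpu(struct urb *urb);    /* device -> CPU */
	void usb_buffer_dmasync_for_device(struct urb *urb); /* CPU -> device */
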
diff -puN include/asm-alpha/pci.h~dma_sync_for_device-cpu include/asm-alpha/pci.h
--- 25/include/asm-alpha/pci.h~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/include/asm-alpha/pci.h	2004-02-28 18:08:55.000000000 -0800
@@ -88,7 +88,7 @@ extern void pci_free_consistent(struct p
 /* Map a single buffer of the indicate size for PCI DMA in streaming
    mode.  The 32-bit PCI bus mastering address to use is returned.
    Once the device is given the dma address, the device owns this memory
-   until either pci_unmap_single or pci_dma_sync_single is performed.  */
+   until either pci_unmap_single or pci_dma_sync_single_for_cpu is performed.  */
 
 extern dma_addr_t pci_map_single(struct pci_dev *, void *, size_t, int);
 
@@ -142,28 +142,44 @@ extern int pci_map_sg(struct pci_dev *, 
 extern void pci_unmap_sg(struct pci_dev *, struct scatterlist *, int, int);
 
 /* Make physical memory consistent for a single streaming mode DMA
-   translation after a transfer.
+   translation after a transfer, when the device currently has ownership
+   of the buffer.
 
    If you perform a pci_map_single() but wish to interrogate the
    buffer using the cpu, yet do not wish to teardown the PCI dma
    mapping, you must call this function before doing so.  At the next
-   point you give the PCI dma address back to the card, the device
-   again owns the buffer.  */
+   point you give the PCI dma address back to the card, you must first
+   perform a pci_dma_sync_for_device, and then the device again owns
+   the buffer.  */
 
 static inline void
-pci_dma_sync_single(struct pci_dev *dev, dma_addr_t dma_addr, long size,
-		    int direction)
+pci_dma_sync_single_for_cpu(struct pci_dev *dev, dma_addr_t dma_addr, long size,
+			    int direction)
+{
+	/* Nothing to do.  */
+}
+
+static inline void
+pci_dma_sync_single_for_device(struct pci_dev *dev, dma_addr_t dma_addr, long size,
+			       int direction)
 {
 	/* Nothing to do.  */
 }
 
 /* Make physical memory consistent for a set of streaming mode DMA
-   translations after a transfer.  The same as pci_dma_sync_single but
-   for a scatter-gather list, same rules and usage.  */
+   translations after a transfer.  The same as pci_dma_sync_single_*
+   but for a scatter-gather list, same rules and usage.  */
+
+static inline void
+pci_dma_sync_sg_for_cpu(struct pci_dev *dev, struct scatterlist *sg, int nents,
+			int direction)
+{
+	/* Nothing to do.  */
+}
 
 static inline void
-pci_dma_sync_sg(struct pci_dev *dev, struct scatterlist *sg, int nents,
-	        int direction)
+pci_dma_sync_sg_for_device(struct pci_dev *dev, struct scatterlist *sg, int nents,
+			int direction)
 {
 	/* Nothing to do.  */
 }
@@ -184,8 +200,14 @@ extern dma64_addr_t pci_dac_page_to_dma(
 extern struct page *pci_dac_dma_to_page(struct pci_dev *, dma64_addr_t);
 extern unsigned long pci_dac_dma_to_offset(struct pci_dev *, dma64_addr_t);
 
-static __inline__ void
-pci_dac_dma_sync_single(struct pci_dev *pdev, dma64_addr_t dma_addr, size_t len, int direction)
+static inline void
+pci_dac_dma_sync_single_for_cpu(struct pci_dev *pdev, dma64_addr_t dma_addr, size_t len, int direction)
+{
+	/* Nothing to do. */
+}
+
+static inline void
+pci_dac_dma_sync_single_for_device(struct pci_dev *pdev, dma64_addr_t dma_addr, size_t len, int direction)
 {
 	/* Nothing to do. */
 }
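
The alpha stubs are a convenient place to restate the contract, since
every hook here compiles to nothing and only the rules remain.  A
streaming buffer cycles through the same states on every architecture:

	dma = pci_map_single(pdev, buf, len, dir);	  /* device owns buf */
	pci_dma_sync_single_for_cpu(pdev, dma, len, dir); /* CPU owns buf    */
	/* CPU may now read buf (or write it, for bidirectional maps) */
	pci_dma_sync_single_for_device(pdev, dma, len, dir); /* device again */
	pci_unmap_single(pdev, dma, len, dir);		  /* mapping ends    */
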
diff -puN include/asm-arm/dma-mapping.h~dma_sync_for_device-cpu include/asm-arm/dma-mapping.h
--- 25/include/asm-arm/dma-mapping.h~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/include/asm-arm/dma-mapping.h	2004-02-28 18:08:55.000000000 -0800
@@ -26,8 +26,10 @@ dma_addr_t sa1111_map_single(struct devi
 void sa1111_unmap_single(struct device *dev, dma_addr_t, size_t, enum dma_data_direction);
 int sa1111_map_sg(struct device *dev, struct scatterlist *, int, enum dma_data_direction);
 void sa1111_unmap_sg(struct device *dev, struct scatterlist *, int, enum dma_data_direction);
-void sa1111_dma_sync_single(struct device *dev, dma_addr_t, size_t, enum dma_data_direction);
-void sa1111_dma_sync_sg(struct device *dev, struct scatterlist *, int, enum dma_data_direction);
+void sa1111_dma_sync_single_for_cpu(struct device *dev, dma_addr_t, size_t, enum dma_data_direction);
+void sa1111_dma_sync_single_for_device(struct device *dev, dma_addr_t, size_t, enum dma_data_direction);
+void sa1111_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *, int, enum dma_data_direction);
+void sa1111_dma_sync_sg_for_device(struct device *dev, struct scatterlist *, int, enum dma_data_direction);
 
 #ifdef CONFIG_SA1111
 
@@ -115,7 +117,8 @@ dma_free_coherent(struct device *dev, si
  * or written back.
  *
  * The device owns this memory once this call has completed.  The CPU
- * can regain ownership by calling dma_unmap_single() or dma_sync_single().
+ * can regain ownership by calling dma_unmap_single() or
+ * dma_sync_single_for_cpu().
  */
 static inline dma_addr_t
 dma_map_single(struct device *dev, void *cpu_addr, size_t size,
@@ -140,7 +143,8 @@ dma_map_single(struct device *dev, void 
  * or written back.
  *
  * The device owns this memory once this call has completed.  The CPU
- * can regain ownership by calling dma_unmap_page() or dma_sync_single().
+ * can regain ownership by calling dma_unmap_page() or
+ * dma_sync_single_for_cpu().
  */
 static inline dma_addr_t
 dma_map_page(struct device *dev, struct page *page,
@@ -204,7 +208,7 @@ dma_unmap_page(struct device *dev, dma_a
  *
  * Map a set of buffers described by scatterlist in streaming
  * mode for DMA.  This is the scatter-gather version of the
- * above pci_map_single interface.  Here the scatter gather list
+ * above dma_map_single interface.  Here the scatter gather list
  * elements are each tagged with the appropriate dma address
  * and length.  They are obtained via sg_dma_{address,length}(SG).
  *
@@ -214,7 +218,7 @@ dma_unmap_page(struct device *dev, dma_a
  *       The routine returns the number of addr/length pairs actually
  *       used, at most nents.
  *
- * Device ownership issues as mentioned above for pci_map_single are
+ * Device ownership issues as mentioned above for dma_map_single are
  * the same here.
  */
 static inline int
@@ -246,7 +250,7 @@ dma_map_sg(struct device *dev, struct sc
  *
  * Unmap a set of streaming mode DMA translations.
  * Again, CPU read rules concerning calls here are the same as for
- * pci_unmap_single() above.
+ * dma_unmap_single() above.
  */
 static inline void
 dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
@@ -261,7 +265,7 @@ dma_unmap_sg(struct device *dev, struct 
 }
 
 /**
- * dma_sync_single
+ * dma_sync_single_for_cpu
  * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
  * @handle: DMA address of buffer
  * @size: size of buffer to map
@@ -270,18 +274,31 @@ dma_unmap_sg(struct device *dev, struct 
  * Make physical memory consistent for a single streaming mode DMA
  * translation after a transfer.
  *
- * If you perform a pci_map_single() but wish to interrogate the
+ * If you perform a dma_map_single() but wish to interrogate the
  * buffer using the cpu, yet do not wish to teardown the PCI dma
  * mapping, you must call this function before doing so.  At the
- * next point you give the PCI dma address back to the card, the
+ * next point you give the PCI dma address back to the card, you
+ * must first perform a dma_sync_for_device, and then the
  * device again owns the buffer.
  */
 static inline void
-dma_sync_single(struct device *dev, dma_addr_t handle, size_t size,
-		enum dma_data_direction dir)
+dma_sync_single_for_cpu(struct device *dev, dma_addr_t handle, size_t size,
+			enum dma_data_direction dir)
 {
 	if (dmadev_is_sa1111(dev)) {
-		sa1111_dma_sync_single(dev, handle, size, dir);
+		sa1111_dma_sync_single_for_cpu(dev, handle, size, dir);
+		return;
+	}
+
+	consistent_sync((void *)__bus_to_virt(handle), size, dir);
+}
+
+static inline void
+dma_sync_single_for_device(struct device *dev, dma_addr_t handle, size_t size,
+			   enum dma_data_direction dir)
+{
+	if (dmadev_is_sa1111(dev)) {
+		sa1111_dma_sync_single_for_device(dev, handle, size, dir);
 		return;
 	}
 
@@ -289,7 +306,7 @@ dma_sync_single(struct device *dev, dma_
 }
 
 /**
- * dma_sync_sg
+ * dma_sync_sg_for_cpu
  * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
  * @sg: list of buffers
  * @nents: number of buffers to map
@@ -298,17 +315,34 @@ dma_sync_single(struct device *dev, dma_
  * Make physical memory consistent for a set of streaming
  * mode DMA translations after a transfer.
  *
- * The same as pci_dma_sync_single but for a scatter-gather list,
+ * The same as dma_sync_single_for_* but for a scatter-gather list,
  * same rules and usage.
  */
 static inline void
-dma_sync_sg(struct device *dev, struct scatterlist *sg, int nents,
-	    enum dma_data_direction dir)
+dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nents,
+		    enum dma_data_direction dir)
+{
+	int i;
+
+	if (dmadev_is_sa1111(dev)) {
+		sa1111_dma_sync_sg_for_cpu(dev, sg, nents, dir);
+		return;
+	}
+
+	for (i = 0; i < nents; i++, sg++) {
+		char *virt = page_address(sg->page) + sg->offset;
+		consistent_sync(virt, sg->length, dir);
+	}
+}
+
+static inline void
+dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nents,
+		       enum dma_data_direction dir)
 {
 	int i;
 
 	if (dmadev_is_sa1111(dev)) {
-		sa1111_dma_sync_sg(dev, sg, nents, dir);
+		sa1111_dma_sync_sg_for_device(dev, sg, nents, dir);
 		return;
 	}
 
diff -puN include/asm-generic/dma-mapping.h~dma_sync_for_device-cpu include/asm-generic/dma-mapping.h
--- 25/include/asm-generic/dma-mapping.h~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/include/asm-generic/dma-mapping.h	2004-02-28 18:08:55.000000000 -0800
@@ -103,21 +103,41 @@ dma_unmap_sg(struct device *dev, struct 
 }
 
 static inline void
-dma_sync_single(struct device *dev, dma_addr_t dma_handle, size_t size,
-		enum dma_data_direction direction)
+dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size,
+			enum dma_data_direction direction)
 {
 	BUG_ON(dev->bus != &pci_bus_type);
 
-	pci_dma_sync_single(to_pci_dev(dev), dma_handle, size, (int)direction);
+	pci_dma_sync_single_for_cpu(to_pci_dev(dev), dma_handle,
+				    size, (int)direction);
 }
 
 static inline void
-dma_sync_sg(struct device *dev, struct scatterlist *sg, int nelems,
-	    enum dma_data_direction direction)
+dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size,
+			   enum dma_data_direction direction)
 {
 	BUG_ON(dev->bus != &pci_bus_type);
 
-	pci_dma_sync_sg(to_pci_dev(dev), sg, nelems, (int)direction);
+	pci_dma_sync_single_for_device(to_pci_dev(dev), dma_handle,
+				       size, (int)direction);
+}
+
+static inline void
+dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems,
+		    enum dma_data_direction direction)
+{
+	BUG_ON(dev->bus != &pci_bus_type);
+
+	pci_dma_sync_sg_for_cpu(to_pci_dev(dev), sg, nelems, (int)direction);
+}
+
+static inline void
+dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems,
+		       enum dma_data_direction direction)
+{
+	BUG_ON(dev->bus != &pci_bus_type);
+
+	pci_dma_sync_sg_for_device(to_pci_dev(dev), sg, nelems, (int)direction);
 }
 
 /* Now for the API extensions over the pci_ one */
@@ -135,12 +155,21 @@ dma_get_cache_alignment(void)
 }
 
 static inline void
-dma_sync_single_range(struct device *dev, dma_addr_t dma_handle,
-		      unsigned long offset, size_t size,
-		      enum dma_data_direction direction)
+dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t dma_handle,
+			      unsigned long offset, size_t size,
+			      enum dma_data_direction direction)
+{
+	/* just sync everything, that's all the pci API can do */
+	dma_sync_single_for_cpu(dev, dma_handle, offset+size, direction);
+}
+
+static inline void
+dma_sync_single_range_for_device(struct device *dev, dma_addr_t dma_handle,
+				 unsigned long offset, size_t size,
+				 enum dma_data_direction direction)
 {
 	/* just sync everything, that's all the pci API can do */
-	dma_sync_single(dev, dma_handle, offset+size, direction);
+	dma_sync_single_for_device(dev, dma_handle, offset+size, direction);
 }
 
 static inline void
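
The _range variants cannot be expressed exactly through the pci_*
layer, which has no offset argument, so they conservatively sync from
the base of the mapping through offset+size.  Concretely:

	/* Sync bytes [256, 512) of the mapping; since the pci_* backend
	 * can only start at the base, bytes [0, 512) get synced: */
	dma_sync_single_range_for_cpu(dev, dma_handle, 256, 256,
				      DMA_FROM_DEVICE);
	/* equivalent here to:
	 * dma_sync_single_for_cpu(dev, dma_handle, 512, DMA_FROM_DEVICE); */
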
diff -puN include/asm-generic/pci-dma-compat.h~dma_sync_for_device-cpu include/asm-generic/pci-dma-compat.h
--- 25/include/asm-generic/pci-dma-compat.h~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/include/asm-generic/pci-dma-compat.h	2004-02-28 18:08:55.000000000 -0800
@@ -71,17 +71,31 @@ pci_unmap_sg(struct pci_dev *hwdev, stru
 }
 
 static inline void
-pci_dma_sync_single(struct pci_dev *hwdev, dma_addr_t dma_handle,
+pci_dma_sync_single_for_cpu(struct pci_dev *hwdev, dma_addr_t dma_handle,
 		    size_t size, int direction)
 {
-	dma_sync_single(hwdev == NULL ? NULL : &hwdev->dev, dma_handle, size, (enum dma_data_direction)direction);
+	dma_sync_single_for_cpu(hwdev == NULL ? NULL : &hwdev->dev, dma_handle, size, (enum dma_data_direction)direction);
 }
 
 static inline void
-pci_dma_sync_sg(struct pci_dev *hwdev, struct scatterlist *sg,
+pci_dma_sync_single_for_device(struct pci_dev *hwdev, dma_addr_t dma_handle,
+		    size_t size, int direction)
+{
+	dma_sync_single_for_device(hwdev == NULL ? NULL : &hwdev->dev, dma_handle, size, (enum dma_data_direction)direction);
+}
+
+static inline void
+pci_dma_sync_sg_for_cpu(struct pci_dev *hwdev, struct scatterlist *sg,
+		int nelems, int direction)
+{
+	dma_sync_sg_for_cpu(hwdev == NULL ? NULL : &hwdev->dev, sg, nelems, (enum dma_data_direction)direction);
+}
+
+static inline void
+pci_dma_sync_sg_for_device(struct pci_dev *hwdev, struct scatterlist *sg,
 		int nelems, int direction)
 {
-	dma_sync_sg(hwdev == NULL ? NULL : &hwdev->dev, sg, nelems, (enum dma_data_direction)direction);
+	dma_sync_sg_for_device(hwdev == NULL ? NULL : &hwdev->dev, sg, nelems, (enum dma_data_direction)direction);
 }
 
 #endif
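
With the compat header in place the pci_* names are pure wrappers, so
for a PCI device these two calls end up in exactly the same backend
(the PCI_DMA_* direction values map one-to-one onto enum
dma_data_direction):

	pci_dma_sync_single_for_cpu(pdev, dma, len, PCI_DMA_FROMDEVICE);
	dma_sync_single_for_cpu(&pdev->dev, dma, len, DMA_FROM_DEVICE);
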
diff -puN include/asm-i386/dma-mapping.h~dma_sync_for_device-cpu include/asm-i386/dma-mapping.h
--- 25/include/asm-i386/dma-mapping.h~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/include/asm-i386/dma-mapping.h	2004-02-28 18:08:55.000000000 -0800
@@ -70,24 +70,42 @@ dma_unmap_sg(struct device *dev, struct 
 }
 
 static inline void
-dma_sync_single(struct device *dev, dma_addr_t dma_handle, size_t size,
-		enum dma_data_direction direction)
+dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size,
+			enum dma_data_direction direction)
+{
+}
+
+static inline void
+dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size,
+			enum dma_data_direction direction)
 {
 	flush_write_buffers();
 }
 
 static inline void
-dma_sync_single_range(struct device *dev, dma_addr_t dma_handle,
-		      unsigned long offset, size_t size,
-		      enum dma_data_direction direction)
+dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t dma_handle,
+			      unsigned long offset, size_t size,
+			      enum dma_data_direction direction)
+{
+}
+
+static inline void
+dma_sync_single_range_for_device(struct device *dev, dma_addr_t dma_handle,
+				 unsigned long offset, size_t size,
+				 enum dma_data_direction direction)
 {
 	flush_write_buffers();
 }
 
+static inline void
+dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems,
+		    enum dma_data_direction direction)
+{
+}
 
 static inline void
-dma_sync_sg(struct device *dev, struct scatterlist *sg, int nelems,
-		 enum dma_data_direction direction)
+dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems,
+		    enum dma_data_direction direction)
 {
 	flush_write_buffers();
 }
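
Here is where the split pays off on coherent hardware: on i386 the
_for_cpu variants are empty, while the _for_device variants keep
flush_write_buffers() so CPU stores are globally visible before the
device is told to look at them.  That keeps the usual descriptor-ring
producer correct (register and field names below are illustrative, not
from any driver in this patch):

	desc->buf = cpu_to_le32(buf_dma);	/* CPU fills descriptor  */
	desc->len = cpu_to_le32(buf_len);
	dma_sync_single_for_device(dev, desc_dma,
				   sizeof(*desc), DMA_TO_DEVICE);
	writel(1, ioaddr + TX_KICK);		/* NIC may fetch it now  */
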
diff -puN include/asm-i386/pci.h~dma_sync_for_device-cpu include/asm-i386/pci.h
--- 25/include/asm-i386/pci.h~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/include/asm-i386/pci.h	2004-02-28 18:08:55.000000000 -0800
@@ -60,27 +60,32 @@ struct pci_dev;
 /* This is always fine. */
 #define pci_dac_dma_supported(pci_dev, mask)	(1)
 
-static __inline__ dma64_addr_t
+static inline dma64_addr_t
 pci_dac_page_to_dma(struct pci_dev *pdev, struct page *page, unsigned long offset, int direction)
 {
 	return ((dma64_addr_t) page_to_phys(page) +
 		(dma64_addr_t) offset);
 }
 
-static __inline__ struct page *
+static inline struct page *
 pci_dac_dma_to_page(struct pci_dev *pdev, dma64_addr_t dma_addr)
 {
 	return pfn_to_page(dma_addr >> PAGE_SHIFT);
 }
 
-static __inline__ unsigned long
+static inline unsigned long
 pci_dac_dma_to_offset(struct pci_dev *pdev, dma64_addr_t dma_addr)
 {
 	return (dma_addr & ~PAGE_MASK);
 }
 
-static __inline__ void
-pci_dac_dma_sync_single(struct pci_dev *pdev, dma64_addr_t dma_addr, size_t len, int direction)
+static inline void
+pci_dac_dma_sync_single_for_cpu(struct pci_dev *pdev, dma64_addr_t dma_addr, size_t len, int direction)
+{
+}
+
+static inline void
+pci_dac_dma_sync_single_for_device(struct pci_dev *pdev, dma64_addr_t dma_addr, size_t len, int direction)
 {
 	flush_write_buffers();
 }
diff -puN include/asm-ia64/dma-mapping.h~dma_sync_for_device-cpu include/asm-ia64/dma-mapping.h
--- 25/include/asm-ia64/dma-mapping.h~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/include/asm-ia64/dma-mapping.h	2004-02-28 18:08:55.000000000 -0800
@@ -14,8 +14,10 @@
 #define dma_map_sg		platform_dma_map_sg
 #define dma_unmap_single	platform_dma_unmap_single
 #define dma_unmap_sg		platform_dma_unmap_sg
-#define dma_sync_single		platform_dma_sync_single
-#define dma_sync_sg		platform_dma_sync_sg
+#define dma_sync_single_for_cpu	platform_dma_sync_single_for_cpu
+#define dma_sync_sg_for_cpu	platform_dma_sync_sg_for_cpu
+#define dma_sync_single_for_device platform_dma_sync_single_for_device
+#define dma_sync_sg_for_device	platform_dma_sync_sg_for_device
 
 #define dma_map_page(dev, pg, off, size, dir)				\
 	dma_map_single(dev, page_address(pg) + (off), (size), (dir))
@@ -27,8 +29,10 @@
  * See Documentation/DMA-API.txt for details.
  */
 
-#define dma_sync_single_range(dev, dma_handle, offset, size, dir)	\
-	dma_sync_single(dev, dma_handle, size, dir)
+#define dma_sync_single_range_for_cpu(dev, dma_handle, offset, size, dir)	\
+	dma_sync_single_for_cpu(dev, dma_handle, size, dir)
+#define dma_sync_single_range_for_device(dev, dma_handle, offset, size, dir)	\
+	dma_sync_single_for_device(dev, dma_handle, size, dir)
 
 #define dma_supported		platform_dma_supported
 
diff -puN include/asm-ia64/machvec.h~dma_sync_for_device-cpu include/asm-ia64/machvec.h
--- 25/include/asm-ia64/machvec.h~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/include/asm-ia64/machvec.h	2004-02-28 18:08:55.000000000 -0800
@@ -42,8 +42,10 @@ typedef dma_addr_t ia64_mv_dma_map_singl
 typedef void ia64_mv_dma_unmap_single (struct device *, dma_addr_t, size_t, int);
 typedef int ia64_mv_dma_map_sg (struct device *, struct scatterlist *, int, int);
 typedef void ia64_mv_dma_unmap_sg (struct device *, struct scatterlist *, int, int);
-typedef void ia64_mv_dma_sync_single (struct device *, dma_addr_t, size_t, int);
-typedef void ia64_mv_dma_sync_sg (struct device *, struct scatterlist *, int, int);
+typedef void ia64_mv_dma_sync_single_for_cpu (struct device *, dma_addr_t, size_t, int);
+typedef void ia64_mv_dma_sync_sg_for_cpu (struct device *, struct scatterlist *, int, int);
+typedef void ia64_mv_dma_sync_single_for_device (struct device *, dma_addr_t, size_t, int);
+typedef void ia64_mv_dma_sync_sg_for_device (struct device *, struct scatterlist *, int, int);
 typedef int ia64_mv_dma_supported (struct device *, u64);
 
 /*
@@ -104,8 +106,10 @@ extern void machvec_memory_fence (void);
 #  define platform_dma_unmap_single	ia64_mv.dma_unmap_single
 #  define platform_dma_map_sg		ia64_mv.dma_map_sg
 #  define platform_dma_unmap_sg		ia64_mv.dma_unmap_sg
-#  define platform_dma_sync_single	ia64_mv.dma_sync_single
-#  define platform_dma_sync_sg		ia64_mv.dma_sync_sg
+#  define platform_dma_sync_single_for_cpu ia64_mv.dma_sync_single_for_cpu
+#  define platform_dma_sync_sg_for_cpu	ia64_mv.dma_sync_sg_for_cpu
+#  define platform_dma_sync_single_for_device ia64_mv.dma_sync_single_for_device
+#  define platform_dma_sync_sg_for_device ia64_mv.dma_sync_sg_for_device
 #  define platform_dma_supported	ia64_mv.dma_supported
 #  define platform_irq_desc		ia64_mv.irq_desc
 #  define platform_irq_to_vector	ia64_mv.irq_to_vector
@@ -150,8 +154,10 @@ struct ia64_machine_vector {
 	ia64_mv_dma_unmap_single *dma_unmap_single;
 	ia64_mv_dma_map_sg *dma_map_sg;
 	ia64_mv_dma_unmap_sg *dma_unmap_sg;
-	ia64_mv_dma_sync_single *dma_sync_single;
-	ia64_mv_dma_sync_sg *dma_sync_sg;
+	ia64_mv_dma_sync_single_for_cpu *dma_sync_single_for_cpu;
+	ia64_mv_dma_sync_sg_for_cpu *dma_sync_sg_for_cpu;
+	ia64_mv_dma_sync_single_for_device *dma_sync_single_for_device;
+	ia64_mv_dma_sync_sg_for_device *dma_sync_sg_for_device;
 	ia64_mv_dma_supported *dma_supported;
 	ia64_mv_irq_desc *irq_desc;
 	ia64_mv_irq_to_vector *irq_to_vector;
@@ -192,8 +198,10 @@ struct ia64_machine_vector {
 	platform_dma_unmap_single,		\
 	platform_dma_map_sg,			\
 	platform_dma_unmap_sg,			\
-	platform_dma_sync_single,		\
-	platform_dma_sync_sg,			\
+	platform_dma_sync_single_for_cpu,	\
+	platform_dma_sync_sg_for_cpu,		\
+	platform_dma_sync_single_for_device,	\
+	platform_dma_sync_sg_for_device,	\
 	platform_dma_supported,			\
 	platform_irq_desc,			\
 	platform_irq_to_vector,			\
@@ -231,8 +239,10 @@ extern ia64_mv_dma_map_single		swiotlb_m
 extern ia64_mv_dma_unmap_single		swiotlb_unmap_single;
 extern ia64_mv_dma_map_sg		swiotlb_map_sg;
 extern ia64_mv_dma_unmap_sg		swiotlb_unmap_sg;
-extern ia64_mv_dma_sync_single		swiotlb_sync_single;
-extern ia64_mv_dma_sync_sg		swiotlb_sync_sg;
+extern ia64_mv_dma_sync_single_for_cpu	swiotlb_sync_single_for_cpu;
+extern ia64_mv_dma_sync_sg_for_cpu	swiotlb_sync_sg_for_cpu;
+extern ia64_mv_dma_sync_single_for_device swiotlb_sync_single_for_device;
+extern ia64_mv_dma_sync_sg_for_device	swiotlb_sync_sg_for_device;
 extern ia64_mv_dma_supported		swiotlb_dma_supported;
 
 /*
@@ -290,11 +300,17 @@ extern ia64_mv_dma_supported		swiotlb_dm
 #ifndef platform_dma_unmap_sg
 # define platform_dma_unmap_sg		swiotlb_unmap_sg
 #endif
-#ifndef platform_dma_sync_single
-# define platform_dma_sync_single	swiotlb_sync_single
+#ifndef platform_dma_sync_single_for_cpu
+# define platform_dma_sync_single_for_cpu	swiotlb_sync_single_for_cpu
 #endif
-#ifndef platform_dma_sync_sg
-# define platform_dma_sync_sg		swiotlb_sync_sg
+#ifndef platform_dma_sync_sg_for_cpu
+# define platform_dma_sync_sg_for_cpu		swiotlb_sync_sg_for_cpu
+#endif
+#ifndef platform_dma_sync_single_for_device
+# define platform_dma_sync_single_for_device	swiotlb_sync_single_for_device
+#endif
+#ifndef platform_dma_sync_sg_for_device
+# define platform_dma_sync_sg_for_device	swiotlb_sync_sg_for_device
 #endif
 #ifndef platform_dma_supported
 # define  platform_dma_supported	swiotlb_dma_supported
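
On ia64 every DMA operation dispatches through the machine vector, so
the rename has to stay consistent across four layers at once: the
typedefs, the struct ia64_machine_vector fields, the MACHVEC_INIT
initializer, and the per-platform platform_* defines, with swiotlb
filling any slot a platform leaves unset.  A call in a generic kernel
resolves as:

	/* dma_sync_single_for_cpu(dev, handle, size, dir)
	 *   -> platform_dma_sync_single_for_cpu     (asm-ia64/dma-mapping.h)
	 *   -> ia64_mv.dma_sync_single_for_cpu(...) (generic kernels)
	 *   -> swiotlb_sync_single_for_cpu(...)     (default backend)
	 */
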
diff -puN include/asm-ia64/machvec_hpzx1.h~dma_sync_for_device-cpu include/asm-ia64/machvec_hpzx1.h
--- 25/include/asm-ia64/machvec_hpzx1.h~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/include/asm-ia64/machvec_hpzx1.h	2004-02-28 18:08:55.000000000 -0800
@@ -26,8 +26,10 @@ extern ia64_mv_dma_supported		sba_dma_su
 #define platform_dma_unmap_single	sba_unmap_single
 #define platform_dma_map_sg		sba_map_sg
 #define platform_dma_unmap_sg		sba_unmap_sg
-#define platform_dma_sync_single	((ia64_mv_dma_sync_single *) machvec_memory_fence)
-#define platform_dma_sync_sg		((ia64_mv_dma_sync_sg *) machvec_memory_fence)
+#define platform_dma_sync_single_for_cpu ((ia64_mv_dma_sync_single_for_cpu *) machvec_memory_fence)
+#define platform_dma_sync_sg_for_cpu	((ia64_mv_dma_sync_sg_for_cpu *) machvec_memory_fence)
+#define platform_dma_sync_single_for_device ((ia64_mv_dma_sync_single_for_device *) machvec_memory_fence)
+#define platform_dma_sync_sg_for_device	((ia64_mv_dma_sync_sg_for_device *) machvec_memory_fence)
 #define platform_dma_supported		sba_dma_supported
 
 #endif /* _ASM_IA64_MACHVEC_HPZX1_h */
diff -puN include/asm-ia64/machvec_sn2.h~dma_sync_for_device-cpu include/asm-ia64/machvec_sn2.h
--- 25/include/asm-ia64/machvec_sn2.h~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/include/asm-ia64/machvec_sn2.h	2004-02-28 18:08:55.000000000 -0800
@@ -62,8 +62,10 @@ extern ia64_mv_dma_map_single		sn_dma_ma
 extern ia64_mv_dma_unmap_single		sn_dma_unmap_single;
 extern ia64_mv_dma_map_sg		sn_dma_map_sg;
 extern ia64_mv_dma_unmap_sg		sn_dma_unmap_sg;
-extern ia64_mv_dma_sync_single		sn_dma_sync_single;
-extern ia64_mv_dma_sync_sg		sn_dma_sync_sg;
+extern ia64_mv_dma_sync_single_for_cpu	sn_dma_sync_single_for_cpu;
+extern ia64_mv_dma_sync_sg_for_cpu	sn_dma_sync_sg_for_cpu;
+extern ia64_mv_dma_sync_single_for_device sn_dma_sync_single_for_device;
+extern ia64_mv_dma_sync_sg_for_device	sn_dma_sync_sg_for_device;
 extern ia64_mv_dma_supported		sn_dma_supported;
 
 /*
@@ -105,8 +107,10 @@ extern ia64_mv_dma_supported		sn_dma_sup
 #define platform_dma_unmap_single	sn_dma_unmap_single
 #define platform_dma_map_sg		sn_dma_map_sg
 #define platform_dma_unmap_sg		sn_dma_unmap_sg
-#define platform_dma_sync_single	sn_dma_sync_single
-#define platform_dma_sync_sg		sn_dma_sync_sg
+#define platform_dma_sync_single_for_cpu sn_dma_sync_single_for_cpu
+#define platform_dma_sync_sg_for_cpu	sn_dma_sync_sg_for_cpu
+#define platform_dma_sync_single_for_device sn_dma_sync_single_for_device
+#define platform_dma_sync_sg_for_device	sn_dma_sync_sg_for_device
 #define platform_dma_supported		sn_dma_supported
 
 #include <asm/sn/sn2/io.h>
diff -puN include/asm-ia64/pci.h~dma_sync_for_device-cpu include/asm-ia64/pci.h
--- 25/include/asm-ia64/pci.h~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/include/asm-ia64/pci.h	2004-02-28 18:08:55.000000000 -0800
@@ -76,7 +76,8 @@ extern int pcibios_prep_mwi (struct pci_
 #define pci_dac_page_to_dma(dev,pg,off,dir)		((dma_addr_t) page_to_bus(pg) + (off))
 #define pci_dac_dma_to_page(dev,dma_addr)		(virt_to_page(bus_to_virt(dma_addr)))
 #define pci_dac_dma_to_offset(dev,dma_addr)		offset_in_page(dma_addr)
-#define pci_dac_dma_sync_single(dev,dma_addr,len,dir)	do { mb(); } while (0)
+#define pci_dac_dma_sync_single_for_cpu(dev,dma_addr,len,dir)	do { } while (0)
+#define pci_dac_dma_sync_single_for_device(dev,dma_addr,len,dir)	do { mb(); } while (0)
 
 #define sg_dma_len(sg)		((sg)->dma_length)
 #define sg_dma_address(sg)	((sg)->dma_address)
diff -puN include/asm-mips/dma-mapping.h~dma_sync_for_device-cpu include/asm-mips/dma-mapping.h
--- 25/include/asm-mips/dma-mapping.h~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/include/asm-mips/dma-mapping.h	2004-02-28 18:08:55.000000000 -0800
@@ -29,11 +29,17 @@ extern void dma_unmap_page(struct device
 	size_t size, enum dma_data_direction direction);
 extern void dma_unmap_sg(struct device *dev, struct scatterlist *sg,
 	int nhwentries, enum dma_data_direction direction);
-extern void dma_sync_single(struct device *dev, dma_addr_t dma_handle,
+extern void dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle,
 	size_t size, enum dma_data_direction direction);
-extern void dma_sync_single_range(struct device *dev, dma_addr_t dma_handle,
+extern void dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle,
+	size_t size, enum dma_data_direction direction);
+extern void dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t dma_handle,
+	unsigned long offset, size_t size, enum dma_data_direction direction);
+extern void dma_sync_single_range_for_device(struct device *dev, dma_addr_t dma_handle,
 	unsigned long offset, size_t size, enum dma_data_direction direction);
-extern void dma_sync_sg(struct device *dev, struct scatterlist *sg, int nelems,
+extern void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems,
+	enum dma_data_direction direction);
+extern void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems,
 	enum dma_data_direction direction);
 
 extern int dma_supported(struct device *dev, u64 mask);
diff -puN include/asm-mips/pci.h~dma_sync_for_device-cpu include/asm-mips/pci.h
--- 25/include/asm-mips/pci.h~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/include/asm-mips/pci.h	2004-02-28 18:08:55.000000000 -0800
@@ -82,7 +82,9 @@ extern struct page *pci_dac_dma_to_page(
 	dma64_addr_t dma_addr);
 extern unsigned long pci_dac_dma_to_offset(struct pci_dev *pdev,
 	dma64_addr_t dma_addr);
-extern void pci_dac_dma_sync_single(struct pci_dev *pdev,
+extern void pci_dac_dma_sync_single_for_cpu(struct pci_dev *pdev,
+	dma64_addr_t dma_addr, size_t len, int direction);
+extern void pci_dac_dma_sync_single_for_device(struct pci_dev *pdev,
 	dma64_addr_t dma_addr, size_t len, int direction);
 
 #endif /* __KERNEL__ */
diff -puN include/asm-parisc/dma-mapping.h~dma_sync_for_device-cpu include/asm-parisc/dma-mapping.h
--- 25/include/asm-parisc/dma-mapping.h~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/include/asm-parisc/dma-mapping.h	2004-02-28 18:08:55.000000000 -0800
@@ -15,8 +15,10 @@ struct hppa_dma_ops {
 	void (*unmap_single)(struct device *dev, dma_addr_t iova, size_t size, enum dma_data_direction direction);
 	int  (*map_sg)(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction);
 	void (*unmap_sg)(struct device *dev, struct scatterlist *sg, int nhwents, enum dma_data_direction direction);
-	void (*dma_sync_single)(struct device *dev, dma_addr_t iova, unsigned long offset, size_t size, enum dma_data_direction direction);
-	void (*dma_sync_sg)(struct device *dev, struct scatterlist *sg, int nelems, enum dma_data_direction direction);
+	void (*dma_sync_single_for_cpu)(struct device *dev, dma_addr_t iova, unsigned long offset, size_t size, enum dma_data_direction direction);
+	void (*dma_sync_single_for_device)(struct device *dev, dma_addr_t iova, unsigned long offset, size_t size, enum dma_data_direction direction);
+	void (*dma_sync_sg_for_cpu)(struct device *dev, struct scatterlist *sg, int nelems, enum dma_data_direction direction);
+	void (*dma_sync_sg_for_device)(struct device *dev, struct scatterlist *sg, int nelems, enum dma_data_direction direction);
 };
 
 /*
@@ -116,28 +118,53 @@ dma_unmap_page(struct device *dev, dma_a
 
 
 static inline void
-dma_sync_single(struct device *dev, dma_addr_t dma_handle, size_t size,
+dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size,
 		enum dma_data_direction direction)
 {
-	if(hppa_dma_ops->dma_sync_single)
-		hppa_dma_ops->dma_sync_single(dev, dma_handle, 0, size, direction);
+	if(hppa_dma_ops->dma_sync_single_for_cpu)
+		hppa_dma_ops->dma_sync_single_for_cpu(dev, dma_handle, 0, size, direction);
 }
 
 static inline void
-dma_sync_single_range(struct device *dev, dma_addr_t dma_handle,
+dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size,
+		enum dma_data_direction direction)
+{
+	if(hppa_dma_ops->dma_sync_single_for_device)
+		hppa_dma_ops->dma_sync_single_for_device(dev, dma_handle, 0, size, direction);
+}
+
+static inline void
+dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t dma_handle,
+		      unsigned long offset, size_t size,
+		      enum dma_data_direction direction)
+{
+	if(hppa_dma_ops->dma_sync_single_for_cpu)
+		hppa_dma_ops->dma_sync_single_for_cpu(dev, dma_handle, offset, size, direction);
+}
+
+static inline void
+dma_sync_single_range_for_device(struct device *dev, dma_addr_t dma_handle,
 		      unsigned long offset, size_t size,
 		      enum dma_data_direction direction)
 {
-	if(hppa_dma_ops->dma_sync_single)
-		hppa_dma_ops->dma_sync_single(dev, dma_handle, offset, size, direction);
+	if(hppa_dma_ops->dma_sync_single_for_device)
+		hppa_dma_ops->dma_sync_single_for_device(dev, dma_handle, offset, size, direction);
+}
+
+static inline void
+dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems,
+		 enum dma_data_direction direction)
+{
+	if(hppa_dma_ops->dma_sync_sg_for_cpu)
+		hppa_dma_ops->dma_sync_sg_for_cpu(dev, sg, nelems, direction);
 }
 
 static inline void
-dma_sync_sg(struct device *dev, struct scatterlist *sg, int nelems,
+dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems,
 		 enum dma_data_direction direction)
 {
-	if(hppa_dma_ops->dma_sync_sg)
-		hppa_dma_ops->dma_sync_sg(dev, sg, nelems, direction);
+	if(hppa_dma_ops->dma_sync_sg_for_device)
+		hppa_dma_ops->dma_sync_sg_for_device(dev, sg, nelems, direction);
 }
 
 static inline int
@@ -166,14 +193,14 @@ dma_get_cache_alignment(void)
 static inline int
 dma_is_consistent(dma_addr_t dma_addr)
 {
-	return (hppa_dma_ops->dma_sync_single == NULL);
+	return (hppa_dma_ops->dma_sync_single_for_cpu == NULL);
 }
 
 static inline void
 dma_cache_sync(void *vaddr, size_t size,
 	       enum dma_data_direction direction)
 {
-	if(hppa_dma_ops->dma_sync_single)
+	if(hppa_dma_ops->dma_sync_single_for_cpu)
 		flush_kernel_dcache_range((unsigned long)vaddr, size);
 }
 
diff -puN include/asm-ppc64/pci.h~dma_sync_for_device-cpu include/asm-ppc64/pci.h
--- 25/include/asm-ppc64/pci.h~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/include/asm-ppc64/pci.h	2004-02-28 18:08:55.000000000 -0800
@@ -112,17 +112,33 @@ static inline void pci_unmap_sg(struct p
 	pci_dma_ops.pci_unmap_sg(hwdev, sg, nents, direction);
 }
 
-static inline void pci_dma_sync_single(struct pci_dev *hwdev,
-				       dma_addr_t dma_handle,
-				       size_t size, int direction)
+static inline void pci_dma_sync_single_for_cpu(struct pci_dev *hwdev,
+					       dma_addr_t dma_handle,
+					       size_t size, int direction)
 {
 	BUG_ON(direction == PCI_DMA_NONE);
 	/* nothing to do */
 }
 
-static inline void pci_dma_sync_sg(struct pci_dev *hwdev,
-				   struct scatterlist *sg,
-				   int nelems, int direction)
+static inline void pci_dma_sync_single_for_device(struct pci_dev *hwdev,
+						  dma_addr_t dma_handle,
+						  size_t size, int direction)
+{
+	BUG_ON(direction == PCI_DMA_NONE);
+	/* nothing to do */
+}
+
+static inline void pci_dma_sync_sg_for_cpu(struct pci_dev *hwdev,
+					   struct scatterlist *sg,
+					   int nelems, int direction)
+{
+	BUG_ON(direction == PCI_DMA_NONE);
+	/* nothing to do */
+}
+
+static inline void pci_dma_sync_sg_for_device(struct pci_dev *hwdev,
+					      struct scatterlist *sg,
+					      int nelems, int direction)
 {
 	BUG_ON(direction == PCI_DMA_NONE);
 	/* nothing to do */
diff -puN include/asm-ppc/pci.h~dma_sync_for_device-cpu include/asm-ppc/pci.h
--- 25/include/asm-ppc/pci.h~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/include/asm-ppc/pci.h	2004-02-28 18:08:55.000000000 -0800
@@ -91,7 +91,7 @@ extern void pci_free_consistent(struct p
  * The 32-bit bus address to use is returned.
  *
  * Once the device is given the dma address, the device owns this memory
- * until either pci_unmap_single or pci_dma_sync_single is performed.
+ * until either pci_unmap_single or pci_dma_sync_single_for_cpu is performed.
  */
 static inline dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr,
 					size_t size, int direction)
@@ -190,35 +190,58 @@ static inline void pci_unmap_sg(struct p
  * If you perform a pci_map_single() but wish to interrogate the
  * buffer using the cpu, yet do not wish to teardown the PCI dma
  * mapping, you must call this function before doing so.  At the
- * next point you give the PCI dma address back to the card, the
- * device again owns the buffer.
+ * next point you give the PCI dma address back to the card, you
+ * must first perform a pci_dma_sync_for_device, and then the device
+ * again owns the buffer.
  */
-static inline void pci_dma_sync_single(struct pci_dev *hwdev,
-				       dma_addr_t dma_handle,
-				       size_t size, int direction)
+static inline void pci_dma_sync_single_for_cpu(struct pci_dev *hwdev,
+					       dma_addr_t dma_handle,
+					       size_t size, int direction)
 {
 	BUG_ON(direction == PCI_DMA_NONE);
 
-	consistent_sync(bus_to_virt(dma_handle), size, direction);
+	consistent_sync_for_cpu(bus_to_virt(dma_handle), size, direction);
+}
+
+static inline void pci_dma_sync_single_for_device(struct pci_dev *hwdev,
+						  dma_addr_t dma_handle,
+						  size_t size, int direction)
+{
+	BUG_ON(direction == PCI_DMA_NONE);
+
+	consistent_sync_for_device(bus_to_virt(dma_handle), size, direction);
 }
 
 /* Make physical memory consistent for a set of streaming
  * mode DMA translations after a transfer.
  *
- * The same as pci_dma_sync_single but for a scatter-gather list,
+ * The same as pci_dma_sync_single_for_* but for a scatter-gather list,
  * same rules and usage.
  */
-static inline void pci_dma_sync_sg(struct pci_dev *hwdev,
-				   struct scatterlist *sg,
-				   int nelems, int direction)
+static inline void pci_dma_sync_sg_for_cpu(struct pci_dev *hwdev,
+					   struct scatterlist *sg,
+					   int nelems, int direction)
 {
 	int i;
 
 	BUG_ON(direction == PCI_DMA_NONE);
 
 	for (i = 0; i < nelems; i++, sg++)
-		consistent_sync_page(sg->page, sg->offset,
-				     sg->length, direction);
+		consistent_sync_page_for_cpu(sg->page, sg->offset,
+					     sg->length, direction);
+}
+
+static inline void pci_dma_sync_sg_for_device(struct pci_dev *hwdev,
+					      struct scatterlist *sg,
+					      int nelems, int direction)
+{
+	int i;
+
+	BUG_ON(direction == PCI_DMA_NONE);
+
+	for (i = 0; i < nelems; i++, sg++)
+		consistent_sync_page_for_device(sg->page, sg->offset,
+						sg->length, direction);
 }
 
 /* Return whether the given PCI device DMA address mask can
@@ -237,26 +260,32 @@ static inline int pci_dma_supported(stru
  */
 #define pci_dac_dma_supported(pci_dev, mask)	(0)
 
-static __inline__ dma64_addr_t
+static inline dma64_addr_t
 pci_dac_page_to_dma(struct pci_dev *pdev, struct page *page, unsigned long offset, int direction)
 {
 	return (dma64_addr_t) page_to_bus(page) + offset;
 }
 
-static __inline__ struct page *
+static inline struct page *
 pci_dac_dma_to_page(struct pci_dev *pdev, dma64_addr_t dma_addr)
 {
 	return mem_map + (unsigned long)(dma_addr >> PAGE_SHIFT);
 }
 
-static __inline__ unsigned long
+static inline unsigned long
 pci_dac_dma_to_offset(struct pci_dev *pdev, dma64_addr_t dma_addr)
 {
 	return (dma_addr & ~PAGE_MASK);
 }
 
-static __inline__ void
-pci_dac_dma_sync_single(struct pci_dev *pdev, dma64_addr_t dma_addr, size_t len, int direction)
+static inline void
+pci_dac_dma_sync_single_for_cpu(struct pci_dev *pdev, dma64_addr_t dma_addr, size_t len, int direction)
+{
+	/* Nothing to do. */
+}
+
+static inline void
+pci_dac_dma_sync_single_for_device(struct pci_dev *pdev, dma64_addr_t dma_addr, size_t len, int direction)
 {
 	/* Nothing to do. */
 }
diff -puN include/asm-sh/pci.h~dma_sync_for_device-cpu include/asm-sh/pci.h
--- 25/include/asm-sh/pci.h~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/include/asm-sh/pci.h	2004-02-28 18:08:55.000000000 -0800
@@ -84,7 +84,7 @@ extern void pci_free_consistent(struct p
  * The 32-bit bus address to use is returned.
  *
  * Once the device is given the dma address, the device owns this memory
- * until either pci_unmap_single or pci_dma_sync_single is performed.
+ * until either pci_unmap_single or pci_dma_sync_single_for_cpu is performed.
  */
 static inline dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr,
 					size_t size, int direction)
@@ -184,12 +184,21 @@ static inline void pci_unmap_sg(struct p
  * If you perform a pci_map_single() but wish to interrogate the
  * buffer using the cpu, yet do not wish to teardown the PCI dma
  * mapping, you must call this function before doing so.  At the
- * next point you give the PCI dma address back to the card, the
- * device again owns the buffer.
- */
-static inline void pci_dma_sync_single(struct pci_dev *hwdev,
-				       dma_addr_t dma_handle,
-				       size_t size, int direction)
+ * next point you give the PCI dma address back to the card, you
+ * must first perform a pci_dma_sync_for_device, and then the device
+ * again owns the buffer.
+ */
+static inline void pci_dma_sync_single_for_cpu(struct pci_dev *hwdev,
+					       dma_addr_t dma_handle,
+					       size_t size, int direction)
+{
+	if (direction == PCI_DMA_NONE)
+                BUG();
+}
+
+static inline void pci_dma_sync_single_for_device(struct pci_dev *hwdev,
+						  dma_addr_t dma_handle,
+						  size_t size, int direction)
 {
 	if (direction == PCI_DMA_NONE)
                 BUG();
@@ -203,12 +212,20 @@ static inline void pci_dma_sync_single(s
 /* Make physical memory consistent for a set of streaming
  * mode DMA translations after a transfer.
  *
- * The same as pci_dma_sync_single but for a scatter-gather list,
+ * The same as pci_dma_sync_single_* but for a scatter-gather list,
  * same rules and usage.
  */
-static inline void pci_dma_sync_sg(struct pci_dev *hwdev,
-				   struct scatterlist *sg,
-				   int nelems, int direction)
+static inline void pci_dma_sync_sg_for_cpu(struct pci_dev *hwdev,
+					   struct scatterlist *sg,
+					   int nelems, int direction)
+{
+	if (direction == PCI_DMA_NONE)
+                BUG();
+}
+
+static inline void pci_dma_sync_sg_for_device(struct pci_dev *hwdev,
+					      struct scatterlist *sg,
+					      int nelems, int direction)
 {
 	if (direction == PCI_DMA_NONE)
                 BUG();
diff -puN include/asm-sparc64/pci.h~dma_sync_for_device-cpu include/asm-sparc64/pci.h
--- 25/include/asm-sparc64/pci.h~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/include/asm-sparc64/pci.h	2004-02-28 18:08:55.000000000 -0800
@@ -60,7 +60,7 @@ extern void pci_free_consistent(struct p
  * The 32-bit bus address to use is returned.
  *
  * Once the device is given the dma address, the device owns this memory
- * until either pci_unmap_single or pci_dma_sync_single is performed.
+ * until either pci_unmap_single or pci_dma_sync_single_for_cpu is performed.
  */
 extern dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr, size_t size, int direction);
 
@@ -123,19 +123,36 @@ extern void pci_unmap_sg(struct pci_dev 
  * If you perform a pci_map_single() but wish to interrogate the
  * buffer using the cpu, yet do not wish to teardown the PCI dma
  * mapping, you must call this function before doing so.  At the
- * next point you give the PCI dma address back to the card, the
+ * next point you give the PCI dma address back to the card, you
+ * must first perform a pci_dma_sync_for_device, and then the
  * device again owns the buffer.
  */
-extern void pci_dma_sync_single(struct pci_dev *hwdev, dma_addr_t dma_handle,
-				size_t size, int direction);
+extern void pci_dma_sync_single_for_cpu(struct pci_dev *hwdev, dma_addr_t dma_handle,
+					size_t size, int direction);
+
+static inline void
+pci_dma_sync_single_for_device(struct pci_dev *hwdev, dma_addr_t dma_handle,
+			       size_t size, int direction)
+{
+	/* No flushing needed to sync cpu writes to the device.  */
+	BUG_ON(direction == PCI_DMA_NONE);
+}
 
 /* Make physical memory consistent for a set of streaming
  * mode DMA translations after a transfer.
  *
- * The same as pci_dma_sync_single but for a scatter-gather list,
+ * The same as pci_dma_sync_single_* but for a scatter-gather list,
  * same rules and usage.
  */
-extern void pci_dma_sync_sg(struct pci_dev *hwdev, struct scatterlist *sg, int nelems, int direction);
+extern void pci_dma_sync_sg_for_cpu(struct pci_dev *hwdev, struct scatterlist *sg, int nelems, int direction);
+
+static inline void
+pci_dma_sync_sg_for_device(struct pci_dev *hwdev, struct scatterlist *sg,
+			int nelems, int direction)
+{
+	/* No flushing needed to sync cpu writes to the device.  */
+	BUG_ON(direction == PCI_DMA_NONE);
+}
 
 /* Return whether the given PCI device DMA address mask can
  * be supported properly.  For example, if your device can
@@ -159,14 +176,14 @@ extern int pci_dma_supported(struct pci_
 #define pci_dac_dma_supported(pci_dev, mask) \
 	((((mask) & PCI64_REQUIRED_MASK) == PCI64_REQUIRED_MASK) ? 1 : 0)
 
-static __inline__ dma64_addr_t
+static inline dma64_addr_t
 pci_dac_page_to_dma(struct pci_dev *pdev, struct page *page, unsigned long offset, int direction)
 {
 	return (PCI64_ADDR_BASE +
 		__pa(page_address(page)) + offset);
 }
 
-static __inline__ struct page *
+static inline struct page *
 pci_dac_dma_to_page(struct pci_dev *pdev, dma64_addr_t dma_addr)
 {
 	unsigned long paddr = (dma_addr & PAGE_MASK) - PCI64_ADDR_BASE;
@@ -174,14 +191,22 @@ pci_dac_dma_to_page(struct pci_dev *pdev
 	return virt_to_page(__va(paddr));
 }
 
-static __inline__ unsigned long
+static inline unsigned long
 pci_dac_dma_to_offset(struct pci_dev *pdev, dma64_addr_t dma_addr)
 {
 	return (dma_addr & ~PAGE_MASK);
 }
 
-static __inline__ void
-pci_dac_dma_sync_single(struct pci_dev *pdev, dma64_addr_t dma_addr, size_t len, int direction)
+static inline void
+pci_dac_dma_sync_single_for_cpu(struct pci_dev *pdev, dma64_addr_t dma_addr, size_t len, int direction)
+{
+	/* DAC cycle addressing does not make use of the
+	 * PCI controller's streaming cache, so nothing to do.
+	 */
+}
+
+static inline void
+pci_dac_dma_sync_single_for_device(struct pci_dev *pdev, dma64_addr_t dma_addr, size_t len, int direction)
 {
 	/* DAC cycle addressing does not make use of the
 	 * PCI controller's streaming cache, so nothing to do.
diff -puN include/asm-sparc64/sbus.h~dma_sync_for_device-cpu include/asm-sparc64/sbus.h
--- 25/include/asm-sparc64/sbus.h~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/include/asm-sparc64/sbus.h	2004-02-28 18:08:55.000000000 -0800
@@ -111,7 +111,11 @@ extern int sbus_map_sg(struct sbus_dev *
 extern void sbus_unmap_sg(struct sbus_dev *, struct scatterlist *, int, int);
 
 /* Finally, allow explicit synchronization of streamable mappings. */
-extern void sbus_dma_sync_single(struct sbus_dev *, dma_addr_t, size_t, int);
-extern void sbus_dma_sync_sg(struct sbus_dev *, struct scatterlist *, int, int);
+extern void sbus_dma_sync_single_for_cpu(struct sbus_dev *, dma_addr_t, size_t, int);
+#define sbus_dma_sync_single sbus_dma_sync_single_for_cpu
+extern void sbus_dma_sync_single_for_device(struct sbus_dev *, dma_addr_t, size_t, int);
+extern void sbus_dma_sync_sg_for_cpu(struct sbus_dev *, struct scatterlist *, int, int);
+#define sbus_dma_sync_sg sbus_dma_sync_sg_for_cpu
+extern void sbus_dma_sync_sg_for_device(struct sbus_dev *, struct scatterlist *, int, int);
 
 #endif /* !(_SPARC64_SBUS_H) */
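
The two #defines above are deliberate transition glue: the old
single-name entry points are aliased to the _for_cpu variants, which
matches what existing callers were using the sync for.  Roughly (dir is
a placeholder direction value):

	/* Old source keeps compiling unchanged... */
	sbus_dma_sync_single(sdev, dma, len, dir);
	/* ...and now means exactly: */
	sbus_dma_sync_single_for_cpu(sdev, dma, len, dir);

Callers that subsequently hand the buffer back to the device need an
explicit sbus_dma_sync_single_for_device() added by hand.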
diff -puN include/asm-sparc/pci.h~dma_sync_for_device-cpu include/asm-sparc/pci.h
--- 25/include/asm-sparc/pci.h~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/include/asm-sparc/pci.h	2004-02-28 18:08:55.000000000 -0800
@@ -52,7 +52,7 @@ extern void pci_free_consistent(struct p
  * The 32-bit bus address to use is returned.
  *
  * Once the device is given the dma address, the device owns this memory
- * until either pci_unmap_single or pci_dma_sync_single is performed.
+ * until either pci_unmap_single or pci_dma_sync_single_for_cpu is performed.
  */
 extern dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr, size_t size, int direction);
 
@@ -116,18 +116,21 @@ extern void pci_unmap_sg(struct pci_dev 
  * If you perform a pci_map_single() but wish to interrogate the
  * buffer using the cpu, yet do not wish to teardown the PCI dma
  * mapping, you must call this function before doing so.  At the
- * next point you give the PCI dma address back to the card, the
- * device again owns the buffer.
+ * next point you give the PCI dma address back to the card, you
+ * must first perform a pci_dma_sync_single_for_device, and then the device
+ * again owns the buffer.
  */
-extern void pci_dma_sync_single(struct pci_dev *hwdev, dma_addr_t dma_handle, size_t size, int direction);
+extern void pci_dma_sync_single_for_cpu(struct pci_dev *hwdev, dma_addr_t dma_handle, size_t size, int direction);
+extern void pci_dma_sync_single_for_device(struct pci_dev *hwdev, dma_addr_t dma_handle, size_t size, int direction);
 
 /* Make physical memory consistent for a set of streaming
  * mode DMA translations after a transfer.
  *
- * The same as pci_dma_sync_single but for a scatter-gather list,
+ * The same as pci_dma_sync_single_* but for a scatter-gather list,
  * same rules and usage.
  */
-extern void pci_dma_sync_sg(struct pci_dev *hwdev, struct scatterlist *sg, int nelems, int direction);
+extern void pci_dma_sync_sg_for_cpu(struct pci_dev *hwdev, struct scatterlist *sg, int nelems, int direction);
+extern void pci_dma_sync_sg_for_device(struct pci_dev *hwdev, struct scatterlist *sg, int nelems, int direction);
 
 /* Return whether the given PCI device DMA address mask can
  * be supported properly.  For example, if your device can
diff -puN include/asm-sparc/sbus.h~dma_sync_for_device-cpu include/asm-sparc/sbus.h
--- 25/include/asm-sparc/sbus.h~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/include/asm-sparc/sbus.h	2004-02-28 18:08:55.000000000 -0800
@@ -118,8 +118,12 @@ extern int sbus_map_sg(struct sbus_dev *
 extern void sbus_unmap_sg(struct sbus_dev *, struct scatterlist *, int, int);
 
 /* Finally, allow explicit synchronization of streamable mappings. */
-extern void sbus_dma_sync_single(struct sbus_dev *, dma_addr_t, size_t, int);
-extern void sbus_dma_sync_sg(struct sbus_dev *, struct scatterlist *, int, int);
+extern void sbus_dma_sync_single_for_cpu(struct sbus_dev *, dma_addr_t, size_t, int);
+#define sbus_dma_sync_single sbus_dma_sync_single_for_cpu
+extern void sbus_dma_sync_single_for_device(struct sbus_dev *, dma_addr_t, size_t, int);
+extern void sbus_dma_sync_sg_for_cpu(struct sbus_dev *, struct scatterlist *, int, int);
+#define sbus_dma_sync_sg sbus_dma_sync_sg_for_cpu
+extern void sbus_dma_sync_sg_for_device(struct sbus_dev *, struct scatterlist *, int, int);
 
 /* Eric Brower (ebrower@usa.net)
  * Translate SBus interrupt levels to ino values--
diff -puN include/asm-v850/pci.h~dma_sync_for_device-cpu include/asm-v850/pci.h
--- 25/include/asm-v850/pci.h~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/include/asm-v850/pci.h	2004-02-28 18:08:55.000000000 -0800
@@ -27,7 +27,7 @@ extern void pcibios_set_master (struct p
 
 /* `Grant' to PDEV the memory block at CPU_ADDR, for doing DMA.  The
    32-bit PCI bus mastering address to use is returned.  the device owns
-   this memory until either pci_unmap_single or pci_dma_sync_single is
+   this memory until either pci_unmap_single or pci_dma_sync_single_for_cpu is
    performed.  */
 extern dma_addr_t
 pci_map_single (struct pci_dev *pdev, void *cpu_addr, size_t size, int dir);
@@ -44,10 +44,15 @@ pci_unmap_single (struct pci_dev *pdev, 
    If you perform a pci_map_single() but wish to interrogate the
    buffer using the cpu, yet do not wish to teardown the PCI dma
    mapping, you must call this function before doing so.  At the next
-   point you give the PCI dma address back to the card, the device
-   again owns the buffer.  */
+   point you give the PCI dma address back to the card, you must first
+   perform a pci_dma_sync_single_for_device, and then the device again owns
+   the buffer.  */
 extern void
-pci_dma_sync_single (struct pci_dev *dev, dma_addr_t dma_addr, size_t size,
+pci_dma_sync_single_for_cpu (struct pci_dev *dev, dma_addr_t dma_addr, size_t size,
+		     int dir);
+
+extern void
+pci_dma_sync_single_for_device (struct pci_dev *dev, dma_addr_t dma_addr, size_t size,
 		     int dir);
 
 
diff -puN include/asm-x86_64/pci.h~dma_sync_for_device-cpu include/asm-x86_64/pci.h
--- 25/include/asm-x86_64/pci.h~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/include/asm-x86_64/pci.h	2004-02-28 18:08:55.000000000 -0800
@@ -78,10 +78,18 @@ extern dma_addr_t swiotlb_map_single (st
 				      int dir);
 extern void swiotlb_unmap_single (struct device *hwdev, dma_addr_t dev_addr,
 				  size_t size, int dir);
-extern void swiotlb_sync_single (struct device *hwdev, dma_addr_t dev_addr, 
-				 size_t size, int dir);
-extern void swiotlb_sync_sg (struct device *hwdev, struct scatterlist *sg, int nelems, 
-			     int dir);
+extern void swiotlb_sync_single_for_cpu (struct device *hwdev,
+					 dma_addr_t dev_addr,
+					 size_t size, int dir);
+extern void swiotlb_sync_single_for_device (struct device *hwdev,
+					    dma_addr_t dev_addr,
+					    size_t size, int dir);
+extern void swiotlb_sync_sg_for_cpu (struct device *hwdev,
+				     struct scatterlist *sg, int nelems,
+				     int dir);
+extern void swiotlb_sync_sg_for_device (struct device *hwdev,
+					struct scatterlist *sg, int nelems,
+					int dir);
 extern int swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg,
 		      int nents, int direction);
 extern void swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg,
@@ -95,7 +103,7 @@ extern void swiotlb_unmap_sg(struct devi
  * The 32-bit bus address to use is returned.
  *
  * Once the device is given the dma address, the device owns this memory
- * until either pci_unmap_single or pci_dma_sync_single is performed.
+ * until either pci_unmap_single or pci_dma_sync_single_for_cpu is performed.
  */
 extern dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr, size_t size, 
 				 int direction);
@@ -125,29 +133,56 @@ void pci_unmap_single(struct pci_dev *hw
 #define pci_unmap_len_set(PTR, LEN_NAME, VAL)		\
 	(((PTR)->LEN_NAME) = (VAL))
 
-static inline void pci_dma_sync_single(struct pci_dev *hwdev, 
-				       dma_addr_t dma_handle,
-				       size_t size, int direction)
+static inline void pci_dma_sync_single_for_cpu(struct pci_dev *hwdev,
+					       dma_addr_t dma_handle,
+					       size_t size, int direction)
 {
 	BUG_ON(direction == PCI_DMA_NONE); 
 
 #ifdef CONFIG_SWIOTLB
 	if (swiotlb)
-		return swiotlb_sync_single(&hwdev->dev,dma_handle,size,direction);
+		return swiotlb_sync_single_for_cpu(&hwdev->dev,dma_handle,size,direction);
 #endif
 
 	flush_write_buffers();
 } 
 
-static inline void pci_dma_sync_sg(struct pci_dev *hwdev, 
-				   struct scatterlist *sg,
-				   int nelems, int direction)
+static inline void pci_dma_sync_single_for_device(struct pci_dev *hwdev,
+						  dma_addr_t dma_handle,
+						  size_t size, int direction)
+{
+	BUG_ON(direction == PCI_DMA_NONE);
+
+#ifdef CONFIG_SWIOTLB
+	if (swiotlb)
+		return swiotlb_sync_single_for_device(&hwdev->dev,dma_handle,size,direction);
+#endif
+
+	flush_write_buffers();
+}
+
+static inline void pci_dma_sync_sg_for_cpu(struct pci_dev *hwdev,
+					   struct scatterlist *sg,
+					   int nelems, int direction)
+{
+	BUG_ON(direction == PCI_DMA_NONE);
+
+#ifdef CONFIG_SWIOTLB
+	if (swiotlb)
+		return swiotlb_sync_sg_for_cpu(&hwdev->dev,sg,nelems,direction);
+#endif
+	flush_write_buffers();
+}
+
+static inline void pci_dma_sync_sg_for_device(struct pci_dev *hwdev,
+					      struct scatterlist *sg,
+					      int nelems, int direction)
 { 
 	BUG_ON(direction == PCI_DMA_NONE); 
 
 #ifdef CONFIG_SWIOTLB
 	if (swiotlb)
-		return swiotlb_sync_sg(&hwdev->dev,sg,nelems,direction);
+		return swiotlb_sync_sg_for_device(&hwdev->dev,sg,nelems,direction);
 #endif
 	flush_write_buffers();
 } 
@@ -218,12 +253,21 @@ static inline dma_addr_t pci_map_page(st
  * If you perform a pci_map_single() but wish to interrogate the
  * buffer using the cpu, yet do not wish to teardown the PCI dma
  * mapping, you must call this function before doing so.  At the
- * next point you give the PCI dma address back to the card, the
+ * next point you give the PCI dma address back to the card, you
+ * must first perform a pci_dma_sync_single_for_device, and then the
  * device again owns the buffer.
  */
-static inline void pci_dma_sync_single(struct pci_dev *hwdev,
-				       dma_addr_t dma_handle,
-				       size_t size, int direction)
+static inline void pci_dma_sync_single_for_cpu(struct pci_dev *hwdev,
+					       dma_addr_t dma_handle,
+					       size_t size, int direction)
+{
+	if (direction == PCI_DMA_NONE)
+		out_of_line_bug();
+}
+
+static inline void pci_dma_sync_single_for_device(struct pci_dev *hwdev,
+						  dma_addr_t dma_handle,
+						  size_t size, int direction)
 {
 	if (direction == PCI_DMA_NONE)
 		out_of_line_bug();
@@ -233,12 +277,20 @@ static inline void pci_dma_sync_single(s
 /* Make physical memory consistent for a set of streaming
  * mode DMA translations after a transfer.
  *
- * The same as pci_dma_sync_single but for a scatter-gather list,
+ * The same as pci_dma_sync_single_* but for a scatter-gather list,
  * same rules and usage.
  */
-static inline void pci_dma_sync_sg(struct pci_dev *hwdev,
-				   struct scatterlist *sg,
-				   int nelems, int direction)
+static inline void pci_dma_sync_sg_for_cpu(struct pci_dev *hwdev,
+					   struct scatterlist *sg,
+					   int nelems, int direction)
+{
+	if (direction == PCI_DMA_NONE)
+		out_of_line_bug();
+}
+
+static inline void pci_dma_sync_sg_for_device(struct pci_dev *hwdev,
+					      struct scatterlist *sg,
+					      int nelems, int direction)
 {
 	if (direction == PCI_DMA_NONE)
 		out_of_line_bug();
@@ -264,27 +316,32 @@ extern void pci_unmap_sg(struct pci_dev 
  */
 extern int pci_dma_supported(struct pci_dev *hwdev, u64 mask);
 
-static __inline__ dma64_addr_t
+static inline dma64_addr_t
 pci_dac_page_to_dma(struct pci_dev *pdev, struct page *page, unsigned long offset, int direction)
 {
 	return ((dma64_addr_t) page_to_phys(page) +
 		(dma64_addr_t) offset);
 }
 
-static __inline__ struct page *
+static inline struct page *
 pci_dac_dma_to_page(struct pci_dev *pdev, dma64_addr_t dma_addr)
 {
 	return virt_to_page(__va(dma_addr)); 	
 }
 
-static __inline__ unsigned long
+static inline unsigned long
 pci_dac_dma_to_offset(struct pci_dev *pdev, dma64_addr_t dma_addr)
 {
 	return (dma_addr & ~PAGE_MASK);
 }
 
-static __inline__ void
-pci_dac_dma_sync_single(struct pci_dev *pdev, dma64_addr_t dma_addr, size_t len, int direction)
+static inline void
+pci_dac_dma_sync_single_for_cpu(struct pci_dev *pdev, dma64_addr_t dma_addr, size_t len, int direction)
+{
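+	/* Cache-coherent platform: nothing to do before the cpu reads. */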
+}
+
+static inline void
+pci_dac_dma_sync_single_for_device(struct pci_dev *pdev, dma64_addr_t dma_addr, size_t len, int direction)
 {
 	flush_write_buffers();
 }
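
The swiotlb dispatch above shows most clearly why a single sync entry
point was not enough: a bounced mapping has a shadow buffer that the
device actually DMAs to, and data must be copied in opposite directions
depending on who touches the memory next.  A sketch of the idea (not
the actual swiotlb code; orig and bounce are placeholders):

	/* cpu is about to read what the device wrote: copy out. */
	static void sync_for_cpu(void *orig, void *bounce, size_t len, int dir)
	{
		if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
			memcpy(orig, bounce, len);
	}

	/* device is about to read what the cpu wrote: copy in. */
	static void sync_for_device(void *orig, void *bounce, size_t len, int dir)
	{
		if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
			memcpy(bounce, orig, len);
	}

Aliasing the old swiotlb_sync_single() to either half alone would
silently drop one of the two copies.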
diff -puN include/linux/dma-mapping.h~dma_sync_for_device-cpu include/linux/dma-mapping.h
--- 25/include/linux/dma-mapping.h~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/include/linux/dma-mapping.h	2004-02-28 18:08:55.000000000 -0800
@@ -12,6 +12,10 @@ enum dma_data_direction {
 
 #include <asm/dma-mapping.h>
 
+/* Backwards compat, remove in 2.7.x */
+#define dma_sync_single		dma_sync_single_for_cpu
+#define dma_sync_sg		dma_sync_sg_for_cpu
+
 #endif
 
 
diff -puN include/linux/pci.h~dma_sync_for_device-cpu include/linux/pci.h
--- 25/include/linux/pci.h~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/include/linux/pci.h	2004-02-28 18:08:55.000000000 -0800
@@ -723,6 +723,10 @@ extern int msi_free_vectors(struct pci_d
 
 #include <asm/pci.h>
 
+/* Backwards compat, remove in 2.7.x */
+#define pci_dma_sync_single	pci_dma_sync_single_for_cpu
+#define pci_dma_sync_sg		pci_dma_sync_sg_for_cpu
+
 /*
  *  If the system does not have PCI, clearly these return errors.  Define
  *  these as simple inline functions to avoid hair in drivers.
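
Together with the dma_* defines earlier, these keep unconverted drivers
compiling against the _for_cpu halves.  Any driver that re-arms a
buffer for the device, however, must gain an explicit second call.  The
conversion pattern, roughly (mapping, buf and the helper functions are
placeholders):

	/* Before: */
	pci_dma_sync_single(pdev, mapping, len, PCI_DMA_FROMDEVICE);
	examine(buf);
	hand_back_to_card(mapping);

	/* After: */
	pci_dma_sync_single_for_cpu(pdev, mapping, len, PCI_DMA_FROMDEVICE);
	examine(buf);
	pci_dma_sync_single_for_device(pdev, mapping, len, PCI_DMA_FROMDEVICE);
	hand_back_to_card(mapping);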
diff -puN include/linux/usb_gadget.h~dma_sync_for_device-cpu include/linux/usb_gadget.h
--- 25/include/linux/usb_gadget.h~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/include/linux/usb_gadget.h	2004-02-28 18:08:55.000000000 -0800
@@ -117,7 +117,7 @@ struct usb_ep_ops {
 	void (*free_buffer) (struct usb_ep *ep, void *buf, dma_addr_t dma,
 		unsigned bytes);
 	// NOTE:  on 2.5, drivers may also use dma_map() and
-	// dma_sync_single() to manage dma overhead. 
+	// dma_sync_single_*() to manage dma overhead.
 
 	int (*queue) (struct usb_ep *ep, struct usb_request *req,
 		int gfp_flags);
diff -puN include/linux/usb.h~dma_sync_for_device-cpu include/linux/usb.h
--- 25/include/linux/usb.h~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/include/linux/usb.h	2004-02-28 18:08:55.000000000 -0800
@@ -830,14 +830,18 @@ void usb_buffer_free (struct usb_device 
 	void *addr, dma_addr_t dma);
 
 struct urb *usb_buffer_map (struct urb *urb);
+#if 0
 void usb_buffer_dmasync (struct urb *urb);
+#endif
 void usb_buffer_unmap (struct urb *urb);
 
 struct scatterlist;
 int usb_buffer_map_sg (struct usb_device *dev, unsigned pipe,
 		struct scatterlist *sg, int nents);
+#if 0
 void usb_buffer_dmasync_sg (struct usb_device *dev, unsigned pipe,
 		struct scatterlist *sg, int n_hw_ents);
+#endif
 void usb_buffer_unmap_sg (struct usb_device *dev, unsigned pipe,
 		struct scatterlist *sg, int n_hw_ents);
 
diff -puN include/net/irda/vlsi_ir.h~dma_sync_for_device-cpu include/net/irda/vlsi_ir.h
--- 25/include/net/irda/vlsi_ir.h~dma_sync_for_device-cpu	2004-02-28 18:08:54.000000000 -0800
+++ 25-akpm/include/net/irda/vlsi_ir.h	2004-02-28 18:08:55.000000000 -0800
@@ -41,19 +41,6 @@
 #define PCI_CLASS_SUBCLASS_MASK		0xffff
 #endif
 
-/* missing pci-dma api call to give streaming dma buffer back to hw
- * patch was floating on lkml around 2.5.2x and might be present later.
- * Defining it this way is ok, since the vlsi-ir is only
- * used on two oldish x86-based notebooks which are cache-coherent
- * (and flush_write_buffers also handles PPro errata and C3 OOstore)
- */
-#ifdef CONFIG_X86
-#include <asm-i386/io.h>
-#define pci_dma_prep_single(dev, addr, size, direction)	flush_write_buffers()
-#else
-#error missing pci dma api call
-#endif
-
 /* in recent 2.5 interrupt handlers have non-void return value */
 #ifndef IRQ_RETVAL
 typedef void irqreturn_t;
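
With the generic API now providing a give-it-back-to-the-hardware
operation, the driver-private pci_dma_prep_single() hack deleted above
has a direct generic replacement; in effect the converted vlsi_ir
driver now does:

	/* was:  pci_dma_prep_single(pdev, addr, size, direction); */
	pci_dma_sync_single_for_device(pdev, addr, size, direction);

which also kills the #error on non-x86 builds.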

_