patch-2.4.19 linux-2.4.19/arch/ppc64/kernel/pci_dma.c
- Lines: 1497
- Date: Fri Aug 2 17:39:43 2002
- Orig file: linux-2.4.18/arch/ppc64/kernel/pci_dma.c
- Orig date: Wed Dec 31 16:00:00 1969
diff -urN linux-2.4.18/arch/ppc64/kernel/pci_dma.c linux-2.4.19/arch/ppc64/kernel/pci_dma.c
@@ -0,0 +1,1496 @@
+/*
+ * pci_dma.c
+ * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
+ *
+ * Dynamic DMA mapping support.
+ *
+ * Manages the TCE space assigned to this partition.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/pci.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/rtas.h>
+#include <asm/ppcdebug.h>
+
+#include <asm/iSeries/HvCallXm.h>
+#include <asm/iSeries/LparData.h>
+#include <asm/pci_dma.h>
+#include <asm/pci-bridge.h>
+#include <asm/iSeries/iSeries_pci.h>
+
+#include <asm/machdep.h>
+
+#include "pci.h"
+
+/* #define DEBUG_TCE 1 */
+/* #define MONITOR_TCE 1 */ /* Turn on to sanity check TCE generation. */
+
+
+/* Initialized so this array does not end up in the BSS section.
+ * Only used to pass OF initialization data set in prom.c into the main
+ * kernel code -- data ultimately copied into tceTables[].
+ */
+extern struct _of_tce_table of_tce_table[];
+
+extern struct pci_controller* hose_head;
+extern struct pci_controller** hose_tail;
+extern struct list_head iSeries_Global_Device_List;
+
+struct TceTable virtBusVethTceTable; /* Tce table for virtual ethernet */
+struct TceTable virtBusVioTceTable; /* Tce table for virtual I/O */
+
+struct iSeries_Device_Node iSeries_veth_dev_node = { LogicalSlot: 0xFF, DevTceTable: &virtBusVethTceTable };
+struct iSeries_Device_Node iSeries_vio_dev_node = { LogicalSlot: 0xFF, DevTceTable: &virtBusVioTceTable };
+
+struct pci_dev iSeries_veth_dev_st = { sysdata: &iSeries_veth_dev_node };
+struct pci_dev iSeries_vio_dev_st = { sysdata: &iSeries_vio_dev_node };
+
+struct pci_dev * iSeries_veth_dev = &iSeries_veth_dev_st;
+struct pci_dev * iSeries_vio_dev = &iSeries_vio_dev_st;
+
+/* Device TceTable is stored in Device Node */
+/* struct TceTable * tceTables[256]; */ /* Tce tables for 256 busses
+ * Bus 255 is the virtual bus
+ * zero indicates no bus defined
+ */
+/* allocates a contiguous range of tces (power-of-2 size) */
+static inline long alloc_tce_range(struct TceTable *,
+ unsigned order );
+
+/* allocates a contiguous range of tces (power-of-2 size)
+ * assumes lock already held
+ */
+static long alloc_tce_range_nolock(struct TceTable *,
+ unsigned order );
+
+/* frees a contiguous range of tces (power-of-2 size) */
+static inline void free_tce_range(struct TceTable *,
+ long tcenum,
+ unsigned order );
+
+/* frees a contiguous range of tces (power-of-2 size)
+ * assumes lock already held
+ */
+void free_tce_range_nolock(struct TceTable *,
+ long tcenum,
+ unsigned order );
+
+/* allocates a range of tces and sets them to the pages */
+static inline dma_addr_t get_tces( struct TceTable *,
+ unsigned order,
+ void *page,
+ unsigned numPages,
+ int direction );
+
+static long test_tce_range( struct TceTable *,
+ long tcenum,
+ unsigned order );
+
+static unsigned fill_scatterlist_sg(struct scatterlist *sg, int nents,
+ dma_addr_t dma_addr,
+ unsigned long numTces );
+
+static unsigned long num_tces_sg( struct scatterlist *sg,
+ int nents );
+
+static dma_addr_t create_tces_sg( struct TceTable *tbl,
+ struct scatterlist *sg,
+ int nents,
+ unsigned numTces,
+ int direction );
+
+static void getTceTableParmsiSeries(struct iSeries_Device_Node* DevNode,
+ struct TceTable *tce_table_parms );
+
+static void getTceTableParmsPSeries( struct pci_controller *phb,
+ struct device_node *dn,
+ struct TceTable *tce_table_parms );
+
+static void getTceTableParmsPSeriesLP(struct pci_controller *phb,
+ struct device_node *dn,
+ struct TceTable *newTceTable );
+
+static struct TceTable* findHwTceTable(struct TceTable * newTceTable );
+
+void create_pci_bus_tce_table( unsigned long token );
+
+u8 iSeries_Get_Bus( struct pci_dev * dv )
+{
+ return 0;
+}
+
+static inline struct TceTable *get_tce_table(struct pci_dev *dev)
+{
+ if (!dev)
+ dev = ppc64_isabridge_dev;
+ if (!dev)
+ return NULL;
+ if (naca->platform == PLATFORM_ISERIES_LPAR) {
+ return ISERIES_DEVNODE(dev)->DevTceTable;
+ } else {
+ return PCI_GET_DN(dev)->tce_table;
+ }
+}
+
+static unsigned long __inline__ count_leading_zeros64( unsigned long x )
+{
+ unsigned long lz;
+ asm("cntlzd %0,%1" : "=r"(lz) : "r"(x));
+ return lz;
+}
+
+static void tce_build_iSeries(struct TceTable *tbl, long tcenum,
+ unsigned long uaddr, int direction )
+{
+ u64 setTceRc;
+ union Tce tce;
+
+ PPCDBG(PPCDBG_TCE, "build_tce: uaddr = 0x%lx\n", uaddr);
+ PPCDBG(PPCDBG_TCE, "\ttcenum = 0x%lx, tbl = 0x%lx, index=%lx\n",
+ tcenum, tbl, tbl->index);
+
+ tce.wholeTce = 0;
+ tce.tceBits.rpn = (virt_to_absolute(uaddr)) >> PAGE_SHIFT;
+
+ /* If for virtual bus */
+ if ( tbl->tceType == TCE_VB ) {
+ tce.tceBits.valid = 1;
+ tce.tceBits.allIo = 1;
+ if ( direction != PCI_DMA_TODEVICE )
+ tce.tceBits.readWrite = 1;
+ } else {
+ /* If for PCI bus */
+ tce.tceBits.readWrite = 1; // Read allowed
+ if ( direction != PCI_DMA_TODEVICE )
+ tce.tceBits.pciWrite = 1;
+ }
+
+ setTceRc = HvCallXm_setTce((u64)tbl->index,
+ (u64)tcenum,
+ tce.wholeTce );
+ if(setTceRc) {
+ panic("PCI_DMA: HvCallXm_setTce failed, Rc: 0x%lx\n", setTceRc);
+ }
+}
+
+static void tce_build_pSeries(struct TceTable *tbl, long tcenum,
+ unsigned long uaddr, int direction )
+{
+ union Tce tce;
+ union Tce *tce_addr;
+
+ PPCDBG(PPCDBG_TCE, "build_tce: uaddr = 0x%lx\n", uaddr);
+ PPCDBG(PPCDBG_TCE, "\ttcenum = 0x%lx, tbl = 0x%lx, index=%lx\n",
+ tcenum, tbl, tbl->index);
+
+ tce.wholeTce = 0;
+ tce.tceBits.rpn = (virt_to_absolute(uaddr)) >> PAGE_SHIFT;
+
+ tce.tceBits.readWrite = 1; // Read allowed
+ if ( direction != PCI_DMA_TODEVICE ) tce.tceBits.pciWrite = 1;
+
+ tce_addr = ((union Tce *)tbl->base) + tcenum;
+ *tce_addr = (union Tce)tce.wholeTce;
+
+}
+
+/*
+ * Build a TceTable structure. This contains a multi-level bit map which
+ * is used to manage allocation of the tce space.
+ */
+static struct TceTable *build_tce_table( struct TceTable * tbl )
+{
+ unsigned long bits, bytes, totalBytes;
+ unsigned long numBits[NUM_TCE_LEVELS], numBytes[NUM_TCE_LEVELS];
+ unsigned i, k, m;
+ unsigned char * pos, * p, b;
+
+ PPCDBG(PPCDBG_TCEINIT, "build_tce_table: tbl = 0x%lx\n", tbl);
+ spin_lock_init( &(tbl->lock) );
+
+ tbl->mlbm.maxLevel = 0;
+
+ /* Compute number of bits and bytes for each level of the
+ * multi-level bit map
+ */
+ totalBytes = 0;
+ bits = tbl->size * (PAGE_SIZE / sizeof( union Tce ));
+
+ for ( i=0; i<NUM_TCE_LEVELS; ++i ) {
+ bytes = ((bits+63)/64) * 8;
+ PPCDBG(PPCDBG_TCEINIT, "build_tce_table: level %d bits=%ld, bytes=%ld\n", i, bits, bytes );
+ numBits[i] = bits;
+ numBytes[i] = bytes;
+ bits /= 2;
+ totalBytes += bytes;
+ }
+ PPCDBG(PPCDBG_TCEINIT, "build_tce_table: totalBytes=%ld\n", totalBytes );
+
+ pos = (char *)__get_free_pages( GFP_ATOMIC, get_order( totalBytes ));
+
+ if ( pos == NULL ) {
+ panic("PCI_DMA: Allocation failed in build_tce_table!\n");
+ }
+
+ /* For each level, fill in the pointer to the bit map,
+ * and turn on the last bit in the bit map (if the
+ * number of bits in the map is odd). The highest
+ * level will get all of its bits turned on.
+ */
+ memset( pos, 0, totalBytes );
+ for (i=0; i<NUM_TCE_LEVELS; ++i) {
+ if ( numBytes[i] ) {
+ tbl->mlbm.level[i].map = pos;
+ tbl->mlbm.maxLevel = i;
+
+ if ( numBits[i] & 1 ) {
+ p = pos + numBytes[i] - 1;
+ m = (( numBits[i] % 8) - 1) & 7;
+ *p = 0x80 >> m;
+ PPCDBG(PPCDBG_TCEINIT, "build_tce_table: level %d last bit %x\n", i, 0x80>>m );
+ }
+ }
+ else
+ tbl->mlbm.level[i].map = 0;
+ pos += numBytes[i];
+ tbl->mlbm.level[i].numBits = numBits[i];
+ tbl->mlbm.level[i].numBytes = numBytes[i];
+ }
+
+ /* For the highest level, turn on all the bits */
+
+ i = tbl->mlbm.maxLevel;
+ p = tbl->mlbm.level[i].map;
+ m = numBits[i];
+ PPCDBG(PPCDBG_TCEINIT, "build_tce_table: highest level (%d) has all bits set\n", i);
+ for (k=0; k<numBytes[i]; ++k) {
+ if ( m >= 8 ) {
+ /* handle full bytes */
+ *p++ = 0xff;
+ m -= 8;
+ }
+ else if(m>0) {
+ /* handle the last partial byte */
+ b = 0x80;
+ *p = 0;
+ while (m) {
+ *p |= b;
+ b >>= 1;
+ --m;
+ }
+ } else {
+ break;
+ }
+ }
+
+ return tbl;
+}
+
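+/* Illustrative worked example (editor's note, not part of the original
+ * patch), assuming 4 KB pages and 8-byte TCE entries, so one page of TCE
+ * table holds 512 entries.  For tbl->size = 8 pages (4096 TCEs) the
+ * multi-level bit map built above comes out as:
+ *
+ *	level 0: 4096 bits (one per TCE)       -> 512 bytes
+ *	level 1: 2048 bits (pairs of TCEs)     -> 256 bytes
+ *	level 2: 1024 bits (blocks of 4 TCEs)  -> 128 bytes
+ *	... halving at each level, up to NUM_TCE_LEVELS.
+ */
+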
+static inline long alloc_tce_range( struct TceTable *tbl, unsigned order )
+{
+ long retval;
+ unsigned long flags;
+
+ /* Lock the tce allocation bitmap */
+ spin_lock_irqsave( &(tbl->lock), flags );
+
+ /* Do the actual work */
+ retval = alloc_tce_range_nolock( tbl, order );
+
+ /* Unlock the tce allocation bitmap */
+ spin_unlock_irqrestore( &(tbl->lock), flags );
+
+ return retval;
+}
+
+static long alloc_tce_range_nolock( struct TceTable *tbl, unsigned order )
+{
+ unsigned long numBits, numBytes;
+ unsigned long i, bit, block, mask;
+ long tcenum;
+ u64 * map;
+
+ /* If the order (power of 2 size) requested is larger than our
+ * biggest, indicate failure
+ */
+ if(order >= NUM_TCE_LEVELS) {
+		/* This can happen if a block of TCEs is not found.  This code */
+		/* may be in a recursive loop looking up the bit map for the range.*/
+ panic("PCI_DMA: alloc_tce_range_nolock: invalid order: %d\n",order);
+ }
+
+ numBits = tbl->mlbm.level[order].numBits;
+ numBytes = tbl->mlbm.level[order].numBytes;
+ map = (u64 *)tbl->mlbm.level[order].map;
+
+ /* Initialize return value to -1 (failure) */
+ tcenum = -1;
+
+ /* Loop through the bytes of the bitmap */
+ for (i=0; i<numBytes/8; ++i) {
+ if ( *map ) {
+ /* A free block is found, compute the block
+ * number (of this size)
+ */
+ bit = count_leading_zeros64( *map );
+ block = (i * 64) + bit; /* Bit count to free entry */
+
+ /* turn off the bit in the map to indicate
+ * that the block is now in use
+ */
+ mask = 0x1UL << (63 - bit);
+ *map &= ~mask;
+
+ /* compute the index into our tce table for
+ * the first tce in the block
+ */
+ PPCDBG(PPCDBG_TCE, "alloc_tce_range_nolock: allocating block %ld, (byte=%ld, bit=%ld) order %d\n", block, i, bit, order );
+ tcenum = block << order;
+ return tcenum;
+ }
+ ++map;
+ }
+
+#ifdef DEBUG_TCE
+ if ( tcenum == -1 ) {
+ PPCDBG(PPCDBG_TCE, "alloc_tce_range_nolock: no available blocks of order = %d\n", order );
+ if ( order < tbl->mlbm.maxLevel ) {
+ PPCDBG(PPCDBG_TCE, "alloc_tce_range_nolock: trying next bigger size\n" );
+ }
+ else {
+ panic("PCI_DMA: alloc_tce_range_nolock: maximum size reached...failing\n");
+ }
+ }
+#endif
+
+ /* If no block of the requested size was found, try the next
+ * size bigger. If one of those is found, return the second
+ * half of the block to freespace and keep the first half
+ */
+ if((tcenum == -1) && (order < (NUM_TCE_LEVELS - 1))) {
+ tcenum = alloc_tce_range_nolock( tbl, order+1 );
+ if ( tcenum != -1 ) {
+ free_tce_range_nolock( tbl, tcenum+(1<<order), order );
+ }
+ }
+
+ /* Return the index of the first tce in the block
+ * (or -1 if we failed)
+ */
+ return tcenum;
+}
+
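+/* Illustrative worked example (editor's note, not part of the original
+ * patch): a request for order 1 (2 TCEs) when level 1 has no free block.
+ * The code above recurses with order 2 and gets a 4-TCE block starting
+ * at, say, tcenum 8; it then returns the upper half to level 1 with
+ * free_tce_range_nolock(tbl, 8 + (1 << 1), 1), i.e. TCEs 10-11, and
+ * hands tcenum 8 (TCEs 8-9) back to the caller.
+ */
+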
+static inline void free_tce_range(struct TceTable *tbl,
+ long tcenum, unsigned order )
+{
+ unsigned long flags;
+
+ /* Lock the tce allocation bitmap */
+ spin_lock_irqsave( &(tbl->lock), flags );
+
+ /* Do the actual work */
+ free_tce_range_nolock( tbl, tcenum, order );
+
+ /* Unlock the tce allocation bitmap */
+ spin_unlock_irqrestore( &(tbl->lock), flags );
+
+}
+
+void free_tce_range_nolock(struct TceTable *tbl,
+ long tcenum, unsigned order )
+{
+ unsigned long block;
+ unsigned byte, bit, mask, b;
+ unsigned char * map, * bytep;
+
+ if (order >= NUM_TCE_LEVELS) {
+ panic("PCI_DMA: free_tce_range: invalid order: 0x%x\n",order);
+ return;
+ }
+
+ block = tcenum >> order;
+
+#ifdef MONITOR_TCE
+ if ( tcenum != (block << order ) ) {
+ printk("PCI_DMA: Free_tce_range: tcenum %lx misaligned for order %x\n",tcenum, order);
+ return;
+ }
+ if ( block >= tbl->mlbm.level[order].numBits ) {
+		printk("PCI_DMA: Free_tce_range: tcenum %lx is outside the range of this map (order %x, numBits %lx)\n",
+ tcenum, order, tbl->mlbm.level[order].numBits );
+ return;
+ }
+ if ( test_tce_range( tbl, tcenum, order ) ) {
+ printk("PCI_DMA: Freeing range not allocated: tTceTable %p, tcenum %lx, order %x\n",tbl, tcenum, order );
+ return;
+ }
+#endif
+
+ map = tbl->mlbm.level[order].map;
+ byte = block / 8;
+ bit = block % 8;
+ mask = 0x80 >> bit;
+ bytep = map + byte;
+
+#ifdef DEBUG_TCE
+ PPCDBG(PPCDBG_TCE,"free_tce_range_nolock: freeing block %ld (byte=%d, bit=%d) of order %d\n",
+ block, byte, bit, order);
+#endif
+
+#ifdef MONITOR_TCE
+ if ( *bytep & mask ) {
+ panic("PCI_DMA: Tce already free: TceTable %p, tcenum %lx, order %x\n",tbl,tcenum,order);
+ }
+#endif
+
+ *bytep |= mask;
+
+ /* If there is a higher level in the bit map than this we may be
+ * able to buddy up this block with its partner.
+ * If this is the highest level we can't buddy up
+ * If this level has an odd number of bits and
+ * we are freeing the last block we can't buddy up
+ * Don't buddy up if it's in the first 1/4 of the level
+ */
+ if (( order < tbl->mlbm.maxLevel ) &&
+ ( block > (tbl->mlbm.level[order].numBits/4) ) &&
+ (( block < tbl->mlbm.level[order].numBits-1 ) ||
+ ( 0 == ( tbl->mlbm.level[order].numBits & 1)))) {
+ /* See if we can buddy up the block we just freed */
+ bit &= 6; /* get to the first of the buddy bits */
+ mask = 0xc0 >> bit; /* build two bit mask */
+ b = *bytep & mask; /* Get the two bits */
+ if ( 0 == (b ^ mask) ) { /* If both bits are on */
+ /* both of the buddy blocks are free we can combine them */
+ *bytep ^= mask; /* turn off the two bits */
+ block = ( byte * 8 ) + bit; /* block of first of buddies */
+ tcenum = block << order;
+ /* free the buddied block */
+ PPCDBG(PPCDBG_TCE,
+ "free_tce_range: buddying blocks %ld & %ld\n",
+ block, block+1);
+ free_tce_range_nolock( tbl, tcenum, order+1 );
+ }
+ }
+}
+
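+/* Illustrative worked example (editor's note, not part of the original
+ * patch): with 4096 bits at level 0, freeing block 2049 at order 0 when
+ * its even buddy, block 2048, is already free clears both bits and calls
+ * free_tce_range_nolock(tbl, 2048, 1), i.e. the pair becomes free block
+ * 1024 at order 1.  Coalescing is skipped for blocks in the first quarter
+ * of a level and for the last, unpaired bit of an odd-sized level.
+ */
+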
+static long test_tce_range( struct TceTable *tbl, long tcenum, unsigned order )
+{
+ unsigned long block;
+ unsigned byte, bit, mask, b;
+ long retval, retLeft, retRight;
+ unsigned char * map;
+
+ map = tbl->mlbm.level[order].map;
+ block = tcenum >> order;
+ byte = block / 8; /* Byte within bitmap */
+ bit = block % 8; /* Bit within byte */
+ mask = 0x80 >> bit;
+ b = (*(map+byte) & mask ); /* 0 if block is allocated, else free */
+ if ( b )
+ retval = 1; /* 1 == block is free */
+ else
+ retval = 0; /* 0 == block is allocated */
+ /* Test bits at all levels below this to ensure that all agree */
+
+ if (order) {
+ retLeft = test_tce_range( tbl, tcenum, order-1 );
+ retRight = test_tce_range( tbl, tcenum+(1<<(order-1)), order-1 );
+ if ( retLeft || retRight ) {
+ retval = 2;
+ }
+ }
+
+ /* Test bits at all levels above this to ensure that all agree */
+
+ return retval;
+}
+
+static inline dma_addr_t get_tces( struct TceTable *tbl, unsigned order, void *page, unsigned numPages, int direction )
+{
+ long tcenum;
+ unsigned long uaddr;
+ unsigned i;
+ dma_addr_t retTce = NO_TCE;
+
+ uaddr = (unsigned long)page & PAGE_MASK;
+
+ /* Allocate a range of tces */
+ tcenum = alloc_tce_range( tbl, order );
+ if ( tcenum != -1 ) {
+ /* We got the tces we wanted */
+ tcenum += tbl->startOffset; /* Offset into real TCE table */
+ retTce = tcenum << PAGE_SHIFT; /* Set the return dma address */
+ /* Setup a tce for each page */
+ for (i=0; i<numPages; ++i) {
+ ppc_md.tce_build(tbl, tcenum, uaddr, direction);
+ ++tcenum;
+ uaddr += PAGE_SIZE;
+ }
+ /* Make sure the update is visible to hardware.
+ sync required to synchronize the update to
+ the TCE table with the MMIO that will send
+ the bus address to the IOA */
+ __asm__ __volatile__ ("sync" : : : "memory");
+ }
+ else {
+ panic("PCI_DMA: Tce Allocation failure in get_tces. 0x%p\n",tbl);
+ }
+
+ return retTce;
+}
+
+static void tce_free_one_iSeries( struct TceTable *tbl, long tcenum )
+{
+ u64 set_tce_rc;
+ union Tce tce;
+ tce.wholeTce = 0;
+ set_tce_rc = HvCallXm_setTce((u64)tbl->index,
+ (u64)tcenum,
+ tce.wholeTce);
+ if ( set_tce_rc )
+ panic("PCI_DMA: HvCallXm_setTce failed, Rc: 0x%lx\n", set_tce_rc);
+
+}
+
+static void tce_free_one_pSeries( struct TceTable *tbl, long tcenum )
+{
+ union Tce tce;
+ union Tce *tce_addr;
+
+ tce.wholeTce = 0;
+
+ tce_addr = ((union Tce *)tbl->base) + tcenum;
+ *tce_addr = (union Tce)tce.wholeTce;
+
+}
+
+static void tce_free(struct TceTable *tbl, dma_addr_t dma_addr,
+ unsigned order, unsigned num_pages)
+{
+ long tcenum, total_tces, free_tce;
+ unsigned i;
+
+ total_tces = (tbl->size * (PAGE_SIZE / sizeof(union Tce)));
+
+ tcenum = dma_addr >> PAGE_SHIFT;
+ free_tce = tcenum - tbl->startOffset;
+
+ if ( ( (free_tce + num_pages) > total_tces ) ||
+ ( tcenum < tbl->startOffset ) ) {
+ printk("tce_free: invalid tcenum\n");
+ printk("\ttcenum = 0x%lx\n", tcenum);
+ printk("\tTCE Table = 0x%lx\n", (u64)tbl);
+ printk("\tbus# = 0x%lx\n", (u64)tbl->busNumber );
+ printk("\tsize = 0x%lx\n", (u64)tbl->size);
+ printk("\tstartOff = 0x%lx\n", (u64)tbl->startOffset );
+ printk("\tindex = 0x%lx\n", (u64)tbl->index);
+ return;
+ }
+
+ for (i=0; i<num_pages; ++i) {
+ ppc_md.tce_free_one(tbl, tcenum);
+ ++tcenum;
+ }
+
+ /* No sync (to make TCE change visible) is required here.
+ The lwsync when acquiring the lock in free_tce_range
+ is sufficient to synchronize with the bitmap.
+ */
+
+ free_tce_range( tbl, free_tce, order );
+}
+
+void __init create_virtual_bus_tce_table(void)
+{
+ struct TceTable *t;
+ struct TceTableManagerCB virtBusTceTableParms;
+ u64 absParmsPtr;
+
+ virtBusTceTableParms.busNumber = 255; /* Bus 255 is the virtual bus */
+ virtBusTceTableParms.virtualBusFlag = 0xff; /* Ask for virtual bus */
+
+ absParmsPtr = virt_to_absolute( (u64)&virtBusTceTableParms );
+ HvCallXm_getTceTableParms( absParmsPtr );
+
+ virtBusVethTceTable.size = virtBusTceTableParms.size / 2;
+ virtBusVethTceTable.busNumber = virtBusTceTableParms.busNumber;
+ virtBusVethTceTable.startOffset = virtBusTceTableParms.startOffset;
+ virtBusVethTceTable.index = virtBusTceTableParms.index;
+ virtBusVethTceTable.tceType = TCE_VB;
+
+ virtBusVioTceTable.size = virtBusTceTableParms.size - virtBusVethTceTable.size;
+ virtBusVioTceTable.busNumber = virtBusTceTableParms.busNumber;
+ virtBusVioTceTable.startOffset = virtBusTceTableParms.startOffset +
+ virtBusVethTceTable.size * (PAGE_SIZE/sizeof(union Tce));
+ virtBusVioTceTable.index = virtBusTceTableParms.index;
+ virtBusVioTceTable.tceType = TCE_VB;
+
+ t = build_tce_table( &virtBusVethTceTable );
+ if ( t ) {
+ /* tceTables[255] = t; */
+ //VirtBusVethTceTable = t;
+ printk( "Virtual Bus VETH TCE table built successfully.\n");
+ printk( " TCE table size = %ld entries\n",
+ (unsigned long)t->size*(PAGE_SIZE/sizeof(union Tce)) );
+ printk( " TCE table token = %d\n",
+ (unsigned)t->index );
+ printk( " TCE table start entry = 0x%lx\n",
+ (unsigned long)t->startOffset );
+ }
+ else printk( "Virtual Bus VETH TCE table failed.\n");
+
+ t = build_tce_table( &virtBusVioTceTable );
+ if ( t ) {
+ //VirtBusVioTceTable = t;
+ printk( "Virtual Bus VIO TCE table built successfully.\n");
+ printk( " TCE table size = %ld entries\n",
+ (unsigned long)t->size*(PAGE_SIZE/sizeof(union Tce)) );
+ printk( " TCE table token = %d\n",
+ (unsigned)t->index );
+ printk( " TCE table start entry = 0x%lx\n",
+ (unsigned long)t->startOffset );
+ }
+ else printk( "Virtual Bus VIO TCE table failed.\n");
+}
+
+void create_tce_tables_for_buses(struct list_head *bus_list)
+{
+ struct pci_controller* phb;
+ struct device_node *dn, *first_dn;
+ int num_slots, num_slots_ilog2;
+ int first_phb = 1;
+
+ for (phb=hose_head;phb;phb=phb->next) {
+ first_dn = ((struct device_node *)phb->arch_data)->child;
+ /* Carve 2GB into the largest dma_window_size possible */
+ for (dn = first_dn, num_slots = 0; dn != NULL; dn = dn->sibling)
+ num_slots++;
+ num_slots_ilog2 = __ilog2(num_slots);
+ if ((1<<num_slots_ilog2) != num_slots)
+ num_slots_ilog2++;
+ phb->dma_window_size = 1 << (22 - num_slots_ilog2);
+ /* Reserve 16MB of DMA space on the first PHB.
+ * We should probably be more careful and use firmware props.
+ * In reality this space is remapped, not lost. But we don't
+ * want to get that smart to handle it -- too much work.
+ */
+ phb->dma_window_base_cur = first_phb ? (1 << 12) : 0;
+ first_phb = 0;
+ for (dn = first_dn, num_slots = 0; dn != NULL; dn = dn->sibling) {
+ create_pci_bus_tce_table((unsigned long)dn);
+ }
+ }
+}
+
+void create_tce_tables_for_busesLP(struct list_head *bus_list)
+{
+ struct list_head *ln;
+ struct pci_bus *bus;
+ struct device_node *busdn;
+ u32 *dma_window;
+ for (ln=bus_list->next; ln != bus_list; ln=ln->next) {
+ bus = pci_bus_b(ln);
+ busdn = PCI_GET_DN(bus);
+ /* NOTE: there should never be a window declared on a bus when
+ * child devices also have a window. If this should ever be
+ * architected, we probably want children to have priority.
+ * In reality, the PHB containing ISA has the property, but otherwise
+ * it is the pci-bridges that have the property.
+ */
+ dma_window = (u32 *)get_property(busdn, "ibm,dma-window", 0);
+ if (dma_window) {
+ /* Busno hasn't been copied yet.
+ * Do it now because getTceTableParmsPSeriesLP needs it.
+ */
+ busdn->busno = bus->number;
+ create_pci_bus_tce_table((unsigned long)busdn);
+ } else
+ create_tce_tables_for_busesLP(&bus->children);
+ }
+}
+
+void create_tce_tables(void) {
+ struct pci_dev *dev;
+ struct device_node *dn, *mydn;
+
+ if (naca->platform == PLATFORM_PSERIES_LPAR) {
+ create_tce_tables_for_busesLP(&pci_root_buses);
+ }
+ else {
+ create_tce_tables_for_buses(&pci_root_buses);
+ }
+ /* Now copy the tce_table ptr from the bus devices down to every
+ * pci device_node. This means get_tce_table() won't need to search
+ * up the device tree to find it.
+ */
+ pci_for_each_dev(dev) {
+ mydn = dn = PCI_GET_DN(dev);
+ while (dn && dn->tce_table == NULL)
+ dn = dn->parent;
+ if (dn) {
+ mydn->tce_table = dn->tce_table;
+ }
+ }
+}
+
+
+/*
+ * iSeries token = iSeries_Device_Node*
+ * pSeries token = pci_controller*
+ *
+ */
+void create_pci_bus_tce_table( unsigned long token ) {
+ struct TceTable * newTceTable;
+
+ PPCDBG(PPCDBG_TCE, "Entering create_pci_bus_tce_table.\n");
+ PPCDBG(PPCDBG_TCE, "\ttoken = 0x%lx\n", token);
+
+ newTceTable = (struct TceTable *)kmalloc( sizeof(struct TceTable), GFP_KERNEL );
+
+ /*****************************************************************/
+ /* For the iSeries machines, the HvTce Table can be one of three */
+ /* flavors, */
+ /* - Single bus TCE table, */
+	/* - Tce Table shared between buses, */
+ /* - Tce Table per logical slot. */
+ /*****************************************************************/
+ if(naca->platform == PLATFORM_ISERIES_LPAR) {
+
+ struct iSeries_Device_Node* DevNode = (struct iSeries_Device_Node*)token;
+ getTceTableParmsiSeries(DevNode,newTceTable);
+
+ /* Look for existing TCE table for this device. */
+ DevNode->DevTceTable = findHwTceTable(newTceTable );
+ if( DevNode->DevTceTable == NULL) {
+ DevNode->DevTceTable = build_tce_table( newTceTable );
+ }
+ else {
+ /* We're using a shared table, free this new one. */
+ kfree(newTceTable);
+ }
+ printk("Pci Device 0x%p TceTable: %p\n",DevNode,DevNode->DevTceTable);
+ return;
+ }
+ /* pSeries Leg */
+ else {
+ struct device_node *dn;
+ struct pci_controller *phb;
+
+ dn = (struct device_node *)token;
+ phb = dn->phb;
+ if (naca->platform == PLATFORM_PSERIES)
+ getTceTableParmsPSeries(phb, dn, newTceTable);
+ else
+ getTceTableParmsPSeriesLP(phb, dn, newTceTable);
+
+ dn->tce_table = build_tce_table( newTceTable );
+ }
+}
+
+/***********************************************************************/
+/* This function compares the known Tce tables to find a TceTable that */
+/* has already been built for hardware TCEs. */
+/* Search the complete device list for a TCE table already assigned.   */
+/* If the startOffset, index, and size match, the TCE table for this   */
+/* device has already been built and should be shared with it.         */
+/***********************************************************************/
+static struct TceTable* findHwTceTable(struct TceTable * newTceTable )
+{
+ struct list_head* Device_Node_Ptr = iSeries_Global_Device_List.next;
+ /* Cache the compare values. */
+ u64 startOffset = newTceTable->startOffset;
+ u64 index = newTceTable->index;
+ u64 size = newTceTable->size;
+
+ while(Device_Node_Ptr != &iSeries_Global_Device_List) {
+ struct iSeries_Device_Node* CmprNode = (struct iSeries_Device_Node*)Device_Node_Ptr;
+ if( CmprNode->DevTceTable != NULL &&
+ CmprNode->DevTceTable->tceType == TCE_PCI) {
+ if( CmprNode->DevTceTable->startOffset == startOffset &&
+ CmprNode->DevTceTable->index == index &&
+ CmprNode->DevTceTable->size == size ) {
+ printk("PCI TCE table matches 0x%p \n",CmprNode->DevTceTable);
+ return CmprNode->DevTceTable;
+ }
+ }
+ /* Get next Device Node in List */
+ Device_Node_Ptr = Device_Node_Ptr->next;
+ }
+ return NULL;
+}
+
+/***********************************************************************/
+/* Call Hv with the architected data structure to get TCE table info. */
+/* Put the returned data into the Linux representation of the          */
+/* TCE table data. */
+/* The Hardware Tce table comes in three flavors. */
+/* 1. TCE table shared between Buses. */
+/* 2. TCE table per Bus. */
+/* 3. TCE Table per IOA. */
+/***********************************************************************/
+static void getTceTableParmsiSeries(struct iSeries_Device_Node* DevNode,
+ struct TceTable* newTceTable )
+{
+ struct TceTableManagerCB* pciBusTceTableParms = (struct TceTableManagerCB*)kmalloc( sizeof(struct TceTableManagerCB), GFP_KERNEL );
+ if(pciBusTceTableParms == NULL) panic("PCI_DMA: TCE Table Allocation failed.");
+
+ memset( (void*)pciBusTceTableParms,0,sizeof(struct TceTableManagerCB) );
+ pciBusTceTableParms->busNumber = ISERIES_BUS(DevNode);
+ pciBusTceTableParms->logicalSlot = DevNode->LogicalSlot;
+ pciBusTceTableParms->virtualBusFlag = 0;
+
+ HvCallXm_getTceTableParms( REALADDR(pciBusTceTableParms) );
+
+ /* PciTceTableParms Bus:0x18 Slot:0x04 Start:0x000000 Offset:0x04c000 Size:0x0020 */
+ printk("PciTceTableParms Bus:0x%02lx Slot:0x%02x Start:0x%06lx Offset:0x%06lx Size:0x%04lx\n",
+ pciBusTceTableParms->busNumber,
+ pciBusTceTableParms->logicalSlot,
+ pciBusTceTableParms->start,
+ pciBusTceTableParms->startOffset,
+ pciBusTceTableParms->size);
+
+ if(pciBusTceTableParms->size == 0) {
+ printk("PCI_DMA: Possible Structure mismatch, 0x%p\n",pciBusTceTableParms);
+ panic( "PCI_DMA: pciBusTceTableParms->size is zero, halt here!");
+ }
+
+ newTceTable->size = pciBusTceTableParms->size;
+ newTceTable->busNumber = pciBusTceTableParms->busNumber;
+ newTceTable->startOffset = pciBusTceTableParms->startOffset;
+ newTceTable->index = pciBusTceTableParms->index;
+ newTceTable->tceType = TCE_PCI;
+
+ kfree(pciBusTceTableParms);
+}
+
+static void getTceTableParmsPSeries(struct pci_controller *phb,
+ struct device_node *dn,
+ struct TceTable *newTceTable ) {
+ phandle node;
+ unsigned long i;
+
+ node = ((struct device_node *)(phb->arch_data))->node;
+
+ PPCDBG(PPCDBG_TCEINIT, "getTceTableParms: start\n");
+ PPCDBG(PPCDBG_TCEINIT, "\tof_tce_table = 0x%lx\n", of_tce_table);
+ PPCDBG(PPCDBG_TCEINIT, "\tphb = 0x%lx\n", phb);
+ PPCDBG(PPCDBG_TCEINIT, "\tdn = 0x%lx\n", dn);
+ PPCDBG(PPCDBG_TCEINIT, "\tdn->name = %s\n", dn->name);
+ PPCDBG(PPCDBG_TCEINIT, "\tdn->full_name= %s\n", dn->full_name);
+ PPCDBG(PPCDBG_TCEINIT, "\tnewTceTable = 0x%lx\n", newTceTable);
+ PPCDBG(PPCDBG_TCEINIT, "\tdma_window_size = 0x%lx\n", phb->dma_window_size);
+
+ i = 0;
+ while(of_tce_table[i].node) {
+ PPCDBG(PPCDBG_TCEINIT, "\tof_tce_table[%d].node = 0x%lx\n",
+ i, of_tce_table[i].node);
+ PPCDBG(PPCDBG_TCEINIT, "\tof_tce_table[%d].base = 0x%lx\n",
+ i, of_tce_table[i].base);
+ PPCDBG(PPCDBG_TCEINIT, "\tof_tce_table[%d].size = 0x%lx\n",
+ i, of_tce_table[i].size >> PAGE_SHIFT);
+ PPCDBG(PPCDBG_TCEINIT, "\tphb->arch_data->node = 0x%lx\n",
+ node);
+
+ if(of_tce_table[i].node == node) {
+ memset((void *)of_tce_table[i].base,
+ 0, of_tce_table[i].size);
+ newTceTable->busNumber = phb->bus->number;
+
+ /* Units of tce entries. */
+ newTceTable->startOffset = phb->dma_window_base_cur;
+
+ /* Adjust the current table offset to the next */
+ /* region. Measured in TCE entries. Force an */
+			/* alignment to the size allotted per IOA. This */
+ /* makes it easier to remove the 1st 16MB. */
+ phb->dma_window_base_cur += (phb->dma_window_size>>3);
+ phb->dma_window_base_cur &=
+ ~((phb->dma_window_size>>3)-1);
+
+ /* Set the tce table size - measured in units */
+ /* of pages of tce table. */
+ newTceTable->size = ((phb->dma_window_base_cur -
+ newTceTable->startOffset) << 3)
+ >> PAGE_SHIFT;
+
+ /* Test if we are going over 2GB of DMA space. */
+ if(phb->dma_window_base_cur > (1 << 19)) {
+ panic("PCI_DMA: Unexpected number of IOAs under this PHB.\n");
+ }
+
+ newTceTable->base = of_tce_table[i].base;
+ newTceTable->index = 0;
+
+ PPCDBG(PPCDBG_TCEINIT,
+ "\tnewTceTable->base = 0x%lx\n",
+ newTceTable->base);
+ PPCDBG(PPCDBG_TCEINIT,
+ "\tnewTceTable->startOffset = 0x%lx"
+ "(# tce entries)\n",
+ newTceTable->startOffset);
+ PPCDBG(PPCDBG_TCEINIT,
+ "\tnewTceTable->size = 0x%lx"
+ "(# pages of tce table)\n",
+ newTceTable->size);
+ }
+ i++;
+ }
+}
+
+/*
+ * getTceTableParmsPSeriesLP
+ *
+ * Function: On pSeries LPAR systems, return TCE table info, given a pci bus.
+ *
+ * ToDo: properly interpret the ibm,dma-window property. The definition is:
+ * logical-bus-number (1 word)
+ * phys-address (#address-cells words)
+ * size (#size-cells words)
+ *
+ * Currently we hard code these sizes (more or less).
+ */
+static void getTceTableParmsPSeriesLP(struct pci_controller *phb,
+ struct device_node *dn,
+ struct TceTable *newTceTable ) {
+ u32 *dma_window = (u32 *)get_property(dn, "ibm,dma-window", 0);
+ if (!dma_window) {
+ panic("PCI_DMA: getTceTableParmsPSeriesLP: device %s has no ibm,dma-window property!\n", dn->full_name);
+ }
+
+ newTceTable->busNumber = dn->busno;
+ newTceTable->size = (((((unsigned long)dma_window[4] << 32) | (unsigned long)dma_window[5]) >> PAGE_SHIFT) << 3) >> PAGE_SHIFT;
+ newTceTable->startOffset = ((((unsigned long)dma_window[2] << 32) | (unsigned long)dma_window[3]) >> 12);
+ newTceTable->base = 0;
+ newTceTable->index = dma_window[0];
+ PPCDBG(PPCDBG_TCEINIT, "getTceTableParmsPSeriesLP for bus 0x%lx:\n", dn->busno);
+ PPCDBG(PPCDBG_TCEINIT, "\tDevice = %s\n", dn->full_name);
+ PPCDBG(PPCDBG_TCEINIT, "\tnewTceTable->index = 0x%lx\n", newTceTable->index);
+ PPCDBG(PPCDBG_TCEINIT, "\tnewTceTable->startOffset = 0x%lx\n", newTceTable->startOffset);
+ PPCDBG(PPCDBG_TCEINIT, "\tnewTceTable->size = 0x%lx\n", newTceTable->size);
+}
+
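+/* Illustrative worked example (editor's note, not part of the original
+ * patch), assuming 4 KB pages: a 1 GB window (size cells 0x0 0x40000000)
+ * starting at bus address 0 gives startOffset = 0 and
+ * size = ((0x40000000 >> 12) << 3) >> 12 = 0x200, i.e. 512 pages of TCE
+ * table holding 0x40000 (262144) entries, one per 4 KB page of the 1 GB
+ * DMA window.
+ */
+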
+/* Allocates a contiguous real buffer and creates TCEs over it.
+ * Returns the virtual address of the buffer and sets dma_handle
+ * to the dma address (tce) of the first page.
+ */
+void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size,
+ dma_addr_t *dma_handle)
+{
+ struct TceTable * tbl;
+ void *ret = NULL;
+ unsigned order, nPages;
+ dma_addr_t tce;
+
+ PPCDBG(PPCDBG_TCE, "pci_alloc_consistent:\n");
+ PPCDBG(PPCDBG_TCE, "\thwdev = 0x%16.16lx\n", hwdev);
+ PPCDBG(PPCDBG_TCE, "\tsize = 0x%16.16lx\n", size);
+ PPCDBG(PPCDBG_TCE, "\tdma_handle = 0x%16.16lx\n", dma_handle);
+
+ size = PAGE_ALIGN(size);
+ order = get_order(size);
+ nPages = 1 << order;
+
+	/* Client asked for way too much space. This is checked later anyway */
+ /* It is easier to debug here for the drivers than in the tce tables.*/
+ if(order >= NUM_TCE_LEVELS) {
+		printk("PCI_DMA: pci_alloc_consistent size too large: 0x%lx \n",size);
+ return (void *)NO_TCE;
+ }
+
+ tbl = get_tce_table(hwdev);
+
+ if ( tbl ) {
+ /* Alloc enough pages (and possibly more) */
+ ret = (void *)__get_free_pages( GFP_ATOMIC, order );
+ if ( ret ) {
+ /* Page allocation succeeded */
+ memset(ret, 0, nPages << PAGE_SHIFT);
+ /* Set up tces to cover the allocated range */
+ tce = get_tces( tbl, order, ret, nPages, PCI_DMA_BIDIRECTIONAL );
+ if ( tce == NO_TCE ) {
+ PPCDBG(PPCDBG_TCE, "pci_alloc_consistent: get_tces failed\n" );
+ free_pages( (unsigned long)ret, order );
+ ret = NULL;
+ }
+ else
+ {
+ *dma_handle = tce;
+ }
+ }
+ else PPCDBG(PPCDBG_TCE, "pci_alloc_consistent: __get_free_pages failed for order = %d\n", order);
+ }
+ else PPCDBG(PPCDBG_TCE, "pci_alloc_consistent: get_tce_table failed for 0x%016lx\n", hwdev);
+
+ PPCDBG(PPCDBG_TCE, "\tpci_alloc_consistent: dma_handle = 0x%16.16lx\n", *dma_handle);
+ PPCDBG(PPCDBG_TCE, "\tpci_alloc_consistent: return = 0x%16.16lx\n", ret);
+ return ret;
+}
+
+void pci_free_consistent(struct pci_dev *hwdev, size_t size,
+ void *vaddr, dma_addr_t dma_handle)
+{
+ struct TceTable * tbl;
+ unsigned order, nPages;
+
+ PPCDBG(PPCDBG_TCE, "pci_free_consistent:\n");
+ PPCDBG(PPCDBG_TCE, "\thwdev = 0x%16.16lx, size = 0x%16.16lx, dma_handle = 0x%16.16lx, vaddr = 0x%16.16lx\n", hwdev, size, dma_handle, vaddr);
+
+ size = PAGE_ALIGN(size);
+ order = get_order(size);
+ nPages = 1 << order;
+
+	/* Client asked for way too much space. This is checked later anyway */
+ /* It is easier to debug here for the drivers than in the tce tables.*/
+ if(order >= NUM_TCE_LEVELS) {
+		printk("PCI_DMA: pci_free_consistent size too large: 0x%lx \n",size);
+ return;
+ }
+
+ tbl = get_tce_table(hwdev);
+
+ if ( tbl ) {
+ tce_free(tbl, dma_handle, order, nPages);
+ free_pages( (unsigned long)vaddr, order );
+ }
+}
+
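+/* Editor's illustrative sketch, not part of the original patch: the usual
+ * driver-side pattern for the consistent-allocation pair above.  The
+ * device pointer, size and error handling are assumptions made purely for
+ * illustration, and the sketch is kept out of the build with #if 0.
+ */
+#if 0
+static int example_alloc_ring(struct pci_dev *pdev)
+{
+	dma_addr_t ring_dma;
+	void *ring;
+
+	/* One page of coherent memory; ring_dma receives the bus (TCE)
+	 * address the adapter should be programmed with.
+	 */
+	ring = pci_alloc_consistent(pdev, PAGE_SIZE, &ring_dma);
+	if (ring == NULL)
+		return -ENOMEM;
+
+	/* ... hand ring_dma to the adapter, use ring from the CPU ... */
+
+	pci_free_consistent(pdev, PAGE_SIZE, ring, ring_dma);
+	return 0;
+}
+#endif
+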
+/* Creates TCEs for a user provided buffer. The user buffer must be
+ * contiguous real kernel storage (not vmalloc). The address of the buffer
+ * passed here is the kernel (virtual) address of the buffer. The buffer
+ * need not be page aligned, the dma_addr_t returned will point to the same
+ * byte within the page as vaddr.
+ */
+dma_addr_t pci_map_single(struct pci_dev *hwdev, void *vaddr,
+ size_t size, int direction )
+{
+ struct TceTable * tbl;
+ dma_addr_t dma_handle = NO_TCE;
+ unsigned long uaddr;
+ unsigned order, nPages;
+
+ PPCDBG(PPCDBG_TCE, "pci_map_single:\n");
+ PPCDBG(PPCDBG_TCE, "\thwdev = 0x%16.16lx, size = 0x%16.16lx, direction = 0x%16.16lx, vaddr = 0x%16.16lx\n", hwdev, size, direction, vaddr);
+ if ( direction == PCI_DMA_NONE )
+ BUG();
+
+ uaddr = (unsigned long)vaddr;
+ nPages = PAGE_ALIGN( uaddr + size ) - ( uaddr & PAGE_MASK );
+ order = get_order( nPages & PAGE_MASK );
+ nPages >>= PAGE_SHIFT;
+
+	/* Client asked for way too much space. This is checked later anyway */
+ /* It is easier to debug here for the drivers than in the tce tables.*/
+ if(order >= NUM_TCE_LEVELS) {
+		printk("PCI_DMA: pci_map_single size too large: 0x%lx \n",size);
+ return NO_TCE;
+ }
+
+ tbl = get_tce_table(hwdev);
+
+ if ( tbl ) {
+ dma_handle = get_tces( tbl, order, vaddr, nPages, direction );
+ dma_handle |= ( uaddr & ~PAGE_MASK );
+ }
+
+ return dma_handle;
+}
+
+void pci_unmap_single( struct pci_dev *hwdev, dma_addr_t dma_handle, size_t size, int direction )
+{
+ struct TceTable * tbl;
+ unsigned order, nPages;
+
+ PPCDBG(PPCDBG_TCE, "pci_unmap_single:\n");
+ PPCDBG(PPCDBG_TCE, "\thwdev = 0x%16.16lx, size = 0x%16.16lx, direction = 0x%16.16lx, dma_handle = 0x%16.16lx\n", hwdev, size, direction, dma_handle);
+ if ( direction == PCI_DMA_NONE )
+ BUG();
+
+ nPages = PAGE_ALIGN( dma_handle + size ) - ( dma_handle & PAGE_MASK );
+ order = get_order( nPages & PAGE_MASK );
+ nPages >>= PAGE_SHIFT;
+
+	/* Client asked for way too much space. This is checked later anyway */
+ /* It is easier to debug here for the drivers than in the tce tables.*/
+ if(order >= NUM_TCE_LEVELS) {
+		printk("PCI_DMA: pci_unmap_single size too large: 0x%lx \n",size);
+ return;
+ }
+
+ tbl = get_tce_table(hwdev);
+
+ if ( tbl )
+ tce_free(tbl, dma_handle, order, nPages);
+
+}
+
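+/* Editor's illustrative sketch, not part of the original patch: the usual
+ * streaming-mapping pattern for the map/unmap pair above.  The device,
+ * buffer and length are assumptions made purely for illustration, and the
+ * sketch is kept out of the build with #if 0.
+ */
+#if 0
+static int example_tx(struct pci_dev *pdev, void *buf, size_t len)
+{
+	dma_addr_t handle;
+
+	/* Map the buffer for device reads; the returned bus address is
+	 * what gets written into the adapter's descriptor.
+	 */
+	handle = pci_map_single(pdev, buf, len, PCI_DMA_TODEVICE);
+	if (handle == NO_TCE)
+		return -ENOMEM;
+
+	/* ... start the I/O and wait for it to complete ... */
+
+	pci_unmap_single(pdev, handle, len, PCI_DMA_TODEVICE);
+	return 0;
+}
+#endif
+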
+/* Figure out how many TCEs are actually going to be required
+ * to map this scatterlist. This code is not optimal. It
+ * takes into account the case where entry n ends in the same
+ * page in which entry n+1 starts. It does not handle the
+ * general case of entry n ending in the same page in which
+ * entry m starts.
+ */
+static unsigned long num_tces_sg( struct scatterlist *sg, int nents )
+{
+ unsigned long nTces, numPages, startPage, endPage, prevEndPage;
+ unsigned i;
+
+ prevEndPage = 0;
+ nTces = 0;
+
+ for (i=0; i<nents; ++i) {
+ /* Compute the starting page number and
+ * the ending page number for this entry
+ */
+ startPage = (unsigned long)sg->address >> PAGE_SHIFT;
+ endPage = ((unsigned long)sg->address + sg->length - 1) >> PAGE_SHIFT;
+ numPages = endPage - startPage + 1;
+ /* Simple optimization: if the previous entry ended
+ * in the same page in which this entry starts
+ * then we can reduce the required pages by one.
+ * This matches assumptions in fill_scatterlist_sg and
+ * create_tces_sg
+ */
+ if ( startPage == prevEndPage )
+ --numPages;
+ nTces += numPages;
+ prevEndPage = endPage;
+ sg++;
+ }
+ return nTces;
+}
+
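+/* Illustrative worked example (editor's note, not part of the original
+ * patch), assuming 4 KB pages: entry 0 at address 0x1800 with length
+ * 0x1000 spans pages 1-2 (2 TCEs); entry 1 at address 0x2800 with length
+ * 0x800 lies entirely in page 2, which entry 0 already ended in, so it
+ * adds no TCE.  num_tces_sg() returns 2 rather than 3.
+ */
+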
+/* Fill in the dma data in the scatterlist
+ * return the number of dma sg entries created
+ */
+static unsigned fill_scatterlist_sg( struct scatterlist *sg, int nents,
+ dma_addr_t dma_addr , unsigned long numTces)
+{
+ struct scatterlist *dma_sg;
+ u32 cur_start_dma;
+ unsigned long cur_len_dma, cur_end_virt, uaddr;
+ unsigned num_dma_ents;
+
+ dma_sg = sg;
+ num_dma_ents = 1;
+
+ /* Process the first sg entry */
+ cur_start_dma = dma_addr + ((unsigned long)sg->address & (~PAGE_MASK));
+ cur_len_dma = sg->length;
+ /* cur_end_virt holds the address of the byte immediately after the
+ * end of the current buffer.
+ */
+ cur_end_virt = (unsigned long)sg->address + cur_len_dma;
+ /* Later code assumes that unused sg->dma_address and sg->dma_length
+ * fields will be zero. Other archs seem to assume that the user
+ * (device driver) guarantees that...I don't want to depend on that
+ */
+ sg->dma_address = sg->dma_length = 0;
+
+ /* Process the rest of the sg entries */
+ while (--nents) {
+ ++sg;
+ /* Clear possibly unused fields. Note: sg >= dma_sg so
+ * this can't be clearing a field we've already set
+ */
+ sg->dma_address = sg->dma_length = 0;
+
+ /* Check if it is possible to make this next entry
+ * contiguous (in dma space) with the previous entry.
+ */
+
+ /* The entries can be contiguous in dma space if
+ * the previous entry ends immediately before the
+ * start of the current entry (in virtual space)
+ * or if the previous entry ends at a page boundary
+ * and the current entry starts at a page boundary.
+ */
+ uaddr = (unsigned long)sg->address;
+ if ( ( uaddr != cur_end_virt ) &&
+ ( ( ( uaddr | cur_end_virt ) & (~PAGE_MASK) ) ||
+ ( ( uaddr & PAGE_MASK ) == ( ( cur_end_virt-1 ) & PAGE_MASK ) ) ) ) {
+ /* This entry can not be contiguous in dma space.
+ * save the previous dma entry and start a new one
+ */
+ dma_sg->dma_address = cur_start_dma;
+ dma_sg->dma_length = cur_len_dma;
+
+ ++dma_sg;
+ ++num_dma_ents;
+
+ cur_start_dma += cur_len_dma-1;
+ /* If the previous entry ends and this entry starts
+ * in the same page then they share a tce. In that
+ * case don't bump cur_start_dma to the next page
+ * in dma space. This matches assumptions made in
+ * num_tces_sg and create_tces_sg.
+ */
+ if ((uaddr & PAGE_MASK) == ((cur_end_virt-1) & PAGE_MASK))
+ cur_start_dma &= PAGE_MASK;
+ else
+ cur_start_dma = PAGE_ALIGN(cur_start_dma+1);
+ cur_start_dma += ( uaddr & (~PAGE_MASK) );
+ cur_len_dma = 0;
+ }
+ /* Accumulate the length of this entry for the next
+ * dma entry
+ */
+ cur_len_dma += sg->length;
+ cur_end_virt = uaddr + sg->length;
+ }
+ /* Fill in the last dma entry */
+ dma_sg->dma_address = cur_start_dma;
+ dma_sg->dma_length = cur_len_dma;
+
+ if ((((cur_start_dma +cur_len_dma - 1)>> PAGE_SHIFT) - (dma_addr >> PAGE_SHIFT) + 1) != numTces)
+ {
+ PPCDBG(PPCDBG_TCE, "fill_scatterlist_sg: numTces %ld, used tces %d\n",
+ numTces,
+ (unsigned)(((cur_start_dma + cur_len_dma - 1) >> PAGE_SHIFT) - (dma_addr >> PAGE_SHIFT) + 1));
+ }
+
+
+ return num_dma_ents;
+}
+
+/* Call the hypervisor to create the TCE entries.
+ * return the number of TCEs created
+ */
+static dma_addr_t create_tces_sg( struct TceTable *tbl, struct scatterlist *sg,
+ int nents, unsigned numTces, int direction )
+{
+ unsigned order, i, j;
+ unsigned long startPage, endPage, prevEndPage, numPages, uaddr;
+ long tcenum, starttcenum;
+ dma_addr_t dmaAddr;
+
+ dmaAddr = NO_TCE;
+
+ order = get_order( numTces << PAGE_SHIFT );
+	/* Client asked for way too much space. This is checked later anyway */
+ /* It is easier to debug here for the drivers than in the tce tables.*/
+ if(order >= NUM_TCE_LEVELS) {
+		printk("PCI_DMA: create_tces_sg size too large: 0x%x \n",(numTces << PAGE_SHIFT));
+ return NO_TCE;
+ }
+
+ /* allocate a block of tces */
+ tcenum = alloc_tce_range( tbl, order );
+ if ( tcenum != -1 ) {
+ tcenum += tbl->startOffset;
+ starttcenum = tcenum;
+ dmaAddr = tcenum << PAGE_SHIFT;
+ prevEndPage = 0;
+ for (j=0; j<nents; ++j) {
+ startPage = (unsigned long)sg->address >> PAGE_SHIFT;
+ endPage = ((unsigned long)sg->address + sg->length - 1) >> PAGE_SHIFT;
+ numPages = endPage - startPage + 1;
+
+ uaddr = (unsigned long)sg->address;
+
+ /* If the previous entry ended in the same page that
+ * the current page starts then they share that
+ * tce and we reduce the number of tces we need
+ * by one. This matches assumptions made in
+ * num_tces_sg and fill_scatterlist_sg
+ */
+ if ( startPage == prevEndPage ) {
+ --numPages;
+ uaddr += PAGE_SIZE;
+ }
+
+ for (i=0; i<numPages; ++i) {
+ ppc_md.tce_build(tbl, tcenum, uaddr, direction);
+ ++tcenum;
+ uaddr += PAGE_SIZE;
+ }
+
+ prevEndPage = endPage;
+ sg++;
+ }
+ /* Make sure the update is visible to hardware.
+ sync required to synchronize the update to
+ the TCE table with the MMIO that will send
+ the bus address to the IOA */
+ __asm__ __volatile__ ("sync" : : : "memory");
+
+ if ((tcenum - starttcenum) != numTces)
+ PPCDBG(PPCDBG_TCE, "create_tces_sg: numTces %d, tces used %d\n",
+ numTces, (unsigned)(tcenum - starttcenum));
+
+ }
+
+ return dmaAddr;
+}
+
+int pci_map_sg( struct pci_dev *hwdev, struct scatterlist *sg, int nents, int direction )
+{
+ struct TceTable * tbl;
+ unsigned numTces;
+	int num_dma = 0;	/* stays 0 if no TCE table is found */
+ dma_addr_t dma_handle;
+
+ PPCDBG(PPCDBG_TCE, "pci_map_sg:\n");
+ PPCDBG(PPCDBG_TCE, "\thwdev = 0x%16.16lx, sg = 0x%16.16lx, direction = 0x%16.16lx, nents = 0x%16.16lx\n", hwdev, sg, direction, nents);
+ /* Fast path for a single entry scatterlist */
+ if ( nents == 1 ) {
+ sg->dma_address = pci_map_single( hwdev, sg->address,
+ sg->length, direction );
+ sg->dma_length = sg->length;
+ return 1;
+ }
+
+ if ( direction == PCI_DMA_NONE )
+ BUG();
+
+ tbl = get_tce_table(hwdev);
+
+ if ( tbl ) {
+ /* Compute the number of tces required */
+ numTces = num_tces_sg( sg, nents );
+ /* Create the tces and get the dma address */
+ dma_handle = create_tces_sg( tbl, sg, nents, numTces, direction );
+
+ /* Fill in the dma scatterlist */
+ num_dma = fill_scatterlist_sg( sg, nents, dma_handle, numTces );
+ }
+
+ return num_dma;
+}
+
+void pci_unmap_sg( struct pci_dev *hwdev, struct scatterlist *sg, int nelms, int direction )
+{
+ struct TceTable * tbl;
+ unsigned order, numTces, i;
+ dma_addr_t dma_end_page, dma_start_page;
+
+ PPCDBG(PPCDBG_TCE, "pci_unmap_sg:\n");
+ PPCDBG(PPCDBG_TCE, "\thwdev = 0x%16.16lx, sg = 0x%16.16lx, direction = 0x%16.16lx, nelms = 0x%16.16lx\n", hwdev, sg, direction, nelms);
+
+ if ( direction == PCI_DMA_NONE )
+ BUG();
+
+ dma_start_page = sg->dma_address & PAGE_MASK;
+ for ( i=nelms; i>0; --i ) {
+ unsigned k = i - 1;
+ if ( sg[k].dma_length ) {
+ dma_end_page = ( sg[k].dma_address +
+ sg[k].dma_length - 1 ) & PAGE_MASK;
+ break;
+ }
+ }
+
+ numTces = ((dma_end_page - dma_start_page ) >> PAGE_SHIFT) + 1;
+ order = get_order( numTces << PAGE_SHIFT );
+
+	/* Client asked for way too much space. This is checked later anyway */
+ /* It is easier to debug here for the drivers than in the tce tables.*/
+ if(order >= NUM_TCE_LEVELS) {
+		printk("PCI_DMA: pci_unmap_sg size too large: 0x%x \n",(numTces << PAGE_SHIFT));
+ return;
+ }
+
+ tbl = get_tce_table(hwdev);
+
+ if ( tbl )
+ tce_free( tbl, dma_start_page, order, numTces );
+
+}
+
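+/* Editor's illustrative sketch, not part of the original patch: how a
+ * driver might drive the scatter/gather mapping above.  The scatterlist
+ * setup and device are assumptions made purely for illustration, and the
+ * sketch is kept out of the build with #if 0.
+ */
+#if 0
+static void example_map_sg(struct pci_dev *pdev, struct scatterlist *sg,
+			   int nents)
+{
+	int i, num_dma;
+
+	/* Returns the number of dma_address/dma_length pairs filled in,
+	 * which may be fewer than nents when adjacent entries were merged.
+	 */
+	num_dma = pci_map_sg(pdev, sg, nents, PCI_DMA_FROMDEVICE);
+
+	for (i = 0; i < num_dma; i++) {
+		/* ... program descriptor i with sg[i].dma_address
+		 * and sg[i].dma_length ...
+		 */
+	}
+
+	pci_unmap_sg(pdev, sg, nents, PCI_DMA_FROMDEVICE);
+}
+#endif
+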
+/*
+ * phb_tce_table_init
+ *
+ * Function: Display TCE config registers. Could be easily changed
+ * to initialize the hardware to use TCEs.
+ */
+unsigned long phb_tce_table_init(struct pci_controller *phb) {
+ unsigned int r, cfg_rw, i;
+ unsigned long r64;
+ phandle node;
+
+ PPCDBG(PPCDBG_TCE, "phb_tce_table_init: start.\n");
+
+ node = ((struct device_node *)(phb->arch_data))->node;
+
+ PPCDBG(PPCDBG_TCEINIT, "\tphb = 0x%lx\n", phb);
+ PPCDBG(PPCDBG_TCEINIT, "\tphb->type = 0x%lx\n", phb->type);
+ PPCDBG(PPCDBG_TCEINIT, "\tphb->phb_regs = 0x%lx\n", phb->phb_regs);
+ PPCDBG(PPCDBG_TCEINIT, "\tphb->chip_regs = 0x%lx\n", phb->chip_regs);
+ PPCDBG(PPCDBG_TCEINIT, "\tphb: node = 0x%lx\n", node);
+ PPCDBG(PPCDBG_TCEINIT, "\tphb->arch_data = 0x%lx\n", phb->arch_data);
+
+ i = 0;
+ while(of_tce_table[i].node) {
+ if(of_tce_table[i].node == node) {
+ if(phb->type == phb_type_python) {
+ r = *(((unsigned int *)phb->phb_regs) + (0xf10>>2));
+ PPCDBG(PPCDBG_TCEINIT, "\tTAR(low) = 0x%x\n", r);
+ r = *(((unsigned int *)phb->phb_regs) + (0xf00>>2));
+ PPCDBG(PPCDBG_TCEINIT, "\tTAR(high) = 0x%x\n", r);
+ r = *(((unsigned int *)phb->phb_regs) + (0xfd0>>2));
+ PPCDBG(PPCDBG_TCEINIT, "\tPHB cfg(rw) = 0x%x\n", r);
+ break;
+ } else if(phb->type == phb_type_speedwagon) {
+ r64 = *(((unsigned long *)phb->chip_regs) +
+ (0x800>>3));
+ PPCDBG(PPCDBG_TCEINIT, "\tNCFG = 0x%lx\n", r64);
+ r64 = *(((unsigned long *)phb->chip_regs) +
+ (0x580>>3));
+ PPCDBG(PPCDBG_TCEINIT, "\tTAR0 = 0x%lx\n", r64);
+ r64 = *(((unsigned long *)phb->chip_regs) +
+ (0x588>>3));
+ PPCDBG(PPCDBG_TCEINIT, "\tTAR1 = 0x%lx\n", r64);
+ r64 = *(((unsigned long *)phb->chip_regs) +
+ (0x590>>3));
+ PPCDBG(PPCDBG_TCEINIT, "\tTAR2 = 0x%lx\n", r64);
+ r64 = *(((unsigned long *)phb->chip_regs) +
+ (0x598>>3));
+ PPCDBG(PPCDBG_TCEINIT, "\tTAR3 = 0x%lx\n", r64);
+ cfg_rw = *(((unsigned int *)phb->chip_regs) +
+ ((0x160 +
+ (((phb->local_number)+8)<<12))>>2));
+ PPCDBG(PPCDBG_TCEINIT, "\tcfg_rw = 0x%x\n", cfg_rw);
+ }
+ }
+ i++;
+ }
+
+ PPCDBG(PPCDBG_TCEINIT, "phb_tce_table_init: done\n");
+
+ return(0);
+}
+
+/* These are called very early. */
+void tce_init_pSeries(void)
+{
+ ppc_md.tce_build = tce_build_pSeries;
+ ppc_md.tce_free_one = tce_free_one_pSeries;
+}
+
+void tce_init_iSeries(void)
+{
+ ppc_md.tce_build = tce_build_iSeries;
+ ppc_md.tce_free_one = tce_free_one_iSeries;
+}