From: Michael S. Tsirkin <mst@mellanox.co.il>

Locking during the poll CQ operation can be reduced by holding the CQ
lock(s) while a QP is being removed from the QP array: the CQ polling
code can then look up the QP without taking the QP table lock.  This
also avoids an extra atomic operation for reference counting.

Signed-off-by: Michael S. Tsirkin <mst@mellanox.co.il>
Signed-off-by: Roland Dreier <roland@topspin.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
---

 25-akpm/drivers/infiniband/hw/mthca/mthca_cq.c |   16 ++++------
 25-akpm/drivers/infiniband/hw/mthca/mthca_qp.c |   37 ++++++++++++++++++++++---
 2 files changed, 39 insertions(+), 14 deletions(-)

diff -puN drivers/infiniband/hw/mthca/mthca_cq.c~ib-mthca-improve-cq-locking-part-2 drivers/infiniband/hw/mthca/mthca_cq.c
--- 25/drivers/infiniband/hw/mthca/mthca_cq.c~ib-mthca-improve-cq-locking-part-2	2005-03-03 17:39:54.000000000 -0800
+++ 25-akpm/drivers/infiniband/hw/mthca/mthca_cq.c	2005-03-03 17:39:54.000000000 -0800
@@ -418,14 +418,14 @@ static inline int mthca_poll_one(struct 
 			spin_unlock(&(*cur_qp)->lock);
 		}
 
-		spin_lock(&dev->qp_table.lock);
+		/*
+		 * We do not have to take the QP table lock here,
+		 * because CQs will be locked while QPs are removed
+		 * from the table.
+		 */
 		*cur_qp = mthca_array_get(&dev->qp_table.qp,
 					  be32_to_cpu(cqe->my_qpn) &
 					  (dev->limits.num_qps - 1));
-		if (*cur_qp)
-			atomic_inc(&(*cur_qp)->refcount);
-		spin_unlock(&dev->qp_table.lock);
-
 		if (!*cur_qp) {
 			mthca_warn(dev, "CQ entry for unknown QP %06x\n",
 				   be32_to_cpu(cqe->my_qpn) & 0xffffff);
@@ -537,12 +537,8 @@ int mthca_poll_cq(struct ib_cq *ibcq, in
 		inc_cons_index(dev, cq, freed);
 	}
 
-	if (qp) {
+	if (qp)
 		spin_unlock(&qp->lock);
-		if (atomic_dec_and_test(&qp->refcount))
-			wake_up(&qp->wait);
-	}
-
 
 	spin_unlock_irqrestore(&cq->lock, flags);
 
diff -puN drivers/infiniband/hw/mthca/mthca_qp.c~ib-mthca-improve-cq-locking-part-2 drivers/infiniband/hw/mthca/mthca_qp.c
--- 25/drivers/infiniband/hw/mthca/mthca_qp.c~ib-mthca-improve-cq-locking-part-2	2005-03-03 17:39:54.000000000 -0800
+++ 25-akpm/drivers/infiniband/hw/mthca/mthca_qp.c	2005-03-03 17:39:54.000000000 -0800
@@ -1083,9 +1083,21 @@ int mthca_alloc_sqp(struct mthca_dev *de
 	return 0;
 
  err_out_free:
-	spin_lock_irq(&dev->qp_table.lock);
+	/*
+	 * Lock CQs here, so that CQ polling code can do QP lookup
+	 * without taking a lock.
+	 */
+	spin_lock_irq(&send_cq->lock);
+	if (send_cq != recv_cq)
+		spin_lock(&recv_cq->lock);
+
+	spin_lock(&dev->qp_table.lock);
 	mthca_array_clear(&dev->qp_table.qp, mqpn);
-	spin_unlock_irq(&dev->qp_table.lock);
+	spin_unlock(&dev->qp_table.lock);
+
+	if (send_cq != recv_cq)
+		spin_unlock(&recv_cq->lock);
+	spin_unlock_irq(&send_cq->lock);
 
  err_out:
 	dma_free_coherent(&dev->pdev->dev, sqp->header_buf_size,
@@ -1100,11 +1112,28 @@ void mthca_free_qp(struct mthca_dev *dev
 	u8 status;
 	int size;
 	int i;
+	struct mthca_cq *send_cq;
+	struct mthca_cq *recv_cq;
+
+	send_cq = to_mcq(qp->ibqp.send_cq);
+	recv_cq = to_mcq(qp->ibqp.recv_cq);
 
-	spin_lock_irq(&dev->qp_table.lock);
+	/*
+	 * Lock CQs here, so that CQ polling code can do QP lookup
+	 * without taking a lock.
+	 */
+	spin_lock_irq(&send_cq->lock);
+	if (send_cq != recv_cq)
+		spin_lock(&recv_cq->lock);
+
+	spin_lock(&dev->qp_table.lock);
 	mthca_array_clear(&dev->qp_table.qp,
 			  qp->qpn & (dev->limits.num_qps - 1));
-	spin_unlock_irq(&dev->qp_table.lock);
+	spin_unlock(&dev->qp_table.lock);
+
+	if (send_cq != recv_cq)
+		spin_unlock(&recv_cq->lock);
+	spin_unlock_irq(&send_cq->lock);
 
 	atomic_dec(&qp->refcount);
 	wait_event(qp->wait, !atomic_read(&qp->refcount));
_