From: Mikael Pettersson <mikpe@csd.uu.se>

This patch eliminates the illegal task_lock() perfctr's
inheritance feature introduced in release_task().

- Changed __vperfctr_release() to use schedule_work() to do
  the task_lock(parent) etc in a different thread's context.
  This is because release_task() has a write lock on the
  task list lock, and task_lock() is forbidden in that case.
  When current == parent, this is bypassed and the merge work
  is done immediately without taking task_lock().
  Added children_lock to struct vperfctr, to synchronise
  accesses (release/update_control/read) to the children array.

Signed-off-by: Mikael Pettersson <mikpe@csd.uu.se>
Signed-off-by: Andrew Morton <akpm@osdl.org>
---

 25-akpm/Documentation/perfctr/virtual.txt |   12 +--
 25-akpm/drivers/perfctr/version.h         |    2 
 25-akpm/drivers/perfctr/virtual.c         |   97 ++++++++++++++++++++----------
 3 files changed, 70 insertions(+), 41 deletions(-)

diff -puN Documentation/perfctr/virtual.txt~perfctr-inheritance-locking-fix Documentation/perfctr/virtual.txt
--- 25/Documentation/perfctr/virtual.txt~perfctr-inheritance-locking-fix	Fri Nov 12 16:19:56 2004
+++ 25-akpm/Documentation/perfctr/virtual.txt	Fri Nov 12 16:19:57 2004
@@ -1,4 +1,4 @@
-$Id: perfctr-documentation-update.patch,v 1.1 2004/07/12 05:41:57 akpm Exp $
+$Id: virtual.txt,v 1.3 2004/08/09 09:42:22 mikpe Exp $
 
 VIRTUAL PER-PROCESS PERFORMANCE COUNTERS
 ========================================
@@ -140,13 +140,9 @@ There are five types of accesses to a th
    before accessing the perfctr pointer.
 
 5. release_task().
-   While reaping a child, the kernel only takes the tasklist_lock to
-   prevent the parent from changing or disappearing. This does not
-   prevent the parent's perfctr state pointer from changing. Concurrent
-   accesses to the parent's "children counts" state are also possible.
-
-   To avoid these issues, perfctr_release_task() performs a task_lock()
-   on the parent.
+   Reaping a child may or may not be done by the parent of that child.
+   When done by the parent, no lock is taken. Otherwise, a task_lock()
+   on the parent is done before accessing its thread's perfctr pointer.
 
 The Pseudo File System
 ----------------------
diff -puN drivers/perfctr/version.h~perfctr-inheritance-locking-fix drivers/perfctr/version.h
--- 25/drivers/perfctr/version.h~perfctr-inheritance-locking-fix	Fri Nov 12 16:19:56 2004
+++ 25-akpm/drivers/perfctr/version.h	Fri Nov 12 16:19:57 2004
@@ -1 +1 @@
-#define VERSION "2.7.4"
+#define VERSION "2.7.5"
diff -puN drivers/perfctr/virtual.c~perfctr-inheritance-locking-fix drivers/perfctr/virtual.c
--- 25/drivers/perfctr/virtual.c~perfctr-inheritance-locking-fix	Fri Nov 12 16:19:56 2004
+++ 25-akpm/drivers/perfctr/virtual.c	Fri Nov 12 16:19:57 2004
@@ -1,4 +1,4 @@
-/* $Id: virtual.c,v 1.95 2004/05/31 20:36:37 mikpe Exp $
+/* $Id: virtual.c,v 1.104 2004/08/09 09:42:22 mikpe Exp $
  * Virtual per-process performance counters.
  *
  * Copyright (C) 1999-2004  Mikael Pettersson
@@ -42,10 +42,15 @@ struct vperfctr {
 #ifdef CONFIG_PERFCTR_INTERRUPT_SUPPORT
 	unsigned int iresume_cstatus;
 #endif
-	/* protected by task_lock(owner) */
+	/* children_lock protects inheritance_id and children,
+	   when parent is not the one doing release_task() */
+	spinlock_t children_lock;
 	unsigned long long inheritance_id;
 	struct perfctr_sum_ctrs children;
-	struct work_struct free_work;
+	/* schedule_work() data for when an operation cannot be
+	   done in the current context due to locking rules */
+	struct work_struct work;
+	struct task_struct *parent_tsk;
 };
 #define IS_RUNNING(perfctr)	perfctr_cstatus_enabled((perfctr)->cpu_state.cstatus)
 
@@ -172,8 +177,8 @@ static void schedule_put_vperfctr(struct
 {
 	if (!atomic_dec_and_test(&perfctr->count))
 		return;
-	INIT_WORK(&perfctr->free_work, scheduled_vperfctr_free, perfctr);
-	schedule_work(&perfctr->free_work);
+	INIT_WORK(&perfctr->work, scheduled_vperfctr_free, perfctr);
+	schedule_work(&perfctr->work);
 }
 
 static unsigned long long new_inheritance_id(void)
@@ -396,39 +401,67 @@ void __vperfctr_exit(struct vperfctr *pe
  * A task is being released. If it inherited its perfctr settings
  * from its parent, then merge its final counts back into the parent.
  * Then unlink the child's perfctr.
- * PRE: caller holds tasklist_lock.
+ * PRE: caller has write_lock_irq(&tasklist_lock).
  * PREEMPT note: preemption is disabled due to tasklist_lock.
+ *
+ * When current == parent_tsk, the child's counts can be merged
+ * into the parent's immediately. This is the common case.
+ *
+ * When current != parent_tsk, the parent must be task_lock()ed
+ * before its perfctr state can be accessed. task_lock() is illegal
+ * here due to the write_lock_irq(&tasklist_lock) in release_task(),
+ * so the operation is done via schedule_work().
  */
-void __vperfctr_release(struct task_struct *child_tsk)
+static void do_vperfctr_release(struct vperfctr *child_perfctr, struct task_struct *parent_tsk)
 {
-	struct task_struct *parent_tsk;
 	struct vperfctr *parent_perfctr;
-	struct vperfctr *child_perfctr;
 	unsigned int cstatus, nrctrs, i;
 
-	/* Our caller holds tasklist_lock, protecting child_tsk->parent.
-	   We must also task_lock(child_tsk->parent), to prevent its
-	   ->thread.perfctr from changing. */
-	parent_tsk = child_tsk->parent;
-	task_lock(parent_tsk);
 	parent_perfctr = parent_tsk->thread.perfctr;
-	child_perfctr = child_tsk->thread.perfctr;
-	if (parent_perfctr && child_perfctr &&
-	    parent_perfctr->inheritance_id == child_perfctr->inheritance_id) {
-		cstatus = parent_perfctr->cpu_state.cstatus;
-		if (perfctr_cstatus_has_tsc(cstatus))
-			parent_perfctr->children.tsc +=
-				child_perfctr->cpu_state.tsc_sum +
-				child_perfctr->children.tsc;
-		nrctrs = perfctr_cstatus_nrctrs(cstatus);
-		for(i = 0; i < nrctrs; ++i)
-			parent_perfctr->children.pmc[i] +=
-				child_perfctr->cpu_state.pmc[i].sum +
-				child_perfctr->children.pmc[i];
+	if (parent_perfctr && child_perfctr) {
+		spin_lock(&parent_perfctr->children_lock);
+		if (parent_perfctr->inheritance_id == child_perfctr->inheritance_id) {
+			cstatus = parent_perfctr->cpu_state.cstatus;
+			if (perfctr_cstatus_has_tsc(cstatus))
+				parent_perfctr->children.tsc +=
+					child_perfctr->cpu_state.tsc_sum +
+					child_perfctr->children.tsc;
+			nrctrs = perfctr_cstatus_nrctrs(cstatus);
+			for(i = 0; i < nrctrs; ++i)
+				parent_perfctr->children.pmc[i] +=
+					child_perfctr->cpu_state.pmc[i].sum +
+					child_perfctr->children.pmc[i];
+		}
+		spin_unlock(&parent_perfctr->children_lock);
 	}
+	schedule_put_vperfctr(child_perfctr);
+}
+
+static void scheduled_release(void *data)
+{
+	struct vperfctr *child_perfctr = data;
+	struct task_struct *parent_tsk = child_perfctr->parent_tsk;
+
+	task_lock(parent_tsk);
+	do_vperfctr_release(child_perfctr, parent_tsk);
 	task_unlock(parent_tsk);
+	put_task_struct(parent_tsk);
+}
+
+void __vperfctr_release(struct task_struct *child_tsk)
+{
+	struct task_struct *parent_tsk = child_tsk->parent;
+	struct vperfctr *child_perfctr = child_tsk->thread.perfctr;
+
 	child_tsk->thread.perfctr = NULL;
-	schedule_put_vperfctr(child_perfctr);
+	if (parent_tsk == current)
+		do_vperfctr_release(child_perfctr, parent_tsk);
+	else {
+		get_task_struct(parent_tsk);
+		INIT_WORK(&child_perfctr->work, scheduled_release, child_perfctr);
+		child_perfctr->parent_tsk = parent_tsk;
+		schedule_work(&child_perfctr->work);
+	}
 }
 
 /* schedule() --> switch_to() --> .. --> __vperfctr_suspend().
@@ -574,10 +607,10 @@ static int do_vperfctr_control(struct vp
 		if (!(control->preserve & (1<<i)))
 			perfctr->cpu_state.pmc[i].sum = 0;
 
-	task_lock(tsk);
+	spin_lock(&perfctr->children_lock);
 	perfctr->inheritance_id = new_inheritance_id();
 	memset(&perfctr->children, 0, sizeof perfctr->children);
-	task_unlock(tsk);
+	spin_unlock(&perfctr->children_lock);
 
 	if (tsk == current)
 		vperfctr_resume(perfctr);
@@ -678,10 +711,10 @@ static int do_vperfctr_read(struct vperf
 	}
 	if (childrenp) {
 		if (tsk)
-			task_lock(tsk);
+			spin_lock(&perfctr->children_lock);
 		tmp->children = perfctr->children;
 		if (tsk)
-			task_unlock(tsk);
+			spin_unlock(&perfctr->children_lock);
 	}
 	if (tsk == current)
 		preempt_enable();
_