diff -purN -X /home/mbligh/.diff.exclude 310-irqbal_fast/Makefile 320-kcg/Makefile
--- 310-irqbal_fast/Makefile	2004-03-12 11:06:17.000000000 -0800
+++ 320-kcg/Makefile	2004-03-14 09:50:47.000000000 -0800
@@ -442,6 +442,10 @@ ifndef CONFIG_FRAME_POINTER
 CFLAGS		+= -fomit-frame-pointer
 endif
 
+ifeq ($(CONFIG_MCOUNT),y)
+CFLAGS += -pg
+endif
+
 ifdef CONFIG_DEBUG_INFO
 CFLAGS		+= -g
 endif
diff -purN -X /home/mbligh/.diff.exclude 310-irqbal_fast/arch/i386/Kconfig 320-kcg/arch/i386/Kconfig
--- 310-irqbal_fast/arch/i386/Kconfig	2004-03-14 09:50:04.000000000 -0800
+++ 320-kcg/arch/i386/Kconfig	2004-03-14 09:50:47.000000000 -0800
@@ -1635,6 +1635,14 @@ config X86_MPPARSE
 	depends on X86_LOCAL_APIC && !X86_VISWS
 	default y
 
+config MCOUNT
+	bool "Generate function call graph"
+	depends on FRAME_POINTER
+	help
+	  This option instruments the kernel to generate a deterministic
+	  function call graph.  Answering Y here will make your kernel run
+	  1-2% slower.
+
 endmenu
 
 source "security/Kconfig"
diff -purN -X /home/mbligh/.diff.exclude 310-irqbal_fast/arch/i386/boot/compressed/Makefile 320-kcg/arch/i386/boot/compressed/Makefile
--- 310-irqbal_fast/arch/i386/boot/compressed/Makefile	2003-03-20 11:25:38.000000000 -0800
+++ 320-kcg/arch/i386/boot/compressed/Makefile	2004-03-14 09:50:47.000000000 -0800
@@ -9,6 +9,17 @@ EXTRA_AFLAGS	:= -traditional
 
 LDFLAGS_vmlinux := -Ttext $(IMAGE_OFFSET) -e startup_32
 
+ifeq ($(CONFIG_MCOUNT),y)
+quiet_cmd_nopg = CC     $@
+      cmd_nopg = $(CC) $(subst -pg,,$(CFLAGS)) -c $(src)/$(*F).c -o $@
+
+$(obj)/misc.o: alwayscc
+	$(call cmd,nopg)
+
+alwayscc:
+	$(Q)rm -f $(obj)/misc.o
+endif
+
 $(obj)/vmlinux: $(obj)/head.o $(obj)/misc.o $(obj)/piggy.o FORCE
 	$(call if_changed,ld)
 	@:
diff -purN -X /home/mbligh/.diff.exclude 310-irqbal_fast/arch/i386/kernel/i386_ksyms.c 320-kcg/arch/i386/kernel/i386_ksyms.c
--- 310-irqbal_fast/arch/i386/kernel/i386_ksyms.c	2004-03-12 11:07:27.000000000 -0800
+++ 320-kcg/arch/i386/kernel/i386_ksyms.c	2004-03-14 09:50:47.000000000 -0800
@@ -188,6 +188,11 @@ extern void * memcpy(void *,const void *
 EXPORT_SYMBOL_NOVERS(memcpy);
 EXPORT_SYMBOL_NOVERS(memset);
 
+#ifdef CONFIG_MCOUNT
+extern void mcount(void);
+EXPORT_SYMBOL_NOVERS(mcount);
+#endif
+
 #ifdef CONFIG_HAVE_DEC_LOCK
 EXPORT_SYMBOL(atomic_dec_and_lock);
 #endif
diff -purN -X /home/mbligh/.diff.exclude 310-irqbal_fast/arch/i386/lib/Makefile 320-kcg/arch/i386/lib/Makefile
--- 310-irqbal_fast/arch/i386/lib/Makefile	2004-03-12 11:06:23.000000000 -0800
+++ 320-kcg/arch/i386/lib/Makefile	2004-03-14 09:50:47.000000000 -0800
@@ -11,3 +11,4 @@ lib-$(CONFIG_X86_USE_3DNOW) += mmx.o
 lib-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o
 lib-$(CONFIG_KGDB) += kgdb_serial.o
 lib-$(CONFIG_DEBUG_IOVIRT)  += iodebug.o
+lib-$(CONFIG_MCOUNT) += mcount.o
diff -purN -X /home/mbligh/.diff.exclude 310-irqbal_fast/arch/i386/lib/mcount.S 320-kcg/arch/i386/lib/mcount.S
--- 310-irqbal_fast/arch/i386/lib/mcount.S	1969-12-31 16:00:00.000000000 -0800
+++ 320-kcg/arch/i386/lib/mcount.S	2004-03-14 09:50:47.000000000 -0800
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2000 SGI
+ *
+ * Written by Dimitris Michailidis dimitris@sgi.com
+ *
+ * This file implements mcount(), which is used to collect profiling data.
+ * We provide several variants to accommodate different types of callers at
+ * the lowest possible overhead.
+ */
+
+#include <linux/config.h>
+#include <linux/linkage.h>
+
+#define MCOUNT_HEAD  \
+	pushl %ecx          /* We must protect the arguments of FASTCALLs */; \
+	movl mcount_hook, %ecx;  \
+	testl %ecx, %ecx;  \
+	jz 1f;  \
+	pushl %eax;  \
+	pushl %edx;  \
+        movl 12(%esp), %edx  /* mcount()'s parent */
+
+#define MCOUNT_TAIL \
+	call *%ecx;  \
+	popl %edx;  \
+	popl %eax;  \
+1:	popl %ecx
+
+/*
+ * This is the main variant and is called by C code.  GCC's -pg option
+ * automatically instruments every C function with a call to this.
+ */
+ENTRY(mcount)
+#if defined(CONFIG_MCOUNT)
+	MCOUNT_HEAD
+#ifdef CONFIG_FRAME_POINTER
+        movl 4(%ebp), %eax  /* mcount()'s parent's parent */
+#endif
+	MCOUNT_TAIL
+#endif
+	ret
diff -purN -X /home/mbligh/.diff.exclude 310-irqbal_fast/arch/ppc64/Kconfig 320-kcg/arch/ppc64/Kconfig
--- 310-irqbal_fast/arch/ppc64/Kconfig	2004-03-11 14:33:53.000000000 -0800
+++ 320-kcg/arch/ppc64/Kconfig	2004-03-14 09:50:47.000000000 -0800
@@ -389,6 +389,14 @@ config DEBUG_INFO
 	  Say Y here only if you plan to use gdb to debug the kernel.
 	  If you don't debug the kernel, you can say N.
 	  
+config MCOUNT
+	bool "Generate function call graph"
+	depends on DEBUG_KERNEL
+	help
+	  This option instruments the kernel to generate a deterministic
+	  function call graph.  Answering Y here will make your kernel run
+	  1-2% slower.
+
 endmenu
 
 source "security/Kconfig"
diff -purN -X /home/mbligh/.diff.exclude 310-irqbal_fast/arch/ppc64/kernel/Makefile 320-kcg/arch/ppc64/kernel/Makefile
--- 310-irqbal_fast/arch/ppc64/kernel/Makefile	2004-03-11 14:33:53.000000000 -0800
+++ 320-kcg/arch/ppc64/kernel/Makefile	2004-03-14 09:50:47.000000000 -0800
@@ -5,6 +5,17 @@
 EXTRA_CFLAGS	+= -mno-minimal-toc
 extra-y		:= head.o vmlinux.lds.s
 
+ifeq ($(CONFIG_MCOUNT),y)
+quiet_cmd_nopg = CC      $@
+      cmd_nopg = $(CC) $(subst -pg,,$(CFLAGS)) -c $(src)/$(*F).c -o $@
+
+$(obj)/stab.o: alwayscc
+	$(call cmd,nopg)
+
+alwayscc:
+	$(Q)rm -f $(obj)/stab.o
+endif
+
 obj-y               :=	setup.o entry.o traps.o irq.o idle.o \
 			time.o process.o signal.o syscalls.o misc.o ptrace.o \
 			align.o semaphore.o bitops.o stab.o pacaData.o \
diff -purN -X /home/mbligh/.diff.exclude 310-irqbal_fast/arch/ppc64/lib/Makefile 320-kcg/arch/ppc64/lib/Makefile
--- 310-irqbal_fast/arch/ppc64/lib/Makefile	2003-06-19 14:41:18.000000000 -0700
+++ 320-kcg/arch/ppc64/lib/Makefile	2004-03-14 09:50:47.000000000 -0800
@@ -4,3 +4,4 @@
 
 lib-y := checksum.o dec_and_lock.o string.o strcase.o
 lib-y += copypage.o memcpy.o copyuser.o
+lib-$(CONFIG_MCOUNT) += mcount.o
diff -purN -X /home/mbligh/.diff.exclude 310-irqbal_fast/arch/ppc64/lib/mcount.S 320-kcg/arch/ppc64/lib/mcount.S
--- 310-irqbal_fast/arch/ppc64/lib/mcount.S	1969-12-31 16:00:00.000000000 -0800
+++ 320-kcg/arch/ppc64/lib/mcount.S	2004-03-14 09:50:47.000000000 -0800
@@ -0,0 +1,61 @@
+/*
+ * Written by Adam Litke (agl@us.ibm.com)
+ *
+ * This file implements mcount(), which is used to collect profiling data.
+ *
+ */
+
+#include <linux/linkage.h>
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+
+/*
+ * This is called by C code in all files compiled with -pg
+ */
+
+_GLOBAL(_mcount)
+	/* Store parameter regs on stack */
+	std	r3, -16(r1)
+	std	r4, -24(r1)
+	std	r5, -32(r1)
+	std	r6, -40(r1)
+	std	r7, -48(r1)
+	std	r8, -56(r1)
+	std	r9, -64(r1)
+	std	r10, -72(r1)
+
+	/* Set up new stack frame */
+	mflr	r0
+	std	r0, 16(r1)
+	mfcr	r0
+	std	r0, 8(r1)
+	stdu	r1, -184(r1)
+
+	/* If relocation is off skip mcount_entry */
+	std	r14, -8(r1)
+	mfmsr	r14
+	andi.	r14, r14, MSR_IR
+	cmpldi	r14, 0
+	ld	r14, -8(r1)
+	beq	1f
+
+	/* Call mcount_entry */
+	bl	.mcount_entry
+	ori	0,0,0
+
+1:	
+	/* Put everything back */
+	addi	r1, r1, 184
+	ld	r0, 16(r1)
+	mtlr	r0
+	ld	r0, 8(r1)
+	mtcr	r0
+	ld	r3, -16(r1)
+	ld	r4, -24(r1)
+	ld	r5, -32(r1)
+	ld	r6, -40(r1)
+	ld	r7, -48(r1)
+	ld	r8, -56(r1)
+	ld	r9, -64(r1)
+	ld	r10, -72(r1)
+	blr
diff -purN -X /home/mbligh/.diff.exclude 310-irqbal_fast/fs/proc/proc_misc.c 320-kcg/fs/proc/proc_misc.c
--- 310-irqbal_fast/fs/proc/proc_misc.c	2004-03-14 09:50:10.000000000 -0800
+++ 320-kcg/fs/proc/proc_misc.c	2004-03-14 09:50:47.000000000 -0800
@@ -51,6 +51,10 @@
 #include <asm/tlb.h>
 #include <asm/div64.h>
 
+#ifdef CONFIG_MCOUNT
+#include <linux/mcount.h>
+#endif
+
 #define LOAD_INT(x) ((x) >> FSHIFT)
 #define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
 /*
@@ -857,4 +861,13 @@ void __init proc_misc_init(void)
 			entry->proc_fops = &ppc_htab_operations;
 	}
 #endif
+#ifdef CONFIG_MCOUNT
+	{
+		extern struct file_operations mcount_operations;
+		extern struct proc_dir_entry *mcount_pde;
+		mcount_pde = create_proc_entry("mcount", S_IRUGO|S_IWUSR, NULL);
+		if (mcount_pde)
+			mcount_pde->proc_fops = &mcount_operations;
+	}
+#endif
 }
diff -purN -X /home/mbligh/.diff.exclude 310-irqbal_fast/include/asm-i386/atomic.h 320-kcg/include/asm-i386/atomic.h
--- 310-irqbal_fast/include/asm-i386/atomic.h	2004-02-18 14:57:17.000000000 -0800
+++ 320-kcg/include/asm-i386/atomic.h	2004-03-14 09:50:47.000000000 -0800
@@ -55,6 +55,17 @@ static __inline__ void atomic_add(int i,
 		:"ir" (i), "m" (v->counter));
 }
 
+/* Like the above but also returns the result */
+static __inline__ int atomic_add_return(int i, atomic_t *v)
+{
+	register int oldval;
+	__asm__ __volatile__(
+		LOCK "xaddl %2,%0"
+		:"=m" (v->counter), "=r" (oldval)
+		:"1" (i), "m" (v->counter) : "memory");
+	return oldval + i;
+}
+
 /**
  * atomic_sub - subtract the atomic variable
  * @i: integer value to subtract
diff -purN -X /home/mbligh/.diff.exclude 310-irqbal_fast/include/linux/mcount.h 320-kcg/include/linux/mcount.h
--- 310-irqbal_fast/include/linux/mcount.h	1969-12-31 16:00:00.000000000 -0800
+++ 320-kcg/include/linux/mcount.h	2004-03-14 09:50:47.000000000 -0800
@@ -0,0 +1,63 @@
+/*
+ * include/linux/mcount.h
+ *
+ * Implementation of kernel mcount handler and supporting functions.
+ * 
+ * Code based on kernprof http://oss.sgi.com/projects/kernprof/
+ * Copyright (C) SGI 1999, 2000, 2001
+ * Written by Dimitris Michailidis (dimitris@engr.sgi.com)
+ * Modified by John Hawkes (hawkes@engr.sgi.com)
+ * Contributions from Niels Christiansen (nchr@us.ibm.com)
+ * Adapted for stand-alone call graphing by Adam Litke (agl@us.ibm.com)
+ */
+
+#ifndef __MCOUNT_H
+#define __MCOUNT_H
+
+#include <linux/kernel.h> 
+#include <linux/config.h> 
+#include <linux/proc_fs.h>
+
+#define DFL_PC_RES 4  /* default PC resolution for this platform */
+#define CG_MAX_ARCS (1 << (8 * sizeof(short)))
+#define FUNCTIONPC(func)        (*(unsigned long *)&(func))
+
+#define pc_out_of_range(pc)    \
+        ((pc) < (unsigned long) &_stext || (pc) >= (unsigned long) &_etext)
+
+struct prof_mem_map
+{
+	unsigned long     kernel_buckets;   /* number of kernel buckets */
+	unsigned long     nr_cpus;          /* number of processors whether profiled or not */
+	unsigned long     cg_from_size;     /* size of one cg_from array */
+	unsigned long     cg_to_size;       /* size of one cg_to array */
+	unsigned long     cg_to_offset;     /* offset of cg_to array */
+	unsigned long     kernel_start;     /* lowest text address in kernel */
+	unsigned long     kernel_end;       /* highest text address in kernel */
+};
+	
+struct cg_arc_dest {
+	unsigned long address;
+	atomic_t count;
+	unsigned short link;
+	unsigned short pad;
+};
+
+#ifdef CONFIG_X86
+void cg_record_arc(unsigned long frompc, unsigned long selfpc) __attribute__((regparm(2)));
+#endif
+
+int mcount_init(void); 
+
+ssize_t mcount_write(struct file * file, const char * buf,
+		       size_t count, loff_t *ppos);
+
+ssize_t mcount_read(struct file * file, char * buf,
+		       size_t count, loff_t *ppos);
+
+static struct file_operations mcount_operations = {
+	        write:  mcount_write,
+	        read:	mcount_read,
+};
+
+#endif
diff -purN -X /home/mbligh/.diff.exclude 310-irqbal_fast/kernel/Makefile 320-kcg/kernel/Makefile
--- 310-irqbal_fast/kernel/Makefile	2004-03-12 11:06:59.000000000 -0800
+++ 320-kcg/kernel/Makefile	2004-03-14 09:53:46.000000000 -0800
@@ -23,6 +23,18 @@ obj-$(CONFIG_IKCONFIG) += configs.o
 obj-$(CONFIG_IKCONFIG_PROC) += configs.o
 obj-$(CONFIG_STOP_MACHINE) += stop_machine.o
 
+obj-$(CONFIG_MCOUNT) += mcount.o
+
+ifeq ($(CONFIG_MCOUNT),y)
+quiet_cmd_nopg = CC      $@
+      cmd_nopg = $(CC) $(subst -pg,,$(CFLAGS)) -c $(src)/$(*F).c -o $@
+
+$(obj)/mcount.o: alwayscc
+	$(call cmd,nopg)
+alwayscc:
+	$(Q)rm -f $(obj)/mcount.o
+endif
+
 ifneq ($(CONFIG_IA64),y)
 # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
 # needed for x86 only.  Why this used to be enabled for all architectures is beyond
diff -purN -X /home/mbligh/.diff.exclude 310-irqbal_fast/kernel/mcount.c 320-kcg/kernel/mcount.c
--- 310-irqbal_fast/kernel/mcount.c	1969-12-31 16:00:00.000000000 -0800
+++ 320-kcg/kernel/mcount.c	2004-03-14 09:50:47.000000000 -0800
@@ -0,0 +1,203 @@
+/*
+ * kernel/mcount.c
+ *
+ * Implementation of kernel mcount handler and supporting functions.
+ * 
+ * Code based on kernprof http://oss.sgi.com/projects/kernprof/
+ * Copyright (C) SGI 1999, 2000, 2001
+ * Written by Dimitris Michailidis (dimitris@engr.sgi.com)
+ * Modified by John Hawkes (hawkes@engr.sgi.com)
+ * Contributions from Niels Christiansen (nchr@us.ibm.com)
+ * Adapted for stand-alone call graphing by Adam Litke (agl@us.ibm.com)
+ */
+
+#include <linux/mcount.h>
+#include <linux/vmalloc.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/percpu.h>
+#include <linux/kallsyms.h>
+
+void UNKNOWN_KERNEL(void) {} /* Dummy functions to make profiles more */
+void UNKNOWN_MODULE(void) {} /* descriptive */
+
+unsigned int mcount_shift, PC_resolution = DFL_PC_RES;
+
+char* memory_start = NULL;
+unsigned short *cg_from_base = NULL;
+struct cg_arc_dest *cg_to_base = NULL;
+int cg_arc_overflow = 0; /* set when no new arcs can be added to the call graph */
+int n_buckets = 0;
+size_t mem_needed;   /* space needed for the call graph and the PC samples */
+extern char _stext, _etext, _sinittext, _einittext;
+
+void (*mcount_hook)(unsigned long, unsigned long) = NULL;
+struct proc_dir_entry *mcount_pde;
+
+static int mcount_alloc_mem(void)
+{
+	unsigned long cg_from_size, cg_to_size;
+	size_t text_size = (unsigned long) &_etext - (unsigned long) &_stext;
+	struct prof_mem_map *memory_map;
+	
+	for (mcount_shift = 0; (1 << mcount_shift) < PC_resolution; mcount_shift++);
+	n_buckets = text_size >> mcount_shift;
+	cg_from_size = n_buckets * sizeof(short);
+	cg_to_size = CG_MAX_ARCS * sizeof(struct cg_arc_dest);
+	mem_needed = sizeof(struct prof_mem_map) + 
+		((cg_from_size + cg_to_size) * num_online_cpus());
+	if ((memory_start = vmalloc(mem_needed)) == NULL) {
+		return -ENOMEM;
+	}
+	memset(memory_start, 0, mem_needed);
+	
+	cg_from_base = (unsigned short *) (memory_start + sizeof(struct prof_mem_map));
+	cg_to_base = (struct cg_arc_dest *) (memory_start + sizeof(struct prof_mem_map) +
+			(cg_from_size * num_online_cpus()));
+
+	memory_map = (struct prof_mem_map*) memory_start;
+	memory_map->kernel_buckets = n_buckets;
+	memory_map->nr_cpus = num_online_cpus();
+	memory_map->cg_from_size = cg_from_size;
+	memory_map->cg_to_size = cg_to_size;
+	memory_map->cg_to_offset = cg_from_size * num_online_cpus();
+	memory_map->kernel_start = (unsigned long)&_stext;
+	memory_map->kernel_end = (unsigned long)&_etext;
+	return 0;
+}
+
+static void mcount_free_mem(void)
+{
+	vfree(memory_start);
+	memory_start = NULL;
+}
+
+void mcount_entry(void)
+{
+	unsigned long frompc, selfpc;
+
+	if(mcount_hook) {
+		frompc = (unsigned long)__builtin_return_address(2);
+		selfpc = (unsigned long)__builtin_return_address(1);
+		mcount_hook(frompc, selfpc);
+	}
+	return;
+}
+
+/* Record an arc traversal in the call graph.  Called by mcount().  SMP safe */
+void cg_record_arc(unsigned long frompc, unsigned long selfpc)
+{
+	static spinlock_t cg_record_lock = SPIN_LOCK_UNLOCKED;
+	unsigned long flags;
+	int toindex, fromindex, cpu;
+	unsigned short *q, *cg_from;
+	struct cg_arc_dest *p, *cg_to;
+	
+	cpu = smp_processor_id();
+	
+	cg_from = &cg_from_base[n_buckets * cpu];
+	cg_to   = &cg_to_base[CG_MAX_ARCS * cpu];
+	
+	if (pc_out_of_range(frompc))
+		fromindex = (FUNCTIONPC(UNKNOWN_KERNEL) - (unsigned long) &_stext)
+				>> mcount_shift;
+	else 
+		fromindex = (frompc - (unsigned long) &_stext) >> mcount_shift;
+	q = &cg_from[fromindex];
+	
+	/* Easy case: the arc is already in the call graph */
+	for (toindex = *q; toindex != 0; ) {
+		p = &cg_to[toindex];
+		if (p->address == selfpc) {
+			atomic_inc(&p->count);
+			return;
+		}
+		toindex = p->link;
+	}
+	/*
+	 * No luck.  We need to add a new arc.  Since cg_to[0] is unused,
+	 * we use cg_to[0].count to keep track of the next available arc.
+	 */
+	if (cg_arc_overflow) {
+		return;
+	}
+	toindex = atomic_add_return(1, &cg_to->count);
+	if (toindex >= CG_MAX_ARCS) {
+		/*
+		 * We have run out of space for arcs.  We'll keep incrementing
+		 * the existing ones but we won't try to add any more.
+		 */
+		cg_arc_overflow = 1;
+		atomic_set(&cg_to->count, CG_MAX_ARCS - 1);
+		return;
+	}
+	/*
+	 * We have a secured slot for a new arc and all we need to do is
+	 * initialize it and add it to a hash bucket.  We use compare&swap, if
+	 * possible, to avoid any spinlocks whatsoever.
+	 */
+	p = &cg_to[toindex];
+	p->address = selfpc;
+	atomic_set(&p->count, 1);
+	
+	spin_lock_irqsave(&cg_record_lock, flags);
+	p->link = *q;
+	*q = toindex;
+	spin_unlock_irqrestore(&cg_record_lock, flags);
+	return;
+}
+
+int mcount_start(void)
+{
+	if (!memory_start) {
+		if(mcount_alloc_mem())
+			return -ENOMEM;
+		mcount_pde->size = mem_needed;
+	}
+	mcount_hook = cg_record_arc;
+	return 0;
+}
+
+int mcount_stop(void)
+{
+	mcount_hook = NULL;
+	return 0;
+}
+
+int mcount_cleanup(void)
+{
+	mcount_stop();
+	mcount_pde->size = 0;
+	mcount_free_mem();
+	return 0;
+}
+
+ssize_t mcount_read(struct file * file, char * buf,
+			size_t count, loff_t *ppos)
+{
+	if (!memory_start || *ppos >= mcount_pde->size) return 0;
+	count = min(count, (size_t)(mcount_pde->size - *ppos));
+	if (copy_to_user(buf, memory_start + *ppos, count)) return -EFAULT;
+	*ppos += count;
+	return count;
+}
+
+ssize_t mcount_write(struct file * file, const char * buf,
+		       size_t count, loff_t *ppos)
+{
+	int ret; char c;
+	if (count < 1 || get_user(c, buf)) return -EFAULT;
+	switch (c) {
+		case '0':
+			ret = mcount_cleanup();
+			break;
+		case '1':
+			ret = mcount_stop();
+			break;
+		case '2':
+			ret = mcount_start(); break;
+		default:
+			ret = -EINVAL;
+	}
+	return (ret == 0) ? count : ret;
+}