patch-2.4.19 linux-2.4.19/arch/ia64/kernel/perfmon.c
- Lines: 5032
- Date: Fri Aug 2 17:39:42 2002
- Orig file: linux-2.4.18/arch/ia64/kernel/perfmon.c
- Orig date: Fri Nov 9 14:26:17 2001
diff -urN linux-2.4.18/arch/ia64/kernel/perfmon.c linux-2.4.19/arch/ia64/kernel/perfmon.c
@@ -1,13 +1,16 @@
/*
- * This file contains the code to configure and read/write the ia64 performance
- * monitoring stuff.
+ * This file implements the perfmon subsystem which is used
+ * to program the IA-64 Performance Monitoring Unit (PMU).
*
 * Originally Written by Ganesh Venkitachalam, IBM Corp.
- * Modifications by David Mosberger-Tang, Hewlett-Packard Co.
- * Modifications by Stephane Eranian, Hewlett-Packard Co.
* Copyright (C) 1999 Ganesh Venkitachalam <venkitac@us.ibm.com>
- * Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com>
- * Copyright (C) 2000-2001 Stephane Eranian <eranian@hpl.hp.com>
+ *
+ * Modifications by Stephane Eranian, Hewlett-Packard Co.
+ * Modifications by David Mosberger-Tang, Hewlett-Packard Co.
+ *
+ * Copyright (C) 1999-2002 Hewlett Packard Co
+ * Stephane Eranian <eranian@hpl.hp.com>
+ * David Mosberger-Tang <davidm@hpl.hp.com>
*/
#include <linux/config.h>
@@ -22,151 +25,137 @@
#include <linux/mm.h>
#include <asm/bitops.h>
-#include <asm/efi.h>
#include <asm/errno.h>
-#include <asm/hw_irq.h>
#include <asm/page.h>
#include <asm/pal.h>
#include <asm/perfmon.h>
-#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/signal.h>
#include <asm/system.h>
-#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/delay.h> /* for ia64_get_itc() */
#ifdef CONFIG_PERFMON
-#define PFM_VERSION "0.3"
-#define PFM_SMPL_HDR_VERSION 1
-
-#define PMU_FIRST_COUNTER 4 /* first generic counter */
-
-#define PFM_WRITE_PMCS 0xa0
-#define PFM_WRITE_PMDS 0xa1
-#define PFM_READ_PMDS 0xa2
-#define PFM_STOP 0xa3
-#define PFM_START 0xa4
-#define PFM_ENABLE 0xa5 /* unfreeze only */
-#define PFM_DISABLE 0xa6 /* freeze only */
-#define PFM_RESTART 0xcf
-#define PFM_CREATE_CONTEXT 0xa7
-#define PFM_DESTROY_CONTEXT 0xa8
/*
- * Those 2 are just meant for debugging. I considered using sysctl() for
- * that but it is a little bit too pervasive. This solution is at least
- * self-contained.
+ * For PMUs which rely on the debug registers for some features, you must
+ * enable the following flag to activate support for accessing the
+ * registers via the perfmonctl() interface.
*/
-#define PFM_DEBUG_ON 0xe0
-#define PFM_DEBUG_OFF 0xe1
-
-#define PFM_DEBUG_BASE PFM_DEBUG_ON
-
+#ifdef CONFIG_ITANIUM
+#define PFM_PMU_USES_DBR 1
+#endif
/*
- * perfmon API flags
+ * perfmon context states
*/
-#define PFM_FL_INHERIT_NONE 0x00 /* never inherit a context across fork (default) */
-#define PFM_FL_INHERIT_ONCE 0x01 /* clone pfm_context only once across fork() */
-#define PFM_FL_INHERIT_ALL 0x02 /* always clone pfm_context across fork() */
-#define PFM_FL_SMPL_OVFL_NOBLOCK 0x04 /* do not block on sampling buffer overflow */
-#define PFM_FL_SYSTEM_WIDE 0x08 /* create a system wide context */
-#define PFM_FL_EXCL_INTR 0x10 /* exclude interrupt from system wide monitoring */
+#define PFM_CTX_DISABLED 0
+#define PFM_CTX_ENABLED 1
/*
- * PMC API flags
+ * Reset register flags
*/
-#define PFM_REGFL_OVFL_NOTIFY 1 /* send notification on overflow */
+#define PFM_RELOAD_LONG_RESET 1
+#define PFM_RELOAD_SHORT_RESET 2
/*
- * Private flags and masks
+ * Misc macros and definitions
*/
+#define PMU_FIRST_COUNTER 4
+
+#define PFM_IS_DISABLED() pmu_conf.pfm_is_disabled
+
+#define PMC_OVFL_NOTIFY(ctx, i) ((ctx)->ctx_soft_pmds[i].flags & PFM_REGFL_OVFL_NOTIFY)
#define PFM_FL_INHERIT_MASK (PFM_FL_INHERIT_NONE|PFM_FL_INHERIT_ONCE|PFM_FL_INHERIT_ALL)
-#ifdef CONFIG_SMP
-#define cpu_is_online(i) (cpu_online_map & (1UL << i))
-#else
-#define cpu_is_online(i) 1
-#endif
+#define PMC_IS_IMPL(i) (i<pmu_conf.num_pmcs && pmu_conf.impl_regs[i>>6] & (1UL<< (i) %64))
+#define PMD_IS_IMPL(i) (i<pmu_conf.num_pmds && pmu_conf.impl_regs[4+(i>>6)] & (1UL<<(i) % 64))
+
+#define PMD_IS_COUNTING(i) (i >=0 && i < 256 && pmu_conf.counter_pmds[i>>6] & (1UL <<(i) % 64))
+#define PMC_IS_COUNTING(i) PMD_IS_COUNTING(i)
+
+#define IBR_IS_IMPL(k) (k<pmu_conf.num_ibrs)
+#define DBR_IS_IMPL(k) (k<pmu_conf.num_dbrs)
+
+#define PMC_IS_BTB(a) (((pfm_monitor_t *)(a))->pmc_es == PMU_BTB_EVENT)
+
+#define LSHIFT(x) (1UL<<(x))
+#define PMM(x) LSHIFT(x)
+#define PMC_IS_MONITOR(c) ((pmu_conf.monitor_pmcs[0] & PMM((c))) != 0)
+
+#define CTX_IS_ENABLED(c) ((c)->ctx_flags.state == PFM_CTX_ENABLED)
+#define CTX_OVFL_NOBLOCK(c) ((c)->ctx_fl_block == 0)
+#define CTX_INHERIT_MODE(c) ((c)->ctx_fl_inherit)
+#define CTX_HAS_SMPL(c) ((c)->ctx_psb != NULL)
+#define CTX_USED_PMD(ctx,n) (ctx)->ctx_used_pmds[(n)>>6] |= 1UL<< ((n) % 64)
+
+#define CTX_USED_IBR(ctx,n) (ctx)->ctx_used_ibrs[(n)>>6] |= 1UL<< ((n) % 64)
+#define CTX_USED_DBR(ctx,n) (ctx)->ctx_used_dbrs[(n)>>6] |= 1UL<< ((n) % 64)
+#define CTX_USES_DBREGS(ctx) (((pfm_context_t *)(ctx))->ctx_fl_using_dbreg==1)
+
+#define LOCK_CTX(ctx) spin_lock(&(ctx)->ctx_lock)
+#define UNLOCK_CTX(ctx) spin_unlock(&(ctx)->ctx_lock)
+
+#define SET_PMU_OWNER(t) do { pmu_owners[smp_processor_id()].owner = (t); } while(0)
+#define PMU_OWNER() pmu_owners[smp_processor_id()].owner
+
+#define LOCK_PFS() spin_lock(&pfm_sessions.pfs_lock)
+#define UNLOCK_PFS() spin_unlock(&pfm_sessions.pfs_lock)
+
+#define PFM_REG_RETFLAG_SET(flags, val) do { flags &= ~PFM_REG_RETFL_MASK; flags |= (val); } while(0)
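The bookkeeping macros above pack one bit per register into arrays of four 64-bit words, indexed by n>>6 with bit n%64. A minimal stand-alone sketch of the same idiom follows; the names (reg_mask_t, mask_set, mask_test) are illustrative only, not kernel API.

/* Sketch of the 256-bit register bookkeeping used by CTX_USED_PMD() and friends. */
#include <stdio.h>

typedef struct { unsigned long bits[4]; } reg_mask_t;   /* 4 x 64 = 256 bits */

static void mask_set(reg_mask_t *m, unsigned int n)
{
	m->bits[n >> 6] |= 1UL << (n % 64);          /* same idiom as CTX_USED_PMD() */
}

static int mask_test(const reg_mask_t *m, unsigned int n)
{
	return (m->bits[n >> 6] >> (n % 64)) & 1UL;
}

int main(void)
{
	reg_mask_t used = { { 0 } };

	mask_set(&used, 4);     /* mark PMD4 as used */
	mask_set(&used, 70);    /* a register tracked in the second word */
	printf("pmd4 used=%d pmd5 used=%d pmd70 used=%d\n",
	       mask_test(&used, 4), mask_test(&used, 5), mask_test(&used, 70));
	return 0;
}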
+
+/*
+ * debugging
+ */
+#define DBprintk(a) \
+ do { \
+ if (pfm_debug_mode >0) { printk("%s.%d: CPU%d ", __FUNCTION__, __LINE__, smp_processor_id()); printk a; } \
+ } while (0)
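DBprintk() relies on the double-parenthesis trick so that a complete printk() argument list can be passed as a single macro argument. A small user-space sketch of the same idiom, using printf and the standard __func__ in place of printk and the GCC __FUNCTION__ alias:

#include <stdio.h>

static int pfm_debug_mode = 1;

#define DBprintk(a) \
	do { \
		if (pfm_debug_mode > 0) { printf("%s.%d: ", __func__, __LINE__); printf a; } \
	} while (0)

int main(void)
{
	int pid = 42;

	DBprintk(("context created for [%d]\n", pid));	/* note the double parentheses */
	return 0;
}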
-#define PMC_IS_IMPL(i) (i < pmu_conf.num_pmcs && pmu_conf.impl_regs[i>>6] & (1<< (i&~(64-1))))
-#define PMD_IS_IMPL(i) (i < pmu_conf.num_pmds && pmu_conf.impl_regs[4+(i>>6)] & (1<< (i&~(64-1))))
-#define PMD_IS_COUNTER(i) (i>=PMU_FIRST_COUNTER && i < (PMU_FIRST_COUNTER+pmu_conf.max_counters))
-#define PMC_IS_COUNTER(i) (i>=PMU_FIRST_COUNTER && i < (PMU_FIRST_COUNTER+pmu_conf.max_counters))
-/* This is the Itanium-specific PMC layout for counter config */
+/*
+ * These are some helpful architected PMC and IBR/DBR register layouts
+ */
typedef struct {
unsigned long pmc_plm:4; /* privilege level mask */
unsigned long pmc_ev:1; /* external visibility */
unsigned long pmc_oi:1; /* overflow interrupt */
unsigned long pmc_pm:1; /* privileged monitor */
unsigned long pmc_ig1:1; /* reserved */
- unsigned long pmc_es:7; /* event select */
- unsigned long pmc_ig2:1; /* reserved */
- unsigned long pmc_umask:4; /* unit mask */
- unsigned long pmc_thres:3; /* threshold */
- unsigned long pmc_ig3:1; /* reserved (missing from table on p6-17) */
- unsigned long pmc_ism:2; /* instruction set mask */
- unsigned long pmc_ig4:38; /* reserved */
-} pmc_counter_reg_t;
-
-/* test for EAR/BTB configuration */
-#define PMU_DEAR_EVENT 0x67
-#define PMU_IEAR_EVENT 0x23
-#define PMU_BTB_EVENT 0x11
-
-#define PMC_IS_DEAR(a) (((pmc_counter_reg_t *)(a))->pmc_es == PMU_DEAR_EVENT)
-#define PMC_IS_IEAR(a) (((pmc_counter_reg_t *)(a))->pmc_es == PMU_IEAR_EVENT)
-#define PMC_IS_BTB(a) (((pmc_counter_reg_t *)(a))->pmc_es == PMU_BTB_EVENT)
-
-/*
- * This header is at the beginning of the sampling buffer returned to the user.
- * It is exported as Read-Only at this point. It is directly followed with the
- * first record.
- */
-typedef struct {
- int hdr_version; /* could be used to differentiate formats */
- int hdr_reserved;
- unsigned long hdr_entry_size; /* size of one entry in bytes */
- unsigned long hdr_count; /* how many valid entries */
- unsigned long hdr_pmds; /* which pmds are recorded */
-} perfmon_smpl_hdr_t;
-
-/*
- * Header entry in the buffer as a header as follows.
- * The header is directly followed with the PMDS to saved in increasing index order:
- * PMD4, PMD5, .... How many PMDs are present is determined by the tool which must
- * keep track of it when generating the final trace file.
- */
-typedef struct {
- int pid; /* identification of process */
- int cpu; /* which cpu was used */
- unsigned long rate; /* initial value of this counter */
- unsigned long stamp; /* timestamp */
- unsigned long ip; /* where did the overflow interrupt happened */
- unsigned long regs; /* which registers overflowed (up to 64)*/
-} perfmon_smpl_entry_t;
+ unsigned long pmc_es:8; /* event select */
+ unsigned long pmc_ig2:48; /* reserved */
+} pfm_monitor_t;
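pfm_monitor_t overlays the architected PMC layout so that fields such as pmc_pm and pmc_es can be read from a raw register value, as PMC_IS_BTB() and pfm_write_pmcs() do below. A hedged user-space sketch of that decoding; the unsigned long bitfields are a GCC extension (as relied on by the kernel) and the example value is arbitrary.

#include <stdio.h>

typedef struct {
	unsigned long pmc_plm:4;	/* privilege level mask */
	unsigned long pmc_ev:1;		/* external visibility */
	unsigned long pmc_oi:1;		/* overflow interrupt */
	unsigned long pmc_pm:1;		/* privileged monitor */
	unsigned long pmc_ig1:1;	/* reserved */
	unsigned long pmc_es:8;		/* event select */
	unsigned long pmc_ig2:48;	/* reserved */
} pfm_monitor_t;

int main(void)
{
	unsigned long raw = 0x1234UL;			/* arbitrary example value */
	pfm_monitor_t *m = (pfm_monitor_t *)&raw;	/* same cast style as PMC_IS_BTB() */

	printf("plm=%lu ev=%lu oi=%lu pm=%lu es=0x%lx\n",
	       (unsigned long)m->pmc_plm, (unsigned long)m->pmc_ev,
	       (unsigned long)m->pmc_oi, (unsigned long)m->pmc_pm,
	       (unsigned long)m->pmc_es);
	return 0;
}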
/*
* There is one such data structure per perfmon context. It is used to describe the
- * sampling buffer. It is to be shared among siblings whereas the pfm_context isn't.
+ * sampling buffer. It is to be shared among siblings whereas the pfm_context
+ * is not.
* Therefore we maintain a refcnt which is incremented on fork().
- * This buffer is private to the kernel only the actual sampling buffer including its
- * header are exposed to the user. This construct allows us to export the buffer read-write,
- * if needed, without worrying about security problems.
- */
-typedef struct {
- atomic_t psb_refcnt; /* how many users for the buffer */
- int reserved;
+ * This buffer is private to the kernel; only the actual sampling buffer,
+ * including its header, is exposed to the user. This construct allows us to
+ * export the buffer read-write, if needed, without worrying about security
+ * problems.
+ */
+typedef struct _pfm_smpl_buffer_desc {
+ spinlock_t psb_lock; /* protection lock */
+ unsigned long psb_refcnt; /* how many users for the buffer */
+ int psb_flags; /* bitvector of flags */
+
void *psb_addr; /* points to location of first entry */
unsigned long psb_entries; /* maximum number of entries */
unsigned long psb_size; /* aligned size of buffer */
- unsigned long psb_index; /* next free entry slot */
+ unsigned long psb_index; /* next free entry slot XXX: must use the one in buffer */
unsigned long psb_entry_size; /* size of each entry including entry header */
perfmon_smpl_hdr_t *psb_hdr; /* points to sampling buffer header */
+
+ struct _pfm_smpl_buffer_desc *psb_next; /* next psb, used for rvfreeing of psb_hdr */
+
} pfm_smpl_buffer_desc_t;
+#define LOCK_PSB(p) spin_lock(&(p)->psb_lock)
+#define UNLOCK_PSB(p) spin_unlock(&(p)->psb_lock)
+
+#define PFM_PSB_VMA 0x1 /* a VMA is describing the buffer */
/*
* This structure is initialized at boot time and contains
@@ -180,126 +169,192 @@
unsigned long num_pmcs ; /* highest PMC implemented (may have holes) */
unsigned long num_pmds; /* highest PMD implemented (may have holes) */
 	unsigned long impl_regs[16];	/* buffer used to hold implemented PMC/PMD mask */
+ unsigned long num_ibrs; /* number of instruction debug registers */
+ unsigned long num_dbrs; /* number of data debug registers */
+ unsigned long monitor_pmcs[4]; /* which pmc are controlling monitors */
+ unsigned long counter_pmds[4]; /* which pmd are used as counters */
} pmu_config_t;
-#define PERFMON_IS_DISABLED() pmu_conf.pfm_is_disabled
-
+/*
+ * 64-bit software counter structure
+ */
typedef struct {
- __u64 val; /* virtual 64bit counter value */
- __u64 ival; /* initial value from user */
- __u64 smpl_rval; /* reset value on sampling overflow */
- __u64 ovfl_rval; /* reset value on overflow */
- int flags; /* notify/do not notify */
+ u64 val; /* virtual 64bit counter value */
+ u64 ival; /* initial value from user */
+ u64 long_reset; /* reset value on sampling overflow */
+ u64 short_reset;/* reset value on overflow */
+ u64 reset_pmds[4]; /* which other pmds to reset when this counter overflows */
+ int flags; /* notify/do not notify */
} pfm_counter_t;
-#define PMD_OVFL_NOTIFY(ctx, i) ((ctx)->ctx_pmds[i].flags & PFM_REGFL_OVFL_NOTIFY)
/*
- * perfmon context. One per process, is cloned on fork() depending on inheritance flags
+ * perfmon context. One per process, is cloned on fork() depending on
+ * inheritance flags
*/
typedef struct {
- unsigned int inherit:2; /* inherit mode */
- unsigned int noblock:1; /* block/don't block on overflow with notification */
- unsigned int system:1; /* do system wide monitoring */
- unsigned int frozen:1; /* pmu must be kept frozen on ctxsw in */
- unsigned int exclintr:1;/* exlcude interrupts from system wide monitoring */
- unsigned int reserved:26;
+ unsigned int state:1; /* 0=disabled, 1=enabled */
+ unsigned int inherit:2; /* inherit mode */
+ unsigned int block:1; /* when 1, task will block on user notifications */
+ unsigned int system:1; /* do system wide monitoring */
+ unsigned int frozen:1; /* pmu must be kept frozen on ctxsw in */
+ unsigned int protected:1; /* allow access to creator of context only */
+ unsigned int using_dbreg:1; /* using range restrictions (debug registers) */
+ unsigned int reserved:24;
} pfm_context_flags_t;
+/*
+ * perfmon context: encapsulates all the state of a monitoring session
+ * XXX: probably need to change layout
+ */
typedef struct pfm_context {
+ pfm_smpl_buffer_desc_t *ctx_psb; /* sampling buffer, if any */
+ unsigned long ctx_smpl_vaddr; /* user level virtual address of smpl buffer */
- pfm_smpl_buffer_desc_t *ctx_smpl_buf; /* sampling buffer descriptor, if any */
- unsigned long ctx_dear_counter; /* which PMD holds D-EAR */
- unsigned long ctx_iear_counter; /* which PMD holds I-EAR */
- unsigned long ctx_btb_counter; /* which PMD holds BTB */
-
- spinlock_t ctx_notify_lock;
+ spinlock_t ctx_lock;
pfm_context_flags_t ctx_flags; /* block/noblock */
- int ctx_notify_sig; /* XXX: SIGPROF or other */
+
struct task_struct *ctx_notify_task; /* who to notify on overflow */
- struct task_struct *ctx_creator; /* pid of creator (debug) */
+ struct task_struct *ctx_owner; /* pid of creator (debug) */
+
+ unsigned long ctx_ovfl_regs[4]; /* which registers overflowed (notification) */
+ unsigned long ctx_smpl_regs[4]; /* which registers to record on overflow */
- unsigned long ctx_ovfl_regs; /* which registers just overflowed (notification) */
- unsigned long ctx_smpl_regs; /* which registers to record on overflow */
+ struct semaphore ctx_restart_sem; /* use for blocking notification mode */
- struct semaphore ctx_restart_sem; /* use for blocking notification mode */
+ unsigned long ctx_used_pmds[4]; /* bitmask of used PMD (speedup ctxsw) */
+ unsigned long ctx_saved_pmcs[4]; /* bitmask of PMC to save on ctxsw */
+ unsigned long ctx_reload_pmcs[4]; /* bitmask of PMC to reload on ctxsw (SMP) */
- unsigned long ctx_used_pmds[4]; /* bitmask of used PMD (speedup ctxsw) */
- unsigned long ctx_used_pmcs[4]; /* bitmask of used PMC (speedup ctxsw) */
+ unsigned long ctx_used_ibrs[4]; /* bitmask of used IBR (speedup ctxsw) */
+ unsigned long ctx_used_dbrs[4]; /* bitmask of used DBR (speedup ctxsw) */
- pfm_counter_t ctx_pmds[IA64_NUM_PMD_COUNTERS]; /* XXX: size should be dynamic */
+ pfm_counter_t ctx_soft_pmds[IA64_NUM_PMD_REGS]; /* XXX: size should be dynamic */
+ u64 ctx_saved_psr; /* copy of psr used for lazy ctxsw */
+ unsigned long ctx_saved_cpus_allowed; /* copy of the task cpus_allowed (system wide) */
+ unsigned long ctx_cpu; /* cpu to which perfmon is applied (system wide) */
+
+ atomic_t ctx_saving_in_progress; /* flag indicating actual save in progress */
+ atomic_t ctx_last_cpu; /* CPU id of current or last CPU used */
} pfm_context_t;
-#define CTX_USED_PMD(ctx,n) (ctx)->ctx_used_pmds[(n)>>6] |= 1<< ((n) % 64)
-#define CTX_USED_PMC(ctx,n) (ctx)->ctx_used_pmcs[(n)>>6] |= 1<< ((n) % 64)
+#define ctx_fl_inherit ctx_flags.inherit
+#define ctx_fl_block ctx_flags.block
+#define ctx_fl_system ctx_flags.system
+#define ctx_fl_frozen ctx_flags.frozen
+#define ctx_fl_protected ctx_flags.protected
+#define ctx_fl_using_dbreg ctx_flags.using_dbreg
+
+/*
+ * global information about all sessions
+ * mostly used to synchronize between system wide and per-process
+ */
+typedef struct {
+ spinlock_t pfs_lock; /* lock the structure */
-#define ctx_fl_inherit ctx_flags.inherit
-#define ctx_fl_noblock ctx_flags.noblock
-#define ctx_fl_system ctx_flags.system
-#define ctx_fl_frozen ctx_flags.frozen
-#define ctx_fl_exclintr ctx_flags.exclintr
+ unsigned long pfs_task_sessions; /* number of per task sessions */
+ unsigned long pfs_sys_sessions; /* number of per system wide sessions */
+ unsigned long pfs_sys_use_dbregs; /* incremented when a system wide session uses debug regs */
+ unsigned long pfs_ptrace_use_dbregs; /* incremented when a process uses debug regs */
+ struct task_struct *pfs_sys_session[NR_CPUS]; /* point to task owning a system-wide session */
+} pfm_session_t;
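pfm_session_t centralizes the accounting that makes system-wide and per-task sessions mutually exclusive, as enforced later in pfm_create_context(). A stand-alone sketch of that reservation logic; NCPUS, the structure and the function name are illustrative, not the kernel definitions.

#include <stdio.h>
#include <string.h>

#define NCPUS 4

struct sessions {
	unsigned long task_sessions;
	unsigned long sys_sessions;
	int sys_session[NCPUS];		/* non-zero when a CPU is owned by a system-wide session */
};

static int reserve_session(struct sessions *s, int system_wide, int cpu)
{
	if (system_wide) {
		if (s->task_sessions > 0 || s->sys_session[cpu])
			return -1;	/* conflicts with per-task or existing system-wide use */
		s->sys_session[cpu] = 1;
		s->sys_sessions++;
	} else {
		if (s->sys_sessions > 0)
			return -1;	/* no per-task monitoring during a system-wide session */
		s->task_sessions++;
	}
	return 0;
}

int main(void)
{
	struct sessions s;

	memset(&s, 0, sizeof(s));
	printf("%d %d %d\n",
	       reserve_session(&s, 1, 2),	/* system-wide on CPU2: ok */
	       reserve_session(&s, 0, 0),	/* per-task while system-wide active: rejected */
	       reserve_session(&s, 1, 2));	/* same CPU again: rejected */
	return 0;
}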
-#define CTX_OVFL_NOBLOCK(c) ((c)->ctx_fl_noblock == 1)
-#define CTX_INHERIT_MODE(c) ((c)->ctx_fl_inherit)
-#define CTX_HAS_SMPL(c) ((c)->ctx_smpl_buf != NULL)
+/*
+ * structure used to pass argument to/from remote CPU
+ * using IPI to check and possibly save the PMU context on SMP systems.
+ *
+ * not used in UP kernels
+ */
+typedef struct {
+ struct task_struct *task; /* which task we are interested in */
+ int retval; /* return value of the call: 0=you can proceed, 1=need to wait for completion */
+} pfm_smp_ipi_arg_t;
-static pmu_config_t pmu_conf;
+/*
+ * perfmon command descriptions
+ */
+typedef struct {
+ int (*cmd_func)(struct task_struct *task, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs);
+ int cmd_flags;
+ unsigned int cmd_narg;
+ size_t cmd_argsize;
+} pfm_cmd_desc_t;
-/* for debug only */
-static int pfm_debug=0; /* 0= nodebug, >0= debug output on */
+#define PFM_CMD_PID 0x1 /* command requires pid argument */
+#define PFM_CMD_ARG_READ 0x2 /* command must read argument(s) */
+#define PFM_CMD_ARG_WRITE 0x4 /* command must write argument(s) */
+#define PFM_CMD_CTX 0x8 /* command needs a perfmon context */
+#define PFM_CMD_NOCHK 0x10 /* command does not need to check task's state */
-#define DBprintk(a) \
- do { \
- if (pfm_debug >0) { printk(__FUNCTION__" %d: ", __LINE__); printk a; } \
- } while (0);
+#define PFM_CMD_IDX(cmd) (cmd)
+
+#define PFM_CMD_IS_VALID(cmd) ((PFM_CMD_IDX(cmd) >= 0) && (PFM_CMD_IDX(cmd) < PFM_CMD_COUNT) \
+ && pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_func != NULL)
+
+#define PFM_CMD_USE_PID(cmd) ((pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_flags & PFM_CMD_PID) != 0)
+#define PFM_CMD_READ_ARG(cmd) ((pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_flags & PFM_CMD_ARG_READ) != 0)
+#define PFM_CMD_WRITE_ARG(cmd) ((pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_flags & PFM_CMD_ARG_WRITE) != 0)
+#define PFM_CMD_USE_CTX(cmd) ((pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_flags & PFM_CMD_CTX) != 0)
+#define PFM_CMD_CHK(cmd) ((pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_flags & PFM_CMD_NOCHK) == 0)
+
+#define PFM_CMD_ARG_MANY -1 /* cannot be zero */
+#define PFM_CMD_NARG(cmd) (pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_narg)
+#define PFM_CMD_ARG_SIZE(cmd) (pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_argsize)
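perfmonctl() commands are dispatched through pfm_cmd_tab[], indexed by the command number, with per-command flags, argument count and argument size checked by the macros above. A simplified sketch of such a table-driven dispatcher; the two commands and handler names are made up for illustration.

#include <stdio.h>
#include <stddef.h>

typedef int (*cmd_func_t)(void *arg, int count);

typedef struct {
	cmd_func_t	cmd_func;
	int		cmd_flags;
	unsigned int	cmd_narg;
	size_t		cmd_argsize;
} cmd_desc_t;

static int cmd_start(void *arg, int count) { (void)arg; (void)count; return 0; }
static int cmd_stop(void *arg, int count)  { (void)arg; (void)count; return 0; }

static cmd_desc_t cmd_tab[] = {
	{ cmd_start, 0, 0, 0 },
	{ cmd_stop,  0, 0, 0 },
};
#define CMD_COUNT (sizeof(cmd_tab)/sizeof(cmd_tab[0]))

static int dispatch(unsigned int cmd, void *arg, int count)
{
	/* same validity test as PFM_CMD_IS_VALID(): in range and handler present */
	if (cmd >= CMD_COUNT || cmd_tab[cmd].cmd_func == NULL)
		return -1;
	return cmd_tab[cmd].cmd_func(arg, count);
}

int main(void)
{
	printf("cmd 0 -> %d, cmd 7 -> %d\n", dispatch(0, NULL, 0), dispatch(7, NULL, 0));
	return 0;
}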
-static void ia64_reset_pmu(void);
/*
- * structure used to pass information between the interrupt handler
- * and the tasklet.
+ * perfmon internal variables
*/
-typedef struct {
- pid_t to_pid; /* which process to notify */
- pid_t from_pid; /* which process is source of overflow */
- int sig; /* with which signal */
- unsigned long bitvect; /* which counters have overflowed */
-} notification_info_t;
+static pmu_config_t pmu_conf; /* PMU configuration */
+static int pfm_debug_mode; /* 0= nodebug, >0= debug output on */
+static pfm_session_t pfm_sessions; /* global sessions information */
+static struct proc_dir_entry *perfmon_dir; /* for debug only */
+static unsigned long pfm_spurious_ovfl_intr_count; /* keep track of spurious ovfl interrupts */
+static unsigned long pfm_ovfl_intr_count; /* keep track of ovfl interrupts */
+static unsigned long pfm_recorded_samples_count;
-typedef struct {
- unsigned long pfs_proc_sessions;
- unsigned long pfs_sys_session; /* can only be 0/1 */
- unsigned long pfs_dfl_dcr; /* XXX: hack */
- unsigned int pfs_pp;
-} pfm_session_t;
+static unsigned long reset_pmcs[IA64_NUM_PMC_REGS]; /* contains PAL reset values for PMCS */
+
+static void pfm_vm_close(struct vm_area_struct * area);
+static struct vm_operations_struct pfm_vm_ops={
+ close: pfm_vm_close
+};
-struct {
+/*
+ * keep track of task owning the PMU per CPU.
+ */
+static struct {
struct task_struct *owner;
} ____cacheline_aligned pmu_owners[NR_CPUS];
-/*
- * helper macros
- */
-#define SET_PMU_OWNER(t) do { pmu_owners[smp_processor_id()].owner = (t); } while(0);
-#define PMU_OWNER() pmu_owners[smp_processor_id()].owner
+/*
+ * forward declarations
+ */
+static void ia64_reset_pmu(struct task_struct *);
#ifdef CONFIG_SMP
-#define PFM_CAN_DO_LAZY() (smp_num_cpus==1 && pfs_info.pfs_sys_session==0)
-#else
-#define PFM_CAN_DO_LAZY() (pfs_info.pfs_sys_session==0)
+static void pfm_fetch_regs(int cpu, struct task_struct *task, pfm_context_t *ctx);
#endif
-
static void pfm_lazy_save_regs (struct task_struct *ta);
-/* for debug only */
-static struct proc_dir_entry *perfmon_dir;
+static inline unsigned long
+pfm_read_soft_counter(pfm_context_t *ctx, int i)
+{
+ return ctx->ctx_soft_pmds[i].val + (ia64_get_pmd(i) & pmu_conf.perf_ovfl_val);
+}
-/*
- * XXX: hack to indicate that a system wide monitoring session is active
- */
-static pfm_session_t pfs_info;
+static inline void
+pfm_write_soft_counter(pfm_context_t *ctx, int i, unsigned long val)
+{
+ ctx->ctx_soft_pmds[i].val = val & ~pmu_conf.perf_ovfl_val;
+ /*
+ * writing to the unimplemented part is ignored, so we do not need to
+ * mask off the top part
+ */
+ ia64_set_pmd(i, val);
+}
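The pair above virtualizes a narrower hardware counter to 64 bits: the implemented low bits live in the PMD register, the upper bits in ctx_soft_pmds[i].val, and a read adds the two parts. A stand-alone sketch of the same arithmetic; the 47-bit counter width and variable names are assumptions for illustration only.

#include <stdio.h>

#define OVFL_VAL ((1UL << 47) - 1)	/* stands in for pmu_conf.perf_ovfl_val */

static unsigned long hw_pmd;		/* stands in for the hardware PMD register */
static unsigned long soft_val;		/* stands in for ctx_soft_pmds[i].val */

static void write_soft_counter(unsigned long val)
{
	soft_val = val & ~OVFL_VAL;	/* upper bits kept in software */
	hw_pmd   = val &  OVFL_VAL;	/* lower bits go to the "register" (the real code
					 * writes the full value; unimplemented bits are ignored) */
}

static unsigned long read_soft_counter(void)
{
	return soft_val + (hw_pmd & OVFL_VAL);	/* same addition as pfm_read_soft_counter() */
}

int main(void)
{
	write_soft_counter(0x123456789abcdefUL);
	hw_pmd += 1000;			/* pretend the PMU counted some events */
	printf("virtual counter = 0x%lx\n", read_soft_counter());
	return 0;
}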
/*
* finds the number of PM(C|D) registers given
@@ -324,10 +379,10 @@
* Generates a unique (per CPU) timestamp
*/
static inline unsigned long
-perfmon_get_stamp(void)
+pfm_get_stamp(void)
{
/*
- * XXX: maybe find something more efficient
+ * XXX: must find something more efficient
*/
return ia64_get_itc();
}
@@ -353,80 +408,185 @@
}
}
}
- DBprintk(("uv2kva(%lx-->%lx)\n", adr, ret));
+ DBprintk(("[%d] uv2kva(%lx-->%lx)\n", current->pid, adr, ret));
return ret;
}
-
/* Here we want the physical address of the memory.
* This is used when initializing the contents of the
* area and marking the pages as reserved.
*/
static inline unsigned long
-kvirt_to_pa(unsigned long adr)
+pfm_kvirt_to_pa(unsigned long adr)
{
__u64 pa = ia64_tpa(adr);
- DBprintk(("kv2pa(%lx-->%lx)\n", adr, pa));
+ //DBprintk(("kv2pa(%lx-->%lx)\n", adr, pa));
return pa;
}
+
static void *
-rvmalloc(unsigned long size)
+pfm_rvmalloc(unsigned long size)
{
void *mem;
unsigned long adr, page;
- /* XXX: may have to revisit this part because
- * vmalloc() does not necessarily return a page-aligned buffer.
- * This maybe a security problem when mapped at user level
- */
mem=vmalloc(size);
if (mem) {
+ //printk("perfmon: CPU%d pfm_rvmalloc(%ld)=%p\n", smp_processor_id(), size, mem);
memset(mem, 0, size); /* Clear the ram out, no junk to the user */
adr=(unsigned long) mem;
while (size > 0) {
- page = kvirt_to_pa(adr);
+ page = pfm_kvirt_to_pa(adr);
mem_map_reserve(virt_to_page(__va(page)));
- adr+=PAGE_SIZE;
- size-=PAGE_SIZE;
+ adr += PAGE_SIZE;
+ size -= PAGE_SIZE;
}
}
return mem;
}
static void
-rvfree(void *mem, unsigned long size)
+pfm_rvfree(void *mem, unsigned long size)
{
- unsigned long adr, page;
+ unsigned long adr, page = 0;
if (mem) {
adr=(unsigned long) mem;
while (size > 0) {
- page = kvirt_to_pa(adr);
+ page = pfm_kvirt_to_pa(adr);
mem_map_unreserve(virt_to_page(__va(page)));
adr+=PAGE_SIZE;
size-=PAGE_SIZE;
}
vfree(mem);
}
+ return;
+}
+
+/*
+ * This function gets called from mm/mmap.c:exit_mmap() only when there is a sampling buffer
+ * attached to the context AND the current task has a mapping for it, i.e., it is the original
+ * creator of the context.
+ *
+ * This function is used to remember the fact that the vma describing the sampling buffer
+ * has now been removed. It can only be called when no other tasks share the same mm context.
+ *
+ */
+static void
+pfm_vm_close(struct vm_area_struct *vma)
+{
+ pfm_smpl_buffer_desc_t *psb = (pfm_smpl_buffer_desc_t *)vma->vm_private_data;
+
+ if (psb == NULL) {
+ printk("perfmon: psb is null in [%d]\n", current->pid);
+ return;
+ }
+ /*
+ * Add PSB to list of buffers to free on release_thread() when no more users
+ *
+ * This call is safe because, once the count is zero, it cannot be modified anymore.
+ * The fact that there are no more users of the mm context does not mean that the
+ * sampling buffer is no longer being used outside of this task. In fact, it can
+ * still be accessed from within the kernel by another task (such as the monitored task).
+ *
+ * Therefore, we only move the psb into the list of buffers to free when we know
+ * nobody else is using it.
+ * The linked list is independent of the perfmon context, because in the case of
+ * multi-threaded processes, the last thread may not have been involved with
+ * monitoring; however, it will be the one removing the vma and it should therefore
+ * also remove the sampling buffer. This buffer cannot be removed until the vma
+ * is removed.
+ *
+ * This function cannot remove the buffer from here, because exit_mmap() must first
+ * complete. Given that there is no other vma related callback in the generic code,
+ * we have created our own with the linked list of sampling buffers to free, which
+ * is part of the thread structure. In release_thread() we check if the list is
+ * empty. If not we call into perfmon to free the buffer and psb. That is the only
+ * way to ensure a safe deallocation of the sampling buffer which works when
+ * the buffer is shared between distinct processes or with multi-threaded programs.
+ *
+ * We need to lock the psb because the refcnt test and flag manipulation must
+ * look like an atomic operation vis-a-vis pfm_context_exit()
+ */
+ LOCK_PSB(psb);
+
+ if (psb->psb_refcnt == 0) {
+
+ psb->psb_next = current->thread.pfm_smpl_buf_list;
+ current->thread.pfm_smpl_buf_list = psb;
+
+ DBprintk(("psb for [%d] smpl @%p size %ld inserted into list\n",
+ current->pid, psb->psb_hdr, psb->psb_size));
+ }
+ DBprintk(("psb vma flag cleared for [%d] smpl @%p size %ld inserted into list\n",
+ current->pid, psb->psb_hdr, psb->psb_size));
+
+ /*
+ * indicate to pfm_context_exit() that the vma has been removed.
+ */
+ psb->psb_flags &= ~PFM_PSB_VMA;
+
+ UNLOCK_PSB(psb);
+}
+
+/*
+ * This function is called from pfm_destroy_context() and also from pfm_inherit()
+ * to explicitly remove the sampling buffer mapping from the user level address space.
+ */
+static int
+pfm_remove_smpl_mapping(struct task_struct *task)
+{
+ pfm_context_t *ctx = task->thread.pfm_context;
+ pfm_smpl_buffer_desc_t *psb;
+ int r;
+
+ /*
+ * some sanity checks first
+ */
+ if (ctx == NULL || task->mm == NULL || ctx->ctx_smpl_vaddr == 0 || ctx->ctx_psb == NULL) {
+ printk("perfmon: invalid context mm=%p\n", task->mm);
+ return -1;
+ }
+ psb = ctx->ctx_psb;
+
+ down_write(&task->mm->mmap_sem);
+
+ r = do_munmap(task->mm, ctx->ctx_smpl_vaddr, psb->psb_size);
+
+ up_write(&task->mm->mmap_sem);
+ if (r !=0) {
+ printk("perfmon: pid %d unable to unmap sampling buffer @0x%lx size=%ld\n",
+ task->pid, ctx->ctx_smpl_vaddr, psb->psb_size);
+ }
+ DBprintk(("[%d] do_unmap(0x%lx, %ld)=%d\n",
+ task->pid, ctx->ctx_smpl_vaddr, psb->psb_size, r));
+
+ /*
+ * make sure we suppress all traces of this buffer
+ * (important for pfm_inherit)
+ */
+ ctx->ctx_smpl_vaddr = 0;
+
+ return 0;
}
static pfm_context_t *
pfm_context_alloc(void)
{
- pfm_context_t *pfc;
+ pfm_context_t *ctx;
/* allocate context descriptor */
- pfc = vmalloc(sizeof(*pfc));
- if (pfc) memset(pfc, 0, sizeof(*pfc));
-
- return pfc;
+ ctx = kmalloc(sizeof(pfm_context_t), GFP_KERNEL);
+ if (ctx) memset(ctx, 0, sizeof(pfm_context_t));
+
+ return ctx;
}
static void
-pfm_context_free(pfm_context_t *pfc)
+pfm_context_free(pfm_context_t *ctx)
{
- if (pfc) vfree(pfc);
+ if (ctx) kfree(ctx);
}
static int
@@ -434,11 +594,13 @@
{
unsigned long page;
+ DBprintk(("CPU%d buf=0x%lx addr=0x%lx size=%ld\n", smp_processor_id(), buf, addr, size));
+
while (size > 0) {
- page = kvirt_to_pa(buf);
+ page = pfm_kvirt_to_pa(buf);
if (remap_page_range(addr, page, PAGE_SIZE, PAGE_SHARED)) return -ENOMEM;
-
+
addr += PAGE_SIZE;
buf += PAGE_SIZE;
size -= PAGE_SIZE;
@@ -458,7 +620,7 @@
for (i=0; i < size; i++, which++) res += hweight64(*which);
- DBprintk((" res=%ld\n", res));
+ DBprintk(("weight=%ld\n", res));
return res;
}
@@ -467,15 +629,16 @@
* Allocates the sampling buffer and remaps it into caller's address space
*/
static int
-pfm_smpl_buffer_alloc(pfm_context_t *ctx, unsigned long which_pmds, unsigned long entries, void **user_addr)
+pfm_smpl_buffer_alloc(pfm_context_t *ctx, unsigned long *which_pmds, unsigned long entries,
+ void **user_vaddr)
{
struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma;
- unsigned long addr, size, regcount;
+ struct vm_area_struct *vma = NULL;
+ unsigned long size, regcount;
void *smpl_buf;
pfm_smpl_buffer_desc_t *psb;
- regcount = pfm_smpl_entry_size(&which_pmds, 1);
+ regcount = pfm_smpl_entry_size(which_pmds, 1);
/* note that regcount might be 0, in this case only the header for each
* entry will be recorded.
@@ -488,133 +651,207 @@
+ entries * (sizeof(perfmon_smpl_entry_t) + regcount*sizeof(u64)));
/*
* check requested size to avoid Denial-of-service attacks
- * XXX: may have to refine this test
+ * XXX: may have to refine this test
+ * Check against address space limit.
+ *
+ * if ((mm->total_vm << PAGE_SHIFT) + len> current->rlim[RLIMIT_AS].rlim_cur)
+ * return -ENOMEM;
*/
if (size > current->rlim[RLIMIT_MEMLOCK].rlim_cur) return -EAGAIN;
- /* find some free area in address space */
- addr = get_unmapped_area(NULL, 0, size, 0, MAP_PRIVATE);
- if (!addr) goto no_addr;
+ /*
+ * We do the easy to undo allocations first.
+ *
+ * pfm_rvmalloc(), clears the buffer, so there is no leak
+ */
+ smpl_buf = pfm_rvmalloc(size);
+ if (smpl_buf == NULL) {
+ DBprintk(("Can't allocate sampling buffer\n"));
+ return -ENOMEM;
+ }
+
+ DBprintk(("smpl_buf @%p\n", smpl_buf));
- DBprintk((" entries=%ld aligned size=%ld, unmapped @0x%lx\n", entries, size, addr));
+ /* allocate sampling buffer descriptor now */
+ psb = kmalloc(sizeof(*psb), GFP_KERNEL);
+ if (psb == NULL) {
+ DBprintk(("Can't allocate sampling buffer descriptor\n"));
+ pfm_rvfree(smpl_buf, size);
+ return -ENOMEM;
+ }
/* allocate vma */
vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
- if (!vma) goto no_vma;
-
- /* XXX: see rvmalloc() for page alignment problem */
- smpl_buf = rvmalloc(size);
- if (smpl_buf == NULL) goto no_buffer;
-
- DBprintk((" smpl_buf @%p\n", smpl_buf));
-
- if (pfm_remap_buffer((unsigned long)smpl_buf, addr, size)) goto cant_remap;
-
- /* allocate sampling buffer descriptor now */
- psb = vmalloc(sizeof(*psb));
- if (psb == NULL) goto no_buffer_desc;
+ if (!vma) {
+ DBprintk(("Cannot allocate vma\n"));
+ goto error;
+ }
+ /*
+ * partially initialize the vma for the sampling buffer
+ */
+ vma->vm_mm = mm;
+ vma->vm_flags = VM_READ| VM_MAYREAD |VM_RESERVED;
+ vma->vm_page_prot = PAGE_READONLY; /* XXX may need to change */
+ vma->vm_ops = &pfm_vm_ops; /* necessary to get the close() callback */
+ vma->vm_pgoff = 0;
+ vma->vm_file = NULL;
+ vma->vm_raend = 0;
+ vma->vm_private_data = psb; /* information needed by the pfm_vm_close() function */
- /* start with something clean */
- memset(smpl_buf, 0x0, size);
+ /*
+ * Now we have everything we need and we can initialize
+ * and connect all the data structures
+ */
psb->psb_hdr = smpl_buf;
- psb->psb_addr = (char *)smpl_buf+sizeof(perfmon_smpl_hdr_t); /* first entry */
+ psb->psb_addr = ((char *)smpl_buf)+sizeof(perfmon_smpl_hdr_t); /* first entry */
psb->psb_size = size; /* aligned size */
psb->psb_index = 0;
psb->psb_entries = entries;
+ psb->psb_flags = PFM_PSB_VMA; /* remember that there is a vma describing the buffer */
+ psb->psb_refcnt = 1;
- atomic_set(&psb->psb_refcnt, 1);
+ spin_lock_init(&psb->psb_lock);
+ /*
+ * XXX: will need to do cacheline alignment to avoid false sharing in SMP mode and
+ * multitask monitoring.
+ */
psb->psb_entry_size = sizeof(perfmon_smpl_entry_t) + regcount*sizeof(u64);
- DBprintk((" psb @%p entry_size=%ld hdr=%p addr=%p\n", (void *)psb,psb->psb_entry_size, (void *)psb->psb_hdr, (void *)psb->psb_addr));
-
- /* initialize some of the fields of header */
- psb->psb_hdr->hdr_version = PFM_SMPL_HDR_VERSION;
- psb->psb_hdr->hdr_entry_size = sizeof(perfmon_smpl_entry_t)+regcount*sizeof(u64);
- psb->psb_hdr->hdr_pmds = which_pmds;
-
- /* store which PMDS to record */
- ctx->ctx_smpl_regs = which_pmds;
+ DBprintk(("psb @%p entry_size=%ld hdr=%p addr=%p\n",
+ (void *)psb,psb->psb_entry_size, (void *)psb->psb_hdr,
+ (void *)psb->psb_addr));
- /* link to perfmon context */
- ctx->ctx_smpl_buf = psb;
+ /* initialize some of the fields of user visible buffer header */
+ psb->psb_hdr->hdr_version = PFM_SMPL_VERSION;
+ psb->psb_hdr->hdr_entry_size = psb->psb_entry_size;
+ psb->psb_hdr->hdr_pmds[0] = which_pmds[0];
/*
- * initialize the vma for the sampling buffer
+ * Let's do the difficult operations next.
+ *
+ * now we atomically find some area in the address space and
+ * remap the buffer in it.
*/
- vma->vm_mm = mm;
- vma->vm_start = addr;
- vma->vm_end = addr + size;
- vma->vm_flags = VM_READ|VM_MAYREAD;
- vma->vm_page_prot = PAGE_READONLY; /* XXX may need to change */
- vma->vm_ops = NULL;
- vma->vm_pgoff = 0;
- vma->vm_file = NULL;
- vma->vm_raend = 0;
+ down_write(¤t->mm->mmap_sem);
+
- vma->vm_private_data = ctx; /* link to pfm_context(not yet used) */
+ /* find some free area in address space, must have mmap sem held */
+ vma->vm_start = get_unmapped_area(NULL, 0, size, 0, MAP_PRIVATE|MAP_ANONYMOUS);
+ if (vma->vm_start == 0UL) {
+ DBprintk(("Cannot find unmapped area for size %ld\n", size));
+ up_write(¤t->mm->mmap_sem);
+ goto error;
+ }
+ vma->vm_end = vma->vm_start + size;
+
+ DBprintk(("entries=%ld aligned size=%ld, unmapped @0x%lx\n", entries, size, vma->vm_start));
+
+ /* can only be applied to current, need to have the mm semaphore held when called */
+ if (pfm_remap_buffer((unsigned long)smpl_buf, vma->vm_start, size)) {
+ DBprintk(("Can't remap buffer\n"));
+ up_write(¤t->mm->mmap_sem);
+ goto error;
+ }
/*
- * now insert the vma in the vm list for the process
+ * now insert the vma in the vm list for the process, must be
+ * done with mmap lock held
*/
insert_vm_struct(mm, vma);
mm->total_vm += size >> PAGE_SHIFT;
+ up_write(¤t->mm->mmap_sem);
+
+ /* store which PMDS to record */
+ ctx->ctx_smpl_regs[0] = which_pmds[0];
+
+
+ /* link to perfmon context */
+ ctx->ctx_psb = psb;
+
/*
- * that's the address returned to the user
+ * keep track of user level virtual address
*/
- *user_addr = (void *)addr;
+ ctx->ctx_smpl_vaddr = *(unsigned long *)user_vaddr = vma->vm_start;
return 0;
- /* outlined error handling */
-no_addr:
- DBprintk(("Cannot find unmapped area for size %ld\n", size));
- return -ENOMEM;
-no_vma:
- DBprintk(("Cannot allocate vma\n"));
- return -ENOMEM;
-cant_remap:
- DBprintk(("Can't remap buffer\n"));
- rvfree(smpl_buf, size);
-no_buffer:
- DBprintk(("Can't allocate sampling buffer\n"));
- kmem_cache_free(vm_area_cachep, vma);
- return -ENOMEM;
-no_buffer_desc:
- DBprintk(("Can't allocate sampling buffer descriptor\n"));
- kmem_cache_free(vm_area_cachep, vma);
- rvfree(smpl_buf, size);
+error:
+ pfm_rvfree(smpl_buf, size);
+ kfree(psb);
return -ENOMEM;
}
+/*
+ * XXX: do something better here
+ */
+static int
+pfm_bad_permissions(struct task_struct *task)
+{
+ /* stolen from bad_signal() */
+ return (current->session != task->session)
+ && (current->euid ^ task->suid) && (current->euid ^ task->uid)
+ && (current->uid ^ task->suid) && (current->uid ^ task->uid);
+}
+
+
static int
-pfx_is_sane(pfreq_context_t *pfx)
+pfx_is_sane(struct task_struct *task, pfarg_context_t *pfx)
{
int ctx_flags;
+ int cpu;
/* valid signal */
- //if (pfx->notify_sig < 1 || pfx->notify_sig >= _NSIG) return -EINVAL;
- if (pfx->notify_sig !=0 && pfx->notify_sig != SIGPROF) return -EINVAL;
/* cannot send to process 1, 0 means do not notify */
- if (pfx->notify_pid < 0 || pfx->notify_pid == 1) return -EINVAL;
-
- ctx_flags = pfx->flags;
+ if (pfx->ctx_notify_pid == 1) {
+ DBprintk(("invalid notify_pid %d\n", pfx->ctx_notify_pid));
+ return -EINVAL;
+ }
+ ctx_flags = pfx->ctx_flags;
if (ctx_flags & PFM_FL_SYSTEM_WIDE) {
-#ifdef CONFIG_SMP
- if (smp_num_cpus > 1) {
- printk("perfmon: system wide monitoring on SMP not yet supported\n");
+ DBprintk(("cpu_mask=0x%lx\n", pfx->ctx_cpu_mask));
+ /*
+ * cannot block in this mode
+ */
+ if (ctx_flags & PFM_FL_NOTIFY_BLOCK) {
+ DBprintk(("cannot use blocking mode when in system wide monitoring\n"));
return -EINVAL;
}
-#endif
- if ((ctx_flags & PFM_FL_SMPL_OVFL_NOBLOCK) == 0) {
- printk("perfmon: system wide monitoring cannot use blocking notification mode\n");
+ /*
+ * must only have one bit set in the CPU mask
+ */
+ if (hweight64(pfx->ctx_cpu_mask) != 1UL) {
+ DBprintk(("invalid CPU mask specified\n"));
+ return -EINVAL;
+ }
+ /*
+ * and it must be a valid CPU
+ */
+ cpu = ffs(pfx->ctx_cpu_mask);
+ if (cpu > smp_num_cpus) {
+ DBprintk(("CPU%d is not online\n", cpu));
+ return -EINVAL;
+ }
+ /*
+ * check for pre-existing pinning, if conflicting reject
+ */
+ if (task->cpus_allowed != ~0UL && (task->cpus_allowed & (1UL<<cpu)) == 0) {
+ DBprintk(("[%d] pinned on 0x%lx, mask for CPU%d \n", task->pid,
+ task->cpus_allowed, cpu));
return -EINVAL;
}
+
+ } else {
+ /*
+ * must provide a target for the signal in blocking mode even when
+ * no counter is configured with PFM_FL_REG_OVFL_NOTIFY
+ */
+ if ((ctx_flags & PFM_FL_NOTIFY_BLOCK) && pfx->ctx_notify_pid == 0) return -EINVAL;
}
/* probably more to add here */
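For system-wide contexts, pfx_is_sane() requires the CPU mask to have exactly one bit set (hweight64() == 1) and that bit to name an online CPU (located via ffs()). A small sketch of that validation outside the kernel; num_online and the function names stand in for smp_num_cpus and the real checks.

#include <stdio.h>
#include <strings.h>		/* ffs() */

static int popcount64(unsigned long x)
{
	int n = 0;
	for (; x; x &= x - 1) n++;	/* same result as hweight64() */
	return n;
}

static int check_cpu_mask(unsigned long mask, int num_online)
{
	if (popcount64(mask) != 1)
		return -1;		/* must pin to exactly one CPU */
	if (ffs(mask) > num_online)	/* ffs() is 1-based, like the kernel code */
		return -1;
	return 0;
}

int main(void)
{
	printf("%d %d %d\n",
	       check_cpu_mask(1UL << 2, 4),	/* CPU2 on a 4-CPU box: ok */
	       check_cpu_mask(0x3, 4),		/* two bits set: rejected */
	       check_cpu_mask(1UL << 6, 4));	/* CPU6 not online: rejected */
	return 0;
}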
@@ -622,68 +859,97 @@
}
static int
-pfm_context_create(int flags, perfmon_req_t *req)
+pfm_create_context(struct task_struct *task, pfm_context_t *ctx, void *req, int count,
+ struct pt_regs *regs)
{
- pfm_context_t *ctx;
- struct task_struct *task = NULL;
- perfmon_req_t tmp;
+ pfarg_context_t tmp;
void *uaddr = NULL;
- int ret;
+ int ret, cpu = 0;
int ctx_flags;
- pid_t pid;
+ pid_t notify_pid;
- /* to go away */
- if (flags) {
- printk("perfmon: use context flags instead of perfmon() flags. Obsoleted API\n");
- }
+ /* a context has already been defined */
+ if (ctx) return -EBUSY;
+
+ /*
+ * not yet supported
+ */
+ if (task != current) return -EINVAL;
if (copy_from_user(&tmp, req, sizeof(tmp))) return -EFAULT;
- ret = pfx_is_sane(&tmp.pfr_ctx);
+ ret = pfx_is_sane(task, &tmp);
if (ret < 0) return ret;
- ctx_flags = tmp.pfr_ctx.flags;
+ ctx_flags = tmp.ctx_flags;
+
+ ret = -EBUSY;
+
+ LOCK_PFS();
if (ctx_flags & PFM_FL_SYSTEM_WIDE) {
+
+ /* at this point, we know there is at least one bit set */
+ cpu = ffs(tmp.ctx_cpu_mask) - 1;
+
+ DBprintk(("requesting CPU%d currently on CPU%d\n",cpu, smp_processor_id()));
+
+ if (pfm_sessions.pfs_task_sessions > 0) {
+ DBprintk(("system wide not possible, task_sessions=%ld\n", pfm_sessions.pfs_task_sessions));
+ goto abort;
+ }
+
+ if (pfm_sessions.pfs_sys_session[cpu]) {
+ DBprintk(("system wide not possible, conflicting session [%d] on CPU%d\n",pfm_sessions.pfs_sys_session[cpu]->pid, cpu));
+ goto abort;
+ }
+ pfm_sessions.pfs_sys_session[cpu] = task;
/*
- * XXX: This is not AT ALL SMP safe
+ * count the number of system wide sessions
*/
- if (pfs_info.pfs_proc_sessions > 0) return -EBUSY;
- if (pfs_info.pfs_sys_session > 0) return -EBUSY;
+ pfm_sessions.pfs_sys_sessions++;
- pfs_info.pfs_sys_session = 1;
-
- } else if (pfs_info.pfs_sys_session >0) {
+ } else if (pfm_sessions.pfs_sys_sessions == 0) {
+ pfm_sessions.pfs_task_sessions++;
+ } else {
/* no per-process monitoring while there is a system wide session */
- return -EBUSY;
- } else
- pfs_info.pfs_proc_sessions++;
+ goto abort;
+ }
+
+ UNLOCK_PFS();
+
+ ret = -ENOMEM;
ctx = pfm_context_alloc();
if (!ctx) goto error;
- /* record the creator (debug only) */
- ctx->ctx_creator = current;
+ /* record the creator (important for inheritance) */
+ ctx->ctx_owner = current;
+
+ notify_pid = tmp.ctx_notify_pid;
- pid = tmp.pfr_ctx.notify_pid;
+ spin_lock_init(&ctx->ctx_lock);
- spin_lock_init(&ctx->ctx_notify_lock);
+ if (notify_pid == current->pid) {
- if (pid == current->pid) {
ctx->ctx_notify_task = task = current;
current->thread.pfm_context = ctx;
- atomic_set(¤t->thread.pfm_notifiers_check, 1);
+ } else if (notify_pid!=0) {
+ struct task_struct *notify_task;
- } else if (pid!=0) {
read_lock(&tasklist_lock);
- task = find_task_by_pid(pid);
- if (task) {
+ notify_task = find_task_by_pid(notify_pid);
+
+ if (notify_task) {
+
+ ret = -EPERM;
+
/*
- * record who to notify
- */
- ctx->ctx_notify_task = task;
+ * check if we can send this task a signal
+ */
+ if (pfm_bad_permissions(notify_task)) goto buffer_error;
/*
* make visible
@@ -702,7 +968,9 @@
* task has been detached from the tasklist otherwise you are
* exposed to race conditions.
*/
- atomic_add(1, &task->thread.pfm_notifiers_check);
+ atomic_add(1, &ctx->ctx_notify_task->thread.pfm_notifiers_check);
+
+ ctx->ctx_notify_task = notify_task;
}
read_unlock(&tasklist_lock);
}
@@ -710,37 +978,48 @@
/*
* notification process does not exist
*/
- if (pid != 0 && task == NULL) {
+ if (notify_pid != 0 && ctx->ctx_notify_task == NULL) {
ret = -EINVAL;
goto buffer_error;
}
- ctx->ctx_notify_sig = SIGPROF; /* siginfo imposes a fixed signal */
+ if (tmp.ctx_smpl_entries) {
+ DBprintk(("sampling entries=%ld\n",tmp.ctx_smpl_entries));
- if (tmp.pfr_ctx.smpl_entries) {
- DBprintk((" sampling entries=%ld\n",tmp.pfr_ctx.smpl_entries));
-
- ret = pfm_smpl_buffer_alloc(ctx, tmp.pfr_ctx.smpl_regs,
- tmp.pfr_ctx.smpl_entries, &uaddr);
+ ret = pfm_smpl_buffer_alloc(ctx, tmp.ctx_smpl_regs,
+ tmp.ctx_smpl_entries, &uaddr);
if (ret<0) goto buffer_error;
- tmp.pfr_ctx.smpl_vaddr = uaddr;
+ tmp.ctx_smpl_vaddr = uaddr;
}
/* initialization of context's flags */
- ctx->ctx_fl_inherit = ctx_flags & PFM_FL_INHERIT_MASK;
- ctx->ctx_fl_noblock = (ctx_flags & PFM_FL_SMPL_OVFL_NOBLOCK) ? 1 : 0;
- ctx->ctx_fl_system = (ctx_flags & PFM_FL_SYSTEM_WIDE) ? 1: 0;
- ctx->ctx_fl_exclintr = (ctx_flags & PFM_FL_EXCL_INTR) ? 1: 0;
- ctx->ctx_fl_frozen = 0;
+ ctx->ctx_fl_inherit = ctx_flags & PFM_FL_INHERIT_MASK;
+ ctx->ctx_fl_block = (ctx_flags & PFM_FL_NOTIFY_BLOCK) ? 1 : 0;
+ ctx->ctx_fl_system = (ctx_flags & PFM_FL_SYSTEM_WIDE) ? 1: 0;
+ ctx->ctx_fl_frozen = 0;
+ /*
+ * setting this flag to 0 here means, that the creator or the task that the
+ * context is being attached are granted access. Given that a context can only
+ * be created for the calling process this, in effect only allows the creator
+ * to access the context. See pfm_protect() for more.
+ */
+ ctx->ctx_fl_protected = 0;
+
+ /* for system wide mode only (only 1 bit set) */
+ ctx->ctx_cpu = cpu;
+
+ atomic_set(&ctx->ctx_last_cpu,-1); /* SMP only, means no CPU */
/*
* Keep track of the pmds we want to sample
* XXX: may be we don't need to save/restore the DEAR/IEAR pmds
* but we do need the BTB for sure. This is because of a hardware
* buffer of 1 only for non-BTB pmds.
+ *
+ * We ignore the unimplemented pmds specified by the user
*/
- ctx->ctx_used_pmds[0] = tmp.pfr_ctx.smpl_regs;
- ctx->ctx_used_pmcs[0] = 1; /* always save/restore PMC[0] */
+ ctx->ctx_used_pmds[0] = tmp.ctx_smpl_regs[0] & pmu_conf.impl_regs[4];
+ ctx->ctx_saved_pmcs[0] = 1; /* always save/restore PMC[0] */
sema_init(&ctx->ctx_restart_sem, 0); /* init this semaphore to locked */
@@ -750,31 +1029,28 @@
goto buffer_error;
}
- DBprintk((" context=%p, pid=%d notify_sig %d notify_task=%p\n",(void *)ctx, current->pid, ctx->ctx_notify_sig, ctx->ctx_notify_task));
- DBprintk((" context=%p, pid=%d flags=0x%x inherit=%d noblock=%d system=%d\n",(void *)ctx, current->pid, ctx_flags, ctx->ctx_fl_inherit, ctx->ctx_fl_noblock, ctx->ctx_fl_system));
+ DBprintk(("context=%p, pid=%d notify_task=%p\n",
+ (void *)ctx, task->pid, ctx->ctx_notify_task));
+
+ DBprintk(("context=%p, pid=%d flags=0x%x inherit=%d block=%d system=%d\n",
+ (void *)ctx, task->pid, ctx_flags, ctx->ctx_fl_inherit,
+ ctx->ctx_fl_block, ctx->ctx_fl_system));
/*
* when no notification is required, we can make this visible at the last moment
*/
- if (pid == 0) current->thread.pfm_context = ctx;
-
+ if (notify_pid == 0) task->thread.pfm_context = ctx;
/*
- * by default, we always include interrupts for system wide
- * DCR.pp is set by default to zero by kernel in cpu_init()
+ * pin task to CPU and force reschedule on exit to ensure
+ * that when back to user level the task runs on the designated
+ * CPU.
*/
if (ctx->ctx_fl_system) {
- if (ctx->ctx_fl_exclintr == 0) {
- unsigned long dcr = ia64_get_dcr();
-
- ia64_set_dcr(dcr|IA64_DCR_PP);
- /*
- * keep track of the kernel default value
- */
- pfs_info.pfs_dfl_dcr = dcr;
-
- DBprintk((" dcr.pp is set\n"));
- }
- }
+ ctx->ctx_saved_cpus_allowed = task->cpus_allowed;
+ task->cpus_allowed = 1UL << cpu;
+ task->need_resched = 1;
+ DBprintk(("[%d] rescheduled allowed=0x%lx\n", task->pid,task->cpus_allowed));
+ }
return 0;
@@ -784,225 +1060,492 @@
/*
* undo session reservation
*/
+ LOCK_PFS();
+
if (ctx_flags & PFM_FL_SYSTEM_WIDE) {
- pfs_info.pfs_sys_session = 0;
+ pfm_sessions.pfs_sys_session[cpu] = NULL;
+ pfm_sessions.pfs_sys_sessions--;
} else {
- pfs_info.pfs_proc_sessions--;
+ pfm_sessions.pfs_task_sessions--;
}
+abort:
+ UNLOCK_PFS();
+
return ret;
}
static void
-pfm_reset_regs(pfm_context_t *ctx)
+pfm_reset_regs(pfm_context_t *ctx, unsigned long *ovfl_regs, int flag)
{
- unsigned long mask = ctx->ctx_ovfl_regs;
- int i, cnum;
+ unsigned long mask = ovfl_regs[0];
+ unsigned long reset_others = 0UL;
+ unsigned long val;
+ int i;
+
+ DBprintk(("masks=0x%lx\n", mask));
- DBprintk((" ovfl_regs=0x%lx\n", mask));
/*
* now restore reset value on sampling overflowed counters
*/
- for(i=0, cnum=PMU_FIRST_COUNTER; i < pmu_conf.max_counters; i++, cnum++, mask >>= 1) {
+ mask >>= PMU_FIRST_COUNTER;
+ for(i = PMU_FIRST_COUNTER; mask; i++, mask >>= 1) {
if (mask & 0x1) {
- DBprintk((" reseting PMD[%d]=%lx\n", cnum, ctx->ctx_pmds[i].smpl_rval & pmu_conf.perf_ovfl_val));
+ val = flag == PFM_RELOAD_LONG_RESET ?
+ ctx->ctx_soft_pmds[i].long_reset:
+ ctx->ctx_soft_pmds[i].short_reset;
+
+ reset_others |= ctx->ctx_soft_pmds[i].reset_pmds[0];
+
+ DBprintk(("[%d] %s reset soft_pmd[%d]=%lx\n",
+ current->pid,
+ flag == PFM_RELOAD_LONG_RESET ? "long" : "short", i, val));
/* upper part is ignored on rval */
- ia64_set_pmd(cnum, ctx->ctx_pmds[i].smpl_rval);
+ pfm_write_soft_counter(ctx, i, val);
+ }
+ }
- /*
- * we must reset BTB index (clears pmd16.full to make
- * sure we do not report the same branches twice.
- * The non-blocking case in handled in update_counters()
- */
- if (cnum == ctx->ctx_btb_counter) {
- DBprintk(("reseting PMD16\n"));
- ia64_set_pmd(16, 0);
- }
+ /*
+ * Now take care of resetting the other registers
+ */
+ for(i = 0; reset_others; i++, reset_others >>= 1) {
+
+ if ((reset_others & 0x1) == 0) continue;
+
+ val = flag == PFM_RELOAD_LONG_RESET ?
+ ctx->ctx_soft_pmds[i].long_reset:
+ ctx->ctx_soft_pmds[i].short_reset;
+
+ if (PMD_IS_COUNTING(i)) {
+ pfm_write_soft_counter(ctx, i, val);
+ } else {
+ ia64_set_pmd(i, val);
}
+
+ DBprintk(("[%d] %s reset_others pmd[%d]=%lx\n",
+ current->pid,
+ flag == PFM_RELOAD_LONG_RESET ? "long" : "short", i, val));
}
/* just in case ! */
- ctx->ctx_ovfl_regs = 0;
+ ctx->ctx_ovfl_regs[0] = 0UL;
}
static int
-pfm_write_pmcs(struct task_struct *ta, perfmon_req_t *req, int count)
+pfm_write_pmcs(struct task_struct *ta, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
{
struct thread_struct *th = &ta->thread;
- pfm_context_t *ctx = th->pfm_context;
- perfmon_req_t tmp;
- unsigned long cnum;
+ pfarg_reg_t tmp, *req = (pfarg_reg_t *)arg;
+ unsigned int cnum;
int i;
+ int ret = 0, reg_retval = 0;
+
+ /* we don't quite support this right now */
+ if (ta != current) return -EINVAL;
+
+ if (!CTX_IS_ENABLED(ctx)) return -EINVAL;
/* XXX: ctx locking may be required here */
for (i = 0; i < count; i++, req++) {
+
if (copy_from_user(&tmp, req, sizeof(tmp))) return -EFAULT;
- cnum = tmp.pfr_reg.reg_num;
+ cnum = tmp.reg_num;
- /* XXX needs to check validity of the data maybe */
- if (!PMC_IS_IMPL(cnum)) {
- DBprintk((" invalid pmc[%ld]\n", cnum));
- return -EINVAL;
+ /*
+ * we reject all non-implemented PMCs as well
+ * as attempts to modify PMC[0-3] which are used
+ * as status registers by the PMU
+ */
+ if (!PMC_IS_IMPL(cnum) || cnum < 4) {
+ DBprintk(("pmc[%u] is unimplemented or invalid\n", cnum));
+ ret = -EINVAL;
+ goto abort_mission;
}
+ /*
+ * A PMC used to configure monitors must be:
+ * - system-wide session: privileged monitor
+ * - per-task : user monitor
+ * any other configuration is rejected.
+ */
+ if (PMC_IS_MONITOR(cnum)) {
+ pfm_monitor_t *p = (pfm_monitor_t *)&tmp.reg_value;
- if (PMC_IS_COUNTER(cnum)) {
+ DBprintk(("pmc[%u].pm = %d\n", cnum, p->pmc_pm));
+ if (ctx->ctx_fl_system ^ p->pmc_pm) {
+ //if ((ctx->ctx_fl_system == 1 && p->pmc_pm == 0)
+ // ||(ctx->ctx_fl_system == 0 && p->pmc_pm == 1)) {
+ ret = -EINVAL;
+ goto abort_mission;
+ }
/*
- * we keep track of EARS/BTB to speed up sampling later
+ * enforce generation of overflow interrupt. Necessary on all
+ * CPUs which do not implement 64-bit hardware counters.
*/
- if (PMC_IS_DEAR(&tmp.pfr_reg.reg_value)) {
- ctx->ctx_dear_counter = cnum;
- } else if (PMC_IS_IEAR(&tmp.pfr_reg.reg_value)) {
- ctx->ctx_iear_counter = cnum;
- } else if (PMC_IS_BTB(&tmp.pfr_reg.reg_value)) {
- ctx->ctx_btb_counter = cnum;
+ p->pmc_oi = 1;
+ }
+
+ if (PMC_IS_COUNTING(cnum)) {
+ if (tmp.reg_flags & PFM_REGFL_OVFL_NOTIFY) {
+ /*
+ * must have a target for the signal
+ */
+ if (ctx->ctx_notify_task == NULL) {
+ ret = -EINVAL;
+ goto abort_mission;
+ }
+
+ ctx->ctx_soft_pmds[cnum].flags |= PFM_REGFL_OVFL_NOTIFY;
}
-#if 0
- if (tmp.pfr_reg.reg_flags & PFM_REGFL_OVFL_NOTIFY)
- ctx->ctx_pmds[cnum - PMU_FIRST_COUNTER].flags |= PFM_REGFL_OVFL_NOTIFY;
-#endif
+ /*
+ * copy reset vector
+ */
+ ctx->ctx_soft_pmds[cnum].reset_pmds[0] = tmp.reg_reset_pmds[0];
+ ctx->ctx_soft_pmds[cnum].reset_pmds[1] = tmp.reg_reset_pmds[1];
+ ctx->ctx_soft_pmds[cnum].reset_pmds[2] = tmp.reg_reset_pmds[2];
+ ctx->ctx_soft_pmds[cnum].reset_pmds[3] = tmp.reg_reset_pmds[3];
+
+ /*
+ * needed in case the user does not initialize the equivalent
+ * PMD. Clearing is done in reset_pmu() so there is no possible
+ * leak here.
+ */
+ CTX_USED_PMD(ctx, cnum);
}
- /* keep track of what we use */
- CTX_USED_PMC(ctx, cnum);
- ia64_set_pmc(cnum, tmp.pfr_reg.reg_value);
+abort_mission:
+ if (ret == -EINVAL) reg_retval = PFM_REG_RETFL_EINVAL;
- DBprintk((" setting PMC[%ld]=0x%lx flags=0x%x used_pmcs=0%lx\n", cnum, tmp.pfr_reg.reg_value, ctx->ctx_pmds[cnum - PMU_FIRST_COUNTER].flags, ctx->ctx_used_pmcs[0]));
+ PFM_REG_RETFLAG_SET(tmp.reg_flags, reg_retval);
- }
- /*
- * we have to set this here event hough we haven't necessarily started monitoring
- * because we may be context switched out
- */
- if (ctx->ctx_fl_system==0) th->flags |= IA64_THREAD_PM_VALID;
+ /*
+ * update register return value, abort all if problem during copy.
+ */
+ if (copy_to_user(req, &tmp, sizeof(tmp))) return -EFAULT;
- return 0;
+ /*
+ * if there was something wrong on this register, don't touch
+ * the hardware at all and abort write request for others.
+ *
+ * On error, the user must sequentially scan the table and the first
+ * entry which has a return flag set is the one that caused the error.
+ */
+ if (ret != 0) {
+ DBprintk(("[%d] pmc[%u]=0x%lx error %d\n",
+ ta->pid, cnum, tmp.reg_value, reg_retval));
+ break;
+ }
+
+ /*
+ * We can proceed with this register!
+ */
+
+ /*
+ * keep a copy of the pmc, used for register reload
+ */
+ th->pmc[cnum] = tmp.reg_value;
+
+ ia64_set_pmc(cnum, tmp.reg_value);
+
+ DBprintk(("[%d] pmc[%u]=0x%lx flags=0x%x save_pmcs=0%lx reload_pmcs=0x%lx\n",
+ ta->pid, cnum, tmp.reg_value,
+ ctx->ctx_soft_pmds[cnum].flags,
+ ctx->ctx_saved_pmcs[0], ctx->ctx_reload_pmcs[0]));
+
+ }
+ return ret;
}
static int
-pfm_write_pmds(struct task_struct *ta, perfmon_req_t *req, int count)
+pfm_write_pmds(struct task_struct *ta, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
{
- struct thread_struct *th = &ta->thread;
- pfm_context_t *ctx = th->pfm_context;
- perfmon_req_t tmp;
- unsigned long cnum;
+ pfarg_reg_t tmp, *req = (pfarg_reg_t *)arg;
+ unsigned int cnum;
int i;
+ int ret = 0, reg_retval = 0;
+
+ /* we don't quite support this right now */
+ if (ta != current) return -EINVAL;
+
+ /*
+ * Cannot do anything before PMU is enabled
+ */
+ if (!CTX_IS_ENABLED(ctx)) return -EINVAL;
+
/* XXX: ctx locking may be required here */
for (i = 0; i < count; i++, req++) {
- int k;
if (copy_from_user(&tmp, req, sizeof(tmp))) return -EFAULT;
- cnum = tmp.pfr_reg.reg_num;
-
- k = cnum - PMU_FIRST_COUNTER;
+ cnum = tmp.reg_num;
- if (!PMD_IS_IMPL(cnum)) return -EINVAL;
+ if (!PMD_IS_IMPL(cnum)) {
+ ret = -EINVAL;
+ goto abort_mission;
+ }
/* update virtualized (64bits) counter */
- if (PMD_IS_COUNTER(cnum)) {
- ctx->ctx_pmds[k].ival = tmp.pfr_reg.reg_value;
- ctx->ctx_pmds[k].val = tmp.pfr_reg.reg_value & ~pmu_conf.perf_ovfl_val;
- ctx->ctx_pmds[k].smpl_rval = tmp.pfr_reg.reg_smpl_reset;
- ctx->ctx_pmds[k].ovfl_rval = tmp.pfr_reg.reg_ovfl_reset;
+ if (PMD_IS_COUNTING(cnum)) {
+ ctx->ctx_soft_pmds[cnum].ival = tmp.reg_value;
+ ctx->ctx_soft_pmds[cnum].val = tmp.reg_value & ~pmu_conf.perf_ovfl_val;
+ ctx->ctx_soft_pmds[cnum].long_reset = tmp.reg_long_reset;
+ ctx->ctx_soft_pmds[cnum].short_reset = tmp.reg_short_reset;
+
+ }
+abort_mission:
+ if (ret == -EINVAL) reg_retval = PFM_REG_RETFL_EINVAL;
+
+ PFM_REG_RETFLAG_SET(tmp.reg_flags, reg_retval);
- if (tmp.pfr_reg.reg_flags & PFM_REGFL_OVFL_NOTIFY)
- ctx->ctx_pmds[cnum - PMU_FIRST_COUNTER].flags |= PFM_REGFL_OVFL_NOTIFY;
+ if (copy_to_user(req, &tmp, sizeof(tmp))) return -EFAULT;
+
+ /*
+ * if there was something wrong on this register, don't touch
+ * the hardware at all and abort write request for others.
+ *
+ * On error, the user must sequentially scan the table and the first
+ * entry which has a return flag set is the one that caused the error.
+ */
+ if (ret != 0) {
+ DBprintk(("[%d] pmc[%u]=0x%lx error %d\n",
+ ta->pid, cnum, tmp.reg_value, reg_retval));
+ break;
}
+
/* keep track of what we use */
CTX_USED_PMD(ctx, cnum);
/* writes to unimplemented part is ignored, so this is safe */
- ia64_set_pmd(cnum, tmp.pfr_reg.reg_value);
+ ia64_set_pmd(cnum, tmp.reg_value);
/* to go away */
ia64_srlz_d();
- DBprintk((" setting PMD[%ld]: ovfl_notify=%d pmd.val=0x%lx pmd.ovfl_rval=0x%lx pmd.smpl_rval=0x%lx pmd=%lx used_pmds=0%lx\n",
- cnum,
- PMD_OVFL_NOTIFY(ctx, cnum - PMU_FIRST_COUNTER),
- ctx->ctx_pmds[k].val,
- ctx->ctx_pmds[k].ovfl_rval,
- ctx->ctx_pmds[k].smpl_rval,
- ia64_get_pmd(cnum) & pmu_conf.perf_ovfl_val,
- ctx->ctx_used_pmds[0]));
+ DBprintk(("[%d] pmd[%u]: soft_pmd=0x%lx short_reset=0x%lx "
+ "long_reset=0x%lx hw_pmd=%lx notify=%c used_pmds=0x%lx reset_pmds=0x%lx\n",
+ ta->pid, cnum,
+ ctx->ctx_soft_pmds[cnum].val,
+ ctx->ctx_soft_pmds[cnum].short_reset,
+ ctx->ctx_soft_pmds[cnum].long_reset,
+ ia64_get_pmd(cnum) & pmu_conf.perf_ovfl_val,
+ PMC_OVFL_NOTIFY(ctx, cnum) ? 'Y':'N',
+ ctx->ctx_used_pmds[0],
+ ctx->ctx_soft_pmds[cnum].reset_pmds[0]));
}
- /*
- * we have to set this here event hough we haven't necessarily started monitoring
- * because we may be context switched out
- */
- if (ctx->ctx_fl_system==0) th->flags |= IA64_THREAD_PM_VALID;
-
- return 0;
+ return ret;
}
static int
-pfm_read_pmds(struct task_struct *ta, perfmon_req_t *req, int count)
+pfm_read_pmds(struct task_struct *ta, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
{
struct thread_struct *th = &ta->thread;
- pfm_context_t *ctx = th->pfm_context;
unsigned long val=0;
- perfmon_req_t tmp;
+ pfarg_reg_t tmp, *req = (pfarg_reg_t *)arg;
int i;
+ if (!CTX_IS_ENABLED(ctx)) return -EINVAL;
+
/*
* XXX: MUST MAKE SURE WE DON"T HAVE ANY PENDING OVERFLOW BEFORE READING
- * This is required when the monitoring has been stoppped by user of kernel.
- * If ity is still going on, then that's fine because we a re not gauranteed
- * to return an accurate value in this case
+ * This is required when the monitoring has been stopped by user or kernel.
+ * If it is still going on, then that's fine because we are not guaranteed
+ * to return an accurate value in this case.
*/
/* XXX: ctx locking may be required here */
+ DBprintk(("ctx_last_cpu=%d for [%d]\n", atomic_read(&ctx->ctx_last_cpu), ta->pid));
+
for (i = 0; i < count; i++, req++) {
- unsigned long reg_val = ~0, ctx_val = ~0;
+ unsigned long reg_val = ~0UL, ctx_val = ~0UL;
if (copy_from_user(&tmp, req, sizeof(tmp))) return -EFAULT;
- if (!PMD_IS_IMPL(tmp.pfr_reg.reg_num)) return -EINVAL;
+ if (!PMD_IS_IMPL(tmp.reg_num)) goto abort_mission;
- if (PMD_IS_COUNTER(tmp.pfr_reg.reg_num)) {
- if (ta == current){
- val = ia64_get_pmd(tmp.pfr_reg.reg_num);
- } else {
- val = reg_val = th->pmd[tmp.pfr_reg.reg_num];
+ /*
+ * If the task is not the current one, then we check if the
+ * PMU state is still in the local live register due to lazy ctxsw.
+ * If true, then we read directly from the registers.
+ */
+ if (atomic_read(&ctx->ctx_last_cpu) == smp_processor_id()){
+ ia64_srlz_d();
+ val = reg_val = ia64_get_pmd(tmp.reg_num);
+ DBprintk(("reading pmd[%u]=0x%lx from hw\n", tmp.reg_num, val));
+ } else {
+#ifdef CONFIG_SMP
+ int cpu;
+ /*
+			 * on an SMP system, the context may still be live on another
+			 * CPU, so we need to fetch it before proceeding with the read.
+			 * This call will only be made once for the whole loop because
+			 * ctx_last_cpu becomes -1 afterwards.
+ *
+ * We cannot reuse ctx_last_cpu as it may change before we get to the
+ * actual IPI call. In this case, we will do the call for nothing but
+ * there is no way around it. The receiving side will simply do nothing.
+ */
+ cpu = atomic_read(&ctx->ctx_last_cpu);
+ if (cpu != -1) {
+ DBprintk(("must fetch on CPU%d for [%d]\n", cpu, ta->pid));
+ pfm_fetch_regs(cpu, ta, ctx);
}
- val &= pmu_conf.perf_ovfl_val;
+#endif
+ /* context has been saved */
+ val = reg_val = th->pmd[tmp.reg_num];
+ }
+ if (PMD_IS_COUNTING(tmp.reg_num)) {
/*
- * lower part of .val may not be zero, so we must be an addition because of
- * residual count (see update_counters).
+ * XXX: need to check for overflow
*/
- val += ctx_val = ctx->ctx_pmds[tmp.pfr_reg.reg_num - PMU_FIRST_COUNTER].val;
+
+ val &= pmu_conf.perf_ovfl_val;
+ val += ctx_val = ctx->ctx_soft_pmds[tmp.reg_num].val;
} else {
- /* for now */
- if (ta != current) return -EINVAL;
- ia64_srlz_d();
- val = ia64_get_pmd(tmp.pfr_reg.reg_num);
+ val = reg_val = ia64_get_pmd(tmp.reg_num);
}
- tmp.pfr_reg.reg_value = val;
+ PFM_REG_RETFLAG_SET(tmp.reg_flags, 0);
+ tmp.reg_value = val;
- DBprintk((" reading PMD[%ld]=0x%lx reg=0x%lx ctx_val=0x%lx pmc=0x%lx\n",
- tmp.pfr_reg.reg_num, val, reg_val, ctx_val, ia64_get_pmc(tmp.pfr_reg.reg_num)));
+ DBprintk(("read pmd[%u] soft_pmd=0x%lx reg=0x%lx pmc=0x%lx\n",
+ tmp.reg_num, ctx_val, reg_val,
+ ia64_get_pmc(tmp.reg_num)));
if (copy_to_user(req, &tmp, sizeof(tmp))) return -EFAULT;
}
return 0;
+abort_mission:
+ PFM_REG_RETFLAG_SET(tmp.reg_flags, PFM_REG_RETFL_EINVAL);
+ /*
+	 * XXX: if this fails, we stick with the original failure; the flag is not updated!
+ */
+ copy_to_user(req, &tmp, sizeof(tmp));
+ return -EINVAL;
+
+}
+
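
The read path above rebuilds the full 64-bit counter value by adding the software-maintained ctx_soft_pmds[].val to the implemented low bits of the hardware PMD. A rough, stand-alone sketch of that composition (plain user-space C, not kernel code; the 47-bit counter width and the helper name are assumptions made for the example):

#include <stdio.h>

/* sketch: rebuild a 64-bit virtual counter from a narrow hardware counter
 * plus a software-accumulated part, in the spirit of pfm_read_pmds() above
 */
static unsigned long
virtual_pmd(unsigned long hw_pmd, unsigned long soft_val, unsigned long ovfl_val)
{
	/* keep only the implemented bits, then add the accumulated overflows */
	return (hw_pmd & ovfl_val) + soft_val;
}

int
main(void)
{
	unsigned long ovfl_val = (1UL << 47) - 1;	/* assumed 47-bit counters */
	unsigned long soft_val = 2 * (ovfl_val + 1);	/* two overflows already folded in */
	unsigned long hw_pmd   = 0x1234;		/* residual hardware count */

	printf("pmd value seen by the user: 0x%lx\n",
	       virtual_pmd(hw_pmd, soft_val, ovfl_val));
	return 0;
}
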
+#ifdef PFM_PMU_USES_DBR
+/*
+ * Only call this function when a process is trying to
+ * write the debug registers (reading is always allowed)
+ */
+int
+pfm_use_debug_registers(struct task_struct *task)
+{
+ pfm_context_t *ctx = task->thread.pfm_context;
+ int ret = 0;
+
+ DBprintk(("called for [%d]\n", task->pid));
+
+ /*
+ * do it only once
+ */
+ if (task->thread.flags & IA64_THREAD_DBG_VALID) return 0;
+
+ /*
+ * Even on SMP, we do not need to use an atomic here because
+ * the only way in is via ptrace() and this is possible only when the
+ * process is stopped. Even in the case where the ctxsw out is not totally
+ * completed by the time we come here, there is no way the 'stopped' process
+ * could be in the middle of fiddling with the pfm_write_ibr_dbr() routine.
+ * So this is always safe.
+ */
+ if (ctx && ctx->ctx_fl_using_dbreg == 1) return -1;
+
+ /*
+ * XXX: not pretty
+ */
+ LOCK_PFS();
+
+ /*
+ * We only allow the use of debug registers when there is no system
+ * wide monitoring
+ * XXX: we could relax this by
+ */
+	if (pfm_sessions.pfs_sys_use_dbregs > 0)
+ ret = -1;
+ else
+ pfm_sessions.pfs_ptrace_use_dbregs++;
+
+ DBprintk(("ptrace_use_dbregs=%lu sys_use_dbregs=%lu by [%d] ret = %d\n",
+ pfm_sessions.pfs_ptrace_use_dbregs,
+ pfm_sessions.pfs_sys_use_dbregs,
+ task->pid, ret));
+
+ UNLOCK_PFS();
+
+ return ret;
+}
+
+/*
+ * This function is called for every task that exits with the
+ * IA64_THREAD_DBG_VALID set. This indicates a task which was
+ * able to use the debug registers for debugging purposes via
+ * ptrace(). Therefore we know it was not using them for
+ * performance monitoring, so we only decrement the number
+ * of "ptraced" debug register users to keep the count up to date
+ */
+int
+pfm_release_debug_registers(struct task_struct *task)
+{
+ int ret;
+
+ LOCK_PFS();
+ if (pfm_sessions.pfs_ptrace_use_dbregs == 0) {
+ printk("perfmon: invalid release for [%d] ptrace_use_dbregs=0\n", task->pid);
+ ret = -1;
+ } else {
+ pfm_sessions.pfs_ptrace_use_dbregs--;
+ ret = 0;
+ }
+ UNLOCK_PFS();
+
+ return ret;
+}
+#else /* !PFM_PMU_USES_DBR */
+/*
+ * In case the PMU does not use the debug registers, these two functions are nops.
+ * The first function is called from arch/ia64/kernel/ptrace.c.
+ * The second function is called from arch/ia64/kernel/process.c.
+ */
+int
+pfm_use_debug_registers(struct task_struct *task)
+{
+ return 0;
+}
+int
+pfm_release_debug_registers(struct task_struct *task)
+{
+ return 0;
}
+#endif /* PFM_PMU_USES_DBR */
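
pfm_use_debug_registers() and pfm_release_debug_registers() above implement a simple mutual-exclusion count: ptrace() users of the debug registers and system-wide perfmon sessions exclude each other, each side bumping its own counter under the pfm_sessions lock. A minimal stand-alone sketch of that accounting idea (user-space C with a pthread mutex standing in for LOCK_PFS(); all names are illustrative):

#include <pthread.h>
#include <stdio.h>

struct dbreg_sessions {
	pthread_mutex_t	lock;
	unsigned long	ptrace_use;	/* plays the role of pfs_ptrace_use_dbregs */
	unsigned long	sys_use;	/* plays the role of pfs_sys_use_dbregs */
};

static int
claim_for_ptrace(struct dbreg_sessions *s)
{
	int ret = 0;

	pthread_mutex_lock(&s->lock);
	if (s->sys_use > 0)
		ret = -1;		/* busy: system-wide monitoring owns them */
	else
		s->ptrace_use++;
	pthread_mutex_unlock(&s->lock);
	return ret;
}

static void
release_from_ptrace(struct dbreg_sessions *s)
{
	pthread_mutex_lock(&s->lock);
	if (s->ptrace_use > 0)
		s->ptrace_use--;
	pthread_mutex_unlock(&s->lock);
}

int
main(void)
{
	struct dbreg_sessions s = { PTHREAD_MUTEX_INITIALIZER, 0, 0 };

	printf("claim: %d\n", claim_for_ptrace(&s));
	release_from_ptrace(&s);
	return 0;
}
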
static int
-pfm_do_restart(struct task_struct *task)
+pfm_restart(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
+ struct pt_regs *regs)
{
- struct thread_struct *th = &task->thread;
- pfm_context_t *ctx = th->pfm_context;
void *sem = &ctx->ctx_restart_sem;
+ /*
+ * Cannot do anything before PMU is enabled
+ */
+ if (!CTX_IS_ENABLED(ctx)) return -EINVAL;
+
+
+ if (ctx->ctx_fl_frozen==0) {
+ printk("task %d without pmu_frozen set\n", task->pid);
+ return -EINVAL;
+ }
+
if (task == current) {
- DBprintk((" restarting self %d frozen=%d \n", current->pid, ctx->ctx_fl_frozen));
+ DBprintk(("restarting self %d frozen=%d \n", current->pid, ctx->ctx_fl_frozen));
+
+ pfm_reset_regs(ctx, ctx->ctx_ovfl_regs, PFM_RELOAD_LONG_RESET);
- pfm_reset_regs(ctx);
+ ctx->ctx_ovfl_regs[0] = 0UL;
/*
* We ignore block/don't block because we never block
@@ -1011,26 +1554,36 @@
ctx->ctx_fl_frozen = 0;
if (CTX_HAS_SMPL(ctx)) {
- ctx->ctx_smpl_buf->psb_hdr->hdr_count = 0;
- ctx->ctx_smpl_buf->psb_index = 0;
+ ctx->ctx_psb->psb_hdr->hdr_count = 0;
+ ctx->ctx_psb->psb_index = 0;
}
- /* pfm_reset_smpl_buffers(ctx,th->pfm_ovfl_regs);*/
-
/* simply unfreeze */
ia64_set_pmc(0, 0);
ia64_srlz_d();
return 0;
- }
+ }
+ /* restart on another task */
- /* check if blocking */
+ /*
+ * if blocking, then post the semaphore.
+ * if non-blocking, then we ensure that the task will go into
+ * pfm_overflow_must_block() before returning to user mode.
+	 * We cannot explicitly reset another task; it MUST always
+ * be done by the task itself. This works for system wide because
+ * the tool that is controlling the session is doing "self-monitoring".
+ *
+ * XXX: what if the task never goes back to user?
+ *
+ */
if (CTX_OVFL_NOBLOCK(ctx) == 0) {
- DBprintk((" unblocking %d \n", task->pid));
+ DBprintk(("unblocking %d \n", task->pid));
up(sem);
- return 0;
+ } else {
+ task->thread.pfm_ovfl_block_reset = 1;
}
-
+#if 0
/*
 * in case of non-blocking mode, it's just a matter
 * of resetting the sampling buffer (if any) index. The PMU
@@ -1041,281 +1594,719 @@
* must reset the header count first
*/
if (CTX_HAS_SMPL(ctx)) {
- DBprintk((" resetting sampling indexes for %d \n", task->pid));
- ctx->ctx_smpl_buf->psb_hdr->hdr_count = 0;
- ctx->ctx_smpl_buf->psb_index = 0;
+ DBprintk(("resetting sampling indexes for %d \n", task->pid));
+ ctx->ctx_psb->psb_hdr->hdr_count = 0;
+ ctx->ctx_psb->psb_index = 0;
}
-
+#endif
return 0;
}
+#ifndef CONFIG_SMP
/*
- * system-wide mode: propagate activation/desactivation throughout the tasklist
- *
- * XXX: does not work for SMP, of course
+ * On UP kernels, we do not need to constantly set the psr.pp bit
+ * when a task is scheduled. The psr.pp bit can only be changed in
+ * the kernel because of a user request. Given we are on a UP non-preemptive
+ * kernel, we know that no other task is running, so we can simply update their
+ * psr.pp from their saved state. There is thus no impact on the context switch
+ * code compared to the SMP case.
*/
static void
-pfm_process_tasklist(int cmd)
+pfm_tasklist_toggle_pp(unsigned int val)
{
struct task_struct *p;
struct pt_regs *regs;
+ DBprintk(("invoked by [%d] pp=%u\n", current->pid, val));
+
+ read_lock(&tasklist_lock);
+
for_each_task(p) {
- regs = (struct pt_regs *)((unsigned long)p + IA64_STK_OFFSET);
+ regs = (struct pt_regs *)((unsigned long) p + IA64_STK_OFFSET);
+
+ /*
+ * position on pt_regs saved on stack on 1st entry into the kernel
+ */
regs--;
- ia64_psr(regs)->pp = cmd;
+
+ /*
+ * update psr.pp
+ */
+ ia64_psr(regs)->pp = val;
}
+ read_unlock(&tasklist_lock);
}
+#endif
+
+
static int
-do_perfmonctl (struct task_struct *task, int cmd, int flags, perfmon_req_t *req, int count, struct pt_regs *regs)
+pfm_stop(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
+ struct pt_regs *regs)
{
- perfmon_req_t tmp;
- struct thread_struct *th = &task->thread;
- pfm_context_t *ctx = th->pfm_context;
-
- memset(&tmp, 0, sizeof(tmp));
+ /* we don't quite support this right now */
+ if (task != current) return -EINVAL;
- if (ctx == NULL && cmd != PFM_CREATE_CONTEXT && cmd < PFM_DEBUG_BASE) {
- DBprintk((" PFM_WRITE_PMCS: no context for task %d\n", task->pid));
- return -EINVAL;
- }
+ /*
+ * Cannot do anything before PMU is enabled
+ */
+ if (!CTX_IS_ENABLED(ctx)) return -EINVAL;
- switch (cmd) {
- case PFM_CREATE_CONTEXT:
- /* a context has already been defined */
- if (ctx) return -EBUSY;
+ DBprintk(("[%d] fl_system=%d owner=%p current=%p\n",
+ current->pid,
+ ctx->ctx_fl_system, PMU_OWNER(),
+ current));
+ /* simply stop monitoring but not the PMU */
+ if (ctx->ctx_fl_system) {
- /*
- * cannot directly create a context in another process
- */
- if (task != current) return -EINVAL;
+ __asm__ __volatile__ ("rsm psr.pp;;"::: "memory");
- if (req == NULL || count != 1) return -EINVAL;
+ /* disable dcr pp */
+ ia64_set_dcr(ia64_get_dcr() & ~IA64_DCR_PP);
- if (!access_ok(VERIFY_READ, req, sizeof(struct perfmon_req_t)*count)) return -EFAULT;
+#ifdef CONFIG_SMP
+ local_cpu_data->pfm_dcr_pp = 0;
+#else
+ pfm_tasklist_toggle_pp(0);
+#endif
- return pfm_context_create(flags, req);
+ ia64_psr(regs)->pp = 0;
- case PFM_WRITE_PMCS:
- /* we don't quite support this right now */
- if (task != current) return -EINVAL;
+ } else {
+ __asm__ __volatile__ ("rum psr.up;;"::: "memory");
- if (!access_ok(VERIFY_READ, req, sizeof(struct perfmon_req_t)*count)) return -EFAULT;
+ ia64_psr(regs)->up = 0;
+ }
+ return 0;
+}
- return pfm_write_pmcs(task, req, count);
+static int
+pfm_disable(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
+ struct pt_regs *regs)
+{
+ /* we don't quite support this right now */
+ if (task != current) return -EINVAL;
- case PFM_WRITE_PMDS:
- /* we don't quite support this right now */
- if (task != current) return -EINVAL;
+ if (!CTX_IS_ENABLED(ctx)) return -EINVAL;
- if (!access_ok(VERIFY_READ, req, sizeof(struct perfmon_req_t)*count)) return -EFAULT;
+ /*
+ * stop monitoring, freeze PMU, and save state in context
+ * this call will clear IA64_THREAD_PM_VALID for per-task sessions.
+ */
+ pfm_flush_regs(task);
- return pfm_write_pmds(task, req, count);
+ if (ctx->ctx_fl_system) {
+ ia64_psr(regs)->pp = 0;
+ } else {
+ ia64_psr(regs)->up = 0;
+ }
+ /*
+ * goes back to default behavior
+	 * no need to change the live psr.sp because it is useless at the kernel level
+ */
+ ia64_psr(regs)->sp = 1;
- case PFM_START:
- /* we don't quite support this right now */
- if (task != current) return -EINVAL;
+ DBprintk(("enabling psr.sp for [%d]\n", current->pid));
- if (PMU_OWNER() && PMU_OWNER() != current && PFM_CAN_DO_LAZY()) pfm_lazy_save_regs(PMU_OWNER());
+ ctx->ctx_flags.state = PFM_CTX_DISABLED;
- SET_PMU_OWNER(current);
+ return 0;
+}
- /* will start monitoring right after rfi */
- ia64_psr(regs)->up = 1;
- ia64_psr(regs)->pp = 1;
- if (ctx->ctx_fl_system) {
- pfm_process_tasklist(1);
- pfs_info.pfs_pp = 1;
- }
- /*
- * mark the state as valid.
- * this will trigger save/restore at context switch
- */
- if (ctx->ctx_fl_system==0) th->flags |= IA64_THREAD_PM_VALID;
+static int
+pfm_destroy_context(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
+ struct pt_regs *regs)
+{
+ /* we don't quite support this right now */
+ if (task != current) return -EINVAL;
- ia64_set_pmc(0, 0);
- ia64_srlz_d();
+ /*
+ * if context was never enabled, then there is not much
+ * to do
+ */
+ if (!CTX_IS_ENABLED(ctx)) goto skipped_stop;
- break;
+ /*
+ * Disable context: stop monitoring, flush regs to software state (useless here),
+ * and freeze PMU
+ *
+ * The IA64_THREAD_PM_VALID is cleared by pfm_flush_regs() called from pfm_disable()
+ */
+ pfm_disable(task, ctx, arg, count, regs);
- case PFM_ENABLE:
- /* we don't quite support this right now */
- if (task != current) return -EINVAL;
+ if (ctx->ctx_fl_system) {
+ ia64_psr(regs)->pp = 0;
+ } else {
+ ia64_psr(regs)->up = 0;
+ }
- if (PMU_OWNER() && PMU_OWNER() != current && PFM_CAN_DO_LAZY()) pfm_lazy_save_regs(PMU_OWNER());
+ /* restore security level */
+ ia64_psr(regs)->sp = 1;
- /* reset all registers to stable quiet state */
- ia64_reset_pmu();
+skipped_stop:
+ /*
+ * remove sampling buffer mapping, if any
+ */
+ if (ctx->ctx_smpl_vaddr) pfm_remove_smpl_mapping(task);
- /* make sure nothing starts */
- ia64_psr(regs)->up = 0;
- ia64_psr(regs)->pp = 0;
+ /* now free context and related state */
+ pfm_context_exit(task);
- /* do it on the live register as well */
- __asm__ __volatile__ ("rsm psr.pp|psr.pp;;"::: "memory");
+ return 0;
+}
- SET_PMU_OWNER(current);
+/*
+ * does nothing at the moment
+ */
+static int
+pfm_unprotect_context(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
+ struct pt_regs *regs)
+{
+ return 0;
+}
- /*
- * mark the state as valid.
- * this will trigger save/restore at context switch
- */
- if (ctx->ctx_fl_system==0) th->flags |= IA64_THREAD_PM_VALID;
+static int
+pfm_protect_context(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
+ struct pt_regs *regs)
+{
+ DBprintk(("context from [%d] is protected\n", task->pid));
+ /*
+ * from now on, only the creator of the context has access to it
+ */
+ ctx->ctx_fl_protected = 1;
- /* simply unfreeze */
- ia64_set_pmc(0, 0);
- ia64_srlz_d();
- break;
+ /*
+ * reinforce secure monitoring: cannot toggle psr.up
+ */
+ ia64_psr(regs)->sp = 1;
- case PFM_DISABLE:
- /* we don't quite support this right now */
- if (task != current) return -EINVAL;
+ return 0;
+}
- /* simply freeze */
- ia64_set_pmc(0, 1);
- ia64_srlz_d();
- /*
- * XXX: cannot really toggle IA64_THREAD_PM_VALID
- * but context is still considered valid, so any
- * read request would return something valid. Same
- * thing when this task terminates (pfm_flush_regs()).
- */
- break;
+static int
+pfm_debug(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
+ struct pt_regs *regs)
+{
+ unsigned int mode = *(unsigned int *)arg;
- case PFM_READ_PMDS:
- if (!access_ok(VERIFY_READ, req, sizeof(struct perfmon_req_t)*count)) return -EFAULT;
- if (!access_ok(VERIFY_WRITE, req, sizeof(struct perfmon_req_t)*count)) return -EFAULT;
-
- return pfm_read_pmds(task, req, count);
-
- case PFM_STOP:
- /* we don't quite support this right now */
- if (task != current) return -EINVAL;
-
- /* simply stop monitors, not PMU */
- ia64_psr(regs)->up = 0;
- ia64_psr(regs)->pp = 0;
-
- if (ctx->ctx_fl_system) {
- pfm_process_tasklist(0);
- pfs_info.pfs_pp = 0;
- }
+ pfm_debug_mode = mode == 0 ? 0 : 1;
- break;
+ printk("perfmon debugging %s\n", pfm_debug_mode ? "on" : "off");
+
+ return 0;
+}
+
+#ifdef PFM_PMU_USES_DBR
+
+typedef struct {
+ unsigned long ibr_mask:56;
+ unsigned long ibr_plm:4;
+ unsigned long ibr_ig:3;
+ unsigned long ibr_x:1;
+} ibr_mask_reg_t;
+
+typedef struct {
+ unsigned long dbr_mask:56;
+ unsigned long dbr_plm:4;
+ unsigned long dbr_ig:2;
+ unsigned long dbr_w:1;
+ unsigned long dbr_r:1;
+} dbr_mask_reg_t;
+
+typedef union {
+ unsigned long val;
+ ibr_mask_reg_t ibr;
+ dbr_mask_reg_t dbr;
+} dbreg_t;
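
The dbreg_t union above gives symbolic access to the IBR/DBR bit layout, and pfm_write_ibr_dbr() below uses it to force the enable bits off in the odd (mask) registers so a monitoring tool can never install an armed breakpoint. A small user-space sketch of that manipulation (the bitfield layout is copied from the definitions above; the values and the ibreg_t name are made up for the example):

#include <stdio.h>

typedef struct {
	unsigned long ibr_mask:56;
	unsigned long ibr_plm:4;
	unsigned long ibr_ig:3;
	unsigned long ibr_x:1;
} ibr_mask_reg_t;

typedef union {
	unsigned long	val;
	ibr_mask_reg_t	ibr;
} ibreg_t;

int
main(void)
{
	ibreg_t d;

	d.val = ~0UL;		/* pretend the user passed a fully enabled breakpoint */

	/* same idea as the odd-register case below: never install an
	 * enabled instruction breakpoint on behalf of the caller
	 */
	d.ibr.ibr_x = 0;

	printf("sanitized ibr value: 0x%lx\n", d.val);
	return 0;
}
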
+
+
+static int
+pfm_write_ibr_dbr(int mode, struct task_struct *task, void *arg, int count, struct pt_regs *regs)
+{
+ struct thread_struct *thread = &task->thread;
+ pfm_context_t *ctx = task->thread.pfm_context;
+ pfarg_dbreg_t tmp, *req = (pfarg_dbreg_t *)arg;
+ dbreg_t dbreg;
+ unsigned int rnum;
+ int first_time;
+ int i, ret = 0;
+
+ /*
+	 * for range restriction: psr.db must be cleared or the
+	 * PMU will ignore the debug registers.
+ *
+ * XXX: may need more in system wide mode,
+ * no task can have this bit set?
+ */
+ if (ia64_psr(regs)->db == 1) return -EINVAL;
+
+
+ first_time = ctx->ctx_fl_using_dbreg == 0;
+
+ /*
+ * check for debug registers in system wide mode
+ *
+ */
+ LOCK_PFS();
+ if (ctx->ctx_fl_system && first_time) {
+ if (pfm_sessions.pfs_ptrace_use_dbregs)
+ ret = -EBUSY;
+ else
+ pfm_sessions.pfs_sys_use_dbregs++;
+ }
+ UNLOCK_PFS();
- case PFM_RESTART: /* temporary, will most likely end up as a PFM_ENABLE */
+ if (ret != 0) return ret;
- if ((th->flags & IA64_THREAD_PM_VALID) == 0 && ctx->ctx_fl_system==0) {
- printk(" PFM_RESTART not monitoring\n");
- return -EINVAL;
+ if (ctx->ctx_fl_system) {
+ /* we mark ourselves as owner of the debug registers */
+ ctx->ctx_fl_using_dbreg = 1;
+ } else {
+ if (ctx->ctx_fl_using_dbreg == 0) {
+ ret= -EBUSY;
+ if ((thread->flags & IA64_THREAD_DBG_VALID) != 0) {
+ DBprintk(("debug registers already in use for [%d]\n", task->pid));
+ goto abort_mission;
+ }
+ /* we mark ourselves as owner of the debug registers */
+ ctx->ctx_fl_using_dbreg = 1;
+
+ /*
+ * Given debug registers cannot be used for both debugging
+ * and performance monitoring at the same time, we reuse
+ * the storage area to save and restore the registers on ctxsw.
+ */
+ memset(task->thread.dbr, 0, sizeof(task->thread.dbr));
+ memset(task->thread.ibr, 0, sizeof(task->thread.ibr));
+
+ /*
+ * clear hardware registers to make sure we don't leak
+ * information and pick up stale state
+ */
+ for (i=0; i < pmu_conf.num_ibrs; i++) {
+ ia64_set_ibr(i, 0UL);
}
- if (CTX_OVFL_NOBLOCK(ctx) == 0 && ctx->ctx_fl_frozen==0) {
- printk("task %d without pmu_frozen set\n", task->pid);
- return -EINVAL;
+ for (i=0; i < pmu_conf.num_dbrs; i++) {
+ ia64_set_dbr(i, 0UL);
}
+ }
+ }
- return pfm_do_restart(task); /* we only look at first entry */
+ ret = -EFAULT;
- case PFM_DESTROY_CONTEXT:
- /* we don't quite support this right now */
- if (task != current) return -EINVAL;
-
- /* first stop monitors */
- ia64_psr(regs)->up = 0;
- ia64_psr(regs)->pp = 0;
+ /*
+ * Now install the values into the registers
+ */
+ for (i = 0; i < count; i++, req++) {
- /* then freeze PMU */
- ia64_set_pmc(0, 1);
- ia64_srlz_d();
+
+ if (copy_from_user(&tmp, req, sizeof(tmp))) goto abort_mission;
+
+ rnum = tmp.dbreg_num;
+ dbreg.val = tmp.dbreg_value;
+
+ ret = -EINVAL;
- /* don't save/restore on context switch */
- if (ctx->ctx_fl_system ==0) task->thread.flags &= ~IA64_THREAD_PM_VALID;
+ if ((mode == 0 && !IBR_IS_IMPL(rnum)) || ((mode == 1) && !DBR_IS_IMPL(rnum))) {
+ DBprintk(("invalid register %u val=0x%lx mode=%d i=%d count=%d\n",
+ rnum, dbreg.val, mode, i, count));
- SET_PMU_OWNER(NULL);
+ goto abort_mission;
+ }
- /* now free context and related state */
- pfm_context_exit(task);
- break;
+ /*
+ * make sure we do not install enabled breakpoint
+ */
+ if (rnum & 0x1) {
+ if (mode == 0)
+ dbreg.ibr.ibr_x = 0;
+ else
+ dbreg.dbr.dbr_r = dbreg.dbr.dbr_w = 0;
+ }
- case PFM_DEBUG_ON:
- printk("perfmon debugging on\n");
- pfm_debug = 1;
- break;
+ /*
+ * clear return flags and copy back to user
+ *
+ * XXX: fix once EAGAIN is implemented
+ */
+ ret = -EFAULT;
- case PFM_DEBUG_OFF:
- printk("perfmon debugging off\n");
- pfm_debug = 0;
- break;
+ PFM_REG_RETFLAG_SET(tmp.dbreg_flags, 0);
+
+ if (copy_to_user(req, &tmp, sizeof(tmp))) goto abort_mission;
+
+ /*
+ * Debug registers, just like PMC, can only be modified
+ * by a kernel call. Moreover, perfmon() access to those
+ * registers are centralized in this routine. The hardware
+ * does not modify the value of these registers, therefore,
+ * if we save them as they are written, we can avoid having
+ * to save them on context switch out. This is made possible
+ * by the fact that when perfmon uses debug registers, ptrace()
+ * won't be able to modify them concurrently.
+ */
+ if (mode == 0) {
+ CTX_USED_IBR(ctx, rnum);
+
+ ia64_set_ibr(rnum, dbreg.val);
- default:
- DBprintk((" UNknown command 0x%x\n", cmd));
+ thread->ibr[rnum] = dbreg.val;
+
+ DBprintk(("write ibr%u=0x%lx used_ibrs=0x%lx\n", rnum, dbreg.val, ctx->ctx_used_ibrs[0]));
+ } else {
+ CTX_USED_DBR(ctx, rnum);
+
+ ia64_set_dbr(rnum, dbreg.val);
+
+ thread->dbr[rnum] = dbreg.val;
+
+ DBprintk(("write dbr%u=0x%lx used_dbrs=0x%lx\n", rnum, dbreg.val, ctx->ctx_used_dbrs[0]));
+ }
+ }
+
+ return 0;
+
+abort_mission:
+ /*
+ * in case it was our first attempt, we undo the global modifications
+ */
+ if (first_time) {
+ LOCK_PFS();
+ if (ctx->ctx_fl_system) {
+ pfm_sessions.pfs_sys_use_dbregs--;
+ }
+ UNLOCK_PFS();
+ ctx->ctx_fl_using_dbreg = 0;
+ }
+ /*
+ * install error return flag
+ */
+ if (ret != -EFAULT) {
+ /*
+ * XXX: for now we can only come here on EINVAL
+ */
+ PFM_REG_RETFLAG_SET(tmp.dbreg_flags, PFM_REG_RETFL_EINVAL);
+ copy_to_user(req, &tmp, sizeof(tmp));
+ }
+ return ret;
+}
+
+static int
+pfm_write_ibrs(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
+ struct pt_regs *regs)
+{
+ /* we don't quite support this right now */
+ if (task != current) return -EINVAL;
+
+ if (!CTX_IS_ENABLED(ctx)) return -EINVAL;
+
+ return pfm_write_ibr_dbr(0, task, arg, count, regs);
+}
+
+static int
+pfm_write_dbrs(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
+ struct pt_regs *regs)
+{
+ /* we don't quite support this right now */
+ if (task != current) return -EINVAL;
+
+ if (!CTX_IS_ENABLED(ctx)) return -EINVAL;
+
+ return pfm_write_ibr_dbr(1, task, arg, count, regs);
+}
+
+#endif /* PFM_PMU_USES_DBR */
+
+static int
+pfm_get_features(struct task_struct *task, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
+{
+ pfarg_features_t tmp;
+
+ memset(&tmp, 0, sizeof(tmp));
+
+ tmp.ft_version = PFM_VERSION;
+ tmp.ft_smpl_version = PFM_SMPL_VERSION;
+
+ if (copy_to_user(arg, &tmp, sizeof(tmp))) return -EFAULT;
+
+ return 0;
+}
+
+static int
+pfm_start(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
+ struct pt_regs *regs)
+{
+ /* we don't quite support this right now */
+ if (task != current) return -EINVAL;
+
+ /*
+ * Cannot do anything before PMU is enabled
+ */
+ if (!CTX_IS_ENABLED(ctx)) return -EINVAL;
+
+ DBprintk(("[%d] fl_system=%d owner=%p current=%p\n",
+ current->pid,
+ ctx->ctx_fl_system, PMU_OWNER(),
+ current));
+
+ if (PMU_OWNER() != task) {
+ printk("perfmon: pfm_start task [%d] not pmu owner\n", task->pid);
+ return -EINVAL;
+ }
+
+ if (ctx->ctx_fl_system) {
+
+ /* enable dcr pp */
+ ia64_set_dcr(ia64_get_dcr()|IA64_DCR_PP);
+
+#ifdef CONFIG_SMP
+ local_cpu_data->pfm_dcr_pp = 1;
+#else
+ pfm_tasklist_toggle_pp(1);
+#endif
+ ia64_psr(regs)->pp = 1;
+
+ __asm__ __volatile__ ("ssm psr.pp;;"::: "memory");
+
+ } else {
+ if ((task->thread.flags & IA64_THREAD_PM_VALID) == 0) {
+ printk("perfmon: pfm_start task flag not set for [%d]\n", task->pid);
return -EINVAL;
+ }
+ ia64_psr(regs)->up = 1;
+ __asm__ __volatile__ ("sum psr.up;;"::: "memory");
+ }
+ ia64_srlz_d();
+
+ return 0;
+}
+
+static int
+pfm_enable(struct task_struct *task, pfm_context_t *ctx, void *arg, int count,
+ struct pt_regs *regs)
+{
+ /* we don't quite support this right now */
+ if (task != current) return -EINVAL;
+
+ if (ctx->ctx_fl_system == 0 && PMU_OWNER() && PMU_OWNER() != current)
+ pfm_lazy_save_regs(PMU_OWNER());
+
+ /* reset all registers to stable quiet state */
+ ia64_reset_pmu(task);
+
+ /* make sure nothing starts */
+ if (ctx->ctx_fl_system) {
+ ia64_psr(regs)->pp = 0;
+ ia64_psr(regs)->up = 0; /* just to make sure! */
+
+ __asm__ __volatile__ ("rsm psr.pp;;"::: "memory");
+
+#ifdef CONFIG_SMP
+ local_cpu_data->pfm_syst_wide = 1;
+ local_cpu_data->pfm_dcr_pp = 0;
+#endif
+ } else {
+ /*
+ * needed in case the task was a passive task during
+ * a system wide session and now wants to have its own
+ * session
+ */
+ ia64_psr(regs)->pp = 0; /* just to make sure! */
+ ia64_psr(regs)->up = 0;
+
+ __asm__ __volatile__ ("rum psr.up;;"::: "memory");
+ /*
+ * allow user control (user monitors only)
+ if (task == ctx->ctx_owner) {
+ */
+ {
+ DBprintk(("clearing psr.sp for [%d]\n", current->pid));
+ ia64_psr(regs)->sp = 0;
+ }
+ task->thread.flags |= IA64_THREAD_PM_VALID;
}
+
+ SET_PMU_OWNER(task);
+
+
+ ctx->ctx_flags.state = PFM_CTX_ENABLED;
+ atomic_set(&ctx->ctx_last_cpu, smp_processor_id());
+
+ /* simply unfreeze */
+ ia64_set_pmc(0, 0);
+ ia64_srlz_d();
+
return 0;
}
/*
- * XXX: do something better here
+ * functions MUST be listed in increasing order of their index (see perfmon.h)
*/
+static pfm_cmd_desc_t pfm_cmd_tab[]={
+/* 0 */{ NULL, 0, 0, 0}, /* not used */
+/* 1 */{ pfm_write_pmcs, PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_ARG_READ|PFM_CMD_ARG_WRITE, PFM_CMD_ARG_MANY, sizeof(pfarg_reg_t)},
+/* 2 */{ pfm_write_pmds, PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_ARG_READ, PFM_CMD_ARG_MANY, sizeof(pfarg_reg_t)},
+/* 3 */{ pfm_read_pmds, PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_ARG_READ|PFM_CMD_ARG_WRITE, PFM_CMD_ARG_MANY, sizeof(pfarg_reg_t)},
+/* 4 */{ pfm_stop, PFM_CMD_PID|PFM_CMD_CTX, 0, 0},
+/* 5 */{ pfm_start, PFM_CMD_PID|PFM_CMD_CTX, 0, 0},
+/* 6 */{ pfm_enable, PFM_CMD_PID|PFM_CMD_CTX, 0, 0},
+/* 7 */{ pfm_disable, PFM_CMD_PID|PFM_CMD_CTX, 0, 0},
+/* 8 */{ pfm_create_context, PFM_CMD_ARG_READ, 1, sizeof(pfarg_context_t)},
+/* 9 */{ pfm_destroy_context, PFM_CMD_PID|PFM_CMD_CTX, 0, 0},
+/* 10 */{ pfm_restart, PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_NOCHK, 0, 0},
+/* 11 */{ pfm_protect_context, PFM_CMD_PID|PFM_CMD_CTX, 0, 0},
+/* 12 */{ pfm_get_features, PFM_CMD_ARG_WRITE, 0, 0},
+/* 13 */{ pfm_debug, 0, 1, sizeof(unsigned int)},
+/* 14 */{ pfm_unprotect_context, PFM_CMD_PID|PFM_CMD_CTX, 0, 0},
+/* 15 */{ NULL, 0, 0, 0}, /* not used */
+/* 16 */{ NULL, 0, 0, 0}, /* not used */
+/* 17 */{ NULL, 0, 0, 0}, /* not used */
+/* 18 */{ NULL, 0, 0, 0}, /* not used */
+/* 19 */{ NULL, 0, 0, 0}, /* not used */
+/* 20 */{ NULL, 0, 0, 0}, /* not used */
+/* 21 */{ NULL, 0, 0, 0}, /* not used */
+/* 22 */{ NULL, 0, 0, 0}, /* not used */
+/* 23 */{ NULL, 0, 0, 0}, /* not used */
+/* 24 */{ NULL, 0, 0, 0}, /* not used */
+/* 25 */{ NULL, 0, 0, 0}, /* not used */
+/* 26 */{ NULL, 0, 0, 0}, /* not used */
+/* 27 */{ NULL, 0, 0, 0}, /* not used */
+/* 28 */{ NULL, 0, 0, 0}, /* not used */
+/* 29 */{ NULL, 0, 0, 0}, /* not used */
+/* 30 */{ NULL, 0, 0, 0}, /* not used */
+/* 31 */{ NULL, 0, 0, 0}, /* not used */
+#ifdef PFM_PMU_USES_DBR
+/* 32 */{ pfm_write_ibrs, PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_ARG_READ|PFM_CMD_ARG_WRITE, PFM_CMD_ARG_MANY, sizeof(pfarg_dbreg_t)},
+/* 33 */{ pfm_write_dbrs, PFM_CMD_PID|PFM_CMD_CTX|PFM_CMD_ARG_READ|PFM_CMD_ARG_WRITE, PFM_CMD_ARG_MANY, sizeof(pfarg_dbreg_t)}
+#endif
+};
+#define PFM_CMD_COUNT (sizeof(pfm_cmd_tab)/sizeof(pfm_cmd_desc_t))
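
pfm_cmd_tab[] turns the perfmonctl() entry point into a table dispatch: each slot records the handler, the flag bits (does the command need a pid, a context, read or write access to the user argument) and the expected argument count and element size. A reduced sketch of the same pattern (generic user-space C; the names and the two sample commands are hypothetical):

#include <stddef.h>
#include <stdio.h>

typedef int (*cmd_func_t)(void *arg, int count);

typedef struct {
	cmd_func_t	cmd_func;	/* NULL means "command not implemented" */
	int		cmd_narg;	/* expected count, or -1 for "one or more" */
	size_t		cmd_argsize;	/* size of one argument element */
} cmd_desc_t;

static int do_noop(void *arg, int count) { (void)arg; (void)count; return 0; }

static cmd_desc_t cmd_tab[] = {
	/* 0 */ { NULL,    0, 0 },			/* not used */
	/* 1 */ { do_noop, -1, sizeof(long) },		/* variable-size array argument */
	/* 2 */ { do_noop,  1, sizeof(int) },		/* exactly one argument */
};
#define CMD_COUNT (sizeof(cmd_tab)/sizeof(cmd_tab[0]))

static int
dispatch(int cmd, void *arg, int count)
{
	if (cmd < 0 || (size_t)cmd >= CMD_COUNT || cmd_tab[cmd].cmd_func == NULL)
		return -1;				/* -EINVAL in the kernel */
	if (cmd_tab[cmd].cmd_narg > 0 && cmd_tab[cmd].cmd_narg != count)
		return -1;				/* wrong argument count */
	return (*cmd_tab[cmd].cmd_func)(arg, count);
}

int
main(void)
{
	long args[2] = { 1, 2 };

	printf("cmd 1 -> %d, cmd 5 -> %d\n", dispatch(1, args, 2), dispatch(5, NULL, 0));
	return 0;
}
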
+
static int
-perfmon_bad_permissions(struct task_struct *task)
+check_task_state(struct task_struct *task)
{
- /* stolen from bad_signal() */
- return (current->session != task->session)
- && (current->euid ^ task->suid) && (current->euid ^ task->uid)
- && (current->uid ^ task->suid) && (current->uid ^ task->uid);
+ int ret = 0;
+#ifdef CONFIG_SMP
+ /* We must wait until the state has been completely
+	 * saved. There can be situations where the reader arrives
+	 * after the task is marked as STOPPED but before pfm_save_regs()
+	 * has completed.
+ */
+ for (;;) {
+
+ task_lock(task);
+ if (!task_has_cpu(task)) break;
+ task_unlock(task);
+
+ do {
+ if (task->state != TASK_ZOMBIE && task->state != TASK_STOPPED) return -EBUSY;
+ barrier();
+ cpu_relax();
+ } while (task_has_cpu(task));
+ }
+ task_unlock(task);
+#else
+ if (task->state != TASK_ZOMBIE && task->state != TASK_STOPPED) {
+ DBprintk(("warning [%d] not in stable state %ld\n", task->pid, task->state));
+ ret = -EBUSY;
+ }
+#endif
+ return ret;
}
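
check_task_state() has to spin until the monitored task is completely off its CPU; otherwise the PMU state saved in thread_struct could still be half written when the caller reads it. A stand-alone sketch of that wait pattern using C11 atomics (the flag and function names are illustrative stand-ins for task_has_cpu() and cpu_relax()):

#include <sched.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_int target_on_cpu;	/* nonzero while the target still runs somewhere */

/* returns 0 once it is safe to read the target's saved state,
 * -1 (think -EBUSY) when the target is not in a stable stopped state
 */
static int
wait_until_off_cpu(int target_stopped)
{
	for (;;) {
		if (!atomic_load(&target_on_cpu))
			return 0;	/* state fully saved */
		if (!target_stopped)
			return -1;	/* target still running, give up */
		sched_yield();		/* like cpu_relax(): let the other CPU finish */
	}
}

int
main(void)
{
	atomic_store(&target_on_cpu, 0);
	printf("wait -> %d\n", wait_until_off_cpu(1));
	return 0;
}
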
asmlinkage int
-sys_perfmonctl (int pid, int cmd, int flags, perfmon_req_t *req, int count, long arg6, long arg7, long arg8, long stack)
+sys_perfmonctl (pid_t pid, int cmd, void *arg, int count, long arg5, long arg6, long arg7,
+ long arg8, long stack)
{
- struct pt_regs *regs = (struct pt_regs *) &stack;
- struct task_struct *child = current;
- int ret = -ESRCH;
+ struct pt_regs *regs = (struct pt_regs *)&stack;
+ struct task_struct *task = current;
+ pfm_context_t *ctx = task->thread.pfm_context;
+ size_t sz;
+ int ret = -ESRCH, narg;
- /* sanity check:
- *
- * ensures that we don't do bad things in case the OS
- * does not have enough storage to save/restore PMC/PMD
+ /*
+ * reject any call if perfmon was disabled at initialization time
*/
- if (PERFMON_IS_DISABLED()) return -ENOSYS;
+ if (PFM_IS_DISABLED()) return -ENOSYS;
- /* XXX: pid interface is going away in favor of pfm context */
- if (pid != current->pid) {
- read_lock(&tasklist_lock);
+ DBprintk(("cmd=%d idx=%d valid=%d narg=0x%x\n", cmd, PFM_CMD_IDX(cmd),
+ PFM_CMD_IS_VALID(cmd), PFM_CMD_NARG(cmd)));
- child = find_task_by_pid(pid);
+ if (PFM_CMD_IS_VALID(cmd) == 0) return -EINVAL;
- if (!child) goto abort_call;
+	/* ignore arguments when the command has none */
+ narg = PFM_CMD_NARG(cmd);
+ if ((narg == PFM_CMD_ARG_MANY && count == 0) || (narg > 0 && narg != count)) return -EINVAL;
- ret = -EPERM;
+ sz = PFM_CMD_ARG_SIZE(cmd);
- if (perfmon_bad_permissions(child)) goto abort_call;
+ if (PFM_CMD_READ_ARG(cmd) && !access_ok(VERIFY_READ, arg, sz*count)) return -EFAULT;
- /*
- * XXX: need to do more checking here
+ if (PFM_CMD_WRITE_ARG(cmd) && !access_ok(VERIFY_WRITE, arg, sz*count)) return -EFAULT;
+
+ if (PFM_CMD_USE_PID(cmd)) {
+ /*
+ * XXX: may need to fine tune this one
*/
- if (child->state != TASK_ZOMBIE && child->state != TASK_STOPPED) {
- DBprintk((" warning process %d not in stable state %ld\n", pid, child->state));
+ if (pid < 2) return -EPERM;
+
+ if (pid != current->pid) {
+
+ read_lock(&tasklist_lock);
+
+ task = find_task_by_pid(pid);
+
+ if (!task) goto abort_call;
+
+ ret = -EPERM;
+
+ if (pfm_bad_permissions(task)) goto abort_call;
+
+ if (PFM_CMD_CHK(cmd)) {
+ ret = check_task_state(task);
+ if (ret != 0) goto abort_call;
+ }
+ ctx = task->thread.pfm_context;
}
+ }
+
+ if (PFM_CMD_USE_CTX(cmd)) {
+ ret = -EINVAL;
+ if (ctx == NULL) {
+ DBprintk(("no context for task %d\n", task->pid));
+ goto abort_call;
+ }
+ ret = -EPERM;
+ /*
+ * we only grant access to the context if:
+ * - the caller is the creator of the context (ctx_owner)
+	 * OR - the context is attached to the caller AND the context IS NOT
+ * in protected mode
+ */
+ if (ctx->ctx_owner != current && (ctx->ctx_fl_protected || task != current)) {
+ DBprintk(("context protected, no access for [%d]\n", task->pid));
+ goto abort_call;
+ }
}
- ret = do_perfmonctl(child, cmd, flags, req, count, regs);
+
+ ret = (*pfm_cmd_tab[PFM_CMD_IDX(cmd)].cmd_func)(task, ctx, arg, count, regs);
abort_call:
- if (child != current) read_unlock(&tasklist_lock);
+ if (task != current) read_unlock(&tasklist_lock);
return ret;
}
#if __GNUC__ >= 3
void asmlinkage
-pfm_block_on_overflow(void)
+pfm_ovfl_block_reset(u64 arg0, u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5,
+ u64 arg6, u64 arg7, long info)
#else
void asmlinkage
-pfm_block_on_overflow(u64 arg0, u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7)
+pfm_ovfl_block_reset(u64 arg0, u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5,
+ u64 arg6, u64 arg7, long info)
#endif
{
	struct thread_struct *th = &current->thread;
@@ -1323,32 +2314,22 @@
int ret;
/*
- * NO matter what notify_pid is,
- * we clear overflow, won't notify again
+ * clear the flag, to make sure we won't get here
+ * again
*/
- th->pfm_must_block = 0;
+ th->pfm_ovfl_block_reset = 0;
/*
* do some sanity checks first
*/
if (!ctx) {
- printk("perfmon: process %d has no PFM context\n", current->pid);
- return;
- }
- if (ctx->ctx_notify_task == 0) {
- printk("perfmon: process %d has no task to notify\n", current->pid);
+ printk("perfmon: [%d] has no PFM context\n", current->pid);
return;
}
- DBprintk((" current=%d task=%d\n", current->pid, ctx->ctx_notify_task->pid));
-
- /* should not happen */
- if (CTX_OVFL_NOBLOCK(ctx)) {
- printk("perfmon: process %d non-blocking ctx should not be here\n", current->pid);
- return;
- }
+ if (CTX_OVFL_NOBLOCK(ctx)) goto non_blocking;
- DBprintk((" CPU%d %d before sleep\n", smp_processor_id(), current->pid));
+ DBprintk(("[%d] before sleeping\n", current->pid));
/*
* may go through without blocking on SMP systems
@@ -1356,12 +2337,14 @@
*/
ret = down_interruptible(&ctx->ctx_restart_sem);
- DBprintk((" CPU%d %d after sleep ret=%d\n", smp_processor_id(), current->pid, ret));
+ DBprintk(("[%d] after sleeping ret=%d\n", current->pid, ret));
/*
* in case of interruption of down() we don't restart anything
*/
if (ret >= 0) {
+
+non_blocking:
/* we reactivate on context switch */
ctx->ctx_fl_frozen = 0;
/*
@@ -1369,19 +2352,19 @@
* use the local reference
*/
- pfm_reset_regs(ctx);
+ pfm_reset_regs(ctx, ctx->ctx_ovfl_regs, PFM_RELOAD_LONG_RESET);
+
+ ctx->ctx_ovfl_regs[0] = 0UL;
/*
* Unlock sampling buffer and reset index atomically
* XXX: not really needed when blocking
*/
if (CTX_HAS_SMPL(ctx)) {
- ctx->ctx_smpl_buf->psb_hdr->hdr_count = 0;
- ctx->ctx_smpl_buf->psb_index = 0;
+ ctx->ctx_psb->psb_hdr->hdr_count = 0;
+ ctx->ctx_psb->psb_index = 0;
}
- DBprintk((" CPU%d %d unfreeze PMU\n", smp_processor_id(), current->pid));
-
ia64_set_pmc(0, 0);
ia64_srlz_d();
@@ -1390,23 +2373,111 @@
}
/*
+ * This function will record an entry in the sampling buffer if it is not full already.
+ * Return:
+ * 0 : buffer is not full (did not BECOME full: still space or was already full)
+ * 1 : buffer is full (recorded the last entry)
+ */
+static int
+pfm_record_sample(struct task_struct *task, pfm_context_t *ctx, unsigned long ovfl_mask, struct pt_regs *regs)
+{
+ pfm_smpl_buffer_desc_t *psb = ctx->ctx_psb;
+ unsigned long *e, m, idx;
+ perfmon_smpl_entry_t *h;
+ int j;
+
+
+	pfm_recorded_samples_count++;
+ idx = ia64_fetch_and_add(1, &psb->psb_index);
+ DBprintk(("recording index=%ld entries=%ld\n", idx-1, psb->psb_entries));
+
+ /*
+	 * XXX: there is a small chance that we could run out of index space before resetting
+ * but index is unsigned long, so it will take some time.....
+ * We use > instead of == because fetch_and_add() is off by one (see below)
+ *
+ * This case can happen in non-blocking mode or with multiple processes.
+ * For non-blocking, we need to reload and continue.
+ */
+ if (idx > psb->psb_entries) return 0;
+
+ /* first entry is really entry 0, not 1 caused by fetch_and_add */
+ idx--;
+
+ h = (perfmon_smpl_entry_t *)(((char *)psb->psb_addr) + idx*(psb->psb_entry_size));
+
+ /*
+ * initialize entry header
+ */
+ h->pid = task->pid;
+ h->cpu = smp_processor_id();
+ h->rate = 0; /* XXX: add the sampling rate used here */
+	h->ip   = regs ? regs->cr_iip : 0x0;	/* where the fault happened */
+ h->regs = ovfl_mask; /* which registers overflowed */
+
+ /* guaranteed to monotonically increase on each cpu */
+ h->stamp = pfm_get_stamp();
+ h->period = 0UL; /* not yet used */
+
+ /* position for first pmd */
+ e = (unsigned long *)(h+1);
+
+ /*
+ * selectively store PMDs in increasing index number
+ */
+ m = ctx->ctx_smpl_regs[0];
+ for (j=0; m; m >>=1, j++) {
+
+ if ((m & 0x1) == 0) continue;
+
+ if (PMD_IS_COUNTING(j)) {
+ *e = pfm_read_soft_counter(ctx, j);
+ /* check if this pmd overflowed as well */
+ *e += ovfl_mask & (1UL<<j) ? 1 + pmu_conf.perf_ovfl_val : 0;
+ } else {
+ *e = ia64_get_pmd(j); /* slow */
+ }
+ DBprintk(("e=%p pmd%d =0x%lx\n", (void *)e, j, *e));
+ e++;
+ }
+ /*
+ * make the new entry visible to user, needs to be atomic
+ */
+ ia64_fetch_and_add(1, &psb->psb_hdr->hdr_count);
+
+ DBprintk(("index=%ld entries=%ld hdr_count=%ld\n",
+ idx, psb->psb_entries, psb->psb_hdr->hdr_count));
+ /*
+ * sampling buffer full ?
+ */
+ if (idx == (psb->psb_entries-1)) {
+ DBprintk(("sampling buffer full\n"));
+ /*
+		 * XXX: must reset the buffer in blocking mode when the notified task is lost
+ */
+ return 1;
+ }
+ return 0;
+}
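
pfm_record_sample() claims its slot in the sampling buffer with ia64_fetch_and_add(), which returns the incremented value; that is why the index can briefly run past psb_entries and why the "off by one" correction is needed before the entry is filled in. A user-space sketch of the same claim logic using GCC's __sync_add_and_fetch() (the buffer size and names are illustrative):

#include <stdio.h>

#define NUM_ENTRIES 4UL

static unsigned long smpl_index;	/* stands in for psb->psb_index */

/* claim one slot; returns the slot number, or -1 when the buffer is full */
static long
claim_slot(void)
{
	/* add-and-fetch returns the *new* value, hence the correction below */
	unsigned long idx = __sync_add_and_fetch(&smpl_index, 1);

	if (idx > NUM_ENTRIES)
		return -1;		/* ran past the end, drop the sample */
	return (long)(idx - 1);		/* first entry is really entry 0 */
}

int
main(void)
{
	int i;

	for (i = 0; i < 6; i++)
		printf("claim %d -> slot %ld\n", i, claim_slot());
	return 0;
}
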
+
+/*
* main overflow processing routine.
 * it can be called from the interrupt path or explicitly during the context switch code
* Return:
* new value of pmc[0]. if 0x0 then unfreeze, else keep frozen
*/
-unsigned long
-update_counters (struct task_struct *task, u64 pmc0, struct pt_regs *regs)
+static unsigned long
+pfm_overflow_handler(struct task_struct *task, u64 pmc0, struct pt_regs *regs)
{
- unsigned long mask, i, cnum;
- struct thread_struct *th;
+ unsigned long mask;
+ struct thread_struct *t;
pfm_context_t *ctx;
- unsigned long bv = 0;
+ unsigned long old_val;
+ unsigned long ovfl_notify = 0UL, ovfl_pmds = 0UL;
+ int i;
int my_cpu = smp_processor_id();
- int ret = 1, buffer_is_full = 0;
- int ovfl_has_long_recovery, can_notify, need_reset_pmd16=0;
+ int ret = 1;
struct siginfo si;
-
/*
 * It is never safe to access the task for which the overflow interrupt is destined
* using the current variable as the interrupt may occur in the middle of a context switch
@@ -1421,233 +2492,151 @@
*/
if (task == NULL) {
- DBprintk((" owners[%d]=NULL\n", my_cpu));
+ DBprintk(("owners[%d]=NULL\n", my_cpu));
return 0x1;
}
- th = &task->thread;
- ctx = th->pfm_context;
+ t = &task->thread;
+ ctx = task->thread.pfm_context;
+
+ if (!ctx) {
+ printk("perfmon: Spurious overflow interrupt: process %d has no PFM context\n",
+ task->pid);
+ return 0;
+ }
/*
* XXX: debug test
* Don't think this could happen given upfront tests
*/
- if ((th->flags & IA64_THREAD_PM_VALID) == 0 && ctx->ctx_fl_system == 0) {
- printk("perfmon: Spurious overflow interrupt: process %d not using perfmon\n", task->pid);
+ if ((t->flags & IA64_THREAD_PM_VALID) == 0 && ctx->ctx_fl_system == 0) {
+ printk("perfmon: Spurious overflow interrupt: process %d not using perfmon\n",
+ task->pid);
return 0x1;
}
- if (!ctx) {
- printk("perfmon: Spurious overflow interrupt: process %d has no PFM context\n", task->pid);
- return 0;
- }
-
/*
* sanity test. Should never happen
*/
- if ((pmc0 & 0x1 )== 0) {
- printk("perfmon: pid %d pmc0=0x%lx assumption error for freeze bit\n", task->pid, pmc0);
+ if ((pmc0 & 0x1) == 0) {
+ printk("perfmon: pid %d pmc0=0x%lx assumption error for freeze bit\n",
+ task->pid, pmc0);
return 0x0;
}
mask = pmc0 >> PMU_FIRST_COUNTER;
- DBprintk(("pmc0=0x%lx pid=%d owner=%d iip=0x%lx, ctx is in %s mode used_pmds=0x%lx used_pmcs=0x%lx\n",
- pmc0, task->pid, PMU_OWNER()->pid, regs->cr_iip,
- CTX_OVFL_NOBLOCK(ctx) ? "NO-BLOCK" : "BLOCK",
- ctx->ctx_used_pmds[0],
- ctx->ctx_used_pmcs[0]));
+ DBprintk(("pmc0=0x%lx pid=%d iip=0x%lx, %s"
+ " mode used_pmds=0x%lx save_pmcs=0x%lx reload_pmcs=0x%lx\n",
+ pmc0, task->pid, (regs ? regs->cr_iip : 0),
+ CTX_OVFL_NOBLOCK(ctx) ? "nonblocking" : "blocking",
+ ctx->ctx_used_pmds[0],
+ ctx->ctx_saved_pmcs[0],
+ ctx->ctx_reload_pmcs[0]));
/*
- * XXX: need to record sample only when an EAR/BTB has overflowed
+ * First we update the virtual counters
*/
- if (CTX_HAS_SMPL(ctx)) {
- pfm_smpl_buffer_desc_t *psb = ctx->ctx_smpl_buf;
- unsigned long *e, m, idx=0;
- perfmon_smpl_entry_t *h;
- int j;
-
- idx = ia64_fetch_and_add(1, &psb->psb_index);
- DBprintk((" recording index=%ld entries=%ld\n", idx, psb->psb_entries));
+ for (i = PMU_FIRST_COUNTER; mask ; i++, mask >>= 1) {
+
+ /* skip pmd which did not overflow */
+ if ((mask & 0x1) == 0) continue;
+
+ DBprintk(("PMD[%d] overflowed hw_pmd=0x%lx soft_pmd=0x%lx\n",
+ i, ia64_get_pmd(i), ctx->ctx_soft_pmds[i].val));
/*
- * XXX: there is a small chance that we could run out on index before resetting
- * but index is unsigned long, so it will take some time.....
- * We use > instead of == because fetch_and_add() is off by one (see below)
- *
- * This case can happen in non-blocking mode or with multiple processes.
- * For non-blocking, we need to reload and continue.
+ * Because we sometimes (EARS/BTB) reset to a specific value, we cannot simply use
+		 * val to count the number of times we overflowed. Otherwise we would lose the
+		 * current value in the PMD (which can be >0). So to make sure we don't lose
+		 * the residual counts, we set val to contain the full 64-bit value of the counter.
*/
- if (idx > psb->psb_entries) {
- buffer_is_full = 1;
- goto reload_pmds;
- }
-
- /* first entry is really entry 0, not 1 caused by fetch_and_add */
- idx--;
+ old_val = ctx->ctx_soft_pmds[i].val;
+ ctx->ctx_soft_pmds[i].val = 1 + pmu_conf.perf_ovfl_val + pfm_read_soft_counter(ctx, i);
- h = (perfmon_smpl_entry_t *)(((char *)psb->psb_addr) + idx*(psb->psb_entry_size));
- h->pid = task->pid;
- h->cpu = my_cpu;
- h->rate = 0;
- h->ip = regs ? regs->cr_iip : 0x0; /* where did the fault happened */
- h->regs = mask; /* which registers overflowed */
+ DBprintk(("soft_pmd[%d].val=0x%lx old_val=0x%lx pmd=0x%lx\n",
+ i, ctx->ctx_soft_pmds[i].val, old_val,
+ ia64_get_pmd(i) & pmu_conf.perf_ovfl_val));
- /* guaranteed to monotonically increase on each cpu */
- h->stamp = perfmon_get_stamp();
-
- e = (unsigned long *)(h+1);
-
- /*
- * selectively store PMDs in increasing index number
- */
- for (j=0, m = ctx->ctx_smpl_regs; m; m >>=1, j++) {
- if (m & 0x1) {
- if (PMD_IS_COUNTER(j))
- *e = ctx->ctx_pmds[j-PMU_FIRST_COUNTER].val
- + (ia64_get_pmd(j) & pmu_conf.perf_ovfl_val);
- else {
- *e = ia64_get_pmd(j); /* slow */
- }
- DBprintk((" e=%p pmd%d =0x%lx\n", (void *)e, j, *e));
- e++;
- }
- }
/*
- * make the new entry visible to user, needs to be atomic
+ * now that we have extracted the hardware counter, we can clear it to ensure
+ * that a subsequent PFM_READ_PMDS will not include it again.
*/
- ia64_fetch_and_add(1, &psb->psb_hdr->hdr_count);
+ ia64_set_pmd(i, 0UL);
- DBprintk((" index=%ld entries=%ld hdr_count=%ld\n", idx, psb->psb_entries, psb->psb_hdr->hdr_count));
- /*
- * sampling buffer full ?
+ /*
+ * check for overflow condition
*/
- if (idx == (psb->psb_entries-1)) {
- /*
- * will cause notification, cannot be 0
- */
- bv = mask << PMU_FIRST_COUNTER;
+ if (old_val > ctx->ctx_soft_pmds[i].val) {
- buffer_is_full = 1;
+ ovfl_pmds |= 1UL << i;
- DBprintk((" sampling buffer full must notify bv=0x%lx\n", bv));
+ DBprintk(("soft_pmd[%d] overflowed flags=0x%x, ovfl=0x%lx\n", i, ctx->ctx_soft_pmds[i].flags, ovfl_pmds));
- /*
- * we do not reload here, when context is blocking
- */
- if (!CTX_OVFL_NOBLOCK(ctx)) goto no_reload;
-
- /*
- * here, we have a full buffer but we are in non-blocking mode
- * so we need to reload overflowed PMDs with sampling reset values
- * and restart right away.
- */
+ if (PMC_OVFL_NOTIFY(ctx, i)) {
+ ovfl_notify |= 1UL << i;
+ }
}
- /* FALL THROUGH */
}
-reload_pmds:
-
- /*
- * in the case of a non-blocking context, we reload
- * with the ovfl_rval when no user notification is taking place (short recovery)
- * otherwise when the buffer is full which requires user interaction) then we use
- * smpl_rval which is the long_recovery path (disturbance introduce by user execution).
- *
- * XXX: implies that when buffer is full then there is always notification.
- */
- ovfl_has_long_recovery = CTX_OVFL_NOBLOCK(ctx) && buffer_is_full;
/*
- * XXX: CTX_HAS_SMPL() should really be something like CTX_HAS_SMPL() and is activated,i.e.,
- * one of the PMC is configured for EAR/BTB.
+ * check for sampling buffer
*
- * When sampling, we can only notify when the sampling buffer is full.
+ * if present, record sample. We propagate notification ONLY when buffer
+ * becomes full.
*/
- can_notify = CTX_HAS_SMPL(ctx) == 0 && ctx->ctx_notify_task;
-
- DBprintk((" ovfl_has_long_recovery=%d can_notify=%d\n", ovfl_has_long_recovery, can_notify));
-
- for (i = 0, cnum = PMU_FIRST_COUNTER; mask ; cnum++, i++, mask >>= 1) {
-
- if ((mask & 0x1) == 0) continue;
-
- DBprintk((" PMD[%ld] overflowed pmd=0x%lx pmod.val=0x%lx\n", cnum, ia64_get_pmd(cnum), ctx->ctx_pmds[i].val));
-
- /*
- * Because we sometimes (EARS/BTB) reset to a specific value, we cannot simply use
- * val to count the number of times we overflowed. Otherwise we would loose the current value
- * in the PMD (which can be >0). So to make sure we don't loose
- * the residual counts we set val to contain full 64bits value of the counter.
- *
- * XXX: is this needed for EARS/BTB ?
- */
- ctx->ctx_pmds[i].val += 1 + pmu_conf.perf_ovfl_val
- + (ia64_get_pmd(cnum) & pmu_conf.perf_ovfl_val); /* slow */
-
- DBprintk((" pmod[%ld].val=0x%lx pmd=0x%lx\n", i, ctx->ctx_pmds[i].val, ia64_get_pmd(cnum)&pmu_conf.perf_ovfl_val));
-
- if (can_notify && PMD_OVFL_NOTIFY(ctx, i)) {
- DBprintk((" CPU%d should notify task %p with signal %d\n", my_cpu, ctx->ctx_notify_task, ctx->ctx_notify_sig));
- bv |= 1 << i;
- } else {
- DBprintk((" CPU%d PMD[%ld] overflow, no notification\n", my_cpu, cnum));
+ if(CTX_HAS_SMPL(ctx)) {
+ ret = pfm_record_sample(task, ctx, ovfl_pmds, regs);
+ if (ret == 1) {
/*
- * In case no notification is requested, we reload the reset value right away
- * otherwise we wait until the notify_pid process has been called and has
- * has finished processing data. Check out pfm_overflow_notify()
+ * Sampling buffer became full
+			 * If no notification was requested, then we reset the buffer index
+ * and reset registers (done below) and resume.
+ * If notification requested, then defer reset until pfm_restart()
*/
-
- /* writes to upper part are ignored, so this is safe */
- if (ovfl_has_long_recovery) {
- DBprintk((" CPU%d PMD[%ld] reload with smpl_val=%lx\n", my_cpu, cnum,ctx->ctx_pmds[i].smpl_rval));
- ia64_set_pmd(cnum, ctx->ctx_pmds[i].smpl_rval);
- } else {
- DBprintk((" CPU%d PMD[%ld] reload with ovfl_val=%lx\n", my_cpu, cnum,ctx->ctx_pmds[i].smpl_rval));
- ia64_set_pmd(cnum, ctx->ctx_pmds[i].ovfl_rval);
+ if (ovfl_notify == 0UL) {
+ ctx->ctx_psb->psb_hdr->hdr_count = 0UL;
+ ctx->ctx_psb->psb_index = 0UL;
}
+ } else {
+ /*
+ * sample recorded in buffer, no need to notify user
+ */
+ ovfl_notify = 0UL;
}
- if (cnum == ctx->ctx_btb_counter) need_reset_pmd16=1;
}
- /*
- * In case of BTB overflow we need to reset the BTB index.
- */
- if (need_reset_pmd16) {
- DBprintk(("reset PMD16\n"));
- ia64_set_pmd(16, 0);
- }
-
-no_reload:
/*
- * some counters overflowed, but they did not require
- * user notification, so after having reloaded them above
- * we simply restart
+ * No overflow requiring a user level notification
*/
- if (!bv) return 0x0;
+ if (ovfl_notify == 0UL) {
+ pfm_reset_regs(ctx, &ovfl_pmds, PFM_RELOAD_SHORT_RESET);
+ return 0x0;
+ }
- ctx->ctx_ovfl_regs = bv; /* keep track of what to reset when unblocking */
- /*
- * Now we know that:
- * - we have some counters which overflowed (contains in bv)
- * - someone has asked to be notified on overflow.
+ /*
+ * keep track of what to reset when unblocking
*/
+ ctx->ctx_ovfl_regs[0] = ovfl_pmds;
-
/*
- * If the notification task is still present, then notify_task is non
- * null. It is clean by that task if it ever exits before we do.
+ * we have come to this point because there was an overflow and that notification
+ * was requested. The notify_task may have disappeared, in which case notify_task
+ * is NULL.
*/
-
if (ctx->ctx_notify_task) {
si.si_errno = 0;
si.si_addr = NULL;
si.si_pid = task->pid; /* who is sending */
- si.si_signo = ctx->ctx_notify_sig; /* is SIGPROF */
- si.si_code = PROF_OVFL; /* goes to user */
- si.si_pfm_ovfl = bv;
-
-
+ si.si_signo = SIGPROF;
+ si.si_code = PROF_OVFL; /* indicates a perfmon SIGPROF signal */
+ /*
+ * Shift the bitvector such that the user sees bit 4 for PMD4 and so on.
+ * We only use smpl_ovfl[0] for now. It should be fine for quite a while
+ * until we have more than 61 PMD available.
+ */
+ si.si_pfm_ovfl[0] = ovfl_notify;
/*
* when the target of the signal is not ourself, we have to be more
@@ -1659,15 +2648,29 @@
if (ctx->ctx_notify_task != current) {
/*
* grab the notification lock for this task
+ * This guarantees that the sequence: test + send_signal
+ * is atomic with regards to the ctx_notify_task field.
+ *
+ * We need a spinlock and not just an atomic variable for this.
+ *
*/
- spin_lock(&ctx->ctx_notify_lock);
+ spin_lock(&ctx->ctx_lock);
/*
* now notify_task cannot be modified until we're done
 * if NULL, then it got modified while we were in the handler
*/
if (ctx->ctx_notify_task == NULL) {
- spin_unlock(&ctx->ctx_notify_lock);
+
+ spin_unlock(&ctx->ctx_lock);
+
+ /*
+ * If we've lost the notified task, then we will run
+			 * to completion but keep the PMU frozen. Results
+			 * will be incorrect anyway. We do not kill the task,
+			 * which leaves it possible to attach a perfmon context
+			 * to an already running task.
+ */
goto lost_notify;
}
/*
@@ -1681,20 +2684,23 @@
* necessarily go to the signal handler (if any) when it goes back to
* user mode.
*/
- DBprintk((" %d sending %d notification to %d\n", task->pid, si.si_signo, ctx->ctx_notify_task->pid));
+ DBprintk(("[%d] sending notification to [%d]\n",
+ task->pid, ctx->ctx_notify_task->pid));
/*
* this call is safe in an interrupt handler, so does read_lock() on tasklist_lock
*/
- ret = send_sig_info(ctx->ctx_notify_sig, &si, ctx->ctx_notify_task);
- if (ret != 0) printk(" send_sig_info(process %d, SIGPROF)=%d\n", ctx->ctx_notify_task->pid, ret);
+ ret = send_sig_info(SIGPROF, &si, ctx->ctx_notify_task);
+ if (ret != 0)
+ printk("send_sig_info(process %d, SIGPROF)=%d\n",
+ ctx->ctx_notify_task->pid, ret);
/*
* now undo the protections in order
*/
if (ctx->ctx_notify_task != current) {
read_unlock(&tasklist_lock);
- spin_unlock(&ctx->ctx_notify_lock);
+ spin_unlock(&ctx->ctx_lock);
}
/*
@@ -1711,35 +2717,41 @@
* before, changing it to NULL will still maintain this invariant.
* Of course, when it is equal to current it cannot change at this point.
*/
- if (!CTX_OVFL_NOBLOCK(ctx) && ctx->ctx_notify_task != current) {
- th->pfm_must_block = 1; /* will cause blocking */
+ DBprintk(("block=%d notify [%d] current [%d]\n",
+ ctx->ctx_fl_block,
+ ctx->ctx_notify_task ? ctx->ctx_notify_task->pid: -1,
+ current->pid ));
+
+ if (!CTX_OVFL_NOBLOCK(ctx) && ctx->ctx_notify_task != task) {
+ t->pfm_ovfl_block_reset = 1; /* will cause blocking */
}
} else {
-lost_notify:
- DBprintk((" notification task has disappeared !\n"));
+lost_notify: /* XXX: more to do here, to convert to non-blocking (reset values) */
+
+ DBprintk(("notification task has disappeared !\n"));
/*
- * for a non-blocking context, we make sure we do not fall into the pfm_overflow_notify()
- * trap. Also in the case of a blocking context with lost notify process, then we do not
- * want to block either (even though it is interruptible). In this case, the PMU will be kept
- * frozen and the process will run to completion without monitoring enabled.
+ * for a non-blocking context, we make sure we do not fall into the
+		 * pfm_overflow_notify() trap. Also, in the case of a blocking context with a lost
+		 * notify process, we do not want to block either (even though it is
+ * interruptible). In this case, the PMU will be kept frozen and the process will
+ * run to completion without monitoring enabled.
*
 * Of course, we cannot lose the notify process when self-monitoring.
*/
- th->pfm_must_block = 0;
+ t->pfm_ovfl_block_reset = 0;
}
/*
- * if we block, we keep the PMU frozen. If non-blocking we restart.
- * in the case of non-blocking were the notify process is lost, we also
- * restart.
+ * If notification was successful, then we rely on the pfm_restart()
+ * call to unfreeze and reset (in both blocking or non-blocking mode).
+ *
+ * If notification failed, then we will keep the PMU frozen and run
+ * the task to completion
*/
- if (!CTX_OVFL_NOBLOCK(ctx))
- ctx->ctx_fl_frozen = 1;
- else
- ctx->ctx_fl_frozen = 0;
+ ctx->ctx_fl_frozen = 1;
- DBprintk((" reload pmc0=0x%x must_block=%ld\n",
- ctx->ctx_fl_frozen ? 0x1 : 0x0, th->pfm_must_block));
+ DBprintk(("reload pmc0=0x%x must_block=%ld\n",
+ ctx->ctx_fl_frozen ? 0x1 : 0x0, t->pfm_ovfl_block_reset));
return ctx->ctx_fl_frozen ? 0x1 : 0x0;
}
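
For each overflowed counter, the handler above folds one hardware wrap into ctx_soft_pmds[].val by adding 1 + perf_ovfl_val plus the residual count still held in the PMD, and it detects a 64-bit software wrap by checking whether the new value fell below old_val. A small worked sketch of that arithmetic (user-space C; the 47-bit implemented width is an assumption, as earlier):

#include <stdio.h>

int
main(void)
{
	unsigned long ovfl_val = (1UL << 47) - 1;	/* assumed implemented counter bits */
	unsigned long soft_val = 0;			/* like ctx_soft_pmds[i].val */
	unsigned long hw_pmd   = 0x10;			/* residue left in the PMD after the wrap */
	unsigned long old_val;

	/* one overflow folded into the software counter */
	old_val  = soft_val;
	soft_val = 1 + ovfl_val + (soft_val + (hw_pmd & ovfl_val));

	printf("old=0x%lx new=0x%lx 64-bit wrap: %s\n",
	       old_val, soft_val, old_val > soft_val ? "yes" : "no");
	return 0;
}
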
@@ -1748,29 +2760,40 @@
perfmon_interrupt (int irq, void *arg, struct pt_regs *regs)
{
u64 pmc0;
- struct task_struct *ta;
+ struct task_struct *task;
- pmc0 = ia64_get_pmc(0); /* slow */
+ pfm_ovfl_intr_count++;
+
+ /*
+ * srlz.d done before arriving here
+ *
+ * This is slow
+ */
+ pmc0 = ia64_get_pmc(0);
/*
* if we have some pending bits set
* assumes : if any PM[0].bit[63-1] is set, then PMC[0].fr = 1
*/
- if ((pmc0 & ~0x1) && (ta=PMU_OWNER())) {
+ if ((pmc0 & ~0x1UL)!=0UL && (task=PMU_OWNER())!= NULL) {
- /* assumes, PMC[0].fr = 1 at this point */
- pmc0 = update_counters(ta, pmc0, regs);
-
- /*
- * if pmu_frozen = 0
- * pmc0 = 0 and we resume monitoring right away
- * else
- * pmc0 = 0x1 frozen but all pending bits are cleared
+ /*
+		 * assumes PMC[0].fr = 1 at this point
+		 *
+		 * XXX: change prototype to pass &pmc0
*/
- ia64_set_pmc(0, pmc0);
- ia64_srlz_d();
+ pmc0 = pfm_overflow_handler(task, pmc0, regs);
+
+		/* we never explicitly freeze the PMU here */
+ if (pmc0 == 0) {
+ ia64_set_pmc(0, 0);
+ ia64_srlz_d();
+ }
} else {
- printk("perfmon: Spurious PMU overflow interrupt: pmc0=0x%lx owner=%p\n", pmc0, (void *)PMU_OWNER());
+ pfm_spurious_ovfl_intr_count++;
+
+ DBprintk(("perfmon: Spurious PMU overflow interrupt on CPU%d: pmc0=0x%lx owner=%p\n",
+ smp_processor_id(), pmc0, (void *)PMU_OWNER()));
}
}
@@ -1778,14 +2801,39 @@
static int
perfmon_proc_info(char *page)
{
+#ifdef CONFIG_SMP
+#define cpu_is_online(i) (cpu_online_map & (1UL << i))
+#else
+#define cpu_is_online(i) 1
+#endif
char *p = page;
u64 pmc0 = ia64_get_pmc(0);
int i;
- p += sprintf(p, "CPU%d.pmc[0]=%lx\nPerfmon debug: %s\n", smp_processor_id(), pmc0, pfm_debug ? "On" : "Off");
- p += sprintf(p, "proc_sessions=%lu sys_sessions=%lu\n",
- pfs_info.pfs_proc_sessions,
- pfs_info.pfs_sys_session);
+ p += sprintf(p, "perfmon enabled: %s\n", pmu_conf.pfm_is_disabled ? "No": "Yes");
+
+ p += sprintf(p, "monitors_pmcs0]=0x%lx\n", pmu_conf.monitor_pmcs[0]);
+ p += sprintf(p, "counter_pmcds[0]=0x%lx\n", pmu_conf.counter_pmds[0]);
+ p += sprintf(p, "overflow interrupts=%lu\n", pfm_ovfl_intr_count);
+ p += sprintf(p, "spurious overflow interrupts=%lu\n", pfm_spurious_ovfl_intr_count);
+ p += sprintf(p, "recorded samples=%lu\n", pfm_recorded_samples_count);
+
+ p += sprintf(p, "CPU%d.pmc[0]=%lx\nPerfmon debug: %s\n",
+ smp_processor_id(), pmc0, pfm_debug_mode ? "On" : "Off");
+
+#ifdef CONFIG_SMP
+ p += sprintf(p, "CPU%d cpu_data.pfm_syst_wide=%d cpu_data.dcr_pp=%d\n",
+ smp_processor_id(), local_cpu_data->pfm_syst_wide, local_cpu_data->pfm_dcr_pp);
+#endif
+
+ LOCK_PFS();
+ p += sprintf(p, "proc_sessions=%lu\nsys_sessions=%lu\nsys_use_dbregs=%lu\nptrace_use_dbregs=%lu\n",
+ pfm_sessions.pfs_task_sessions,
+ pfm_sessions.pfs_sys_sessions,
+ pfm_sessions.pfs_sys_use_dbregs,
+ pfm_sessions.pfs_ptrace_use_dbregs);
+
+ UNLOCK_PFS();
for(i=0; i < NR_CPUS; i++) {
if (cpu_is_online(i)) {
@@ -1794,10 +2842,11 @@
pmu_owners[i].owner ? pmu_owners[i].owner->pid: -1);
}
}
+
return p - page;
}
-/* for debug only */
+/* /proc interface, for debug only */
static int
perfmon_read_entry(char *page, char **start, off_t off, int count, int *eof, void *data)
{
@@ -1814,153 +2863,90 @@
return len;
}
-static struct irqaction perfmon_irqaction = {
- handler: perfmon_interrupt,
- flags: SA_INTERRUPT,
- name: "perfmon"
-};
-
-void __init
-perfmon_init (void)
+#ifdef CONFIG_SMP
+void
+pfm_syst_wide_update_task(struct task_struct *task, int mode)
{
- pal_perf_mon_info_u_t pm_info;
- s64 status;
+ struct pt_regs *regs = (struct pt_regs *)((unsigned long) task + IA64_STK_OFFSET);
- register_percpu_irq(IA64_PERFMON_VECTOR, &perfmon_irqaction);
+ regs--;
- ia64_set_pmv(IA64_PERFMON_VECTOR);
- ia64_srlz_d();
-
- pmu_conf.pfm_is_disabled = 1;
+ /*
+ * propagate the value of the dcr_pp bit to the psr
+ */
+ ia64_psr(regs)->pp = mode ? local_cpu_data->pfm_dcr_pp : 0;
+}
+#endif
- printk("perfmon: version %s (sampling format v%d)\n", PFM_VERSION, PFM_SMPL_HDR_VERSION);
- printk("perfmon: Interrupt vectored to %u\n", IA64_PERFMON_VECTOR);
- if ((status=ia64_pal_perf_mon_info(pmu_conf.impl_regs, &pm_info)) != 0) {
- printk("perfmon: PAL call failed (%ld)\n", status);
- return;
- }
- pmu_conf.perf_ovfl_val = (1L << pm_info.pal_perf_mon_info_s.width) - 1;
- pmu_conf.max_counters = pm_info.pal_perf_mon_info_s.generic;
- pmu_conf.num_pmcs = find_num_pm_regs(pmu_conf.impl_regs);
- pmu_conf.num_pmds = find_num_pm_regs(&pmu_conf.impl_regs[4]);
+void
+pfm_save_regs (struct task_struct *task)
+{
+ pfm_context_t *ctx;
+ u64 psr;
- printk("perfmon: %d bits counters (max value 0x%lx)\n", pm_info.pal_perf_mon_info_s.width, pmu_conf.perf_ovfl_val);
- printk("perfmon: %ld PMC/PMD pairs, %ld PMCs, %ld PMDs\n", pmu_conf.max_counters, pmu_conf.num_pmcs, pmu_conf.num_pmds);
+ ctx = task->thread.pfm_context;
- /* sanity check */
- if (pmu_conf.num_pmds >= IA64_NUM_PMD_REGS || pmu_conf.num_pmcs >= IA64_NUM_PMC_REGS) {
- printk(KERN_ERR "perfmon: ERROR not enough PMC/PMD storage in kernel, perfmon is DISABLED\n");
- return; /* no need to continue anyway */
- }
- /* we are all set */
- pmu_conf.pfm_is_disabled = 0;
/*
- * Insert the tasklet in the list.
- * It is still disabled at this point, so it won't run
- printk(__FUNCTION__" tasklet is %p state=%d, count=%d\n", &perfmon_tasklet, perfmon_tasklet.state, perfmon_tasklet.count);
+ * save current PSR: needed because we modify it
*/
+ __asm__ __volatile__ ("mov %0=psr;;": "=r"(psr) :: "memory");
/*
- * for now here for debug purposes
+ * stop monitoring:
+ * This is the last instruction which can generate an overflow
+ *
+	 * We do not need to set psr.sp because it is irrelevant in the kernel.
+ * It will be restored from ipsr when going back to user level
*/
- perfmon_dir = create_proc_read_entry ("perfmon", 0, 0, perfmon_read_entry, NULL);
-}
+ __asm__ __volatile__ ("rum psr.up;;"::: "memory");
+
+ ctx->ctx_saved_psr = psr;
+
+ //ctx->ctx_last_cpu = smp_processor_id();
-void
-perfmon_init_percpu (void)
-{
- ia64_set_pmv(IA64_PERFMON_VECTOR);
- ia64_srlz_d();
}
-void
-pfm_save_regs (struct task_struct *ta)
+static void
+pfm_lazy_save_regs (struct task_struct *task)
{
- struct task_struct *owner;
pfm_context_t *ctx;
struct thread_struct *t;
- u64 pmc0, psr;
unsigned long mask;
int i;
- t = &ta->thread;
- ctx = ta->thread.pfm_context;
+ DBprintk(("on [%d] by [%d]\n", task->pid, current->pid));
- /*
- * We must make sure that we don't loose any potential overflow
- * interrupt while saving PMU context. In this code, external
- * interrupts are always enabled.
- */
+ t = &task->thread;
+ ctx = task->thread.pfm_context;
- /*
- * save current PSR: needed because we modify it
+#ifdef CONFIG_SMP
+ /*
+ * announce we are saving this PMU state
+	 * This will cause other CPUs to wait until we're done
+	 * before using the context.
+ *
+ * must be an atomic operation
*/
- __asm__ __volatile__ ("mov %0=psr;;": "=r"(psr) :: "memory");
+ atomic_set(&ctx->ctx_saving_in_progress, 1);
- /*
- * stop monitoring:
- * This is the only way to stop monitoring without destroying overflow
- * information in PMC[0].
- * This is the last instruction which can cause overflow when monitoring
- * in kernel.
- * By now, we could still have an overflow interrupt in-flight.
- */
- __asm__ __volatile__ ("rsm psr.up|psr.pp;;"::: "memory");
+ /*
+ * if owner is NULL, it means that the other CPU won the race
+	 * and the IPI has caused the context to be saved in pfm_handle_fetch_regs()
+ * instead of here. We have nothing to do
+ *
+ * note that this is safe, because the other CPU NEVER modifies saving_in_progress.
+ */
+ if (PMU_OWNER() == NULL) goto do_nothing;
+#endif
/*
- * Mark the PMU as not owned
- * This will cause the interrupt handler to do nothing in case an overflow
- * interrupt was in-flight
- * This also guarantees that pmc0 will contain the final state
- * It virtually gives us full control over overflow processing from that point
- * on.
- * It must be an atomic operation.
+	 * give up ownership of the PMU
*/
- owner = PMU_OWNER();
SET_PMU_OWNER(NULL);
- /*
- * read current overflow status:
- *
- * we are guaranteed to read the final stable state
- */
ia64_srlz_d();
- pmc0 = ia64_get_pmc(0); /* slow */
-
- /*
- * freeze PMU:
- *
- * This destroys the overflow information. This is required to make sure
- * next process does not start with monitoring on if not requested
- */
- ia64_set_pmc(0, 1);
-
- /*
- * Check for overflow bits and proceed manually if needed
- *
- * It is safe to call the interrupt handler now because it does
- * not try to block the task right away. Instead it will set a
- * flag and let the task proceed. The blocking will only occur
- * next time the task exits from the kernel.
- */
- if (pmc0 & ~0x1) {
- update_counters(owner, pmc0, NULL);
- /* we will save the updated version of pmc0 */
- }
- /*
- * restore PSR for context switch to save
- */
- __asm__ __volatile__ ("mov psr.l=%0;; srlz.i;;"::"r"(psr): "memory");
-
- /*
- * we do not save registers if we can do lazy
- */
- if (PFM_CAN_DO_LAZY()) {
- SET_PMU_OWNER(owner);
- return;
- }
/*
* XXX needs further optimization.
@@ -1970,30 +2956,75 @@
for (i=0; mask; i++, mask>>=1) {
if (mask & 0x1) t->pmd[i] =ia64_get_pmd(i);
}
-
- /* skip PMC[0], we handle it separately */
- mask = ctx->ctx_used_pmcs[0]>>1;
- for (i=1; mask; i++, mask>>=1) {
+ /*
+ * XXX: simplify to pmc0 only
+ */
+ mask = ctx->ctx_saved_pmcs[0];
+ for (i=0; mask; i++, mask>>=1) {
if (mask & 0x1) t->pmc[i] = ia64_get_pmc(i);
}
+
+ /* not owned by this CPU */
+ atomic_set(&ctx->ctx_last_cpu, -1);
+
+#ifdef CONFIG_SMP
+do_nothing:
+#endif
/*
- * Throughout this code we could have gotten an overflow interrupt. It is transformed
- * into a spurious interrupt as soon as we give up pmu ownership.
+ * declare we are done saving this context
+ *
+ * must be an atomic operation
*/
+ atomic_set(&ctx->ctx_saving_in_progress,0);
+
}
-static void
-pfm_lazy_save_regs (struct task_struct *ta)
+#ifdef CONFIG_SMP
+/*
+ * Handles requests coming from other CPUs
+ */
+static void
+pfm_handle_fetch_regs(void *info)
{
- pfm_context_t *ctx;
+ pfm_smp_ipi_arg_t *arg = info;
struct thread_struct *t;
+ pfm_context_t *ctx;
unsigned long mask;
int i;
- DBprintk((" on [%d] by [%d]\n", ta->pid, current->pid));
+ ctx = arg->task->thread.pfm_context;
+ t = &arg->task->thread;
+
+ DBprintk(("task=%d owner=%d saving=%d\n",
+ arg->task->pid,
+ PMU_OWNER() ? PMU_OWNER()->pid: -1,
+ atomic_read(&ctx->ctx_saving_in_progress)));
+
+ /* must wait if saving was interrupted */
+ if (atomic_read(&ctx->ctx_saving_in_progress)) {
+ arg->retval = 1;
+ return;
+ }
+
+ /* can proceed, done with context */
+ if (PMU_OWNER() != arg->task) {
+ arg->retval = 0;
+ return;
+ }
+
+ DBprintk(("saving state for [%d] save_pmcs=0x%lx all_pmcs=0x%lx used_pmds=0x%lx\n",
+ arg->task->pid,
+ ctx->ctx_saved_pmcs[0],
+ ctx->ctx_reload_pmcs[0],
+ ctx->ctx_used_pmds[0]));
+
+ /*
+ * XXX: will be replaced with pure assembly call
+ */
+ SET_PMU_OWNER(NULL);
+
+ ia64_srlz_d();
- t = &ta->thread;
- ctx = ta->thread.pfm_context;
/*
* XXX needs further optimization.
* Also must take holes into account
@@ -2003,84 +3034,295 @@
if (mask & 0x1) t->pmd[i] =ia64_get_pmd(i);
}
- /* skip PMC[0], we handle it separately */
- mask = ctx->ctx_used_pmcs[0]>>1;
- for (i=1; mask; i++, mask>>=1) {
+ mask = ctx->ctx_saved_pmcs[0];
+ for (i=0; mask; i++, mask>>=1) {
if (mask & 0x1) t->pmc[i] = ia64_get_pmc(i);
}
- SET_PMU_OWNER(NULL);
+ /* not owned by this CPU */
+ atomic_set(&ctx->ctx_last_cpu, -1);
+
+ /* can proceed */
+ arg->retval = 0;
+}
+
+/*
+ * Function call to fetch PMU state from another CPU identified by 'cpu'.
+ * If the context is being saved on the remote CPU, then we busy wait until
+ * the saving is done and then we return. In this case, no IPI is sent.
+ * Otherwise, we send an IPI to the remote CPU, potentially interrupting
+ * pfm_lazy_save_regs() over there.
+ *
+ * If retval==1, it means that we interrupted the remote save and that we must
+ * wait until the saving is over before proceeding.
+ * Otherwise, the state was saved by the remote handler, or it had already been
+ * saved by the time the IPI arrived. In either case, we can proceed.
+ */
+static void
+pfm_fetch_regs(int cpu, struct task_struct *task, pfm_context_t *ctx)
+{
+ pfm_smp_ipi_arg_t arg;
+ int ret;
+
+ arg.task = task;
+ arg.retval = -1;
+
+ if (atomic_read(&ctx->ctx_saving_in_progress)) {
+ DBprintk(("no IPI, must wait for [%d] to be saved on [%d]\n", task->pid, cpu));
+
+ /* busy wait */
+ while (atomic_read(&ctx->ctx_saving_in_progress));
+ return;
+ }
+ DBprintk(("calling CPU %d from CPU %d\n", cpu, smp_processor_id()));
+
+ if (cpu == -1) {
+ printk("refusing to use -1 for [%d]\n", task->pid);
+ return;
+ }
+
+ /* will send IPI to other CPU and wait for completion of remote call */
+ if ((ret=smp_call_function_single(cpu, pfm_handle_fetch_regs, &arg, 0, 1))) {
+ printk("perfmon: remote CPU call from %d to %d error %d\n", smp_processor_id(), cpu, ret);
+ return;
+ }
+ /*
+ * we must wait until saving is over on the other CPU
+	 * This is the case where we interrupted a save which started just as we
+	 * sent the IPI.
+ */
+ if (arg.retval == 1) {
+ DBprintk(("must wait for [%d] to be saved on [%d]\n", task->pid, cpu));
+ while (atomic_read(&ctx->ctx_saving_in_progress));
+ DBprintk(("done saving for [%d] on [%d]\n", task->pid, cpu));
+ }
}
+#endif /* CONFIG_SMP */
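The save/fetch handshake above hinges on the ctx_saving_in_progress flag: the saving CPU raises it before touching the state and clears it when done, while the fetching CPU simply spins on it. Below is a minimal host-side sketch of that protocol, written with C11 atomics instead of the kernel's atomic_t; the names (lazy_save, fetch_wait, saved_pmds) are illustrative stand-ins, not code from this patch.

#include <stdatomic.h>
#include <stdio.h>

static atomic_int saving_in_progress;           /* plays the role of ctx_saving_in_progress */
static unsigned long saved_pmds[32];

static void lazy_save(const unsigned long *hw_pmds, int n)
{
	int i;

	atomic_store(&saving_in_progress, 1);    /* announce: state is being written */
	for (i = 0; i < n; i++)
		saved_pmds[i] = hw_pmds[i];
	atomic_store(&saving_in_progress, 0);    /* announce: state is stable */
}

static void fetch_wait(void)
{
	while (atomic_load(&saving_in_progress))
		;                                /* busy wait, as pfm_fetch_regs() does */
}

int main(void)
{
	unsigned long hw[4] = { 1, 2, 3, 4 };

	lazy_save(hw, 4);
	fetch_wait();
	printf("pmd0=%lu\n", saved_pmds[0]);
	return 0;
}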
void
-pfm_load_regs (struct task_struct *ta)
+pfm_load_regs (struct task_struct *task)
{
- struct thread_struct *t = &ta->thread;
- pfm_context_t *ctx = ta->thread.pfm_context;
+ struct thread_struct *t;
+ pfm_context_t *ctx;
struct task_struct *owner;
unsigned long mask;
+ u64 psr;
int i;
+#ifdef CONFIG_SMP
+ int cpu;
+#endif
owner = PMU_OWNER();
- if (owner == ta) goto skip_restore;
+ ctx = task->thread.pfm_context;
+
+ /*
+ * if we were the last user, then nothing to do except restore psr
+ */
+ if (owner == task) {
+ if (atomic_read(&ctx->ctx_last_cpu) != smp_processor_id())
+ DBprintk(("invalid last_cpu=%d for [%d]\n",
+ atomic_read(&ctx->ctx_last_cpu), task->pid));
+
+ psr = ctx->ctx_saved_psr;
+ __asm__ __volatile__ ("mov psr.l=%0;; srlz.i;;"::"r"(psr): "memory");
+
+ return;
+ }
+ DBprintk(("load_regs: must reload for [%d] owner=%d\n",
+ task->pid, owner ? owner->pid : -1 ));
+ /*
+ * someone else is still using the PMU, first push it out and
+ * then we'll be able to install our stuff !
+ */
if (owner) pfm_lazy_save_regs(owner);
- SET_PMU_OWNER(ta);
+#ifdef CONFIG_SMP
+ /*
+	 * check if the context is on another CPU (-1 means saved)
+	 * We MUST use the local variable, as ctx_last_cpu may change behind
+	 * our back. If it changes to -1 (not on a CPU anymore), then 'cpu'
+	 * still holds the last CPU the context was on. We may be sending the
+	 * IPI for nothing, but we have no way of verifying this.
+ */
+ cpu = atomic_read(&ctx->ctx_last_cpu);
+ if (cpu != -1) {
+ pfm_fetch_regs(cpu, task, ctx);
+ }
+#endif
+ t = &task->thread;
+ /*
+ * XXX: will be replaced by assembly routine
+ * We clear all unused PMDs to avoid leaking information
+ */
mask = ctx->ctx_used_pmds[0];
for (i=0; mask; i++, mask>>=1) {
- if (mask & 0x1) ia64_set_pmd(i, t->pmd[i]);
+ if (mask & 0x1)
+ ia64_set_pmd(i, t->pmd[i]);
+ else
+ ia64_set_pmd(i, 0UL);
}
+ /* XXX: will need to clear all unused pmd, for security */
- /* skip PMC[0] to avoid side effects */
- mask = ctx->ctx_used_pmcs[0]>>1;
+ /*
+ * skip pmc[0] to avoid side-effects,
+	 * all PMCs are systematically reloaded, unused ones get default values
+ * to avoid picking up stale configuration
+ */
+ mask = ctx->ctx_reload_pmcs[0]>>1;
for (i=1; mask; i++, mask>>=1) {
if (mask & 0x1) ia64_set_pmc(i, t->pmc[i]);
}
-skip_restore:
+
+ /*
+ * restore debug registers when used for range restrictions.
+ * We must restore the unused registers to avoid picking up
+ * stale information.
+ */
+ mask = ctx->ctx_used_ibrs[0];
+ for (i=0; mask; i++, mask>>=1) {
+ if (mask & 0x1)
+ ia64_set_ibr(i, t->ibr[i]);
+ else
+ ia64_set_ibr(i, 0UL);
+ }
+
+ mask = ctx->ctx_used_dbrs[0];
+ for (i=0; mask; i++, mask>>=1) {
+ if (mask & 0x1)
+ ia64_set_dbr(i, t->dbr[i]);
+ else
+ ia64_set_dbr(i, 0UL);
+ }
+
+ if (t->pmc[0] & ~0x1) {
+ ia64_srlz_d();
+ pfm_overflow_handler(task, t->pmc[0], NULL);
+ }
+
+ /*
+ * fl_frozen==1 when we are in blocking mode waiting for restart
+ */
+ if (ctx->ctx_fl_frozen == 0) {
+ ia64_set_pmc(0, 0);
+ ia64_srlz_d();
+ }
+ atomic_set(&ctx->ctx_last_cpu, smp_processor_id());
+
+ SET_PMU_OWNER(task);
+
+ /*
+ * restore the psr we changed in pfm_save_regs()
+ */
+ psr = ctx->ctx_saved_psr;
+ __asm__ __volatile__ ("mov psr.l=%0;; srlz.i;;"::"r"(psr): "memory");
+
+}
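pfm_load_regs() above walks a bitmask of used registers and either reinstalls the task's saved value or writes a default, so that nothing from the previous owner of the PMU leaks through. A minimal host-side sketch of that restore-or-clear walk follows; hw_reg, restore_or_clear and the mask values are made up for illustration.

#include <stdio.h>

static unsigned long hw_reg[16];                 /* stand-in for the PMD/IBR/DBR register file */

static void
restore_or_clear(unsigned long mask, const unsigned long *saved)
{
	int i;

	for (i = 0; mask; i++, mask >>= 1) {
		if (mask & 0x1)
			hw_reg[i] = saved[i];    /* register in use: reinstall saved value */
		else
			hw_reg[i] = 0UL;         /* unused: clear, do not leak stale state */
	}
}

int main(void)
{
	unsigned long saved[16] = { [4] = 0xabc, [5] = 0xdef };

	hw_reg[3] = 0x1234;                      /* stale value left by a previous task */
	restore_or_clear((1UL << 4) | (1UL << 5), saved);
	printf("reg3=0x%lx reg4=0x%lx\n", hw_reg[3], hw_reg[4]);  /* reg3 is now 0 */
	return 0;
}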
+
+/*
+ * XXX: make this routine able to work with non current context
+ */
+static void
+ia64_reset_pmu(struct task_struct *task)
+{
+ struct thread_struct *t = &task->thread;
+ pfm_context_t *ctx = t->pfm_context;
+ unsigned long mask;
+ int i;
+
+ if (task != current) {
+ printk("perfmon: invalid task in ia64_reset_pmu()\n");
+ return;
+ }
+
+ /* Let's make sure the PMU is frozen */
+ ia64_set_pmc(0,1);
+
+ /*
+ * install reset values for PMC. We skip PMC0 (done above)
+	 * XXX: only valid for up to 64 PMCs
+ */
+ mask = pmu_conf.impl_regs[0] >> 1;
+ for(i=1; mask; mask>>=1, i++) {
+ if (mask & 0x1) {
+ ia64_set_pmc(i, reset_pmcs[i]);
+ /*
+			 * When restoring context, we must restore ALL pmcs, even the ones
+			 * that the task does not use, to avoid leaks and possible corruption
+			 * of the session because of configuration conflicts. So here, we
+			 * initialize the table used in the context-switch restore routine.
+ */
+ t->pmc[i] = reset_pmcs[i];
+ DBprintk((" pmc[%d]=0x%lx\n", i, reset_pmcs[i]));
+
+ }
+ }
+ /*
+	 * install reset value (0) for the PMDs.
+	 * XXX: only valid for up to 64 PMDs. Assumes that zero is a safe value.
+ */
+ mask = pmu_conf.impl_regs[4];
+ for(i=0; mask; mask>>=1, i++) {
+ if (mask & 0x1) ia64_set_pmd(i, 0UL);
+ }
+
/*
- * unfreeze only when possible
+	 * On context-switch restore, we must restore ALL PMCs even
+	 * when they are not actively used by the task. In UP, the incoming process
+	 * may otherwise pick up left-over PMC state from the previous process.
+	 * As opposed to PMDs, stale PMCs can cause harm to the incoming
+	 * process because they may change what is being measured.
+	 * Therefore, we must systematically reinstall the entire
+	 * PMC state. In SMP, the same thing can happen on the
+	 * same CPU but also between 2 CPUs.
+	 *
+	 * There is unfortunately no easy way to avoid this problem
+	 * on either UP or SMP. This definitely slows down
+	 * pfm_load_regs().
*/
- if (ctx->ctx_fl_frozen == 0) {
- ia64_set_pmc(0, 0);
- ia64_srlz_d();
- /* place where we potentially (kernel level) start monitoring again */
- }
-}
+
+ /*
+	 * We must include all the PMCs in this mask to make sure we don't
+ * see any side effect of the stale state, such as opcode matching
+ * or range restrictions, for instance.
+ */
+ ctx->ctx_reload_pmcs[0] = pmu_conf.impl_regs[0];
+ /*
+ * useful in case of re-enable after disable
+ */
+ ctx->ctx_used_pmds[0] = 0UL;
+ ctx->ctx_used_ibrs[0] = 0UL;
+ ctx->ctx_used_dbrs[0] = 0UL;
+
+ ia64_srlz_d();
+}
/*
* This function is called when a thread exits (from exit_thread()).
* This is a simplified pfm_save_regs() that simply flushes the current
* register state into the save area taking into account any pending
- * overflow. This time no notification is sent because the taks is dying
+ * overflow. This time no notification is sent because the task is dying
* anyway. The inline processing of overflows avoids loosing some counts.
* The PMU is frozen on exit from this call and is to never be reenabled
* again for this task.
+ *
*/
void
-pfm_flush_regs (struct task_struct *ta)
+pfm_flush_regs (struct task_struct *task)
{
pfm_context_t *ctx;
- u64 pmc0, psr, mask;
- int i,j;
+ u64 pmc0;
+ unsigned long mask, mask2, val;
+ int i;
- if (ta == NULL) {
- panic(__FUNCTION__" task is NULL\n");
- }
- ctx = ta->thread.pfm_context;
- if (ctx == NULL) {
- panic(__FUNCTION__" no PFM ctx is NULL\n");
- }
- /*
- * We must make sure that we don't loose any potential overflow
- * interrupt while saving PMU context. In this code, external
- * interrupts are always enabled.
- */
+ ctx = task->thread.pfm_context;
- /*
- * save current PSR: needed because we modify it
+ if (ctx == NULL) return;
+
+ /*
+ * that's it if context already disabled
*/
- __asm__ __volatile__ ("mov %0=psr;;": "=r"(psr) :: "memory");
+ if (ctx->ctx_flags.state == PFM_CTX_DISABLED) return;
/*
* stop monitoring:
@@ -2090,7 +3332,27 @@
* in kernel.
* By now, we could still have an overflow interrupt in-flight.
*/
- __asm__ __volatile__ ("rsm psr.up;;"::: "memory");
+ if (ctx->ctx_fl_system) {
+
+ __asm__ __volatile__ ("rsm psr.pp;;"::: "memory");
+
+ /* disable dcr pp */
+ ia64_set_dcr(ia64_get_dcr() & ~IA64_DCR_PP);
+
+#ifdef CONFIG_SMP
+ local_cpu_data->pfm_syst_wide = 0;
+ local_cpu_data->pfm_dcr_pp = 0;
+#else
+ pfm_tasklist_toggle_pp(0);
+#endif
+
+ } else {
+
+ __asm__ __volatile__ ("rum psr.up;;"::: "memory");
+
+ /* no more save/restore on ctxsw */
+ current->thread.flags &= ~IA64_THREAD_PM_VALID;
+ }
/*
* Mark the PMU as not owned
@@ -2121,85 +3383,68 @@
ia64_srlz_d();
/*
- * restore PSR for context switch to save
+ * We don't need to restore psr, because we are on our way out anyway
*/
- __asm__ __volatile__ ("mov psr.l=%0;;srlz.i;"::"r"(psr): "memory");
/*
* This loop flushes the PMD into the PFM context.
- * IT also processes overflow inline.
+ * It also processes overflow inline.
*
* IMPORTANT: No notification is sent at this point as the process is dying.
* The implicit notification will come from a SIGCHILD or a return from a
* waitpid().
*
- * XXX: must take holes into account
*/
- mask = pmc0 >> PMU_FIRST_COUNTER;
- for (i=0,j=PMU_FIRST_COUNTER; i< pmu_conf.max_counters; i++,j++) {
-
- /* collect latest results */
- ctx->ctx_pmds[i].val += ia64_get_pmd(j) & pmu_conf.perf_ovfl_val;
-
- /*
- * now everything is in ctx_pmds[] and we need
- * to clear the saved context from save_regs() such that
- * pfm_read_pmds() gets the correct value
- */
- ta->thread.pmd[j] = 0;
- /* take care of overflow inline */
- if (mask & 0x1) {
- ctx->ctx_pmds[i].val += 1 + pmu_conf.perf_ovfl_val;
- DBprintk((" PMD[%d] overflowed pmd=0x%lx pmds.val=0x%lx\n",
- j, ia64_get_pmd(j), ctx->ctx_pmds[i].val));
- }
- mask >>=1;
- }
-}
+ if (atomic_read(&ctx->ctx_last_cpu) != smp_processor_id())
+ printk("perfmon: [%d] last_cpu=%d\n", task->pid, atomic_read(&ctx->ctx_last_cpu));
-/*
- * XXX: this routine is not very portable for PMCs
- * XXX: make this routine able to work with non current context
- */
-static void
-ia64_reset_pmu(void)
-{
- int i;
+ mask = pmc0 >> PMU_FIRST_COUNTER;
+ mask2 = ctx->ctx_used_pmds[0] >> PMU_FIRST_COUNTER;
- /* PMU is frozen, no pending overflow bits */
- ia64_set_pmc(0,1);
+ for (i = PMU_FIRST_COUNTER; mask2; i++, mask>>=1, mask2>>=1) {
- /* extra overflow bits + counter configs cleared */
- for(i=1; i< PMU_FIRST_COUNTER + pmu_conf.max_counters ; i++) {
- ia64_set_pmc(i,0);
- }
+		/* skip unused pmds */
+ if ((mask2 & 0x1) == 0) continue;
- /* opcode matcher set to all 1s */
- ia64_set_pmc(8,~0);
- ia64_set_pmc(9,~0);
+ val = ia64_get_pmd(i);
- /* I-EAR config cleared, plm=0 */
- ia64_set_pmc(10,0);
+ if (PMD_IS_COUNTING(i)) {
- /* D-EAR config cleared, PMC[11].pt must be 1 */
- ia64_set_pmc(11,1 << 28);
+ DBprintk(("[%d] pmd[%d] soft_pmd=0x%lx hw_pmd=0x%lx\n", task->pid, i, ctx->ctx_soft_pmds[i].val, val & pmu_conf.perf_ovfl_val));
- /* BTB config. plm=0 */
- ia64_set_pmc(12,0);
+ /* collect latest results */
+ ctx->ctx_soft_pmds[i].val += val & pmu_conf.perf_ovfl_val;
- /* Instruction address range, PMC[13].ta must be 1 */
- ia64_set_pmc(13,1);
+ /*
+ * now everything is in ctx_soft_pmds[] and we need
+ * to clear the saved context from save_regs() such that
+ * pfm_read_pmds() gets the correct value
+ */
+ task->thread.pmd[i] = 0;
- /* clears all PMD registers */
- for(i=0;i< pmu_conf.num_pmds; i++) {
- if (PMD_IS_IMPL(i)) ia64_set_pmd(i,0);
+ /* take care of overflow inline */
+ if (mask & 0x1) {
+ ctx->ctx_soft_pmds[i].val += 1 + pmu_conf.perf_ovfl_val;
+ DBprintk(("[%d] pmd[%d] overflowed soft_pmd=0x%lx\n",
+ task->pid, i, ctx->ctx_soft_pmds[i].val));
+ }
+ } else {
+ DBprintk(("[%d] pmd[%d] hw_pmd=0x%lx\n", task->pid, i, val));
+ /* not a counter, just save value as is */
+ task->thread.pmd[i] = val;
+ }
}
- ia64_srlz_d();
+ /*
+ * indicates that context has been saved
+ */
+ atomic_set(&ctx->ctx_last_cpu, -1);
+
}
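The flush loop above folds each counting PMD into its 64-bit software counter: the low bits read from the hardware are added in, and each overflow bit set in pmc0 contributes one full wrap of 1 + perf_ovfl_val. A small worked sketch of that arithmetic follows; the 47-bit width and the sample values are purely illustrative, not something this patch specifies.

#include <stdio.h>

int main(void)
{
	unsigned long perf_ovfl_val = (1UL << 47) - 1;   /* low-bit mask for an assumed 47-bit PMD */
	unsigned long soft_pmd = 0;                      /* 64-bit software counter (ctx_soft_pmds[].val) */
	unsigned long hw_pmd   = 0x123456789UL;          /* value read back from the PMD */
	int overflowed = 1;                              /* corresponding bit set in pmc0 */

	soft_pmd += hw_pmd & perf_ovfl_val;              /* collect the partial count */
	if (overflowed)
		soft_pmd += 1 + perf_ovfl_val;           /* account for one full wrap (2^47) */

	printf("soft_pmd=0x%lx\n", soft_pmd);
	return 0;
}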
+
/*
- * task is the newly created task
+ * task is the newly created task, regs points to the pt_regs of the new child
*/
int
pfm_inherit(struct task_struct *task, struct pt_regs *regs)
@@ -2207,25 +3452,29 @@
pfm_context_t *ctx = current->thread.pfm_context;
pfm_context_t *nctx;
struct thread_struct *th = &task->thread;
- int i, cnum;
+ unsigned long m;
+ int i;
/*
- * bypass completely for system wide
+ * make sure child cannot mess up the monitoring session
*/
- if (pfs_info.pfs_sys_session) {
- DBprintk((" enabling psr.pp for %d\n", task->pid));
- ia64_psr(regs)->pp = pfs_info.pfs_pp;
- return 0;
- }
+ ia64_psr(regs)->sp = 1;
+ DBprintk(("enabling psr.sp for [%d]\n", task->pid));
+
+ /*
+ * remove any sampling buffer mapping from child user
+ * address space. Must be done for all cases of inheritance.
+ */
+ if (ctx->ctx_smpl_vaddr) pfm_remove_smpl_mapping(task);
/*
* takes care of easiest case first
*/
if (CTX_INHERIT_MODE(ctx) == PFM_FL_INHERIT_NONE) {
- DBprintk((" removing PFM context for %d\n", task->pid));
+ DBprintk(("removing PFM context for [%d]\n", task->pid));
task->thread.pfm_context = NULL;
- task->thread.pfm_must_block = 0;
- atomic_set(&task->thread.pfm_notifiers_check, 0);
+ task->thread.pfm_ovfl_block_reset = 0;
+
/* copy_thread() clears IA64_THREAD_PM_VALID */
return 0;
}
@@ -2235,45 +3484,81 @@
/* copy content */
*nctx = *ctx;
+
if (CTX_INHERIT_MODE(ctx) == PFM_FL_INHERIT_ONCE) {
nctx->ctx_fl_inherit = PFM_FL_INHERIT_NONE;
- atomic_set(&task->thread.pfm_notifiers_check, 0);
- DBprintk((" downgrading to INHERIT_NONE for %d\n", task->pid));
- pfs_info.pfs_proc_sessions++;
+ atomic_set(&nctx->ctx_last_cpu, -1);
+
+ /*
+ * task is not yet visible in the tasklist, so we do
+ * not need to lock the newly created context.
+ * However, we must grab the tasklist_lock to ensure
+ * that the ctx_owner or ctx_notify_task do not disappear
+ * while we increment their check counters.
+ */
+ read_lock(&tasklist_lock);
+
+ if (nctx->ctx_notify_task)
+ atomic_inc(&nctx->ctx_notify_task->thread.pfm_notifiers_check);
+
+ if (nctx->ctx_owner)
+ atomic_inc(&nctx->ctx_owner->thread.pfm_owners_check);
+
+ read_unlock(&tasklist_lock);
+
+ DBprintk(("downgrading to INHERIT_NONE for [%d]\n", task->pid));
+
+ LOCK_PFS();
+ pfm_sessions.pfs_task_sessions++;
+ UNLOCK_PFS();
}
/* initialize counters in new context */
- for(i=0, cnum= PMU_FIRST_COUNTER; i < pmu_conf.max_counters; cnum++, i++) {
- nctx->ctx_pmds[i].val = nctx->ctx_pmds[i].ival & ~pmu_conf.perf_ovfl_val;
- th->pmd[cnum] = nctx->ctx_pmds[i].ival & pmu_conf.perf_ovfl_val;
+ m = pmu_conf.counter_pmds[0] >> PMU_FIRST_COUNTER;
+ for(i = PMU_FIRST_COUNTER ; m ; m>>=1, i++) {
+ if (m & 0x1) {
+ nctx->ctx_soft_pmds[i].val = nctx->ctx_soft_pmds[i].ival & ~pmu_conf.perf_ovfl_val;
+ th->pmd[i] = nctx->ctx_soft_pmds[i].ival & pmu_conf.perf_ovfl_val;
+ }
}
/* clear BTB index register */
th->pmd[16] = 0;
/* if sampling then increment number of users of buffer */
- if (nctx->ctx_smpl_buf) {
- atomic_inc(&nctx->ctx_smpl_buf->psb_refcnt);
+ if (nctx->ctx_psb) {
+
+ /*
+		 * XXX: not very pretty!
+ */
+ LOCK_PSB(nctx->ctx_psb);
+ nctx->ctx_psb->psb_refcnt++;
+ UNLOCK_PSB(nctx->ctx_psb);
+ /*
+ * remove any pointer to sampling buffer mapping
+ */
+ nctx->ctx_smpl_vaddr = 0;
}
nctx->ctx_fl_frozen = 0;
- nctx->ctx_ovfl_regs = 0;
+ nctx->ctx_ovfl_regs[0] = 0UL;
+
sema_init(&nctx->ctx_restart_sem, 0); /* reset this semaphore to locked */
/* clear pending notification */
- th->pfm_must_block = 0;
+ th->pfm_ovfl_block_reset = 0;
/* link with new task */
- th->pfm_context = nctx;
+ th->pfm_context = nctx;
- DBprintk((" nctx=%p for process %d\n", (void *)nctx, task->pid));
+ DBprintk(("nctx=%p for process [%d]\n", (void *)nctx, task->pid));
/*
* the copy_thread routine automatically clears
* IA64_THREAD_PM_VALID, so we need to reenable it, if it was used by the caller
*/
if (current->thread.flags & IA64_THREAD_PM_VALID) {
- DBprintk((" setting PM_VALID for %d\n", task->pid));
+ DBprintk(("setting PM_VALID for [%d]\n", task->pid));
th->flags |= IA64_THREAD_PM_VALID;
}
@@ -2281,100 +3566,249 @@
}
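The counter initialization in pfm_inherit() above splits each initial value: the bits above the counter width stay in the software counter, while the low bits are what actually gets programmed into the hardware PMD. A short sketch of that split, again with an illustrative 47-bit width and a made-up initial value:

#include <stdio.h>

int main(void)
{
	unsigned long ovfl_val = (1UL << 47) - 1;        /* assumed counter-width mask */
	unsigned long ival     = 0xffffffffffff0000UL;   /* e.g. "overflow after 0x10000 events" */

	unsigned long soft_val = ival & ~ovfl_val;       /* kept in ctx_soft_pmds[i].val */
	unsigned long hw_val   = ival &  ovfl_val;       /* programmed into thread.pmd[i] */

	/* reading the counter back later is soft_val + (current PMD value & ovfl_val) */
	printf("soft=0x%lx hw=0x%lx\n", soft_val, hw_val);
	return 0;
}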
/*
- * called from release_thread(), at this point this task is not in the
- * tasklist anymore
+ *
+ * We cannot touch any of the PMU registers at this point as we may
+ * not be running on the same CPU the task was last run on. Therefore
+ * it is assumed that the PMU has been stopped appropriately in
+ * pfm_flush_regs() called from exit_thread().
+ *
+ * The function is called in the context of the parent via release_thread()
+ * and wait4(). The task is not in the tasklist anymore.
*/
void
pfm_context_exit(struct task_struct *task)
{
pfm_context_t *ctx = task->thread.pfm_context;
- if (!ctx) {
- DBprintk((" invalid context for %d\n", task->pid));
- return;
- }
+ /*
+ * check sampling buffer
+ */
+ if (ctx->ctx_psb) {
+ pfm_smpl_buffer_desc_t *psb = ctx->ctx_psb;
+
+ LOCK_PSB(psb);
+
+ DBprintk(("sampling buffer from [%d] @%p size %ld vma_flag=0x%x\n",
+ task->pid,
+ psb->psb_hdr, psb->psb_size, psb->psb_flags));
+
+ /*
+ * in the case where we are the last user, we may be able to free
+ * the buffer
+ */
+ psb->psb_refcnt--;
+
+ if (psb->psb_refcnt == 0) {
+
+ /*
+			 * The flag is cleared in pfm_vm_close(), which gets
+ * called from do_exit() via exit_mm().
+ * By the time we come here, the task has no more mm context.
+ *
+ * We can only free the psb and buffer here after the vm area
+ * describing the buffer has been removed. This normally happens
+ * as part of do_exit() but the entire mm context is ONLY removed
+			 * once its reference count goes to zero. This is typically
+ * the case except for multi-threaded (several tasks) processes.
+ *
+ * See pfm_vm_close() and pfm_cleanup_smpl_buf() for more details.
+ */
+ if ((psb->psb_flags & PFM_PSB_VMA) == 0) {
+
+ DBprintk(("cleaning sampling buffer from [%d] @%p size %ld\n",
+ task->pid,
+ psb->psb_hdr, psb->psb_size));
+
+ /*
+ * free the buffer and psb
+ */
+ pfm_rvfree(psb->psb_hdr, psb->psb_size);
+ kfree(psb);
+ psb = NULL;
+ }
+ }
+ /* psb may have been deleted */
+ if (psb) UNLOCK_PSB(psb);
+ }
+
+ DBprintk(("cleaning [%d] pfm_context @%p notify_task=%p check=%d mm=%p\n",
+ task->pid, ctx,
+ ctx->ctx_notify_task,
+ atomic_read(&task->thread.pfm_notifiers_check), task->mm));
- /* check is we have a sampling buffer attached */
- if (ctx->ctx_smpl_buf) {
- pfm_smpl_buffer_desc_t *psb = ctx->ctx_smpl_buf;
-
- /* if only user left, then remove */
- DBprintk((" [%d] [%d] psb->refcnt=%d\n", current->pid, task->pid, psb->psb_refcnt.counter));
-
- if (atomic_dec_and_test(&psb->psb_refcnt) ) {
- rvfree(psb->psb_hdr, psb->psb_size);
- vfree(psb);
- DBprintk((" [%d] cleaning [%d] sampling buffer\n", current->pid, task->pid ));
- }
- }
- DBprintk((" [%d] cleaning [%d] pfm_context @%p\n", current->pid, task->pid, (void *)ctx));
-
- /*
- * To avoid getting the notified task scan the entire process list
- * when it exits because it would have pfm_notifiers_check set, we
- * decrease it by 1 to inform the task, that one less task is going
- * to send it notification. each new notifer increases this field by
- * 1 in pfm_context_create(). Of course, there is race condition between
- * decreasing the value and the notified task exiting. The danger comes
- * from the fact that we have a direct pointer to its task structure
- * thereby bypassing the tasklist. We must make sure that if we have
- * notify_task!= NULL, the target task is still somewhat present. It may
- * already be detached from the tasklist but that's okay. Note that it is
- * okay if we 'miss the deadline' and the task scans the list for nothing,
- * it will affect performance but not correctness. The correctness is ensured
- * by using the notify_lock whic prevents the notify_task from changing on us.
- * Once holdhing this lock, if we see notify_task!= NULL, then it will stay like
+ /*
+ * To avoid getting the notified task or owner task scan the entire process
+ * list when they exit, we decrement notifiers_check and owners_check respectively.
+ *
+	 * Of course, there is a race condition between decreasing the value and the
+ * task exiting. The danger comes from the fact that, in both cases, we have a
+ * direct pointer to a task structure thereby bypassing the tasklist.
+	 * We must make sure that, if we have task != NULL, the target task is still
+ * present and is identical to the initial task specified
+ * during pfm_create_context(). It may already be detached from the tasklist but
+ * that's okay. Note that it is okay if we miss the deadline and the task scans
+ * the list for nothing, it will affect performance but not correctness.
+ * The correctness is ensured by using the ctx_lock which prevents the
+ * notify_task from changing the fields in our context.
+	 * Once holding this lock, if we see task != NULL, then it will stay like
* that until we release the lock. If it is NULL already then we came too late.
*/
- spin_lock(&ctx->ctx_notify_lock);
+ LOCK_CTX(ctx);
- if (ctx->ctx_notify_task) {
- DBprintk((" [%d] [%d] atomic_sub on [%d] notifiers=%u\n", current->pid, task->pid,
- ctx->ctx_notify_task->pid,
- atomic_read(&ctx->ctx_notify_task->thread.pfm_notifiers_check)));
+ if (ctx->ctx_notify_task != NULL) {
+ DBprintk(("[%d], [%d] atomic_sub on [%d] notifiers=%u\n", current->pid,
+ task->pid,
+ ctx->ctx_notify_task->pid,
+ atomic_read(&ctx->ctx_notify_task->thread.pfm_notifiers_check)));
+
+ atomic_dec(&ctx->ctx_notify_task->thread.pfm_notifiers_check);
+ }
+
+ if (ctx->ctx_owner != NULL) {
+ DBprintk(("[%d], [%d] atomic_sub on [%d] owners=%u\n",
+ current->pid,
+ task->pid,
+ ctx->ctx_owner->pid,
+ atomic_read(&ctx->ctx_owner->thread.pfm_owners_check)));
- atomic_sub(1, &ctx->ctx_notify_task->thread.pfm_notifiers_check);
+ atomic_dec(&ctx->ctx_owner->thread.pfm_owners_check);
}
- spin_unlock(&ctx->ctx_notify_lock);
+ UNLOCK_CTX(ctx);
+
+ LOCK_PFS();
if (ctx->ctx_fl_system) {
- /*
- * if included interrupts (true by default), then reset
- * to get default value
- */
- if (ctx->ctx_fl_exclintr == 0) {
- /*
- * reload kernel default DCR value
- */
- ia64_set_dcr(pfs_info.pfs_dfl_dcr);
- DBprintk((" restored dcr to 0x%lx\n", pfs_info.pfs_dfl_dcr));
+
+ pfm_sessions.pfs_sys_session[ctx->ctx_cpu] = NULL;
+ pfm_sessions.pfs_sys_sessions--;
+ DBprintk(("freeing syswide session on CPU%ld\n", ctx->ctx_cpu));
+ /* update perfmon debug register counter */
+ if (ctx->ctx_fl_using_dbreg) {
+ if (pfm_sessions.pfs_sys_use_dbregs == 0) {
+ printk("perfmon: invalid release for [%d] sys_use_dbregs=0\n", task->pid);
+ } else
+ pfm_sessions.pfs_sys_use_dbregs--;
}
- /*
- * free system wide session slot
- */
- pfs_info.pfs_sys_session = 0;
+
+ /*
+ * remove any CPU pinning
+ */
+ task->cpus_allowed = ctx->ctx_saved_cpus_allowed;
+ task->need_resched = 1;
} else {
- pfs_info.pfs_proc_sessions--;
+ pfm_sessions.pfs_task_sessions--;
}
+ UNLOCK_PFS();
pfm_context_free(ctx);
/*
* clean pfm state in thread structure,
*/
- task->thread.pfm_context = NULL;
- task->thread.pfm_must_block = 0;
+ task->thread.pfm_context = NULL;
+ task->thread.pfm_ovfl_block_reset = 0;
+
/* pfm_notifiers is cleaned in pfm_cleanup_notifiers() */
+}
+
+/*
+ * function invoked from release_thread when pfm_smpl_buf_list is not NULL
+ */
+int
+pfm_cleanup_smpl_buf(struct task_struct *task)
+{
+ pfm_smpl_buffer_desc_t *tmp, *psb = task->thread.pfm_smpl_buf_list;
+ if (psb == NULL) {
+ printk("perfmon: psb is null in [%d]\n", current->pid);
+ return -1;
+ }
+ /*
+ * Walk through the list and free the sampling buffer and psb
+ */
+ while (psb) {
+ DBprintk(("[%d] freeing smpl @%p size %ld\n", current->pid, psb->psb_hdr, psb->psb_size));
+
+ pfm_rvfree(psb->psb_hdr, psb->psb_size);
+ tmp = psb->psb_next;
+ kfree(psb);
+ psb = tmp;
+ }
+
+ /* just in case */
+ task->thread.pfm_smpl_buf_list = NULL;
+
+ return 0;
+}
+
+/*
+ * function invoked from release_thread() to make sure that the ctx_owner field does not
+ * point to a task that no longer exists.
+ */
+void
+pfm_cleanup_owners(struct task_struct *task)
+{
+ struct task_struct *p;
+ pfm_context_t *ctx;
+
+ DBprintk(("called by [%d] for [%d]\n", current->pid, task->pid));
+
+ read_lock(&tasklist_lock);
+
+ for_each_task(p) {
+ /*
+ * It is safe to do the 2-step test here, because thread.ctx
+ * is cleaned up only in release_thread() and at that point
+ * the task has been detached from the tasklist which is an
+		 * the task has been detached from the tasklist, an operation
+		 * which takes the write_lock() on the tasklist_lock,
+		 * so it cannot run concurrently with this loop. So we have the
+ * it is going to stay like this for the entire execution of this
+ * loop.
+ */
+ ctx = p->thread.pfm_context;
+
+ //DBprintk(("[%d] scanning task [%d] ctx=%p\n", task->pid, p->pid, ctx));
+
+ if (ctx && ctx->ctx_owner == task) {
+ DBprintk(("trying for owner [%d] in [%d]\n", task->pid, p->pid));
+ /*
+ * the spinlock is required to take care of a race condition
+ * with the send_sig_info() call. We must make sure that
+ * either the send_sig_info() completes using a valid task,
+ * or the notify_task is cleared before the send_sig_info()
+ * can pick up a stale value. Note that by the time this
+ * function is executed the 'task' is already detached from the
+ * tasklist. The problem is that the notifiers have a direct
+ * pointer to it. It is okay to send a signal to a task in this
+ * stage, it simply will have no effect. But it is better than sending
+ * to a completely destroyed task or worse to a new task using the same
+ * task_struct address.
+ */
+ LOCK_CTX(ctx);
+
+ ctx->ctx_owner = NULL;
+
+ UNLOCK_CTX(ctx);
+
+			DBprintk(("done for owner [%d] in [%d]\n", task->pid, p->pid));
+ }
+ }
+ read_unlock(&tasklist_lock);
}
+
+/*
+ * function called from release_thread() to make sure that ctx_notify_task is not pointing
+ * to a task that no longer exists
+ */
void
pfm_cleanup_notifiers(struct task_struct *task)
{
struct task_struct *p;
pfm_context_t *ctx;
- DBprintk((" [%d] called\n", task->pid));
+ DBprintk(("called by [%d] for [%d]\n", current->pid, task->pid));
read_lock(&tasklist_lock);
@@ -2391,10 +3825,10 @@
*/
ctx = p->thread.pfm_context;
- DBprintk((" [%d] scanning task [%d] ctx=%p\n", task->pid, p->pid, ctx));
+ //DBprintk(("[%d] scanning task [%d] ctx=%p\n", task->pid, p->pid, ctx));
if (ctx && ctx->ctx_notify_task == task) {
- DBprintk((" trying for notifier %d in %d\n", task->pid, p->pid));
+ DBprintk(("trying for notifier [%d] in [%d]\n", task->pid, p->pid));
/*
* the spinlock is required to take care of a race condition
* with the send_sig_info() call. We must make sure that
@@ -2408,23 +3842,146 @@
* to a completely destroyed task or worse to a new task using the same
* task_struct address.
*/
- spin_lock(&ctx->ctx_notify_lock);
+ LOCK_CTX(ctx);
ctx->ctx_notify_task = NULL;
- spin_unlock(&ctx->ctx_notify_lock);
+ UNLOCK_CTX(ctx);
- DBprintk((" done for notifier %d in %d\n", task->pid, p->pid));
+ DBprintk(("done for notifier [%d] in [%d]\n", task->pid, p->pid));
}
}
read_unlock(&tasklist_lock);
+}
+
+static struct irqaction perfmon_irqaction = {
+ handler: perfmon_interrupt,
+ flags: SA_INTERRUPT,
+ name: "perfmon"
+};
+
+static void
+pfm_pmu_snapshot(void)
+{
+ int i;
+
+ for (i=0; i < IA64_NUM_PMC_REGS; i++) {
+ if (i >= pmu_conf.num_pmcs) break;
+ if (PMC_IS_IMPL(i)) reset_pmcs[i] = ia64_get_pmc(i);
+ }
+}
+
+/*
+ * perfmon initialization routine, called from the initcall() table
+ */
+int __init
+perfmon_init (void)
+{
+ pal_perf_mon_info_u_t pm_info;
+ s64 status;
+
+ register_percpu_irq(IA64_PERFMON_VECTOR, &perfmon_irqaction);
+
+ ia64_set_pmv(IA64_PERFMON_VECTOR);
+ ia64_srlz_d();
+
+ pmu_conf.pfm_is_disabled = 1;
+
+ printk("perfmon: version %u.%u (sampling format v%u.%u) IRQ %u\n",
+ PFM_VERSION_MAJ,
+ PFM_VERSION_MIN,
+ PFM_SMPL_VERSION_MAJ,
+ PFM_SMPL_VERSION_MIN,
+ IA64_PERFMON_VECTOR);
+
+ if ((status=ia64_pal_perf_mon_info(pmu_conf.impl_regs, &pm_info)) != 0) {
+ printk("perfmon: PAL call failed (%ld), perfmon disabled\n", status);
+ return -1;
+ }
+
+ pmu_conf.perf_ovfl_val = (1UL << pm_info.pal_perf_mon_info_s.width) - 1;
+ pmu_conf.max_counters = pm_info.pal_perf_mon_info_s.generic;
+ pmu_conf.num_pmcs = find_num_pm_regs(pmu_conf.impl_regs);
+ pmu_conf.num_pmds = find_num_pm_regs(&pmu_conf.impl_regs[4]);
+
+ printk("perfmon: %u bits counters\n", pm_info.pal_perf_mon_info_s.width);
+
+ printk("perfmon: %lu PMC/PMD pairs, %lu PMCs, %lu PMDs\n",
+ pmu_conf.max_counters, pmu_conf.num_pmcs, pmu_conf.num_pmds);
+
+ /* sanity check */
+ if (pmu_conf.num_pmds >= IA64_NUM_PMD_REGS || pmu_conf.num_pmcs >= IA64_NUM_PMC_REGS) {
+ printk(KERN_ERR "perfmon: not enough pmc/pmd, perfmon is DISABLED\n");
+ return -1; /* no need to continue anyway */
+ }
+
+ if (ia64_pal_debug_info(&pmu_conf.num_ibrs, &pmu_conf.num_dbrs)) {
+ printk(KERN_WARNING "perfmon: unable to get number of debug registers\n");
+ pmu_conf.num_ibrs = pmu_conf.num_dbrs = 0;
+ }
+ /* PAL reports the number of pairs */
+ pmu_conf.num_ibrs <<=1;
+ pmu_conf.num_dbrs <<=1;
+
+ /*
+ * take a snapshot of all PMU registers. PAL is supposed
+ * to configure them with stable/safe values, i.e., not
+ * capturing anything.
+ * We take a snapshot now, before we make any modifications. This
+ * will become our master copy. Then we will reuse the snapshot
+ * to reset the PMU in pfm_enable(). Using this technique, perfmon
+ * does NOT have to know about the specific values to program for
+ * the PMC/PMD. The safe values may be different from one CPU model to
+ * the other.
+ */
+ pfm_pmu_snapshot();
+
+ /*
+ * list the pmc registers used to control monitors
+ * XXX: unfortunately this information is not provided by PAL
+ *
+ * We start with the architected minimum and then refine for each CPU model
+ */
+ pmu_conf.monitor_pmcs[0] = PMM(4)|PMM(5)|PMM(6)|PMM(7);
+
+ /*
+ * architected counters
+ */
+ pmu_conf.counter_pmds[0] |= PMM(4)|PMM(5)|PMM(6)|PMM(7);
+
+#ifdef CONFIG_ITANIUM
+ pmu_conf.monitor_pmcs[0] |= PMM(10)|PMM(11)|PMM(12);
+ /* Itanium does not add more counters */
+#endif
+ /* we are all set */
+ pmu_conf.pfm_is_disabled = 0;
+
+ /*
+	 * create the /proc entry here for now, for debug purposes
+ */
+ perfmon_dir = create_proc_read_entry ("perfmon", 0, 0, perfmon_read_entry, NULL);
+
+ spin_lock_init(&pfm_sessions.pfs_lock);
+
+ return 0;
+}
+
+__initcall(perfmon_init);
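perfmon_init() above derives its configuration from a handful of PAL-reported numbers: the counter width turns into a low-bit mask, and the debug-register counts are doubled because PAL reports pairs. A tiny sketch of that arithmetic with made-up PAL results (47 bits wide, 4 IBR/DBR pairs; the real values depend on the CPU model):

#include <stdio.h>

int main(void)
{
	unsigned int width = 47, ibr_pairs = 4, dbr_pairs = 4;   /* pretend PAL results */

	unsigned long perf_ovfl_val = (1UL << width) - 1;        /* 0x00007fffffffffff */
	unsigned int num_ibrs = ibr_pairs << 1;                  /* PAL reports pairs -> 8 registers */
	unsigned int num_dbrs = dbr_pairs << 1;

	printf("ovfl_val=0x%lx ibrs=%u dbrs=%u\n", perf_ovfl_val, num_ibrs, num_dbrs);
	return 0;
}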
+
+void
+perfmon_init_percpu (void)
+{
+ ia64_set_pmv(IA64_PERFMON_VECTOR);
+ ia64_srlz_d();
}
+
#else /* !CONFIG_PERFMON */
asmlinkage int
-sys_perfmonctl (int pid, int cmd, int flags, perfmon_req_t *req, int count, long arg6, long arg7, long arg8, long stack)
+sys_perfmonctl (int pid, int cmd, void *req, int count, long arg5, long arg6,
+ long arg7, long arg8, long stack)
{
return -ENOSYS;
}