diff -ru vanilla/arch/x86_64/kernel/setup64.c slhist/arch/x86_64/kernel/setup64.c
--- vanilla/arch/x86_64/kernel/setup64.c	2007-10-09 22:31:38.000000000 +0200
+++ slhist/arch/x86_64/kernel/setup64.c	2009-08-10 11:51:07.000000000 +0200
@@ -286,4 +286,15 @@
 	fpu_init(); 
 
 	raw_local_save_flags(kernel_eflags);
+
+#ifdef CONFIG_LOCK_STAT_HISTOGRAM_SETUPPMC
+	printk("Setting up PMC0\n");
+	/* Write EDX:EAX = 0x00000000:0x00430076 to MSR 0xC0010000 so that PMC 0 counts cycles. */
+    asm volatile (
+        "wrmsr"
+        :
+        : "d" (0x00000000), "a" (0x00430076), "c" (0xC0010000)	/* EDX = high half, EAX = low half, ECX = MSR index */
+    );	/* NOTE(review): 0xC0010000 looks like AMD PERF_CTL0 -- confirm; nothing here checks the CPU vendor first */
+#endif
+
 }
diff -ru vanilla/include/linux/lockdep.h slhist/include/linux/lockdep.h
--- vanilla/include/linux/lockdep.h	2007-10-09 22:31:38.000000000 +0200
+++ slhist/include/linux/lockdep.h	2009-08-10 11:51:07.000000000 +0200
@@ -123,6 +123,14 @@
 };
 
 #ifdef CONFIG_LOCK_STAT
+
+#ifdef CONFIG_LOCK_STAT_HISTOGRAM	/* spinlock wait histogram: defined in kernel/lockdep_proc.c, filled by lib/spinlock_debug.c */
+extern long long slhist_count[64];	/* lock acquisitions per bin; bin 0 = lock taken on the first try */
+extern long long slhist_loops[64];	/* printed with the other columns, but not updated by the code in this patch */
+extern long long slhist_tsc[64];	/* summed TSC wait per bin; [63] also counts TSC wrap-arounds */
+extern long long slhist_pmc[64];	/* summed PMC 0 wait per bin; [63] also counts PMC wrap-arounds */
+#endif
+
 struct lock_time {
 	s64				min;
 	s64				max;
diff -ru vanilla/kernel/lockdep_proc.c slhist/kernel/lockdep_proc.c
--- vanilla/kernel/lockdep_proc.c	2007-10-09 22:31:38.000000000 +0200
+++ slhist/kernel/lockdep_proc.c	2009-08-10 11:51:07.000000000 +0200
@@ -351,6 +351,13 @@
 
 #ifdef CONFIG_LOCK_STAT
 
+#ifdef CONFIG_LOCK_STAT_HISTOGRAM	/* global spinlock wait histogram, updated from lib/spinlock_debug.c */
+long long slhist_count[64];	/* lock acquisitions per bin; bin 0 = lock taken on the first try */
+long long slhist_loops[64];	/* printed below, but not updated by the code in this patch */
+long long slhist_tsc[64];	/* summed TSC wait per bin; [63] also counts TSC wrap-arounds */
+long long slhist_pmc[64];	/* summed PMC 0 wait per bin; [63] also counts PMC wrap-arounds */
+#endif
+
 struct lock_stat_data {
 	struct lock_class *class;
 	struct lock_class_stats stats;
@@ -421,6 +428,15 @@
 	class = data->class;
 	stats = &data->stats;
 
+#ifdef CONFIG_LOCK_STAT_HISTOGRAM
+	/* Emit all 64 slhist_* bins, one line per bin, framed by empty lines. */
+	seq_puts(m, "\n");
+	for (i = 0; i < 64; i++)
+		seq_printf(m, "2^%02u  count= %8llu  loops= %10llu  tsc= %12llu  pmc= %12llu\n",
+			   i, slhist_count[i], slhist_loops[i], slhist_tsc[i], slhist_pmc[i]);
+	seq_puts(m, "\n");
+#endif
+
 	namelen = 38;
 	if (class->name_version > 1)
 		namelen -= 2; /* XXX truncates versions > 9 */
diff -ru vanilla/lib/Kconfig.debug slhist/lib/Kconfig.debug
--- vanilla/lib/Kconfig.debug	2007-10-09 22:31:38.000000000 +0200
+++ slhist/lib/Kconfig.debug	2009-08-10 11:51:44.000000000 +0200
@@ -296,6 +296,37 @@
 
 	 For more details, see Documentation/lockstat.txt
 
+config LOCK_STAT_OSRC
+	bool "OSRC's spinlock instrumentation"
+	depends on LOCK_STAT
+	help
+	 Replaces the debug spinlock slow path with a C implementation that the histogram and yield options below build on.
+
+config LOCK_STAT_HISTOGRAM
+	bool "Lock time histogram"
+	depends on LOCK_STAT && LOCK_STAT_OSRC && X86_64
+	help
+	 Collect a histogram of spinlock wait times and print it with the lock statistics. Uses x86-64 inline assembly.
+
+config LOCK_STAT_HISTOGRAM_SETUPPMC
+	bool "Set up PMC 0 to count cycles"
+	depends on LOCK_STAT_HISTOGRAM
+	help
+	 Say Y when running natively on bare hardware. Under Xen, the hypervisor is expected to have set up PMC 0 already.
+
+config LOCK_STAT_YIELD
+	bool "Yield hypercall in spinlock code"
+	depends on LOCK_STAT_OSRC && X86_64
+	help
+	 Issue a yield hypercall (vmmcall) after waiting a certain amount of time to acquire a spinlock.
+
+config LOCK_STAT_YIELD_THRESHOLD
+	int "Yield threshold (2^n cycles)"
+	depends on LOCK_STAT_YIELD
+	default 16
+	help
+	 Yield after every 2^n cycles spent unsuccessfully waiting to acquire the lock.
+
 config DEBUG_LOCKDEP
 	bool "Lock dependency engine debugging"
 	depends on DEBUG_KERNEL && LOCKDEP
diff -ru vanilla/lib/spinlock_debug.c slhist/lib/spinlock_debug.c
--- vanilla/lib/spinlock_debug.c	2007-10-09 22:31:38.000000000 +0200
+++ slhist/lib/spinlock_debug.c	2009-08-10 13:13:06.000000000 +0200
@@ -98,6 +98,135 @@
 	lock->owner_cpu = -1;
 }
 
+#if defined(CONFIG_LOCK_STAT_HISTOGRAM) || defined (CONFIG_LOCK_STAT_YIELD)
+/* Atomically add val to the 64-bit value at addr ("lock addq"); "+m" tells GCC that *addr is both read and written. */
+static inline void lock_add64(long long *addr, long long val)
+{ asm volatile("lock addq %1, %0" : "+m" (*addr) : "er" (val) : "cc"); }
+
+/* Atomically increment the 64-bit value at addr ("lock incq"); "+m" tells GCC that *addr is both read and written. */
+static inline void lock_inc64(long long *addr)
+{ asm volatile("lock incq %0" : "+m" (*addr) : : "cc"); }
+
+/* Sample PMC 0 and the TSC back to back (rdtscp, rdpmc, rdtscp); *tsc comes from the second rdtscp. */
+static inline void rdtscpmc64(long long *tsc, long long *pmc) {
+	long tsc_hi, tsc_lo, pmc_hi, pmc_lo;
+
+	asm volatile (
+		"rdtscp\n\t"			/* result is discarded by the xors below */
+		"xorq %%rax, %%rax\n\t"
+		"xorq %%rcx, %%rcx\n\t"	/* ECX = 0: rdpmc reads counter 0 */
+		"xorq %%rdx, %%rdx\n\t"
+		"rdpmc\n\t"
+		"mov %%rdx, %[phi]\n\t"
+		"mov %%rax, %[plo]\n\t"
+		"rdtscp\n\t"
+		"mov %%rdx, %[thi]\n\t"
+		"mov %%rax, %[tlo]\n\t"
+		: [thi] "=r" (tsc_hi), [tlo] "=r" (tsc_lo), [phi] "=r" (pmc_hi), [plo] "=r" (pmc_lo)
+		: : "rax", "rcx", "rdx");
+
+	*tsc = (tsc_hi << 32) + tsc_lo;
+	*pmc = (pmc_hi << 32) + pmc_lo;
+}
+
+/*
+ * Read the time stamp counter: rdtsc leaves the value in EDX:EAX, which is
+ * merged into a single 64-bit result.
+ */
+static inline long long rdtsc64(void) {
+	long eax, edx;
+	asm volatile ("rdtsc  \n" : "=d" (edx), "=a" (eax) : "d" (0), "a" (0));
+	return (edx << 32) + eax;
+}
+
+/*
+ * Read the performance counter selected by "counter" (passed in ECX) with
+ * rdpmc and return its EDX:EAX result merged into a single 64-bit value.
+ */
+static inline long long rdpmc64(int counter) {
+	long eax, edx;
+	asm volatile ("rdpmc  \n" : "=d" (edx), "=a" (eax) : "c" (counter), "d" (0), "a" (0));
+	return (edx << 32) + eax;
+}
+#endif
+
+#ifdef CONFIG_LOCK_STAT_OSRC
+/*
+ * Contended-lock slow path (CONFIG_LOCK_STAT_OSRC): spin until the raw lock is
+ * taken.  With CONFIG_LOCK_STAT_HISTOGRAM the wait, measured in PMC 0 counts,
+ * is filed under bin floor(log2(wait)) + 1 (bin 0 = acquired on the first try;
+ * a wait of 0 goes to bin 1); slhist_tsc[63]/slhist_pmc[63] also count samples
+ * dropped because a counter wrapped.  With CONFIG_LOCK_STAT_YIELD a vmmcall
+ * is issued every 2^CONFIG_LOCK_STAT_YIELD_THRESHOLD PMC 0 counts of waiting.
+ */
+static void __spin_lock_debug(spinlock_t *lock)
+{
+#ifdef CONFIG_LOCK_STAT_HISTOGRAM
+	long long bin = 0;
+#endif
+#if defined(CONFIG_LOCK_STAT_HISTOGRAM) || defined (CONFIG_LOCK_STAT_YIELD)
+	long long time, time_start;
+	long long pmc0, pmc0_start, pmc0_nextyield;
+#endif
+
+	/* Fast path: the lock looks free and the trylock succeeds at once. */
+	if (!__raw_spin_is_locked(&lock->raw_lock) && __raw_spin_trylock(&lock->raw_lock)) {
+#ifdef CONFIG_LOCK_STAT_HISTOGRAM
+		lock_inc64(&slhist_count[0]);
+#endif
+		return;
+	}
+
+#if defined(CONFIG_LOCK_STAT_HISTOGRAM) || defined (CONFIG_LOCK_STAT_YIELD)
+	rdtscpmc64(&time_start, &pmc0_start);
+#ifdef CONFIG_LOCK_STAT_YIELD
+	/* 1LL: a plain int shift would overflow for thresholds >= 31. */
+	pmc0_nextyield = pmc0_start + (1LL << CONFIG_LOCK_STAT_YIELD_THRESHOLD);
+#endif
+#endif
+
+	for (;;) {
+		if (!__raw_spin_is_locked(&lock->raw_lock) && __raw_spin_trylock(&lock->raw_lock)) {
+#ifdef CONFIG_LOCK_STAT_HISTOGRAM
+			rdtscpmc64(&time, &pmc0);
+			time -= time_start;
+			pmc0 -= pmc0_start;
+
+			/* A negative delta means the counter wrapped: tally it and drop the sample. */
+			if (time < 0) {
+				lock_inc64(&slhist_tsc[63]);
+				return;
+			}
+			if (pmc0 < 0) {
+				lock_inc64(&slhist_pmc[63]);
+				return;
+			}
+
+			/* bsr leaves its destination undefined for a zero source, so only run it for pmc0 > 0. */
+			if (pmc0 > 0)
+				asm ("bsrq %1, %0" : "=r" (bin) : "rm" (pmc0) : "cc");
+			bin++;
+			lock_inc64(&slhist_count[bin]);
+			lock_add64(&slhist_tsc[bin], time);
+			lock_add64(&slhist_pmc[bin], pmc0);
+#endif
+			return;
+		}
+#ifdef CONFIG_LOCK_STAT_YIELD
+		rdtscpmc64(&time, &pmc0);
+		if (unlikely(pmc0 >= pmc0_nextyield)) {
+			pmc0_nextyield = pmc0 + (1LL << CONFIG_LOCK_STAT_YIELD_THRESHOLD);
+			/* Hypercall with 0xbeefbeef00010001 in RAX. */
+			asm volatile("movq $0xbeefbeef00010001, %%rax\n\t"
+				     "vmmcall\n\t"
+				     : : : "rax");
+		}
+#endif
+	}
+}
+
+#else /* CONFIG_LOCK_STAT_OSRC */
+
 static void __spin_lock_debug(spinlock_t *lock)
 {
 	u64 i;
@@ -125,6 +254,8 @@
 	}
 }
 
+#endif /* CONFIG_LOCK_STAT_OSRC */
+
 void _raw_spin_lock(spinlock_t *lock)
 {
 	debug_spin_lock_before(lock);
