From b0772d7a5c74d68a31697ebc971438faa7a132f5 Mon Sep 17 00:00:00 2001
From: Christian Halstrick <christian.halstrick@sap.com>
Date: Mon, 19 Oct 2009 17:10:10 +0200
Subject: [PATCH] Add performance tests for MyersDiff

Add some tests which make sure that the diff algorithm really behaves in the
promised O(N*D) manner. These tests compute diffs between multiple big chunks
of data, measure the time for computing the diffs and fail if the measured
times are off O(N*D) by more than a fixed factor (maxFactor, currently 15).

Signed-off-by: Christian Halstrick <christian.halstrick@sap.com>
Change-Id: I8e1e0be60299472828718371b231f1d8a9dc21a7
Signed-off-by: Robin Rosenberg <robin.rosenberg@dewire.com>
---
 .../jgit/diff/DiffTestDataGenerator.java      |  90 ++++++++
 .../jgit/diff/MyersDiffPerformanceTest.java   | 196 ++++++++++++++++++
 .../eclipse/jgit/util/CPUTimeStopWatch.java   | 111 ++++++++++
 3 files changed, 397 insertions(+)
 create mode 100644 org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/DiffTestDataGenerator.java
 create mode 100644 org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/MyersDiffPerformanceTest.java
 create mode 100644 org.eclipse.jgit.test/tst/org/eclipse/jgit/util/CPUTimeStopWatch.java

diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/DiffTestDataGenerator.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/DiffTestDataGenerator.java
new file mode 100644
index 000000000..c40311214
--- /dev/null
+++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/DiffTestDataGenerator.java
@@ -0,0 +1,90 @@
+/*
+ * Copyright (C) 2009, Christian Halstrick <christian.halstrick@sap.com>
+ * and other copyright owners as documented in the project's IP log.
+ *
+ * This program and the accompanying materials are made available
+ * under the terms of the Eclipse Distribution License v1.0 which
+ * accompanies this distribution, is reproduced below, and is
+ * available at http://www.eclipse.org/org/documents/edl-v10.php
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer in the documentation and/or other materials provided
+ *   with the distribution.
+ *
+ * - Neither the name of the Eclipse Foundation, Inc. nor the
+ *   names of its contributors may be used to endorse or promote
+ *   products derived from this software without specific prior
+ *   written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+package org.eclipse.jgit.diff;
+
+public class DiffTestDataGenerator {
+	/**
+	 * Generate a sequence of characters in ascending order. The first character
+	 * is a space. Each subsequent character has an ASCII code one greater than
+	 * the ASCII code of the preceding character. One exception: the character
+	 * which follows '~' is again a ' '.
+	 *
+	 * @param len
+	 *            length of the String to be returned
+	 * @return the sequence of characters as String
+	 */
+	public static String generateSequence(int len) {
+		return generateSequence(len, 0, 0);
+	}
+
+	/**
+	 * Generate a sequence of characters similar to the one returned by
+	 * {@link #generateSequence(int)}, but with the last {@code skipLength}
+	 * characters of each chunk of {@code skipPeriod} characters left out. Two
+	 * calls with different prime skipPeriod and short skipLength values yield
+	 * data resembling edited source code: huge files with few changes.
+	 *
+	 * @param len
+	 *            length of the String to be returned
+	 * @param skipPeriod size of a chunk; 0 means that nothing is left out
+	 * @param skipLength characters left out at the end of each chunk
+	 * @return the sequence of characters as String
+	 * @throws IllegalArgumentException
+	 *             if skipPeriod is not 0 and skipLength is not below skipPeriod
+	 */
+	public static String generateSequence(int len, int skipPeriod,
+			int skipLength) {
+		// a chunk which is skipped completely would never let the loop below end
+		if (skipPeriod != 0 && skipLength >= skipPeriod)
+			throw new IllegalArgumentException("skipLength (" + skipLength
+					+ ") must be smaller than skipPeriod (" + skipPeriod + ")");
+		StringBuilder text = new StringBuilder(len);
+		int skipStart = skipPeriod - skipLength;
+		for (int i = 0; text.length() < len; ++i) {
+			if (skipPeriod == 0 || i % skipPeriod < skipStart)
+				text.append((char) (32 + i % 95));
+		}
+		return text.toString();
+	}
+}
diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/MyersDiffPerformanceTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/MyersDiffPerformanceTest.java
new file mode 100644
index 000000000..fe63e3d18
--- /dev/null
+++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/MyersDiffPerformanceTest.java
@@ -0,0 +1,196 @@
+/*
+ * Copyright (C) 2009, Christian Halstrick <christian.halstrick@sap.com>
+ * and other copyright owners as documented in the project's IP log.
+ *
+ * This program and the accompanying materials are made available
+ * under the terms of the Eclipse Distribution License v1.0 which
+ * accompanies this distribution, is reproduced below, and is
+ * available at http://www.eclipse.org/org/documents/edl-v10.php
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer in the documentation and/or other materials provided
+ *   with the distribution.
+ *
+ * - Neither the name of the Eclipse Foundation, Inc. nor the
+ *   names of its contributors may be used to endorse or promote
+ *   products derived from this software without specific prior
+ *   written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+package org.eclipse.jgit.diff;
+
+import java.text.DecimalFormat;
+import java.text.NumberFormat;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.LinkedList;
+import java.util.List;
+
+import junit.framework.TestCase;
+
+import org.eclipse.jgit.util.CPUTimeStopWatch;
+
+/**
+ * Test cases for the performance of the diff implementation. The tests check
+ * that the performance of the MyersDiff algorithm is really O(N*D), meaning the
+ * time for computing the diff between a and b should depend on the product of
+ * a.length+b.length and the number of found differences. The tests compute
+ * diffs between chunks of different length, measure the needed time and check
+ * that time/(N*D) does not differ by more than a certain factor (currently 15).
+ */
+public class MyersDiffPerformanceTest extends TestCase {
+	private static final long longTaskBoundary = 5000000000L; // ns of CPU time after which a measuring loop stops repeating
+	// number of observed CPU clock value changes which ends a measuring loop
+	private static final int minCPUTimerTicks = 10;
+	// the ratio max/min of the measured time/(N*D) values must stay below this
+	private static final int maxFactor = 15;
+	// null when createInstance() returns null; test() then measures nothing
+	private CPUTimeStopWatch stopwatch=CPUTimeStopWatch.createInstance();
+
+	public static class PerfData {
+		// plain holder of one measurement; uses no state of the enclosing test
+		private final NumberFormat fmt = new DecimalFormat("#.##E0");
+		// measured time in ns for computing one diff
+		public long runningTime;
+		// number of found differences
+		public long D;
+		// combined size of both diffed inputs (toString reports N/2 per input)
+		public long N;
+		// cached performance indexes; a negative value means "not computed yet"
+		private double p1 = -1;
+		private double p2 = -1;
+
+		public double perf1() {
+			if (p1 < 0)
+				p1 = runningTime / ((double) N * D);
+			return p1;
+		}
+
+		public double perf2() {
+			if (p2 < 0)
+				p2 = runningTime / ((double) N * D * D);
+			return p2;
+		}
+
+		@Override
+		public String toString() {
+			return "diffing " + N / 2 + " bytes took " + runningTime + " ns. N="
+					+ N + ", D=" + D + ", time/(N*D):" + fmt.format(perf1())
+					+ ", time/(N*D^2):" + fmt.format(perf2());
+		}
+	}
+
+	public static Comparator<PerfData> getComparator(final int whichPerf) {
+		return new Comparator<PerfData>() { // by perf1() if whichPerf == 1, else by perf2()
+			public int compare(PerfData lhs, PerfData rhs) {
+				double l = (whichPerf != 1) ? lhs.perf2() : lhs.perf1();
+				double r = (whichPerf != 1) ? rhs.perf2() : rhs.perf1();
+				return (l > r) ? 1 : ((l < r) ? -1 : 0);
+			}
+		};
+	}
+
+	/**
+	 * Measure diffs of texts of increasing size and fail if the largest and
+	 * smallest time/(N*D) differ by a factor of maxFactor or more. Does nothing
+	 * if no CPU time stopwatch is available.
+	 */
+	public void test() {
+		if (stopwatch == null)
+			return;
+		List<PerfData> perfData = new LinkedList<PerfData>();
+		int[] sizes = { 10000, 20000, 50000, 80000, 99999, 999999 };
+		for (int size : sizes)
+			perfData.add(test(size));
+
+		Comparator<PerfData> c = getComparator(1);
+		double factor = Collections.max(perfData, c).perf1()
+				/ Collections.min(perfData, c).perf1();
+		assertTrue(
+				"minimum and maximum of performance-index t/(N*D) differed too much. Measured factor of "
+						+ factor + " (maxFactor=" + maxFactor
+						+ "). Perfdata=<" + perfData + ">",
+				factor < maxFactor);
+	}
+
+	/**
+	 * Measure the CPU time MyersDiff needs for two similar (non-random) texts
+	 * of the given size. The CPU clock is coarse, so the diff is repeated and
+	 * the clock is read after every repetition. The repetitions end once the
+	 * clock value has been seen to change {@link #minCPUTimerTicks} times or
+	 * the accumulated CPU time has reached the long-task boundary. The
+	 * reported running time is the average over all repetitions.
+	 *
+	 * @param characters
+	 *            the size of the diffed character sequences.
+	 * @return performance data
+	 */
+	private PerfData test(int characters) {
+		CharArray left = new CharArray(DiffTestDataGenerator.generateSequence(
+				characters, 971, 3));
+		CharArray right = new CharArray(DiffTestDataGenerator.generateSequence(
+				characters, 1621, 5));
+		MyersDiff diff;
+		int runs = 0;
+		int ticksSeen = 0;
+		long previous = 0;
+		long elapsed;
+		stopwatch.start();
+		do {
+			diff = new MyersDiff(left, right);
+			runs++;
+			elapsed = stopwatch.readout();
+			if (elapsed != previous) {
+				previous = elapsed;
+				ticksSeen++;
+			}
+		} while (ticksSeen < minCPUTimerTicks && elapsed < longTaskBoundary);
+
+		PerfData result = new PerfData();
+		result.runningTime = stopwatch.stop() / runs;
+		result.N = left.size() + right.size();
+		result.D = diff.getEdits().size();
+		return result;
+	}
+
+	private static class CharArray implements Sequence {
+		private final char[] array; // one sequence element per character
+
+		public CharArray(String s) {
+			this.array = s.toCharArray();
+		}
+
+		public int size() {
+			return this.array.length;
+		}
+		// compares element i of this sequence with element j of other (a CharArray)
+		public boolean equals(int i, Sequence other, int j) {
+			final CharArray that = (CharArray) other;
+			return this.array[i] == that.array[j];
+		}
+	}
+}
diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/CPUTimeStopWatch.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/CPUTimeStopWatch.java
new file mode 100644
index 000000000..55e51f710
--- /dev/null
+++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/CPUTimeStopWatch.java
@@ -0,0 +1,111 @@
+/*
+ * Copyright (C) 2009, Christian Halstrick <christian.halstrick@sap.com>
+ * and other copyright owners as documented in the project's IP log.
+ *
+ * This program and the accompanying materials are made available
+ * under the terms of the Eclipse Distribution License v1.0 which
+ * accompanies this distribution, is reproduced below, and is
+ * available at http://www.eclipse.org/org/documents/edl-v10.php
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer in the documentation and/or other materials provided
+ *   with the distribution.
+ *
+ * - Neither the name of the Eclipse Foundation, Inc. nor the
+ *   names of its contributors may be used to endorse or promote
+ *   products derived from this software without specific prior
+ *   written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+package org.eclipse.jgit.util;
+
+import java.lang.management.ManagementFactory;
+import java.lang.management.ThreadMXBean;
+
+/**
+ * A simple stopwatch which measures elapsed CPU time of the current thread. CPU
+ * time is the time spent on executing your own code plus the time spent on
+ * executing operating system calls triggered by your application.
+ * <p>
+ * This stopwatch needs a VM which supports getting CPU Time information for the
+ * current thread. The static method createInstance() will take care to return
+ * only a new instance of this class if the VM is capable of returning CPU time.
+ */
+public class CPUTimeStopWatch {
+	private static final ThreadMXBean mxBean = ManagementFactory.getThreadMXBean();
+
+	private long start = -1; // negative while not running; 0 is a valid CPU time reading
+
+	/**
+	 * Use this method instead of the constructor to be sure that the underlying
+	 * VM provides all features needed by this class.
+	 *
+	 * @return a new {@link CPUTimeStopWatch} or <code>null</code> if the VM does
+	 *         not support getting CPU time information for the current thread or
+	 *         has CPU time measurement disabled
+	 */
+	public static CPUTimeStopWatch createInstance() {
+		return (mxBean.isCurrentThreadCpuTimeSupported()
+				&& mxBean.isThreadCpuTimeEnabled()) ? new CPUTimeStopWatch() : null;
+	}
+
+	/**
+	 * Starts the stopwatch. If the stopwatch is already started this will
+	 * restart the stopwatch.
+	 */
+	public void start() {
+		start = mxBean.getCurrentThreadCpuTime();
+	}
+
+	/**
+	 * Stops the stopwatch and returns the elapsed CPU time in nanoseconds.
+	 * Should be called only on started stopwatches.
+	 *
+	 * @return the elapsed CPU time in nanoseconds. When called on non-started
+	 *         stopwatches (either because {@link #start()} was never called or
+	 *         {@link #stop()} was called after the last call to
+	 *         {@link #start()}) this method will return 0.
+	 */
+	public long stop() {
+		long cpuTime = readout();
+		start = -1;
+		return cpuTime;
+	}
+
+	/**
+	 * Returns the elapsed CPU time in nanoseconds. In contrast to
+	 * {@link #stop()} the stopwatch will continue to run after this call.
+	 *
+	 * @return the elapsed CPU time in nanoseconds. When called on non-started
+	 *         stopwatches (either because {@link #start()} was never called or
+	 *         {@link #stop()} was called after the last call to
+	 *         {@link #start()}) this method will return 0.
+	 */
+	public long readout() {
+		return (start < 0) ? 0 : mxBean.getCurrentThreadCpuTime() - start;
+	}
+}
-- 
GitLab