From cc64794b245a094175d817bce6b581d4dddb47cf Mon Sep 17 00:00:00 2001
From: Constantine Plotnikov <constantine.plotnikov@gmail.com>
Date: Thu, 21 Jan 2010 21:06:54 +0300
Subject: [PATCH] Added caching for loose object lookup during pack indexing

On Windows systems, file system lookup is a slow operation, so
checking whether each object exists during indexing (after receiving
the pack) could take a significant amount of time. This patch
introduces CachedObjectDirectory, which pre-caches lookup results.

Bug: 300397
Change-Id: I471b93f9bb3ee173eb37cae1d75e9e4eb49985e7
Signed-off-by: Constantine Plotnikov <constantine.plotnikov@gmail.com>
Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
---
 .../jgit/lib/AlternateRepositoryDatabase.java |   6 +
 .../jgit/lib/CachedObjectDatabase.java        | 132 ++++++++++++++++++
 .../jgit/lib/CachedObjectDirectory.java       | 112 +++++++++++++++
 .../org/eclipse/jgit/lib/ObjectDatabase.java  |  12 ++
 .../org/eclipse/jgit/lib/ObjectDirectory.java |   5 +
 .../org/eclipse/jgit/transport/IndexPack.java |  10 +-
 6 files changed, 276 insertions(+), 1 deletion(-)
 create mode 100644 org.eclipse.jgit/src/org/eclipse/jgit/lib/CachedObjectDatabase.java
 create mode 100644 org.eclipse.jgit/src/org/eclipse/jgit/lib/CachedObjectDirectory.java

diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/lib/AlternateRepositoryDatabase.java b/org.eclipse.jgit/src/org/eclipse/jgit/lib/AlternateRepositoryDatabase.java
index 311839e43..f8570a420 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/lib/AlternateRepositoryDatabase.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/lib/AlternateRepositoryDatabase.java
@@ -1,4 +1,5 @@
 /*
+ * Copyright (C) 2010, Constantine Plotnikov <constantine.plotnikov@gmail.com>
  * Copyright (C) 2009, Google Inc.
  * and other copyright owners as documented in the project's IP log.
  *
@@ -130,4 +131,9 @@ protected ObjectDatabase[] loadAlternates() throws IOException {
 	protected void closeAlternates(final ObjectDatabase[] alt) {
 		// Do nothing; these belong to odb to close, not us.
 	}
+
+	@Override
+	public ObjectDatabase newCachedDatabase() {
+		return odb.newCachedDatabase();
+	}
 }
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/lib/CachedObjectDatabase.java b/org.eclipse.jgit/src/org/eclipse/jgit/lib/CachedObjectDatabase.java
new file mode 100644
index 000000000..3dcea1636
--- /dev/null
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/lib/CachedObjectDatabase.java
@@ -0,0 +1,132 @@
+/*
+ * Copyright (C) 2010, Constantine Plotnikov <constantine.plotnikov@gmail.com>
+ * Copyright (C) 2010, JetBrains s.r.o.
+ * and other copyright owners as documented in the project's IP log.
+ *
+ * This program and the accompanying materials are made available
+ * under the terms of the Eclipse Distribution License v1.0 which
+ * accompanies this distribution, is reproduced below, and is
+ * available at http://www.eclipse.org/org/documents/edl-v10.php
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer in the documentation and/or other materials provided
+ *   with the distribution.
+ *
+ * - Neither the name of the Eclipse Foundation, Inc. nor the
+ *   names of its contributors may be used to endorse or promote
+ *   products derived from this software without specific prior
+ *   written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+package org.eclipse.jgit.lib;
+
+import java.io.IOException;
+import java.util.Collection;
+
+/**
+ * {@link ObjectDatabase} wrapper providing temporary lookup caching.
+ * <p>
+ * The base class for {@code ObjectDatabase}s that wrap other database instances
+ * and optimize querying for objects by caching some database dependent
+ * information. Instances of this class (or any of its subclasses) can be
+ * returned from the method {@link ObjectDatabase#newCachedDatabase()}. This
+ * class can be used in scenarios where the database does not change, or when
+ * changes in the database while some operation is in progress are an
+ * acceptable risk.
+ * <p>
+ * The default implementation delegates all requests to the wrapped database.
+ * The instance might be indirectly invalidated if the wrapped instance is
+ * closed. Closing the delegating instance does not imply closing the wrapped
+ * instance. For alternate databases, cached instances are used as well.
+ */
+public class CachedObjectDatabase extends ObjectDatabase {
+	/**
+	 * The wrapped database instance
+	 */
+	protected final ObjectDatabase wrapped;
+
+	/**
+	 * Create the delegating database instance
+	 *
+	 * @param wrapped
+	 *            the wrapped object database
+	 */
+	public CachedObjectDatabase(ObjectDatabase wrapped) {
+		this.wrapped = wrapped;
+	}
+
+	@Override
+	protected boolean hasObject1(AnyObjectId objectId) {
+		return wrapped.hasObject1(objectId);
+	}
+
+	@Override
+	protected ObjectLoader openObject1(WindowCursor curs, AnyObjectId objectId)
+			throws IOException {
+		return wrapped.openObject1(curs, objectId);
+	}
+
+	@Override
+	protected boolean hasObject2(String objectName) {
+		return wrapped.hasObject2(objectName);
+	}
+
+	@Override
+	protected ObjectDatabase[] loadAlternates() throws IOException {
+		ObjectDatabase[] loaded = wrapped.getAlternates();
+		ObjectDatabase[] result = new ObjectDatabase[loaded.length];
+		for (int i = 0; i < loaded.length; i++) {
+			result[i] = loaded[i].newCachedDatabase();
+		}
+		return result;
+	}
+
+	@Override
+	protected ObjectLoader openObject2(WindowCursor curs, String objectName,
+			AnyObjectId objectId) throws IOException {
+		return wrapped.openObject2(curs, objectName, objectId);
+	}
+
+	@Override
+	void openObjectInAllPacks1(Collection<PackedObjectLoader> out,
+			WindowCursor curs, AnyObjectId objectId) throws IOException {
+		wrapped.openObjectInAllPacks1(out, curs, objectId);
+	}
+
+	@Override
+	protected boolean tryAgain1() {
+		return wrapped.tryAgain1();
+	}
+
+	@Override
+	public ObjectDatabase newCachedDatabase() {
+		// Note that "this" is not returned since subclasses might actually do something
+		// on closeSelf() (for example closing database connections or open repositories).
+		// The situation might become even more tricky if we consider alternates.
+		return wrapped.newCachedDatabase();
+	}
+}
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/lib/CachedObjectDirectory.java b/org.eclipse.jgit/src/org/eclipse/jgit/lib/CachedObjectDirectory.java
new file mode 100644
index 000000000..3724f8446
--- /dev/null
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/lib/CachedObjectDirectory.java
@@ -0,0 +1,112 @@
+/*
+ * Copyright (C) 2010, Constantine Plotnikov <constantine.plotnikov@gmail.com>
+ * Copyright (C) 2010, JetBrains s.r.o.
+ * and other copyright owners as documented in the project's IP log.
+ *
+ * This program and the accompanying materials are made available
+ * under the terms of the Eclipse Distribution License v1.0 which
+ * accompanies this distribution, is reproduced below, and is
+ * available at http://www.eclipse.org/org/documents/edl-v10.php
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer in the documentation and/or other materials provided
+ *   with the distribution.
+ *
+ * - Neither the name of the Eclipse Foundation, Inc. nor the
+ *   names of its contributors may be used to endorse or promote
+ *   products derived from this software without specific prior
+ *   written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+package org.eclipse.jgit.lib;
+
+import java.io.File;
+import java.io.IOException;
+
+/**
+ * The cached instance of an {@link ObjectDirectory}.
+ * <p>
+ * This class caches the list of loose objects in memory, so the file system is
+ * not queried with stat calls.
+ */
+public class CachedObjectDirectory extends CachedObjectDatabase {
+	/**
+	 * The set that contains the identifiers of unpacked objects; it is populated
+	 * when the cached instance is created.
+	 */
+	private final ObjectIdSubclassMap<ObjectId> unpackedObjects = new ObjectIdSubclassMap<ObjectId>();
+
+	/**
+	 * Create a cached instance by listing the loose objects of the wrapped directory.
+	 *
+	 * @param wrapped
+	 *            the wrapped database
+	 */
+	public CachedObjectDirectory(ObjectDirectory wrapped) {
+		super(wrapped);
+		File objects = wrapped.getDirectory();
+		String[] fanout = objects.list();
+		if (fanout == null)
+			fanout = new String[0];
+		for (String d : fanout) {
+			if (d.length() != 2)
+				continue;
+			String[] entries = new File(objects, d).list();
+			if (entries == null)
+				continue;
+			for (String e : entries) {
+				if (e.length() != Constants.OBJECT_ID_STRING_LENGTH - 2)
+					continue;
+				try {
+					unpackedObjects.add(ObjectId.fromString(d + e));
+				} catch (IllegalArgumentException notAnObject) {
+					// ignore a file that does not represent a loose object
+				}
+			}
+		}
+	}
+
+	@Override
+	protected ObjectLoader openObject2(WindowCursor curs, String objectName,
+			AnyObjectId objectId) throws IOException {
+		if (unpackedObjects.get(objectId) == null)
+			return null;
+		return super.openObject2(curs, objectName, objectId);
+	}
+
+	@Override
+	protected boolean hasObject1(AnyObjectId objectId) {
+		if (unpackedObjects.get(objectId) != null)
+			return true; // known to be loose
+		return super.hasObject1(objectId);
+	}
+
+	@Override
+	protected boolean hasObject2(String name) {
+		return false; // loose objects were tested by hasObject1
+	}
+}
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectDatabase.java b/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectDatabase.java
index 21b7b9dc9..7eac79fb7 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectDatabase.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectDatabase.java
@@ -380,4 +380,16 @@ protected void closeAlternates(ObjectDatabase[] alt) {
 			d.close();
 		}
 	}
+
+	/**
+	 * Create a new cached database instance over this database. This instance might
+	 * optimize queries by caching some information about the database, so some
+	 * modifications made after instance creation might fail to be noticed.
+	 *
+	 * @return new cached database instance
+	 * @see CachedObjectDatabase
+	 */
+	public ObjectDatabase newCachedDatabase() {
+		return new CachedObjectDatabase(this);
+	}
 }
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectDirectory.java b/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectDirectory.java
index a6fdbfae5..a8d6dda06 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectDirectory.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectDirectory.java
@@ -541,4 +541,9 @@ boolean tryAgain(final long currLastModified) {
 			return true;
 		}
 	}
+
+	@Override
+	public ObjectDatabase newCachedDatabase() {
+		return new CachedObjectDirectory(this);
+	}
 }
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/transport/IndexPack.java b/org.eclipse.jgit/src/org/eclipse/jgit/transport/IndexPack.java
index 6e0f22807..7c94767b4 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/transport/IndexPack.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/transport/IndexPack.java
@@ -68,6 +68,7 @@
 import org.eclipse.jgit.lib.InflaterCache;
 import org.eclipse.jgit.lib.MutableObjectId;
 import org.eclipse.jgit.lib.ObjectChecker;
+import org.eclipse.jgit.lib.ObjectDatabase;
 import org.eclipse.jgit.lib.ObjectId;
 import org.eclipse.jgit.lib.ObjectIdSubclassMap;
 import org.eclipse.jgit.lib.ObjectLoader;
@@ -130,6 +131,11 @@ public static IndexPack create(final Repository db, final InputStream is)
 
 	private final Repository repo;
 
+	/**
+	 * Object database used for loading existing objects
+	 */
+	private final ObjectDatabase objectDatabase;
+
 	private Inflater inflater;
 
 	private final MessageDigest objectDigest;
@@ -199,6 +205,7 @@ public static IndexPack create(final Repository db, final InputStream is)
 	public IndexPack(final Repository db, final InputStream src,
 			final File dstBase) throws IOException {
 		repo = db;
+		objectDatabase = db.getObjectDatabase().newCachedDatabase();
 		in = src;
 		inflater = InflaterCache.get();
 		readCurs = new WindowCursor();
@@ -350,6 +357,7 @@ public void index(final ProgressMonitor progress) throws IOException {
 					InflaterCache.release(inflater);
 				} finally {
 					inflater = null;
+					objectDatabase.close();
 				}
 				readCurs = WindowCursor.release(readCurs);
 
@@ -756,7 +764,7 @@ private void verifySafeObject(final AnyObjectId id, final int type,
 			}
 		}
 
-		final ObjectLoader ldr = repo.openObject(readCurs, id);
+		final ObjectLoader ldr = objectDatabase.openObject(readCurs, id);
 		if (ldr != null) {
 			final byte[] existingData = ldr.getCachedBytes();
 			if (ldr.getType() != type || !Arrays.equals(data, existingData)) {
-- 
GitLab