GNU bug report logs

#24937 "deleting unused links" GC phase is too slow

PackageSource(s)Maintainer(s)
guix PTS Buildd Popcon
Full log

Message #13 received at 24937@debbugs.gnu.org (full text, mbox, reply):

Received: (at 24937) by debbugs.gnu.org; 11 Dec 2016 13:50:25 +0000
From debbugs-submit-bounces@debbugs.gnu.org Sun Dec 11 08:50:25 2016
Received: from localhost ([127.0.0.1]:37654 helo=debbugs.gnu.org)
	by debbugs.gnu.org with esmtp (Exim 4.84_2)
	(envelope-from <debbugs-submit-bounces@debbugs.gnu.org>)
	id 1cG4WH-0003bI-5M
	for submit@debbugs.gnu.org; Sun, 11 Dec 2016 08:50:25 -0500
Received: from eggs.gnu.org ([208.118.235.92]:53030)
 by debbugs.gnu.org with esmtp (Exim 4.84_2)
 (envelope-from <ludo@gnu.org>) id 1cG4WC-0003as-TF
 for 24937@debbugs.gnu.org; Sun, 11 Dec 2016 08:50:23 -0500
Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71)
 (envelope-from <ludo@gnu.org>) id 1cG4Vw-0005rg-7V
 for 24937@debbugs.gnu.org; Sun, 11 Dec 2016 08:50:07 -0500
X-Spam-Checker-Version: SpamAssassin 3.3.2 (2011-06-06) on eggs.gnu.org
X-Spam-Level: 
X-Spam-Status: No, score=-0.7 required=5.0 tests=BAYES_50,FAKE_REPLY_C,
 RP_MATCHES_RCVD autolearn=disabled version=3.3.2
Received: from fencepost.gnu.org ([2001:4830:134:3::e]:42762)
 by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from <ludo@gnu.org>)
 id 1cG4Vh-0005jy-VM; Sun, 11 Dec 2016 08:49:50 -0500
Received: from [81.253.18.84] (port=43514 helo=pluto)
 by fencepost.gnu.org with esmtpsa (TLS1.2:RSA_AES_256_CBC_SHA1:256)
 (Exim 4.82) (envelope-from <ludo@gnu.org>)
 id 1cG4Vc-000709-U4; Sun, 11 Dec 2016 08:49:49 -0500
From: ludo@gnu.org (Ludovic Courtès)
To: 24937@debbugs.gnu.org
Subject: Re: bug#24937: "deleting unused links" GC phase is too slow
Date: Sun, 11 Dec 2016 14:46:13 +0100
Message-ID: <87lgvm4lzu.fsf@gnu.org>
User-Agent: Gnus/5.13 (Gnus v5.13) Emacs/25.1 (gnu/linux)
X-URL: http://www.fdn.fr/~lcourtes/
X-Revolutionary-Date: 21 Frimaire an 225 de la Révolution
X-PGP-Key-ID: 0x090B11993D9AEBB5
X-PGP-Key: http://www.fdn.fr/~lcourtes/ludovic.asc
X-PGP-Fingerprint: 3CE4 6455 8A84 FDC6 9DB4  0CFB 090B 1199 3D9A EBB5
X-OS: x86_64-unknown-linux-gnu
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary="=-=-="
X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.2.x-3.x [generic]
X-Received-From: 2001:4830:134:3::e
X-Spam-Score: -8.0 (--------)
X-Debbugs-Envelope-To: 24937
Cc: Mark H Weaver <mhw@netris.org>
X-BeenThere: debbugs-submit@debbugs.gnu.org
X-Mailman-Version: 2.1.18
Precedence: list
List-Id: <debbugs-submit.debbugs.gnu.org>
List-Unsubscribe: <https://debbugs.gnu.org/cgi-bin/mailman/options/debbugs-submit>, 
 <mailto:debbugs-submit-request@debbugs.gnu.org?subject=unsubscribe>
List-Archive: <https://debbugs.gnu.org/cgi-bin/mailman/private/debbugs-submit/>
List-Post: <mailto:debbugs-submit@debbugs.gnu.org>
List-Help: <mailto:debbugs-submit-request@debbugs.gnu.org?subject=help>
List-Subscribe: <https://debbugs.gnu.org/cgi-bin/mailman/listinfo/debbugs-submit>, 
 <mailto:debbugs-submit-request@debbugs.gnu.org?subject=subscribe>
Errors-To: debbugs-submit-bounces@debbugs.gnu.org
Sender: "Debbugs-submit" <debbugs-submit-bounces@debbugs.gnu.org>
X-Spam-Score: -8.0 (--------)
[Message part 1 (text/plain, inline)]
Hello!

Here’s a proposed patch that follows your suggestion, Mark, but places
an upper bound on the number of directory entries loaded in memory.

On my laptop, which has ~500k entries in /gnu/store/.links, the result
is something like this (notice the inode numbers in ‘lstat’ calls):

--8<---------------cut here---------------start------------->8---
13738 write(4, "gmlo\0\0\0\0\31\0\0\0\0\0\0\0deleting unused "..., 48) = 48
13738 open("/gnu/store/.links", O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC) = 8
13738 fstat(8, {st_dev=makedev(8, 2), st_ino=4014083, st_mode=S_IFDIR|0755, st_nlink=2, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=119224, st_size=60977152, st_atime=2016/12/11-12:01:59, st_mtime=2016/12/11-09:39:45, st_ctime=2016/12/11-09:39:45}) = 0
13738 getdents(8, {{d_ino=4014083, d_off=4294967296, d_reclen=24, d_name="."}
[...]
13738 lstat("/gnu/store/.links/1f2f3g8waxwymp9sl2slcfyara164i8w1y2sz3h9js2fcviv2rnc", {st_dev=makedev(8, 2), st_ino=47, st_mode=S_IFREG|0444, st_nlink=2, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=88, st_size=41482, st_atime=2015/03/10-11:29:06, st_mtime=1970/01/01-01:00:01, st_ctime=2015/11/25-11:29:20}) = 0
13738 lstat("/gnu/store/.links/1p25kpyfw354in4kykmgh5sy9h925hnil1jdzgxhz7n6abbws8px", {st_dev=makedev(8, 2), st_ino=65, st_mode=S_IFREG|0444, st_nlink=9, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=8, st_size=2313, st_atime=2016/12/08-21:02:26, st_mtime=1970/01/01-01:00:01, st_ctime=2016/12/11-01:44:49}) = 0
13738 lstat("/gnu/store/.links/163187g637br9ys5pmshb01wjav53bs1g1a83m7c2alpdyx3yqz2", {st_dev=makedev(8, 2), st_ino=68, st_mode=S_IFREG|0444, st_nlink=2, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=32, st_size=13561, st_atime=2015/03/10-11:29:06, st_mtime=1970/01/01-01:00:01, st_ctime=2015/11/25-11:29:20}) = 0
[...]
13738 getdents(8, {{d_ino=4257114, d_off=1734093409898198492,
[...]
13738 lstat("/gnu/store/.links/1m6g06i01ybbkhjjbirjnj7fckw1b772cwygkvbd6v6zgkln7f7m", {st_dev=makedev(8, 2), st_ino=19, st_mode=S_IFREG|0444, st_nlink=4, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=8, st_size=2553, st_atime=2016/12/08-21:02:54, st_mtime=1970/01/01-01:00:01, st_ctime=2016/12/07-00:05:19}) = 0
13738 lstat("/gnu/store/.links/1ml8n3q55ikn8h60sn67jq1y7z7mvdp5kwr33pqrid2r6kk1d4kb", {st_dev=makedev(8, 2), st_ino=30, st_mode=S_IFREG|0444, st_nlink=2, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=8, st_size=2090, st_atime=2015/03/10-11:29:06, st_mtime=1970/01/01-01:00:01, st_ctime=2015/11/25-11:29:21}) = 0
13738 lstat("/gnu/store/.links/1c8cvwlqyyqby3k13cwm40g26pwca5iiz5dcj43xrgn9y91lfvc2", {st_dev=makedev(8, 2), st_ino=33, st_mode=S_IFREG|0444, st_nlink=2, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=16, st_size=7958, st_atime=2015/11/04-18:55:32, st_mtime=1970/01/01-01:00:01, st_ctime=2016/01/05-11:35:49}) = 0
[...]
13738 getdents(8, {{d_ino=328672,
[...]
13738 lstat("/gnu/store/.links/1l55l59dxb74himmkfzx5v63cv7287i6rjhdns1fdlwajqd73lnz", {st_dev=makedev(8, 2), st_ino=21, st_mode=S_IFREG|0444, st_nlink=31, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=8, st_size=615, st_atime=2016/12/08-21:02:30, st_mtime=1970/01/01-01:00:01, st_ctime=2016/12/11-01:44:47}) = 0
13738 lstat("/gnu/store/.links/1c7mm5amw743mb45f1zg4d4r3g549ch35wks9izkcgkx0jirpxsg", {st_dev=makedev(8, 2), st_ino=48, st_mode=S_IFREG|0444, st_nlink=2, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=360, st_size=176750, st_atime=2015/03/10-11:29:06, st_mtime=1970/01/01-01:00:01, st_ctime=2015/11/25-11:29:20}) = 0
13738 lstat("/gnu/store/.links/0z5s7b0yk8mfn1np6gk3cdbmpnjgxg1g0l8vfq1aa01zwp06d3f0", {st_dev=makedev(8, 2), st_ino=61, st_mode=S_IFREG|0444, st_nlink=2, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=8, st_size=1440, st_atime=2016/11/03-20:37:50, st_mtime=1970/01/01-01:00:01, st_ctime=2016/11/07-00:01:57}) = 0
--8<---------------cut here---------------end--------------->8---

I can’t tell exactly how this affects performance because my machine has
an SSD where this operation takes ~3 seconds on a cold cache.  I suspect
it has performance comparable to that of doing all the ‘readdir’ at once
followed by all the ‘lstat’.

Mark, how does that sound?

I’d like to commit it soon if there are no objections.

Thanks,
Ludo’.

[Message part 2 (text/x-patch, inline)]
diff --git a/nix/libstore/gc.cc b/nix/libstore/gc.cc
index 72eff52..db58603 100644
--- a/nix/libstore/gc.cc
+++ b/nix/libstore/gc.cc
@@ -545,6 +545,9 @@ void LocalStore::tryToDelete(GCState & state, const Path & path)
 }
 
 
+/* Like 'dirent', but with just what we need.  */
+typedef std::pair<Path, ino_t> MiniDirEntry;
+
 /* Unlink all files in /nix/store/.links that have a link count of 1,
    which indicates that there are no other links and so they can be
    safely deleted.  FIXME: race condition with optimisePath(): we
@@ -555,32 +558,57 @@ void LocalStore::removeUnusedLinks(const GCState & state)
     AutoCloseDir dir = opendir(linksDir.c_str());
     if (!dir) throw SysError(format("opening directory `%1%'") % linksDir);
 
+    /* Maximum number of entries stored in memory; each 'MiniDirEntry' takes
+       ~80 bytes.  */
+    const size_t maxEntries = 100000;
+
     long long actualSize = 0, unsharedSize = 0;
 
-    struct dirent * dirent;
-    while (errno = 0, dirent = readdir(dir)) {
-        checkInterrupt();
-        string name = dirent->d_name;
-        if (name == "." || name == "..") continue;
-        Path path = linksDir + "/" + name;
-
-        struct stat st;
-        if (lstat(path.c_str(), &st) == -1)
-            throw SysError(format("statting `%1%'") % path);
-
-        if (st.st_nlink != 1) {
-            unsigned long long size = st.st_blocks * 512ULL;
-            actualSize += size;
-            unsharedSize += (st.st_nlink - 1) * size;
-            continue;
-        }
-
-        printMsg(lvlTalkative, format("deleting unused link `%1%'") % path);
-
-        if (unlink(path.c_str()) == -1)
-            throw SysError(format("deleting `%1%'") % path);
-
-        state.results.bytesFreed += st.st_blocks * 512;
+    bool endOfDir = false;
+
+    while (!endOfDir) {
+	/* Read as many entries as possible at once, up to 'maxEntries'.  */
+	std::list<MiniDirEntry> entries;
+	struct dirent * dirent;
+	while (errno = 0,
+	       (entries.size() < maxEntries) && (dirent = readdir(dir))) {
+	    checkInterrupt();
+	    string name = dirent->d_name;
+	    if (name == "." || name == "..") continue;
+	    entries.emplace_back(MiniDirEntry(name, dirent->d_ino));
+	}
+
+	endOfDir = (dirent == NULL);
+
+	/* Sort entries by inode number to minimize disk seeks induced by the
+	   following 'lstat' calls.  */
+	entries.sort([] (const MiniDirEntry& e1, const MiniDirEntry& e2) {
+		return e1.second < e2.second;
+	    });
+
+	for (auto && item: entries) {
+	    checkInterrupt();
+
+	    Path path = linksDir + "/" + item.first;
+
+	    struct stat st;
+	    if (lstat(path.c_str(), &st) == -1)
+		throw SysError(format("statting `%1%'") % path);
+
+	    if (st.st_nlink != 1) {
+		unsigned long long size = st.st_blocks * 512ULL;
+		actualSize += size;
+		unsharedSize += (st.st_nlink - 1) * size;
+		continue;
+	    }
+
x+	    printMsg(lvlTalkative, format("deleting unused link `%1%'") % path);
+
+	    if (unlink(path.c_str()) == -1)
+		throw SysError(format("deleting `%1%'") % path);
+
+	    state.results.bytesFreed += st.st_blocks * 512;
+	}
     }
 
     struct stat st;

Send a report that this bug log contains spam.


debbugs.gnu.org maintainers <help-debbugs@gnu.org>. Last modified: Sun Sep 7 11:59:27 2025; Machine Name: wallace-server

GNU bug tracking system

Debbugs is free software and licensed under the terms of the GNU Public License version 2. The current version can be obtained from https://bugs.debian.org/debbugs-source/.

Copyright © 1999 Darren O. Benham, 1997,2003 nCipher Corporation Ltd, 1994-97 Ian Jackson, 2005-2017 Don Armstrong, and many other contributors.