3 files changed, 119 insertions, 0 deletions
diff --git a/scripts/debug_rebuilds/README.md b/scripts/debug_rebuilds/README.md
new file mode 100644
index 0000000..05305a9
--- /dev/null
+++ b/scripts/debug_rebuilds/README.md
@@ -0,0 +1,11 @@
+To debug unexpected Nikola rebuilds:
+
+1. In `nikola.utils.config_changed._calc_digest`, uncomment the line that says `self._write_into_debug_db(digest, data)`
+2. Create a copy of your site source and run the remaining steps inside that copy.
+3. Run `python step1_build_and_dumpdb.py`. (It will delete cache/output/db, build the site twice, and write the database dumps to `first_dump.py` and `second_dump.py`.)
+4. Run `python step2_analyze_py_files.py | tee analysis.txt`. It will compare the two .py files, using `cc_debug.sqlite3` and `{first,second}_dump.py`.
+5. Compare the produced dictionaries. Note that you will probably need a character-level diff tool; <https://prettydiff.com/> is pretty good, as long as you change the CSS for `li.replace` to `word-break: break-all; white-space: pre-wrap;`.
+
+Copyright © 2019-2020, Chris Warrick.
+Portions Copyright © Eduardo Naufel Schettino and Doit Contributors.
+License of .py files is MIT (same as Nikola)
diff --git a/scripts/debug_rebuilds/step1_build_and_dumpdb.py b/scripts/debug_rebuilds/step1_build_and_dumpdb.py
new file mode 100755
index 0000000..69f952d
--- /dev/null
+++ b/scripts/debug_rebuilds/step1_build_and_dumpdb.py
@@ -0,0 +1,98 @@
+#!/usr/bin/env python3
+"""Build a Nikola site twice and dump doit's dependency database each time.
+
+Run from the root of a copy of the site.  The script deletes the doit
+database, ``output``, ``cache`` and ``cc_debug.sqlite3``, runs
+``nikola build`` twice, and writes the ``repr()`` of the database contents
+after each build to ``first_dump.py`` and ``second_dump.py``.
+"""
+
+import dbm
+import json
+import os
+import shutil
+import subprocess
+import sys
+
+DOIT_DB = '.doit.db'
+# Depending on the dbm backend, the data is stored in DOIT_DB itself or in
+# sidecar files next to it (e.g. ``.dat``/``.dir``/``.bak`` for dbm.dumb).
+DBM_SUFFIXES = ('', '.dat', '.dir', '.bak', '.db', '.pag')
+
+
+def dbm_iter(db):
+    """Return an iterable of ``(key, value)`` pairs stored in *db*.
+
+    Mapping-style database objects (e.g. ``dbm.dumb``) provide ``items()``.
+    ``dbm.gnu`` and ``dbm.ndbm`` objects do not, so their pairs are produced
+    lazily from ``keys()``; consume the result before closing *db*.
+    """
+    try:
+        return db.items()
+    except AttributeError:
+        return ((key, db[key]) for key in db.keys())
+
+
+def dumpdb(path=DOIT_DB):
+    """Return the database at *path* as a dict of raw key -> decoded JSON value."""
+    with dbm.open(path) as data:
+        return {key: json.loads(value_str.decode('utf-8'))
+                for key, value_str in dbm_iter(data)}
+
+
+def log(*args):
+    """Print *args* to stdout and flush right away.
+
+    Flushing presumably keeps these markers in order with the output of the
+    child processes when stdout is redirected (e.g. piped into ``tee``).
+    """
+    print(*args, file=sys.stdout, flush=True)
+
+
+def remove_path(path):
+    """Delete the file or directory tree at *path*; a missing path is fine."""
+    if os.path.isdir(path) and not os.path.islink(path):
+        shutil.rmtree(path, ignore_errors=True)
+        return
+    try:
+        os.remove(path)
+    except FileNotFoundError:
+        pass  # like ``rm -f``: nothing to remove
+
+
+def run_build():
+    """Run ``nikola build``, warn on a non-zero exit status, and return it."""
+    status = subprocess.call(['nikola', 'build'])
+    if status != 0:
+        log(" [!] nikola build exited with status", status)
+    return status
+
+
+def save_dump(filename, data):
+    """Write ``repr(data)`` to *filename* as UTF-8 text."""
+    with open(filename, 'w', encoding='utf-8') as fh:
+        fh.write(repr(data))
+
+
+def main():
+    """Clean the site, build it twice, and save both database dumps."""
+    log("==> Removing stuff...")
+    for suffix in DBM_SUFFIXES:
+        remove_path(DOIT_DB + suffix)
+    for path in ('output', 'cache', 'cc_debug.sqlite3'):
+        remove_path(path)
+    log("==> Running first build...")
+    run_build()
+    log("==> Fetching database...")
+    first = dumpdb()
+    log("==> Running second build...")
+    run_build()
+    log("==> Fetching database...")
+    second = dumpdb()
+    log("==> Saving dumps...")
+    save_dump('first_dump.py', first)
+    save_dump('second_dump.py', second)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/scripts/debug_rebuilds/step2_analyze_py_files.py b/scripts/debug_rebuilds/step2_analyze_py_files.py
new file mode 100755
index 0000000..cb5c954
--- /dev/null
+++ b/scripts/debug_rebuilds/step2_analyze_py_files.py
@@ -0,0 +1,107 @@
+#!/usr/bin/env python3
+"""Compare the two database dumps and report the values that differ.
+
+Reads ``first_dump.py`` and ``second_dump.py`` from the current directory.
+For every entry whose ``_values_:`` mapping differs between the dumps, the
+differing values are printed together with the ``json_data`` stored for them
+in the ``hashes`` table of ``cc_debug.sqlite3``.
+"""
+
+import ast
+import contextlib
+import sqlite3
+import sys
+
+VAL_KEY = '_values_:'  # yes, ends with a colon
+_MISSING = object()  # marks a value name that is absent from one dump
+
+
+def log(*args):
+    """Print *args* to stdout and flush right away."""
+    print(*args, file=sys.stdout, flush=True)
+
+
+def load_dump(filename):
+    """Return the Python literal stored in *filename*.
+
+    ``ast.literal_eval`` is used rather than ``eval`` so that reading a dump
+    can only produce plain data and never execute code.
+    """
+    with open(filename, 'r', encoding='utf-8') as fh:
+        return ast.literal_eval(fh.read())
+
+
+def report_missing_keys(first, second):
+    """Print every key that is present in only one of the two dumps."""
+    if len(first) != len(second):
+        log(" [!] Databases differ in size.")
+    for k in first:
+        if k not in second:
+            log("    Item", k, "not found in second database.")
+    for k in second:
+        if k not in first:
+            log("    Item", k, "not found in first database.")
+
+
+def get_from_db(conn, value):
+    """Return the ``json_data`` stored for hash *value*, or None.
+
+    A message is printed and None is returned when no row matches or the
+    query fails (for example because the ``hashes`` table does not exist).
+    """
+    try:
+        row = conn.execute(
+            "SELECT json_data FROM hashes WHERE hash = ?", (value,)).fetchone()
+    except sqlite3.Error:
+        row = None
+    if row is None:
+        log(" [!] Cannot find", value, "in database.")
+        return None
+    return row[0]
+
+
+def compare_values(conn, first_values, second_values):
+    """Print each named value that differs between the two mappings."""
+    for vk in first_values:
+        fv = first_values[vk]
+        sv = second_values.get(vk, _MISSING)
+        if sv is _MISSING:
+            log("    Value", vk, "not found in second database.")
+        elif fv != sv:
+            log("    first :", fv, get_from_db(conn, fv))
+            log("    second:", sv, get_from_db(conn, sv))
+    for vk in second_values:
+        if vk not in first_values:
+            log("    Value", vk, "not found in first database.")
+
+
+def main():
+    """Load both dumps and print the differences; return the exit status."""
+    first = load_dump('first_dump.py')
+    second = load_dump('second_dump.py')
+    report_missing_keys(first, second)
+
+    if first == second:
+        log("==> Both files are identical.")
+        return 0
+
+    # Connect only when there is something to look up: sqlite3.connect()
+    # creates the database file if it does not exist yet.
+    with contextlib.closing(sqlite3.connect("cc_debug.sqlite3")) as conn:
+        for k in first:
+            if k not in second:
+                continue  # already reported by report_missing_keys()
+            try:
+                first_values = first[k][VAL_KEY]
+                second_values = second[k][VAL_KEY]
+            except KeyError:
+                log(" [!] Values not found for,", k)
+                continue
+            if first_values != second_values:
+                log(" -> Difference:", k)
+                compare_values(conn, first_values, second_values)
+    return 0
+
+
+if __name__ == '__main__':
+    sys.exit(main())