summaryrefslogtreecommitdiffstats
path: root/scripts/debug_rebuilds
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/debug_rebuilds')
-rw-r--r--scripts/debug_rebuilds/README.md11
-rwxr-xr-xscripts/debug_rebuilds/step1_build_and_dumpdb.py50
-rwxr-xr-xscripts/debug_rebuilds/step2_analyze_py_files.py58
3 files changed, 119 insertions, 0 deletions
diff --git a/scripts/debug_rebuilds/README.md b/scripts/debug_rebuilds/README.md
new file mode 100644
index 0000000..05305a9
--- /dev/null
+++ b/scripts/debug_rebuilds/README.md
@@ -0,0 +1,11 @@
+To debug unexpected Nikola rebuilds:
+
+1. In `nikola.utils.config_changed._calc_digest`, uncomment the line that says `self._write_into_debug_db(digest, data)`
+2. Create a copy of your site source.
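+3. Run `python step1_build_and_dumpdb.py`. (It will delete the cache, the output, the doit database and the debug database, build the site twice, and write a dump of the doit database after each build to `first_dump.py` and `second_dump.py`.)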
+4. Run `python step2_analyze_py_files.py | tee analysis.txt`. It will compare the two dumps (`first_dump.py` and `second_dump.py`) and look up every differing hash in `cc_debug.sqlite3`.
+5. Compare the produced dictionaries. Note that you will probably need a character-level diff tool; <https://prettydiff.com/> is pretty good, as long as you change the CSS for `li.replace` to `word-break: break-all; white-space: pre-wrap;`.
+
+Copyright © 2019-2020, Chris Warrick.
+Portions Copyright © Eduardo Naufel Schettino and Doit Contributors.
+License of .py files is MIT (same as Nikola)
diff --git a/scripts/debug_rebuilds/step1_build_and_dumpdb.py b/scripts/debug_rebuilds/step1_build_and_dumpdb.py
new file mode 100755
index 0000000..69f952d
--- /dev/null
+++ b/scripts/debug_rebuilds/step1_build_and_dumpdb.py
@@ -0,0 +1,50 @@
+import dbm
+import json
+import subprocess
+import sys
+
+
+def dbm_iter(db):
+    """Iterate over the ``(key, value)`` pairs of an open dbm database.
+
+    The dbm backends do not share one interface, so three strategies are
+    tried in turn: the mapping interface (``items()``), the
+    ``firstkey()``/``nextkey()`` traversal, and finally ``keys()`` combined
+    with item lookup.
+
+    :param db: an open database object, e.g. the result of ``dbm.open()``.
+    :return: an iterable of ``(key, value)`` pairs.
+    """
+    # try dictionary interface - ok in python2 and dumbdb
+    try:
+        return db.items()
+    except AttributeError:
+        # Only a missing items() method selects a fallback; any other error
+        # is a real problem and must not be hidden.
+        pass
+
+    if hasattr(db, 'firstkey'):
+        # try firstkey/nextkey - ok for py3 dbm.gnu
+        def iter_gdbm(db):
+            k = db.firstkey()
+            while k is not None:
+                yield k, db[k]
+                k = db.nextkey(k)
+        return iter_gdbm(db)
+
+    # Neither interface is available (e.g. py3 dbm.ndbm): use keys() + lookup.
+    return ((k, db[k]) for k in db.keys())
+
+
+def dumpdb():
+    """Load ``.doit.db`` from the current directory into a plain dict.
+
+    Every stored value is decoded as UTF-8 and parsed as JSON; keys are kept
+    exactly as the dbm backend returns them.
+    """
+    with dbm.open('.doit.db') as data:
+        dump = {}
+        for key, value_str in dbm_iter(data):
+            dump[key] = json.loads(value_str.decode('utf-8'))
+        return dump
+
+
+# Keep a reference to the builtin, which the wrapper below shadows.
+print_ = print
+
+
+def print(*args, **kwargs):
+    """Print like the builtin ``print``, but always flush the stream.
+
+    Keyword arguments (``sep``, ``end``, ``file``) are forwarded to the
+    builtin instead of being dropped. ``file`` defaults to ``sys.stdout``,
+    looked up at call time. Presumably the flush keeps these progress
+    messages in order with the output of the ``nikola build`` subprocesses
+    when stdout is not a terminal.
+    """
+    kwargs.setdefault('file', sys.stdout)
+    kwargs['flush'] = True
+    print_(*args, **kwargs)
+
+
+# Start from a clean slate so that the first build is a full build.
+print("==> Removing stuff...")
+stale_paths = ['.doit.db', 'output', 'cache', 'cc_debug.sqlite3']
+subprocess.call(['rm', '-rf'] + stale_paths)
+
+build_command = ['nikola', 'build']
+
+print("==> Running first build...")
+subprocess.call(build_command)
+print("==> Fetching database...")
+first = dumpdb()
+
+# The second build runs on top of the first one; the doit database is
+# dumped again afterwards so that the two states can be compared.
+print("==> Running second build...")
+subprocess.call(build_command)
+print("==> Fetching database...")
+second = dumpdb()
+
+# Each dump is stored as the repr() of the dictionary.
+print("==> Saving dumps...")
+for dump_path, dump in (('first_dump.py', first), ('second_dump.py', second)):
+    with open(dump_path, 'w', encoding='utf-8') as fh:
+        fh.write(repr(dump))
diff --git a/scripts/debug_rebuilds/step2_analyze_py_files.py b/scripts/debug_rebuilds/step2_analyze_py_files.py
new file mode 100755
index 0000000..cb5c954
--- /dev/null
+++ b/scripts/debug_rebuilds/step2_analyze_py_files.py
@@ -0,0 +1,58 @@
+import sqlite3
+import sys
+# Keep a reference to the builtin, which the wrapper below shadows.
+print_ = print
+
+
+def print(*args, **kwargs):
+    """Print like the builtin ``print``, but always flush the stream.
+
+    Keyword arguments (``sep``, ``end``, ``file``) are forwarded to the
+    builtin instead of being dropped. ``file`` defaults to ``sys.stdout``,
+    looked up at call time. Flushing after every call makes each message
+    appear immediately even when stdout is a pipe.
+    """
+    kwargs.setdefault('file', sys.stdout)
+    kwargs['flush'] = True
+    print_(*args, **kwargs)
+
+
+# NOTE(review): eval() executes arbitrary code. The dump files are expected
+# to hold the repr() of a dictionary as written by step1_build_and_dumpdb.py;
+# do not run this script on dump files from an untrusted source.
+with open('first_dump.py', 'r', encoding='utf-8') as fh:
+    first = eval(fh.read())
+
+with open('second_dump.py', 'r', encoding='utf-8') as fh:
+    second = eval(fh.read())
+
+if len(first) != len(second):
+    print(" [!] Databases differ in size.")
+
+# Report missing items regardless of the sizes: two databases can hold the
+# same number of items and still have different keys.
+for k in first:
+    if k not in second:
+        print(" Item", k, "not found in second database.")
+for k in second:
+    if k not in first:
+        print(" Item", k, "not found in first database.")
+
+# Connection used by get_from_db() to look hashes up.
+conn = sqlite3.connect("cc_debug.sqlite3")
+
+
+def get_from_db(value):
+    """Look up the data stored for a hash in the ``hashes`` table.
+
+    Uses the module-level connection ``conn``.
+
+    :param value: hash to search for in the ``hash`` column.
+    :return: the ``json_data`` column of the first matching row, or ``None``
+        (after printing a warning) when no row matches or the query fails.
+    """
+    cursor = conn.cursor()
+    try:
+        cursor.execute("SELECT json_data FROM hashes WHERE hash = ?", (value,))
+        row = cursor.fetchone()
+    except sqlite3.Error:
+        # A failed query (e.g. the table does not exist) is reported the
+        # same way as a hash that is not in the table.
+        row = None
+    finally:
+        cursor.close()
+
+    if row is None:
+        print(" [!] Cannot find", value, "in database.")
+        return None
+    return row[0]
+
+
+if first == second:
+    print("==> Both files are identical.")
+    # sys.exit() rather than exit(): the latter is a helper added by the
+    # site module and is not guaranteed to exist.
+    sys.exit(0)
+
+VAL_KEY = '_values_:'  # yes, ends with a colon
+for k in first:
+    if k not in second:
+        # Nothing to compare against; indexing second[k] would raise KeyError.
+        print(" [!] Item", k, "not found in second database, skipping.")
+        continue
+
+    fk, sk = first[k], second[k]
+    try:
+        first_values, second_values = fk[VAL_KEY], sk[VAL_KEY]
+    except KeyError:
+        print(" [!] Values not found for,", k)
+        continue
+
+    if first_values != second_values:
+        print(" -> Difference:", k)
+        # Walk the keys of both dictionaries (first's order, then the keys
+        # only found in second) so that a value recorded in just one of the
+        # builds is reported instead of raising KeyError or being missed.
+        value_keys = list(first_values)
+        value_keys.extend(vk for vk in second_values if vk not in first_values)
+        for vk in value_keys:
+            if vk not in first_values or vk not in second_values:
+                where = "second" if vk in first_values else "first"
+                print(" [!] Value", vk, "not found in", where, "database.")
+                continue
+            fv, sv = first_values[vk], second_values[vk]
+            if fv != sv:
+                # Print each differing value with the data recorded for it
+                # in cc_debug.sqlite3.
+                print(" first :", fv, get_from_db(fv))
+                print(" second:", sv, get_from_db(sv))