diff options
Diffstat (limited to 'scripts/debug_rebuilds')
| -rw-r--r-- | scripts/debug_rebuilds/README.md | 11 | ||||
| -rwxr-xr-x | scripts/debug_rebuilds/step1_build_and_dumpdb.py | 50 | ||||
| -rwxr-xr-x | scripts/debug_rebuilds/step2_analyze_py_files.py | 58 |
3 files changed, 119 insertions, 0 deletions
diff --git a/scripts/debug_rebuilds/README.md b/scripts/debug_rebuilds/README.md new file mode 100644 index 0000000..05305a9 --- /dev/null +++ b/scripts/debug_rebuilds/README.md @@ -0,0 +1,11 @@ +To debug unexpected Nikola rebuilds: + +1. In `nikola.utils.config_changed._calc_digest`, uncomment the line that says `self._write_into_debug_db(digest, data)`. +2. Create a copy of your site source. +3. Run `python step1_build_and_dumpdb.py`. (It will delete cache/output/db, build the site twice, and write a dump of the database after each build to a .py file.) +4. Run `python step2_analyze_py_files.py | tee analysis.txt`. It will compare the two dumps (`{first,second}_dump.py`), using `cc_debug.sqlite3` to look up the data behind each differing value. +5. Compare the produced dictionaries. Note that you will probably need a character-level diff tool; <https://prettydiff.com/> is pretty good as long as you change CSS for `li.replace` to `word-break: break-all; white-space: pre-wrap;` + +Copyright © 2019-2020, Chris Warrick. +Portions Copyright © Eduardo Naufel Schettino and Doit Contributors. 
+License of .py files is MIT (same as Nikola) diff --git a/scripts/debug_rebuilds/step1_build_and_dumpdb.py b/scripts/debug_rebuilds/step1_build_and_dumpdb.py new file mode 100755 index 0000000..69f952d --- /dev/null +++ b/scripts/debug_rebuilds/step1_build_and_dumpdb.py @@ -0,0 +1,50 @@ +import dbm +import json +import subprocess +import sys + + +def dbm_iter(db): + # try dictionary interface - ok in python2 and dumbdb + try: + return db.items() + except Exception: + # try firstkey/nextkey - ok for py3 dbm.gnu + def iter_gdbm(db): + k = db.firstkey() + while k is not None: + yield k, db[k] + k = db.nextkey(k) + return iter_gdbm(db) + + +def dumpdb(): + with dbm.open('.doit.db') as data: + return {key: json.loads(value_str.decode('utf-8')) + for key, value_str in dbm_iter(data)} + + +print_ = print + + +def print(*args, **kwargs): + print_(*args, file=sys.stdout) + sys.stdout.flush() + + +print("==> Removing stuff...") +subprocess.call(['rm', '-rf', '.doit.db', 'output', 'cache', 'cc_debug.sqlite3']) +print("==> Running first build...") +subprocess.call(['nikola', 'build']) +print("==> Fetching database...") +first = dumpdb() +print("==> Running second build...") +subprocess.call(['nikola', 'build']) +print("==> Fetching database...") +second = dumpdb() +print("==> Saving dumps...") +with open('first_dump.py', 'w', encoding='utf-8') as fh: + fh.write(repr(first)) + +with open('second_dump.py', 'w', encoding='utf-8') as fh: + fh.write(repr(second)) diff --git a/scripts/debug_rebuilds/step2_analyze_py_files.py b/scripts/debug_rebuilds/step2_analyze_py_files.py new file mode 100755 index 0000000..cb5c954 --- /dev/null +++ b/scripts/debug_rebuilds/step2_analyze_py_files.py @@ -0,0 +1,58 @@ +import sqlite3 +import sys +print_ = print + + +def print(*args, **kwargs): + print_(*args, file=sys.stdout) + sys.stdout.flush() + + +with open('first_dump.py', 'r', encoding='utf-8') as fh: + first = eval(fh.read()) + +with open('second_dump.py', 'r', encoding='utf-8') as fh: + 
second = eval(fh.read()) + +if len(first) != len(second): + print(" [!] Databases differ in size.") + for k in first: + if k not in second: + print(" Item", k, "not found in second database.") + for k in second: + if k not in first: + print(" Item", k, "not found in first database.") + +conn = sqlite3.connect("cc_debug.sqlite3") + + +def get_from_db(value): + cursor = conn.cursor() + try: + cursor.execute("SELECT json_data FROM hashes WHERE hash = ?", (value,)) + return cursor.fetchone()[0] + except Exception: + print(" [!] Cannot find", value, "in database.") + return None + + +if first == second: + print("==> Both files are identical.") + exit(0) + +VAL_KEY = '_values_:' # yes, ends with a colon +for k in first: + fk, sk = first[k], second[k] + try: + first_values, second_values = fk[VAL_KEY], sk[VAL_KEY] + except KeyError: + print(" [!] Values not found for,", k) + continue + + if first_values != second_values: + print(" -> Difference:", k) + for vk in first_values: + fv, sv = first_values[vk], second_values[vk] + if fv != sv: + print(" first :", fv, get_from_db(fv)) + print(" second:", sv, get_from_db(sv)) |
