diff options
| author | 2014-12-06 17:33:25 -0500 | |
|---|---|---|
| committer | 2014-12-06 17:33:25 -0500 | |
| commit | 7167ce41b61d2ba2cdb526777a4233eb84a3b66a (patch) | |
| tree | a35c14143716e1f2c98f808c81f89426045a946f /SQLiteStudio3/coreSQLiteStudio/diff | |
Imported Upstream version 2.99.6upstream/2.99.6
Diffstat (limited to 'SQLiteStudio3/coreSQLiteStudio/diff')
| -rw-r--r-- | SQLiteStudio3/coreSQLiteStudio/diff/diff_match_patch.cpp | 2112 | ||||
| -rw-r--r-- | SQLiteStudio3/coreSQLiteStudio/diff/diff_match_patch.h | 631 |
2 files changed, 2743 insertions, 0 deletions
diff --git a/SQLiteStudio3/coreSQLiteStudio/diff/diff_match_patch.cpp b/SQLiteStudio3/coreSQLiteStudio/diff/diff_match_patch.cpp new file mode 100644 index 0000000..4f0022e --- /dev/null +++ b/SQLiteStudio3/coreSQLiteStudio/diff/diff_match_patch.cpp @@ -0,0 +1,2112 @@ +/* + * Copyright 2008 Google Inc. All Rights Reserved. + * Author: fraser@google.com (Neil Fraser) + * Author: mikeslemmer@gmail.com (Mike Slemmer) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Diff Match and Patch + * http://code.google.com/p/google-diff-match-patch/ + */ + +/* + * Version slightly modified to compile with Qt 5.2 (2 minor fixes: toAscii() -> toLatin()). + */ + +#include <algorithm> +#include <limits> +// Code known to compile and run with Qt 4.3 through Qt 4.7. +#include <QtCore> +#include <time.h> +#include "diff_match_patch.h" + + +////////////////////////// +// +// Diff Class +// +////////////////////////// + + +/** + * Constructor. Initializes the diff with the provided values. + * @param operation One of INSERT, DELETE or EQUAL + * @param text The text being applied + */ +Diff::Diff(Operation _operation, const QString &_text) : + operation(_operation), text(_text) { + // Construct a diff with the specified operation and text. +} + +Diff::Diff() { +} + + +QString Diff::strOperation(Operation op) { + switch (op) { + case INSERT: + return "INSERT"; + case DELETE: + return "DELETE"; + case EQUAL: + return "EQUAL"; + } + throw "Invalid operation."; +} + +/** + * Display a human-readable version of this Diff. + * @return text version + */ +QString Diff::toString() const { + QString prettyText = text; + // Replace linebreaks with Pilcrow signs. + prettyText.replace('\n', L'\u00b6'); + return QString("Diff(") + strOperation(operation) + QString(",\"") + + prettyText + QString("\")"); +} + +/** + * Is this Diff equivalent to another Diff? + * @param d Another Diff to compare against + * @return true or false + */ +bool Diff::operator==(const Diff &d) const { + return (d.operation == this->operation) && (d.text == this->text); +} + +bool Diff::operator!=(const Diff &d) const { + return !(operator == (d)); +} + + +///////////////////////////////////////////// +// +// Patch Class +// +///////////////////////////////////////////// + + +/** + * Constructor. Initializes with an empty list of diffs. + */ +Patch::Patch() : + start1(0), start2(0), + length1(0), length2(0) { +} + +bool Patch::isNull() const { + if (start1 == 0 && start2 == 0 && length1 == 0 && length2 == 0 + && diffs.size() == 0) { + return true; + } + return false; +} + + +/** + * Emmulate GNU diff's format. + * Header: @@ -382,8 +481,9 @@ + * Indicies are printed as 1-based, not 0-based. + * @return The GNU diff string + */ +QString Patch::toString() { + QString coords1, coords2; + if (length1 == 0) { + coords1 = QString::number(start1) + QString(",0"); + } else if (length1 == 1) { + coords1 = QString::number(start1 + 1); + } else { + coords1 = QString::number(start1 + 1) + QString(",") + + QString::number(length1); + } + if (length2 == 0) { + coords2 = QString::number(start2) + QString(",0"); + } else if (length2 == 1) { + coords2 = QString::number(start2 + 1); + } else { + coords2 = QString::number(start2 + 1) + QString(",") + + QString::number(length2); + } + QString text; + text = QString("@@ -") + coords1 + QString(" +") + coords2 + + QString(" @@\n"); + // Escape the body of the patch with %xx notation. + foreach (Diff aDiff, diffs) { + switch (aDiff.operation) { + case INSERT: + text += QString('+'); + break; + case DELETE: + text += QString('-'); + break; + case EQUAL: + text += QString(' '); + break; + } + text += QString(QUrl::toPercentEncoding(aDiff.text, " !~*'();/?:@&=+$,#")) + + QString("\n"); + } + + return text; +} + + +///////////////////////////////////////////// +// +// diff_match_patch Class +// +///////////////////////////////////////////// + +diff_match_patch::diff_match_patch() : + Diff_Timeout(1.0f), + Diff_EditCost(4), + Match_Threshold(0.5f), + Match_Distance(1000), + Patch_DeleteThreshold(0.5f), + Patch_Margin(4), + Match_MaxBits(32) { +} + + +QList<Diff> diff_match_patch::diff_main(const QString &text1, + const QString &text2) { + return diff_main(text1, text2, true); +} + +QList<Diff> diff_match_patch::diff_main(const QString &text1, + const QString &text2, bool checklines) { + // Set a deadline by which time the diff must be complete. + clock_t deadline; + if (Diff_Timeout <= 0) { + deadline = std::numeric_limits<clock_t>::max(); + } else { + deadline = clock() + (clock_t)(Diff_Timeout * CLOCKS_PER_SEC); + } + return diff_main(text1, text2, checklines, deadline); +} + +QList<Diff> diff_match_patch::diff_main(const QString &text1, + const QString &text2, bool checklines, clock_t deadline) { + // Check for null inputs. + if (text1.isNull() || text2.isNull()) { + throw "Null inputs. (diff_main)"; + } + + // Check for equality (speedup). + QList<Diff> diffs; + if (text1 == text2) { + if (!text1.isEmpty()) { + diffs.append(Diff(EQUAL, text1)); + } + return diffs; + } + + // Trim off common prefix (speedup). + int commonlength = diff_commonPrefix(text1, text2); + const QString &commonprefix = text1.left(commonlength); + QString textChopped1 = text1.mid(commonlength); + QString textChopped2 = text2.mid(commonlength); + + // Trim off common suffix (speedup). + commonlength = diff_commonSuffix(textChopped1, textChopped2); + const QString &commonsuffix = textChopped1.right(commonlength); + textChopped1 = textChopped1.left(textChopped1.length() - commonlength); + textChopped2 = textChopped2.left(textChopped2.length() - commonlength); + + // Compute the diff on the middle block. + diffs = diff_compute(textChopped1, textChopped2, checklines, deadline); + + // Restore the prefix and suffix. + if (!commonprefix.isEmpty()) { + diffs.prepend(Diff(EQUAL, commonprefix)); + } + if (!commonsuffix.isEmpty()) { + diffs.append(Diff(EQUAL, commonsuffix)); + } + + diff_cleanupMerge(diffs); + + return diffs; +} + + +QList<Diff> diff_match_patch::diff_compute(QString text1, QString text2, + bool checklines, clock_t deadline) { + QList<Diff> diffs; + + if (text1.isEmpty()) { + // Just add some text (speedup). + diffs.append(Diff(INSERT, text2)); + return diffs; + } + + if (text2.isEmpty()) { + // Just delete some text (speedup). + diffs.append(Diff(DELETE, text1)); + return diffs; + } + + { + const QString longtext = text1.length() > text2.length() ? text1 : text2; + const QString shorttext = text1.length() > text2.length() ? text2 : text1; + const int i = longtext.indexOf(shorttext); + if (i != -1) { + // Shorter text is inside the longer text (speedup). + const Operation op = (text1.length() > text2.length()) ? DELETE : INSERT; + diffs.append(Diff(op, longtext.left(i))); + diffs.append(Diff(EQUAL, shorttext)); + diffs.append(Diff(op, safeMid(longtext, i + shorttext.length()))); + return diffs; + } + + if (shorttext.length() == 1) { + // Single character string. + // After the previous speedup, the character can't be an equality. + diffs.append(Diff(DELETE, text1)); + diffs.append(Diff(INSERT, text2)); + return diffs; + } + // Garbage collect longtext and shorttext by scoping out. + } + + // Check to see if the problem can be split in two. + const QStringList hm = diff_halfMatch(text1, text2); + if (hm.count() > 0) { + // A half-match was found, sort out the return data. + const QString text1_a = hm[0]; + const QString text1_b = hm[1]; + const QString text2_a = hm[2]; + const QString text2_b = hm[3]; + const QString mid_common = hm[4]; + // Send both pairs off for separate processing. + const QList<Diff> diffs_a = diff_main(text1_a, text2_a, + checklines, deadline); + const QList<Diff> diffs_b = diff_main(text1_b, text2_b, + checklines, deadline); + // Merge the results. + diffs = diffs_a; + diffs.append(Diff(EQUAL, mid_common)); + diffs += diffs_b; + return diffs; + } + + // Perform a real diff. + if (checklines && text1.length() > 100 && text2.length() > 100) { + return diff_lineMode(text1, text2, deadline); + } + + return diff_bisect(text1, text2, deadline); +} + + +QList<Diff> diff_match_patch::diff_lineMode(QString text1, QString text2, + clock_t deadline) { + // Scan the text on a line-by-line basis first. + const QList<QVariant> b = diff_linesToChars(text1, text2); + text1 = b[0].toString(); + text2 = b[1].toString(); + QStringList linearray = b[2].toStringList(); + + QList<Diff> diffs = diff_main(text1, text2, false, deadline); + + // Convert the diff back to original text. + diff_charsToLines(diffs, linearray); + // Eliminate freak matches (e.g. blank lines) + diff_cleanupSemantic(diffs); + + // Rediff any replacement blocks, this time character-by-character. + // Add a dummy entry at the end. + diffs.append(Diff(EQUAL, "")); + int count_delete = 0; + int count_insert = 0; + QString text_delete = ""; + QString text_insert = ""; + + QMutableListIterator<Diff> pointer(diffs); + Diff *thisDiff = pointer.hasNext() ? &pointer.next() : NULL; + while (thisDiff != NULL) { + switch (thisDiff->operation) { + case INSERT: + count_insert++; + text_insert += thisDiff->text; + break; + case DELETE: + count_delete++; + text_delete += thisDiff->text; + break; + case EQUAL: + // Upon reaching an equality, check for prior redundancies. + if (count_delete >= 1 && count_insert >= 1) { + // Delete the offending records and add the merged ones. + pointer.previous(); + for (int j = 0; j < count_delete + count_insert; j++) { + pointer.previous(); + pointer.remove(); + } + foreach(Diff newDiff, + diff_main(text_delete, text_insert, false, deadline)) { + pointer.insert(newDiff); + } + } + count_insert = 0; + count_delete = 0; + text_delete = ""; + text_insert = ""; + break; + } + thisDiff = pointer.hasNext() ? &pointer.next() : NULL; + } + diffs.removeLast(); // Remove the dummy entry at the end. + + return diffs; +} + + +QList<Diff> diff_match_patch::diff_bisect(const QString &text1, + const QString &text2, clock_t deadline) { + // Cache the text lengths to prevent multiple calls. + const int text1_length = text1.length(); + const int text2_length = text2.length(); + const int max_d = (text1_length + text2_length + 1) / 2; + const int v_offset = max_d; + const int v_length = 2 * max_d; + int *v1 = new int[v_length]; + int *v2 = new int[v_length]; + for (int x = 0; x < v_length; x++) { + v1[x] = -1; + v2[x] = -1; + } + v1[v_offset + 1] = 0; + v2[v_offset + 1] = 0; + const int delta = text1_length - text2_length; + // If the total number of characters is odd, then the front path will + // collide with the reverse path. + const bool front = (delta % 2 != 0); + // Offsets for start and end of k loop. + // Prevents mapping of space beyond the grid. + int k1start = 0; + int k1end = 0; + int k2start = 0; + int k2end = 0; + for (int d = 0; d < max_d; d++) { + // Bail out if deadline is reached. + if (clock() > deadline) { + break; + } + + // Walk the front path one step. + for (int k1 = -d + k1start; k1 <= d - k1end; k1 += 2) { + const int k1_offset = v_offset + k1; + int x1; + if (k1 == -d || (k1 != d && v1[k1_offset - 1] < v1[k1_offset + 1])) { + x1 = v1[k1_offset + 1]; + } else { + x1 = v1[k1_offset - 1] + 1; + } + int y1 = x1 - k1; + while (x1 < text1_length && y1 < text2_length + && text1[x1] == text2[y1]) { + x1++; + y1++; + } + v1[k1_offset] = x1; + if (x1 > text1_length) { + // Ran off the right of the graph. + k1end += 2; + } else if (y1 > text2_length) { + // Ran off the bottom of the graph. + k1start += 2; + } else if (front) { + int k2_offset = v_offset + delta - k1; + if (k2_offset >= 0 && k2_offset < v_length && v2[k2_offset] != -1) { + // Mirror x2 onto top-left coordinate system. + int x2 = text1_length - v2[k2_offset]; + if (x1 >= x2) { + // Overlap detected. + delete [] v1; + delete [] v2; + return diff_bisectSplit(text1, text2, x1, y1, deadline); + } + } + } + } + + // Walk the reverse path one step. + for (int k2 = -d + k2start; k2 <= d - k2end; k2 += 2) { + const int k2_offset = v_offset + k2; + int x2; + if (k2 == -d || (k2 != d && v2[k2_offset - 1] < v2[k2_offset + 1])) { + x2 = v2[k2_offset + 1]; + } else { + x2 = v2[k2_offset - 1] + 1; + } + int y2 = x2 - k2; + while (x2 < text1_length && y2 < text2_length + && text1[text1_length - x2 - 1] == text2[text2_length - y2 - 1]) { + x2++; + y2++; + } + v2[k2_offset] = x2; + if (x2 > text1_length) { + // Ran off the left of the graph. + k2end += 2; + } else if (y2 > text2_length) { + // Ran off the top of the graph. + k2start += 2; + } else if (!front) { + int k1_offset = v_offset + delta - k2; + if (k1_offset >= 0 && k1_offset < v_length && v1[k1_offset] != -1) { + int x1 = v1[k1_offset]; + int y1 = v_offset + x1 - k1_offset; + // Mirror x2 onto top-left coordinate system. + x2 = text1_length - x2; + if (x1 >= x2) { + // Overlap detected. + delete [] v1; + delete [] v2; + return diff_bisectSplit(text1, text2, x1, y1, deadline); + } + } + } + } + } + delete [] v1; + delete [] v2; + // Diff took too long and hit the deadline or + // number of diffs equals number of characters, no commonality at all. + QList<Diff> diffs; + diffs.append(Diff(DELETE, text1)); + diffs.append(Diff(INSERT, text2)); + return diffs; +} + +QList<Diff> diff_match_patch::diff_bisectSplit(const QString &text1, + const QString &text2, int x, int y, clock_t deadline) { + QString text1a = text1.left(x); + QString text2a = text2.left(y); + QString text1b = safeMid(text1, x); + QString text2b = safeMid(text2, y); + + // Compute both diffs serially. + QList<Diff> diffs = diff_main(text1a, text2a, false, deadline); + QList<Diff> diffsb = diff_main(text1b, text2b, false, deadline); + + return diffs + diffsb; +} + +QList<QVariant> diff_match_patch::diff_linesToChars(const QString &text1, + const QString &text2) { + QStringList lineArray; + QMap<QString, int> lineHash; + // e.g. linearray[4] == "Hello\n" + // e.g. linehash.get("Hello\n") == 4 + + // "\x00" is a valid character, but various debuggers don't like it. + // So we'll insert a junk entry to avoid generating a null character. + lineArray.append(""); + + const QString chars1 = diff_linesToCharsMunge(text1, lineArray, lineHash); + const QString chars2 = diff_linesToCharsMunge(text2, lineArray, lineHash); + + QList<QVariant> listRet; + listRet.append(QVariant::fromValue(chars1)); + listRet.append(QVariant::fromValue(chars2)); + listRet.append(QVariant::fromValue(lineArray)); + return listRet; +} + + +QString diff_match_patch::diff_linesToCharsMunge(const QString &text, + QStringList &lineArray, + QMap<QString, int> &lineHash) { + int lineStart = 0; + int lineEnd = -1; + QString line; + QString chars; + // Walk the text, pulling out a substring for each line. + // text.split('\n') would would temporarily double our memory footprint. + // Modifying text would create many large strings to garbage collect. + while (lineEnd < text.length() - 1) { + lineEnd = text.indexOf('\n', lineStart); + if (lineEnd == -1) { + lineEnd = text.length() - 1; + } + line = safeMid(text, lineStart, lineEnd + 1 - lineStart); + lineStart = lineEnd + 1; + + if (lineHash.contains(line)) { + chars += QChar(static_cast<ushort>(lineHash.value(line))); + } else { + lineArray.append(line); + lineHash.insert(line, lineArray.size() - 1); + chars += QChar(static_cast<ushort>(lineArray.size() - 1)); + } + } + return chars; +} + + + +void diff_match_patch::diff_charsToLines(QList<Diff> &diffs, + const QStringList &lineArray) { + // Qt has no mutable foreach construct. + QMutableListIterator<Diff> i(diffs); + while (i.hasNext()) { + Diff &diff = i.next(); + QString text; + for (int y = 0; y < diff.text.length(); y++) { + text += lineArray.value(static_cast<ushort>(diff.text[y].unicode())); + } + diff.text = text; + } +} + + +int diff_match_patch::diff_commonPrefix(const QString &text1, + const QString &text2) { + // Performance analysis: http://neil.fraser.name/news/2007/10/09/ + const int n = std::min(text1.length(), text2.length()); + for (int i = 0; i < n; i++) { + if (text1[i] != text2[i]) { + return i; + } + } + return n; +} + + +int diff_match_patch::diff_commonSuffix(const QString &text1, + const QString &text2) { + // Performance analysis: http://neil.fraser.name/news/2007/10/09/ + const int text1_length = text1.length(); + const int text2_length = text2.length(); + const int n = std::min(text1_length, text2_length); + for (int i = 1; i <= n; i++) { + if (text1[text1_length - i] != text2[text2_length - i]) { + return i - 1; + } + } + return n; +} + +int diff_match_patch::diff_commonOverlap(const QString &text1, + const QString &text2) { + // Cache the text lengths to prevent multiple calls. + const int text1_length = text1.length(); + const int text2_length = text2.length(); + // Eliminate the null case. + if (text1_length == 0 || text2_length == 0) { + return 0; + } + // Truncate the longer string. + QString text1_trunc = text1; + QString text2_trunc = text2; + if (text1_length > text2_length) { + text1_trunc = text1.right(text2_length); + } else if (text1_length < text2_length) { + text2_trunc = text2.left(text1_length); + } + const int text_length = std::min(text1_length, text2_length); + // Quick check for the worst case. + if (text1_trunc == text2_trunc) { + return text_length; + } + + // Start by looking for a single character match + // and increase length until no match is found. + // Performance analysis: http://neil.fraser.name/news/2010/11/04/ + int best = 0; + int length = 1; + while (true) { + QString pattern = text1_trunc.right(length); + int found = text2_trunc.indexOf(pattern); + if (found == -1) { + return best; + } + length += found; + if (found == 0 || text1_trunc.right(length) == text2_trunc.left(length)) { + best = length; + length++; + } + } +} + +QStringList diff_match_patch::diff_halfMatch(const QString &text1, + const QString &text2) { + if (Diff_Timeout <= 0) { + // Don't risk returning a non-optimal diff if we have unlimited time. + return QStringList(); + } + const QString longtext = text1.length() > text2.length() ? text1 : text2; + const QString shorttext = text1.length() > text2.length() ? text2 : text1; + if (longtext.length() < 4 || shorttext.length() * 2 < longtext.length()) { + return QStringList(); // Pointless. + } + + // First check if the second quarter is the seed for a half-match. + const QStringList hm1 = diff_halfMatchI(longtext, shorttext, + (longtext.length() + 3) / 4); + // Check again based on the third quarter. + const QStringList hm2 = diff_halfMatchI(longtext, shorttext, + (longtext.length() + 1) / 2); + QStringList hm; + if (hm1.isEmpty() && hm2.isEmpty()) { + return QStringList(); + } else if (hm2.isEmpty()) { + hm = hm1; + } else if (hm1.isEmpty()) { + hm = hm2; + } else { + // Both matched. Select the longest. + hm = hm1[4].length() > hm2[4].length() ? hm1 : hm2; + } + + // A half-match was found, sort out the return data. + if (text1.length() > text2.length()) { + return hm; + } else { + QStringList listRet; + listRet << hm[2] << hm[3] << hm[0] << hm[1] << hm[4]; + return listRet; + } +} + + +QStringList diff_match_patch::diff_halfMatchI(const QString &longtext, + const QString &shorttext, + int i) { + // Start with a 1/4 length substring at position i as a seed. + const QString seed = safeMid(longtext, i, longtext.length() / 4); + int j = -1; + QString best_common; + QString best_longtext_a, best_longtext_b; + QString best_shorttext_a, best_shorttext_b; + while ((j = shorttext.indexOf(seed, j + 1)) != -1) { + const int prefixLength = diff_commonPrefix(safeMid(longtext, i), + safeMid(shorttext, j)); + const int suffixLength = diff_commonSuffix(longtext.left(i), + shorttext.left(j)); + if (best_common.length() < suffixLength + prefixLength) { + best_common = safeMid(shorttext, j - suffixLength, suffixLength) + + safeMid(shorttext, j, prefixLength); + best_longtext_a = longtext.left(i - suffixLength); + best_longtext_b = safeMid(longtext, i + prefixLength); + best_shorttext_a = shorttext.left(j - suffixLength); + best_shorttext_b = safeMid(shorttext, j + prefixLength); + } + } + if (best_common.length() * 2 >= longtext.length()) { + QStringList listRet; + listRet << best_longtext_a << best_longtext_b << best_shorttext_a + << best_shorttext_b << best_common; + return listRet; + } else { + return QStringList(); + } +} + + +void diff_match_patch::diff_cleanupSemantic(QList<Diff> &diffs) { + if (diffs.isEmpty()) { + return; + } + bool changes = false; + QStack<Diff> equalities; // Stack of equalities. + QString lastequality; // Always equal to equalities.lastElement().text + QMutableListIterator<Diff> pointer(diffs); + // Number of characters that changed prior to the equality. + int length_insertions1 = 0; + int length_deletions1 = 0; + // Number of characters that changed after the equality. + int length_insertions2 = 0; + int length_deletions2 = 0; + Diff *thisDiff = pointer.hasNext() ? &pointer.next() : NULL; + while (thisDiff != NULL) { + if (thisDiff->operation == EQUAL) { + // Equality found. + equalities.push(*thisDiff); + length_insertions1 = length_insertions2; + length_deletions1 = length_deletions2; + length_insertions2 = 0; + length_deletions2 = 0; + lastequality = thisDiff->text; + } else { + // An insertion or deletion. + if (thisDiff->operation == INSERT) { + length_insertions2 += thisDiff->text.length(); + } else { + length_deletions2 += thisDiff->text.length(); + } + // Eliminate an equality that is smaller or equal to the edits on both + // sides of it. + if (!lastequality.isNull() + && (lastequality.length() + <= std::max(length_insertions1, length_deletions1)) + && (lastequality.length() + <= std::max(length_insertions2, length_deletions2))) { + // printf("Splitting: '%s'\n", qPrintable(lastequality)); + // Walk back to offending equality. + while (*thisDiff != equalities.top()) { + thisDiff = &pointer.previous(); + } + pointer.next(); + + // Replace equality with a delete. + pointer.setValue(Diff(DELETE, lastequality)); + // Insert a corresponding an insert. + pointer.insert(Diff(INSERT, lastequality)); + + equalities.pop(); // Throw away the equality we just deleted. + if (!equalities.isEmpty()) { + // Throw away the previous equality (it needs to be reevaluated). + equalities.pop(); + } + if (equalities.isEmpty()) { + // There are no previous equalities, walk back to the start. + while (pointer.hasPrevious()) { + pointer.previous(); + } + } else { + // There is a safe equality we can fall back to. + thisDiff = &equalities.top(); + while (*thisDiff != pointer.previous()) { + // Intentionally empty loop. + } + } + + length_insertions1 = 0; // Reset the counters. + length_deletions1 = 0; + length_insertions2 = 0; + length_deletions2 = 0; + lastequality = QString(); + changes = true; + } + } + thisDiff = pointer.hasNext() ? &pointer.next() : NULL; + } + + // Normalize the diff. + if (changes) { + diff_cleanupMerge(diffs); + } + diff_cleanupSemanticLossless(diffs); + + // Find any overlaps between deletions and insertions. + // e.g: <del>abcxxx</del><ins>xxxdef</ins> + // -> <del>abc</del>xxx<ins>def</ins> + // e.g: <del>xxxabc</del><ins>defxxx</ins> + // -> <ins>def</ins>xxx<del>abc</del> + // Only extract an overlap if it is as big as the edit ahead or behind it. + pointer.toFront(); + Diff *prevDiff = NULL; + thisDiff = NULL; + if (pointer.hasNext()) { + prevDiff = &pointer.next(); + if (pointer.hasNext()) { + thisDiff = &pointer.next(); + } + } + while (thisDiff != NULL) { + if (prevDiff->operation == DELETE && + thisDiff->operation == INSERT) { + QString deletion = prevDiff->text; + QString insertion = thisDiff->text; + int overlap_length1 = diff_commonOverlap(deletion, insertion); + int overlap_length2 = diff_commonOverlap(insertion, deletion); + if (overlap_length1 >= overlap_length2) { + if (overlap_length1 >= deletion.length() / 2.0 || + overlap_length1 >= insertion.length() / 2.0) { + // Overlap found. Insert an equality and trim the surrounding edits. + pointer.previous(); + pointer.insert(Diff(EQUAL, insertion.left(overlap_length1))); + prevDiff->text = + deletion.left(deletion.length() - overlap_length1); + thisDiff->text = safeMid(insertion, overlap_length1); + // pointer.insert inserts the element before the cursor, so there is + // no need to step past the new element. + } + } else { + if (overlap_length2 >= deletion.length() / 2.0 || + overlap_length2 >= insertion.length() / 2.0) { + // Reverse overlap found. + // Insert an equality and swap and trim the surrounding edits. + pointer.previous(); + pointer.insert(Diff(EQUAL, deletion.left(overlap_length2))); + prevDiff->operation = INSERT; + prevDiff->text = + insertion.left(insertion.length() - overlap_length2); + thisDiff->operation = DELETE; + thisDiff->text = safeMid(deletion, overlap_length2); + // pointer.insert inserts the element before the cursor, so there is + // no need to step past the new element. + } + } + thisDiff = pointer.hasNext() ? &pointer.next() : NULL; + } + prevDiff = thisDiff; + thisDiff = pointer.hasNext() ? &pointer.next() : NULL; + } +} + + +void diff_match_patch::diff_cleanupSemanticLossless(QList<Diff> &diffs) { + QString equality1, edit, equality2; + QString commonString; + int commonOffset; + int score, bestScore; + QString bestEquality1, bestEdit, bestEquality2; + // Create a new iterator at the start. + QMutableListIterator<Diff> pointer(diffs); + Diff *prevDiff = pointer.hasNext() ? &pointer.next() : NULL; + Diff *thisDiff = pointer.hasNext() ? &pointer.next() : NULL; + Diff *nextDiff = pointer.hasNext() ? &pointer.next() : NULL; + + // Intentionally ignore the first and last element (don't need checking). + while (nextDiff != NULL) { + if (prevDiff->operation == EQUAL && + nextDiff->operation == EQUAL) { + // This is a single edit surrounded by equalities. + equality1 = prevDiff->text; + edit = thisDiff->text; + equality2 = nextDiff->text; + + // First, shift the edit as far left as possible. + commonOffset = diff_commonSuffix(equality1, edit); + if (commonOffset != 0) { + commonString = safeMid(edit, edit.length() - commonOffset); + equality1 = equality1.left(equality1.length() - commonOffset); + edit = commonString + edit.left(edit.length() - commonOffset); + equality2 = commonString + equality2; + } + + // Second, step character by character right, looking for the best fit. + bestEquality1 = equality1; + bestEdit = edit; + bestEquality2 = equality2; + bestScore = diff_cleanupSemanticScore(equality1, edit) + + diff_cleanupSemanticScore(edit, equality2); + while (!edit.isEmpty() && !equality2.isEmpty() + && edit[0] == equality2[0]) { + equality1 += edit[0]; + edit = safeMid(edit, 1) + equality2[0]; + equality2 = safeMid(equality2, 1); + score = diff_cleanupSemanticScore(equality1, edit) + + diff_cleanupSemanticScore(edit, equality2); + // The >= encourages trailing rather than leading whitespace on edits. + if (score >= bestScore) { + bestScore = score; + bestEquality1 = equality1; + bestEdit = edit; + bestEquality2 = equality2; + } + } + + if (prevDiff->text != bestEquality1) { + // We have an improvement, save it back to the diff. + if (!bestEquality1.isEmpty()) { + prevDiff->text = bestEquality1; + } else { + pointer.previous(); // Walk past nextDiff. + pointer.previous(); // Walk past thisDiff. + pointer.previous(); // Walk past prevDiff. + pointer.remove(); // Delete prevDiff. + pointer.next(); // Walk past thisDiff. + pointer.next(); // Walk past nextDiff. + } + thisDiff->text = bestEdit; + if (!bestEquality2.isEmpty()) { + nextDiff->text = bestEquality2; + } else { + pointer.remove(); // Delete nextDiff. + nextDiff = thisDiff; + thisDiff = prevDiff; + } + } + } + prevDiff = thisDiff; + thisDiff = nextDiff; + nextDiff = pointer.hasNext() ? &pointer.next() : NULL; + } +} + + +int diff_match_patch::diff_cleanupSemanticScore(const QString &one, + const QString &two) { + if (one.isEmpty() || two.isEmpty()) { + // Edges are the best. + return 6; + } + + // Each port of this function behaves slightly differently due to + // subtle differences in each language's definition of things like + // 'whitespace'. Since this function's purpose is largely cosmetic, + // the choice has been made to use each language's native features + // rather than force total conformity. + QChar char1 = one[one.length() - 1]; + QChar char2 = two[0]; + bool nonAlphaNumeric1 = !char1.isLetterOrNumber(); + bool nonAlphaNumeric2 = !char2.isLetterOrNumber(); + bool whitespace1 = nonAlphaNumeric1 && char1.isSpace(); + bool whitespace2 = nonAlphaNumeric2 && char2.isSpace(); + bool lineBreak1 = whitespace1 && char1.category() == QChar::Other_Control; + bool lineBreak2 = whitespace2 && char2.category() == QChar::Other_Control; + bool blankLine1 = lineBreak1 && BLANKLINEEND.indexIn(one) != -1; + bool blankLine2 = lineBreak2 && BLANKLINESTART.indexIn(two) != -1; + + if (blankLine1 || blankLine2) { + // Five points for blank lines. + return 5; + } else if (lineBreak1 || lineBreak2) { + // Four points for line breaks. + return 4; + } else if (nonAlphaNumeric1 && !whitespace1 && whitespace2) { + // Three points for end of sentences. + return 3; + } else if (whitespace1 || whitespace2) { + // Two points for whitespace. + return 2; + } else if (nonAlphaNumeric1 || nonAlphaNumeric2) { + // One point for non-alphanumeric. + return 1; + } + return 0; +} + + +// Define some regex patterns for matching boundaries. +QRegExp diff_match_patch::BLANKLINEEND = QRegExp("\\n\\r?\\n$"); +QRegExp diff_match_patch::BLANKLINESTART = QRegExp("^\\r?\\n\\r?\\n"); + + +void diff_match_patch::diff_cleanupEfficiency(QList<Diff> &diffs) { + if (diffs.isEmpty()) { + return; + } + bool changes = false; + QStack<Diff> equalities; // Stack of equalities. + QString lastequality; // Always equal to equalities.lastElement().text + QMutableListIterator<Diff> pointer(diffs); + // Is there an insertion operation before the last equality. + bool pre_ins = false; + // Is there a deletion operation before the last equality. + bool pre_del = false; + // Is there an insertion operation after the last equality. + bool post_ins = false; + // Is there a deletion operation after the last equality. + bool post_del = false; + + Diff *thisDiff = pointer.hasNext() ? &pointer.next() : NULL; + Diff *safeDiff = thisDiff; + + while (thisDiff != NULL) { + if (thisDiff->operation == EQUAL) { + // Equality found. + if (thisDiff->text.length() < Diff_EditCost && (post_ins || post_del)) { + // Candidate found. + equalities.push(*thisDiff); + pre_ins = post_ins; + pre_del = post_del; + lastequality = thisDiff->text; + } else { + // Not a candidate, and can never become one. + equalities.clear(); + lastequality = QString(); + safeDiff = thisDiff; + } + post_ins = post_del = false; + } else { + // An insertion or deletion. + if (thisDiff->operation == DELETE) { + post_del = true; + } else { + post_ins = true; + } + /* + * Five types to be split: + * <ins>A</ins><del>B</del>XY<ins>C</ins><del>D</del> + * <ins>A</ins>X<ins>C</ins><del>D</del> + * <ins>A</ins><del>B</del>X<ins>C</ins> + * <ins>A</del>X<ins>C</ins><del>D</del> + * <ins>A</ins><del>B</del>X<del>C</del> + */ + if (!lastequality.isNull() + && ((pre_ins && pre_del && post_ins && post_del) + || ((lastequality.length() < Diff_EditCost / 2) + && ((pre_ins ? 1 : 0) + (pre_del ? 1 : 0) + + (post_ins ? 1 : 0) + (post_del ? 1 : 0)) == 3))) { + // printf("Splitting: '%s'\n", qPrintable(lastequality)); + // Walk back to offending equality. + while (*thisDiff != equalities.top()) { + thisDiff = &pointer.previous(); + } + pointer.next(); + + // Replace equality with a delete. + pointer.setValue(Diff(DELETE, lastequality)); + // Insert a corresponding an insert. + pointer.insert(Diff(INSERT, lastequality)); + thisDiff = &pointer.previous(); + pointer.next(); + + equalities.pop(); // Throw away the equality we just deleted. + lastequality = QString(); + if (pre_ins && pre_del) { + // No changes made which could affect previous entry, keep going. + post_ins = post_del = true; + equalities.clear(); + safeDiff = thisDiff; + } else { + if (!equalities.isEmpty()) { + // Throw away the previous equality (it needs to be reevaluated). + equalities.pop(); + } + if (equalities.isEmpty()) { + // There are no previous questionable equalities, + // walk back to the last known safe diff. + thisDiff = safeDiff; + } else { + // There is an equality we can fall back to. + thisDiff = &equalities.top(); + } + while (*thisDiff != pointer.previous()) { + // Intentionally empty loop. + } + post_ins = post_del = false; + } + + changes = true; + } + } + thisDiff = pointer.hasNext() ? &pointer.next() : NULL; + } + + if (changes) { + diff_cleanupMerge(diffs); + } +} + + +void diff_match_patch::diff_cleanupMerge(QList<Diff> &diffs) { + diffs.append(Diff(EQUAL, "")); // Add a dummy entry at the end. + QMutableListIterator<Diff> pointer(diffs); + int count_delete = 0; + int count_insert = 0; + QString text_delete = ""; + QString text_insert = ""; + Diff *thisDiff = pointer.hasNext() ? &pointer.next() : NULL; + Diff *prevEqual = NULL; + int commonlength; + while (thisDiff != NULL) { + switch (thisDiff->operation) { + case INSERT: + count_insert++; + text_insert += thisDiff->text; + prevEqual = NULL; + break; + case DELETE: + count_delete++; + text_delete += thisDiff->text; + prevEqual = NULL; + break; + case EQUAL: + if (count_delete + count_insert > 1) { + bool both_types = count_delete != 0 && count_insert != 0; + // Delete the offending records. + pointer.previous(); // Reverse direction. + while (count_delete-- > 0) { + pointer.previous(); + pointer.remove(); + } + while (count_insert-- > 0) { + pointer.previous(); + pointer.remove(); + } + if (both_types) { + // Factor out any common prefixies. + commonlength = diff_commonPrefix(text_insert, text_delete); + if (commonlength != 0) { + if (pointer.hasPrevious()) { + thisDiff = &pointer.previous(); + if (thisDiff->operation != EQUAL) { + throw "Previous diff should have been an equality."; + } + thisDiff->text += text_insert.left(commonlength); + pointer.next(); + } else { + pointer.insert(Diff(EQUAL, text_insert.left(commonlength))); + } + text_insert = safeMid(text_insert, commonlength); + text_delete = safeMid(text_delete, commonlength); + } + // Factor out any common suffixies. + commonlength = diff_commonSuffix(text_insert, text_delete); + if (commonlength != 0) { + thisDiff = &pointer.next(); + thisDiff->text = safeMid(text_insert, text_insert.length() + - commonlength) + thisDiff->text; + text_insert = text_insert.left(text_insert.length() + - commonlength); + text_delete = text_delete.left(text_delete.length() + - commonlength); + pointer.previous(); + } + } + // Insert the merged records. + if (!text_delete.isEmpty()) { + pointer.insert(Diff(DELETE, text_delete)); + } + if (!text_insert.isEmpty()) { + pointer.insert(Diff(INSERT, text_insert)); + } + // Step forward to the equality. + thisDiff = pointer.hasNext() ? &pointer.next() : NULL; + + } else if (prevEqual != NULL) { + // Merge this equality with the previous one. + prevEqual->text += thisDiff->text; + pointer.remove(); + thisDiff = &pointer.previous(); + pointer.next(); // Forward direction + } + count_insert = 0; + count_delete = 0; + text_delete = ""; + text_insert = ""; + prevEqual = thisDiff; + break; + } + thisDiff = pointer.hasNext() ? &pointer.next() : NULL; + } + if (diffs.back().text.isEmpty()) { + diffs.removeLast(); // Remove the dummy entry at the end. + } + + /* + * Second pass: look for single edits surrounded on both sides by equalities + * which can be shifted sideways to eliminate an equality. + * e.g: A<ins>BA</ins>C -> <ins>AB</ins>AC + */ + bool changes = false; + // Create a new iterator at the start. + // (As opposed to walking the current one back.) + pointer.toFront(); + Diff *prevDiff = pointer.hasNext() ? &pointer.next() : NULL; + thisDiff = pointer.hasNext() ? &pointer.next() : NULL; + Diff *nextDiff = pointer.hasNext() ? &pointer.next() : NULL; + + // Intentionally ignore the first and last element (don't need checking). + while (nextDiff != NULL) { + if (prevDiff->operation == EQUAL && + nextDiff->operation == EQUAL) { + // This is a single edit surrounded by equalities. + if (thisDiff->text.endsWith(prevDiff->text)) { + // Shift the edit over the previous equality. + thisDiff->text = prevDiff->text + + thisDiff->text.left(thisDiff->text.length() + - prevDiff->text.length()); + nextDiff->text = prevDiff->text + nextDiff->text; + pointer.previous(); // Walk past nextDiff. + pointer.previous(); // Walk past thisDiff. + pointer.previous(); // Walk past prevDiff. + pointer.remove(); // Delete prevDiff. + pointer.next(); // Walk past thisDiff. + thisDiff = &pointer.next(); // Walk past nextDiff. + nextDiff = pointer.hasNext() ? &pointer.next() : NULL; + changes = true; + } else if (thisDiff->text.startsWith(nextDiff->text)) { + // Shift the edit over the next equality. + prevDiff->text += nextDiff->text; + thisDiff->text = safeMid(thisDiff->text, nextDiff->text.length()) + + nextDiff->text; + pointer.remove(); // Delete nextDiff. + nextDiff = pointer.hasNext() ? &pointer.next() : NULL; + changes = true; + } + } + prevDiff = thisDiff; + thisDiff = nextDiff; + nextDiff = pointer.hasNext() ? &pointer.next() : NULL; + } + // If shifts were made, the diff needs reordering and another shift sweep. + if (changes) { + diff_cleanupMerge(diffs); + } +} + + +int diff_match_patch::diff_xIndex(const QList<Diff> &diffs, int loc) { + int chars1 = 0; + int chars2 = 0; + int last_chars1 = 0; + int last_chars2 = 0; + Diff lastDiff; + foreach(Diff aDiff, diffs) { + if (aDiff.operation != INSERT) { + // Equality or deletion. + chars1 += aDiff.text.length(); + } + if (aDiff.operation != DELETE) { + // Equality or insertion. + chars2 += aDiff.text.length(); + } + if (chars1 > loc) { + // Overshot the location. + lastDiff = aDiff; + break; + } + last_chars1 = chars1; + last_chars2 = chars2; + } + if (lastDiff.operation == DELETE) { + // The location was deleted. + return last_chars2; + } + // Add the remaining character length. + return last_chars2 + (loc - last_chars1); +} + + +QString diff_match_patch::diff_prettyHtml(const QList<Diff> &diffs) { + QString html; + QString text; + foreach(Diff aDiff, diffs) { + text = aDiff.text; + text.replace("&", "&").replace("<", "<") + .replace(">", ">").replace("\n", "¶<br>"); + switch (aDiff.operation) { + case INSERT: + html += QString("<ins style=\"background:#e6ffe6;\">") + text + + QString("</ins>"); + break; + case DELETE: + html += QString("<del style=\"background:#ffe6e6;\">") + text + + QString("</del>"); + break; + case EQUAL: + html += QString("<span>") + text + QString("</span>"); + break; + } + } + return html; +} + + +QString diff_match_patch::diff_text1(const QList<Diff> &diffs) { + QString text; + foreach(Diff aDiff, diffs) { + if (aDiff.operation != INSERT) { + text += aDiff.text; + } + } + return text; +} + + +QString diff_match_patch::diff_text2(const QList<Diff> &diffs) { + QString text; + foreach(Diff aDiff, diffs) { + if (aDiff.operation != DELETE) { + text += aDiff.text; + } + } + return text; +} + + +int diff_match_patch::diff_levenshtein(const QList<Diff> &diffs) { + int levenshtein = 0; + int insertions = 0; + int deletions = 0; + foreach(Diff aDiff, diffs) { + switch (aDiff.operation) { + case INSERT: + insertions += aDiff.text.length(); + break; + case DELETE: + deletions += aDiff.text.length(); + break; + case EQUAL: + // A deletion and an insertion is one substitution. + levenshtein += std::max(insertions, deletions); + insertions = 0; + deletions = 0; + break; + } + } + levenshtein += std::max(insertions, deletions); + return levenshtein; +} + + +QString diff_match_patch::diff_toDelta(const QList<Diff> &diffs) { + QString text; + foreach(Diff aDiff, diffs) { + switch (aDiff.operation) { + case INSERT: { + QString encoded = QString(QUrl::toPercentEncoding(aDiff.text, + " !~*'();/?:@&=+$,#")); + text += QString("+") + encoded + QString("\t"); + break; + } + case DELETE: + text += QString("-") + QString::number(aDiff.text.length()) + + QString("\t"); + break; + case EQUAL: + text += QString("=") + QString::number(aDiff.text.length()) + + QString("\t"); + break; + } + } + if (!text.isEmpty()) { + // Strip off trailing tab character. + text = text.left(text.length() - 1); + } + return text; +} + + +QList<Diff> diff_match_patch::diff_fromDelta(const QString &text1, + const QString &delta) { + QList<Diff> diffs; + int pointer = 0; // Cursor in text1 + QStringList tokens = delta.split("\t"); + foreach(QString token, tokens) { + if (token.isEmpty()) { + // Blank tokens are ok (from a trailing \t). + continue; + } + // Each token begins with a one character parameter which specifies the + // operation of this token (delete, insert, equality). + QString param = safeMid(token, 1); + switch (token[0].toLatin1()) { + case '+': + param = QUrl::fromPercentEncoding(qPrintable(param)); + diffs.append(Diff(INSERT, param)); + break; + case '-': + // Fall through. + case '=': { + int n; + n = param.toInt(); + if (n < 0) { + throw QString("Negative number in diff_fromDelta: %1").arg(param); + } + QString text; + text = safeMid(text1, pointer, n); + pointer += n; + if (token[0] == QChar('=')) { + diffs.append(Diff(EQUAL, text)); + } else { + diffs.append(Diff(DELETE, text)); + } + break; + } + default: + throw QString("Invalid diff operation in diff_fromDelta: %1") + .arg(token[0]); + } + } + if (pointer != text1.length()) { + throw QString("Delta length (%1) smaller than source text length (%2)") + .arg(pointer).arg(text1.length()); + } + return diffs; +} + + + // MATCH FUNCTIONS + + +int diff_match_patch::match_main(const QString &text, const QString &pattern, + int loc) { + // Check for null inputs. + if (text.isNull() || pattern.isNull()) { + throw "Null inputs. (match_main)"; + } + + loc = std::max(0, std::min(loc, text.length())); + if (text == pattern) { + // Shortcut (potentially not guaranteed by the algorithm) + return 0; + } else if (text.isEmpty()) { + // Nothing to match. + return -1; + } else if (loc + pattern.length() <= text.length() + && safeMid(text, loc, pattern.length()) == pattern) { + // Perfect match at the perfect spot! (Includes case of null pattern) + return loc; + } else { + // Do a fuzzy compare. + return match_bitap(text, pattern, loc); + } +} + + +int diff_match_patch::match_bitap(const QString &text, const QString &pattern, + int loc) { + if (!(Match_MaxBits == 0 || pattern.length() <= Match_MaxBits)) { + throw "Pattern too long for this application."; + } + + // Initialise the alphabet. + QMap<QChar, int> s = match_alphabet(pattern); + + // Highest score beyond which we give up. + double score_threshold = Match_Threshold; + // Is there a nearby exact match? (speedup) + int best_loc = text.indexOf(pattern, loc); + if (best_loc != -1) { + score_threshold = std::min(match_bitapScore(0, best_loc, loc, pattern), + score_threshold); + // What about in the other direction? (speedup) + best_loc = text.lastIndexOf(pattern, loc + pattern.length()); + if (best_loc != -1) { + score_threshold = std::min(match_bitapScore(0, best_loc, loc, pattern), + score_threshold); + } + } + + // Initialise the bit arrays. + int matchmask = 1 << (pattern.length() - 1); + best_loc = -1; + + int bin_min, bin_mid; + int bin_max = pattern.length() + text.length(); + int *rd; + int *last_rd = NULL; + for (int d = 0; d < pattern.length(); d++) { + // Scan for the best match; each iteration allows for one more error. + // Run a binary search to determine how far from 'loc' we can stray at + // this error level. + bin_min = 0; + bin_mid = bin_max; + while (bin_min < bin_mid) { + if (match_bitapScore(d, loc + bin_mid, loc, pattern) + <= score_threshold) { + bin_min = bin_mid; + } else { + bin_max = bin_mid; + } + bin_mid = (bin_max - bin_min) / 2 + bin_min; + } + // Use the result from this iteration as the maximum for the next. + bin_max = bin_mid; + int start = std::max(1, loc - bin_mid + 1); + int finish = std::min(loc + bin_mid, text.length()) + pattern.length(); + + rd = new int[finish + 2]; + rd[finish + 1] = (1 << d) - 1; + for (int j = finish; j >= start; j--) { + int charMatch; + if (text.length() <= j - 1) { + // Out of range. + charMatch = 0; + } else { + charMatch = s.value(text[j - 1], 0); + } + if (d == 0) { + // First pass: exact match. + rd[j] = ((rd[j + 1] << 1) | 1) & charMatch; + } else { + // Subsequent passes: fuzzy match. + rd[j] = (((rd[j + 1] << 1) | 1) & charMatch) + | (((last_rd[j + 1] | last_rd[j]) << 1) | 1) + | last_rd[j + 1]; + } + if ((rd[j] & matchmask) != 0) { + double score = match_bitapScore(d, j - 1, loc, pattern); + // This match will almost certainly be better than any existing + // match. But check anyway. + if (score <= score_threshold) { + // Told you so. + score_threshold = score; + best_loc = j - 1; + if (best_loc > loc) { + // When passing loc, don't exceed our current distance from loc. + start = std::max(1, 2 * loc - best_loc); + } else { + // Already passed loc, downhill from here on in. + break; + } + } + } + } + if (match_bitapScore(d + 1, loc, loc, pattern) > score_threshold) { + // No hope for a (better) match at greater error levels. + break; + } + delete [] last_rd; + last_rd = rd; + } + delete [] last_rd; + delete [] rd; + return best_loc; +} + + +double diff_match_patch::match_bitapScore(int e, int x, int loc, + const QString &pattern) { + const float accuracy = static_cast<float> (e) / pattern.length(); + const int proximity = qAbs(loc - x); + if (Match_Distance == 0) { + // Dodge divide by zero error. + return proximity == 0 ? accuracy : 1.0; + } + return accuracy + (proximity / static_cast<float> (Match_Distance)); +} + + +QMap<QChar, int> diff_match_patch::match_alphabet(const QString &pattern) { + QMap<QChar, int> s; + int i; + for (i = 0; i < pattern.length(); i++) { + QChar c = pattern[i]; + s.insert(c, 0); + } + for (i = 0; i < pattern.length(); i++) { + QChar c = pattern[i]; + s.insert(c, s.value(c) | (1 << (pattern.length() - i - 1))); + } + return s; +} + + +// PATCH FUNCTIONS + + +void diff_match_patch::patch_addContext(Patch &patch, const QString &text) { + if (text.isEmpty()) { + return; + } + QString pattern = safeMid(text, patch.start2, patch.length1); + int padding = 0; + + // Look for the first and last matches of pattern in text. If two different + // matches are found, increase the pattern length. + while (text.indexOf(pattern) != text.lastIndexOf(pattern) + && pattern.length() < Match_MaxBits - Patch_Margin - Patch_Margin) { + padding += Patch_Margin; + pattern = safeMid(text, std::max(0, patch.start2 - padding), + std::min(text.length(), patch.start2 + patch.length1 + padding) + - std::max(0, patch.start2 - padding)); + } + // Add one chunk for good luck. + padding += Patch_Margin; + + // Add the prefix. + QString prefix = safeMid(text, std::max(0, patch.start2 - padding), + patch.start2 - std::max(0, patch.start2 - padding)); + if (!prefix.isEmpty()) { + patch.diffs.prepend(Diff(EQUAL, prefix)); + } + // Add the suffix. + QString suffix = safeMid(text, patch.start2 + patch.length1, + std::min(text.length(), patch.start2 + patch.length1 + padding) + - (patch.start2 + patch.length1)); + if (!suffix.isEmpty()) { + patch.diffs.append(Diff(EQUAL, suffix)); + } + + // Roll back the start points. + patch.start1 -= prefix.length(); + patch.start2 -= prefix.length(); + // Extend the lengths. + patch.length1 += prefix.length() + suffix.length(); + patch.length2 += prefix.length() + suffix.length(); +} + + +QList<Patch> diff_match_patch::patch_make(const QString &text1, + const QString &text2) { + // Check for null inputs. + if (text1.isNull() || text2.isNull()) { + throw "Null inputs. (patch_make)"; + } + + // No diffs provided, compute our own. + QList<Diff> diffs = diff_main(text1, text2, true); + if (diffs.size() > 2) { + diff_cleanupSemantic(diffs); + diff_cleanupEfficiency(diffs); + } + + return patch_make(text1, diffs); +} + + +QList<Patch> diff_match_patch::patch_make(const QList<Diff> &diffs) { + // No origin string provided, compute our own. + const QString text1 = diff_text1(diffs); + return patch_make(text1, diffs); +} + + +QList<Patch> diff_match_patch::patch_make(const QString &text1, + const QString &text2, + const QList<Diff> &diffs) { + // text2 is entirely unused. + return patch_make(text1, diffs); + + Q_UNUSED(text2) +} + + +QList<Patch> diff_match_patch::patch_make(const QString &text1, + const QList<Diff> &diffs) { + // Check for null inputs. + if (text1.isNull()) { + throw "Null inputs. (patch_make)"; + } + + QList<Patch> patches; + if (diffs.isEmpty()) { + return patches; // Get rid of the null case. + } + Patch patch; + int char_count1 = 0; // Number of characters into the text1 string. + int char_count2 = 0; // Number of characters into the text2 string. + // Start with text1 (prepatch_text) and apply the diffs until we arrive at + // text2 (postpatch_text). We recreate the patches one by one to determine + // context info. + QString prepatch_text = text1; + QString postpatch_text = text1; + foreach(Diff aDiff, diffs) { + if (patch.diffs.isEmpty() && aDiff.operation != EQUAL) { + // A new patch starts here. + patch.start1 = char_count1; + patch.start2 = char_count2; + } + + switch (aDiff.operation) { + case INSERT: + patch.diffs.append(aDiff); + patch.length2 += aDiff.text.length(); + postpatch_text = postpatch_text.left(char_count2) + + aDiff.text + safeMid(postpatch_text, char_count2); + break; + case DELETE: + patch.length1 += aDiff.text.length(); + patch.diffs.append(aDiff); + postpatch_text = postpatch_text.left(char_count2) + + safeMid(postpatch_text, char_count2 + aDiff.text.length()); + break; + case EQUAL: + if (aDiff.text.length() <= 2 * Patch_Margin + && !patch.diffs.isEmpty() && !(aDiff == diffs.back())) { + // Small equality inside a patch. + patch.diffs.append(aDiff); + patch.length1 += aDiff.text.length(); + patch.length2 += aDiff.text.length(); + } + + if (aDiff.text.length() >= 2 * Patch_Margin) { + // Time for a new patch. + if (!patch.diffs.isEmpty()) { + patch_addContext(patch, prepatch_text); + patches.append(patch); + patch = Patch(); + // Unlike Unidiff, our patch lists have a rolling context. + // http://code.google.com/p/google-diff-match-patch/wiki/Unidiff + // Update prepatch text & pos to reflect the application of the + // just completed patch. + prepatch_text = postpatch_text; + char_count1 = char_count2; + } + } + break; + } + + // Update the current character count. + if (aDiff.operation != INSERT) { + char_count1 += aDiff.text.length(); + } + if (aDiff.operation != DELETE) { + char_count2 += aDiff.text.length(); + } + } + // Pick up the leftover patch if not empty. + if (!patch.diffs.isEmpty()) { + patch_addContext(patch, prepatch_text); + patches.append(patch); + } + + return patches; +} + + +QList<Patch> diff_match_patch::patch_deepCopy(QList<Patch> &patches) { + QList<Patch> patchesCopy; + foreach(Patch aPatch, patches) { + Patch patchCopy = Patch(); + foreach(Diff aDiff, aPatch.diffs) { + Diff diffCopy = Diff(aDiff.operation, aDiff.text); + patchCopy.diffs.append(diffCopy); + } + patchCopy.start1 = aPatch.start1; + patchCopy.start2 = aPatch.start2; + patchCopy.length1 = aPatch.length1; + patchCopy.length2 = aPatch.length2; + patchesCopy.append(patchCopy); + } + return patchesCopy; +} + + +QPair<QString, QVector<bool> > diff_match_patch::patch_apply( + QList<Patch> &patches, const QString &sourceText) { + QString text = sourceText; // Copy to preserve original. + if (patches.isEmpty()) { + return QPair<QString,QVector<bool> >(text, QVector<bool>(0)); + } + + // Deep copy the patches so that no changes are made to originals. + QList<Patch> patchesCopy = patch_deepCopy(patches); + + QString nullPadding = patch_addPadding(patchesCopy); + text = nullPadding + text + nullPadding; + patch_splitMax(patchesCopy); + + int x = 0; + // delta keeps track of the offset between the expected and actual location + // of the previous patch. If there are patches expected at positions 10 and + // 20, but the first patch was found at 12, delta is 2 and the second patch + // has an effective expected position of 22. + int delta = 0; + QVector<bool> results(patchesCopy.size()); + foreach(Patch aPatch, patchesCopy) { + int expected_loc = aPatch.start2 + delta; + QString text1 = diff_text1(aPatch.diffs); + int start_loc; + int end_loc = -1; + if (text1.length() > Match_MaxBits) { + // patch_splitMax will only provide an oversized pattern in the case of + // a monster delete. + start_loc = match_main(text, text1.left(Match_MaxBits), expected_loc); + if (start_loc != -1) { + end_loc = match_main(text, text1.right(Match_MaxBits), + expected_loc + text1.length() - Match_MaxBits); + if (end_loc == -1 || start_loc >= end_loc) { + // Can't find valid trailing context. Drop this patch. + start_loc = -1; + } + } + } else { + start_loc = match_main(text, text1, expected_loc); + } + if (start_loc == -1) { + // No match found. :( + results[x] = false; + // Subtract the delta for this failed patch from subsequent patches. + delta -= aPatch.length2 - aPatch.length1; + } else { + // Found a match. :) + results[x] = true; + delta = start_loc - expected_loc; + QString text2; + if (end_loc == -1) { + text2 = safeMid(text, start_loc, text1.length()); + } else { + text2 = safeMid(text, start_loc, end_loc + Match_MaxBits - start_loc); + } + if (text1 == text2) { + // Perfect match, just shove the replacement text in. + text = text.left(start_loc) + diff_text2(aPatch.diffs) + + safeMid(text, start_loc + text1.length()); + } else { + // Imperfect match. Run a diff to get a framework of equivalent + // indices. + QList<Diff> diffs = diff_main(text1, text2, false); + if (text1.length() > Match_MaxBits + && diff_levenshtein(diffs) / static_cast<float> (text1.length()) + > Patch_DeleteThreshold) { + // The end points match, but the content is unacceptably bad. + results[x] = false; + } else { + diff_cleanupSemanticLossless(diffs); + int index1 = 0; + foreach(Diff aDiff, aPatch.diffs) { + if (aDiff.operation != EQUAL) { + int index2 = diff_xIndex(diffs, index1); + if (aDiff.operation == INSERT) { + // Insertion + text = text.left(start_loc + index2) + aDiff.text + + safeMid(text, start_loc + index2); + } else if (aDiff.operation == DELETE) { + // Deletion + text = text.left(start_loc + index2) + + safeMid(text, start_loc + diff_xIndex(diffs, + index1 + aDiff.text.length())); + } + } + if (aDiff.operation != DELETE) { + index1 += aDiff.text.length(); + } + } + } + } + } + x++; + } + // Strip the padding off. + text = safeMid(text, nullPadding.length(), text.length() + - 2 * nullPadding.length()); + return QPair<QString, QVector<bool> >(text, results); +} + + +QString diff_match_patch::patch_addPadding(QList<Patch> &patches) { + short paddingLength = Patch_Margin; + QString nullPadding = ""; + for (short x = 1; x <= paddingLength; x++) { + nullPadding += QChar((ushort)x); + } + + // Bump all the patches forward. + QMutableListIterator<Patch> pointer(patches); + while (pointer.hasNext()) { + Patch &aPatch = pointer.next(); + aPatch.start1 += paddingLength; + aPatch.start2 += paddingLength; + } + + // Add some padding on start of first diff. + Patch &firstPatch = patches.first(); + QList<Diff> &firstPatchDiffs = firstPatch.diffs; + if (firstPatchDiffs.empty() || firstPatchDiffs.first().operation != EQUAL) { + // Add nullPadding equality. + firstPatchDiffs.prepend(Diff(EQUAL, nullPadding)); + firstPatch.start1 -= paddingLength; // Should be 0. + firstPatch.start2 -= paddingLength; // Should be 0. + firstPatch.length1 += paddingLength; + firstPatch.length2 += paddingLength; + } else if (paddingLength > firstPatchDiffs.first().text.length()) { + // Grow first equality. + Diff &firstDiff = firstPatchDiffs.first(); + int extraLength = paddingLength - firstDiff.text.length(); + firstDiff.text = safeMid(nullPadding, firstDiff.text.length(), + paddingLength - firstDiff.text.length()) + firstDiff.text; + firstPatch.start1 -= extraLength; + firstPatch.start2 -= extraLength; + firstPatch.length1 += extraLength; + firstPatch.length2 += extraLength; + } + + // Add some padding on end of last diff. + Patch &lastPatch = patches.first(); + QList<Diff> &lastPatchDiffs = lastPatch.diffs; + if (lastPatchDiffs.empty() || lastPatchDiffs.last().operation != EQUAL) { + // Add nullPadding equality. + lastPatchDiffs.append(Diff(EQUAL, nullPadding)); + lastPatch.length1 += paddingLength; + lastPatch.length2 += paddingLength; + } else if (paddingLength > lastPatchDiffs.last().text.length()) { + // Grow last equality. + Diff &lastDiff = lastPatchDiffs.last(); + int extraLength = paddingLength - lastDiff.text.length(); + lastDiff.text += nullPadding.left(extraLength); + lastPatch.length1 += extraLength; + lastPatch.length2 += extraLength; + } + + return nullPadding; +} + + +void diff_match_patch::patch_splitMax(QList<Patch> &patches) { + short patch_size = Match_MaxBits; + QString precontext, postcontext; + Patch patch; + int start1, start2; + bool empty; + Operation diff_type; + QString diff_text; + QMutableListIterator<Patch> pointer(patches); + Patch bigpatch; + + if (pointer.hasNext()) { + bigpatch = pointer.next(); + } + + while (!bigpatch.isNull()) { + if (bigpatch.length1 <= patch_size) { + bigpatch = pointer.hasNext() ? pointer.next() : Patch(); + continue; + } + // Remove the big old patch. + pointer.remove(); + start1 = bigpatch.start1; + start2 = bigpatch.start2; + precontext = ""; + while (!bigpatch.diffs.isEmpty()) { + // Create one of several smaller patches. + patch = Patch(); + empty = true; + patch.start1 = start1 - precontext.length(); + patch.start2 = start2 - precontext.length(); + if (!precontext.isEmpty()) { + patch.length1 = patch.length2 = precontext.length(); + patch.diffs.append(Diff(EQUAL, precontext)); + } + while (!bigpatch.diffs.isEmpty() + && patch.length1 < patch_size - Patch_Margin) { + diff_type = bigpatch.diffs.front().operation; + diff_text = bigpatch.diffs.front().text; + if (diff_type == INSERT) { + // Insertions are harmless. + patch.length2 += diff_text.length(); + start2 += diff_text.length(); + patch.diffs.append(bigpatch.diffs.front()); + bigpatch.diffs.removeFirst(); + empty = false; + } else if (diff_type == DELETE && patch.diffs.size() == 1 + && patch.diffs.front().operation == EQUAL + && diff_text.length() > 2 * patch_size) { + // This is a large deletion. Let it pass in one chunk. + patch.length1 += diff_text.length(); + start1 += diff_text.length(); + empty = false; + patch.diffs.append(Diff(diff_type, diff_text)); + bigpatch.diffs.removeFirst(); + } else { + // Deletion or equality. Only take as much as we can stomach. + diff_text = diff_text.left(std::min(diff_text.length(), + patch_size - patch.length1 - Patch_Margin)); + patch.length1 += diff_text.length(); + start1 += diff_text.length(); + if (diff_type == EQUAL) { + patch.length2 += diff_text.length(); + start2 += diff_text.length(); + } else { + empty = false; + } + patch.diffs.append(Diff(diff_type, diff_text)); + if (diff_text == bigpatch.diffs.front().text) { + bigpatch.diffs.removeFirst(); + } else { + bigpatch.diffs.front().text = safeMid(bigpatch.diffs.front().text, + diff_text.length()); + } + } + } + // Compute the head context for the next patch. + precontext = diff_text2(patch.diffs); + precontext = safeMid(precontext, precontext.length() - Patch_Margin); + // Append the end context for this patch. + if (diff_text1(bigpatch.diffs).length() > Patch_Margin) { + postcontext = diff_text1(bigpatch.diffs).left(Patch_Margin); + } else { + postcontext = diff_text1(bigpatch.diffs); + } + if (!postcontext.isEmpty()) { + patch.length1 += postcontext.length(); + patch.length2 += postcontext.length(); + if (!patch.diffs.isEmpty() + && patch.diffs.back().operation == EQUAL) { + patch.diffs.back().text += postcontext; + } else { + patch.diffs.append(Diff(EQUAL, postcontext)); + } + } + if (!empty) { + pointer.insert(patch); + } + } + bigpatch = pointer.hasNext() ? pointer.next() : Patch(); + } +} + + +QString diff_match_patch::patch_toText(const QList<Patch> &patches) { + QString text; + foreach(Patch aPatch, patches) { + text.append(aPatch.toString()); + } + return text; +} + + +QList<Patch> diff_match_patch::patch_fromText(const QString &textline) { + QList<Patch> patches; + if (textline.isEmpty()) { + return patches; + } + QStringList text = textline.split("\n", QString::SkipEmptyParts); + Patch patch; + QRegExp patchHeader("^@@ -(\\d+),?(\\d*) \\+(\\d+),?(\\d*) @@$"); + char sign; + QString line; + while (!text.isEmpty()) { + if (!patchHeader.exactMatch(text.front())) { + throw QString("Invalid patch string: %1").arg(text.front()); + } + + patch = Patch(); + patch.start1 = patchHeader.cap(1).toInt(); + if (patchHeader.cap(2).isEmpty()) { + patch.start1--; + patch.length1 = 1; + } else if (patchHeader.cap(2) == "0") { + patch.length1 = 0; + } else { + patch.start1--; + patch.length1 = patchHeader.cap(2).toInt(); + } + + patch.start2 = patchHeader.cap(3).toInt(); + if (patchHeader.cap(4).isEmpty()) { + patch.start2--; + patch.length2 = 1; + } else if (patchHeader.cap(4) == "0") { + patch.length2 = 0; + } else { + patch.start2--; + patch.length2 = patchHeader.cap(4).toInt(); + } + text.removeFirst(); + + while (!text.isEmpty()) { + if (text.front().isEmpty()) { + text.removeFirst(); + continue; + } + sign = text.front()[0].toLatin1(); + line = safeMid(text.front(), 1); + line = line.replace("+", "%2B"); // decode would change all "+" to " " + line = QUrl::fromPercentEncoding(qPrintable(line)); + if (sign == '-') { + // Deletion. + patch.diffs.append(Diff(DELETE, line)); + } else if (sign == '+') { + // Insertion. + patch.diffs.append(Diff(INSERT, line)); + } else if (sign == ' ') { + // Minor equality. + patch.diffs.append(Diff(EQUAL, line)); + } else if (sign == '@') { + // Start of next patch. + break; + } else { + // WTF? + throw QString("Invalid patch mode '%1' in: %2").arg(sign).arg(line); + return QList<Patch>(); + } + text.removeFirst(); + } + + patches.append(patch); + + } + return patches; +} diff --git a/SQLiteStudio3/coreSQLiteStudio/diff/diff_match_patch.h b/SQLiteStudio3/coreSQLiteStudio/diff/diff_match_patch.h new file mode 100644 index 0000000..d2eb0e8 --- /dev/null +++ b/SQLiteStudio3/coreSQLiteStudio/diff/diff_match_patch.h @@ -0,0 +1,631 @@ +/* + * Copyright 2008 Google Inc. All Rights Reserved. + * Author: fraser@google.com (Neil Fraser) + * Author: mikeslemmer@gmail.com (Mike Slemmer) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Diff Match and Patch + * http://code.google.com/p/google-diff-match-patch/ + */ + +#ifndef DIFF_MATCH_PATCH_H +#define DIFF_MATCH_PATCH_H + +#include <time.h> +#include "coreSQLiteStudio_global.h" + +/* + * Functions for diff, match and patch. + * Computes the difference between two texts to create a patch. + * Applies the patch onto another text, allowing for errors. + * + * @author fraser@google.com (Neil Fraser) + * + * Qt/C++ port by mikeslemmer@gmail.com (Mike Slemmer): + * + * Code known to compile and run with Qt 4.3 through Qt 4.7. + * + * Here is a trivial sample program which works properly when linked with this + * library: + * + + #include <QtCore> + #include <QString> + #include <QList> + #include <QMap> + #include <QVariant> + #include "diff_match_patch.h" + int main(int argc, char **argv) { + diff_match_patch dmp; + QString str1 = QString("First string in diff"); + QString str2 = QString("Second string in diff"); + + QString strPatch = dmp.patch_toText(dmp.patch_make(str1, str2)); + QPair<QString, QVector<bool> > out + = dmp.patch_apply(dmp.patch_fromText(strPatch), str1); + QString strResult = out.first; + + // here, strResult will equal str2 above. + return 0; + } + + */ + + +/**- +* The data structure representing a diff is a Linked list of Diff objects: +* {Diff(Operation.DELETE, "Hello"), Diff(Operation.INSERT, "Goodbye"), +* Diff(Operation.EQUAL, " world.")} +* which means: delete "Hello", add "Goodbye" and keep " world." +*/ +enum Operation { + DELETE, INSERT, EQUAL +}; + + +/** +* Class representing one diff operation. +*/ +class API_EXPORT Diff { + public: + Operation operation; + // One of: INSERT, DELETE or EQUAL. + QString text; + // The text associated with this diff operation. + + /** + * Constructor. Initializes the diff with the provided values. + * @param operation One of INSERT, DELETE or EQUAL. + * @param text The text being applied. + */ + Diff(Operation _operation, const QString &_text); + Diff(); + inline bool isNull() const; + QString toString() const; + bool operator==(const Diff &d) const; + bool operator!=(const Diff &d) const; + + static QString strOperation(Operation op); +}; + + +/** +* Class representing one patch operation. +*/ +class API_EXPORT Patch { + public: + QList<Diff> diffs; + int start1; + int start2; + int length1; + int length2; + + /** + * Constructor. Initializes with an empty list of diffs. + */ + Patch(); + bool isNull() const; + QString toString(); +}; + + +/** + * Class containing the diff, match and patch methods. + * Also contains the behaviour settings. + */ +class API_EXPORT diff_match_patch { + + friend class diff_match_patch_test; + + public: + // Defaults. + // Set these on your diff_match_patch instance to override the defaults. + + // Number of seconds to map a diff before giving up (0 for infinity). + float Diff_Timeout; + // Cost of an empty edit operation in terms of edit characters. + short Diff_EditCost; + // At what point is no match declared (0.0 = perfection, 1.0 = very loose). + float Match_Threshold; + // How far to search for a match (0 = exact location, 1000+ = broad match). + // A match this many characters away from the expected location will add + // 1.0 to the score (0.0 is a perfect match). + int Match_Distance; + // When deleting a large block of text (over ~64 characters), how close does + // the contents have to match the expected contents. (0.0 = perfection, + // 1.0 = very loose). Note that Match_Threshold controls how closely the + // end points of a delete need to match. + float Patch_DeleteThreshold; + // Chunk size for context length. + short Patch_Margin; + + // The number of bits in an int. + short Match_MaxBits; + + private: + // Define some regex patterns for matching boundaries. + static QRegExp BLANKLINEEND; + static QRegExp BLANKLINESTART; + + + public: + + diff_match_patch(); + + // DIFF FUNCTIONS + + + /** + * Find the differences between two texts. + * Run a faster slightly less optimal diff. + * This method allows the 'checklines' of diff_main() to be optional. + * Most of the time checklines is wanted, so default to true. + * @param text1 Old string to be diffed. + * @param text2 New string to be diffed. + * @return Linked List of Diff objects. + */ + QList<Diff> diff_main(const QString &text1, const QString &text2); + + /** + * Find the differences between two texts. + * @param text1 Old string to be diffed. + * @param text2 New string to be diffed. + * @param checklines Speedup flag. If false, then don't run a + * line-level diff first to identify the changed areas. + * If true, then run a faster slightly less optimal diff. + * @return Linked List of Diff objects. + */ + QList<Diff> diff_main(const QString &text1, const QString &text2, bool checklines); + + /** + * Find the differences between two texts. Simplifies the problem by + * stripping any common prefix or suffix off the texts before diffing. + * @param text1 Old string to be diffed. + * @param text2 New string to be diffed. + * @param checklines Speedup flag. If false, then don't run a + * line-level diff first to identify the changed areas. + * If true, then run a faster slightly less optimal diff. + * @param deadline Time when the diff should be complete by. Used + * internally for recursive calls. Users should set DiffTimeout instead. + * @return Linked List of Diff objects. + */ + private: + QList<Diff> diff_main(const QString &text1, const QString &text2, bool checklines, clock_t deadline); + + /** + * Find the differences between two texts. Assumes that the texts do not + * have any common prefix or suffix. + * @param text1 Old string to be diffed. + * @param text2 New string to be diffed. + * @param checklines Speedup flag. If false, then don't run a + * line-level diff first to identify the changed areas. + * If true, then run a faster slightly less optimal diff. + * @param deadline Time when the diff should be complete by. + * @return Linked List of Diff objects. + */ + private: + QList<Diff> diff_compute(QString text1, QString text2, bool checklines, clock_t deadline); + + /** + * Do a quick line-level diff on both strings, then rediff the parts for + * greater accuracy. + * This speedup can produce non-minimal diffs. + * @param text1 Old string to be diffed. + * @param text2 New string to be diffed. + * @param deadline Time when the diff should be complete by. + * @return Linked List of Diff objects. + */ + private: + QList<Diff> diff_lineMode(QString text1, QString text2, clock_t deadline); + + /** + * Find the 'middle snake' of a diff, split the problem in two + * and return the recursively constructed diff. + * See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations. + * @param text1 Old string to be diffed. + * @param text2 New string to be diffed. + * @return Linked List of Diff objects. + */ + protected: + QList<Diff> diff_bisect(const QString &text1, const QString &text2, clock_t deadline); + + /** + * Given the location of the 'middle snake', split the diff in two parts + * and recurse. + * @param text1 Old string to be diffed. + * @param text2 New string to be diffed. + * @param x Index of split point in text1. + * @param y Index of split point in text2. + * @param deadline Time at which to bail if not yet complete. + * @return LinkedList of Diff objects. + */ + private: + QList<Diff> diff_bisectSplit(const QString &text1, const QString &text2, int x, int y, clock_t deadline); + + /** + * Split two texts into a list of strings. Reduce the texts to a string of + * hashes where each Unicode character represents one line. + * @param text1 First string. + * @param text2 Second string. + * @return Three element Object array, containing the encoded text1, the + * encoded text2 and the List of unique strings. The zeroth element + * of the List of unique strings is intentionally blank. + */ + protected: + QList<QVariant> diff_linesToChars(const QString &text1, const QString &text2); // return elems 0 and 1 are QString, elem 2 is QStringList + + /** + * Split a text into a list of strings. Reduce the texts to a string of + * hashes where each Unicode character represents one line. + * @param text String to encode. + * @param lineArray List of unique strings. + * @param lineHash Map of strings to indices. + * @return Encoded string. + */ + private: + QString diff_linesToCharsMunge(const QString &text, QStringList &lineArray, + QMap<QString, int> &lineHash); + + /** + * Rehydrate the text in a diff from a string of line hashes to real lines of + * text. + * @param diffs LinkedList of Diff objects. + * @param lineArray List of unique strings. + */ + private: + void diff_charsToLines(QList<Diff> &diffs, const QStringList &lineArray); + + /** + * Determine the common prefix of two strings. + * @param text1 First string. + * @param text2 Second string. + * @return The number of characters common to the start of each string. + */ + public: + int diff_commonPrefix(const QString &text1, const QString &text2); + + /** + * Determine the common suffix of two strings. + * @param text1 First string. + * @param text2 Second string. + * @return The number of characters common to the end of each string. + */ + public: + int diff_commonSuffix(const QString &text1, const QString &text2); + + /** + * Determine if the suffix of one string is the prefix of another. + * @param text1 First string. + * @param text2 Second string. + * @return The number of characters common to the end of the first + * string and the start of the second string. + */ + protected: + int diff_commonOverlap(const QString &text1, const QString &text2); + + /** + * Do the two texts share a substring which is at least half the length of + * the longer text? + * This speedup can produce non-minimal diffs. + * @param text1 First string. + * @param text2 Second string. + * @return Five element String array, containing the prefix of text1, the + * suffix of text1, the prefix of text2, the suffix of text2 and the + * common middle. Or null if there was no match. + */ + protected: + QStringList diff_halfMatch(const QString &text1, const QString &text2); + + /** + * Does a substring of shorttext exist within longtext such that the + * substring is at least half the length of longtext? + * @param longtext Longer string. + * @param shorttext Shorter string. + * @param i Start index of quarter length substring within longtext. + * @return Five element String array, containing the prefix of longtext, the + * suffix of longtext, the prefix of shorttext, the suffix of shorttext + * and the common middle. Or null if there was no match. + */ + private: + QStringList diff_halfMatchI(const QString &longtext, const QString &shorttext, int i); + + /** + * Reduce the number of edits by eliminating semantically trivial equalities. + * @param diffs LinkedList of Diff objects. + */ + public: + void diff_cleanupSemantic(QList<Diff> &diffs); + + /** + * Look for single edits surrounded on both sides by equalities + * which can be shifted sideways to align the edit to a word boundary. + * e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came. + * @param diffs LinkedList of Diff objects. + */ + public: + void diff_cleanupSemanticLossless(QList<Diff> &diffs); + + /** + * Given two strings, compute a score representing whether the internal + * boundary falls on logical boundaries. + * Scores range from 6 (best) to 0 (worst). + * @param one First string. + * @param two Second string. + * @return The score. + */ + private: + int diff_cleanupSemanticScore(const QString &one, const QString &two); + + /** + * Reduce the number of edits by eliminating operationally trivial equalities. + * @param diffs LinkedList of Diff objects. + */ + public: + void diff_cleanupEfficiency(QList<Diff> &diffs); + + /** + * Reorder and merge like edit sections. Merge equalities. + * Any edit section can move as long as it doesn't cross an equality. + * @param diffs LinkedList of Diff objects. + */ + public: + void diff_cleanupMerge(QList<Diff> &diffs); + + /** + * loc is a location in text1, compute and return the equivalent location in + * text2. + * e.g. "The cat" vs "The big cat", 1->1, 5->8 + * @param diffs LinkedList of Diff objects. + * @param loc Location within text1. + * @return Location within text2. + */ + public: + int diff_xIndex(const QList<Diff> &diffs, int loc); + + /** + * Convert a Diff list into a pretty HTML report. + * @param diffs LinkedList of Diff objects. + * @return HTML representation. + */ + public: + QString diff_prettyHtml(const QList<Diff> &diffs); + + /** + * Compute and return the source text (all equalities and deletions). + * @param diffs LinkedList of Diff objects. + * @return Source text. + */ + public: + QString diff_text1(const QList<Diff> &diffs); + + /** + * Compute and return the destination text (all equalities and insertions). + * @param diffs LinkedList of Diff objects. + * @return Destination text. + */ + public: + QString diff_text2(const QList<Diff> &diffs); + + /** + * Compute the Levenshtein distance; the number of inserted, deleted or + * substituted characters. + * @param diffs LinkedList of Diff objects. + * @return Number of changes. + */ + public: + int diff_levenshtein(const QList<Diff> &diffs); + + /** + * Crush the diff into an encoded string which describes the operations + * required to transform text1 into text2. + * E.g. =3\t-2\t+ing -> Keep 3 chars, delete 2 chars, insert 'ing'. + * Operations are tab-separated. Inserted text is escaped using %xx notation. + * @param diffs Array of diff tuples. + * @return Delta text. + */ + public: + QString diff_toDelta(const QList<Diff> &diffs); + + /** + * Given the original text1, and an encoded string which describes the + * operations required to transform text1 into text2, compute the full diff. + * @param text1 Source string for the diff. + * @param delta Delta text. + * @return Array of diff tuples or null if invalid. + * @throws QString If invalid input. + */ + public: + QList<Diff> diff_fromDelta(const QString &text1, const QString &delta); + + + // MATCH FUNCTIONS + + + /** + * Locate the best instance of 'pattern' in 'text' near 'loc'. + * Returns -1 if no match found. + * @param text The text to search. + * @param pattern The pattern to search for. + * @param loc The location to search around. + * @return Best match index or -1. + */ + public: + int match_main(const QString &text, const QString &pattern, int loc); + + /** + * Locate the best instance of 'pattern' in 'text' near 'loc' using the + * Bitap algorithm. Returns -1 if no match found. + * @param text The text to search. + * @param pattern The pattern to search for. + * @param loc The location to search around. + * @return Best match index or -1. + */ + protected: + int match_bitap(const QString &text, const QString &pattern, int loc); + + /** + * Compute and return the score for a match with e errors and x location. + * @param e Number of errors in match. + * @param x Location of match. + * @param loc Expected location of match. + * @param pattern Pattern being sought. + * @return Overall score for match (0.0 = good, 1.0 = bad). + */ + private: + double match_bitapScore(int e, int x, int loc, const QString &pattern); + + /** + * Initialise the alphabet for the Bitap algorithm. + * @param pattern The text to encode. + * @return Hash of character locations. + */ + protected: + QMap<QChar, int> match_alphabet(const QString &pattern); + + + // PATCH FUNCTIONS + + + /** + * Increase the context until it is unique, + * but don't let the pattern expand beyond Match_MaxBits. + * @param patch The patch to grow. + * @param text Source text. + */ + protected: + void patch_addContext(Patch &patch, const QString &text); + + /** + * Compute a list of patches to turn text1 into text2. + * A set of diffs will be computed. + * @param text1 Old text. + * @param text2 New text. + * @return LinkedList of Patch objects. + */ + public: + QList<Patch> patch_make(const QString &text1, const QString &text2); + + /** + * Compute a list of patches to turn text1 into text2. + * text1 will be derived from the provided diffs. + * @param diffs Array of diff tuples for text1 to text2. + * @return LinkedList of Patch objects. + */ + public: + QList<Patch> patch_make(const QList<Diff> &diffs); + + /** + * Compute a list of patches to turn text1 into text2. + * text2 is ignored, diffs are the delta between text1 and text2. + * @param text1 Old text. + * @param text2 Ignored. + * @param diffs Array of diff tuples for text1 to text2. + * @return LinkedList of Patch objects. + * @deprecated Prefer patch_make(const QString &text1, const QList<Diff> &diffs). + */ + public: + QList<Patch> patch_make(const QString &text1, const QString &text2, const QList<Diff> &diffs); + + /** + * Compute a list of patches to turn text1 into text2. + * text2 is not provided, diffs are the delta between text1 and text2. + * @param text1 Old text. + * @param diffs Array of diff tuples for text1 to text2. + * @return LinkedList of Patch objects. + */ + public: + QList<Patch> patch_make(const QString &text1, const QList<Diff> &diffs); + + /** + * Given an array of patches, return another array that is identical. + * @param patches Array of patch objects. + * @return Array of patch objects. + */ + public: + QList<Patch> patch_deepCopy(QList<Patch> &patches); + + /** + * Merge a set of patches onto the text. Return a patched text, as well + * as an array of true/false values indicating which patches were applied. + * @param patches Array of patch objects. + * @param text Old text. + * @return Two element Object array, containing the new text and an array of + * boolean values. + */ + public: + QPair<QString,QVector<bool> > patch_apply(QList<Patch> &patches, const QString &text); + + /** + * Add some padding on text start and end so that edges can match something. + * Intended to be called only from within patch_apply. + * @param patches Array of patch objects. + * @return The padding string added to each side. + */ + public: + QString patch_addPadding(QList<Patch> &patches); + + /** + * Look through the patches and break up any which are longer than the + * maximum limit of the match algorithm. + * Intended to be called only from within patch_apply. + * @param patches LinkedList of Patch objects. + */ + public: + void patch_splitMax(QList<Patch> &patches); + + /** + * Take a list of patches and return a textual representation. + * @param patches List of Patch objects. + * @return Text representation of patches. + */ + public: + QString patch_toText(const QList<Patch> &patches); + + /** + * Parse a textual representation of patches and return a List of Patch + * objects. + * @param textline Text representation of patches. + * @return List of Patch objects. + * @throws QString If invalid input. + */ + public: + QList<Patch> patch_fromText(const QString &textline); + + /** + * A safer version of QString.mid(pos). This one returns "" instead of + * null when the postion equals the string length. + * @param str String to take a substring from. + * @param pos Position to start the substring from. + * @return Substring. + */ + private: + static inline QString safeMid(const QString &str, int pos) { + return (pos == str.length()) ? QString("") : str.mid(pos); + } + + /** + * A safer version of QString.mid(pos, len). This one returns "" instead of + * null when the postion equals the string length. + * @param str String to take a substring from. + * @param pos Position to start the substring from. + * @param len Length of substring. + * @return Substring. + */ + private: + static inline QString safeMid(const QString &str, int pos, int len) { + return (pos == str.length()) ? QString("") : str.mid(pos, len); + } +}; + +#endif // DIFF_MATCH_PATCH_H |
