28 enum { minLengthToMatch = 3,
29 maxComplexity = 16 * 1024 * 1024 };
37 : text (t), start (s), length (len) {}
39 void incrementStart()
noexcept { ++text; ++start; --length; }
54 static void addDeletion (
TextDiff& td,
int index,
int length)
62 static void diffSkippingCommonStart (TextDiff& td, StringRegion a, StringRegion b)
69 if (ca != cb || ca == 0)
76 diffRecursively (td, a, b);
79 static void diffRecursively (TextDiff& td, StringRegion a, StringRegion b)
81 int indexA = 0, indexB = 0;
82 auto len = findLongestCommonSubstring (a.text, a.length, indexA,
83 b.text, b.length, indexB);
85 if (len >= minLengthToMatch)
87 if (indexA > 0 && indexB > 0)
88 diffSkippingCommonStart (td, StringRegion (a.text, a.start, indexA),
89 StringRegion (b.text, b.start, indexB));
91 addDeletion (td, b.start, indexA);
93 addInsertion (td, b.text, b.start, indexB);
95 diffRecursively (td, StringRegion (a.text + (indexA + len), a.start + indexA + len, a.length - indexA - len),
96 StringRegion (b.text + (indexB + len), b.start + indexB + len, b.length - indexB - len));
100 if (a.length > 0) addDeletion (td, b.start, a.length);
101 if (b.length > 0) addInsertion (td, b.text, b.start, b.length);
108 if (lenA == 0 || lenB == 0)
111 if (lenA * lenB > maxComplexity)
112 return findCommonSuffix (a, lenA, indexInA,
115 auto scratchSpace =
sizeof (int) * (2 + 2 * (
size_t) lenB);
117 if (scratchSpace < 4096)
119 auto* scratch = (
int*) alloca (scratchSpace);
120 return findLongestCommonSubstring (a, lenA, indexInA, b, lenB, indexInB, scratchSpace, scratch);
123 HeapBlock<int> scratch (scratchSpace);
124 return findLongestCommonSubstring (a, lenA, indexInA, b, lenB, indexInB, scratchSpace, scratch);
129 const size_t scratchSpace,
int*
const lines)
noexcept
131 zeromem (lines, scratchSpace);
134 auto* l1 = l0 + lenB + 1;
136 int loopsWithoutImprovement = 0;
139 for (
int i = 0; i < lenA; ++i)
141 auto ca = a.getAndAdvance();
144 for (
int j = 0; j < lenB; ++j)
146 if (ca != b2.getAndAdvance())
152 auto len = l0[j] + 1;
155 if (len > bestLength)
157 loopsWithoutImprovement = 0;
165 if (++loopsWithoutImprovement > 100)
171 indexInA -= bestLength - 1;
172 indexInB -= bestLength - 1;
183 while (length < lenA && length < lenB && *a == *b)
190 indexInA = lenA - length;
191 indexInB = lenB - length;
198 TextDiffHelpers::diffSkippingCommonStart (*
this, original, target);
216 return text.replaceSection (start, length, insertedText);
228 :
UnitTest (
"TextDiff class", UnitTestCategories::text)
233 juce_wchar buffer[500] = { 0 };
235 for (
int i = r.
nextInt (numElementsInArray (buffer) - 1); --i >= 0;)
241 buffer[i] = (juce_wchar) (1 + r.
nextInt (0x10ffff - 1));
246 buffer[i] = (juce_wchar) (
'a' + r.
nextInt (3));
249 return CharPointer_UTF32 (buffer);
252 void testDiff (
const String& a,
const String& b)
255 auto result = diff.appliedTo (a);
256 expectEquals (result, b);
259 void runTest()
override
261 beginTest (
"TextDiff");
263 auto r = getRandom();
265 testDiff (String(), String());
266 testDiff (
"x", String());
267 testDiff (String(),
"x");
270 testDiff (
"xxx",
"x");
271 testDiff (
"x",
"xxx");
273 for (
int i = 1000; --i >= 0;)
275 auto s = createString (r);
276 testDiff (s, createString (r));
277 testDiff (s + createString (r), s + createString (r));
282static DiffTests diffTests;
static bool canRepresent(juce_wchar character) noexcept
Returns true if the given unicode character can be represented in this encoding.
Wraps a pointer to a null-terminated UTF-8 character string, and provides various methods to operate ...
A random number generator.
int nextInt() noexcept
Returns the next random 32 bit integer.
CharPointerType getCharPointer() const noexcept
Returns the character pointer currently being used to store this string.
int length() const noexcept
Returns the number of characters in the string.
bool isEmpty() const noexcept
Returns true if the string contains no characters.
CharPointer_UTF8 CharPointerType
This is the character encoding type used internally to store the string.
Calculates and applies a sequence of changes to convert one text string into another.
TextDiff(const String &original, const String &target)
Creates a set of diffs for converting the original string into the target.
Array< Change > changes
The list of changes required to perform the transformation.
String appliedTo(String text) const
Applies this sequence of changes to the original string, producing the target string that was specifi...
This is a base class for classes that perform a unit test.
Describes a change, which can be either an insertion or deletion.
String appliedTo(const String &original) const noexcept
Returns the result of applying this change to a string.
int length
If this change is a deletion, this specifies the number of characters to delete.
int start
Specifies the character index in a string at which text should be inserted or deleted.
String insertedText
If this change is a deletion, this string will be empty; otherwise, it'll be the text that should be ...
bool isDeletion() const noexcept
Returns true if this change is a deletion, or false for an insertion.