1 module natcmp;
2 
3 nothrow @safe:
4 private enum compareMode { Undefined, String, Integer }; ///Marks the chunk type
5 /**
6  * A chunk of text, representing either a string of numbers or a string of other characters.
7  * Authors: Cameron "Herringway" Ross
8  */
9 private struct naturalCompareChunk {
10 	nothrow @safe pure:
11 	public dstring str;
12 	public compareMode mode = compareMode.Undefined;
13 	/**
14 	 * Compares two chunks.
15 	 * Integers are assumed to come before non-integers.
16 	 * Returns: 0 on failure, [-1,1] on success
17 	 */
18 	public int opCmp(ref const naturalCompareChunk b) in {
19 		import std.uni : isNumber;
20 		assert(this.mode != compareMode.Undefined, "Undefined chunk type (A)");
21 		assert(   b.mode != compareMode.Undefined, "Undefined chunk type (B)");
22 		foreach (character; str) {
23 			if (this.mode == compareMode.Integer)
24 				assert(character.isNumber(), "Non-numeric value found in number string");
25 			else
26 				assert(!character.isNumber(), "Numeric value found in non-numeric string");
27 		}
28 	} out(result) {
29 		assert(result <= 1, "Result too large");
30 		assert(result >= -1, "Result too small");
31 	} body {
32 		import std.algorithm : min, max;
33 		import std.conv : to;
34 		import std.uni : icmp;
35 		try {
36 			if ((this.mode == compareMode.Integer) && (b.mode == compareMode.String)) {
37 				return -1;
38 			} else if ((this.mode == compareMode.String) && (b.mode == compareMode.Integer)) {
39 				return 1;
40 			} else if (this.mode == compareMode.String) {
41 				return min(1, max(-1, icmp(this.str, b.str)));
42 			} else if (this.mode == compareMode.Integer) {
43 				auto int1 = to!long(this.str);
44 				auto int2 = to!long(b.str);
45 				if (int1 == int2)
46 					return min(1, max(-1, icmp(this.str, b.str)));
47 				return cast(int)max(-1,min(1,int1-int2));
48 			}
49 		} catch (Exception) {
50 			return 0;
51 		}
52 		assert(false);
53 	}
54 }
55 unittest {
56 	naturalCompareChunk chunkA;
57 	naturalCompareChunk chunkB;
58 	chunkA = naturalCompareChunk("a", compareMode.String);
59 	chunkB = naturalCompareChunk("a", compareMode.String);
60 	assertEqual(chunkA.opCmp(chunkB), 0, "Equal chunks tested as inequal");
61 	chunkA = naturalCompareChunk("a", compareMode.String);
62 	chunkB = naturalCompareChunk("b", compareMode.String);
63 	assert(chunkA < chunkB, "a > b");
64 	chunkA = naturalCompareChunk("b", compareMode.String);
65 	chunkB = naturalCompareChunk("a", compareMode.String);
66 	assert(chunkA > chunkB, "b < a");
67 	chunkA = naturalCompareChunk("1", compareMode.Integer);
68 	chunkB = naturalCompareChunk("a", compareMode.String);
69 	assert(chunkA < chunkB, "1 > a");
70 	chunkA = naturalCompareChunk("a", compareMode.String);
71 	chunkB = naturalCompareChunk("1", compareMode.Integer);
72 	assert(chunkA > chunkB, "a < 1");
73 	chunkA = naturalCompareChunk("1", compareMode.Integer);
74 	chunkB = naturalCompareChunk("2", compareMode.Integer);
75 	assert(chunkA < chunkB, "1 > 2");
76 	chunkA = naturalCompareChunk("1", compareMode.Integer);
77 	chunkB = naturalCompareChunk("3", compareMode.Integer);
78 	assertEqual(chunkA.opCmp(chunkB), -1, "(1 > 3) > 1");
79 	chunkA = naturalCompareChunk("3", compareMode.Integer);
80 	chunkB = naturalCompareChunk("1", compareMode.Integer);
81 	assertEqual(chunkA.opCmp(chunkB), 1, "(3 > 1) < -1");
82 	chunkA = naturalCompareChunk("a", compareMode.String);
83 	chunkB = naturalCompareChunk("c", compareMode.String);
84 	assertEqual(chunkA.opCmp(chunkB), -1, "(a > c) > 1");
85 	chunkA = naturalCompareChunk("c", compareMode.String);
86 	chunkB = naturalCompareChunk("a", compareMode.String);
87 	assertEqual(chunkA.opCmp(chunkB), 1, "(c > a) > 1");
88 	chunkA = naturalCompareChunk( "1", compareMode.Integer);
89 	chunkB = naturalCompareChunk("01", compareMode.Integer);
90 	assertEqual(chunkA.opCmp(chunkB), 1, "(1 > 01) > 1");
91 	chunkA = naturalCompareChunk( "01", compareMode.Integer);
92 	chunkB = naturalCompareChunk("001", compareMode.Integer);
93 	assertEqual(chunkA.opCmp(chunkB), 1, "(01 > 001) > 1");
94 }
95 /**
96  * Splits a string into component chunks. Each component is treated either as an integer or a string.
97  * Returns: A list of prepared string chunks
98  */
99 private naturalCompareChunk[] buildChunkList(inout dchar[] str) pure in {
100 	//Unsure if there are any constraints on input
101 } out(result) {
102 	foreach (chunk; result)
103 		assert(chunk.mode != compareMode.Undefined, "Undefined chunk type");
104 } body {
105 	import std.uni : isNumber;
106 	naturalCompareChunk tempChunk;
107 	naturalCompareChunk[] output;
108 	foreach (character; str) {
109 		if (character.isNumber()) {
110 			if (tempChunk.mode == compareMode.Integer)
111 				tempChunk.str ~= character;
112 			if (tempChunk.mode == compareMode.Undefined) {
113 				tempChunk.str = [character];
114 				tempChunk.mode = compareMode.Integer;
115 			}
116 			if (tempChunk.mode == compareMode.String) {
117 				output ~= tempChunk;
118 				tempChunk = naturalCompareChunk([character],compareMode.Integer);
119 			}
120 		} else {
121 			if (tempChunk.mode == compareMode.String)
122 				tempChunk.str ~= character;
123 			if (tempChunk.mode == compareMode.Undefined) {
124 				tempChunk.str = [character];
125 				tempChunk.mode = compareMode.String;
126 			}
127 			if (tempChunk.mode == compareMode.Integer) {
128 				output  ~= tempChunk;
129 				tempChunk = naturalCompareChunk([character],compareMode.String);
130 			}
131 		}
132 	}
133 	output ~= tempChunk;
134 	return output;
135 }
136 /**
137  * Compares two strings in a way that is natural to humans. 
138  * Integers come before non-integers, and integers are compared as if they were numbers instead of strings of characters.
139  * Intended for usage in opCmp overloads.
140  * Examples:
141  * --------------------
142  * struct someStruct {
143  *     dstring someText;
144  *     int opCmp(someStruct b) {
145  *          return compareNatural(this.someText, b.someText);
146  *     }
147  * }
148  * --------------------
149  * Returns: -1 if a comes before b, 0 if a and b are equal, 1 if a comes after b
150  */
151 int compareNatural(inout dchar[] a, inout dchar[] b) pure in {
152 
153 	} out(result) {
154 		assert(result <= 1, "Result too large");
155 		assert(result >= -1, "Result too small");
156 	} body {
157 	import std.algorithm : min;
158 	naturalCompareChunk[] chunkA = buildChunkList(a);
159 	naturalCompareChunk[] chunkB = buildChunkList(b);
160 	int cmpVal;
161 	foreach (index; 0..min(chunkA.length, chunkB.length)) {
162 		cmpVal = chunkA[index].opCmp(chunkB[index]);
163 		if (cmpVal != 0)
164 			return cmpVal;
165 	}
166 	if (chunkA.length > chunkB.length)
167 		return 1;
168 	if (chunkA.length < chunkB.length)
169 		return -1;
170 	return 0;
171 }
172 /**
173  * Overload for 8-bit strings
174  */
175 int compareNatural(inout char[] a, inout char[] b) pure {
176 	import std.utf : toUTF32;
177 	try {
178 		return compareNatural(a.toUTF32(), b.toUTF32());
179 	} catch (Exception) { return 0; }
180 }
181 /**
182  * Overload for 16-bit strings
183  */
184 int compareNatural(inout wchar[] a, inout wchar[] b) pure {
185 	import std.utf : toUTF32;
186 	try {
187 		return compareNatural(a.toUTF32(), b.toUTF32());
188 	} catch (Exception) { return 0; }
189 }
190 unittest {
191 	assertEqual(compareNatural("10", "1"), 1, "1 > 10");
192 	assertEqual(compareNatural("010", "1"), 1, "1 > 010");
193 	assertEqual(compareNatural("10", "01"), 1, "01 > 10");
194 	assertEqual(compareNatural("10_1", "1_1"), 1, "1_1 > 10_1");
195 	assertEqual(compareNatural("10_1", "10_2"), -1, "10_1 > 10_2");
196 	assertEqual(compareNatural("10_2", "10_1"), 1, "10_1 > 10_2");
197 	assertEqual(compareNatural("a", "a1"), -1, "a > a1");
198 	assertEqual(compareNatural("a1", "a"), 1, "a1 < a");
199 	assertEqual(compareNatural("1000", "something"), -1, "1000 > something");
200 	assertEqual(compareNatural("something", "1000"), 1, "something < 1000");
201 	assertEqual(compareNatural("十", "〇"), 1, "Japanese: 10 > 0");
202 	assertEqual(compareNatural("עֶ֫שֶׂר", "אֶ֫פֶס"), 1, "Biblical Hebrew: 10 > 0");
203 }
204 
205 /** 
206 * Natural string comparison function for use with phobos's sorting algorithm
207 * Examples:
208 * --------------------
209 * assert(sort!compareNaturalSort(array(["0", "10", "1"])) == ["0", "1", "10"]);
210 * assert(sort!compareNaturalSort(array(["a", "c", "b"])) == ["a", "b", "c"]);
211 * assert(sort!compareNaturalSort(array(["a1", "a"])) == ["a", "a1"]);
212 * --------------------
213 * Returns: true if a < b
214 */
215 bool compareNaturalSort(inout(dchar[]) a, inout(dchar[]) b) pure {
216 	return compareNatural(b,a) > 0;
217 }
218 /**
219  * Overload for 8-bit strings
220  */
221 bool compareNaturalSort(inout(char[]) a, inout(char[]) b) pure {
222 	import std.utf : toUTF32;
223 	try {
224 		return compareNaturalSort(a.toUTF32(), b.toUTF32());
225 	} catch (Exception) { return 0; }
226 }
227 /**
228  * Overload for 16-bit strings
229  */
230 bool compareNaturalSort(inout(wchar[]) a, inout(wchar[]) b) pure {
231 	import std.utf : toUTF32;
232 	try {
233 		return compareNaturalSort(a.toUTF32(), b.toUTF32());
234 	} catch (Exception) { return 0; }
235 }
236 @system unittest {
237 	import std.algorithm : sort;
238 	import std.array : array;
239 	assertEqual(compareNaturalSort("a", "b"), true);
240 	assertEqual(array(sort!compareNaturalSort(["0", "10", "1"])), ["0", "1", "10"]);
241 	assertEqual(array(sort!compareNaturalSort(["a", "c", "b"])), ["a", "b", "c"]);
242 	assertEqual(array(sort!compareNaturalSort(["a1", "a"])), ["a", "a1"]);
243 }
244 /**
245  * Compares path strings naturally. Comparing paths naturally requires path separators to be treated specially.
246  * Intended for usage in opCmp overloads.
247  * Examples:
248  * --------------------
249  * struct someStruct {
250  *     string someText;
251  *     int opCmp(someStruct b) {
252  *          return comparePathsNatural(this.someText, b.someText);
253  *     }
254  * }
255  * --------------------
256  * Returns: -1 if a comes before b, 0 if a and b are equal, 1 if a comes after b
257  */
258 int comparePathsNatural(inout(dchar[]) pathA, inout(dchar[]) pathB) pure in {
259 	import std.path : isValidPath;
260 	assert(pathA.isValidPath(), "First path is invalid");
261 	assert(pathB.isValidPath(), "Second path is invalid");	
262 } out(result) {
263 	assert(result <= 1, "Result too large");
264 	assert(result >= -1, "Result too small");
265 } body {
266 	import std.algorithm : min;
267 	import std.array : array;
268 	import std.path : pathSplitter;
269 	auto pathSplitA = array(pathSplitter(pathA));
270 	auto pathSplitB = array(pathSplitter(pathB));
271 	int outVal = 0;
272 	foreach (index; 0..min(pathSplitA.length, pathSplitB.length)) {
273 		outVal = compareNatural(pathSplitA[index], pathSplitB[index]);
274 		if (outVal != 0)
275 			break;
276 	}
277 	return outVal;
278 }
279 /**
280  * Overload for 8-bit strings
281  */
282 int comparePathsNatural(inout(char[]) pathA, inout(char[]) pathB) pure {
283 	import std.utf : toUTF32;
284 	try {
285 		return comparePathsNatural(pathA.toUTF32(), pathB.toUTF32());
286 	} catch (Exception) { return 0; }
287 }
288 /**
289  * Overload for 8-bit strings
290  */
291 int comparePathsNatural(inout(wchar[]) pathA, inout(wchar[]) pathB) pure {
292 	import std.utf : toUTF32;
293 	try {
294 		return comparePathsNatural(pathA.toUTF32(), pathB.toUTF32());
295 	} catch (Exception) { return 0; }
296 }
297 /** 
298  * Path comparison function for use with phobos's sorting algorithm
299  * Examples:
300  * --------------------
301  * assert(array(sort!comparePathsNaturalSort(["a/b/c", "a/b/e", "a/b/d"])) == ["a/b/c", "a/b/d", "a/b/e"]);
302  * assert(array(sort!comparePathsNaturalSort(["a1", "a"])) == ["a", "a1"]);
303  * assert(array(sort!comparePathsNaturalSort(["a1/b", "a/b"])) == ["a/b", "a1/b"]);
304  * --------------------
305  * Returns: true if a < b
306  */
307 bool comparePathsNaturalSort(inout(dchar[]) a, inout(dchar[]) b) pure {
308 	return comparePathsNatural(b,a) > 0;
309 }
310 
311 /**
312  * Overload for 8-bit strings
313  */
314 bool comparePathsNaturalSort(inout(char[]) a, inout(char[]) b) pure {
315 	import std.utf : toUTF32;
316 	try {
317 		return comparePathsNaturalSort(a.toUTF32(), b.toUTF32());
318 	} catch (Exception) { return 0; }
319 }
320 /**
321  * Overload for 8-bit strings
322  */
323 bool comparePathsNaturalSort(inout(wchar[]) a, inout(wchar[]) b) pure {
324 	import std.utf : toUTF32;
325 	try {
326 		return comparePathsNaturalSort(a.toUTF32(), b.toUTF32());
327 	} catch (Exception) { return 0; }
328 }
329 @system unittest {
330 	import std.algorithm : sort;
331 	import std.array : array;
332 	assertEqual(comparePathsNaturalSort("a/b", "a1/b"), true);
333 	assertEqual(array(sort!comparePathsNaturalSort(["a/b/c", "a/b/e", "a/b/d"])), ["a/b/c", "a/b/d", "a/b/e"]);
334 	assertEqual(array(sort!comparePathsNaturalSort(["a1", "a"])), ["a", "a1"]);
335 	assertEqual(array(sort!comparePathsNaturalSort(["a1/b", "a/b"])), ["a/b", "a1/b"]);
336 }
337 unittest {
338 	assertEqual(comparePathsNatural("a/b/c", "a/b/d"), -1, "Final path component sorting failed");
339 	assertEqual(comparePathsNatural("a/b/c", "a/b/d"), -1, "Final path component sorting failed");
340 	assertEqual(comparePathsNatural("a/b/c", "a/b/c"), 0, "Identical path sorting failed");
341 	assertEqual(comparePathsNatural("a/b/c", "a/b/a"), 1, "Final path component sorting failed (2)");
342 	assertEqual(comparePathsNatural("a/b/c", "a/c/c"), -1, "Middle path component sorting failed");
343 	assertEqual(comparePathsNatural("a/c/c", "a/b/c"), 1, "Middle path component sorting failed (2)");
344 	assertEqual(comparePathsNatural("a/b/c", "b/b/c"), -1, "First path component sorting failed");
345 	assertEqual(comparePathsNatural("b/b/c", "a/b/c"), 1, "First path component sorting failed (2)");
346 	assertEqual(comparePathsNatural("a/b", "a1/b"), -1, "Appended chunk sorting failed");
347 	assertEqual(comparePathsNatural("a1/b", "a/b"), 1, "Appended chunk sorting failed (2)");
348 }
349 private void assertEqual(T,U)(lazy T valA, lazy U valB, string message = "") pure {
350 	import std.string : format;
351 	try {
352 		if (valA != valB)
353 			assert(true, format("%s: %s != %s",message, valA, valB));
354 	} catch (Exception e) {
355 		assert(true, e.msg);
356 	}
357 }