1 /**
2  * Natural string comparison
3  *
4  * This module provides functions for easily sorting and comparing strings.
5  * For implementing natural comparison methods in structs, see the NaturalComparable and NaturalPathComparable templates,
6  * or the compareNatural and comparePathsNatural functions for a more manual implementation.
7  *
8  * For sorting ranges of strings, see the compareNaturalSort and comparePathsNaturalSort functions.
9  *
10  * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
11  * Authors: Cameron "Herringway" Ross
12  * Copyright: 2015-2016 Cameron "Herringway" Ross
13  */
14 module natcmp;
15 private import std.traits;
16 private struct NaturalCompareChunk(T = const(dchar)[]) {
17 	enum CompareMode { String, Integer }
18 	public T str;
19 	public CompareMode mode;
20 	this(T input) pure @safe nothrow @nogc {
21 		import std.uni : isNumber;
22 		str = input;
23 		mode = input[0].isNumber ? CompareMode.Integer : CompareMode.String;
24 	}
25 	/**
26 	 * Compares two chunks.
27 	 * Integers are assumed to come before non-integers.
28 	 *
29 	 * Returns: -1 if a comes before b, 0 if a and b are equal, 1 if a comes after b
30 	 */
31 	public int opCmp(const NaturalCompareChunk b) const pure @safe in {
32 		import std.uni : isNumber;
33 		foreach (character; this.str) {
34 			if (this.mode == CompareMode.Integer)
35 				assert(character.isNumber(), "Non-numeric value found in number string");
36 			else
37 				assert(!character.isNumber(), "Numeric value found in non-numeric string");
38 		}
39 	} out(result) {
40 		assert(result <= 1, "Result too large");
41 		assert(result >= -1, "Result too small");
42 	} body {
43 		import std.algorithm : clamp, cmp;
44 		import std.conv : to;
45 		import std.uni : icmp;
46 		if (this.mode != b.mode) {
47 			if ((this.mode == CompareMode.Integer) && (b.mode == CompareMode.String))
48 				return -1;
49 			else
50 				return 1;
51 		} else {
52 			final switch(this.mode) {
53 				case CompareMode.String:
54 					return clamp(icmp(this.str, b.str), -1, 1);
55 				case CompareMode.Integer:
56 					immutable int1 = this.str.to!long;
57 					immutable int2 = b.str.to!long;
58 					if (int1 == int2)
59 						return clamp(icmp(this.str, b.str), -1, 1);
60 					if (int1 > int2)
61 						return 1;
62 					return -1;
63 			}
64 		}
65 	}
66 	public bool opEquals(const NaturalCompareChunk b) const pure @safe {
67 		import std.conv : to;
68 		import std.uni : icmp;
69 		if (this.mode != b.mode)
70 			return false;
71 		else {
72 			final switch(this.mode) {
73 				case CompareMode.String:
74 					return icmp(this.str,b.str) == 0;
75 				case CompareMode.Integer:
76 					return this.str == b.str;
77 			}
78 		}
79 	}
80 	alias str this;
81 	//TODO: implement proper toHash for completeness
82 }
83 @safe pure unittest {
84 	import std.algorithm : equal;
85 	NaturalCompareChunk!dstring chunkA;
86 	NaturalCompareChunk!dstring chunkB;
87 	chunkA = NaturalCompareChunk!dstring("a");
88 	chunkB = NaturalCompareChunk!dstring("a");
89 	assert((chunkA >= chunkB) && (chunkA <= chunkB), "a == a");
90 	chunkA = NaturalCompareChunk!dstring("a");
91 	chunkB = NaturalCompareChunk!dstring("b");
92 	assert(chunkA < chunkB, "a > b");
93 	chunkA = NaturalCompareChunk!dstring("b");
94 	chunkB = NaturalCompareChunk!dstring("a");
95 	assert(chunkA > chunkB, "b < a");
96 	chunkA = NaturalCompareChunk!dstring("1");
97 	chunkB = NaturalCompareChunk!dstring("a");
98 	assert(chunkA < chunkB, "1 > a");
99 	chunkA = NaturalCompareChunk!dstring("a");
100 	chunkB = NaturalCompareChunk!dstring("1");
101 	assert(chunkA > chunkB, "a < 1");
102 	chunkA = NaturalCompareChunk!dstring("1");
103 	chunkB = NaturalCompareChunk!dstring("2");
104 	assert(chunkA < chunkB, "1 > 2");
105 	chunkA = NaturalCompareChunk!dstring("3");
106 	chunkB = NaturalCompareChunk!dstring("1");
107 	assert(chunkA > chunkB, "1 > 3");
108 	chunkA = NaturalCompareChunk!dstring("3");
109 	chunkB = NaturalCompareChunk!dstring("1");
110 	assert(chunkA > chunkB, "3 > 1");
111 	chunkA = NaturalCompareChunk!dstring("c");
112 	chunkB = NaturalCompareChunk!dstring("a");
113 	assert(chunkA > chunkB, "a > c");
114 	chunkA = NaturalCompareChunk!dstring("c");
115 	chunkB = NaturalCompareChunk!dstring("a");
116 	assert(chunkA > chunkB, "c > a");
117 	chunkA = NaturalCompareChunk!dstring( "1");
118 	chunkB = NaturalCompareChunk!dstring("01");
119 	assert(chunkA > chunkB, "1 > 01");
120 	chunkA = NaturalCompareChunk!dstring( "01");
121 	chunkB = NaturalCompareChunk!dstring("001");
122 	assert(chunkA > chunkB, "01 > 001");
123 	auto testAssoc = [chunkA: "a", chunkB: "b"];
124 	assert(testAssoc[chunkA] != testAssoc[chunkB]);
125 }
126 /**
127  * Compares two strings in a way that is natural to humans.
128  * Integers come before non-integers, and integers are compared as if they were numbers instead of strings of characters.
129  * Best used in opCmp overloads.
130  *
131  * Returns: -1 if a comes before b, 0 if a and b are equal, 1 if a comes after b
132  */
133 int compareNatural(T)(const T a, const T b) if (isSomeString!T) out(result) {
134 		assert(result <= 1, "Result too large");
135 		assert(result >= -1, "Result too small");
136 } body {
137 	import std.algorithm : min, map, chunkBy;
138 	import std.uni : isNumber;
139 	import std.range : walkLength, zip;
140 	import std.conv : to;
141 	auto chunksA = a
142 		.chunkBy!isNumber
143 		.map!(x => NaturalCompareChunk!T(x[1].to!T));
144 	auto chunksB = b
145 		.chunkBy!isNumber
146 		.map!(x => NaturalCompareChunk!T(x[1].to!T));
147 	foreach (chunkA, chunkB; zip(chunksA, chunksB)) {
148 		if (chunkA != chunkB)
149 			return chunkA > chunkB ? 1 : -1;
150 	}
151 	if (!chunksA.empty && chunksB.empty)
152 		return 1;
153 	if (chunksA.empty && !chunksB.empty)
154 		return -1;
155 	return 0;
156 }
157 @system pure unittest {
158 	assert(compareNatural("10", "1") == 1, "1 > 10");
159 	assert(compareNatural("010", "1") == 1, "1 > 010");
160 	assert(compareNatural("10", "01") == 1, "01 > 10");
161 	assert(compareNatural("10_1", "1_1") == 1, "1_1 > 10_1");
162 	assert(compareNatural("10_1", "10_2") == -1, "10_1 > 10_2");
163 	assert(compareNatural("10_2", "10_1") == 1, "10_1 > 10_2");
164 	assert(compareNatural("a", "a1") == -1, "a > a1");
165 	assert(compareNatural("a1", "a") == 1, "a1 < a");
166 	assert(compareNatural("1000", "something") == -1, "1000 > something");
167 	assert(compareNatural("something", "1000") == 1, "something < 1000");
168 	assert(compareNatural("十", "〇") == 1, "Japanese: 10 > 0");
169 	assert(compareNatural("עֶ֫שֶׂר", "אֶ֫פֶס") == 1, "Biblical Hebrew: 10 > 0");
170 	assert(compareNatural("PG10", "Pg11") != compareNatural("Pg11", "PG10"));
171 	assert(compareNatural("01", "1") != compareNatural("1", "01"));
172 }
173 private mixin template NaturalComparableCommon(alias comparator, alias T) if (!isCallable!T || (functionAttributes!T & (FunctionAttribute.safe | FunctionAttribute.nothrow_ | FunctionAttribute.const_))) {
174 	import std.string : split;
175 	alias __NaturalComparabletype = typeof(this);
176 	enum __NaturalComparablemember = T.stringof.split(".")[$-1].split("(")[0];
177     int opCmp(const __NaturalComparabletype b) const {
178     	return comparator(T, __traits(getMember, b, __NaturalComparablemember));
179     }
180     bool opEquals(const __NaturalComparabletype b) const {
181     	return T == __traits(getMember, b, __NaturalComparablemember);
182     }
183     size_t toHash() const nothrow {
184     	return hashOf(T);
185     }
186 }
187 /**
188  * Automatically generates natural-comparing opCmp, opEquals and toHash methods for a particular property or method.
189  * Methods must be @safe, nothrow, and const.
190  */
191 mixin template NaturalComparable(alias T) {
192 	mixin NaturalComparableCommon!(compareNatural, T);
193 }
194 ///
195 @system pure unittest {
196 	struct SomeStruct {
197 		dstring someText;
198 		mixin NaturalComparable!someText;
199 	}
200 	struct SomeStructWithFunction {
201 		dstring _value;
202 		dstring something() const nothrow @safe {
203 			return _value;
204 		}
205 		mixin NaturalComparable!something;
206 	}
207 	assert(SomeStruct("100") > SomeStruct("2"));
208 	assert(SomeStruct("100") == SomeStruct("100"));
209 	assert(SomeStructWithFunction("100") > SomeStructWithFunction("2"));
210 	assert(SomeStructWithFunction("100") == SomeStructWithFunction("100"));
211 }
212 @system pure unittest {
213 	struct SomeStruct {
214 		dstring someText;
215 		mixin NaturalComparable!someText;
216 	}
217 	struct SomeStructWithFunction {
218 		dstring _value;
219 		dstring something() const nothrow @safe {
220 			return _value;
221 		}
222 		mixin NaturalComparable!something;
223 	}
224 	{
225 		auto data = [SomeStruct("1") : "test", SomeStruct("2"): "test2"];
226 		assert(data[SomeStruct("1")] == "test");
227 		assert(data[SomeStruct("2")] == "test2");
228 	}
229 	{
230 		auto data = [SomeStructWithFunction("1") : "test", SomeStructWithFunction("2"): "test2"];
231 		assert(data[SomeStructWithFunction("1")] == "test");
232 		assert(data[SomeStructWithFunction("2")] == "test2");
233 	}
234 }
235 
236 /**
237 * Natural string comparison function for use with phobos's sorting algorithm
238 *
239 * Returns: true if a < b
240 */
241 bool compareNaturalSort(T)(const T a, const T b) if (isSomeString!T) {
242 	return compareNatural(a,b) < 0;
243 }
244 ///
245 @system pure unittest {
246 	import std.algorithm : sort, equal;
247 	import std.array : array;
248 	assert(compareNaturalSort("a", "b") == true);
249 	assert(equal(sort!compareNaturalSort(["0", "10", "1"]), ["0", "1", "10"]));
250 	assert(equal(sort!compareNaturalSort(["a", "c", "b"]), ["a", "b", "c"]));
251 	assert(equal(sort!compareNaturalSort(["a1", "a"]), ["a", "a1"]));
252 }
253 /**
254  * Compares path strings naturally. Comparing paths naturally requires path separators to be treated specially.
255  * Intended for usage in opCmp overloads.
256  *
257  * Returns: -1 if a comes before b, 0 if a and b are equal, 1 if a comes after b
258  */
259 int comparePathsNatural(T)(const T pathA, const T pathB) if (isSomeString!T) in {
260 	import std.path : isValidPath;
261 	import std.utf : toUTF8;
262 	assert(pathA.isValidPath(), ("First path ("~pathA~") is invalid").toUTF8);
263 	assert(pathB.isValidPath(), ("Second path ("~pathB~") is invalid").toUTF8);
264 } out(result) {
265 	assert(result <= 1, "Result too large");
266 	assert(result >= -1, "Result too small");
267 } body {
268 	import std.path : pathSplitter;
269 	import std.range : zip;
270 	int outVal = 0;
271 	foreach (componentA, componentB; zip(pathSplitter(pathA), pathSplitter(pathB))) {
272 		outVal = compareNatural(componentA, componentB);
273 		if (outVal != 0)
274 			break;
275 	}
276 	return outVal;
277 }
278 @system pure unittest {
279 	import std.path : buildPath;
280 	assert(comparePathsNatural("a/b/c", "a/b/d") == -1, "Final path component sorting failed");
281 	assert(comparePathsNatural("a/b/c", "a/b/d") == -1, "Final path component sorting failed");
282 	assert(comparePathsNatural("a/b/c", "a/b/c") == 0, "Identical path sorting failed");
283 	assert(comparePathsNatural("a/b/c", "a/b/a") == 1, "Final path component sorting failed (2)");
284 	assert(comparePathsNatural("a/b/c", "a/c/c") == -1, "Middle path component sorting failed");
285 	assert(comparePathsNatural("a/c/c", "a/b/c") == 1, "Middle path component sorting failed (2)");
286 	assert(comparePathsNatural("a/b/c", "b/b/c") == -1, "First path component sorting failed");
287 	assert(comparePathsNatural("b/b/c", "a/b/c") == 1, "First path component sorting failed (2)");
288 	assert(comparePathsNatural("a/b", "a1/b") == -1, "Appended chunk sorting failed");
289 	assert(comparePathsNatural("a1/b", "a/b") == 1, "Appended chunk sorting failed (2)");
290 	assert(comparePathsNatural(buildPath("a", "b", "c"), buildPath("a", "b", "d")) == -1, "failure to sort rangified path");
291 }
292 /**
293  * Automatically generates natural-path-comparing opCmp, opEquals and toHash methods for a particular property or method.
294  * Methods must be @safe, nothrow, and const.
295  */
296 mixin template NaturalPathComparable(alias T) {
297 	mixin NaturalComparableCommon!(comparePathsNatural, T);
298 }
299 ///
300 @system pure unittest {
301 	struct SomePathStruct {
302 		dstring somePathText;
303 		mixin NaturalPathComparable!somePathText;
304 	}
305 	struct SomePathStructWithFunction {
306 		dstring _value;
307 		dstring path() const @safe nothrow {
308 			return _value;
309 		}
310 		mixin NaturalPathComparable!path;
311 	}
312 	assert(SomePathStruct("a/b/c") < SomePathStruct("a/b/d"));
313 	assert(SomePathStructWithFunction("a/b/c") < SomePathStructWithFunction("a/b/d"));
314 }
315 /**
316  * Path comparison function for use with phobos's sorting algorithm
317  *
318  * Returns: true if a < b
319  */
320 bool comparePathsNaturalSort(T)(const T a, const T b) if (isSomeString!T) {
321 	return comparePathsNatural(b,a) > 0;
322 }
323 ///
324 @system pure unittest {
325 	import std.algorithm : sort, equal;
326 	import std.array : array;
327 	assert(comparePathsNaturalSort("a/b", "a1/b") == true);
328 	assert(equal(sort!comparePathsNaturalSort(["a/b/c", "a/b/e", "a/b/d"]), ["a/b/c", "a/b/d", "a/b/e"]));
329 	assert(equal(sort!comparePathsNaturalSort(["a1", "a"]), ["a", "a1"]));
330 	assert(equal(sort!comparePathsNaturalSort(["a1/b", "a/b"]), ["a/b", "a1/b"]));
331 }