1 /** 2 * Natural string comparison 3 * 4 * This module provides functions for easily sorting and comparing strings. 5 * For implementing natural comparison methods in structs, see the NaturalComparable and NaturalPathComparable templates, 6 * or the compareNatural and comparePathsNatural functions for a more manual implementation. 7 * 8 * For sorting ranges of strings, see the compareNaturalSort and comparePathsNaturalSort functions. 9 * 10 * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0). 11 * Authors: Cameron "Herringway" Ross 12 * Copyright: 2015-2016 Cameron "Herringway" Ross 13 */ 14 module natcmp; 15 private import std.traits; 16 private struct NaturalCompareChunk(T = const(dchar)[]) { 17 enum CompareMode { String, Integer } 18 public T str; 19 public CompareMode mode; 20 this(T input) pure @safe nothrow @nogc { 21 import std.uni : isNumber; 22 str = input; 23 mode = input[0].isNumber ? CompareMode.Integer : CompareMode.String; 24 } 25 /** 26 * Compares two chunks. 27 * Integers are assumed to come before non-integers. 28 * 29 * Returns: -1 if a comes before b, 0 if a and b are equal, 1 if a comes after b 30 */ 31 public int opCmp(const NaturalCompareChunk b) const pure @safe in { 32 import std.uni : isNumber; 33 foreach (character; this.str) { 34 if (this.mode == CompareMode.Integer) 35 assert(character.isNumber(), "Non-numeric value found in number string"); 36 else 37 assert(!character.isNumber(), "Numeric value found in non-numeric string"); 38 } 39 } out(result) { 40 assert(result <= 1, "Result too large"); 41 assert(result >= -1, "Result too small"); 42 } body { 43 import std.algorithm : clamp, cmp; 44 import std.conv : to; 45 import std.uni : icmp; 46 if (this.mode != b.mode) { 47 if ((this.mode == CompareMode.Integer) && (b.mode == CompareMode.String)) 48 return -1; 49 else 50 return 1; 51 } else { 52 final switch(this.mode) { 53 case CompareMode.String: 54 return clamp(icmp(this.str, b.str), -1, 1); 55 case CompareMode.Integer: 56 immutable int1 = this.str.to!long; 57 immutable int2 = b.str.to!long; 58 if (int1 == int2) 59 return clamp(icmp(this.str, b.str), -1, 1); 60 if (int1 > int2) 61 return 1; 62 return -1; 63 } 64 } 65 } 66 public bool opEquals(const NaturalCompareChunk b) const pure @safe { 67 import std.conv : to; 68 import std.uni : icmp; 69 if (this.mode != b.mode) 70 return false; 71 else { 72 final switch(this.mode) { 73 case CompareMode.String: 74 return icmp(this.str,b.str) == 0; 75 case CompareMode.Integer: 76 return this.str == b.str; 77 } 78 } 79 } 80 alias str this; 81 //TODO: implement proper toHash for completeness 82 } 83 @safe pure unittest { 84 import std.algorithm : equal; 85 NaturalCompareChunk!dstring chunkA; 86 NaturalCompareChunk!dstring chunkB; 87 chunkA = NaturalCompareChunk!dstring("a"); 88 chunkB = NaturalCompareChunk!dstring("a"); 89 assert((chunkA >= chunkB) && (chunkA <= chunkB), "a == a"); 90 chunkA = NaturalCompareChunk!dstring("a"); 91 chunkB = NaturalCompareChunk!dstring("b"); 92 assert(chunkA < chunkB, "a > b"); 93 chunkA = NaturalCompareChunk!dstring("b"); 94 chunkB = NaturalCompareChunk!dstring("a"); 95 assert(chunkA > chunkB, "b < a"); 96 chunkA = NaturalCompareChunk!dstring("1"); 97 chunkB = NaturalCompareChunk!dstring("a"); 98 assert(chunkA < chunkB, "1 > a"); 99 chunkA = NaturalCompareChunk!dstring("a"); 100 chunkB = NaturalCompareChunk!dstring("1"); 101 assert(chunkA > chunkB, "a < 1"); 102 chunkA = NaturalCompareChunk!dstring("1"); 103 chunkB = NaturalCompareChunk!dstring("2"); 104 assert(chunkA < chunkB, "1 > 2"); 105 chunkA = NaturalCompareChunk!dstring("3"); 106 chunkB = NaturalCompareChunk!dstring("1"); 107 assert(chunkA > chunkB, "1 > 3"); 108 chunkA = NaturalCompareChunk!dstring("3"); 109 chunkB = NaturalCompareChunk!dstring("1"); 110 assert(chunkA > chunkB, "3 > 1"); 111 chunkA = NaturalCompareChunk!dstring("c"); 112 chunkB = NaturalCompareChunk!dstring("a"); 113 assert(chunkA > chunkB, "a > c"); 114 chunkA = NaturalCompareChunk!dstring("c"); 115 chunkB = NaturalCompareChunk!dstring("a"); 116 assert(chunkA > chunkB, "c > a"); 117 chunkA = NaturalCompareChunk!dstring( "1"); 118 chunkB = NaturalCompareChunk!dstring("01"); 119 assert(chunkA > chunkB, "1 > 01"); 120 chunkA = NaturalCompareChunk!dstring( "01"); 121 chunkB = NaturalCompareChunk!dstring("001"); 122 assert(chunkA > chunkB, "01 > 001"); 123 auto testAssoc = [chunkA: "a", chunkB: "b"]; 124 assert(testAssoc[chunkA] != testAssoc[chunkB]); 125 } 126 /** 127 * Compares two strings in a way that is natural to humans. 128 * Integers come before non-integers, and integers are compared as if they were numbers instead of strings of characters. 129 * Best used in opCmp overloads. 130 * 131 * Returns: -1 if a comes before b, 0 if a and b are equal, 1 if a comes after b 132 */ 133 int compareNatural(T)(const T a, const T b) if (isSomeString!T) out(result) { 134 assert(result <= 1, "Result too large"); 135 assert(result >= -1, "Result too small"); 136 } body { 137 import std.algorithm : min, map, chunkBy; 138 import std.uni : isNumber; 139 import std.range : walkLength, zip; 140 import std.conv : to; 141 auto chunksA = a 142 .chunkBy!isNumber 143 .map!(x => NaturalCompareChunk!T(x[1].to!T)); 144 auto chunksB = b 145 .chunkBy!isNumber 146 .map!(x => NaturalCompareChunk!T(x[1].to!T)); 147 foreach (chunkA, chunkB; zip(chunksA, chunksB)) { 148 if (chunkA != chunkB) 149 return chunkA > chunkB ? 1 : -1; 150 } 151 if (!chunksA.empty && chunksB.empty) 152 return 1; 153 if (chunksA.empty && !chunksB.empty) 154 return -1; 155 return 0; 156 } 157 @system pure unittest { 158 assert(compareNatural("10", "1") == 1, "1 > 10"); 159 assert(compareNatural("010", "1") == 1, "1 > 010"); 160 assert(compareNatural("10", "01") == 1, "01 > 10"); 161 assert(compareNatural("10_1", "1_1") == 1, "1_1 > 10_1"); 162 assert(compareNatural("10_1", "10_2") == -1, "10_1 > 10_2"); 163 assert(compareNatural("10_2", "10_1") == 1, "10_1 > 10_2"); 164 assert(compareNatural("a", "a1") == -1, "a > a1"); 165 assert(compareNatural("a1", "a") == 1, "a1 < a"); 166 assert(compareNatural("1000", "something") == -1, "1000 > something"); 167 assert(compareNatural("something", "1000") == 1, "something < 1000"); 168 assert(compareNatural("十", "〇") == 1, "Japanese: 10 > 0"); 169 assert(compareNatural("עֶ֫שֶׂר", "אֶ֫פֶס") == 1, "Biblical Hebrew: 10 > 0"); 170 assert(compareNatural("PG10", "Pg11") != compareNatural("Pg11", "PG10")); 171 assert(compareNatural("01", "1") != compareNatural("1", "01")); 172 } 173 private mixin template NaturalComparableCommon(alias comparator, alias T) if (!isCallable!T || (functionAttributes!T & (FunctionAttribute.safe | FunctionAttribute.nothrow_ | FunctionAttribute.const_))) { 174 import std.string : split; 175 alias __NaturalComparabletype = typeof(this); 176 enum __NaturalComparablemember = T.stringof.split(".")[$-1].split("(")[0]; 177 int opCmp(const __NaturalComparabletype b) const { 178 return comparator(T, __traits(getMember, b, __NaturalComparablemember)); 179 } 180 bool opEquals(const __NaturalComparabletype b) const { 181 return T == __traits(getMember, b, __NaturalComparablemember); 182 } 183 size_t toHash() const nothrow { 184 return hashOf(T); 185 } 186 } 187 /** 188 * Automatically generates natural-comparing opCmp, opEquals and toHash methods for a particular property or method. 189 * Methods must be @safe, nothrow, and const. 190 */ 191 mixin template NaturalComparable(alias T) { 192 mixin NaturalComparableCommon!(compareNatural, T); 193 } 194 /// 195 @system pure unittest { 196 struct SomeStruct { 197 dstring someText; 198 mixin NaturalComparable!someText; 199 } 200 struct SomeStructWithFunction { 201 dstring _value; 202 dstring something() const nothrow @safe { 203 return _value; 204 } 205 mixin NaturalComparable!something; 206 } 207 assert(SomeStruct("100") > SomeStruct("2")); 208 assert(SomeStruct("100") == SomeStruct("100")); 209 assert(SomeStructWithFunction("100") > SomeStructWithFunction("2")); 210 assert(SomeStructWithFunction("100") == SomeStructWithFunction("100")); 211 } 212 @system pure unittest { 213 struct SomeStruct { 214 dstring someText; 215 mixin NaturalComparable!someText; 216 } 217 struct SomeStructWithFunction { 218 dstring _value; 219 dstring something() const nothrow @safe { 220 return _value; 221 } 222 mixin NaturalComparable!something; 223 } 224 { 225 auto data = [SomeStruct("1") : "test", SomeStruct("2"): "test2"]; 226 assert(data[SomeStruct("1")] == "test"); 227 assert(data[SomeStruct("2")] == "test2"); 228 } 229 { 230 auto data = [SomeStructWithFunction("1") : "test", SomeStructWithFunction("2"): "test2"]; 231 assert(data[SomeStructWithFunction("1")] == "test"); 232 assert(data[SomeStructWithFunction("2")] == "test2"); 233 } 234 } 235 236 /** 237 * Natural string comparison function for use with phobos's sorting algorithm 238 * 239 * Returns: true if a < b 240 */ 241 bool compareNaturalSort(T)(const T a, const T b) if (isSomeString!T) { 242 return compareNatural(a,b) < 0; 243 } 244 /// 245 @system pure unittest { 246 import std.algorithm : sort, equal; 247 import std.array : array; 248 assert(compareNaturalSort("a", "b") == true); 249 assert(equal(sort!compareNaturalSort(["0", "10", "1"]), ["0", "1", "10"])); 250 assert(equal(sort!compareNaturalSort(["a", "c", "b"]), ["a", "b", "c"])); 251 assert(equal(sort!compareNaturalSort(["a1", "a"]), ["a", "a1"])); 252 } 253 /** 254 * Compares path strings naturally. Comparing paths naturally requires path separators to be treated specially. 255 * Intended for usage in opCmp overloads. 256 * 257 * Returns: -1 if a comes before b, 0 if a and b are equal, 1 if a comes after b 258 */ 259 int comparePathsNatural(T)(const T pathA, const T pathB) if (isSomeString!T) in { 260 import std.path : isValidPath; 261 import std.utf : toUTF8; 262 assert(pathA.isValidPath(), ("First path ("~pathA~") is invalid").toUTF8); 263 assert(pathB.isValidPath(), ("Second path ("~pathB~") is invalid").toUTF8); 264 } out(result) { 265 assert(result <= 1, "Result too large"); 266 assert(result >= -1, "Result too small"); 267 } body { 268 import std.path : pathSplitter; 269 import std.range : zip; 270 int outVal = 0; 271 foreach (componentA, componentB; zip(pathSplitter(pathA), pathSplitter(pathB))) { 272 outVal = compareNatural(componentA, componentB); 273 if (outVal != 0) 274 break; 275 } 276 return outVal; 277 } 278 @system pure unittest { 279 import std.path : buildPath; 280 assert(comparePathsNatural("a/b/c", "a/b/d") == -1, "Final path component sorting failed"); 281 assert(comparePathsNatural("a/b/c", "a/b/d") == -1, "Final path component sorting failed"); 282 assert(comparePathsNatural("a/b/c", "a/b/c") == 0, "Identical path sorting failed"); 283 assert(comparePathsNatural("a/b/c", "a/b/a") == 1, "Final path component sorting failed (2)"); 284 assert(comparePathsNatural("a/b/c", "a/c/c") == -1, "Middle path component sorting failed"); 285 assert(comparePathsNatural("a/c/c", "a/b/c") == 1, "Middle path component sorting failed (2)"); 286 assert(comparePathsNatural("a/b/c", "b/b/c") == -1, "First path component sorting failed"); 287 assert(comparePathsNatural("b/b/c", "a/b/c") == 1, "First path component sorting failed (2)"); 288 assert(comparePathsNatural("a/b", "a1/b") == -1, "Appended chunk sorting failed"); 289 assert(comparePathsNatural("a1/b", "a/b") == 1, "Appended chunk sorting failed (2)"); 290 assert(comparePathsNatural(buildPath("a", "b", "c"), buildPath("a", "b", "d")) == -1, "failure to sort rangified path"); 291 } 292 /** 293 * Automatically generates natural-path-comparing opCmp, opEquals and toHash methods for a particular property or method. 294 * Methods must be @safe, nothrow, and const. 295 */ 296 mixin template NaturalPathComparable(alias T) { 297 mixin NaturalComparableCommon!(comparePathsNatural, T); 298 } 299 /// 300 @system pure unittest { 301 struct SomePathStruct { 302 dstring somePathText; 303 mixin NaturalPathComparable!somePathText; 304 } 305 struct SomePathStructWithFunction { 306 dstring _value; 307 dstring path() const @safe nothrow { 308 return _value; 309 } 310 mixin NaturalPathComparable!path; 311 } 312 assert(SomePathStruct("a/b/c") < SomePathStruct("a/b/d")); 313 assert(SomePathStructWithFunction("a/b/c") < SomePathStructWithFunction("a/b/d")); 314 } 315 /** 316 * Path comparison function for use with phobos's sorting algorithm 317 * 318 * Returns: true if a < b 319 */ 320 bool comparePathsNaturalSort(T)(const T a, const T b) if (isSomeString!T) { 321 return comparePathsNatural(b,a) > 0; 322 } 323 /// 324 @system pure unittest { 325 import std.algorithm : sort, equal; 326 import std.array : array; 327 assert(comparePathsNaturalSort("a/b", "a1/b") == true); 328 assert(equal(sort!comparePathsNaturalSort(["a/b/c", "a/b/e", "a/b/d"]), ["a/b/c", "a/b/d", "a/b/e"])); 329 assert(equal(sort!comparePathsNaturalSort(["a1", "a"]), ["a", "a1"])); 330 assert(equal(sort!comparePathsNaturalSort(["a1/b", "a/b"]), ["a/b", "a1/b"])); 331 }