1 /** 2 * Provides a utility class that preserves the minimal memory requirements of a string and combines 3 * it with the ability to access characters by grapheme, e.g. a visible character. 4 */ 5 module gstring; 6 7 import std.traits : isSomeChar, isSomeString; 8 import std.uni : Grapheme, byGrapheme; 9 import std.range : isInputRange, ElementType; 10 11 public import std.string : CaseSensitive; 12 13 @safe: 14 15 /// An alias for GString using `char` (UTF-8). 16 alias CGString = GString!char; 17 18 /// An alias for GString using `wchar` (UTF-16). 19 alias WGString = GString!wchar; 20 21 /// An alias for GString using `dchar` (UTF-32). 22 alias DGString = GString!dchar; 23 24 /** 25 * A `GString`, or grapheme-string, is a unicode-aware string supporting simple array-index 26 * operations that operate on graphemes (displayable characters). 27 * 28 * For example, consider the string "R̆um". At what array index is the character 'u'? Because this is 29 * a UTF-8 encoded string, it is actually at index 3, not 1. "R̆" is composed of 2 unicode 30 * code-points, "R" and " ̆ " (combining breve). The "R" code-point is encoded in a single byte, but 31 * the combining breve requires two bytes. These two code-points are combined into a single visible 32 * character (grapheme), "R̆". 33 * 34 * This type permits simple array-index operations based on graphemes, that is, the visible position 35 * of characters. It uses a string as its undrelying storage without an additional memory footprint. 36 * However, array-like operations tend to have `O(n)` complexity, because the type must compute the 37 * offset of each grapheme. Thus, `GString` is best used with smaller strings, especially those that 38 * need to perform operations by character position, such as in user interfaces. 39 * 40 * If memory is not an issue, and array-like operations by Grapheme are needed for very large 41 * strings, it is better to use `dchar[]` or `Grapheme[]`. 42 * 43 * Params: 44 * CharT = Controls the type of encoding to use. `char` => UTF-8, `wchar` => UTF-16, and 45 * `dchar` => UTF-32. 46 */ 47 struct GString(CharT) 48 if (isSomeChar!CharT) { 49 alias StringT = immutable(CharT)[]; 50 51 /// The underlying stored string using UTF encoding according to CharT. 52 StringT rawString; 53 54 this(StringT str) { 55 this.rawString = str; 56 } 57 58 this(R)(R str) 59 if (isSomeString!R) { 60 this(toStringT(str)); 61 } 62 63 /** 64 * A GString can be used with libraries that operate on normal strings, however, they are treated 65 * like normal strings, and these libraries should be used with caution. 66 */ 67 alias rawString this; 68 69 /// 70 unittest { 71 import std.string : capitalize; 72 auto t1 = typeof(this)("test"); 73 assert(capitalize(t1) == "Test"); 74 } 75 76 private static StringT toStringT(R)(R str) 77 if (isSomeString!R) { 78 static if (is(CharT == char)) { 79 import std.utf : toUTF8; 80 return str.toUTF8(); 81 } else static if (is(CharT == wchar)) { 82 import std.utf : toUTF16; 83 return str.toUTF16(); 84 } else static if (is(CharT == dchar)) { 85 import std.utf : toUTF32; 86 return str.toUTF32(); 87 } 88 } 89 90 private static StringT toStringT(C)(C ch) 91 if (isSomeChar!C) { 92 static if (is(CharT == char)) { 93 import std.conv : text; 94 return toStringT(text(ch)); 95 } else static if (is(CharT == wchar)) { 96 import std.conv : wtext; 97 return toStringT(wtext(ch)); 98 } else static if (is(CharT == dchar)) { 99 import std.conv : dtext; 100 return toStringT(dtext(ch)); 101 } 102 } 103 104 string toString() const { 105 import std.utf : toUTF8; 106 return toUTF8(rawString); 107 } 108 109 wstring toWString() const { 110 import std.utf : toUTF16; 111 return toUTF16(rawString); 112 } 113 114 dstring toDString() const { 115 import std.utf : toUTF32; 116 return toUTF32(rawString); 117 } 118 119 /** 120 * Returns the i'th displayable character (Grapheme) within the `GString`. 121 * 122 * Complexity: `O(i)`. Each Grapheme is composed of a variable number of code-points, and 123 * code-poinst are composed of a variable number of characters (code-units). 124 */ 125 Grapheme opIndex(size_t i) { 126 import std.range : drop; 127 return rawString.byGrapheme.drop(i).front; 128 } 129 130 /// 131 unittest { 132 auto t1 = typeof(this)("Test R̆ȧm͆b̪õ."); 133 assert(t1[3].length == 1); 134 assert(t1[3][0] == 't'); 135 assert(t1[5].length == 2); 136 assert(t1[5][0] == 'R' && t1[5][1] == '\u0306'); 137 assert(t1[6].length == 2); 138 assert(t1[6][0] == 'a' && t1[6][1] == '\u0307'); 139 } 140 141 /** 142 * Returns a range of Graphemes representing the original string. 143 * 144 * To retrieve a string, use `toString()`, `toWString()`, or `toDString()`. 145 * 146 * Complexity: `O(length)` 147 */ 148 Grapheme[] opIndex() { 149 import std.array : array; 150 return rawString.byGrapheme.array; 151 } 152 153 /** 154 * Creates a data structure to represent the 0'th index during array slicing, 155 * e.g. `gstring[i..j]`. 156 */ 157 size_t[2] opSlice(size_t dim = 0)(size_t i, size_t j) 158 { 159 return [i, j]; 160 } 161 162 Grapheme[] opIndex()(size_t[2] slice) { 163 import std.range : drop, take, array; 164 return rawString.byGrapheme.drop(slice[0]).take(slice[1]-slice[0]).array; 165 } 166 167 unittest { 168 auto t1 = typeof(this)("Test R̆ȧm͆b̪õ"); 169 assert(t1[2..7] == typeof(this)("st R̆ȧ")[]); 170 } 171 172 /// Replaces a single grapheme in the GString. 173 void opIndexAssign(R)(R str, size_t i) 174 if (isSomeString!R) { 175 opIndexAssign(Grapheme(str), i); 176 } 177 178 /// ditto 179 void opIndexAssign(C)(C ch, size_t i) 180 if (isSomeChar!C) { 181 opIndexAssign(Grapheme(ch), i); 182 } 183 184 /// ditto 185 void opIndexAssign(Grapheme g, size_t i) { 186 import std.uni : graphemeStride; 187 import std.array : array; 188 189 ptrdiff_t charIdx = 0; 190 for (int idx = 0; idx < i; idx++, charIdx += rawString.graphemeStride(charIdx)) {} 191 192 ptrdiff_t nextCharIdx = charIdx + rawString.graphemeStride(charIdx); 193 rawString = rawString[0..charIdx] ~ toStringT(g[].array) ~ rawString[nextCharIdx..$]; 194 } 195 196 /// 197 unittest { 198 auto t1 = typeof(this)("Test R̆ȧm͆b̪õ"); 199 t1[6] = Grapheme("a"); 200 assert(t1.rawString == "Test R̆am͆b̪õ"); // Reduces string size. 201 t1[6] = Grapheme("ȧ"); 202 assert(t1.rawString == "Test R̆ȧm͆b̪õ"); // Increases string size. 203 t1[7] = "m"; 204 assert(t1.rawString == "Test R̆ȧmb̪õ"); 205 t1[8] = 'b'; 206 assert(t1.rawString == "Test R̆ȧmbõ"); 207 } 208 209 /// Enables the use of `foreach (Grapheme; ...)` statements. 210 int opApply(scope int delegate (Grapheme) @safe dg) { 211 foreach (grapheme; rawString.byGrapheme) { 212 int result = dg(grapheme); 213 if (result) 214 return result; 215 } 216 return 0; 217 } 218 219 /// 220 unittest { 221 auto t1 = typeof(this)("Test R̆ȧm͆b̪õ"); 222 StringT nakedString; 223 StringT nakedExpected = "Test Rambo"; 224 foreach (g; t1) { 225 nakedString ~= g[0]; 226 } 227 assert(nakedString == nakedExpected); 228 } 229 230 /// Enables the use of `foreach (size_t, Grapheme; ...)` statements. 231 int opApply(scope int delegate (size_t index, Grapheme) @safe dg) { 232 size_t index = 0; 233 foreach (grapheme; rawString.byGrapheme) { 234 int result = dg(index, grapheme); 235 if (result) 236 return result; 237 index++; 238 } 239 return 0; 240 } 241 242 /// 243 unittest { 244 auto t1 = typeof(this)("Test R̆ȧm͆b̪õ"); 245 StringT nakedString; 246 StringT nakedExpected = "Test Rambo"; 247 size_t isum = 0; 248 foreach (i, g; t1) { 249 isum += i; 250 nakedString ~= g[0]; 251 } 252 assert(isum == 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9); 253 assert(nakedString == nakedExpected); 254 } 255 256 /// A binary operator to allow appending with a Grapheme. 257 typeof(this) opBinary(string op : "~")(Grapheme g) { 258 import std.array : array; 259 return typeof(this)(rawString ~ toStringT(g[].array)); 260 } 261 262 /// ditto 263 typeof(this) opBinaryRight(string op : "~")(Grapheme g) { 264 import std.array : array; 265 return typeof(this)(toStringT(g[].array) ~ rawString); 266 } 267 268 /// 269 unittest { 270 auto t1 = typeof(this)("Test "); 271 auto t2 = t1 ~ Grapheme("A"); 272 assert(t2.rawString == "Test A"); 273 auto t3 = Grapheme("B") ~ t1; 274 assert(t3.rawString == "BTest "); 275 } 276 277 /// A binary operator to allow appending with arbitrary strings. 278 typeof(this) opBinary(string op : "~", R)(R str) 279 if (isSomeString!R) { 280 return typeof(this)(rawString ~ toStringT(str)); 281 } 282 283 /// ditto 284 typeof(this) opBinaryRight(string op : "~", R)(R str) 285 if (isSomeString!R) { 286 return typeof(this)(toStringT(str) ~ rawString); 287 } 288 289 /// 290 unittest { 291 auto t1 = typeof(this)("Test "); 292 auto t2 = t1 ~ "A"; 293 assert(t2.rawString == "Test A"); 294 auto t3 = t1 ~ "B"w; 295 assert(t3.rawString == "Test B"); 296 auto t4 = t1 ~ "C"d; 297 assert(t4.rawString == "Test C"); 298 auto t5 = "D"w ~ t1; 299 assert(t5.rawString == "DTest "); 300 } 301 302 /// A binary operator supporting appending a single character. 303 typeof(this) opBinary(string op : "~", C)(C ch) 304 if (isSomeChar!C) { 305 import std.conv : to; 306 return typeof(this)(rawString ~ to!CharT(ch)); 307 } 308 309 /// ditto 310 typeof(this) opBinaryRight(string op : "~", C)(C ch) 311 if (isSomeChar!C) { 312 import std.conv : to; 313 return typeof(this)(to!CharT(ch) ~ rawString); 314 } 315 316 /// 317 unittest { 318 auto t1 = typeof(this)("Test "); 319 auto t2 = t1 ~ 'A'; 320 assert(t2.rawString == "Test A"); 321 auto t3 = t1 ~ cast(wchar) 'B'; 322 assert(t3.rawString == "Test B"); 323 auto t4 = t1 ~ cast(dchar) 'C'; 324 assert(t4.rawString == "Test C"); 325 auto t5 = cast(dchar) 'D' ~ t1; 326 assert(t5.rawString == "DTest "); 327 } 328 329 /// Allows appending to an existing GString using the `~=` operator. 330 void opOpAssign(string op : "~", R)(R str) 331 if (isSomeString!R) { 332 this.rawString ~= toStringT(str); 333 } 334 335 /// ditto 336 void opOpAssign(string op : "~", C)(C ch) 337 if (isSomeChar!C) { 338 this.rawString ~= toStringT(ch); 339 } 340 341 /// ditto 342 void opOpAssign(string op : "~")(Grapheme g) { 343 import std.array : array; 344 this.rawString ~= toStringT(g[].array); 345 } 346 347 /// 348 unittest { 349 auto t1 = typeof(this)("Test"); 350 t1 ~= "R᪶"; 351 assert(t1.rawString == "TestR᪶"); 352 t1 ~= 'ö'; 353 assert(t1.rawString == "TestR᪶ö"); 354 t1 ~= Grapheme("b"); 355 assert(t1.rawString == "TestR᪶öb"); 356 } 357 358 /// Finds the grapheme-index of the given search string. 359 ptrdiff_t indexOf(R)(R str) 360 if (isSomeString!R) { 361 import std.algorithm : startsWith; 362 import std.uni : graphemeStride; 363 364 StringT target = toStringT(str); 365 ptrdiff_t index = 0; 366 for (ptrdiff_t i = 0; index < rawString.length; index += rawString.graphemeStride(index), i++) { 367 if (rawString[index..$].startsWith(target)) { 368 return i; 369 } 370 } 371 return -1; 372 } 373 374 /// 375 unittest { 376 auto t1 = typeof(this)("Test R̆ȧm͆b̪õ R̆ȧm͆b̪õ"); 377 assert(t1.indexOf("ȧ") == 6); 378 assert(t1.indexOf("m̪") == -1); 379 } 380 381 } 382 383 /// 384 unittest { 385 auto t1 = CGString("Test R̆ȧm͆b̪õ"); 386 assert(t1.indexOf("ȧ") == 6); 387 assert(t1.indexOf("m̪") == -1); 388 t1[9] = "í"; 389 assert(t1.toString() == "Test R̆ȧm͆b̪í"); 390 }