Python-like slicing and handling UTF-8 strings as a bonus

FG home at fgda.pl
Sat Dec 29 14:25:15 PST 2012


Slices are great but not really what I had expected, coming from Python.
I've seen code like s[a..$-b] used without checking the values, just to end up 
with a Range violation. But there are 3 constraints to check here:
	a >= 0 && a + b <= s.length && b >= 0

That's way too much coding for a simple program/script that just shortens a 
string before printing it on the screen. If I can't write s[0..80] without 
fear, then let there at least be a function that does it like Python would.

Additionally, as strings are UTF-8-encoded, I'd like such a function to give me 
proper substrings, without multibyte characters cut in the middle, where 
s[0..80] would mean 80 characters on the screen and not 80 bytes.

I would envision it being part of std.string eventually.
Forgive me if such a function already exists -- I couldn't find it.
I also still don't speak D too well, so don't laugh. :)




import std.array, std.range, std.stdio;


/**
 * Python-style slicing that never causes a range violation.
 *
 * Negative indices count from the end of the array, out-of-range indices are
 * clamped, and an inverted or empty range yields an empty slice.  For `char`
 * and `wchar` arrays the indices are interpreted as character (code point)
 * positions rather than code units, so a multi-unit character is never cut
 * in the middle.
 *
 * Params:
 *   s     = array or string to slice
 *   start = index of the first element/character; negative counts from the end
 *   end   = index one past the last element/character; negative counts from
 *           the end, the default means "up to the end of `s`"
 *
 * Returns: a slice of `s` (no copy is made); empty when the range is empty.
 */
auto getSlice(T)(T[] s, ptrdiff_t start, ptrdiff_t end = ptrdiff_t.max)
pure @safe
{
     // Only real char/wchar elements (with any qualifier) are variable-width
     // encodings that have to be decoded to locate character boundaries.
     enum bool narrow = is(immutable T == immutable char) ||
             is(immutable T == immutable wchar);

     bool start_from_back, end_from_back;
     size_t full_len = s.length;
     ptrdiff_t len;
     if (full_len > ptrdiff_t.max)
         len = ptrdiff_t.max;
     else len = cast(ptrdiff_t) full_len;

     // Normalise negative indices by adding the length in code units.
     if (end < 0)
     {
         end_from_back = true;
         end += len;
     }
     if (end > len) end = len;
     if (start < 0)
     {
         if (0 - start >= len)
             start = 0;
         else
         {
             start += len;
             start_from_back = true;
         }
     }
     if (start < 0) start = 0;

     bool reversed = start > end;
     static if (narrow)
     {
         // A from-back index was offset by the code-unit length while a
         // from-front index is still a character position, so the two are
         // only comparable when both bounds have the same origin.  Mixed
         // pairs are validated after decoding (real_start > real_end).
         if (start_from_back != end_from_back)
             reversed = false;
     }
     if (reversed || start >= len || end <= 0)
         return s[0..0];

     static if (narrow)
     {
         // Code-unit offsets of the requested character positions; -1 = not found yet.
         ptrdiff_t real_start = -1, real_end = -1, loop;

         // Forward scan resolves the bounds that were given from the front.
         if (!start_from_back || !end_from_back)
         {
             foreach (i, dchar c; s)
             {
                 if (!start_from_back && loop >= start && real_start < 0)
                     real_start = i;
                 if (!end_from_back && loop >= end && real_end < 0)
                     real_end = i;
                 // Stop once every front-based bound is known; an end equal
                 // to the full length needs no search at all.
                 if ((start_from_back || real_start > -1) &&
                         (end_from_back || real_end > -1 || end == len))
                     break;
                 loop++;
             }
         }

         // Turn the from-back bounds into negative character positions again
         // (-1 is the last character) and resolve them with a backward scan.
         start -= len;
         end -= len;
         loop = -1;
         if (start_from_back || end_from_back)
         {
             foreach_reverse (i, dchar c; s)
             {
                 if (start_from_back && loop <= start && real_start < 0)
                     real_start = i;
                 if (end_from_back && loop <= end && real_end < 0)
                     real_end = i;
                 if ((!start_from_back || real_start > -1) &&
                         (!end_from_back || real_end > -1))
                     break;
                 loop--;
             }
         }

         // A bound that was never reached lies beyond the string: clamp it to
         // the beginning (from-back) or to the end (from-front).
         if (real_end < 0) real_end = (end_from_back ? 0 : len);
         if (real_start < 0) real_start = (start_from_back ? 0 : len);
         if (real_start > real_end) real_start = real_end = 0;
         return s[real_start..real_end];
     }
     else return s[start..end];
}

unittest {
     // Plain arrays: indices address elements directly.
     auto nums = [0, 1, 2, 3, 4];
     assert(nums.getSlice(0, -1) == [0, 1, 2, 3]);
     assert(nums.getSlice(1, -2) == [1, 2]);
     assert(nums.getSlice(-4, -2) == [1, 2]);
     assert(nums.getSlice(-5, 7) == [0, 1, 2, 3, 4]);

     // The same text as UTF-8 and UTF-32; indices are character positions,
     // so both encodings must produce the same substrings.
     string narrowText = "okrągły stół";
     dstring wideText = "okrągły stół"d;

     // Omitted end means "to the end of the text".
     assert(narrowText.getSlice(0) == narrowText);
     assert(narrowText.getSlice(8) == "stół");
     assert(wideText.getSlice(0) == wideText);
     assert(wideText.getSlice(8) == "stół"d);

     // Explicit (from, to) pairs with the expected substring.
     static struct Utf8Case { ptrdiff_t from, to; string want; }
     static struct Utf32Case { ptrdiff_t from, to; dstring want; }

     foreach (c; [Utf8Case(0, 0, ""),
                  Utf8Case(0, 1, "o"),
                  Utf8Case(8, -1, "stó"),
                  Utf8Case(8, -2, "st"),
                  Utf8Case(8, -4, ""),
                  Utf8Case(10, 11, "ó"),
                  Utf8Case(10, -1, "ó"),
                  Utf8Case(10, 12, "ół"),
                  Utf8Case(11, 12, "ł"),
                  Utf8Case(11, 15, "ł")])
         assert(narrowText.getSlice(c.from, c.to) == c.want);

     foreach (c; [Utf32Case(0, 0, ""d),
                  Utf32Case(0, 1, "o"d),
                  Utf32Case(8, -1, "stó"d),
                  Utf32Case(8, -2, "st"d),
                  Utf32Case(8, -4, ""d),
                  Utf32Case(10, 11, "ó"d),
                  Utf32Case(10, -1, "ó"d),
                  Utf32Case(10, 12, "ół"d),
                  Utf32Case(11, 12, "ł"d),
                  Utf32Case(11, 15, "ł"d)])
         assert(wideText.getSlice(c.from, c.to) == c.want);
}




More information about the Digitalmars-d mailing list