Why is std.regex slow, well here is one reason!

Richard Andrew Cattermole (Rikki) richard at cattermole.co.nz
Thu Feb 23 17:06:30 UTC 2023


As we all know, std.regex slows down our builds even if all 
you're doing is importing it.

So on Discord we were chatting and I got annoyed about it enough 
to look into it (which as we all know is a good way to make me do 
something about it).

To start off with, let's do some base timings with dmd.

Here is my test module, disable the regex call as required.

```d
import std.regex;

// Benchmark driver: the whole program is a single regex construction, so the
// compile time of this module can be compared with and without that call.
void main() {
     auto r = regex(`[a-z]`); // remove me (drop this call to time the bare import)
}
```

Compiling this takes 2.2s; compiling it without the regex call 
takes 1.2s.

Okay that's quite a big jump but at least we're using it. Now on 
to modifying std.regex or should I say std.uni.

That's right, we will be modifying std.uni not std.regex!

All we need to do is add ``-version=std_uni_bootstrap`` to our 
call to dmd to get this working and apply the changes at the end 
of this post.

Now the times are 1.2s and 0.9s (with and without the regex call, 
respectively).

Why is turning on the bootstrap version in std.uni decreasing 
compile times so significantly? This is almost certainly because of 
the Unicode tables being compressed. std.regex is triggering 
decompression and bringing in a whole pile of logic that wouldn't be 
required otherwise, which costs an awful lot of CPU and RAM during 
CTFE. newCTFE anyone?





If you want to reproduce this, you'll need the changes below to 
std.uni (just add them at the bottom of the file).

```d
public:
version(std_uni_bootstrap) {
     // Stub comparison: ignores both operands and always reports 0.
     int icmp(S1, S2)(S1 r1, S2 r2)
     {
         return 0;
     }

     // Stub case mapping: the code point is handed back untouched.
     dchar toLower()(dchar c)
     {
         return c;
     }

     // Stub case mapping: the code point is handed back untouched.
     dchar toUpper()(dchar c)
     {
         return c;
     }

     // Stub in-place conversion: the buffer is left exactly as it was.
     void toLowerInPlace(C)(ref C[] s)
     {
     }

     // Stub in-place conversion: the buffer is left exactly as it was.
     void toUpperInPlace(C)(ref C[] s)
     {
     }

     // Stub stride: always 0, whatever the input or index.
     size_t graphemeStride(C)(const scope C[] input, size_t index)
     {
         return 0;
     }

     // Stub classification: no code point is reported as graphical.
     bool isGraphical()(dchar c)
     {
         return false;
     }
     // Stub version of the `unicode` struct: every member ignores its
     // arguments and returns a default value.
     struct unicode
     {
         // Any member name looked up through opDispatch yields a
         // default-initialized CodepointSet.
         static @property auto opDispatch(string name)()
         {
             return CodepointSet.init;
         }

         // Consumes nothing from `range`; returns a default-initialized set.
         static CodepointSet parseSet(Range)(ref Range range, bool casefold = false)
         {
             return CodepointSet.init;
         }

         // Consumes nothing from `p`; returns a default-initialized set.
         static CodepointSet parsePropertySpec(Range)(ref Range p, bool negated, bool casefold)
         {
             return CodepointSet.init;
         }

         // Consumes nothing from `p`; always returns code point 0.
         static dchar parseControlCode(Parser)(ref Parser p)
         {
             return 0;
         }
     }
     // Compile-time sequence of 26 punctuation characters.
     // NOTE(review): presumably the characters a regex pattern may
     // backslash-escape; confirm against the std.regex parser.
     alias Escapables = AliasSeq!(
         '[', ']', '\\', '^', '$', '.', '|', '?', ',', '-',
         ';', ':', '#', '&', '%', '/', '<', '>', '`', '*',
         '+', '(', ')', '{', '}', '~');

     // Array-backed LIFO container: elements are appended to `data` and
     // removed from its end.
     struct Stack(T)
     {
     @safe:
         T[] data;

         // True when no elements are stored.
         @property bool empty()
         {
             return data.length == 0;
         }

         // Number of stored elements.
         @property size_t length()
         {
             return data.length;
         }

         // Appends `val` as the new top element.
         void push(T val)
         {
             data ~= val;
         }

         // Removes the top element and returns it; the stack must not be empty.
         @trusted T pop()
         {
             assert(!empty);
             immutable lastIndex = data.length - 1;
             auto popped = data[lastIndex];
             data = data[0 .. lastIndex];
             // assumeSafeAppend is a runtime-only call, so it is skipped
             // while executing under CTFE.
             if (!__ctfe)
                 cast(void) data.assumeSafeAppend();
             return popped;
         }

         // Reference to the top element; the stack must not be empty.
         @property ref T top()
         {
             assert(!empty);
             return data[$ - 1];
         }
     }

     // Stub classification: no code point is reported as alphabetic.
     bool isAlpha()(dchar c)
     {
         return false;
     }

     // Stub set: a default-initialized CodepointSet.
     CodepointSet wordCharacter()()
     {
         return CodepointSet.init;
     }

     // Stub hex parser: leaves `str` untouched and always yields code point 0.
     dchar parseUniHex(Range)(ref Range str, size_t maxDigit)
     {
         return 0;
     }
     // Stub case folding: with no folding table available, each code point
     // folds only to itself, so the result is a one-element range holding
     // `ch` (or an empty range when `ch` is 0).
     //
     // The previous stub returned `Range.init`. That value is not empty:
     // `idx` defaults to 0 (the table form) and the union takes the
     // initializer of its first member, `dchar.init == 0xFFFF`, so the range
     // claimed 65535 elements and `front` yielded 0 for each of them.
     //
     // Params:
     //   ch = code point to look up
     // Returns: a forward range of `dchar` with `length`.
     auto simpleCaseFoldings()(dchar ch)
     {
         static struct Range
         {
         @safe pure nothrow:
             uint idx; // uint.max selects the small form, where `c` is the live union member
             union
             {
                 dchar c;  // small form: the one code point; 0 means the range is exhausted
                 uint len; // table form: number of entries remaining
             }

             // True when the range carries a single code point in `c`.
             @property bool isSmall() const
             {
                 return idx == uint.max;
             }

             // Builds the small form holding exactly `ch`.
             this(dchar ch)
             {
                 idx = uint.max;
                 c = ch;
             }

             // Builds the table form; kept so the type keeps both
             // constructors, although this stub never constructs it.
             this(uint start, uint size)
             {
                 idx = start;
                 len = size;
             }

             // Current element: the stored code point in the small form.
             // The table form has no table to read here, so it yields 0.
             @property dchar front() const
             {
                 return isSmall ? c : 0;
             }

             @property bool empty() const
             {
                 return isSmall ? c == 0 : len == 0;
             }

             @property size_t length() const
             {
                 if (isSmall)
                     return c == 0 ? 0 : 1;
                 return len;
             }

             void popFront()
             {
                 if (isSmall)
                 {
                     // Consuming the only element leaves the range exhausted.
                     c = 0;
                 }
                 else
                 {
                     idx++;
                     len--;
                 }
             }
         }

         // Explicit small-form construction, so the range is never the
         // misleading non-empty `Range.init`.
         return Range(ch);
     }
}
```






More information about the Digitalmars-d mailing list