[Issue 18378] std.regex causes major slowdown in compilation times

d-bugmail at puremagic.com d-bugmail at puremagic.com
Sat Jan 19 14:54:49 UTC 2019


https://issues.dlang.org/show_bug.cgi?id=18378

--- Comment #2 from anonymous4 <dfj1esp02 at sneakemail.com> ---
Proof of concept for Adam's code:
//
https://github.com/adamdruppe/arsd/blob/ff68e1cf004861dcf256fce996bec851c7c0e208/cgi.d

/// A URI broken down into the generic components described by RFC 3986.
struct Uri {
        import std.conv, std.string;
        // scheme://userinfo@host:port/path?query#fragment

        string scheme; /// e.g. "http" in "http://example.com/"
        string userinfo; /// the username (and possibly a password) in the uri
        string host; /// the domain name
        int port; /// port number, if given. Will be zero if a port was not explicitly given
        string path; /// e.g. "/folder/file.html" in "http://example.com/folder/file.html"
        string query; /// the stuff after the ? in a uri
        string fragment; /// the stuff after the # in a uri.

        /// Breaks down a uri string to its components
        this(string uri) {
                reparse(uri);
        }

        /// Parses `uri` with the RFC 3986 regular expression and stores each
        /// component in the corresponding field. Fields are left untouched
        /// when the expression does not match.
        private void reparse(string uri) {
                //import std.regex;
                // from RFC 3986

                // the ctRegex triples the compile time and makes ugly errors for no real benefit
                // it was a nice experiment but just not worth it.
                // enum ctr = ctRegex!r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?";
                auto ctr = regex(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?");

                auto m = match(uri, ctr);
                if(m) {
                        scheme = m.captures[2];
                        auto authority = m.captures[4];

                        // Everything before the first '@' is the userinfo part.
                        auto idx = authority.indexOf("@");
                        if(idx != -1) {
                                userinfo = authority[0 .. idx];
                                authority = authority[idx + 1 .. $];
                        }

                        // The port separator is the LAST ':' and only counts when it
                        // comes after the closing ']' of a bracketed IP literal, so
                        // that "[::1]" and "[::1]:8080" are split correctly.
                        idx = authority.lastIndexOf(":");
                        if(idx == -1 || idx < authority.lastIndexOf("]")) {
                                port = 0; // 0 means not specified; we should use the default for the scheme
                                host = authority;
                        } else {
                                host = authority[0 .. idx];
                                auto portText = authority[idx + 1 .. $];
                                // RFC 3986 permits an empty port ("host:"); treat it as
                                // "not specified" instead of letting to!int throw.
                                port = portText.length ? to!int(portText) : 0;
                        }

                        path = m.captures[5];
                        query = m.captures[7];
                        fragment = m.captures[9];
                }
                // uriInvalidated = false;
        }
}

import std=std.regex;

/// Builds a regular expression from `pattern` (with optional `flags`) by
/// forwarding to std.regex's `regexImpl`, and returns it wrapped in the
/// non-templated `StringRegex` struct.
StringRegex regex(string pattern, const char[] flags = null)
{
        auto compiled = std.regexImpl(pattern, flags);
        return StringRegex(compiled);
}

/// Non-templated holder for a regex compiled from a `string` pattern.
struct StringRegex
{
        /// The concrete type that `std.regexImpl` returns for a `string` pattern.
        alias Type = typeof(std.regexImpl(""));

        /// The wrapped std.regex object.
        Type re;
}

/// Runs std.regex's `match` for `input` against the regex held by `re`
/// and returns the result wrapped in the non-templated `RegexMatch` struct.
RegexMatch match(string input, StringRegex re)
{
        auto found = std.match(input, re.re);
        return RegexMatch(found);
}

/// Non-templated wrapper around `std.regex.RegexMatch!string`.
struct RegexMatch
{
        /// The std.regex match type being wrapped.
        alias Type = std.RegexMatch!string;

        /// The wrapped match state.
        Type mre;

        // NOTE(review): the postblit and destructor are intentionally empty;
        // presumably they are spelled out so they exist as ordinary members of
        // this struct - confirm before removing.
        this(this) {}
        ~this() {}

        /// True when the wrapped match is not empty.
        bool opCast() const
        {
                return !mre.empty;
        }

        /// Returns the capture groups of the current match, wrapped in `Captures`.
        inout(Captures) captures() inout
        {
                return inout Captures(mre.captures);
        }
}

/// Non-templated wrapper around `std.regex.Captures!string`.
struct Captures
{
        /// The std.regex captures type being wrapped.
        alias Type = std.Captures!string;

        /// The wrapped capture groups.
        Type cre;

        /// Returns the text of capture group `i` by indexing the wrapped captures.
        string opIndex(size_t i) const
        {
                return cre[i];
        }
}

Because this is a non-templated interface, it can be provided as a D interface (.di) file.

--


More information about the Digitalmars-d-bugs mailing list