Table lookups - this is pretty definitive

ixid via Digitalmars-d digitalmars-d at puremagic.com
Thu Apr 17 11:07:23 PDT 2014


On Thursday, 17 April 2014 at 16:27:26 UTC, Alix Pexton wrote:
> I added a lookup scheme of my own, its not as fast as Walters 
> (in fact its the slowest without -inline - release -O) but it 
> uses 1 bit per entry in the table instead of a whole byte so 
> you can have lots and lots of different tables. I'm even 
> reasonably sure that it works correctly!
>
>
> ===============================
> import core.stdc.stdlib;
> import core.stdc.string;
>
> import std.algorithm;
> import std.array;
> import std.ascii;
> import std.datetime;
> import std.range;
> import std.stdio;
> import std.traits;
>
> bool isIdentifierChar0(ubyte c)
> {
>     return isAlphaNum(c) || c == '_' || c == '$';
> }
>
> bool isIdentifierChar1(ubyte c)
> {
>     return ((c >= '0' || c == '$') &&
>             (c <= '9' || c >= 'A')  &&
>             (c <= 'Z' || c >= 'a' || c == '_') &&
>             (c <= 'z'));
> }
>
> immutable bool[256] tab2;
> static this()
> {
>     for (size_t u = 0; u < 0x100; ++u)
>     {
>         tab2[u] = isIdentifierChar0(cast(ubyte)u);
>     }
> }
>
> bool isIdentifierChar2(ubyte c)
> {
>     return tab2[c];
> }
>
> immutable ulong[4] tab3;
> static this()
> {
>     for (size_t u = 0; u < 0x100; ++u)
>     {
>         if (isIdentifierChar0(cast(ubyte)u))
>         {
>             auto sub = u >>> 6;
>             auto b = u & 0x3f;
>             auto mask = 0x01L << b;
>             tab3[sub] |= mask;
>         }
>     }
> }
>
> bool isIdentifierChar3(ubyte c)
> {
> 	auto sub = c >>> 6;
> 	c &= 0x3f;
> 	auto mask = 0x01L << c;
> 	return (tab3[sub] & mask) > 0;
> }
>
> int f0()
> {
>     int x;
>     for (uint u = 0; u < 0x100; ++u)
>     {
>         x += isIdentifierChar0(cast(ubyte)u);
>     }
>     return x;
> }
>
> int f1()
> {
>     int x;
>     for (uint u = 0; u < 0x100; ++u)
>     {
>         x += isIdentifierChar1(cast(ubyte)u);
>     }
>     return x;
> }
>
> int f2()
> {
>     int x;
>     for (uint u = 0; u < 0x100; ++u)
>     {
>         x += isIdentifierChar2(cast(ubyte)u);
>     }
>     return x;
> }
>
> int f3()
> {
>     int x;
>     for (uint u = 0; u < 0x100; ++u)
>     {
>         x += isIdentifierChar3(cast(ubyte)u);
>     }
>     return x;
> }
>
> void main()
> {
>     auto r = benchmark!(f0, f1, f2, f3)(10_000);
>     writefln("Milliseconds %s %s %s %s", r[0].msecs, 
> r[1].msecs, r[2].msecs, r[3].msecs);
> }

I feel like there must be a way of making a fast bit look up but 
my version is only moderate in speed. You can get all the bits 
you need on two 64 bit registers or one SSE register. I haven't 
tried bt, does that work with a 64 bit register?


More information about the Digitalmars-d mailing list