mmap file performance

Andy Valencia dont at spam.me
Thu Apr 11 00:24:44 UTC 2024


I wrote a "count newlines" based on mapped files.  It used about 
twice the CPU of the version which just read 1 meg at a time.  I 
thought something was amiss (needless slice indirection or 
something), so I wrote the code in C.  It had the same CPU usage 
as the D version.  So...mapped files, not so much.  Not D's 
fault.  And writing it in C made me realize how much easier it is 
to code in D!

The D version:

import std.stdio : writeln;
import std.mmfile : MmFile;

// Not referenced by any code shown here; presumably left over from a
// chunked-read variant of this program -- TODO confirm before removing.
const uint CHUNKSZ = 65536;

/**
 * Count the newline characters in a buffer.
 *
 * Params:
 *   data = the bytes to scan
 *
 * Returns: how many elements of data equal '\n'.
 */
size_t
countnl(ref shared char[] data)
{
     size_t nlines = 0;

     // Plain index walk over the slice; each element is compared as a
     // single char.
     for (size_t i = 0; i < data.length; ++i) {
         if (data[i] == '\n') {
             ++nlines;
         }
     }
     return nlines;
}

/**
 * Print a usage message to stderr and terminate the process with
 * exit status 1.  Does not return.
 *
 * Params:
 *   progname = program name to show in the message
 */
void
usage(in string progname)
{
     import core.stdc.stdlib : exit;
     import std.stdio : stderr;

     // writeln does no printf-style formatting, so the message must not
     // carry a "%s" placeholder; the program name is a separate argument.
     stderr.writeln("Usage is: ", progname, " <file> ...");
     exit(1);
}

public:
/**
 * For each file named on the command line, map it into memory, count its
 * newline characters and print "<name>: <count>".
 *
 * With no file arguments, usage() is called instead.
 *
 * Params:
 *   argv = program name followed by the files to scan
 */
void
main(string[] argv)
{
     if (argv.length < 2) {
         usage(argv[0]);
     }
     foreach(mn; argv[1 .. $]) {
         auto mf = new MmFile(mn);
         // Release the mapping as soon as this file is done instead of
         // leaving it to the garbage collector; otherwise mappings pile up
         // when many (or large) files are given.
         scope (exit) destroy(mf);

         // countnl takes a shared char[], hence the cast of the mapped bytes.
         auto data = cast(shared char[])mf.opSlice();
         size_t res;
         res = countnl(data);
         writeln(mn, ": ", res);
     }
}

And the C one (no performance gain over D):

#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>

/*
 * Count the newline characters in the file open on descriptor fd.
 *
 * The whole file is mapped read-only and scanned one byte at a time.
 *
 * fd: descriptor open for reading on the file to scan
 * nm: file name, used only as the prefix of error messages
 *
 * Returns the number of '\n' bytes.  Returns 0 after printing a message
 * on stderr if fstat() or mmap() fails or the file is too large to map.
 * An empty file yields 0 without attempting a mapping.
 */
static unsigned long
countnl(int fd, char *nm)
{
     char *buf;
     const char *p, *end;
     struct stat st;
     size_t len;
     unsigned long res;

     if (fstat(fd, &st) < 0) {
         perror(nm);
         return(0);
     }

     /* mmap() fails with EINVAL on a zero length; an empty file has no
      * newlines, so answer directly. */
     if (st.st_size <= 0) {
         return(0);
     }

     /* Carry the size in a size_t so files of 4 GiB and more are not
      * truncated; refuse anything that does not fit the address space. */
     len = (size_t)st.st_size;
     if ((off_t)len != st.st_size) {
         fprintf(stderr, "%s: file too large to map\n", nm);
         return(0);
     }

     buf = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
     if (buf == MAP_FAILED) {
         perror(nm);
         return(0);
     }

     res = 0;
     end = buf + len;
     for (p = buf; p < end; ++p) {
         if (*p == '\n') {
             res += 1;
         }
     }
     munmap(buf, len);
     return(res);
}

/*
 * Print "<name>: <newline count>" for every file named on the command
 * line.  A file that cannot be opened is reported with perror() and
 * skipped.  Always returns 0.
 */
int
main(int argc, char **argv)
{
     int x;

     for (x = 1; x < argc; ++x) {
         unsigned long res;
         char *nm = argv[x];

         int fd = open(nm, O_RDONLY);
         if (fd < 0) {
             perror(nm);
             continue;
         }
         res = countnl(fd, nm);
         close(fd);
         /* res is unsigned long, so the conversion must be %lu; "%uld"
          * mismatched the argument type and printed a stray "ld". */
         printf("%s: %lu\n", nm, res);
     }
     return(0);
}



More information about the Digitalmars-d-learn mailing list