Can I speed up this log parsing script further?

Fri Jun 9 07:19:48 PDT 2017

import std.stdio;
import std.array: appender, array;
import std.algorithm : findSplit, splitter, joiner, canFind, map;
import std.typecons : tuple, Tuple;
import std.conv : to;
import std.range : dropOne, dropExactly, takeExactly, chain;

alias push_type = Tuple!(int, char[], int, bool, bool);
alias npush_type = Tuple!(char[], int, char[]);

/**
 * Scans the log file `filename` line by line and collects two kinds of
 * records, then prints how many of each were found.
 *
 * Lines containing " SYNC_PUSH: " yield a `push_type` built from the text
 * after the marker, split on single spaces:
 *   - token 0 converted to int,
 *   - tokens 2 and 3 joined with one space,
 *   - token 5 converted to int,
 *   - whether the text after the marker contains "PA-SOC_POP",
 *   - whether the text after the marker contains "CU-SOC_POP".
 *
 * Lines containing " SOC_NOT_PUSHED: " yield an `npush_type`:
 *   - tokens 2 and 3 of the text before the marker, concatenated directly,
 *   - token 1 of the text after the marker converted to int,
 *   - token 0 of the text after the marker.
 *
 * Lines matching neither marker are ignored.
 *
 * Params:
 *     filename = path of the log file to read.
 *
 * NOTE(review): lines that contain a marker but have fewer tokens than
 * expected are not handled specially here - confirm the log format
 * guarantees the token counts.
 */
void read_log(string filename) {
    File file = File(filename, "r");
    auto npushed = appender!(npush_type[])();
    auto pushed = appender!(push_type[])();
    // byLine hands out slices of a buffer it reuses between iterations, so
    // every piece of `line` that is stored must be copied first.
    foreach (line; file.byLine) {
        if (auto findResult = line.findSplit(" SYNC_PUSH: ")) {
            auto rel = findResult[2];
            auto att = rel.splitter(" ");

            auto firstVal = att.front.to!int;
            // Converting the lazy joiner range already builds a new array,
            // so no extra .dup is needed. The separator is a single space.
            auto secondVal = att.dropExactly(2).takeExactly(2)
                                .joiner(" ").to!(char[]);
            auto thirdVal = att.dropExactly(5).front.to!int;
            auto fourthVal = rel.canFind("PA-SOC_POP");
            auto fifthVal = rel.canFind("CU-SOC_POP");
            pushed.put(tuple(firstVal, secondVal, thirdVal, fourthVal,
                             fifthVal));
            continue;
        }
        if (auto findResult = line.findSplit(" SOC_NOT_PUSHED: ")) {
            auto leftPart = findResult[0].splitter(" ").dropExactly(2)
                                                       .takeExactly(2);
            auto rightPart = findResult[2].splitter(" ").takeExactly(2);
            // chain is lazy; the conversion materialises it into a new array.
            auto firstVal = chain(leftPart.front, leftPart.dropOne.front)
                                .to!(char[]);
            // This is a slice of the reused line buffer: copy it.
            auto thirdVal = rightPart.front.dup;
            auto secondVal = rightPart.dropOne.front.to!int;
            npushed.put(tuple(firstVal, secondVal, thirdVal));
            continue;
        }
    }
    // Doing more stuff with these arrays later. For now, just printing lengths
    writeln(npushed.data.length);
    writeln(pushed.data.length);
}

On Fri, Jun 9, 2017 at 12:01 PM, uncorroded via Digitalmars-d-learn <
digitalmars-d-learn at puremagic.com> wrote:

> On Friday, 9 June 2017 at 08:58:38 UTC, Daniel Kozak wrote:
>
>> There is no difference in speed because you do not process your data
>> lazily, so you make many allocations; this is the main reason why it is so
>> slow. I could improve that, but I will need to see some example data, which
>> you are trying to parse.
>>
>> But some rules,
>> 1.) instead of ~= you should use std.array.appender
>> 2.) instead of std.string.split you could use std.algorithm.splitter or
>> std.algorithm.findSplit
>> 3.) instead of indexOf I would use std.algorithm.startsWith (in case it is
>> at the beginning of the line)
>>
>
> Thanks everyone for the tips.
> The log file itself is 52 MB but I have added a sample in pastebin (
> https://pastebin.com/vj778PK4 ). Will try the suggestions this evening.
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.puremagic.com/pipermail/digitalmars-d-learn/attachments/20170609/98112a46/attachment.html>