std.string phobos

pc peng2cheng2 at yahoo.com
Sun Sep 13 10:53:15 PDT 2009


pc Wrote:

> dsimcha Wrote:
> 
> > == Quote from pc (peng2cheng2 at yahoo.com)'s article
> > > Is there a way to make the functions in std.string, such as replace, pure? Many
> > pure functions are going to  want to use these. Also, could some of them be
> > executable at compile time?
> > > For me, using D2.032, this did not compile.
> > > pure string replaceXX(string str){
> > >   return replace(str,"XX","X");
> > > }
> > > If I am missing something, help!
> > 
> > For a function in D to pass the compiler checks for purity, it must only call
> > functions that are *marked as* being pure.  If a function is not marked as pure
> > but is de facto pure, it won't work.  For example:
> > 
> > uint nPlus2(uint n) pure {
> >     return nPlus1( nPlus1( n));  // Not pure.
> > }
> > 
> > uint nPlus1(uint n) {
> >     return n + 1;
> > }
> > 
> > Many functions that are, in fact, pure, have not been annotated as such yet in
> > Phobos, since pure was implemented fairly recently.  If you want to help out, this
> > is fairly low hanging fruit.
> > 
> > Also, purity is very restrictive right now and is designed partly with thread
> > safety in mind.  A function that truly has no side effects from an observable
> > behavior in a single thread point of view won't necessarily pass the compiler as pure:
> > 
> > __gshared uint foo;
> > 
> > /* wasteTime() is impure even though it has no observable side
> >  * effects in a single thread because it still (at least
> >  * temporarily) manipulates global state, and thus could
> >  * cause problems in multithreaded code.  Furthermore, even if
> >  * it were thread safe, it would be hard to prove for all but
> >  * the simplest cases that functions like these have no
> >  * observable side effects.*/
> > void wasteTime() pure {  // Won't compile.
> >    foo++;
> >    foo--;
> > }
> 
> Thank you for the helpful comments.
> 
> Re helping out, I would like to help, but at this stage I feel that I need to learn much much more before I can be of any use. (I am a recently retired international income tax consultant). If I get up to speed, I will certainly help.
> 
> I was thinking that it would be good if std.string was completely templated to work for char, wchar and dchar (My main hobby is learning Chinese, so I have an interest in unicode.) I also thought the functions should be pure. The first step in this direction, and to learn D2, was to write
> 
>       immutable(T)[][] csvSplit(T)(immutable(T)[], T sep=',', T quote='"');
> 
> This worked out pretty well for string, wstring and dstring (copy attached). I take no credit for anything clever in the code (it's all based on a Lisp program written by Alain Picard that is available on the web -- it was by far the easiest to understand).
> 
> Here's the catch -- I could not make csvSplit pure. The inner functions were referencing csvSplit's local variables.  I think that the problem only occurs in templates. The following isolates the issue:
> 
> 
> 
> import std.stdio;
> 
> /*
>   ATTEMPT TO USE NESTED "PURE" FUNCTIONS IN A TEMPLATE.
> 
>   All works fine unless you uncomment the third line in main. If you
>   do, dmd 2.032 yields:
> 
>   pure.d(35): Error: pure nested function 'bar' cannot access mutable
>   data 'fooState'
> 
>   pure.d(36): Error: pure nested function 'bar' cannot access mutable
>   data 'y'
> 
>   pure.d(47): Error: template instance pure.fooPT!(char) error
>   instantiating
> */
> 
> 
> //"pure" inner function, with concrete types - ok
> pure string foo(string x, string y){
>   
>   string fooState;
> 
>   string bar(string x){
>     fooState = "hello ";
>     return x ~ y;
>   }
> 
>   return fooState ~ bar(x);
> }
> 
> //potentially pure (?) templated version not labeled as pure - ok
> immutable(T)[] fooT(T)(immutable(T)[] x, immutable(T)[] y){
> 
>   immutable(T)[] fooState;
> 
>   immutable(T)[] bar(immutable(T)[] x){
>     fooState = "hello ";
>     return x ~ y;
>   }
> 
>   return fooState ~ bar(x);
> 
> }
> 
> //attempt to make templated version pure - no dice
> pure immutable(T)[] fooPT(T)(immutable(T)[] x, immutable(T)[] y){
> 
>   immutable(T)[] fooState;
> 
>   immutable(T)[] bar(immutable(T)[] x){
>     fooState = "hello ";
>     return x ~ y;
>   }
> 
>   return fooState ~ bar(x);
> 
> }
> 
> 
> void main(){
>   writeln(foo("p", "c"));
>   writeln(fooT("p", "c"));
>   //writeln(fooPT("p", "c"));
> }
> 
// TODO: figure out how to acknowledge Alain Picard -- the parser states
//       follow his Lisp CSV reader.
// TODO: put this in my string util module.
//

module dcsv;
import std.string;
version(unittest){import std.conv;}

// Parser states of the csvSplit state machine.
private enum {OUTSIDE_FIELD, IN_FIELD, IN_QUOTED_FIELD, AFTER_ENDING_QUOTE}


/* csvSplit
 *
 * Splits a single line into its CSV-formatted fields.
 *
 * - Leading and trailing whitespace (space, tab) is stripped from
 *   unquoted fields; whitespace *inside* a field is preserved.
 * - A field wrapped in quoteChar is taken verbatim; a doubled quoteChar
 *   inside such a field stands for one literal quoteChar.
 * - A line that is empty or holds only whitespace yields no fields.
 * - The line can be string, wstring or dstring; it is processed one
 *   element of type T at a time.
 *
 * Params:
 *   line         = the text to split (no line terminator handling is done)
 *   fieldSepChar = field separator, ',' by default; may be ' ' or '\t',
 *                  in which case that character is not treated as
 *                  strippable whitespace
 *   quoteChar    = quoting character, '"' by default
 *
 * Returns: the fields in order of appearance.  Fields without doubled
 *          quotes are slices of line.
 *
 * Throws: Exception if a quote appears inside an unquoted field, if a
 *         quoted field is never closed, or if anything other than
 *         whitespace or a separator follows a closing quote.
 */

public immutable(T)[][] csvSplit(T)(immutable(T)[] line,
                                    T fieldSepChar=',',
                                    T quoteChar = '"')
{
  alias immutable(T)[] tstring;
  tstring nullField = "";
  int state = OUTSIDE_FIELD;
  // Positions are size_t so that line.length can be passed around without
  // a narrowing conversion on 64-bit targets.
  size_t fieldBeg;
  size_t numTrailingWhitespaceChars;
  bool fieldHasDoubleQuoteChars;
  tstring[] fields;

  // Collapses every doubled quoteChar in str into a single one.
  tstring reduceDoubles(tstring str){
    tstring ret;
    bool afterQuote = false;
    foreach (T c; str){
      if (c == quoteChar){
        if (afterQuote){
          afterQuote = false;
          continue; // second quote of a pair: drop it
        }
        afterQuote = true;
      }
      ret ~= c;
    }
    return ret;
  }

  // Space or tab, unless that character is in use as quote or separator.
  bool isWhitespace(T c){
    return c != quoteChar && c != fieldSepChar && (c == ' ' || c == '\t');
  }

  // Emits line[fieldBeg .. end] minus the trailing whitespace counted so far.
  void putField(size_t end){
    end -= numTrailingWhitespaceChars;
    tstring str = line[fieldBeg .. end];
    if (fieldHasDoubleQuoteChars)
      str = reduceDoubles(str);
    fields ~= str;
    state = OUTSIDE_FIELD;
  }

  // Between fields: skip whitespace, or start a new (possibly empty) field.
  void stepOutsideField(size_t pos, T c){
    fieldHasDoubleQuoteChars = false;
    if (isWhitespace(c)){
      // leading whitespace, just skip it
    }
    else if (c == fieldSepChar)
      fields ~= nullField; // emit "" field
    else if (c == quoteChar){
      state = IN_QUOTED_FIELD;
      fieldBeg = pos + 1; // content starts after the opening quote
      numTrailingWhitespaceChars = 0;
    }
    else {
      state = IN_FIELD;
      fieldBeg = pos;
      numTrailingWhitespaceChars = 0;
    }
  }

  // In the midst of an unquoted field.
  void stepInField(size_t pos, T c){
    if (isWhitespace(c))
      numTrailingWhitespaceChars++;
    else if (c == fieldSepChar)
      putField(pos);
    else if (c == quoteChar)
      throw new Exception("Unexpected quote in unquoted field");
    else
      // Whitespace seen so far was interior, not trailing: keep it.
      numTrailingWhitespaceChars = 0;
  }

  // In the midst of a quoted field: only a quote is significant.
  void stepInQuotedField(T c){
    if (c == quoteChar)
      state = AFTER_ENDING_QUOTE;
  }

  // Just after a quote that may have closed the field.
  void stepAfterEndingQuote(size_t pos, T c){
    if (c == quoteChar){
      // Only a quote *immediately* after the previous one is a doubled
      // quote; one that follows whitespace is stray input.
      if (numTrailingWhitespaceChars > 0)
        throw new Exception("Unexpected char after end of quoted field.");
      fieldHasDoubleQuoteChars = true; //false alarm
      state = IN_QUOTED_FIELD; //continue parsing quoted field
    }
    else if (c == fieldSepChar)
      putField(pos - 1); // pos-1-trailing == index of the closing quote
    else if (isWhitespace(c))
      numTrailingWhitespaceChars++;
    else
      throw new Exception("Unexpected char after end of quoted field.");
  }

  // End of line: flush or reject whatever is pending.
  void finish(){
    switch (state){
    case OUTSIDE_FIELD:
      if (fields.length > 0) //skip all blank lines
        fields ~= nullField; //emit last "" field
      break;
    case IN_FIELD:
      putField(line.length);
      break;
    case IN_QUOTED_FIELD:
      throw new Exception("Unbalanced initial \".");
    case AFTER_ENDING_QUOTE:
      putField(line.length - 1);
      break;
    default:
      break;
    }
  }

  foreach (size_t pos, T c; line){
    switch (state){
    case OUTSIDE_FIELD:
      stepOutsideField(pos, c); break;
    case IN_FIELD:
      stepInField(pos, c); break;
    case IN_QUOTED_FIELD:
      stepInQuotedField(c); break;
    case AFTER_ENDING_QUOTE:
      stepAfterEndingQuote(pos, c); break;
    default:
      break;
    }
  }
  finish();
  return fields;
}

version(unittest){

  // Test helper: runs csvSplit on `line` with the default separator and
  // quote, and renders the resulting fields as "|f1|f2|...|" so a whole
  // parse can be compared against a single expected string.
  immutable(T)[] joinfields(T)(immutable(T)[] line){
    immutable(T)[] rendered = "|";
    foreach (field; csvSplit(line)){
      rendered ~= field;
      rendered ~= "|";
    }
    return rendered;
  }

  // Test helper: asserts that `line` parses to `joined` (in joinfields
  // notation) for the string, wstring and dstring instantiations in turn.
  // The original narrow `line` is the assertion message in all three cases.
  void csvAssert(string line, string joined){
    assert(joinfields(line) == joined, line);
    assert(joinfields(to!(wstring)(line)) == to!(wstring)(joined), line);
    assert(joinfields(to!(dstring)(line)) == to!(dstring)(joined), line);
  }
}


unittest {
  // Each row is [input line, expected fields rendered as "|f1|f2|...|"].
  // Rows are checked in order through csvAssert.
  string[][] cases = [
    [`,`,                `|||`],
    [`,a`,               `||a|`],
    [`a,`,               `|a||`],
    [`a`,                `|a|`],
    [`a, `,              `|a||`],
    [` a,`,              `|a||`],
    [`a,,`,              `|a|||`],
    [`a,"b b"`,          `|a|b b|`],
    [`a,"b b" `,         `|a|b b|`],
    [`a, "b b"`,         `|a|b b|`],
    [`a,"b""c""b" `,     `|a|b"c"b|`],
    [`a,"b""c""" `,      `|a|b"c"|`],
    [`a,"""c""b" `,      `|a|"c"b|`],
  ];
  foreach (pair; cases)
    csvAssert(pair[0], pair[1]);
}

I added a DOS version and a little benchmark program using csvSplit. Also, note that this is a draft.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: dcsv_dos.d
Type: text/x-dsrc
Size: 5559 bytes
Desc: not available
URL: <http://lists.puremagic.com/pipermail/digitalmars-d/attachments/20090913/d57fda1c/attachment.d>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: dcsv_ut.d
Type: text/x-dsrc
Size: 531 bytes
Desc: not available
URL: <http://lists.puremagic.com/pipermail/digitalmars-d/attachments/20090913/d57fda1c/attachment-0001.d>


More information about the Digitalmars-d mailing list