std.string phobos
pc
peng2cheng2 at yahoo.com
Sun Sep 13 10:53:15 PDT 2009
pc Wrote:
> dsimcha Wrote:
>
> > == Quote from pc (peng2cheng2 at yahoo.com)'s article
> > > Is there a way to make the functions in std.string, such as replace, pure? Many
> > pure functions are going to want to use these. Also, could some of them be
> > executable at compile time?
> > > For me, using D2.032, this did not compile.
> > > pure string replaceXX(string str){
> > > return replace(str,"XX","X");
> > > }
> > > If I am missing something, help!
> >
> > For a function in D to pass the compiler checks for purity, it must only call
> > functions that are *marked as* being pure. If a function is not marked as pure
> > but is de facto pure, it won't work. For example:
> >
> > uint nPlus2(uint n) pure {
> > return nPlus1( nPlus1( n)); // Not pure.
> > }
> >
> > uint nPlus1(uint n) {
> > return n + 1;
> > }
> >
> > Many functions that are, in fact, pure, have not been annotated as such yet in
> > Phobos, since pure was implemented fairly recently. If you want to help out, this
> > is fairly low hanging fruit.
> >
> > Also, purity is very restrictive right now and is designed partly with thread
> > safety in mind. A function that truly has no side effects from an observable
> > behavior in a single thread point of view won't necessarily pass the compiler as pure:
> >
> > __gshared uint foo;
> >
> > /* wasteTime() is impure even though it has no observable side
> > * effects in a single thread because it still (at least
> > * temporarily) manipulates global state, and thus could
> > * cause problems in multithreaded code. Furthermore, even if
> > * it were thread safe, it would be hard to prove for all but
> > * the simplest cases that functions like these have no
> > * observable side effects.*/
> > void wasteTime() pure { // Won't compile.
> > foo++;
> > foo--;
> > }
>
> Thank you for the helpful comments.
>
> Re helping out, I would like to help, but at this stage I feel that I need to learn much much more before I can be of any use. (I am a recently retired international income tax consultant). If I get up to speed, I will certainly help.
>
> I was thinking that it would be good if std.string was completely templated to work for char, wchar and dchar (My main hobby is learning Chinese, so I have an interest in unicode.) I also thought the functions should be pure. The first step in this direction, and to learn D2, was to write
>
> immutable(T)[][] csvSplit(T)(immutable(T)[], T sep=',', T quote='"');
>
> This worked out pretty well for string, wstring and dstring (copy attached). I take no credit for anything clever in the code (it's all based on a Lisp program written by Alain Picard that is available on the web -- it was by far the easiest to understand).
>
> Here's the catch -- I could not make csvSplit pure. The inner functions were referencing csvSplit's local variables. I think that the problem only occurs in templates. The following isolates the issue:
>
>
>
> import std.stdio;
>
> /*
> ATTEMPT TO USE NESTED "PURE" FUNCTIONS IN A TEMPLATE.
>
> All works fine unless you uncomment the third line in main. If you
> do, dmd 2.032 yields:
>
> pure.d(35): Error: pure nested function 'bar' cannot access mutable
> data 'fooState'
>
> pure.d(36): Error: pure nested function 'bar' cannot access mutable
> data 'y'
>
> pure.d(47): Error: template instance pure.fooPT!(char) error
> instantiating
> */
>
>
> //"pure" inner function, with concrete types - ok
> pure string foo(string x, string y){
>
> string fooState;
>
> string bar(string x){
> fooState = "hello ";
> return x ~ y;
> }
>
> return fooState ~ bar(x);
> }
>
> //potentially pure (?) templated version not labeled as pure - ok
> immutable(T)[] fooT(T)(immutable(T)[] x, immutable(T)[] y){
>
> immutable(T)[] fooState;
>
> immutable(T)[] bar(immutable(T)[] x){
> fooState = "hello ";
> return x ~ y;
> }
>
> return fooState ~ bar(x);
>
> }
>
> //attempt to make templated version pure - no dice
> pure immutable(T)[] fooPT(T)(immutable(T)[] x, immutable(T)[] y){
>
> immutable(T)[] fooState;
>
> immutable(T)[] bar(immutable(T)[] x){
> fooState = "hello ";
> return x ~ y;
> }
>
> return fooState ~ bar(x);
>
> }
>
>
> void main(){
> writeln(foo("p", "c"));
> writeln(fooT("p", "c"));
> //writeln(fooPT("p", "c"));
>
>
// Alain Picard -- the states
// figure how to acknowledge
// put in my string util module
//
module dcsv;
import std.string;
version(unittest){import std.conv;}
// Parser states of the csvSplit state machine.
private enum {OUTSIDE_FIELD, IN_FIELD, IN_QUOTED_FIELD, AFTER_ENDING_QUOTE}
/* csvSplit
 *
 * Splits a line into its csv formatted fields.
 * Strips leading and trailing whitespace (space, tab) from fields, unless
 * quoted. Whitespace in the interior of an unquoted field is preserved.
 * Inside a quoted field a doubled quote character stands for one literal
 * quote character.
 * The line can be string, wstring or dstring.
 *
 * Params:
 *   line         = the text of one record (no line terminator handling is
 *                  done here)
 *   fieldSepChar = field separator; may itself be ' ' or '\t', in which case
 *                  it is not treated as strippable whitespace
 *   quoteChar    = quote character
 *
 * Returns: the fields in order. A blank (empty or whitespace-only) line
 *          yields an empty array.
 *
 * Throws: Exception on a quote inside an unquoted field, on an unterminated
 *         quoted field, and on any character other than whitespace or the
 *         separator after a closing quote (including a quote that follows
 *         such whitespace).
 */
public immutable(T)[][] csvSplit(T)(immutable(T)[] line,
                                    T fieldSepChar=',',
                                    T quoteChar = '"')
{
    alias immutable(T)[] tstring;
    tstring nullField = "";
    int state = OUTSIDE_FIELD;
    size_t fieldBeg;                    // index of first char of current field
    size_t charPos;                     // index of the char being examined
    bool atEol;                         // true for the final end-of-line step
    tstring[] fields;
    bool fieldHasDoubleQuoteChars;      // quoted field contains "" escapes
    size_t numTrailingWhitespaceChars;  // whitespace run directly before charPos

    // Collapses every doubled quote character in str into a single one.
    tstring reduceDoubles(tstring str){
        tstring ret;
        bool afterQuote = false;
        foreach(T c; str){
            if (c == quoteChar) {
                if (afterQuote){
                    afterQuote = false;
                    continue;           // drop the second quote of a pair
                }
                afterQuote = true;
            }
            ret ~= c;
        }
        return ret;
    }

    // Space and tab are strippable whitespace, unless the caller chose one
    // of them as the quote or separator character.
    bool isWhitespace(T c){
        return c != quoteChar && c != fieldSepChar && (c == ' ' || c == '\t');
    }

    // Emits line[fieldBeg .. end] minus the trailing whitespace run.
    void putField(size_t end){
        end -= numTrailingWhitespaceChars;
        tstring str = line[fieldBeg .. end];
        if (fieldHasDoubleQuoteChars)
            str = reduceDoubles(str);
        fields ~= str;
        state = OUTSIDE_FIELD;
    }

    bool parseOutsideField(){
        if (atEol) {
            if (fields.length > 0)      //skip all blank lines
                fields ~= nullField;    //emit last "" field
            return false;
        }
        fieldHasDoubleQuoteChars = false;
        T c = line[charPos];
        if (isWhitespace(c)){
            //just skip it
        }
        else if (c == fieldSepChar)
            fields ~= nullField;        // emit "" field
        else if (c == quoteChar){
            state = IN_QUOTED_FIELD;
            fieldBeg = charPos + 1;     // content starts after the quote
            numTrailingWhitespaceChars = 0;
        }
        else {
            state = IN_FIELD;
            fieldBeg = charPos;
            numTrailingWhitespaceChars = 0;
        }
        return true;
    }

    bool parseInField(){
        //in the midst of an unquoted field
        if (atEol) {
            putField(line.length);
            return false;
        }
        T c = line[charPos];
        if (isWhitespace(c))
            numTrailingWhitespaceChars++;
        else if (c == fieldSepChar)
            putField(charPos);
        else if (c == quoteChar)
            throw new Exception("Unexpected quote in unquoted field");
        else
            // The whitespace seen so far was interior, not trailing.
            numTrailingWhitespaceChars = 0;
        return true;
    }

    bool parseInQuotedField(){
        //in the midst of a quoted field
        if (atEol)
            throw new Exception("Unbalanced initial \".");
        if (line[charPos] == quoteChar)
            state = AFTER_ENDING_QUOTE;
        return true;
    }

    bool parseAfterEndingQuote(){
        if (atEol) {
            putField(line.length - 1);  // -1 excludes the closing quote
            return false;
        }
        T c = line[charPos];
        // Only a quote directly following the previous quote is an escape.
        if (c == quoteChar && numTrailingWhitespaceChars == 0){
            fieldHasDoubleQuoteChars = true; //false alarm
            state = IN_QUOTED_FIELD;         //continue parsing quoted field
        }
        else if (c == fieldSepChar)
            putField(charPos - 1);      // -1 excludes the closing quote
        else if (isWhitespace(c))
            numTrailingWhitespaceChars++;
        else
            throw new Exception("Unexpected char after end of quoted field.");
        return true;
    }

    // Runs one step of the state machine; false means parsing is finished.
    bool parse(){
        bool ret;
        switch (state){
            case OUTSIDE_FIELD:
                ret = parseOutsideField(); break;
            case IN_FIELD:
                ret = parseInField(); break;
            case IN_QUOTED_FIELD:
                ret = parseInQuotedField(); break;
            case AFTER_ENDING_QUOTE:
                ret = parseAfterEndingQuote(); break;
            default:
                ret = false;
        }
        return ret;
    }

    while (charPos < line.length && parse()){
        charPos++;
    }
    // One extra step lets the current state flush or reject the last field.
    atEol = true;
    parse();
    return fields;
}
version(unittest){
    /* Test helper: splits line with csvSplit (default separator and quote)
     * and renders the fields as one string with a '|' before, between and
     * after them, e.g. two fields a and b become "|a|b|".
     */
    immutable(T)[] joinfields(T)(immutable(T)[] line){
        immutable(T)[] rendered = "|";
        foreach(field; csvSplit(line)){
            rendered ~= field ~ "|";
        }
        return rendered;
    }

    /* Test helper: asserts that line splits into the fields rendered in
     * joined, once each for the string, wstring and dstring instantiations.
     * The original narrow line is used as the assertion message every time.
     */
    void csvAssert(string line, string joined){
        // char
        assert(joinfields(line) == joined, line);
        // wchar
        assert(joinfields(to!(wstring)(line)) == to!(wstring)(joined), line);
        // dchar
        assert(joinfields(to!(dstring)(line)) == to!(dstring)(joined), line);
    }
}
unittest {
    // Each row is {input line, expected fields rendered by joinfields}.
    string[2][] cases = [
        // empty fields around separators
        [`,`,   `|||`],
        [`,a`,  `||a|`],
        [`a,`,  `|a||`],
        [`a`,   `|a|`],
        // whitespace next to separators is stripped
        [`a, `, `|a||`],
        [` a,`, `|a||`],
        [`a,,`, `|a|||`],
        // quoted fields keep interior whitespace; outer whitespace is dropped
        [`a,"b b"`,  `|a|b b|`],
        [`a,"b b" `, `|a|b b|`],
        [`a, "b b"`, `|a|b b|`],
        // doubled quotes inside a quoted field collapse to one quote
        [`a,"b""c""b" `, `|a|b"c"b|`],
        [`a,"b""c""" `,  `|a|b"c"|`],
        [`a,"""c""b" `,  `|a|"c"b|`],
    ];
    foreach(pair; cases){
        csvAssert(pair[0], pair[1]);
    }
}
I added a DOS version and a little benchmark program using csvSplit. Also, note that this is a draft.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: dcsv_dos.d
Type: text/x-dsrc
Size: 5559 bytes
Desc: not available
URL: <http://lists.puremagic.com/pipermail/digitalmars-d/attachments/20090913/d57fda1c/attachment.d>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: dcsv_ut.d
Type: text/x-dsrc
Size: 531 bytes
Desc: not available
URL: <http://lists.puremagic.com/pipermail/digitalmars-d/attachments/20090913/d57fda1c/attachment-0001.d>
More information about the Digitalmars-d
mailing list