// Alain Picard -- the states // figure how to acknowledge // put in my string util module // module dcsv; import std.string; version(unittest){import std.conv;} private enum {OUTSIDE_FIELD, IN_FIELD, IN_QUOTED_FIELD, AFTER_ENDING_QUOTE} /* csvSplit * * Splits a line into its csv formatted fields * Strips leading and trailing whitespace from fields, unless quoted * The line can be string, wstring or dstring * */ public immutable(T)[][] csvSplit(T)(immutable(T)[] line, T fieldSepChar=',', T quoteChar = '"') { alias immutable(T)[] tstring; immutable int EOL = -1; tstring nullField = ""; int state; int fieldBeg; int charPos; tstring[] fields; bool fieldHasDoubleQuoteChars; int numTrailingWhitespaceChars; tstring reduceDoubles(T)(tstring str, T q){ tstring ret; bool afterQuote = false; foreach(T c;str){ if (c==q) { if (afterQuote){ afterQuote = false; continue; } else afterQuote = true; } ret ~= c; } return ret; } bool isWhitespace(T c){ return (c != quoteChar) && (c==' ' || c=='\t'); } void putField (int end){ end = end - numTrailingWhitespaceChars; tstring str = line[fieldBeg..end]; if (fieldHasDoubleQuoteChars) str = reduceDoubles(str, quoteChar); fields ~= str; state = OUTSIDE_FIELD; } bool parseOutsideField(){ if (charPos==EOL) { if (fields.length > 0) //skip all blank lines fields ~= nullField; //emit last "" field return false; } fieldHasDoubleQuoteChars = false; T c = line[charPos]; if (isWhitespace(c)){} //just skip it else if (c == fieldSepChar) fields ~= nullField; // emit "" field else if (c == quoteChar){ state = IN_QUOTED_FIELD; fieldBeg = charPos + 1; numTrailingWhitespaceChars = 0; } else { state = IN_FIELD; fieldBeg = charPos; numTrailingWhitespaceChars = 0; } return true; } bool parseInField(){ //in the midst of an unquoted field if (charPos==EOL) { putField(line.length); return false; } T c = line[charPos]; if (isWhitespace(c)) numTrailingWhitespaceChars++; else if (c == fieldSepChar) putField(charPos); else if (c == quoteChar) throw new Exception("Unexpected quote in unquoted field"); return true; } bool parseInQuotedField(){ //in the midst of a quoted field if (charPos==EOL) throw new Exception("Unbalanced initial \"."); T c = line[charPos]; if (c == quoteChar) state = AFTER_ENDING_QUOTE; return true; } bool parseAfterEndingQuote(){ if (charPos==EOL) { putField(line.length-1); return false; } T c = line[charPos]; if (c == quoteChar){ fieldHasDoubleQuoteChars = true; //false alarm state = IN_QUOTED_FIELD; //continue parsing quoted field } else if (c == fieldSepChar) putField(charPos-1); else if (isWhitespace(c)) numTrailingWhitespaceChars++; else throw new Exception("Unexpected char after end of quoted field."); return true; } bool parse(){ bool ret; switch (state){ case OUTSIDE_FIELD: ret = parseOutsideField(); break; case IN_FIELD: ret = parseInField(); break; case IN_QUOTED_FIELD: ret = parseInQuotedField(); break; case AFTER_ENDING_QUOTE: ret = parseAfterEndingQuote(); break; default: ret = false; } return ret; } //nullField = to!(tstring)(""); state = OUTSIDE_FIELD; while (charPos