module fmtconv;

import tango.io.FileConst;
import tango.io.Stdout;
import tango.text.Ascii;
import Int = tango.text.convert.Integer : toUtf8;
import Float = tango.text.convert.Float;

const NL = FileConst.NewlineString;

/**
 * Entry point: converts the single printf-style format string given on the
 * command line and prints the generated Stdout call chain.
 *
 * Params:
 *     cmdArgs = raw command line; element 0 is skipped, exactly one further
 *               element (the format string) is accepted.
 */
void main(char[][] cmdArgs) {
	args = cmdArgs[1..$];

	if (args.length != 1) {
		Stderr("Just one argument, please.").newline.print("This limitation might be removed sooner or later").newline;
		return;
	}

	formatConv(args[0]);
	doOutput();
}

import tango.core.Exception;
import tango.text.Util;

unittest {
	// Reports the outcome of one check on Stderr instead of aborting.
	void doAssert(size_t line, bool b, char[] msg) {
		if (b)
			Stderr("Test on line ")(line)(" succeeded.").newline;
		else
			Stderr("Test on line ")(line)(" failed. ")(msg).newline;
	}

	// Converts `i` and compares the first generated format string with `o`.
	void check(size_t line, char[] i, char[] o) {
		formatConv(i);

		// formatConv may have produced nothing (e.g. after a parse error),
		// so do not index an empty array.
		char[] r = output.length ? output[0].fmtString : null;

		if (r)
			doAssert(line, r == o, "'" ~ i ~ "' became '" ~ r ~ "', expected '" ~ o ~ "'");
		else
			Stderr("'" ~ i ~ "' failed, expected '" ~ o ~ "'").newline;

		output.length = 0;
	}

	check(__LINE__, "%s", "{}");
	check(__LINE__, "%b", "{:b}");
	check(__LINE__, "%d", "{:d}");
	check(__LINE__, "%o", "{:o}");
	check(__LINE__, "%x", "{:x}");
	check(__LINE__, "%X", "{:X}");
	check(__LINE__, "%f", "{:6f}");

	// not as simple as {:6e}: Tango doesn't emit an exponent if it's zero, Phobos does
	check(__LINE__, "%e", null);

	// very complicated
	check(__LINE__, "%g", null);
	check(__LINE__, "%a", null);

	// FIRK. {:2d} means "if arg.length < 2, pad with zeros to fill, else if > 2, take only the last 2"
	// whereas %2d means "if arg.length < 2, pad with zeros to fill"
	check(__LINE__, "%4.2d", "{,4:2d}");

	// Always fails, so a unittest-enabled build stops here
	// (presumably intentional - confirm before removing).
	assert (0);
}

/**
 * Translates the printf-style format string `s` into entries appended to the
 * module-level `output` array.
 *
 * Literal text is collected into JUST_PRINT entries ('{' is doubled to "{{").
 * Each simple conversion specifier becomes one complete "{...}" group inside a
 * JUST_FORMAT entry, and the generated argument name is recorded in that
 * entry's `args` so doOutput can emit it. Specifiers with variable width or
 * precision are delegated to handleTrickyCases.
 *
 * On a malformed specifier an error is written to Stderr and the function
 * returns early; entries produced so far remain in `output`.
 *
 * Params:
 *     s = the printf-style format string to convert
 */
void formatConv(char[] s) {
	.s   = s;
	.pos = 0;

	// Appends literal text to the trailing JUST_PRINT entry, creating it if needed.
	void addStr(char[] text...) {
		if (!(output.length && output[$-1].type == Output.Type.JUST_PRINT))
			output ~= Output(Output.Type.JUST_PRINT);

		output[$-1].fmtString ~= text;
	}

	while (next() != char.init) {
		if (c == '{') {
			addStr("{{");
			continue;
		}
		if (c != '%') {
			addStr(c);
			continue;
		}

		// Every specifier starts from a clean slate; otherwise flags, width,
		// precision and type of an earlier specifier (or the PUT marker left
		// behind by "%%") would leak into this one.
		flags     = Flags.NONE;
		width     = 0;
		precision = 0;
		type      = Type.ANY;

		try
			extractMetadata();
		catch (Exception e) {
			Stderr("Error: ")(e)(" at around position ")(pos).newline;
			return;
		}

		if (flags == Flags.PUT) {
			// "%%": c still holds the literal '%'
			addStr(c);
			continue;
		}

		theArg = next("ARG");

		if (handleTrickyCases())
			continue;

		// Build the whole "{...}" group locally so that every specifier gets
		// its own opening brace, even when it is merged into an existing
		// JUST_FORMAT entry.
		char[] spec;
		spec ~= '{';

		char[] formatString;

		if (width) {
			if (flags & Flags.LEFT_JUSTIFY)
				width *= -1;

			if (flags & Flags.LEADING_ZEROS)
				formatString ~= toUtf8(width);
			else
				spec ~= "," ~ toUtf8(width);
		}

		if (floatingType() && !precision)
			precision = 6;

		if (precision)
			formatString ~= toUtf8(precision);

		switch (type) {
			case Type.DECIMAL:          formatString ~= 'd';        break;
			case Type.BINARY:           formatString ~= 'b';        break;
			case Type.OCTAL:            formatString ~= 'o';        break;
			case Type.HEX:              formatString ~= upper('x'); break;
			case Type.FLOAT_NORMAL:     formatString ~= 'f';        break;
			case Type.FLOAT_E_NOTATION: formatString ~= 'e';        break;
			default: break;
		}

		if (formatString.length)
			spec ~= ":" ~ formatString;

		spec ~= '}';

		if (!(output.length && output[$-1].type == Output.Type.JUST_FORMAT))
			output ~= Output(Output.Type.JUST_FORMAT);

		output[$-1].fmtString ~= spec;
		// doOutput appends these after the format string in the .format(...) call
		output[$-1].args      ~= theArg;
	}
}

/**
 * Prints the contents of `output` to Stdout as one chained statement of the
 * form Stdout.format("...", args).format(...);
 *
 * JUST_PRINT and JUST_FORMAT entries are concatenated into a single quoted
 * string; the arguments of JUST_FORMAT entries are queued and written when
 * the current .format( call is closed. OWN_FORMAT entries get their own
 * .format( call because their format string is an expression, not a literal.
 *
 * NEED_TMP entries are not handled here yet and reach the default assert.
 */
void doOutput() {
	bool inString   = false; // currently inside an opened '"' literal
	bool stdoutOpen = false; // currently inside an opened ".format(" call
	bool first      = true;  // the leading "Stdout" has not been printed yet

	char[][] queue;          // arguments waiting for the current call to close

	// Opens a ".format(" call if none is open.
	void open() {
		if (!stdoutOpen) {
			assert (!inString);

			if (first) {
				Stdout("Stdout");
				first = false;
			}

			Stdout(".format(");
			stdoutOpen = true;
		}
	}

	// Terminates the string literal, flushes queued arguments, closes the call.
	void close() {
		if (stdoutOpen) {
			if (inString) {
				Stdout('"');
				inString = false;
			}

			foreach (arg; queue)
				Stdout(", ")(arg);
			queue.length = 0;

			Stdout(')');
			stdoutOpen = false;
		} else
			assert (queue.length == 0);
	}

	// Emits `s` either inside the current string literal (quoted == true)
	// or as a bare expression in a fresh .format( call.
	void append(char[] s, bool quoted) {
		if (quoted) {
			open();

			if (!inString) {
				Stdout('"');
				inString = true;
			}

			Stdout(s);
		} else {
			// don't want to use ~
			// so just use a separate .format call for this
			close();
			open();
			Stdout(s);
		}
	}

	foreach (op; output) {
		switch (op.type) {
			case Output.Type.JUST_FORMAT:
				queue ~= op.args;
				// deliberate fall-through: the format text is emitted like literal text

			case Output.Type.JUST_PRINT:
				append(op.fmtString, true);
				break;

			// non-constant format string
			case Output.Type.OWN_FORMAT:
				append(op.fmtString, false);
				queue ~= op.args;
				close();
				break;

			default: assert (false);
		}

		/+
		if (op.type == Output.Type.OWN_FORMAT)
			append(op.string, false);
		else if (op.type == Output.Type.STANDALONE) {
			if (inString) {
				Stdout('"');
				inString = false;
			}
			Stdout(");").newline.print(op.string).newline.print("Stdout(");
			needAppend = false;
		} else
			append(op.string, true);+/
	}

	close();
	Stdout(';').newline;
}

/// One unit of generated output, consumed by doOutput.
struct Output {
	enum Type : ubyte {
		JUST_PRINT,  // string: no argument needed
		JUST_FORMAT, // just pass a format string and an argument normally
		OWN_FORMAT,  // the format string isn't constant, can't embed it in string normally
		NEED_TMP     // like OWN_FORMAT, but also needs a temporary variable
	}

	Type type;

	char[] fmtString; // with JUST_PRINT, not really a format string...
	char[][] args;    // argument names to pass after the format string
}

/// Everything generated so far, in order of appearance in the input.
Output[] output;

/+
still need:
	ALT_FORMAT, UPPER_CASE, PREFIX_SPACE

ALT_FORMAT matters if type is:
	Octal
	Hex
	any floating

UPPER_CASE matters if type is:
	Hex
	any floating

PREFIX_SPACE matters only if PREFIX_PLUS is off and the number is positive
+/

// the function that handles cases that lead to Output.Type.OWN_FORMAT
// handleTrickyCases needs to handle absolutely all flags in some cases
// maybe it should modify the flags to turn off everything it's taken care of?
// or just return a bool if it's done with this?
// in the variable width + variable precision spaghetti
// the problem is that even if the width or precision are not used by the type
// the arguments have to be taken, for instance:
// writefln("%.*s", 5, "foo");
// the 5 is ignored in the above
// output a warning and just fix it by removing reference to the precision
// however, do note for instance:
// writefln("%.*s", foo, bar);
// if bar above is numeric, foo _is_ used
// have an option like --assume-s-means-string which can simplify the above
// but don't do it by default

/+
the only types which don't use precision are actually:
	bool
	arrays
so we can't use such a switch, even %.4s means something
the only thing we could do is have extensions like %y and %z for bool and arrays
but that's stupid
+/

// that actually makes complicated cases really tricky
// since if it's %.*s for instance you need to do like:
// static if (isIntegerType(typeof(_ARG_0)))
//    use tango.text.convert.Integer
// else static if (isRealType(
// etc...

// OH, SNAP.
// %f %g %e %a %s all handle complex and imaginary
// Tango doesn't handle them at all.
// lovely!

/**
 * Handles specifiers whose width and/or precision is supplied as an argument
 * ('*'), which cannot be expressed as a constant format string.
 *
 * Reads the module-level `flags`, `type` and `theArg` and appends to
 * `output`. Only two combinations generate real code so far (variable width
 * and precision without a sign prefix, and with a prefix for Type.ANY); the
 * remaining branches append an empty JUST_PRINT placeholder.
 *
 * Returns: true when the specifier was consumed here, false when neither
 *          width nor precision is variable and the caller must handle it.
 */
bool handleTrickyCases() {
	if (flags & Flags.VARIABLE_WIDTH) {
		// generated name of the argument supplying the width
		auto widthArg = next("WIDTH");

		if (flags & Flags.VARIABLE_PRECISION) {
			// generated name of the argument supplying the precision
			auto precisionArg = next("PRECISION");

			char prefix = getPrefix();

			if (prefix) {
				auto t = next("tmp", false);

				char[] style = "Style.Signed";

				switch (type) {
					case Type.BINARY:
						style = "Style.Binary";
						// deliberate fall-through: binary shares the integer handling

					case Type.DECIMAL:
						// taken care of varwidth, varprec, type, uppercase, prefix
						// altformat doesn't apply
						// still leftjustify, leadingzeros

						char[] intFlags = "Flags." ~ (prefix == '+' ? "Plus" : "Space");

						if (flags & Flags.LEADING_ZEROS)
							intFlags ~= " | Flags.ZERO";

						/+output ~= Output(
							Output.Type.OWN_FORMAT,
							// how to pad with leading zeros?
							// the func fills the buffer with zeros
							// but we have a varying minimum and maximum width
							// if we don't pad with leading zeros, the result is truncated to the right width
							// if we do pad,
							"tango.text.convert.Integer.format(new char[0100], " ~theArg~ ", " ~style~ ", " ~intFlags~ ");"
						);+/
						output ~= Output(
							Output.Type.JUST_PRINT,
							""
						);
						break;

					case Type.FLOAT_E_NOTATION:
					case Type.FLOAT_NORMAL:
					case Type.FLOAT_VARIABLE:
					case Type.FLOAT_HEX:
						output ~= Output(
							Output.Type.JUST_PRINT,
							""
						);
						break;

					case Type.ANY:
						output ~= Output(
							Output.Type.NEED_TMP,
							"auto " ~t~ " = "
							"Stdout.layout.convert(Stdout.layout.convert(\"{{:{}}\", " ~precisionArg~ "), " ~theArg~ ");" ~ NL ~
							"Stdout.format(Stdout.layout.convert("
							"Stdout.layout.convert(\"{{,{}}\", " ~widthArg~ " - " ~t~ ".length), '" ~prefix~ "')).format(" ~t~ ");"
						);
						break;

					case Type.OCTAL, Type.HEX:
						// can't have a prefix
					default: assert (0);
				}
			} else {
				// no prefix, much easier
				output ~= Output(
					Output.Type.OWN_FORMAT,
					"Stdout.layout.convert(Stdout.layout.convert(\"{{,{}:{}}\", " ~widthArg~ ", " ~precisionArg~ "), " ~theArg~ ")"
				);
			}
		} else {
			// variable width, not variable precision
			// %f has default precision 6
			char prefix = getPrefix();

			output ~= Output(
				Output.Type.JUST_PRINT,
				""
			);
		}
	} else {
		if (flags & Flags.VARIABLE_PRECISION) {
			auto precisionArg = next("PRECISION");

			char prefix = getPrefix();

			output ~= Output(
				Output.Type.JUST_PRINT,
				""
			);
		} else {
			// not variable width, not variable precision: no problem
			return false;
		}
	}

	return true;
}

/// Returns '+' or ' ' according to the prefix flags, or 0 when neither is set.
char getPrefix() {
	if (flags & Flags.PREFIX_PLUS)
		return '+';
	else if (flags & Flags.PREFIX_SPACE)
		return ' ';
	else
		return 0;
}

/// Generated name of the argument belonging to the specifier being converted.
char[] theArg;

/**
 * Generates a fresh placeholder identifier for the category `s`.
 *
 * A separate counter is kept per category, starting at 1, so successive calls
 * yield _ARG_1, _ARG_2, ... (or tmp_1, tmp_2, ... when `prefix` is false).
 *
 * Params:
 *     s      = category name embedded in the identifier
 *     prefix = whether the identifier gets a leading underscore
 */
char[] next(char[] s, bool prefix = true) {
	static uint[char[]] a;

	uint n;

	if (auto p = s in a)
		n = ++*p;
	else {
		// remember that number 1 has been handed out; storing 0 here would
		// make the second call return 1 again
		a[s] = 1;
		n    = 1;
	}

	return Stdout.layout.convert(prefix ? "_{}_{}" : "{}_{}", s, n);
}

char[][] args;
size_t j;

/// Returns the next entry of `args`, or null once all have been consumed.
char[] nextArg() {
	if (j == args.length)
		return null;
	else
		return args[j++];
}

/**
 * Returns a freshly allocated copy of `s`, upper-cased when the UPPER_CASE
 * flag is set.
 *
 * The copy is made before converting because `s` is a typesafe variadic
 * parameter whose storage may not outlive this call, and toUpper with no
 * destination buffer is understood to convert in place (confirm against the
 * Tango version in use).
 */
char[] upper(char[] s...) {
	auto copy = s.dup;

	if (flags & Flags.UPPER_CASE)
		return toUpper(copy);
	else
		return copy;
}

/// Bit flags describing one conversion specifier.
enum Flags : ushort {
	NONE               = 0,
	LEFT_JUSTIFY       = 1 << 0,
	PREFIX_PLUS        = 1 << 1,
	PREFIX_SPACE       = 1 << 2,
	ALT_FORMAT         = 1 << 3,
	LEADING_ZEROS      = 1 << 4,
	UPPER_CASE         = 1 << 5,
	VARIABLE_WIDTH     = 1 << 6,
	VARIABLE_PRECISION = 1 << 7,
	PUT                = ushort.max // marker for "%%": emit a literal '%'
}

/// Conversion kind selected by the specifier's type character.
enum Type : ubyte {
	ANY,              // %s
	BINARY,           // %b
	DECIMAL,          // %d
	OCTAL,            // %o
	HEX,              // %x
	FLOAT_E_NOTATION, // %e
	FLOAT_NORMAL,     // %f
	FLOAT_VARIABLE,   // %g
	FLOAT_HEX         // %a
}

Flags flags;
Type type;

/// True when `type` is one of the four floating-point conversions.
bool floatingType() {
	return (
		type == Type.FLOAT_E_NOTATION ||
		type == Type.FLOAT_NORMAL     ||
		type == Type.FLOAT_VARIABLE   ||
		type == Type.FLOAT_HEX
	);
}

int width, precision;

////////////////////////////////
// stuff for reading the original format string

char[] s;       // the format string being converted
size_t pos = 0; // index of the next unread character in s
char c;         // the character most recently read by next()

/// Reads the next character into `c` and returns it; returns char.init
/// (leaving `c` untouched) once the input is exhausted.
char next() {
	if (pos == s.length)
		return char.init;
	else
		return (c = s[pos++]);
}

/// Like next(), but running out of input is an error.
/// Throws: Exception when the format string ends prematurely.
char nextFmt() {
	if (next() == char.init)
		throw new Exception("Invalid format string");
	else
		return c;
}

/**
 * Parses one conversion specifier. On entry `c` is the introducing '%'; on
 * return `flags`, `width`, `precision` and `type` describe the specifier,
 * or `flags` equals Flags.PUT for "%%".
 *
 * Throws: Exception when the format string ends inside the specifier, when
 *         the type character is unknown, or when a number overflows.
 */
void extractMetadata() {
	assert (c == '%');

	// start from a clean slate so nothing leaks in from an earlier specifier
	flags     = Flags.NONE;
	width     = 0;
	precision = 0;
	type      = Type.ANY;

	// flags
	loop: for (;;) switch (nextFmt()) {
		case '-': flags |= Flags.LEFT_JUSTIFY;  break;
		case '+': flags |= Flags.PREFIX_PLUS;   break;
		case '#': flags |= Flags.ALT_FORMAT;    break;
		case '0': flags |= Flags.LEADING_ZEROS; break;
		case ' ': flags |= Flags.PREFIX_SPACE;  break;

		case '%':
			if (!flags) {
				flags = Flags.PUT;
				return;
			}
			// a '%' after real flags is not a valid type: fall through
		default: break loop;
	}

	if (flags & Flags.PREFIX_PLUS)
		flags &= ~Flags.PREFIX_SPACE;
	if (flags & Flags.LEFT_JUSTIFY)
		flags &= ~Flags.LEADING_ZEROS;

	// width
	if (c == '*') {
		flags |= Flags.VARIABLE_WIDTH;
		nextFmt();
	} else if (c >= '0' && c <= '9')
		width = getInt();

	// precision
	if (c == '.') {
		flags &= ~Flags.LEADING_ZEROS;

		if (nextFmt() == '*') {
			flags |= Flags.VARIABLE_PRECISION;
			nextFmt();
		} else if (c >= '0' && c <= '9')
			precision = getInt();
	}

	// getInt leaves char.init in c when the digits ran up to the end of input
	if (c == char.init)
		throw new Exception("Invalid format string");

	// type
	if (c >= 'A' && c <= 'Z') {
		flags |= Flags.UPPER_CASE;
		c += 'a' - 'A';
	}

	switch (c) {
		case 's': type = Type.ANY;              break;
		case 'b': type = Type.BINARY;           break;
		case 'd': type = Type.DECIMAL;          break;
		case 'o': type = Type.OCTAL;            break;
		case 'x': type = Type.HEX;              break;
		case 'e': type = Type.FLOAT_E_NOTATION; break;
		case 'f': type = Type.FLOAT_NORMAL;     break;
		case 'g': type = Type.FLOAT_VARIABLE;   break;
		case 'a': type = Type.FLOAT_HEX;        break;
		default: throw new Exception("Unidentified format char '" ~ c ~ "'");
	}

	// sign prefixes are dropped for binary, octal and hex conversions
	if (type == Type.BINARY || type == Type.OCTAL || type == Type.HEX)
		flags &= ~(Flags.PREFIX_PLUS | Flags.PREFIX_SPACE);
}

/**
 * Reads a run of decimal digits starting at `c` and returns its value.
 * Afterwards `c` is the first non-digit character, or char.init when the
 * input ended.
 *
 * Throws: Exception("Int overflow") when the value does not fit in an int.
 */
int getInt() {
	int n = 0;

	for (; c >= '0' && c <= '9'; c = next()) {
		int digit = c - '0';

		// test before multiplying: a wrapped product is not always negative
		if (n > (int.max - digit) / 10)
			throw new Exception("Int overflow");

		n = n * 10 + digit;
	}

	return n;
}