[Issue 5221] entity.c: Merge Walter's list with Thomas'

d-bugmail at puremagic.com d-bugmail at puremagic.com
Tue Nov 16 06:09:11 PST 2010


http://d.puremagic.com/issues/show_bug.cgi?id=5221



--- Comment #2 from Iain Buclaw <ibuclaw at ubuntu.com> 2010-11-16 06:07:53 PST ---
(From update of attachment 815)
diff -ur src.orig/entity.c src/entity.c
--- src.orig/entity.c    2010-03-31 01:26:18.000000000 +0100
+++ src/entity.c    2010-11-16 14:01:58.423055202 +0000
@@ -9,6 +9,7 @@


 #include <string.h>
+#include <ctype.h>

 /*********************************************
  * Convert from named entity to its encoding.
@@ -23,7 +24,6 @@
     unsigned short value;
 };

-#if IN_GCC
 static NameId namesA[]={
         "Aacgr",        0x0386,
         "aacgr",        0x03AC,
@@ -42,7 +42,9 @@
         "agr",          0x03B1,
         "Agrave",       0x00C0,
         "agrave",       0x00E0,
+        "alefsym",      0x2135,
         "aleph",        0x2135,
+        "Alpha",        0x0391,
         "alpha",        0x03B1,
         "Amacr",        0x0100,
         "amacr",        0x0101,
@@ -76,9 +78,11 @@
         "bcong",        0x224C,
         "Bcy",          0x0411,
         "bcy",          0x0431,
+        "bdquo",        0x201E,
         "becaus",       0x2235,
         "bepsi",        0x220D,
         "bernou",       0x212C,
+        "Beta",         0x0392,
         "beta",         0x03B2,
         "beth",         0x2136,
         "Bgr",          0x0392,
@@ -162,6 +166,7 @@
         "CHcy",         0x0427,
         "chcy",         0x0447,
         "check",        0x2713,
+        "Chi",          0x03A7,
         "chi",          0x03C7,
         "cir",          0x25CB,
         "circ",         0x005E,
@@ -178,6 +183,7 @@
         "coprod",       0x2210,
         "copy",         0x00A9,
         "copysr",       0x2117,
+        "crarr",        0x21B5,
         "cross",        0x2717,
         "cuepr",        0x22DE,
         "cuesc",        0x22DF,
@@ -281,17 +287,21 @@
         "Eogon",        0x0118,
         "eogon",        0x0119,
         "epsi",         0x220A,
+        "Epsilon",      0x0395,
+        "epsilon",      0x03B5,
         "epsis",        0x220A,
         "epsiv",        0x03B5,
         "equals",       0x003D,
         "equiv",        0x2261,
         "erDot",        0x2253,
         "esdot",        0x2250,
+        "Eta",          0x0397,
         "eta",          0x03B7,
         "ETH",          0x00D0,
         "eth",          0x00F0,
         "Euml",         0x00CB,
         "euml",         0x00EB,
+        "euro",         0x20AC,
         "excl",         0x0021,
         "exist",        0x2203,
         NULL,           0
@@ -325,6 +335,7 @@
         "frac56",       0x215A,
         "frac58",       0x215D,
         "frac78",       0x215E,
+        "frasl",        0x2044,
         "frown",        0x2322,
         NULL,           0
 };
@@ -425,6 +436,7 @@
         "iocy",         0x0451,
         "Iogon",        0x012E,
         "iogon",        0x012F,
+        "Iota",         0x0399,
         "iota",         0x03B9,
         "iquest",       0x00BF,
         "isin",         0x220A,
@@ -450,6 +462,7 @@
 };

 static NameId namesK[]={
+        "Kappa",        0x039A,
         "kappa",        0x03BA,
         "kappav",       0x03F0,
         "Kcedil",       0x0136,
@@ -523,7 +536,9 @@
         "lozf",         0x2726,
         "lpar",         0x0028,
         "lrarr2",       0x21C6,
+        "lrm",          0x200E,
         "lrhar2",       0x21CB,
+        "lsaquo",       0x2039,
         "lsh",          0x21B0,
         "lsim",         0x2272,
         "lsqb",         0x005B,
@@ -561,6 +576,7 @@
         "mldr",         0x2026,
         "mnplus",       0x2213,
         "models",       0x22A7,
+        "Mu",           0x039C,
         "mu",           0x03BC,
         "mumap",        0x22B8,
         NULL,           0
@@ -573,8 +589,7 @@
         "nap",          0x2249,
         "napos",        0x0149,
         "natur",        0x266E,
-//      "nbsp",         0x00A0,
-        "nbsp",         32,    // make non-breaking space appear as space
+        "nbsp",         0x00A0,
         "Ncaron",       0x0147,
         "ncaron",       0x0148,
         "Ncedil",       0x0145,
@@ -631,6 +646,7 @@
         "nsupE",        0x2289,
         "Ntilde",       0x00D1,
         "ntilde",       0x00F1,
+        "Nu",           0x039D,
         "nu",           0x03BD,
         "num",          0x0023,
         "numero",       0x2116,
@@ -671,10 +687,13 @@
         "ohgr",         0x03C9,
         "ohm",          0x2126,
         "olarr",        0x21BA,
+        "oline",        0x203E,
         "Omacr",        0x014C,
         "omacr",        0x014D,
         "Omega",        0x03A9,
         "omega",        0x03C9,
+        "Omicron",      0x039F,
+        "omicron",      0x03BF,
         "ominus",       0x2296,
         "oplus",        0x2295,
         "or",           0x2228,
@@ -709,6 +728,7 @@
         "PHgr",         0x03A6,
         "phgr",         0x03C6,
         "Phi",          0x03A6,
+        "phi",          0x03C6,
         "phis",         0x03C6,
         "phiv",         0x03D5,
         "phmmat",       0x2133,
@@ -780,13 +800,16 @@
         "rgr",          0x03C1,
         "rhard",        0x21C1,
         "rharu",        0x21C0,
+        "Rho",          0x03A1,
         "rho",          0x03C1,
         "rhov",         0x03F1,
         "ring",         0x02DA,
         "rlarr2",       0x21C4,
         "rlhar2",       0x21CC,
+        "rlm",          0x200F,
         "rpar",         0x0029,
         "rpargt",       0xE291,
+        "rsaquo",       0x203A,
         "rsh",          0x21B1,
         "rsqb",         0x005D,
         "rsquo",        0x2019,
@@ -804,6 +827,7 @@
         "Sacute",       0x015A,
         "sacute",       0x015B,
         "samalg",       0x2210,
+        "sbquo",        0x201A,
         "sbsol",        0xFE68,
         "sc",           0x227B,
         "scap",         0x227F,
@@ -839,6 +863,7 @@
         "shy",          0x00AD,
         "Sigma",        0x03A3,
         "sigma",        0x03C3,
+        "sigmaf",       0x03C2,
         "sigmav",       0x03C2,
         "sim",          0x223C,
         "sime",         0x2243,
@@ -886,6 +911,7 @@

 static NameId namesT[]={
         "target",       0x2316,
+        "Tau",          0x03A4,
         "tau",          0x03C4,
         "Tcaron",       0x0164,
         "tcaron",       0x0165,
@@ -899,7 +925,9 @@
         "tgr",          0x03C4,
         "there4",       0x2234,
         "Theta",        0x0398,
+        "theta",        0x03B8,
         "thetas",       0x03B8,
+        "thetasym",     0x03D1,
         "thetav",       0x03D1,
         "THgr",         0x0398,
         "thgr",         0x03B8,
@@ -961,8 +989,11 @@
         "Uogon",        0x0172,
         "uogon",        0x0173,
         "uplus",        0x228E,
+        "Upsi",         0x03A5,
         "upsi",         0x03C5,
-        "Upsi",         0x03D2,
+        "upsih",        0x03D2,
+        "Upsilon",      0x03A5,
+        "upsilon",      0x03C5,
         "urcorn",       0x231D,
         "urcrop",       0x230E,
         "Uring",        0x016E,
@@ -1052,11 +1083,14 @@
         "zcy",          0x0437,
         "Zdot",         0x017B,
         "zdot",         0x017C,
+        "Zeta",         0x0396,
         "zeta",         0x03B6,
         "Zgr",          0x0396,
         "zgr",          0x03B6,
         "ZHcy",         0x0416,
         "zhcy",         0x0436,
+        "zwj",          0x200D,
+        "zwnj",         0x200C,
         NULL, 0
 };

@@ -1070,297 +1104,17 @@
 int HtmlNamedEntity(unsigned char *p, int length)
 {
     int tableIndex = tolower(*p) - 'a';
-    if (tableIndex >= 0 && tableIndex < 26) {
+    if (tableIndex >= 0 && tableIndex < 26)
+    {
         NameId* names = namesTable[tableIndex];
         int i;

-        for (i = 0; names[i].name; i++){
-                if (strncmp(names[i].name, (char *)p, length) == 0){
-                        return names[i].value;
-                }
+        for (i = 0; names[i].name; i++)
+        {
+            if (strncmp(names[i].name, (char *)p, length) == 0)
+                return names[i].value;
         }
     }
-    error("unrecognized character entity \"%.*s\"", length, p);
-    return -1;
-}
-
-#else //TODO: Merge Walter's list with Thomas'
-
-static NameId names[] =
-{
-    // Entities
-    "quot",     34,
-    "amp",      38,
-    "lt",       60,
-    "gt",       62,
-
-    "OElig",    338,
-    "oelig",    339,
-    "Scaron",   352,
-    "scaron",   353,
-    "Yuml",     376,
-    "circ",     710,
-    "tilde",    732,
-    "ensp",     8194,
-    "emsp",     8195,
-    "thinsp",   8201,
-    "zwnj",     8204,
-    "zwj",      8205,
-    "lrm",      8206,
-    "rlm",      8207,
-    "ndash",    8211,
-    "mdash",    8212,
-    "lsquo",    8216,
-    "rsquo",    8217,
-    "sbquo",    8218,
-    "ldquo",    8220,
-    "rdquo",    8221,
-    "bdquo",    8222,
-    "dagger",   8224,
-    "Dagger",   8225,
-    "permil",   8240,
-    "lsaquo",   8249,
-    "rsaquo",   8250,
-    "euro",     8364,
-
-    // Latin-1 (ISO-8859-1) Entities
-    "nbsp",     160,
-    "iexcl",    161,
-    "cent",     162,
-    "pound",    163,
-    "curren",   164,
-    "yen",      165,
-    "brvbar",   166,
-    "sect",     167,
-    "uml",      168,
-    "copy",     169,
-    "ordf",     170,
-    "laquo",    171,
-    "not",      172,
-    "shy",      173,
-    "reg",      174,
-    "macr",     175,
-    "deg",      176,
-    "plusmn",   177,
-    "sup2",     178,
-    "sup3",     179,
-    "acute",    180,
-    "micro",    181,
-    "para",     182,
-    "middot",   183,
-    "cedil",    184,
-    "sup1",     185,
-    "ordm",     186,
-    "raquo",    187,
-    "frac14",   188,
-    "frac12",   189,
-    "frac34",   190,
-    "iquest",   191,
-    "Agrave",   192,
-    "Aacute",   193,
-    "Acirc",    194,
-    "Atilde",   195,
-    "Auml",     196,
-    "Aring",    197,
-    "AElig",    198,
-    "Ccedil",   199,
-    "Egrave",   200,
-    "Eacute",   201,
-    "Ecirc",    202,
-    "Euml",     203,
-    "Igrave",   204,
-    "Iacute",   205,
-    "Icirc",    206,
-    "Iuml",     207,
-    "ETH",      208,
-    "Ntilde",   209,
-    "Ograve",   210,
-    "Oacute",   211,
-    "Ocirc",    212,
-    "Otilde",   213,
-    "Ouml",     214,
-    "times",    215,
-    "Oslash",   216,
-    "Ugrave",   217,
-    "Uacute",   218,
-    "Ucirc",    219,
-    "Uuml",     220,
-    "Yacute",   221,
-    "THORN",    222,
-    "szlig",    223,
-    "agrave",   224,
-    "aacute",   225,
-    "acirc",    226,
-    "atilde",   227,
-    "auml",     228,
-    "aring",    229,
-    "aelig",    230,
-    "ccedil",   231,
-    "egrave",   232,
-    "eacute",   233,
-    "ecirc",    234,
-    "euml",     235,
-    "igrave",   236,
-    "iacute",   237,
-    "icirc",    238,
-    "iuml",     239,
-    "eth",      240,
-    "ntilde",   241,
-    "ograve",   242,
-    "oacute",   243,
-    "ocirc",    244,
-    "otilde",   245,
-    "ouml",     246,
-    "divide",   247,
-    "oslash",   248,
-    "ugrave",   249,
-    "uacute",   250,
-    "ucirc",    251,
-    "uuml",     252,
-    "yacute",   253,
-    "thorn",    254,
-    "yuml",     255,
-
-        // Symbols and Greek letter entities
-    "fnof",     402,
-    "Alpha",    913,
-    "Beta",     914,
-    "Gamma",    915,
-    "Delta",    916,
-    "Epsilon",  917,
-    "Zeta",     918,
-    "Eta",      919,
-    "Theta",    920,
-    "Iota",     921,
-    "Kappa",    922,
-    "Lambda",   923,
-    "Mu",       924,
-    "Nu",       925,
-    "Xi",       926,
-    "Omicron",  927,
-    "Pi",       928,
-    "Rho",      929,
-    "Sigma",    931,
-    "Tau",      932,
-    "Upsilon",  933,
-    "Phi",      934,
-    "Chi",      935,
-    "Psi",      936,
-    "Omega",    937,
-    "alpha",    945,
-    "beta",     946,
-    "gamma",    947,
-    "delta",    948,
-    "epsilon",  949,
-    "zeta",     950,
-    "eta",      951,
-    "theta",    952,
-    "iota",     953,
-    "kappa",    954,
-    "lambda",   955,
-    "mu",       956,
-    "nu",       957,
-    "xi",       958,
-    "omicron",  959,
-    "pi",       960,
-    "rho",      961,
-    "sigmaf",   962,
-    "sigma",    963,
-    "tau",      964,
-    "upsilon",  965,
-    "phi",      966,
-    "chi",      967,
-    "psi",      968,
-    "omega",    969,
-    "thetasym", 977,
-    "upsih",    978,
-    "piv",      982,
-    "bull",     8226,
-    "hellip",   8230,
-    "prime",    8242,
-    "Prime",    8243,
-    "oline",    8254,
-    "frasl",    8260,
-    "weierp",   8472,
-    "image",    8465,
-    "real",     8476,
-    "trade",    8482,
-    "alefsym",  8501,
-    "larr",     8592,
-    "uarr",     8593,
-    "rarr",     8594,
-    "darr",     8595,
-    "harr",     8596,
-    "crarr",    8629,
-    "lArr",     8656,
-    "uArr",     8657,
-    "rArr",     8658,
-    "dArr",     8659,
-    "hArr",     8660,
-    "forall",   8704,
-    "part",     8706,
-    "exist",    8707,
-    "empty",    8709,
-    "nabla",    8711,
-    "isin",     8712,
-    "notin",    8713,
-    "ni",       8715,
-    "prod",     8719,
-    "sum",      8721,
-    "minus",    8722,
-    "lowast",   8727,
-    "radic",    8730,
-    "prop",     8733,
-    "infin",    8734,
-    "ang",      8736,
-    "and",      8743,
-    "or",       8744,
-    "cap",      8745,
-    "cup",      8746,
-    "int",      8747,
-    "there4",   8756,
-    "sim",      8764,
-    "cong",     8773,
-    "asymp",    8776,
-    "ne",       8800,
-    "equiv",    8801,
-    "le",       8804,
-    "ge",       8805,
-    "sub",      8834,
-    "sup",      8835,
-    "nsub",     8836,
-    "sube",     8838,
-    "supe",     8839,
-    "oplus",    8853,
-    "otimes",   8855,
-    "perp",     8869,
-    "sdot",     8901,
-    "lceil",    8968,
-    "rceil",    8969,
-    "lfloor",   8970,
-    "rfloor",   8971,
-    "lang",     9001,
-    "rang",     9002,
-    "loz",      9674,
-    "spades",   9824,
-    "clubs",    9827,
-    "hearts",   9829,
-    "diams",    9830,
-};
-
-int HtmlNamedEntity(unsigned char *p, int length)
-{
-    int i;
-
-    // BUG: this is a dumb, slow linear search
-    for (i = 0; i < sizeof(names) / sizeof(names[0]); i++)
-    {
-        // Entries are case sensitive
-        if (memcmp(names[i].name, (char *)p, length) == 0 &&
-            !names[i].name[length])
-            return names[i].value;
-    }
     return -1;
 }

-#endif

-- 
Configure issuemail: http://d.puremagic.com/issues/userprefs.cgi?tab=email
------- You are receiving this mail because: -------


More information about the Digitalmars-d-bugs mailing list