[Issue 5221] entity.c: Merge Walter's list with Thomas'
d-bugmail at puremagic.com
d-bugmail at puremagic.com
Tue Nov 16 06:09:11 PST 2010
http://d.puremagic.com/issues/show_bug.cgi?id=5221
--- Comment #2 from Iain Buclaw <ibuclaw at ubuntu.com> 2010-11-16 06:07:53 PST ---
(From update of attachment 815)
diff -ur src.orig/entity.c src/entity.c
--- src.orig/entity.c 2010-03-31 01:26:18.000000000 +0100
+++ src/entity.c 2010-11-16 14:01:58.423055202 +0000
@@ -9,6 +9,7 @@
#include <string.h>
+#include <ctype.h>
/*********************************************
* Convert from named entity to its encoding.
@@ -23,7 +24,6 @@
unsigned short value;
};
-#if IN_GCC
static NameId namesA[]={
"Aacgr", 0x0386,
"aacgr", 0x03AC,
@@ -42,7 +42,9 @@
"agr", 0x03B1,
"Agrave", 0x00C0,
"agrave", 0x00E0,
+ "alefsym", 0x2135,
"aleph", 0x2135,
+ "Alpha", 0x0391,
"alpha", 0x03B1,
"Amacr", 0x0100,
"amacr", 0x0101,
@@ -76,9 +78,11 @@
"bcong", 0x224C,
"Bcy", 0x0411,
"bcy", 0x0431,
+ "bdquo", 0x201E,
"becaus", 0x2235,
"bepsi", 0x220D,
"bernou", 0x212C,
+ "Beta", 0x0392,
"beta", 0x03B2,
"beth", 0x2136,
"Bgr", 0x0392,
@@ -162,6 +166,7 @@
"CHcy", 0x0427,
"chcy", 0x0447,
"check", 0x2713,
+ "Chi", 0x03A7,
"chi", 0x03C7,
"cir", 0x25CB,
"circ", 0x005E,
@@ -178,6 +183,7 @@
"coprod", 0x2210,
"copy", 0x00A9,
"copysr", 0x2117,
+ "crarr", 0x21B5,
"cross", 0x2717,
"cuepr", 0x22DE,
"cuesc", 0x22DF,
@@ -281,17 +287,21 @@
"Eogon", 0x0118,
"eogon", 0x0119,
"epsi", 0x220A,
+ "Epsilon", 0x0395,
+ "epsilon", 0x03B5,
"epsis", 0x220A,
"epsiv", 0x03B5,
"equals", 0x003D,
"equiv", 0x2261,
"erDot", 0x2253,
"esdot", 0x2250,
+ "Eta", 0x0397,
"eta", 0x03B7,
"ETH", 0x00D0,
"eth", 0x00F0,
"Euml", 0x00CB,
"euml", 0x00EB,
+ "euro", 0x20AC,
"excl", 0x0021,
"exist", 0x2203,
NULL, 0
@@ -325,6 +335,7 @@
"frac56", 0x215A,
"frac58", 0x215D,
"frac78", 0x215E,
+ "frasl", 0x2044,
"frown", 0x2322,
NULL, 0
};
@@ -425,6 +436,7 @@
"iocy", 0x0451,
"Iogon", 0x012E,
"iogon", 0x012F,
+ "Iota", 0x0399,
"iota", 0x03B9,
"iquest", 0x00BF,
"isin", 0x220A,
@@ -450,6 +462,7 @@
};
static NameId namesK[]={
+ "Kappa", 0x039A,
"kappa", 0x03BA,
"kappav", 0x03F0,
"Kcedil", 0x0136,
@@ -523,7 +536,9 @@
"lozf", 0x2726,
"lpar", 0x0028,
"lrarr2", 0x21C6,
+ "lrm", 0x200E,
"lrhar2", 0x21CB,
+ "lsaquo", 0x2039,
"lsh", 0x21B0,
"lsim", 0x2272,
"lsqb", 0x005B,
@@ -561,6 +576,7 @@
"mldr", 0x2026,
"mnplus", 0x2213,
"models", 0x22A7,
+ "Mu", 0x039C,
"mu", 0x03BC,
"mumap", 0x22B8,
NULL, 0
@@ -573,8 +589,7 @@
"nap", 0x2249,
"napos", 0x0149,
"natur", 0x266E,
-// "nbsp", 0x00A0,
- "nbsp", 32, // make non-breaking space appear as space
+ "nbsp", 0x00A0,
"Ncaron", 0x0147,
"ncaron", 0x0148,
"Ncedil", 0x0145,
@@ -631,6 +646,7 @@
"nsupE", 0x2289,
"Ntilde", 0x00D1,
"ntilde", 0x00F1,
+ "Nu", 0x039D,
"nu", 0x03BD,
"num", 0x0023,
"numero", 0x2116,
@@ -671,10 +687,13 @@
"ohgr", 0x03C9,
"ohm", 0x2126,
"olarr", 0x21BA,
+ "oline", 0x203E,
"Omacr", 0x014C,
"omacr", 0x014D,
"Omega", 0x03A9,
"omega", 0x03C9,
+ "Omicron", 0x039F,
+ "omicron", 0x03BF,
"ominus", 0x2296,
"oplus", 0x2295,
"or", 0x2228,
@@ -709,6 +728,7 @@
"PHgr", 0x03A6,
"phgr", 0x03C6,
"Phi", 0x03A6,
+ "phi", 0x03C6,
"phis", 0x03C6,
"phiv", 0x03D5,
"phmmat", 0x2133,
@@ -780,13 +800,16 @@
"rgr", 0x03C1,
"rhard", 0x21C1,
"rharu", 0x21C0,
+ "Rho", 0x03A1,
"rho", 0x03C1,
"rhov", 0x03F1,
"ring", 0x02DA,
"rlarr2", 0x21C4,
"rlhar2", 0x21CC,
+ "rlm", 0x200F,
"rpar", 0x0029,
"rpargt", 0xE291,
+ "rsaquo", 0x203A,
"rsh", 0x21B1,
"rsqb", 0x005D,
"rsquo", 0x2019,
@@ -804,6 +827,7 @@
"Sacute", 0x015A,
"sacute", 0x015B,
"samalg", 0x2210,
+ "sbquo", 0x201A,
"sbsol", 0xFE68,
"sc", 0x227B,
"scap", 0x227F,
@@ -839,6 +863,7 @@
"shy", 0x00AD,
"Sigma", 0x03A3,
"sigma", 0x03C3,
+ "sigmaf", 0x03C2,
"sigmav", 0x03C2,
"sim", 0x223C,
"sime", 0x2243,
@@ -886,6 +911,7 @@
static NameId namesT[]={
"target", 0x2316,
+ "Tau", 0x03A4,
"tau", 0x03C4,
"Tcaron", 0x0164,
"tcaron", 0x0165,
@@ -899,7 +925,9 @@
"tgr", 0x03C4,
"there4", 0x2234,
"Theta", 0x0398,
+ "theta", 0x03B8,
"thetas", 0x03B8,
+ "thetasym", 0x03D1,
"thetav", 0x03D1,
"THgr", 0x0398,
"thgr", 0x03B8,
@@ -961,8 +989,11 @@
"Uogon", 0x0172,
"uogon", 0x0173,
"uplus", 0x228E,
+ "Upsi", 0x03A5,
"upsi", 0x03C5,
- "Upsi", 0x03D2,
+ "upsih", 0x03D2,
+ "Upsilon", 0x03A5,
+ "upsilon", 0x03C5,
"urcorn", 0x231D,
"urcrop", 0x230E,
"Uring", 0x016E,
@@ -1052,11 +1083,14 @@
"zcy", 0x0437,
"Zdot", 0x017B,
"zdot", 0x017C,
+ "Zeta", 0x0396,
"zeta", 0x03B6,
"Zgr", 0x0396,
"zgr", 0x03B6,
"ZHcy", 0x0416,
"zhcy", 0x0436,
+ "zwj", 0x200D,
+ "zwnj", 0x200C,
NULL, 0
};
@@ -1070,297 +1104,17 @@
int HtmlNamedEntity(unsigned char *p, int length)
{
int tableIndex = tolower(*p) - 'a';
- if (tableIndex >= 0 && tableIndex < 26) {
+ if (tableIndex >= 0 && tableIndex < 26)
+ {
NameId* names = namesTable[tableIndex];
int i;
- for (i = 0; names[i].name; i++){
- if (strncmp(names[i].name, (char *)p, length) == 0){
- return names[i].value;
- }
+ for (i = 0; names[i].name; i++)
+ {
+ if (strncmp(names[i].name, (char *)p, length) == 0)
+ return names[i].value;
}
}
- error("unrecognized character entity \"%.*s\"", length, p);
- return -1;
-}
-
-#else //TODO: Merge Walter's list with Thomas'
-
-static NameId names[] =
-{
- // Entities
- "quot", 34,
- "amp", 38,
- "lt", 60,
- "gt", 62,
-
- "OElig", 338,
- "oelig", 339,
- "Scaron", 352,
- "scaron", 353,
- "Yuml", 376,
- "circ", 710,
- "tilde", 732,
- "ensp", 8194,
- "emsp", 8195,
- "thinsp", 8201,
- "zwnj", 8204,
- "zwj", 8205,
- "lrm", 8206,
- "rlm", 8207,
- "ndash", 8211,
- "mdash", 8212,
- "lsquo", 8216,
- "rsquo", 8217,
- "sbquo", 8218,
- "ldquo", 8220,
- "rdquo", 8221,
- "bdquo", 8222,
- "dagger", 8224,
- "Dagger", 8225,
- "permil", 8240,
- "lsaquo", 8249,
- "rsaquo", 8250,
- "euro", 8364,
-
- // Latin-1 (ISO-8859-1) Entities
- "nbsp", 160,
- "iexcl", 161,
- "cent", 162,
- "pound", 163,
- "curren", 164,
- "yen", 165,
- "brvbar", 166,
- "sect", 167,
- "uml", 168,
- "copy", 169,
- "ordf", 170,
- "laquo", 171,
- "not", 172,
- "shy", 173,
- "reg", 174,
- "macr", 175,
- "deg", 176,
- "plusmn", 177,
- "sup2", 178,
- "sup3", 179,
- "acute", 180,
- "micro", 181,
- "para", 182,
- "middot", 183,
- "cedil", 184,
- "sup1", 185,
- "ordm", 186,
- "raquo", 187,
- "frac14", 188,
- "frac12", 189,
- "frac34", 190,
- "iquest", 191,
- "Agrave", 192,
- "Aacute", 193,
- "Acirc", 194,
- "Atilde", 195,
- "Auml", 196,
- "Aring", 197,
- "AElig", 198,
- "Ccedil", 199,
- "Egrave", 200,
- "Eacute", 201,
- "Ecirc", 202,
- "Euml", 203,
- "Igrave", 204,
- "Iacute", 205,
- "Icirc", 206,
- "Iuml", 207,
- "ETH", 208,
- "Ntilde", 209,
- "Ograve", 210,
- "Oacute", 211,
- "Ocirc", 212,
- "Otilde", 213,
- "Ouml", 214,
- "times", 215,
- "Oslash", 216,
- "Ugrave", 217,
- "Uacute", 218,
- "Ucirc", 219,
- "Uuml", 220,
- "Yacute", 221,
- "THORN", 222,
- "szlig", 223,
- "agrave", 224,
- "aacute", 225,
- "acirc", 226,
- "atilde", 227,
- "auml", 228,
- "aring", 229,
- "aelig", 230,
- "ccedil", 231,
- "egrave", 232,
- "eacute", 233,
- "ecirc", 234,
- "euml", 235,
- "igrave", 236,
- "iacute", 237,
- "icirc", 238,
- "iuml", 239,
- "eth", 240,
- "ntilde", 241,
- "ograve", 242,
- "oacute", 243,
- "ocirc", 244,
- "otilde", 245,
- "ouml", 246,
- "divide", 247,
- "oslash", 248,
- "ugrave", 249,
- "uacute", 250,
- "ucirc", 251,
- "uuml", 252,
- "yacute", 253,
- "thorn", 254,
- "yuml", 255,
-
- // Symbols and Greek letter entities
- "fnof", 402,
- "Alpha", 913,
- "Beta", 914,
- "Gamma", 915,
- "Delta", 916,
- "Epsilon", 917,
- "Zeta", 918,
- "Eta", 919,
- "Theta", 920,
- "Iota", 921,
- "Kappa", 922,
- "Lambda", 923,
- "Mu", 924,
- "Nu", 925,
- "Xi", 926,
- "Omicron", 927,
- "Pi", 928,
- "Rho", 929,
- "Sigma", 931,
- "Tau", 932,
- "Upsilon", 933,
- "Phi", 934,
- "Chi", 935,
- "Psi", 936,
- "Omega", 937,
- "alpha", 945,
- "beta", 946,
- "gamma", 947,
- "delta", 948,
- "epsilon", 949,
- "zeta", 950,
- "eta", 951,
- "theta", 952,
- "iota", 953,
- "kappa", 954,
- "lambda", 955,
- "mu", 956,
- "nu", 957,
- "xi", 958,
- "omicron", 959,
- "pi", 960,
- "rho", 961,
- "sigmaf", 962,
- "sigma", 963,
- "tau", 964,
- "upsilon", 965,
- "phi", 966,
- "chi", 967,
- "psi", 968,
- "omega", 969,
- "thetasym", 977,
- "upsih", 978,
- "piv", 982,
- "bull", 8226,
- "hellip", 8230,
- "prime", 8242,
- "Prime", 8243,
- "oline", 8254,
- "frasl", 8260,
- "weierp", 8472,
- "image", 8465,
- "real", 8476,
- "trade", 8482,
- "alefsym", 8501,
- "larr", 8592,
- "uarr", 8593,
- "rarr", 8594,
- "darr", 8595,
- "harr", 8596,
- "crarr", 8629,
- "lArr", 8656,
- "uArr", 8657,
- "rArr", 8658,
- "dArr", 8659,
- "hArr", 8660,
- "forall", 8704,
- "part", 8706,
- "exist", 8707,
- "empty", 8709,
- "nabla", 8711,
- "isin", 8712,
- "notin", 8713,
- "ni", 8715,
- "prod", 8719,
- "sum", 8721,
- "minus", 8722,
- "lowast", 8727,
- "radic", 8730,
- "prop", 8733,
- "infin", 8734,
- "ang", 8736,
- "and", 8743,
- "or", 8744,
- "cap", 8745,
- "cup", 8746,
- "int", 8747,
- "there4", 8756,
- "sim", 8764,
- "cong", 8773,
- "asymp", 8776,
- "ne", 8800,
- "equiv", 8801,
- "le", 8804,
- "ge", 8805,
- "sub", 8834,
- "sup", 8835,
- "nsub", 8836,
- "sube", 8838,
- "supe", 8839,
- "oplus", 8853,
- "otimes", 8855,
- "perp", 8869,
- "sdot", 8901,
- "lceil", 8968,
- "rceil", 8969,
- "lfloor", 8970,
- "rfloor", 8971,
- "lang", 9001,
- "rang", 9002,
- "loz", 9674,
- "spades", 9824,
- "clubs", 9827,
- "hearts", 9829,
- "diams", 9830,
-};
-
-int HtmlNamedEntity(unsigned char *p, int length)
-{
- int i;
-
- // BUG: this is a dumb, slow linear search
- for (i = 0; i < sizeof(names) / sizeof(names[0]); i++)
- {
- // Entries are case sensitive
- if (memcmp(names[i].name, (char *)p, length) == 0 &&
- !names[i].name[length])
- return names[i].value;
- }
return -1;
}
-#endif
--
Configure issuemail: http://d.puremagic.com/issues/userprefs.cgi?tab=email
------- You are receiving this mail because: -------
More information about the Digitalmars-d-bugs mailing list