Bug Summary

File:HTMLparser.c
Location:line 2563, column 6
Description:Value stored to 'out' is never read

Annotated Source Code

1/*
2 * HTMLparser.c : an HTML 4.0 non-verifying parser
3 *
4 * See Copyright for the status of this software.
5 *
6 * daniel@veillard.com
7 */
8
9#define IN_LIBXML
10#include "libxml.h"
11#ifdef LIBXML_HTML_ENABLED
12
13#include <string.h>
14#ifdef HAVE_CTYPE_H1
15#include <ctype.h>
16#endif
17#ifdef HAVE_STDLIB_H1
18#include <stdlib.h>
19#endif
20#ifdef HAVE_SYS_STAT_H1
21#include <sys/stat.h>
22#endif
23#ifdef HAVE_FCNTL_H1
24#include <fcntl.h>
25#endif
26#ifdef HAVE_UNISTD_H1
27#include <unistd.h>
28#endif
29#ifdef HAVE_ZLIB_H1
30#include <zlib.h>
31#endif
32
33#include <libxml/xmlmemory.h>
34#include <libxml/tree.h>
35#include <libxml/parser.h>
36#include <libxml/parserInternals.h>
37#include <libxml/xmlerror.h>
38#include <libxml/HTMLparser.h>
39#include <libxml/HTMLtree.h>
40#include <libxml/entities.h>
41#include <libxml/encoding.h>
42#include <libxml/valid.h>
43#include <libxml/xmlIO.h>
44#include <libxml/globals.h>
45#include <libxml/uri.h>
46
47#define HTML_MAX_NAMELEN1000 1000
48#define HTML_PARSER_BIG_BUFFER_SIZE1000 1000
49#define HTML_PARSER_BUFFER_SIZE100 100
50
51/* #define DEBUG */
52/* #define DEBUG_PUSH */
53
54static int htmlOmittedDefaultValue = 1;
55
56xmlChar * htmlDecodeEntities(htmlParserCtxtPtr ctxt, int len,
57 xmlChar end, xmlChar end2, xmlChar end3);
58static void htmlParseComment(htmlParserCtxtPtr ctxt);
59
60/************************************************************************
61 * *
62 * Some factorized error routines *
63 * *
64 ************************************************************************/
65
66/**
67 * htmlErrMemory:
68 * @ctxt: an HTML parser context
69 * @extra: extra informations
70 *
71 * Handle a redefinition of attribute error
72 */
73static void
74htmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra)
75{
76 if ((ctxt != NULL((void*)0)) && (ctxt->disableSAX != 0) &&
77 (ctxt->instate == XML_PARSER_EOF))
78 return;
79 if (ctxt != NULL((void*)0)) {
80 ctxt->errNo = XML_ERR_NO_MEMORY;
81 ctxt->instate = XML_PARSER_EOF;
82 ctxt->disableSAX = 1;
83 }
84 if (extra)
85 __xmlRaiseError(NULL((void*)0), NULL((void*)0), NULL((void*)0), ctxt, NULL((void*)0), XML_FROM_PARSER,
86 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL((void*)0), 0, extra,
87 NULL((void*)0), NULL((void*)0), 0, 0,
88 "Memory allocation failed : %s\n", extra);
89 else
90 __xmlRaiseError(NULL((void*)0), NULL((void*)0), NULL((void*)0), ctxt, NULL((void*)0), XML_FROM_PARSER,
91 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL((void*)0), 0, NULL((void*)0),
92 NULL((void*)0), NULL((void*)0), 0, 0, "Memory allocation failed\n");
93}
94
95/**
96 * htmlParseErr:
97 * @ctxt: an HTML parser context
98 * @error: the error number
99 * @msg: the error message
100 * @str1: string infor
101 * @str2: string infor
102 *
103 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
104 */
105static void
106htmlParseErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
107 const char *msg, const xmlChar *str1, const xmlChar *str2)
108{
109 if ((ctxt != NULL((void*)0)) && (ctxt->disableSAX != 0) &&
110 (ctxt->instate == XML_PARSER_EOF))
111 return;
112 if (ctxt != NULL((void*)0))
113 ctxt->errNo = error;
114 __xmlRaiseError(NULL((void*)0), NULL((void*)0), NULL((void*)0), ctxt, NULL((void*)0), XML_FROM_HTML, error,
115 XML_ERR_ERROR, NULL((void*)0), 0,
116 (const char *) str1, (const char *) str2,
117 NULL((void*)0), 0, 0,
118 msg, str1, str2);
119 if (ctxt != NULL((void*)0))
120 ctxt->wellFormed = 0;
121}
122
123/**
124 * htmlParseErrInt:
125 * @ctxt: an HTML parser context
126 * @error: the error number
127 * @msg: the error message
128 * @val: integer info
129 *
130 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
131 */
132static void
133htmlParseErrInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
134 const char *msg, int val)
135{
136 if ((ctxt != NULL((void*)0)) && (ctxt->disableSAX != 0) &&
137 (ctxt->instate == XML_PARSER_EOF))
138 return;
139 if (ctxt != NULL((void*)0))
140 ctxt->errNo = error;
141 __xmlRaiseError(NULL((void*)0), NULL((void*)0), NULL((void*)0), ctxt, NULL((void*)0), XML_FROM_HTML, error,
142 XML_ERR_ERROR, NULL((void*)0), 0, NULL((void*)0), NULL((void*)0),
143 NULL((void*)0), val, 0, msg, val);
144 if (ctxt != NULL((void*)0))
145 ctxt->wellFormed = 0;
146}
147
148/************************************************************************
149 * *
150 * Parser stacks related functions and macros *
151 * *
152 ************************************************************************/
153
154/**
155 * htmlnamePush:
156 * @ctxt: an HTML parser context
157 * @value: the element name
158 *
159 * Pushes a new element name on top of the name stack
160 *
161 * Returns 0 in case of error, the index in the stack otherwise
162 */
163static int
164htmlnamePush(htmlParserCtxtPtr ctxt, const xmlChar * value)
165{
166 if ((ctxt->html < 3) && (xmlStrEqualxmlStrEqual__internal_alias(value, BAD_CAST(xmlChar *) "head")))
167 ctxt->html = 3;
168 if ((ctxt->html < 10) && (xmlStrEqualxmlStrEqual__internal_alias(value, BAD_CAST(xmlChar *) "body")))
169 ctxt->html = 10;
170 if (ctxt->nameNr >= ctxt->nameMax) {
171 ctxt->nameMax *= 2;
172 ctxt->nameTab = (const xmlChar * *)
173 xmlRealloc((xmlChar * *)ctxt->nameTab,
174 ctxt->nameMax *
175 sizeof(ctxt->nameTab[0]));
176 if (ctxt->nameTab == NULL((void*)0)) {
177 htmlErrMemory(ctxt, NULL((void*)0));
178 return (0);
179 }
180 }
181 ctxt->nameTab[ctxt->nameNr] = value;
182 ctxt->name = value;
183 return (ctxt->nameNr++);
184}
185/**
186 * htmlnamePop:
187 * @ctxt: an HTML parser context
188 *
189 * Pops the top element name from the name stack
190 *
191 * Returns the name just removed
192 */
193static const xmlChar *
194htmlnamePop(htmlParserCtxtPtr ctxt)
195{
196 const xmlChar *ret;
197
198 if (ctxt->nameNr <= 0)
199 return (NULL((void*)0));
200 ctxt->nameNr--;
201 if (ctxt->nameNr < 0)
202 return (NULL((void*)0));
203 if (ctxt->nameNr > 0)
204 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
205 else
206 ctxt->name = NULL((void*)0);
207 ret = ctxt->nameTab[ctxt->nameNr];
208 ctxt->nameTab[ctxt->nameNr] = NULL((void*)0);
209 return (ret);
210}
211
212/*
213 * Macros for accessing the content. Those should be used only by the parser,
214 * and not exported.
215 *
216 * Dirty macros, i.e. one need to make assumption on the context to use them
217 *
218 * CUR_PTR return the current pointer to the xmlChar to be parsed.
219 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
220 * in ISO-Latin or UTF-8, and the current 16 bit value if compiled
221 * in UNICODE mode. This should be used internally by the parser
222 * only to compare to ASCII values otherwise it would break when
223 * running with UTF-8 encoding.
224 * NXT(n) returns the n'th next xmlChar. Same as CUR it should be used only
225 * to compare on ASCII based substring.
226 * UPP(n) returns the n'th next xmlChar converted to uppercase. Same as CUR
227 * it should be used only to compare on ASCII based substring.
228 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
229 * strings without newlines within the parser.
230 *
231 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
232 *
233 * CURRENT Returns the current char value, with the full decoding of
234 * UTF-8 if we are using this mode. It returns an int.
235 * NEXT Skip to the next character, this does the proper decoding
236 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
237 * NEXTL(l) Skip the current unicode character of l xmlChars long.
238 * COPY(to) copy one char to *to, increment CUR_PTR and to accordingly
239 */
240
/* All macros below expect a parser context variable named `ctxt' in scope. */

/* Current input byte, upper-cased. */
#define UPPER (toupper(*ctxt->input->cur))

/* Advance the input by val bytes, keeping nbChars and col in step. */
#define SKIP(val) ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val)

/* Byte at offset val from the current input position. */
#define NXT(val) ctxt->input->cur[(val)]

/* Same as NXT(val), upper-cased. */
#define UPP(val) (toupper(ctxt->input->cur[(val)]))

/* Current input pointer. */
#define CUR_PTR ctxt->input->cur

/*
 * Shrink the input when more than 2 * INPUT_CHUNK bytes were consumed and
 * fewer than 2 * INPUT_CHUNK remain.  Expands to a bare `if' statement.
 */
#define SHRINK if ((ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
                   (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
        xmlParserInputShrink(ctxt->input)

/*
 * Outside of progressive mode, grow the input when fewer than INPUT_CHUNK
 * bytes remain.  Expands to a bare `if' statement.
 */
#define GROW if ((ctxt->progressive == 0) && \
                 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK)

/* Current input byte as an int. */
#define CURRENT ((int) (*ctxt->input->cur))

#define SKIP_BLANKS htmlSkipBlankChars(ctxt)

/* Imported from XML */

/* #define CUR (ctxt->token ? ctxt->token : (int) (*ctxt->input->cur)) */
#define CUR ((int) (*ctxt->input->cur))
#define NEXT xmlNextChar(ctxt)

/* Current input byte, or -1 while a token is pending. */
#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))


/*
 * Step over one character that is l bytes long: line/col accounting is
 * based on the first byte, the pending token is cleared and nbChars is
 * incremented by one.
 */
#define NEXTL(l) do { \
    if (*(ctxt->input->cur) == '\n') { \
        ctxt->input->line++; ctxt->input->col = 1; \
    } else ctxt->input->col++; \
    ctxt->token = 0; ctxt->input->cur += l; ctxt->nbChars++; \
  } while (0)

/************
    \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
    if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
 ************/

#define CUR_CHAR(l) htmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * Append character v (l bytes long once encoded) to buffer b at index i,
 * advancing i.  Expands to a bare `if/else' statement.
 */
#define COPY_BUF(l,b,i,v) \
    if (l == 1) b[i++] = (xmlChar) v; \
    else i += xmlCopyChar(l,&b[i],v)
293
294/**
295 * htmlFindEncoding:
296 * @the HTML parser context
297 *
298 * Ty to find and encoding in the current data available in the input
299 * buffer this is needed to try to switch to the proper encoding when
300 * one face a character error.
301 * That's an heuristic, since it's operating outside of parsing it could
302 * try to use a meta which had been commented out, that's the reason it
303 * should only be used in case of error, not as a default.
304 *
305 * Returns an encoding string or NULL if not found, the string need to
306 * be freed
307 */
308static xmlChar *
309htmlFindEncoding(xmlParserCtxtPtr ctxt) {
310 const xmlChar *start, *cur, *end;
311
312 if ((ctxt == NULL((void*)0)) || (ctxt->input == NULL((void*)0)) ||
313 (ctxt->input->encoding != NULL((void*)0)) || (ctxt->input->buf == NULL((void*)0)) ||
314 (ctxt->input->buf->encoder != NULL((void*)0)))
315 return(NULL((void*)0));
316 if ((ctxt->input->cur == NULL((void*)0)) || (ctxt->input->end == NULL((void*)0)))
317 return(NULL((void*)0));
318
319 start = ctxt->input->cur;
320 end = ctxt->input->end;
321 /* we also expect the input buffer to be zero terminated */
322 if (*end != 0)
323 return(NULL((void*)0));
324
325 cur = xmlStrcasestrxmlStrcasestr__internal_alias(start, BAD_CAST(xmlChar *) "HTTP-EQUIV");
326 if (cur == NULL((void*)0))
327 return(NULL((void*)0));
328 cur = xmlStrcasestrxmlStrcasestr__internal_alias(cur, BAD_CAST(xmlChar *) "CONTENT");
329 if (cur == NULL((void*)0))
330 return(NULL((void*)0));
331 cur = xmlStrcasestrxmlStrcasestr__internal_alias(cur, BAD_CAST(xmlChar *) "CHARSET=");
332 if (cur == NULL((void*)0))
333 return(NULL((void*)0));
334 cur += 8;
335 start = cur;
336 while (((*cur >= 'A') && (*cur <= 'Z')) ||
337 ((*cur >= 'a') && (*cur <= 'z')) ||
338 ((*cur >= '0') && (*cur <= '9')) ||
339 (*cur == '-') || (*cur == '_') || (*cur == ':') || (*cur == '/'))
340 cur++;
341 if (cur == start)
342 return(NULL((void*)0));
343 return(xmlStrndupxmlStrndup__internal_alias(start, cur - start));
344}
345
346/**
347 * htmlCurrentChar:
348 * @ctxt: the HTML parser context
349 * @len: pointer to the length of the char read
350 *
351 * The current char value, if using UTF-8 this may actually span multiple
352 * bytes in the input buffer. Implement the end of line normalization:
353 * 2.11 End-of-Line Handling
354 * If the encoding is unspecified, in the case we find an ISO-Latin-1
355 * char, then the encoding converter is plugged in automatically.
356 *
357 * Returns the current char value and its length
358 */
359
360static int
361htmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
362 if (ctxt->instate == XML_PARSER_EOF)
363 return(0);
364
365 if (ctxt->token != 0) {
366 *len = 0;
367 return(ctxt->token);
368 }
369 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
370 /*
371 * We are supposed to handle UTF8, check it's valid
372 * From rfc2044: encoding of the Unicode values on UTF-8:
373 *
374 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
375 * 0000 0000-0000 007F 0xxxxxxx
376 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
377 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
378 *
379 * Check for the 0x110000 limit too
380 */
381 const unsigned char *cur = ctxt->input->cur;
382 unsigned char c;
383 unsigned int val;
384
385 c = *cur;
386 if (c & 0x80) {
387 if (cur[1] == 0) {
388 xmlParserInputGrowxmlParserInputGrow__internal_alias(ctxt->input, INPUT_CHUNK250);
389 cur = ctxt->input->cur;
390 }
391 if ((cur[1] & 0xc0) != 0x80)
392 goto encoding_error;
393 if ((c & 0xe0) == 0xe0) {
394
395 if (cur[2] == 0) {
396 xmlParserInputGrowxmlParserInputGrow__internal_alias(ctxt->input, INPUT_CHUNK250);
397 cur = ctxt->input->cur;
398 }
399 if ((cur[2] & 0xc0) != 0x80)
400 goto encoding_error;
401 if ((c & 0xf0) == 0xf0) {
402 if (cur[3] == 0) {
403 xmlParserInputGrowxmlParserInputGrow__internal_alias(ctxt->input, INPUT_CHUNK250);
404 cur = ctxt->input->cur;
405 }
406 if (((c & 0xf8) != 0xf0) ||
407 ((cur[3] & 0xc0) != 0x80))
408 goto encoding_error;
409 /* 4-byte code */
410 *len = 4;
411 val = (cur[0] & 0x7) << 18;
412 val |= (cur[1] & 0x3f) << 12;
413 val |= (cur[2] & 0x3f) << 6;
414 val |= cur[3] & 0x3f;
415 } else {
416 /* 3-byte code */
417 *len = 3;
418 val = (cur[0] & 0xf) << 12;
419 val |= (cur[1] & 0x3f) << 6;
420 val |= cur[2] & 0x3f;
421 }
422 } else {
423 /* 2-byte code */
424 *len = 2;
425 val = (cur[0] & 0x1f) << 6;
426 val |= cur[1] & 0x3f;
427 }
428 if (!IS_CHAR(val)(((val) < 0x100) ? (((0x9 <= ((val))) && (((val
)) <= 0xa)) || (((val)) == 0xd) || (0x20 <= ((val)))) :
(((0x100 <= (val)) && ((val) <= 0xd7ff)) || ((
0xe000 <= (val)) && ((val) <= 0xfffd)) || ((0x10000
<= (val)) && ((val) <= 0x10ffff))))
) {
429 htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
430 "Char 0x%X out of allowed range\n", val);
431 }
432 return(val);
433 } else {
434 if ((*ctxt->input->cur == 0) &&
435 (ctxt->input->cur < ctxt->input->end)) {
436 htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
437 "Char 0x%X out of allowed range\n", 0);
438 *len = 1;
439 return(' ');
440 }
441 /* 1-byte code */
442 *len = 1;
443 return((int) *ctxt->input->cur);
444 }
445 }
446 /*
447 * Assume it's a fixed length encoding (1) with
448 * a compatible encoding for the ASCII set, since
449 * XML constructs only use < 128 chars
450 */
451 *len = 1;
452 if ((int) *ctxt->input->cur < 0x80)
453 return((int) *ctxt->input->cur);
454
455 /*
456 * Humm this is bad, do an automatic flow conversion
457 */
458 {
459 xmlChar * guess;
460 xmlCharEncodingHandlerPtr handler;
461
462 guess = htmlFindEncoding(ctxt);
463 if (guess == NULL((void*)0)) {
464 xmlSwitchEncodingxmlSwitchEncoding__internal_alias(ctxt, XML_CHAR_ENCODING_8859_1);
465 } else {
466 if (ctxt->input->encoding != NULL((void*)0))
467 xmlFree((xmlChar *) ctxt->input->encoding);
468 ctxt->input->encoding = guess;
469 handler = xmlFindCharEncodingHandlerxmlFindCharEncodingHandler__internal_alias((const char *) guess);
470 if (handler != NULL((void*)0)) {
471 xmlSwitchToEncodingxmlSwitchToEncoding__internal_alias(ctxt, handler);
472 } else {
473 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
474 "Unsupported encoding %s", guess, NULL((void*)0));
475 }
476 }
477 ctxt->charset = XML_CHAR_ENCODING_UTF8;
478 }
479
480 return(xmlCurrentCharxmlCurrentChar__internal_alias(ctxt, len));
481
482encoding_error:
483 /*
484 * If we detect an UTF8 error that probably mean that the
485 * input encoding didn't get properly advertized in the
486 * declaration header. Report the error and switch the encoding
487 * to ISO-Latin-1 (if you don't like this policy, just declare the
488 * encoding !)
489 */
490 {
491 char buffer[150];
492
493 if (ctxt->input->end - ctxt->input->cur >= 4) {
494 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
495 ctxt->input->cur[0], ctxt->input->cur[1],
496 ctxt->input->cur[2], ctxt->input->cur[3]);
497 } else {
498 snprintf(buffer, 149, "Bytes: 0x%02X\n", ctxt->input->cur[0]);
499 }
500 htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,
501 "Input is not proper UTF-8, indicate encoding !\n",
502 BAD_CAST(xmlChar *) buffer, NULL((void*)0));
503 }
504
505 ctxt->charset = XML_CHAR_ENCODING_8859_1;
506 *len = 1;
507 return((int) *ctxt->input->cur);
508}
509
510/**
511 * htmlSkipBlankChars:
512 * @ctxt: the HTML parser context
513 *
514 * skip all blanks character found at that point in the input streams.
515 *
516 * Returns the number of space chars skipped
517 */
518
519static int
520htmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
521 int res = 0;
522
523 while (IS_BLANK_CH(*(ctxt->input->cur))(((*(ctxt->input->cur)) == 0x20) || ((0x9 <= (*(ctxt
->input->cur))) && ((*(ctxt->input->cur))
<= 0xa)) || ((*(ctxt->input->cur)) == 0xd))
) {
524 if ((*ctxt->input->cur == 0) &&
525 (xmlParserInputGrowxmlParserInputGrow__internal_alias(ctxt->input, INPUT_CHUNK250) <= 0)) {
526 xmlPopInputxmlPopInput__internal_alias(ctxt);
527 } else {
528 if (*(ctxt->input->cur) == '\n') {
529 ctxt->input->line++; ctxt->input->col = 1;
530 } else ctxt->input->col++;
531 ctxt->input->cur++;
532 ctxt->nbChars++;
533 if (*ctxt->input->cur == 0)
534 xmlParserInputGrowxmlParserInputGrow__internal_alias(ctxt->input, INPUT_CHUNK250);
535 }
536 res++;
537 }
538 return(res);
539}
540
541
542
543/************************************************************************
544 * *
545 * The list of HTML elements and their properties *
546 * *
547 ************************************************************************/
548
549/*
550 * Start Tag: 1 means the start tag can be omitted
551 * End Tag: 1 means the end tag can be omitted
552 * 2 means it's forbidden (empty elements)
553 * 3 means the tag is stylistic and should be closed easily
554 * Depr: this element is deprecated
555 * DTD: 1 means that this element is valid only in the Loose DTD
556 * 2 means that this element is valid only in the Frameset DTD
557 *
558 * Name,Start Tag,End Tag,Save End,Empty,Deprecated,DTD,inline,Description
559 , subElements , impliedsubelt , Attributes, userdata
560 */
561
562/* Definitions and a couple of vars for HTML Elements */
563
/*
 * Element name classes and their sizes.  Each class expands to a comma
 * separated list of string literals; NB_<class> is the number of entries.
 * Composite classes must join their members with commas: two adjacent
 * string literals are concatenated by the compiler into a single (bogus)
 * name such as "smallem" or "menupre".
 */
#define FONTSTYLE "tt", "i", "b", "u", "s", "strike", "big", "small"
#define NB_FONTSTYLE 8
#define PHRASE "em", "strong", "dfn", "code", "samp", "kbd", "var", "cite", "abbr", "acronym"
#define NB_PHRASE 10
#define SPECIAL "a", "img", "applet", "embed", "object", "font", "basefont", "br", "script", "map", "q", "sub", "sup", "span", "bdo", "iframe"
#define NB_SPECIAL 16
/* PCDATA is empty (NB_PCDATA is 0) and therefore contributes no entry. */
#define INLINE FONTSTYLE, PHRASE, SPECIAL, FORMCTRL
#define NB_INLINE (NB_PCDATA + NB_FONTSTYLE + NB_PHRASE + NB_SPECIAL + NB_FORMCTRL)
#define BLOCK HEADING, LIST, "pre", "p", "dl", "div", "center", "noscript", "noframes", "blockquote", "form", "isindex", "hr", "table", "fieldset", "address"
#define NB_BLOCK (NB_HEADING + NB_LIST + 14)
#define FORMCTRL "input", "select", "textarea", "label", "button"
#define NB_FORMCTRL 5
#define PCDATA
#define NB_PCDATA 0
#define HEADING "h1", "h2", "h3", "h4", "h5", "h6"
#define NB_HEADING 6
#define LIST "ul", "ol", "dir", "menu"
#define NB_LIST 4
#define MODIFIER
#define NB_MODIFIER 0
#define FLOW BLOCK, INLINE
#define NB_FLOW (NB_BLOCK + NB_INLINE)
#define EMPTY NULL


/* NULL terminated lists of the flow and inline element names */
static const char* const html_flow[] = { FLOW, NULL } ;
static const char* const html_inline[] = { INLINE, NULL } ;

/* placeholders: elts with content but no subelements */
static const char* const html_pcdata[] = { NULL } ;
#define html_cdata html_pcdata
595
596
597/* ... and for HTML Attributes */
598
/*
 * Attribute name classes and their sizes.  Each class expands to a comma
 * separated list of string literals; NB_<class> is the number of entries.
 */
#define COREATTRS "id", "class", "style", "title"
#define NB_COREATTRS 4
#define I18N "lang", "dir"
#define NB_I18N 2
#define EVENTS "onclick", "ondblclick", "onmousedown", "onmouseup", "onmouseover", "onmouseout", "onkeypress", "onkeydown", "onkeyup"
#define NB_EVENTS 9
#define ATTRS COREATTRS,I18N,EVENTS
#define NB_ATTRS (NB_COREATTRS + NB_I18N + NB_EVENTS)
#define CELLHALIGN "align", "char", "charoff"
#define NB_CELLHALIGN 3
#define CELLVALIGN "valign"
#define NB_CELLVALIGN 1

/* NULL terminated lists built from the classes above */
static const char* const html_attrs[] = { ATTRS, NULL } ;
static const char* const core_i18n_attrs[] = { COREATTRS, I18N, NULL } ;
static const char* const core_attrs[] = { COREATTRS, NULL } ;
static const char* const i18n_attrs[] = { I18N, NULL } ;
616
617
618/* Other declarations that should go inline ... */
619static const char* const a_attrs[] = { ATTRS"id", "class", "style", "title","lang", "dir","onclick", "ondblclick"
, "onmousedown", "onmouseup", "onmouseover", "onmouseout", "onkeypress"
, "onkeydown", "onkeyup"
, "charset", "type", "name",
620 "href", "hreflang", "rel", "rev", "accesskey", "shape", "coords",
621 "tabindex", "onfocus", "onblur", NULL((void*)0) } ;
622static const char* const target_attr[] = { "target", NULL((void*)0) } ;
623static const char* const rows_cols_attr[] = { "rows", "cols", NULL((void*)0) } ;
624static const char* const alt_attr[] = { "alt", NULL((void*)0) } ;
625static const char* const src_alt_attrs[] = { "src", "alt", NULL((void*)0) } ;
626static const char* const href_attrs[] = { "href", NULL((void*)0) } ;
627static const char* const clear_attrs[] = { "clear", NULL((void*)0) } ;
628static const char* const inline_p[] = { INLINE"tt", "i", "b", "u", "s", "strike", "big", "small" "em", "strong"
, "dfn", "code", "samp", "kbd", "var", "cite", "abbr", "acronym"
"a", "img", "applet", "embed", "object", "font", "basefont",
"br", "script", "map", "q", "sub", "sup", "span", "bdo", "iframe"
"input", "select", "textarea", "label", "button"
, "p", NULL((void*)0) } ;
629
630static const char* const flow_param[] = { FLOW"h1", "h2", "h3", "h4", "h5", "h6", "ul", "ol", "dir", "menu"
"pre", "p", "dl", "div", "center", "noscript", "noframes", "blockquote"
, "form", "isindex", "hr", "table", "fieldset", "address", "tt"
, "i", "b", "u", "s", "strike", "big", "small" "em", "strong"
, "dfn", "code", "samp", "kbd", "var", "cite", "abbr", "acronym"
"a", "img", "applet", "embed", "object", "font", "basefont",
"br", "script", "map", "q", "sub", "sup", "span", "bdo", "iframe"
"input", "select", "textarea", "label", "button"
, "param", NULL((void*)0) } ;
631static const char* const applet_attrs[] = { COREATTRS"id", "class", "style", "title" , "codebase",
632 "archive", "alt", "name", "height", "width", "align",
633 "hspace", "vspace", NULL((void*)0) } ;
634static const char* const area_attrs[] = { "shape", "coords", "href", "nohref",
635 "tabindex", "accesskey", "onfocus", "onblur", NULL((void*)0) } ;
636static const char* const basefont_attrs[] =
637 { "id", "size", "color", "face", NULL((void*)0) } ;
638static const char* const quote_attrs[] = { ATTRS"id", "class", "style", "title","lang", "dir","onclick", "ondblclick"
, "onmousedown", "onmouseup", "onmouseover", "onmouseout", "onkeypress"
, "onkeydown", "onkeyup"
, "cite", NULL((void*)0) } ;
639static const char* const body_contents[] = { FLOW"h1", "h2", "h3", "h4", "h5", "h6", "ul", "ol", "dir", "menu"
"pre", "p", "dl", "div", "center", "noscript", "noframes", "blockquote"
, "form", "isindex", "hr", "table", "fieldset", "address", "tt"
, "i", "b", "u", "s", "strike", "big", "small" "em", "strong"
, "dfn", "code", "samp", "kbd", "var", "cite", "abbr", "acronym"
"a", "img", "applet", "embed", "object", "font", "basefont",
"br", "script", "map", "q", "sub", "sup", "span", "bdo", "iframe"
"input", "select", "textarea", "label", "button"
, "ins", "del", NULL((void*)0) } ;
640static const char* const body_attrs[] = { ATTRS"id", "class", "style", "title","lang", "dir","onclick", "ondblclick"
, "onmousedown", "onmouseup", "onmouseover", "onmouseout", "onkeypress"
, "onkeydown", "onkeyup"
, "onload", "onunload", NULL((void*)0) } ;
641static const char* const body_depr[] = { "background", "bgcolor", "text",
642 "link", "vlink", "alink", NULL((void*)0) } ;
643static const char* const button_attrs[] = { ATTRS"id", "class", "style", "title","lang", "dir","onclick", "ondblclick"
, "onmousedown", "onmouseup", "onmouseover", "onmouseout", "onkeypress"
, "onkeydown", "onkeyup"
, "name", "value", "type",
644 "disabled", "tabindex", "accesskey", "onfocus", "onblur", NULL((void*)0) } ;
645
646
647static const char* const col_attrs[] = { ATTRS"id", "class", "style", "title","lang", "dir","onclick", "ondblclick"
, "onmousedown", "onmouseup", "onmouseover", "onmouseout", "onkeypress"
, "onkeydown", "onkeyup"
, "span", "width", CELLHALIGN"align", "char", "charoff", CELLVALIGN"valign", NULL((void*)0) } ;
648static const char* const col_elt[] = { "col", NULL((void*)0) } ;
649static const char* const edit_attrs[] = { ATTRS"id", "class", "style", "title","lang", "dir","onclick", "ondblclick"
, "onmousedown", "onmouseup", "onmouseover", "onmouseout", "onkeypress"
, "onkeydown", "onkeyup"
, "datetime", "cite", NULL((void*)0) } ;
650static const char* const compact_attrs[] = { ATTRS"id", "class", "style", "title","lang", "dir","onclick", "ondblclick"
, "onmousedown", "onmouseup", "onmouseover", "onmouseout", "onkeypress"
, "onkeydown", "onkeyup"
, "compact", NULL((void*)0) } ;
651static const char* const dl_contents[] = { "dt", "dd", NULL((void*)0) } ;
652static const char* const compact_attr[] = { "compact", NULL((void*)0) } ;
653static const char* const label_attr[] = { "label", NULL((void*)0) } ;
654static const char* const fieldset_contents[] = { FLOW"h1", "h2", "h3", "h4", "h5", "h6", "ul", "ol", "dir", "menu"
"pre", "p", "dl", "div", "center", "noscript", "noframes", "blockquote"
, "form", "isindex", "hr", "table", "fieldset", "address", "tt"
, "i", "b", "u", "s", "strike", "big", "small" "em", "strong"
, "dfn", "code", "samp", "kbd", "var", "cite", "abbr", "acronym"
"a", "img", "applet", "embed", "object", "font", "basefont",
"br", "script", "map", "q", "sub", "sup", "span", "bdo", "iframe"
"input", "select", "textarea", "label", "button"
, "legend" } ;
655static const char* const font_attrs[] = { COREATTRS"id", "class", "style", "title", I18N"lang", "dir", "size", "color", "face" , NULL((void*)0) } ;
/*
 * Elements allowed inside <form>: headings, lists, every inline element and
 * the block-level elements listed last (note that "dl" and "form" itself
 * are not part of the list).
 *
 * The names are spelled out one by one so that each is a separate array
 * entry: without a comma between them, adjacent string literals are
 * concatenated by the compiler (e.g. "small" "em" becomes "smallem").
 * The list is NULL-terminated.
 */
static const char* const form_contents[] = {
    /* headings */
    "h1", "h2", "h3", "h4", "h5", "h6",
    /* lists */
    "ul", "ol", "dir", "menu",
    /* font style */
    "tt", "i", "b", "u", "s", "strike", "big", "small",
    /* phrase */
    "em", "strong", "dfn", "code", "samp", "kbd", "var", "cite", "abbr",
    "acronym",
    /* special inline */
    "a", "img", "applet", "embed", "object", "font", "basefont",
    "br", "script", "map", "q", "sub", "sup", "span", "bdo", "iframe",
    /* form controls */
    "input", "select", "textarea", "label", "button",
    /* block-level elements allowed in a form */
    "pre", "p", "div", "center", "noscript", "noframes", "blockquote",
    "isindex", "hr", "table", "fieldset", "address",
    NULL
};
/*
 * Attribute and content name lists referenced from html40ElementTable
 * further down (entries "form" through "link", plus align_attr which is
 * shared by several entries). Every list ends with a NULL entry.
 */
657static const char* const form_attrs[] = { ATTRS"id", "class", "style", "title","lang", "dir","onclick", "ondblclick"
, "onmousedown", "onmouseup", "onmouseover", "onmouseout", "onkeypress"
, "onkeydown", "onkeyup"
, "method", "enctype", "accept", "name", "onsubmit", "onreset", "accept-charset", NULL((void*)0) } ;
658static const char* const frame_attrs[] = { COREATTRS"id", "class", "style", "title", "longdesc", "name", "src", "frameborder", "marginwidth", "marginheight", "noresize", "scrolling" , NULL((void*)0) } ;
659static const char* const frameset_attrs[] = { COREATTRS"id", "class", "style", "title", "rows", "cols", "onload", "onunload", NULL((void*)0) } ;
660static const char* const frameset_contents[] = { "frameset", "frame", "noframes", NULL((void*)0) } ;
661static const char* const head_attrs[] = { I18N"lang", "dir", "profile", NULL((void*)0) } ;
662static const char* const head_contents[] = { "title", "isindex", "base", "script", "style", "meta", "link", "object", NULL((void*)0) } ;
663static const char* const hr_depr[] = { "align", "noshade", "size", "width", NULL((void*)0) } ;
664static const char* const version_attr[] = { "version", NULL((void*)0) } ;
665static const char* const html_content[] = { "head", "body", "frameset", NULL((void*)0) } ;
666static const char* const iframe_attrs[] = { COREATTRS"id", "class", "style", "title", "longdesc", "name", "src", "frameborder", "marginwidth", "marginheight", "scrolling", "align", "height", "width", NULL((void*)0) } ;
667static const char* const img_attrs[] = { ATTRS"id", "class", "style", "title","lang", "dir","onclick", "ondblclick"
, "onmousedown", "onmouseup", "onmouseover", "onmouseout", "onkeypress"
, "onkeydown", "onkeyup"
, "longdesc", "name", "height", "width", "usemap", "ismap", NULL((void*)0) } ;
668static const char* const embed_attrs[] = { COREATTRS"id", "class", "style", "title", "align", "alt", "border", "code", "codebase", "frameborder", "height", "hidden", "hspace", "name", "palette", "pluginspace", "pluginurl", "src", "type", "units", "vspace", "width", NULL((void*)0) } ;
669static const char* const input_attrs[] = { ATTRS"id", "class", "style", "title","lang", "dir","onclick", "ondblclick"
, "onmousedown", "onmouseup", "onmouseover", "onmouseout", "onkeypress"
, "onkeydown", "onkeyup"
, "type", "name", "value", "checked", "disabled", "readonly", "size", "maxlength", "src", "alt", "usemap", "ismap", "tabindex", "accesskey", "onfocus", "onblur", "onselect", "onchange", "accept", NULL((void*)0) } ;
/* Used by the "isindex" entry. */
670static const char* const prompt_attrs[] = { COREATTRS"id", "class", "style", "title", I18N"lang", "dir", "prompt", NULL((void*)0) } ;
671static const char* const label_attrs[] = { ATTRS"id", "class", "style", "title","lang", "dir","onclick", "ondblclick"
, "onmousedown", "onmouseup", "onmouseover", "onmouseout", "onkeypress"
, "onkeydown", "onkeyup"
, "for", "accesskey", "onfocus", "onblur", NULL((void*)0) } ;
672static const char* const legend_attrs[] = { ATTRS"id", "class", "style", "title","lang", "dir","onclick", "ondblclick"
, "onmousedown", "onmouseup", "onmouseover", "onmouseout", "onkeypress"
, "onkeydown", "onkeyup"
, "accesskey", NULL((void*)0) } ;
/* Shared by the "div", "h1".."h6", "img", "input", "legend" and "p" entries. */
673static const char* const align_attr[] = { "align", NULL((void*)0) } ;
674static const char* const link_attrs[] = { ATTRS"id", "class", "style", "title","lang", "dir","onclick", "ondblclick"
, "onmousedown", "onmouseup", "onmouseover", "onmouseout", "onkeypress"
, "onkeydown", "onkeyup"
, "charset", "href", "hreflang", "type", "rel", "rev", "media", NULL((void*)0) } ;
/*
 * Elements allowed inside <map>: every block-level element plus "area".
 *
 * The names are spelled out one by one so that each is a separate array
 * entry: without a comma between them, the adjacent literals "menu" "pre"
 * are concatenated by the compiler into the single bogus name "menupre".
 * The list is NULL-terminated.
 */
static const char* const map_contents[] = {
    /* headings */
    "h1", "h2", "h3", "h4", "h5", "h6",
    /* lists */
    "ul", "ol", "dir", "menu",
    /* other block-level elements */
    "pre", "p", "dl", "div", "center", "noscript", "noframes", "blockquote",
    "form", "isindex", "hr", "table", "fieldset", "address",
    /* map specific */
    "area",
    NULL
};
/* Single-attribute list referenced by the "map" and "param" entries of html40ElementTable. */
676static const char* const name_attr[] = { "name", NULL((void*)0) } ;
/* Single-attribute list referenced by the "form" entry of html40ElementTable. */
677static const char* const action_attr[] = { "action", NULL((void*)0) } ;
/*
 * Content list shared by the "dir" and "menu" entries of html40ElementTable:
 * every block-level element plus "li".
 *
 * The names are spelled out one by one so that each is a separate array
 * entry: without a comma between them, the adjacent literals "menu" "pre"
 * are concatenated by the compiler into the single bogus name "menupre".
 * The list is NULL-terminated.
 */
static const char* const blockli_elt[] = {
    /* headings */
    "h1", "h2", "h3", "h4", "h5", "h6",
    /* lists */
    "ul", "ol", "dir", "menu",
    /* other block-level elements */
    "pre", "p", "dl", "div", "center", "noscript", "noframes", "blockquote",
    "form", "isindex", "hr", "table", "fieldset", "address",
    /* list item */
    "li",
    NULL
};
/* NULL-terminated attribute lists; meta_attrs and content_attr are referenced by the "meta" entry of html40ElementTable. */
679static const char* const meta_attrs[] = { I18N"lang", "dir", "http-equiv", "name", "scheme", NULL((void*)0) } ;
680static const char* const content_attr[] = { "content", NULL((void*)0) } ;
/* Referenced by the "script" and "style" entries of html40ElementTable. */
681static const char* const type_attr[] = { "type", NULL((void*)0) } ;
/*
 * Elements allowed inside <noframes>: "body" plus every block-level and
 * inline element (the "flow" set).
 *
 * The names are spelled out one by one so that each is a separate array
 * entry: without a comma between them, adjacent string literals are
 * concatenated by the compiler (e.g. "menu" "pre" becomes "menupre").
 * The list is NULL-terminated.
 */
static const char* const noframes_content[] = {
    "body",
    /* headings */
    "h1", "h2", "h3", "h4", "h5", "h6",
    /* lists */
    "ul", "ol", "dir", "menu",
    /* other block-level elements */
    "pre", "p", "dl", "div", "center", "noscript", "noframes", "blockquote",
    "form", "isindex", "hr", "table", "fieldset", "address",
    /* font style */
    "tt", "i", "b", "u", "s", "strike", "big", "small",
    /* phrase */
    "em", "strong", "dfn", "code", "samp", "kbd", "var", "cite", "abbr",
    "acronym",
    /* special inline */
    "a", "img", "applet", "embed", "object", "font", "basefont",
    "br", "script", "map", "q", "sub", "sup", "span", "bdo", "iframe",
    /* form controls */
    "input", "select", "textarea", "label", "button",
    NULL
};
/*
 * Elements allowed inside <object>: every block-level and inline element
 * (the "flow" set) plus "param".
 *
 * The names are spelled out one by one so that each is a separate array
 * entry: without a comma between them, adjacent string literals are
 * concatenated by the compiler (e.g. "iframe" "input" becomes
 * "iframeinput"). The list is NULL-terminated.
 */
static const char* const object_contents[] = {
    /* headings */
    "h1", "h2", "h3", "h4", "h5", "h6",
    /* lists */
    "ul", "ol", "dir", "menu",
    /* other block-level elements */
    "pre", "p", "dl", "div", "center", "noscript", "noframes", "blockquote",
    "form", "isindex", "hr", "table", "fieldset", "address",
    /* font style */
    "tt", "i", "b", "u", "s", "strike", "big", "small",
    /* phrase */
    "em", "strong", "dfn", "code", "samp", "kbd", "var", "cite", "abbr",
    "acronym",
    /* special inline */
    "a", "img", "applet", "embed", "object", "font", "basefont",
    "br", "script", "map", "q", "sub", "sup", "span", "bdo", "iframe",
    /* form controls */
    "input", "select", "textarea", "label", "button",
    /* object specific */
    "param",
    NULL
};
/*
 * Attribute and content name lists referenced from html40ElementTable
 * further down (entries "object" through "ul", plus dir_attr used by "bdo").
 * Every list ends with a NULL entry.
 */
684static const char* const object_attrs[] = { ATTRS"id", "class", "style", "title","lang", "dir","onclick", "ondblclick"
, "onmousedown", "onmouseup", "onmouseover", "onmouseout", "onkeypress"
, "onkeydown", "onkeyup"
, "declare", "classid", "codebase", "data", "type", "codetype", "archive", "standby", "height", "width", "usemap", "name", "tabindex", NULL((void*)0) } ;
685static const char* const object_depr[] = { "align", "border", "hspace", "vspace", NULL((void*)0) } ;
686static const char* const ol_attrs[] = { "type", "compact", "start", NULL((void*)0)} ;
687static const char* const option_elt[] = { "option", NULL((void*)0) } ;
688static const char* const optgroup_attrs[] = { ATTRS"id", "class", "style", "title","lang", "dir","onclick", "ondblclick"
, "onmousedown", "onmouseup", "onmouseover", "onmouseout", "onkeypress"
, "onkeydown", "onkeyup"
, "disabled", NULL((void*)0) } ;
689static const char* const option_attrs[] = { ATTRS"id", "class", "style", "title","lang", "dir","onclick", "ondblclick"
, "onmousedown", "onmouseup", "onmouseover", "onmouseout", "onkeypress"
, "onkeydown", "onkeyup"
, "disabled", "label", "selected", "value", NULL((void*)0) } ;
690static const char* const param_attrs[] = { "id", "value", "valuetype", "type", NULL((void*)0) } ;
691static const char* const width_attr[] = { "width", NULL((void*)0) } ;
/* Content list used by the "pre" entry. */
692static const char* const pre_content[] = { PHRASE"em", "strong", "dfn", "code", "samp", "kbd", "var", "cite", "abbr"
, "acronym"
, "tt", "i", "b", "u", "s", "strike", "a", "br", "script", "map", "q", "span", "bdo", "iframe", NULL((void*)0) } ;
693static const char* const script_attrs[] = { "charset", "src", "defer", "event", "for", NULL((void*)0) } ;
694static const char* const language_attr[] = { "language", NULL((void*)0) } ;
695static const char* const select_content[] = { "optgroup", "option", NULL((void*)0) } ;
696static const char* const select_attrs[] = { ATTRS"id", "class", "style", "title","lang", "dir","onclick", "ondblclick"
, "onmousedown", "onmouseup", "onmouseover", "onmouseout", "onkeypress"
, "onkeydown", "onkeyup"
, "name", "size", "multiple", "disabled", "tabindex", "onfocus", "onblur", "onchange", NULL((void*)0) } ;
697static const char* const style_attrs[] = { I18N"lang", "dir", "media", "title", NULL((void*)0) } ;
698static const char* const table_attrs[] = { ATTRS"id", "class", "style", "title","lang", "dir","onclick", "ondblclick"
, "onmousedown", "onmouseup", "onmouseover", "onmouseout", "onkeypress"
, "onkeydown", "onkeyup"
, "summary", "width", "border", "frame", "rules", "cellspacing", "cellpadding", "datapagesize", NULL((void*)0) } ;
699static const char* const table_depr[] = { "align", "bgcolor", NULL((void*)0) } ;
700static const char* const table_contents[] = { "caption", "col", "colgroup", "thead", "tfoot", "tbody", "tr", NULL((void*)0)} ;
/* Content list shared by the "tbody", "tfoot" and "thead" entries. */
701static const char* const tr_elt[] = { "tr", NULL((void*)0) } ;
/* Attribute list shared by the "tbody", "tfoot", "thead" and "tr" entries. */
702static const char* const talign_attrs[] = { ATTRS"id", "class", "style", "title","lang", "dir","onclick", "ondblclick"
, "onmousedown", "onmouseup", "onmouseover", "onmouseout", "onkeypress"
, "onkeydown", "onkeyup"
, CELLHALIGN"align", "char", "charoff", CELLVALIGN"valign", NULL((void*)0)} ;
/* th_td_depr and th_td_attr are shared by the "td" and "th" entries. */
703static const char* const th_td_depr[] = { "nowrap", "bgcolor", "width", "height", NULL((void*)0) } ;
704static const char* const th_td_attr[] = { ATTRS"id", "class", "style", "title","lang", "dir","onclick", "ondblclick"
, "onmousedown", "onmouseup", "onmouseover", "onmouseout", "onkeypress"
, "onkeydown", "onkeyup"
, "abbr", "axis", "headers", "scope", "rowspan", "colspan", CELLHALIGN"align", "char", "charoff", CELLVALIGN"valign", NULL((void*)0) } ;
705static const char* const textarea_attrs[] = { ATTRS"id", "class", "style", "title","lang", "dir","onclick", "ondblclick"
, "onmousedown", "onmouseup", "onmouseover", "onmouseout", "onkeypress"
, "onkeydown", "onkeyup"
, "name", "disabled", "readonly", "tabindex", "accesskey", "onfocus", "onblur", "onselect", "onchange", NULL((void*)0) } ;
706static const char* const tr_contents[] = { "th", "td", NULL((void*)0) } ;
707static const char* const bgcolor_attr[] = { "bgcolor", NULL((void*)0) } ;
/* Content list shared by the "ol" and "ul" entries. */
708static const char* const li_elt[] = { "li", NULL((void*)0) } ;
709static const char* const ul_depr[] = { "type", "compact", NULL((void*)0)} ;
/* Single-attribute list used by the "bdo" entry. */
710static const char* const dir_attr[] = { "dir", NULL((void*)0)} ;
711
/*
 * DECL is a cast to `const char**`; it is applied to the
 * `const char* const[]` name lists when they are stored in the
 * initializers below.
 */
712#define DECL(const char**) (const char**)

/*
 * html40ElementTable: one htmlElemDesc initializer per HTML 4.0 element,
 * listed in alphabetical order of the tag name. htmlTagLookup() scans this
 * table linearly with a case-insensitive comparison on the first member.
 *
 * Each initializer holds, in order: the tag name, seven small integer
 * flags, a description string, and five pointers (a content name list,
 * a default sub-element name, and three attribute name lists); unused
 * pointers are NULL and EMPTY (which expands to a null pointer) marks an
 * element with no content list.
 *
 * NOTE(review): the member names presumably follow struct _htmlElemDesc in
 * libxml/HTMLparser.h (name, startTag, endTag, saveEndTag, empty, depr,
 * dtd, isinline, desc, subelts, defaultsubelt, attrs_opt, attrs_depr,
 * attrs_req) -- confirm against the header before relying on a column.
 */
714static const htmlElemDesc
715html40ElementTable[] = {
716{ "a", 0, 0, 0, 0, 0, 0, 1, "anchor ",
717 DECL(const char**) html_inline , NULL((void*)0) , DECL(const char**) a_attrs , DECL(const char**) target_attr, NULL((void*)0)
718},
719{ "abbr", 0, 0, 0, 0, 0, 0, 1, "abbreviated form",
720 DECL(const char**) html_inline , NULL((void*)0) , DECL(const char**) html_attrs, NULL((void*)0), NULL((void*)0)
721},
722{ "acronym", 0, 0, 0, 0, 0, 0, 1, "",
723 DECL(const char**) html_inline , NULL((void*)0) , DECL(const char**) html_attrs, NULL((void*)0), NULL((void*)0)
724},
725{ "address", 0, 0, 0, 0, 0, 0, 0, "information on author ",
726 DECL(const char**) inline_p , NULL((void*)0) , DECL(const char**) html_attrs, NULL((void*)0), NULL((void*)0)
727},
728{ "applet", 0, 0, 0, 0, 1, 1, 2, "java applet ",
729 DECL(const char**) flow_param , NULL((void*)0) , NULL((void*)0) , DECL(const char**) applet_attrs, NULL((void*)0)
730},
731{ "area", 0, 2, 2, 1, 0, 0, 0, "client-side image map area ",
732 EMPTY((void*)0) , NULL((void*)0) , DECL(const char**) area_attrs , DECL(const char**) target_attr, DECL(const char**) alt_attr
733},
734{ "b", 0, 3, 0, 0, 0, 0, 1, "bold text style",
735 DECL(const char**) html_inline , NULL((void*)0) , DECL(const char**) html_attrs, NULL((void*)0), NULL((void*)0)
736},
737{ "base", 0, 2, 2, 1, 0, 0, 0, "document base uri ",
738 EMPTY((void*)0) , NULL((void*)0) , NULL((void*)0) , DECL(const char**) target_attr, DECL(const char**) href_attrs
739},
740{ "basefont", 0, 2, 2, 1, 1, 1, 1, "base font size " ,
741 EMPTY((void*)0) , NULL((void*)0) , NULL((void*)0), DECL(const char**) basefont_attrs, NULL((void*)0)
742},
743{ "bdo", 0, 0, 0, 0, 0, 0, 1, "i18n bidi over-ride ",
744 DECL(const char**) html_inline , NULL((void*)0) , DECL(const char**) core_i18n_attrs, NULL((void*)0), DECL(const char**) dir_attr
745},
746{ "big", 0, 3, 0, 0, 0, 0, 1, "large text style",
747 DECL(const char**) html_inline , NULL((void*)0) , DECL(const char**) html_attrs, NULL((void*)0), NULL((void*)0)
748},
749{ "blockquote", 0, 0, 0, 0, 0, 0, 0, "long quotation ",
750 DECL(const char**) html_flow , NULL((void*)0) , DECL(const char**) quote_attrs , NULL((void*)0), NULL((void*)0)
751},
752{ "body", 1, 1, 0, 0, 0, 0, 0, "document body ",
753 DECL(const char**) body_contents , "div" , DECL(const char**) body_attrs, DECL(const char**) body_depr, NULL((void*)0)
754},
755{ "br", 0, 2, 2, 1, 0, 0, 1, "forced line break ",
756 EMPTY((void*)0) , NULL((void*)0) , DECL(const char**) core_attrs, DECL(const char**) clear_attrs , NULL((void*)0)
757},
758{ "button", 0, 0, 0, 0, 0, 0, 2, "push button ",
759 DECL(const char**) html_flow MODIFIER , NULL((void*)0) , DECL(const char**) button_attrs, NULL((void*)0), NULL((void*)0)
760},
761{ "caption", 0, 0, 0, 0, 0, 0, 0, "table caption ",
762 DECL(const char**) html_inline , NULL((void*)0) , DECL(const char**) html_attrs, NULL((void*)0), NULL((void*)0)
763},
764{ "center", 0, 3, 0, 0, 1, 1, 0, "shorthand for div align=center ",
765 DECL(const char**) html_flow , NULL((void*)0) , NULL((void*)0), DECL(const char**) html_attrs, NULL((void*)0)
766},
767{ "cite", 0, 0, 0, 0, 0, 0, 1, "citation",
768 DECL(const char**) html_inline , NULL((void*)0) , DECL(const char**) html_attrs, NULL((void*)0), NULL((void*)0)
769},
770{ "code", 0, 0, 0, 0, 0, 0, 1, "computer code fragment",
771 DECL(const char**) html_inline , NULL((void*)0) , DECL(const char**) html_attrs, NULL((void*)0), NULL((void*)0)
772},
773{ "col", 0, 2, 2, 1, 0, 0, 0, "table column ",
774 EMPTY((void*)0) , NULL((void*)0) , DECL(const char**) col_attrs , NULL((void*)0), NULL((void*)0)
775},
776{ "colgroup", 0, 1, 0, 0, 0, 0, 0, "table column group ",
777 DECL(const char**) col_elt , "col" , DECL(const char**) col_attrs , NULL((void*)0), NULL((void*)0)
778},
779{ "dd", 0, 1, 0, 0, 0, 0, 0, "definition description ",
780 DECL(const char**) html_flow , NULL((void*)0) , DECL(const char**) html_attrs, NULL((void*)0), NULL((void*)0)
781},
782{ "del", 0, 0, 0, 0, 0, 0, 2, "deleted text ",
783 DECL(const char**) html_flow , NULL((void*)0) , DECL(const char**) edit_attrs , NULL((void*)0), NULL((void*)0)
784},
785{ "dfn", 0, 0, 0, 0, 0, 0, 1, "instance definition",
786 DECL(const char**) html_inline , NULL((void*)0) , DECL(const char**) html_attrs, NULL((void*)0), NULL((void*)0)
787},
788{ "dir", 0, 0, 0, 0, 1, 1, 0, "directory list",
789 DECL(const char**) blockli_elt, "li" , NULL((void*)0), DECL(const char**) compact_attrs, NULL((void*)0)
790},
791{ "div", 0, 0, 0, 0, 0, 0, 0, "generic language/style container",
792 DECL(const char**) html_flow, NULL((void*)0), DECL(const char**) html_attrs, DECL(const char**) align_attr, NULL((void*)0)
793},
794{ "dl", 0, 0, 0, 0, 0, 0, 0, "definition list ",
795 DECL(const char**) dl_contents , "dd" , DECL(const char**) html_attrs, DECL(const char**) compact_attr, NULL((void*)0)
796},
797{ "dt", 0, 1, 0, 0, 0, 0, 0, "definition term ",
798 DECL(const char**) html_inline, NULL((void*)0), DECL(const char**) html_attrs, NULL((void*)0), NULL((void*)0)
799},
800{ "em", 0, 3, 0, 0, 0, 0, 1, "emphasis",
801 DECL(const char**) html_inline, NULL((void*)0), DECL(const char**) html_attrs, NULL((void*)0), NULL((void*)0)
802},
803{ "embed", 0, 1, 0, 0, 1, 1, 1, "generic embedded object ",
804 EMPTY((void*)0), NULL((void*)0), DECL(const char**) embed_attrs, NULL((void*)0), NULL((void*)0)
805},
806{ "fieldset", 0, 0, 0, 0, 0, 0, 0, "form control group ",
807 DECL(const char**) fieldset_contents , NULL((void*)0), DECL(const char**) html_attrs, NULL((void*)0), NULL((void*)0)
808},
809{ "font", 0, 3, 0, 0, 1, 1, 1, "local change to font ",
810 DECL(const char**) html_inline, NULL((void*)0), NULL((void*)0), DECL(const char**) font_attrs, NULL((void*)0)
811},
812{ "form", 0, 0, 0, 0, 0, 0, 0, "interactive form ",
813 DECL(const char**) form_contents, "fieldset", DECL(const char**) form_attrs , DECL(const char**) target_attr, DECL(const char**) action_attr
814},
815{ "frame", 0, 2, 2, 1, 0, 2, 0, "subwindow " ,
816 EMPTY((void*)0), NULL((void*)0), NULL((void*)0), DECL(const char**) frame_attrs, NULL((void*)0)
817},
818{ "frameset", 0, 0, 0, 0, 0, 2, 0, "window subdivision" ,
819 DECL(const char**) frameset_contents, "noframes" , NULL((void*)0) , DECL(const char**) frameset_attrs, NULL((void*)0)
820},
821{ "h1", 0, 0, 0, 0, 0, 0, 0, "heading ",
822 DECL(const char**) html_inline, NULL((void*)0), DECL(const char**) html_attrs, DECL(const char**) align_attr, NULL((void*)0)
823},
824{ "h2", 0, 0, 0, 0, 0, 0, 0, "heading ",
825 DECL(const char**) html_inline, NULL((void*)0), DECL(const char**) html_attrs, DECL(const char**) align_attr, NULL((void*)0)
826},
827{ "h3", 0, 0, 0, 0, 0, 0, 0, "heading ",
828 DECL(const char**) html_inline, NULL((void*)0), DECL(const char**) html_attrs, DECL(const char**) align_attr, NULL((void*)0)
829},
830{ "h4", 0, 0, 0, 0, 0, 0, 0, "heading ",
831 DECL(const char**) html_inline, NULL((void*)0), DECL(const char**) html_attrs, DECL(const char**) align_attr, NULL((void*)0)
832},
833{ "h5", 0, 0, 0, 0, 0, 0, 0, "heading ",
834 DECL(const char**) html_inline, NULL((void*)0), DECL(const char**) html_attrs, DECL(const char**) align_attr, NULL((void*)0)
835},
836{ "h6", 0, 0, 0, 0, 0, 0, 0, "heading ",
837 DECL(const char**) html_inline, NULL((void*)0), DECL(const char**) html_attrs, DECL(const char**) align_attr, NULL((void*)0)
838},
839{ "head", 1, 1, 0, 0, 0, 0, 0, "document head ",
840 DECL(const char**) head_contents, NULL((void*)0), DECL(const char**) head_attrs, NULL((void*)0), NULL((void*)0)
841},
842{ "hr", 0, 2, 2, 1, 0, 0, 0, "horizontal rule " ,
843 EMPTY((void*)0), NULL((void*)0), DECL(const char**) html_attrs, DECL(const char**) hr_depr, NULL((void*)0)
844},
845{ "html", 1, 1, 0, 0, 0, 0, 0, "document root element ",
846 DECL(const char**) html_content , NULL((void*)0) , DECL(const char**) i18n_attrs, DECL(const char**) version_attr, NULL((void*)0)
847},
848{ "i", 0, 3, 0, 0, 0, 0, 1, "italic text style",
849 DECL(const char**) html_inline, NULL((void*)0), DECL(const char**) html_attrs, NULL((void*)0), NULL((void*)0)
850},
851{ "iframe", 0, 0, 0, 0, 0, 1, 2, "inline subwindow ",
852 DECL(const char**) html_flow, NULL((void*)0), NULL((void*)0), DECL(const char**) iframe_attrs, NULL((void*)0)
853},
854{ "img", 0, 2, 2, 1, 0, 0, 1, "embedded image ",
855 EMPTY((void*)0), NULL((void*)0), DECL(const char**) img_attrs, DECL(const char**) align_attr, DECL(const char**) src_alt_attrs
856},
857{ "input", 0, 2, 2, 1, 0, 0, 1, "form control ",
858 EMPTY((void*)0), NULL((void*)0), DECL(const char**) input_attrs , DECL(const char**) align_attr, NULL((void*)0)
859},
860{ "ins", 0, 0, 0, 0, 0, 0, 2, "inserted text",
861 DECL(const char**) html_flow, NULL((void*)0), DECL(const char**) edit_attrs, NULL((void*)0), NULL((void*)0)
862},
863{ "isindex", 0, 2, 2, 1, 1, 1, 0, "single line prompt ",
864 EMPTY((void*)0), NULL((void*)0), NULL((void*)0), DECL(const char**) prompt_attrs, NULL((void*)0)
865},
866{ "kbd", 0, 0, 0, 0, 0, 0, 1, "text to be entered by the user",
867 DECL(const char**) html_inline, NULL((void*)0), DECL(const char**) html_attrs, NULL((void*)0), NULL((void*)0)
868},
869{ "label", 0, 0, 0, 0, 0, 0, 1, "form field label text ",
870 DECL(const char**) html_inline MODIFIER, NULL((void*)0), DECL(const char**) label_attrs , NULL((void*)0), NULL((void*)0)
871},
872{ "legend", 0, 0, 0, 0, 0, 0, 0, "fieldset legend ",
873 DECL(const char**) html_inline, NULL((void*)0), DECL(const char**) legend_attrs , DECL(const char**) align_attr, NULL((void*)0)
874},
875{ "li", 0, 1, 1, 0, 0, 0, 0, "list item ",
876 DECL(const char**) html_flow, NULL((void*)0), DECL(const char**) html_attrs, NULL((void*)0), NULL((void*)0)
877},
878{ "link", 0, 2, 2, 1, 0, 0, 0, "a media-independent link ",
879 EMPTY((void*)0), NULL((void*)0), DECL(const char**) link_attrs, DECL(const char**) target_attr, NULL((void*)0)
880},
881{ "map", 0, 0, 0, 0, 0, 0, 2, "client-side image map ",
882 DECL(const char**) map_contents , NULL((void*)0), DECL(const char**) html_attrs , NULL((void*)0), DECL(const char**) name_attr
883},
884{ "menu", 0, 0, 0, 0, 1, 1, 0, "menu list ",
885 DECL(const char**) blockli_elt , NULL((void*)0), NULL((void*)0), DECL(const char**) compact_attrs, NULL((void*)0)
886},
887{ "meta", 0, 2, 2, 1, 0, 0, 0, "generic metainformation ",
888 EMPTY((void*)0), NULL((void*)0), DECL(const char**) meta_attrs , NULL((void*)0) , DECL(const char**) content_attr
889},
890{ "noframes", 0, 0, 0, 0, 0, 2, 0, "alternate content container for non frame-based rendering ",
891 DECL(const char**) noframes_content, "body" , DECL(const char**) html_attrs, NULL((void*)0), NULL((void*)0)
892},
893{ "noscript", 0, 0, 0, 0, 0, 0, 0, "alternate content container for non script-based rendering ",
894 DECL(const char**) html_flow, "div", DECL(const char**) html_attrs, NULL((void*)0), NULL((void*)0)
895},
896{ "object", 0, 0, 0, 0, 0, 0, 2, "generic embedded object ",
897 DECL(const char**) object_contents , "div" , DECL(const char**) object_attrs, DECL(const char**) object_depr, NULL((void*)0)
898},
899{ "ol", 0, 0, 0, 0, 0, 0, 0, "ordered list ",
900 DECL(const char**) li_elt , "li" , DECL(const char**) html_attrs, DECL(const char**) ol_attrs, NULL((void*)0)
901},
902{ "optgroup", 0, 0, 0, 0, 0, 0, 0, "option group ",
903 DECL(const char**) option_elt , "option", DECL(const char**) optgroup_attrs, NULL((void*)0), DECL(const char**) label_attr
904},
905{ "option", 0, 1, 0, 0, 0, 0, 0, "selectable choice " ,
906 DECL(const char**) html_pcdata, NULL((void*)0), DECL(const char**) option_attrs, NULL((void*)0), NULL((void*)0)
907},
908{ "p", 0, 1, 0, 0, 0, 0, 0, "paragraph ",
909 DECL(const char**) html_inline, NULL((void*)0), DECL(const char**) html_attrs, DECL(const char**) align_attr, NULL((void*)0)
910},
911{ "param", 0, 2, 2, 1, 0, 0, 0, "named property value ",
912 EMPTY((void*)0), NULL((void*)0), DECL(const char**) param_attrs, NULL((void*)0), DECL(const char**) name_attr
913},
914{ "pre", 0, 0, 0, 0, 0, 0, 0, "preformatted text ",
915 DECL(const char**) pre_content, NULL((void*)0), DECL(const char**) html_attrs, DECL(const char**) width_attr, NULL((void*)0)
916},
917{ "q", 0, 0, 0, 0, 0, 0, 1, "short inline quotation ",
918 DECL(const char**) html_inline, NULL((void*)0), DECL(const char**) quote_attrs, NULL((void*)0), NULL((void*)0)
919},
920{ "s", 0, 3, 0, 0, 1, 1, 1, "strike-through text style",
921 DECL(const char**) html_inline, NULL((void*)0), NULL((void*)0), DECL(const char**) html_attrs, NULL((void*)0)
922},
923{ "samp", 0, 0, 0, 0, 0, 0, 1, "sample program output, scripts, etc.",
924 DECL(const char**) html_inline, NULL((void*)0), DECL(const char**) html_attrs, NULL((void*)0), NULL((void*)0)
925},
926{ "script", 0, 0, 0, 0, 0, 0, 2, "script statements ",
927 DECL(const char**) html_cdatahtml_pcdata, NULL((void*)0), DECL(const char**) script_attrs, DECL(const char**) language_attr, DECL(const char**) type_attr
928},
929{ "select", 0, 0, 0, 0, 0, 0, 1, "option selector ",
930 DECL(const char**) select_content, NULL((void*)0), DECL(const char**) select_attrs, NULL((void*)0), NULL((void*)0)
931},
932{ "small", 0, 3, 0, 0, 0, 0, 1, "small text style",
933 DECL(const char**) html_inline, NULL((void*)0), DECL(const char**) html_attrs, NULL((void*)0), NULL((void*)0)
934},
935{ "span", 0, 0, 0, 0, 0, 0, 1, "generic language/style container ",
936 DECL(const char**) html_inline, NULL((void*)0), DECL(const char**) html_attrs, NULL((void*)0), NULL((void*)0)
937},
938{ "strike", 0, 3, 0, 0, 1, 1, 1, "strike-through text",
939 DECL(const char**) html_inline, NULL((void*)0), NULL((void*)0), DECL(const char**) html_attrs, NULL((void*)0)
940},
941{ "strong", 0, 3, 0, 0, 0, 0, 1, "strong emphasis",
942 DECL(const char**) html_inline, NULL((void*)0), DECL(const char**) html_attrs, NULL((void*)0), NULL((void*)0)
943},
944{ "style", 0, 0, 0, 0, 0, 0, 0, "style info ",
945 DECL(const char**) html_cdatahtml_pcdata, NULL((void*)0), DECL(const char**) style_attrs, NULL((void*)0), DECL(const char**) type_attr
946},
947{ "sub", 0, 3, 0, 0, 0, 0, 1, "subscript",
948 DECL(const char**) html_inline, NULL((void*)0), DECL(const char**) html_attrs, NULL((void*)0), NULL((void*)0)
949},
950{ "sup", 0, 3, 0, 0, 0, 0, 1, "superscript ",
951 DECL(const char**) html_inline, NULL((void*)0), DECL(const char**) html_attrs, NULL((void*)0), NULL((void*)0)
952},
953{ "table", 0, 0, 0, 0, 0, 0, 0, "",
954 DECL(const char**) table_contents , "tr" , DECL(const char**) table_attrs , DECL(const char**) table_depr, NULL((void*)0)
955},
956{ "tbody", 1, 0, 0, 0, 0, 0, 0, "table body ",
957 DECL(const char**) tr_elt , "tr" , DECL(const char**) talign_attrs, NULL((void*)0), NULL((void*)0)
958},
959{ "td", 0, 0, 0, 0, 0, 0, 0, "table data cell",
960 DECL(const char**) html_flow, NULL((void*)0), DECL(const char**) th_td_attr, DECL(const char**) th_td_depr, NULL((void*)0)
961},
962{ "textarea", 0, 0, 0, 0, 0, 0, 1, "multi-line text field ",
963 DECL(const char**) html_pcdata, NULL((void*)0), DECL(const char**) textarea_attrs, NULL((void*)0), DECL(const char**) rows_cols_attr
964},
965{ "tfoot", 0, 1, 0, 0, 0, 0, 0, "table footer ",
966 DECL(const char**) tr_elt , "tr" , DECL(const char**) talign_attrs, NULL((void*)0), NULL((void*)0)
967},
968{ "th", 0, 1, 0, 0, 0, 0, 0, "table header cell",
969 DECL(const char**) html_flow, NULL((void*)0), DECL(const char**) th_td_attr, DECL(const char**) th_td_depr, NULL((void*)0)
970},
971{ "thead", 0, 1, 0, 0, 0, 0, 0, "table header ",
972 DECL(const char**) tr_elt , "tr" , DECL(const char**) talign_attrs, NULL((void*)0), NULL((void*)0)
973},
974{ "title", 0, 0, 0, 0, 0, 0, 0, "document title ",
975 DECL(const char**) html_pcdata, NULL((void*)0), DECL(const char**) i18n_attrs, NULL((void*)0), NULL((void*)0)
976},
977{ "tr", 0, 0, 0, 0, 0, 0, 0, "table row ",
978 DECL(const char**) tr_contents , "td" , DECL(const char**) talign_attrs, DECL(const char**) bgcolor_attr, NULL((void*)0)
979},
980{ "tt", 0, 3, 0, 0, 0, 0, 1, "teletype or monospaced text style",
981 DECL(const char**) html_inline, NULL((void*)0), DECL(const char**) html_attrs, NULL((void*)0), NULL((void*)0)
982},
983{ "u", 0, 3, 0, 0, 1, 1, 1, "underlined text style",
984 DECL(const char**) html_inline, NULL((void*)0), NULL((void*)0), DECL(const char**) html_attrs, NULL((void*)0)
985},
986{ "ul", 0, 0, 0, 0, 0, 0, 0, "unordered list ",
987 DECL(const char**) li_elt , "li" , DECL(const char**) html_attrs, DECL(const char**) ul_depr, NULL((void*)0)
988},
989{ "var", 0, 0, 0, 0, 0, 0, 1, "instance of a variable or program argument",
990 DECL(const char**) html_inline, NULL((void*)0), DECL(const char**) html_attrs, NULL((void*)0), NULL((void*)0)
991}
992};
993
994/*
 * Start tags that imply the end of the current element.
 *
 * The array is a flat sequence of groups. Each group is a run of tag names
 * closed by a NULL entry, and one extra NULL after the last group ends the
 * whole table. htmlInitAutoClose() walks the table once and stores the
 * address of the first name of every group in htmlStartCloseIndex.
 *
 * NOTE(review): presumably the first name of a group is the newly opened
 * tag and the remaining names are the open elements it closes -- confirm
 * against htmlCheckAutoClose().
 */
997static const char * const htmlStartClose[] = {
998"form", "form", "p", "hr", "h1", "h2", "h3", "h4", "h5", "h6",
999 "dl", "ul", "ol", "menu", "dir", "address", "pre",
1000 "listing", "xmp", "head", NULL((void*)0),
1001"head", "p", NULL((void*)0),
1002"title", "p", NULL((void*)0),
1003"body", "head", "style", "link", "title", "p", NULL((void*)0),
1004"frameset", "head", "style", "link", "title", "p", NULL((void*)0),
1005"li", "p", "h1", "h2", "h3", "h4", "h5", "h6", "dl", "address",
1006 "pre", "listing", "xmp", "head", "li", NULL((void*)0),
1007"hr", "p", "head", NULL((void*)0),
1008"h1", "p", "head", NULL((void*)0),
1009"h2", "p", "head", NULL((void*)0),
1010"h3", "p", "head", NULL((void*)0),
1011"h4", "p", "head", NULL((void*)0),
1012"h5", "p", "head", NULL((void*)0),
1013"h6", "p", "head", NULL((void*)0),
1014"dir", "p", "head", NULL((void*)0),
1015"address", "p", "head", "ul", NULL((void*)0),
1016"pre", "p", "head", "ul", NULL((void*)0),
1017"listing", "p", "head", NULL((void*)0),
1018"xmp", "p", "head", NULL((void*)0),
1019"blockquote", "p", "head", NULL((void*)0),
1020"dl", "p", "dt", "menu", "dir", "address", "pre", "listing",
1021 "xmp", "head", NULL((void*)0),
1022"dt", "p", "menu", "dir", "address", "pre", "listing", "xmp",
1023 "head", "dd", NULL((void*)0),
1024"dd", "p", "menu", "dir", "address", "pre", "listing", "xmp",
1025 "head", "dt", NULL((void*)0),
1026"ul", "p", "head", "ol", "menu", "dir", "address", "pre",
1027 "listing", "xmp", NULL((void*)0),
1028"ol", "p", "head", "ul", NULL((void*)0),
1029"menu", "p", "head", "ul", NULL((void*)0),
1030"p", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", FONTSTYLE"tt", "i", "b", "u", "s", "strike", "big", "small", NULL((void*)0),
1031"div", "p", "head", NULL((void*)0),
1032"noscript", "p", "head", NULL((void*)0),
1033"center", "font", "b", "i", "p", "head", NULL((void*)0),
1034"a", "a", NULL((void*)0),
1035"caption", "p", NULL((void*)0),
1036"colgroup", "caption", "colgroup", "col", "p", NULL((void*)0),
1037"col", "caption", "col", "p", NULL((void*)0),
1038"table", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", "pre",
1039 "listing", "xmp", "a", NULL((void*)0),
1040"th", "th", "td", "p", "span", "font", "a", "b", "i", "u", NULL((void*)0),
1041"td", "th", "td", "p", "span", "font", "a", "b", "i", "u", NULL((void*)0),
1042"tr", "th", "td", "tr", "caption", "col", "colgroup", "p", NULL((void*)0),
1043"thead", "caption", "col", "colgroup", NULL((void*)0),
1044"tfoot", "th", "td", "tr", "caption", "col", "colgroup", "thead",
1045 "tbody", "p", NULL((void*)0),
1046"tbody", "th", "td", "tr", "caption", "col", "colgroup", "thead",
1047 "tfoot", "tbody", "p", NULL((void*)0),
1048"optgroup", "option", NULL((void*)0),
1049"option", "option", NULL((void*)0),
1050"fieldset", "legend", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6",
1051 "pre", "listing", "xmp", "a", NULL((void*)0),
1052NULL((void*)0)
1053};
1054
/*
 * HTML elements that are not supposed to hold CDATA content directly;
 * a p element will be implied where such content shows up.
 * The list is NULL-terminated.
 *
 * TODO: extend that list by reading the HTML SGML DTD on
 *       implied paragraph
 */
static const char *const htmlNoContentElements[] = { "html", "head", NULL };
1067
/*
 * The list of HTML attributes which are of content %Script;
 * (the intrinsic event handlers).
 *
 * NOTE: when adding ones, check htmlIsScriptAttribute() since
 *       it assumes the name starts with 'on'
 *
 * The list has no NULL terminator: its length is meant to be taken with
 * sizeof, so do not append a sentinel entry.
 */
static const char *const htmlScriptAttributes[] = {
    "onclick",
    "ondblclick",
    "onmousedown",
    "onmouseup",
    "onmouseover",
    "onmousemove",
    "onmouseout",
    "onkeypress",
    "onkeydown",
    "onkeyup",
    "onload",
    "onunload",
    "onfocus",
    "onblur",
    "onsubmit",
    "onreset",      /* was misspelled "onrest", so the reset handler never matched */
    "onchange",
    "onselect"
};
1093
/*
 * End-tag priorities, used by the HTML parser to cope with broken pages.
 * Giving elements different priorities lets the parser decide what to do
 * with an extra end tag: an end tag may only close elements whose
 * priority is lower than or equal to its own.
 */

/* One (element name, priority) pair of the table below. */
typedef struct {
    const char *name;
    int priority;
} elementPriority;

/*
 * The table is scanned from the top by htmlGetEndPriority() and stops at
 * the entry whose name is NULL; that last entry supplies the priority of
 * every element not listed explicitly.
 */
static const elementPriority htmlEndPriority[] = {
    { "div",   150 },
    { "td",    160 },
    { "th",    160 },
    { "tr",    170 },
    { "thead", 180 },
    { "tbody", 180 },
    { "tfoot", 180 },
    { "table", 190 },
    { "head",  200 },
    { "body",  200 },
    { "html",  220 },
    { NULL,    100 }    /* Default priority */
};
1121
/*
 * Index over htmlStartClose, filled by htmlInitAutoClose(): each used slot
 * points at the first name of one NULL-terminated group, unused slots are
 * NULL. The index has room for 100 slots, of which at most 99 are filled.
 */
1122static const char** htmlStartCloseIndex[100];
/* Set to 1 by htmlInitAutoClose() once the index has been built. */
1123static int htmlStartCloseIndexinitialized = 0;
1124
1125/************************************************************************
1126 * *
1127 * functions to handle HTML specific data *
1128 * *
1129 ************************************************************************/
1130
1131/**
1132 * htmlInitAutoClose:
1133 *
1134 * Initialize the htmlStartCloseIndex for fast lookup of closing tags names.
1135 * This is not reentrant. Call xmlInitParser() once before processing in
1136 * case of use in multithreaded programs.
1137 */
1138void
1139htmlInitAutoClose(void) {
1140 int indx, i = 0;
1141
1142 if (htmlStartCloseIndexinitialized) return;
1143
1144 for (indx = 0;indx < 100;indx ++) htmlStartCloseIndex[indx] = NULL((void*)0);
1145 indx = 0;
1146 while ((htmlStartClose[i] != NULL((void*)0)) && (indx < 100 - 1)) {
1147 htmlStartCloseIndex[indx++] = (const char**) &htmlStartClose[i];
1148 while (htmlStartClose[i] != NULL((void*)0)) i++;
1149 i++;
1150 }
1151 htmlStartCloseIndexinitialized = 1;
1152}
1153
1154/**
1155 * htmlTagLookup:
1156 * @tag: The tag name in lowercase
1157 *
1158 * Lookup the HTML tag in the ElementTable
1159 *
1160 * Returns the related htmlElemDescPtr or NULL if not found.
1161 */
1162const htmlElemDesc *
1163htmlTagLookup(const xmlChar *tag) {
1164 unsigned int i;
1165
1166 for (i = 0; i < (sizeof(html40ElementTable) /
1167 sizeof(html40ElementTable[0]));i++) {
1168 if (!xmlStrcasecmpxmlStrcasecmp__internal_alias(tag, BAD_CAST(xmlChar *) html40ElementTable[i].name))
1169 return((htmlElemDescPtr) &html40ElementTable[i]);
1170 }
1171 return(NULL((void*)0));
1172}
1173
1174/**
1175 * htmlGetEndPriority:
1176 * @name: The name of the element to look up the priority for.
1177 *
1178 * Return value: The "endtag" priority.
1179 **/
1180static int
1181htmlGetEndPriority (const xmlChar *name) {
1182 int i = 0;
1183
1184 while ((htmlEndPriority[i].name != NULL((void*)0)) &&
1185 (!xmlStrEqualxmlStrEqual__internal_alias((const xmlChar *)htmlEndPriority[i].name, name)))
1186 i++;
1187
1188 return(htmlEndPriority[i].priority);
1189}
1190
1191
1192/**
1193 * htmlCheckAutoClose:
1194 * @newtag: The new tag name
1195 * @oldtag: The old tag name
1196 *
1197 * Checks whether the new tag is one of the registered valid tags for
1198 * closing old.
1199 * Initialize the htmlStartCloseIndex for fast lookup of closing tags names.
1200 *
1201 * Returns 0 if no, 1 if yes.
1202 */
1203static int
1204htmlCheckAutoClose(const xmlChar * newtag, const xmlChar * oldtag)
1205{
1206 int i, indx;
1207 const char **closed = NULL((void*)0);
1208
1209 if (htmlStartCloseIndexinitialized == 0)
1210 htmlInitAutoClose();
1211
1212 /* inefficient, but not a big deal */
1213 for (indx = 0; indx < 100; indx++) {
1214 closed = htmlStartCloseIndex[indx];
1215 if (closed == NULL((void*)0))
1216 return (0);
1217 if (xmlStrEqualxmlStrEqual__internal_alias(BAD_CAST(xmlChar *) * closed, newtag))
1218 break;
1219 }
1220
1221 i = closed - htmlStartClose;
1222 i++;
1223 while (htmlStartClose[i] != NULL((void*)0)) {
1224 if (xmlStrEqualxmlStrEqual__internal_alias(BAD_CAST(xmlChar *) htmlStartClose[i], oldtag)) {
1225 return (1);
1226 }
1227 i++;
1228 }
1229 return (0);
1230}
1231
1232/**
1233 * htmlAutoCloseOnClose:
1234 * @ctxt: an HTML parser context
1235 * @newtag: The new tag name
1236 * @force: force the tag closure
1237 *
1238 * The HTML DTD allows an ending tag to implicitly close other tags.
1239 */
1240static void
1241htmlAutoCloseOnClose(htmlParserCtxtPtr ctxt, const xmlChar * newtag)
1242{
1243 const htmlElemDesc *info;
1244 int i, priority;
1245
1246 priority = htmlGetEndPriority(newtag);
1247
1248 for (i = (ctxt->nameNr - 1); i >= 0; i--) {
1249
1250 if (xmlStrEqualxmlStrEqual__internal_alias(newtag, ctxt->nameTab[i]))
1251 break;
1252 /*
1253 * A missplaced endtag can only close elements with lower
1254 * or equal priority, so if we find an element with higher
1255 * priority before we find an element with
1256 * matching name, we just ignore this endtag
1257 */
1258 if (htmlGetEndPriority(ctxt->nameTab[i]) > priority)
1259 return;
1260 }
1261 if (i < 0)
1262 return;
1263
1264 while (!xmlStrEqualxmlStrEqual__internal_alias(newtag, ctxt->name)) {
1265 info = htmlTagLookup(ctxt->name);
1266 if ((info != NULL((void*)0)) && (info->endTag == 3)) {
1267 htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
1268 "Opening and ending tag mismatch: %s and %s\n",
1269 newtag, ctxt->name);
1270 }
1271 if ((ctxt->sax != NULL((void*)0)) && (ctxt->sax->endElement != NULL((void*)0)))
1272 ctxt->sax->endElement(ctxt->userData, ctxt->name);
1273 htmlnamePop(ctxt);
1274 }
1275}
1276
1277/**
1278 * htmlAutoCloseOnEnd:
1279 * @ctxt: an HTML parser context
1280 *
1281 * Close all remaining tags at the end of the stream
1282 */
1283static void
1284htmlAutoCloseOnEnd(htmlParserCtxtPtr ctxt)
1285{
1286 int i;
1287
1288 if (ctxt->nameNr == 0)
1289 return;
1290 for (i = (ctxt->nameNr - 1); i >= 0; i--) {
1291 if ((ctxt->sax != NULL((void*)0)) && (ctxt->sax->endElement != NULL((void*)0)))
1292 ctxt->sax->endElement(ctxt->userData, ctxt->name);
1293 htmlnamePop(ctxt);
1294 }
1295}
1296
1297/**
1298 * htmlAutoClose:
1299 * @ctxt: an HTML parser context
1300 * @newtag: The new tag name or NULL
1301 *
1302 * The HTML DTD allows a tag to implicitly close other tags.
1303 * The list is kept in htmlStartClose array. This function is
1304 * called when a new tag has been detected and generates the
1305 * appropriates closes if possible/needed.
1306 * If newtag is NULL this mean we are at the end of the resource
1307 * and we should check
1308 */
1309static void
1310htmlAutoClose(htmlParserCtxtPtr ctxt, const xmlChar * newtag)
1311{
1312 while ((newtag != NULL((void*)0)) && (ctxt->name != NULL((void*)0)) &&
1313 (htmlCheckAutoClose(newtag, ctxt->name))) {
1314 if ((ctxt->sax != NULL((void*)0)) && (ctxt->sax->endElement != NULL((void*)0)))
1315 ctxt->sax->endElement(ctxt->userData, ctxt->name);
1316 htmlnamePop(ctxt);
1317 }
1318 if (newtag == NULL((void*)0)) {
1319 htmlAutoCloseOnEnd(ctxt);
1320 return;
1321 }
1322 while ((newtag == NULL((void*)0)) && (ctxt->name != NULL((void*)0)) &&
1323 ((xmlStrEqualxmlStrEqual__internal_alias(ctxt->name, BAD_CAST(xmlChar *) "head")) ||
1324 (xmlStrEqualxmlStrEqual__internal_alias(ctxt->name, BAD_CAST(xmlChar *) "body")) ||
1325 (xmlStrEqualxmlStrEqual__internal_alias(ctxt->name, BAD_CAST(xmlChar *) "html")))) {
1326 if ((ctxt->sax != NULL((void*)0)) && (ctxt->sax->endElement != NULL((void*)0)))
1327 ctxt->sax->endElement(ctxt->userData, ctxt->name);
1328 htmlnamePop(ctxt);
1329 }
1330}
1331
1332/**
1333 * htmlAutoCloseTag:
1334 * @doc: the HTML document
1335 * @name: The tag name
1336 * @elem: the HTML element
1337 *
1338 * The HTML DTD allows a tag to implicitly close other tags.
1339 * The list is kept in htmlStartClose array. This function checks
1340 * if the element or one of it's children would autoclose the
1341 * given tag.
1342 *
1343 * Returns 1 if autoclose, 0 otherwise
1344 */
1345int
1346htmlAutoCloseTag(htmlDocPtr doc, const xmlChar *name, htmlNodePtr elem) {
1347 htmlNodePtr child;
1348
1349 if (elem == NULL((void*)0)) return(1);
1350 if (xmlStrEqualxmlStrEqual__internal_alias(name, elem->name)) return(0);
1351 if (htmlCheckAutoClose(elem->name, name)) return(1);
1352 child = elem->children;
1353 while (child != NULL((void*)0)) {
1354 if (htmlAutoCloseTag(doc, name, child)) return(1);
1355 child = child->next;
1356 }
1357 return(0);
1358}
1359
1360/**
1361 * htmlIsAutoClosed:
1362 * @doc: the HTML document
1363 * @elem: the HTML element
1364 *
1365 * The HTML DTD allows a tag to implicitly close other tags.
1366 * The list is kept in htmlStartClose array. This function checks
1367 * if a tag is autoclosed by one of it's child
1368 *
1369 * Returns 1 if autoclosed, 0 otherwise
1370 */
1371int
1372htmlIsAutoClosed(htmlDocPtr doc, htmlNodePtr elem) {
1373 htmlNodePtr child;
1374
1375 if (elem == NULL((void*)0)) return(1);
1376 child = elem->children;
1377 while (child != NULL((void*)0)) {
1378 if (htmlAutoCloseTag(doc, elem->name, child)) return(1);
1379 child = child->next;
1380 }
1381 return(0);
1382}
1383
1384/**
1385 * htmlCheckImplied:
1386 * @ctxt: an HTML parser context
1387 * @newtag: The new tag name
1388 *
1389 * The HTML DTD allows a tag to exists only implicitly
1390 * called when a new tag has been detected and generates the
1391 * appropriates implicit tags if missing
1392 */
1393static void
1394htmlCheckImplied(htmlParserCtxtPtr ctxt, const xmlChar *newtag) {
1395 int i;
1396
1397 if (!htmlOmittedDefaultValue)
1398 return;
1399 if (xmlStrEqualxmlStrEqual__internal_alias(newtag, BAD_CAST(xmlChar *)"html"))
1400 return;
1401 if (ctxt->nameNr <= 0) {
1402 htmlnamePush(ctxt, BAD_CAST(xmlChar *)"html");
1403 if ((ctxt->sax != NULL((void*)0)) && (ctxt->sax->startElement != NULL((void*)0)))
1404 ctxt->sax->startElement(ctxt->userData, BAD_CAST(xmlChar *)"html", NULL((void*)0));
1405 }
1406 if ((xmlStrEqualxmlStrEqual__internal_alias(newtag, BAD_CAST(xmlChar *)"body")) || (xmlStrEqualxmlStrEqual__internal_alias(newtag, BAD_CAST(xmlChar *)"head")))
1407 return;
1408 if ((ctxt->nameNr <= 1) &&
1409 ((xmlStrEqualxmlStrEqual__internal_alias(newtag, BAD_CAST(xmlChar *)"script")) ||
1410 (xmlStrEqualxmlStrEqual__internal_alias(newtag, BAD_CAST(xmlChar *)"style")) ||
1411 (xmlStrEqualxmlStrEqual__internal_alias(newtag, BAD_CAST(xmlChar *)"meta")) ||
1412 (xmlStrEqualxmlStrEqual__internal_alias(newtag, BAD_CAST(xmlChar *)"link")) ||
1413 (xmlStrEqualxmlStrEqual__internal_alias(newtag, BAD_CAST(xmlChar *)"title")) ||
1414 (xmlStrEqualxmlStrEqual__internal_alias(newtag, BAD_CAST(xmlChar *)"base")))) {
1415 if (ctxt->html >= 3) {
1416 /* we already saw or generated an <head> before */
1417 return;
1418 }
1419 /*
1420 * dropped OBJECT ... i you put it first BODY will be
1421 * assumed !
1422 */
1423 htmlnamePush(ctxt, BAD_CAST(xmlChar *)"head");
1424 if ((ctxt->sax != NULL((void*)0)) && (ctxt->sax->startElement != NULL((void*)0)))
1425 ctxt->sax->startElement(ctxt->userData, BAD_CAST(xmlChar *)"head", NULL((void*)0));
1426 } else if ((!xmlStrEqualxmlStrEqual__internal_alias(newtag, BAD_CAST(xmlChar *)"noframes")) &&
1427 (!xmlStrEqualxmlStrEqual__internal_alias(newtag, BAD_CAST(xmlChar *)"frame")) &&
1428 (!xmlStrEqualxmlStrEqual__internal_alias(newtag, BAD_CAST(xmlChar *)"frameset"))) {
1429 if (ctxt->html >= 10) {
1430 /* we already saw or generated a <body> before */
1431 return;
1432 }
1433 for (i = 0;i < ctxt->nameNr;i++) {
1434 if (xmlStrEqualxmlStrEqual__internal_alias(ctxt->nameTab[i], BAD_CAST(xmlChar *)"body")) {
1435 return;
1436 }
1437 if (xmlStrEqualxmlStrEqual__internal_alias(ctxt->nameTab[i], BAD_CAST(xmlChar *)"head")) {
1438 return;
1439 }
1440 }
1441
1442 htmlnamePush(ctxt, BAD_CAST(xmlChar *)"body");
1443 if ((ctxt->sax != NULL((void*)0)) && (ctxt->sax->startElement != NULL((void*)0)))
1444 ctxt->sax->startElement(ctxt->userData, BAD_CAST(xmlChar *)"body", NULL((void*)0));
1445 }
1446}
1447
1448/**
1449 * htmlCheckParagraph
1450 * @ctxt: an HTML parser context
1451 *
1452 * Check whether a p element need to be implied before inserting
1453 * characters in the current element.
1454 *
1455 * Returns 1 if a paragraph has been inserted, 0 if not and -1
1456 * in case of error.
1457 */
1458
1459static int
1460htmlCheckParagraph(htmlParserCtxtPtr ctxt) {
1461 const xmlChar *tag;
1462 int i;
1463
1464 if (ctxt == NULL((void*)0))
1465 return(-1);
1466 tag = ctxt->name;
1467 if (tag == NULL((void*)0)) {
1468 htmlAutoClose(ctxt, BAD_CAST(xmlChar *)"p");
1469 htmlCheckImplied(ctxt, BAD_CAST(xmlChar *)"p");
1470 htmlnamePush(ctxt, BAD_CAST(xmlChar *)"p");
1471 if ((ctxt->sax != NULL((void*)0)) && (ctxt->sax->startElement != NULL((void*)0)))
1472 ctxt->sax->startElement(ctxt->userData, BAD_CAST(xmlChar *)"p", NULL((void*)0));
1473 return(1);
1474 }
1475 if (!htmlOmittedDefaultValue)
1476 return(0);
1477 for (i = 0; htmlNoContentElements[i] != NULL((void*)0); i++) {
1478 if (xmlStrEqualxmlStrEqual__internal_alias(tag, BAD_CAST(xmlChar *) htmlNoContentElements[i])) {
1479 htmlAutoClose(ctxt, BAD_CAST(xmlChar *)"p");
1480 htmlCheckImplied(ctxt, BAD_CAST(xmlChar *)"p");
1481 htmlnamePush(ctxt, BAD_CAST(xmlChar *)"p");
1482 if ((ctxt->sax != NULL((void*)0)) && (ctxt->sax->startElement != NULL((void*)0)))
1483 ctxt->sax->startElement(ctxt->userData, BAD_CAST(xmlChar *)"p", NULL((void*)0));
1484 return(1);
1485 }
1486 }
1487 return(0);
1488}
1489
1490/**
1491 * htmlIsScriptAttribute:
1492 * @name: an attribute name
1493 *
1494 * Check if an attribute is of content type Script
1495 *
1496 * Returns 1 is the attribute is a script 0 otherwise
1497 */
1498int
1499htmlIsScriptAttribute(const xmlChar *name) {
1500 unsigned int i;
1501
1502 if (name == NULL((void*)0))
1503 return(0);
1504 /*
1505 * all script attributes start with 'on'
1506 */
1507 if ((name[0] != 'o') || (name[1] != 'n'))
1508 return(0);
1509 for (i = 0;
1510 i < sizeof(htmlScriptAttributes)/sizeof(htmlScriptAttributes[0]);
1511 i++) {
1512 if (xmlStrEqualxmlStrEqual__internal_alias(name, (const xmlChar *) htmlScriptAttributes[i]))
1513 return(1);
1514 }
1515 return(0);
1516}
1517
1518/************************************************************************
1519 * *
1520 * The list of HTML predefined entities *
1521 * *
1522 ************************************************************************/
1523
1524
1525static const htmlEntityDesc html40EntitiesTable[] = {
1526/*
1527 * the 4 absolute ones, plus apostrophe.
1528 */
1529{ 34, "quot", "quotation mark = APL quote, U+0022 ISOnum" },
1530{ 38, "amp", "ampersand, U+0026 ISOnum" },
1531{ 39, "apos", "single quote" },
1532{ 60, "lt", "less-than sign, U+003C ISOnum" },
1533{ 62, "gt", "greater-than sign, U+003E ISOnum" },
1534
1535/*
1536 * A bunch still in the 128-255 range
1537 * Replacing them depend really on the charset used.
1538 */
1539{ 160, "nbsp", "no-break space = non-breaking space, U+00A0 ISOnum" },
1540{ 161, "iexcl","inverted exclamation mark, U+00A1 ISOnum" },
1541{ 162, "cent", "cent sign, U+00A2 ISOnum" },
1542{ 163, "pound","pound sign, U+00A3 ISOnum" },
1543{ 164, "curren","currency sign, U+00A4 ISOnum" },
1544{ 165, "yen", "yen sign = yuan sign, U+00A5 ISOnum" },
1545{ 166, "brvbar","broken bar = broken vertical bar, U+00A6 ISOnum" },
1546{ 167, "sect", "section sign, U+00A7 ISOnum" },
1547{ 168, "uml", "diaeresis = spacing diaeresis, U+00A8 ISOdia" },
1548{ 169, "copy", "copyright sign, U+00A9 ISOnum" },
1549{ 170, "ordf", "feminine ordinal indicator, U+00AA ISOnum" },
1550{ 171, "laquo","left-pointing double angle quotation mark = left pointing guillemet, U+00AB ISOnum" },
1551{ 172, "not", "not sign, U+00AC ISOnum" },
1552{ 173, "shy", "soft hyphen = discretionary hyphen, U+00AD ISOnum" },
1553{ 174, "reg", "registered sign = registered trade mark sign, U+00AE ISOnum" },
1554{ 175, "macr", "macron = spacing macron = overline = APL overbar, U+00AF ISOdia" },
1555{ 176, "deg", "degree sign, U+00B0 ISOnum" },
1556{ 177, "plusmn","plus-minus sign = plus-or-minus sign, U+00B1 ISOnum" },
1557{ 178, "sup2", "superscript two = superscript digit two = squared, U+00B2 ISOnum" },
1558{ 179, "sup3", "superscript three = superscript digit three = cubed, U+00B3 ISOnum" },
1559{ 180, "acute","acute accent = spacing acute, U+00B4 ISOdia" },
1560{ 181, "micro","micro sign, U+00B5 ISOnum" },
1561{ 182, "para", "pilcrow sign = paragraph sign, U+00B6 ISOnum" },
1562{ 183, "middot","middle dot = Georgian comma Greek middle dot, U+00B7 ISOnum" },
1563{ 184, "cedil","cedilla = spacing cedilla, U+00B8 ISOdia" },
1564{ 185, "sup1", "superscript one = superscript digit one, U+00B9 ISOnum" },
1565{ 186, "ordm", "masculine ordinal indicator, U+00BA ISOnum" },
1566{ 187, "raquo","right-pointing double angle quotation mark right pointing guillemet, U+00BB ISOnum" },
1567{ 188, "frac14","vulgar fraction one quarter = fraction one quarter, U+00BC ISOnum" },
1568{ 189, "frac12","vulgar fraction one half = fraction one half, U+00BD ISOnum" },
1569{ 190, "frac34","vulgar fraction three quarters = fraction three quarters, U+00BE ISOnum" },
1570{ 191, "iquest","inverted question mark = turned question mark, U+00BF ISOnum" },
1571{ 192, "Agrave","latin capital letter A with grave = latin capital letter A grave, U+00C0 ISOlat1" },
1572{ 193, "Aacute","latin capital letter A with acute, U+00C1 ISOlat1" },
1573{ 194, "Acirc","latin capital letter A with circumflex, U+00C2 ISOlat1" },
1574{ 195, "Atilde","latin capital letter A with tilde, U+00C3 ISOlat1" },
1575{ 196, "Auml", "latin capital letter A with diaeresis, U+00C4 ISOlat1" },
1576{ 197, "Aring","latin capital letter A with ring above = latin capital letter A ring, U+00C5 ISOlat1" },
1577{ 198, "AElig","latin capital letter AE = latin capital ligature AE, U+00C6 ISOlat1" },
1578{ 199, "Ccedil","latin capital letter C with cedilla, U+00C7 ISOlat1" },
1579{ 200, "Egrave","latin capital letter E with grave, U+00C8 ISOlat1" },
1580{ 201, "Eacute","latin capital letter E with acute, U+00C9 ISOlat1" },
1581{ 202, "Ecirc","latin capital letter E with circumflex, U+00CA ISOlat1" },
1582{ 203, "Euml", "latin capital letter E with diaeresis, U+00CB ISOlat1" },
1583{ 204, "Igrave","latin capital letter I with grave, U+00CC ISOlat1" },
1584{ 205, "Iacute","latin capital letter I with acute, U+00CD ISOlat1" },
1585{ 206, "Icirc","latin capital letter I with circumflex, U+00CE ISOlat1" },
1586{ 207, "Iuml", "latin capital letter I with diaeresis, U+00CF ISOlat1" },
1587{ 208, "ETH", "latin capital letter ETH, U+00D0 ISOlat1" },
1588{ 209, "Ntilde","latin capital letter N with tilde, U+00D1 ISOlat1" },
1589{ 210, "Ograve","latin capital letter O with grave, U+00D2 ISOlat1" },
1590{ 211, "Oacute","latin capital letter O with acute, U+00D3 ISOlat1" },
1591{ 212, "Ocirc","latin capital letter O with circumflex, U+00D4 ISOlat1" },
1592{ 213, "Otilde","latin capital letter O with tilde, U+00D5 ISOlat1" },
1593{ 214, "Ouml", "latin capital letter O with diaeresis, U+00D6 ISOlat1" },
1594{ 215, "times","multiplication sign, U+00D7 ISOnum" },
1595{ 216, "Oslash","latin capital letter O with stroke latin capital letter O slash, U+00D8 ISOlat1" },
1596{ 217, "Ugrave","latin capital letter U with grave, U+00D9 ISOlat1" },
1597{ 218, "Uacute","latin capital letter U with acute, U+00DA ISOlat1" },
1598{ 219, "Ucirc","latin capital letter U with circumflex, U+00DB ISOlat1" },
1599{ 220, "Uuml", "latin capital letter U with diaeresis, U+00DC ISOlat1" },
1600{ 221, "Yacute","latin capital letter Y with acute, U+00DD ISOlat1" },
1601{ 222, "THORN","latin capital letter THORN, U+00DE ISOlat1" },
1602{ 223, "szlig","latin small letter sharp s = ess-zed, U+00DF ISOlat1" },
1603{ 224, "agrave","latin small letter a with grave = latin small letter a grave, U+00E0 ISOlat1" },
1604{ 225, "aacute","latin small letter a with acute, U+00E1 ISOlat1" },
1605{ 226, "acirc","latin small letter a with circumflex, U+00E2 ISOlat1" },
1606{ 227, "atilde","latin small letter a with tilde, U+00E3 ISOlat1" },
1607{ 228, "auml", "latin small letter a with diaeresis, U+00E4 ISOlat1" },
1608{ 229, "aring","latin small letter a with ring above = latin small letter a ring, U+00E5 ISOlat1" },
1609{ 230, "aelig","latin small letter ae = latin small ligature ae, U+00E6 ISOlat1" },
1610{ 231, "ccedil","latin small letter c with cedilla, U+00E7 ISOlat1" },
1611{ 232, "egrave","latin small letter e with grave, U+00E8 ISOlat1" },
1612{ 233, "eacute","latin small letter e with acute, U+00E9 ISOlat1" },
1613{ 234, "ecirc","latin small letter e with circumflex, U+00EA ISOlat1" },
1614{ 235, "euml", "latin small letter e with diaeresis, U+00EB ISOlat1" },
1615{ 236, "igrave","latin small letter i with grave, U+00EC ISOlat1" },
1616{ 237, "iacute","latin small letter i with acute, U+00ED ISOlat1" },
1617{ 238, "icirc","latin small letter i with circumflex, U+00EE ISOlat1" },
1618{ 239, "iuml", "latin small letter i with diaeresis, U+00EF ISOlat1" },
1619{ 240, "eth", "latin small letter eth, U+00F0 ISOlat1" },
1620{ 241, "ntilde","latin small letter n with tilde, U+00F1 ISOlat1" },
1621{ 242, "ograve","latin small letter o with grave, U+00F2 ISOlat1" },
1622{ 243, "oacute","latin small letter o with acute, U+00F3 ISOlat1" },
1623{ 244, "ocirc","latin small letter o with circumflex, U+00F4 ISOlat1" },
1624{ 245, "otilde","latin small letter o with tilde, U+00F5 ISOlat1" },
1625{ 246, "ouml", "latin small letter o with diaeresis, U+00F6 ISOlat1" },
1626{ 247, "divide","division sign, U+00F7 ISOnum" },
1627{ 248, "oslash","latin small letter o with stroke, = latin small letter o slash, U+00F8 ISOlat1" },
1628{ 249, "ugrave","latin small letter u with grave, U+00F9 ISOlat1" },
1629{ 250, "uacute","latin small letter u with acute, U+00FA ISOlat1" },
1630{ 251, "ucirc","latin small letter u with circumflex, U+00FB ISOlat1" },
1631{ 252, "uuml", "latin small letter u with diaeresis, U+00FC ISOlat1" },
1632{ 253, "yacute","latin small letter y with acute, U+00FD ISOlat1" },
1633{ 254, "thorn","latin small letter thorn with, U+00FE ISOlat1" },
1634{ 255, "yuml", "latin small letter y with diaeresis, U+00FF ISOlat1" },
1635
1636{ 338, "OElig","latin capital ligature OE, U+0152 ISOlat2" },
1637{ 339, "oelig","latin small ligature oe, U+0153 ISOlat2" },
1638{ 352, "Scaron","latin capital letter S with caron, U+0160 ISOlat2" },
1639{ 353, "scaron","latin small letter s with caron, U+0161 ISOlat2" },
1640{ 376, "Yuml", "latin capital letter Y with diaeresis, U+0178 ISOlat2" },
1641
1642/*
1643 * Anything below should really be kept as entities references
1644 */
1645{ 402, "fnof", "latin small f with hook = function = florin, U+0192 ISOtech" },
1646
1647{ 710, "circ", "modifier letter circumflex accent, U+02C6 ISOpub" },
1648{ 732, "tilde","small tilde, U+02DC ISOdia" },
1649
1650{ 913, "Alpha","greek capital letter alpha, U+0391" },
1651{ 914, "Beta", "greek capital letter beta, U+0392" },
1652{ 915, "Gamma","greek capital letter gamma, U+0393 ISOgrk3" },
1653{ 916, "Delta","greek capital letter delta, U+0394 ISOgrk3" },
1654{ 917, "Epsilon","greek capital letter epsilon, U+0395" },
1655{ 918, "Zeta", "greek capital letter zeta, U+0396" },
1656{ 919, "Eta", "greek capital letter eta, U+0397" },
1657{ 920, "Theta","greek capital letter theta, U+0398 ISOgrk3" },
1658{ 921, "Iota", "greek capital letter iota, U+0399" },
1659{ 922, "Kappa","greek capital letter kappa, U+039A" },
1660{ 923, "Lambda", "greek capital letter lambda, U+039B ISOgrk3" },
1661{ 924, "Mu", "greek capital letter mu, U+039C" },
1662{ 925, "Nu", "greek capital letter nu, U+039D" },
1663{ 926, "Xi", "greek capital letter xi, U+039E ISOgrk3" },
1664{ 927, "Omicron","greek capital letter omicron, U+039F" },
1665{ 928, "Pi", "greek capital letter pi, U+03A0 ISOgrk3" },
1666{ 929, "Rho", "greek capital letter rho, U+03A1" },
1667{ 931, "Sigma","greek capital letter sigma, U+03A3 ISOgrk3" },
1668{ 932, "Tau", "greek capital letter tau, U+03A4" },
1669{ 933, "Upsilon","greek capital letter upsilon, U+03A5 ISOgrk3" },
1670{ 934, "Phi", "greek capital letter phi, U+03A6 ISOgrk3" },
1671{ 935, "Chi", "greek capital letter chi, U+03A7" },
1672{ 936, "Psi", "greek capital letter psi, U+03A8 ISOgrk3" },
1673{ 937, "Omega","greek capital letter omega, U+03A9 ISOgrk3" },
1674
1675{ 945, "alpha","greek small letter alpha, U+03B1 ISOgrk3" },
1676{ 946, "beta", "greek small letter beta, U+03B2 ISOgrk3" },
1677{ 947, "gamma","greek small letter gamma, U+03B3 ISOgrk3" },
1678{ 948, "delta","greek small letter delta, U+03B4 ISOgrk3" },
1679{ 949, "epsilon","greek small letter epsilon, U+03B5 ISOgrk3" },
1680{ 950, "zeta", "greek small letter zeta, U+03B6 ISOgrk3" },
1681{ 951, "eta", "greek small letter eta, U+03B7 ISOgrk3" },
1682{ 952, "theta","greek small letter theta, U+03B8 ISOgrk3" },
1683{ 953, "iota", "greek small letter iota, U+03B9 ISOgrk3" },
1684{ 954, "kappa","greek small letter kappa, U+03BA ISOgrk3" },
1685{ 955, "lambda","greek small letter lambda, U+03BB ISOgrk3" },
1686{ 956, "mu", "greek small letter mu, U+03BC ISOgrk3" },
1687{ 957, "nu", "greek small letter nu, U+03BD ISOgrk3" },
1688{ 958, "xi", "greek small letter xi, U+03BE ISOgrk3" },
1689{ 959, "omicron","greek small letter omicron, U+03BF NEW" },
1690{ 960, "pi", "greek small letter pi, U+03C0 ISOgrk3" },
1691{ 961, "rho", "greek small letter rho, U+03C1 ISOgrk3" },
1692{ 962, "sigmaf","greek small letter final sigma, U+03C2 ISOgrk3" },
1693{ 963, "sigma","greek small letter sigma, U+03C3 ISOgrk3" },
1694{ 964, "tau", "greek small letter tau, U+03C4 ISOgrk3" },
1695{ 965, "upsilon","greek small letter upsilon, U+03C5 ISOgrk3" },
1696{ 966, "phi", "greek small letter phi, U+03C6 ISOgrk3" },
1697{ 967, "chi", "greek small letter chi, U+03C7 ISOgrk3" },
1698{ 968, "psi", "greek small letter psi, U+03C8 ISOgrk3" },
1699{ 969, "omega","greek small letter omega, U+03C9 ISOgrk3" },
1700{ 977, "thetasym","greek small letter theta symbol, U+03D1 NEW" },
1701{ 978, "upsih","greek upsilon with hook symbol, U+03D2 NEW" },
1702{ 982, "piv", "greek pi symbol, U+03D6 ISOgrk3" },
1703
1704{ 8194, "ensp", "en space, U+2002 ISOpub" },
1705{ 8195, "emsp", "em space, U+2003 ISOpub" },
1706{ 8201, "thinsp","thin space, U+2009 ISOpub" },
1707{ 8204, "zwnj", "zero width non-joiner, U+200C NEW RFC 2070" },
1708{ 8205, "zwj", "zero width joiner, U+200D NEW RFC 2070" },
1709{ 8206, "lrm", "left-to-right mark, U+200E NEW RFC 2070" },
1710{ 8207, "rlm", "right-to-left mark, U+200F NEW RFC 2070" },
1711{ 8211, "ndash","en dash, U+2013 ISOpub" },
1712{ 8212, "mdash","em dash, U+2014 ISOpub" },
1713{ 8216, "lsquo","left single quotation mark, U+2018 ISOnum" },
1714{ 8217, "rsquo","right single quotation mark, U+2019 ISOnum" },
1715{ 8218, "sbquo","single low-9 quotation mark, U+201A NEW" },
1716{ 8220, "ldquo","left double quotation mark, U+201C ISOnum" },
1717{ 8221, "rdquo","right double quotation mark, U+201D ISOnum" },
1718{ 8222, "bdquo","double low-9 quotation mark, U+201E NEW" },
1719{ 8224, "dagger","dagger, U+2020 ISOpub" },
1720{ 8225, "Dagger","double dagger, U+2021 ISOpub" },
1721
1722{ 8226, "bull", "bullet = black small circle, U+2022 ISOpub" },
1723{ 8230, "hellip","horizontal ellipsis = three dot leader, U+2026 ISOpub" },
1724
1725{ 8240, "permil","per mille sign, U+2030 ISOtech" },
1726
1727{ 8242, "prime","prime = minutes = feet, U+2032 ISOtech" },
1728{ 8243, "Prime","double prime = seconds = inches, U+2033 ISOtech" },
1729
1730{ 8249, "lsaquo","single left-pointing angle quotation mark, U+2039 ISO proposed" },
1731{ 8250, "rsaquo","single right-pointing angle quotation mark, U+203A ISO proposed" },
1732
1733{ 8254, "oline","overline = spacing overscore, U+203E NEW" },
1734{ 8260, "frasl","fraction slash, U+2044 NEW" },
1735
1736{ 8364, "euro", "euro sign, U+20AC NEW" },
1737
1738{ 8465, "image","blackletter capital I = imaginary part, U+2111 ISOamso" },
1739{ 8472, "weierp","script capital P = power set = Weierstrass p, U+2118 ISOamso" },
1740{ 8476, "real", "blackletter capital R = real part symbol, U+211C ISOamso" },
1741{ 8482, "trade","trade mark sign, U+2122 ISOnum" },
1742{ 8501, "alefsym","alef symbol = first transfinite cardinal, U+2135 NEW" },
1743{ 8592, "larr", "leftwards arrow, U+2190 ISOnum" },
1744{ 8593, "uarr", "upwards arrow, U+2191 ISOnum" },
1745{ 8594, "rarr", "rightwards arrow, U+2192 ISOnum" },
1746{ 8595, "darr", "downwards arrow, U+2193 ISOnum" },
1747{ 8596, "harr", "left right arrow, U+2194 ISOamsa" },
1748{ 8629, "crarr","downwards arrow with corner leftwards = carriage return, U+21B5 NEW" },
1749{ 8656, "lArr", "leftwards double arrow, U+21D0 ISOtech" },
1750{ 8657, "uArr", "upwards double arrow, U+21D1 ISOamsa" },
1751{ 8658, "rArr", "rightwards double arrow, U+21D2 ISOtech" },
1752{ 8659, "dArr", "downwards double arrow, U+21D3 ISOamsa" },
1753{ 8660, "hArr", "left right double arrow, U+21D4 ISOamsa" },
1754
1755{ 8704, "forall","for all, U+2200 ISOtech" },
1756{ 8706, "part", "partial differential, U+2202 ISOtech" },
1757{ 8707, "exist","there exists, U+2203 ISOtech" },
1758{ 8709, "empty","empty set = null set = diameter, U+2205 ISOamso" },
1759{ 8711, "nabla","nabla = backward difference, U+2207 ISOtech" },
1760{ 8712, "isin", "element of, U+2208 ISOtech" },
1761{ 8713, "notin","not an element of, U+2209 ISOtech" },
1762{ 8715, "ni", "contains as member, U+220B ISOtech" },
1763{ 8719, "prod", "n-ary product = product sign, U+220F ISOamsb" },
1764{ 8721, "sum", "n-ary summation, U+2211 ISOamsb" },
1765{ 8722, "minus","minus sign, U+2212 ISOtech" },
1766{ 8727, "lowast","asterisk operator, U+2217 ISOtech" },
1767{ 8730, "radic","square root = radical sign, U+221A ISOtech" },
1768{ 8733, "prop", "proportional to, U+221D ISOtech" },
1769{ 8734, "infin","infinity, U+221E ISOtech" },
1770{ 8736, "ang", "angle, U+2220 ISOamso" },
1771{ 8743, "and", "logical and = wedge, U+2227 ISOtech" },
1772{ 8744, "or", "logical or = vee, U+2228 ISOtech" },
1773{ 8745, "cap", "intersection = cap, U+2229 ISOtech" },
1774{ 8746, "cup", "union = cup, U+222A ISOtech" },
1775{ 8747, "int", "integral, U+222B ISOtech" },
1776{ 8756, "there4","therefore, U+2234 ISOtech" },
1777{ 8764, "sim", "tilde operator = varies with = similar to, U+223C ISOtech" },
1778{ 8773, "cong", "approximately equal to, U+2245 ISOtech" },
1779{ 8776, "asymp","almost equal to = asymptotic to, U+2248 ISOamsr" },
1780{ 8800, "ne", "not equal to, U+2260 ISOtech" },
1781{ 8801, "equiv","identical to, U+2261 ISOtech" },
1782{ 8804, "le", "less-than or equal to, U+2264 ISOtech" },
1783{ 8805, "ge", "greater-than or equal to, U+2265 ISOtech" },
1784{ 8834, "sub", "subset of, U+2282 ISOtech" },
1785{ 8835, "sup", "superset of, U+2283 ISOtech" },
1786{ 8836, "nsub", "not a subset of, U+2284 ISOamsn" },
1787{ 8838, "sube", "subset of or equal to, U+2286 ISOtech" },
1788{ 8839, "supe", "superset of or equal to, U+2287 ISOtech" },
1789{ 8853, "oplus","circled plus = direct sum, U+2295 ISOamsb" },
1790{ 8855, "otimes","circled times = vector product, U+2297 ISOamsb" },
1791{ 8869, "perp", "up tack = orthogonal to = perpendicular, U+22A5 ISOtech" },
1792{ 8901, "sdot", "dot operator, U+22C5 ISOamsb" },
1793{ 8968, "lceil","left ceiling = apl upstile, U+2308 ISOamsc" },
1794{ 8969, "rceil","right ceiling, U+2309 ISOamsc" },
1795{ 8970, "lfloor","left floor = apl downstile, U+230A ISOamsc" },
1796{ 8971, "rfloor","right floor, U+230B ISOamsc" },
1797{ 9001, "lang", "left-pointing angle bracket = bra, U+2329 ISOtech" },
1798{ 9002, "rang", "right-pointing angle bracket = ket, U+232A ISOtech" },
1799{ 9674, "loz", "lozenge, U+25CA ISOpub" },
1800
1801{ 9824, "spades","black spade suit, U+2660 ISOpub" },
1802{ 9827, "clubs","black club suit = shamrock, U+2663 ISOpub" },
1803{ 9829, "hearts","black heart suit = valentine, U+2665 ISOpub" },
1804{ 9830, "diams","black diamond suit, U+2666 ISOpub" },
1805
1806};
1807
1808/************************************************************************
1809 * *
1810 * Commodity functions to handle entities *
1811 * *
1812 ************************************************************************/
1813
/*
 * Macro used to grow the current buffer: the companion variable
 * <buffer>_size is doubled and the buffer reallocated to that size.
 * On allocation failure the error is reported through htmlErrMemory(),
 * the old buffer is freed and the enclosing function returns NULL.
 * It expects a variable named ctxt to be in scope at the point of use.
 */
#define growBuffer(buffer) {						\
    xmlChar *tmp;							\
    buffer##_size *= 2;							\
    tmp = (xmlChar *) xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \
    if (tmp == NULL) {							\
	htmlErrMemory(ctxt, "growing buffer\n");			\
	xmlFree(buffer);						\
	return(NULL);							\
    }									\
    buffer = tmp;							\
}
1828
1829/**
1830 * htmlEntityLookup:
1831 * @name: the entity name
1832 *
1833 * Lookup the given entity in EntitiesTable
1834 *
1835 * TODO: the linear scan is really ugly, an hash table is really needed.
1836 *
1837 * Returns the associated htmlEntityDescPtr if found, NULL otherwise.
1838 */
1839const htmlEntityDesc *
1840htmlEntityLookup(const xmlChar *name) {
1841 unsigned int i;
1842
1843 for (i = 0;i < (sizeof(html40EntitiesTable)/
1844 sizeof(html40EntitiesTable[0]));i++) {
1845 if (xmlStrEqualxmlStrEqual__internal_alias(name, BAD_CAST(xmlChar *) html40EntitiesTable[i].name)) {
1846 return((htmlEntityDescPtr) &html40EntitiesTable[i]);
1847 }
1848 }
1849 return(NULL((void*)0));
1850}
1851
1852/**
1853 * htmlEntityValueLookup:
1854 * @value: the entity's unicode value
1855 *
1856 * Lookup the given entity in EntitiesTable
1857 *
1858 * TODO: the linear scan is really ugly, an hash table is really needed.
1859 *
1860 * Returns the associated htmlEntityDescPtr if found, NULL otherwise.
1861 */
1862const htmlEntityDesc *
1863htmlEntityValueLookup(unsigned int value) {
1864 unsigned int i;
1865
1866 for (i = 0;i < (sizeof(html40EntitiesTable)/
1867 sizeof(html40EntitiesTable[0]));i++) {
1868 if (html40EntitiesTable[i].value >= value) {
1869 if (html40EntitiesTable[i].value > value)
1870 break;
1871 return((htmlEntityDescPtr) &html40EntitiesTable[i]);
1872 }
1873 }
1874 return(NULL((void*)0));
1875}
1876
1877/**
1878 * UTF8ToHtml:
1879 * @out: a pointer to an array of bytes to store the result
1880 * @outlen: the length of @out
1881 * @in: a pointer to an array of UTF-8 chars
1882 * @inlen: the length of @in
1883 *
1884 * Take a block of UTF-8 chars in and try to convert it to an ASCII
1885 * plus HTML entities block of chars out.
1886 *
1887 * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
1888 * The value of @inlen after return is the number of octets consumed
1889 * as the return value is positive, else unpredictable.
1890 * The value of @outlen after return is the number of octets consumed.
1891 */
1892int
1893UTF8ToHtml(unsigned char* out, int *outlen,
1894 const unsigned char* in, int *inlen) {
1895 const unsigned char* processed = in;
1896 const unsigned char* outend;
1897 const unsigned char* outstart = out;
1898 const unsigned char* instart = in;
1899 const unsigned char* inend;
1900 unsigned int c, d;
1901 int trailing;
1902
1903 if ((out == NULL((void*)0)) || (outlen == NULL((void*)0)) || (inlen == NULL((void*)0))) return(-1);
1904 if (in == NULL((void*)0)) {
1905 /*
1906 * initialization nothing to do
1907 */
1908 *outlen = 0;
1909 *inlen = 0;
1910 return(0);
1911 }
1912 inend = in + (*inlen);
1913 outend = out + (*outlen);
1914 while (in < inend) {
1915 d = *in++;
1916 if (d < 0x80) { c= d; trailing= 0; }
1917 else if (d < 0xC0) {
1918 /* trailing byte in leading position */
1919 *outlen = out - outstart;
1920 *inlen = processed - instart;
1921 return(-2);
1922 } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
1923 else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
1924 else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
1925 else {
1926 /* no chance for this in Ascii */
1927 *outlen = out - outstart;
1928 *inlen = processed - instart;
1929 return(-2);
1930 }
1931
1932 if (inend - in < trailing) {
1933 break;
1934 }
1935
1936 for ( ; trailing; trailing--) {
1937 if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
1938 break;
1939 c <<= 6;
1940 c |= d & 0x3F;
1941 }
1942
1943 /* assertion: c is a single UTF-4 value */
1944 if (c < 0x80) {
1945 if (out + 1 >= outend)
1946 break;
1947 *out++ = c;
1948 } else {
1949 int len;
1950 const htmlEntityDesc * ent;
1951 const char *cp;
1952 char nbuf[16];
1953
1954 /*
1955 * Try to lookup a predefined HTML entity for it
1956 */
1957
1958 ent = htmlEntityValueLookup(c);
1959 if (ent == NULL((void*)0)) {
1960 snprintf(nbuf, sizeof(nbuf), "#%u", c);
1961 cp = nbuf;
1962 }
1963 else
1964 cp = ent->name;
1965 len = strlen(cp);
1966 if (out + 2 + len >= outend)
1967 break;
1968 *out++ = '&';
1969 memcpy(out, cp, len);
1970 out += len;
1971 *out++ = ';';
1972 }
1973 processed = in;
1974 }
1975 *outlen = out - outstart;
1976 *inlen = processed - instart;
1977 return(0);
1978}
1979
1980/**
1981 * htmlEncodeEntities:
1982 * @out: a pointer to an array of bytes to store the result
1983 * @outlen: the length of @out
1984 * @in: a pointer to an array of UTF-8 chars
1985 * @inlen: the length of @in
1986 * @quoteChar: the quote character to escape (' or ") or zero.
1987 *
1988 * Take a block of UTF-8 chars in and try to convert it to an ASCII
1989 * plus HTML entities block of chars out.
1990 *
1991 * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
1992 * The value of @inlen after return is the number of octets consumed
1993 * as the return value is positive, else unpredictable.
1994 * The value of @outlen after return is the number of octets consumed.
1995 */
1996int
1997htmlEncodeEntities(unsigned char* out, int *outlen,
1998 const unsigned char* in, int *inlen, int quoteChar) {
1999 const unsigned char* processed = in;
2000 const unsigned char* outend;
2001 const unsigned char* outstart = out;
2002 const unsigned char* instart = in;
2003 const unsigned char* inend;
2004 unsigned int c, d;
2005 int trailing;
2006
2007 if ((out == NULL((void*)0)) || (outlen == NULL((void*)0)) || (inlen == NULL((void*)0)) || (in == NULL((void*)0)))
2008 return(-1);
2009 outend = out + (*outlen);
2010 inend = in + (*inlen);
2011 while (in < inend) {
2012 d = *in++;
2013 if (d < 0x80) { c= d; trailing= 0; }
2014 else if (d < 0xC0) {
2015 /* trailing byte in leading position */
2016 *outlen = out - outstart;
2017 *inlen = processed - instart;
2018 return(-2);
2019 } else if (d < 0xE0) { c= d & 0x1F; trailing= 1; }
2020 else if (d < 0xF0) { c= d & 0x0F; trailing= 2; }
2021 else if (d < 0xF8) { c= d & 0x07; trailing= 3; }
2022 else {
2023 /* no chance for this in Ascii */
2024 *outlen = out - outstart;
2025 *inlen = processed - instart;
2026 return(-2);
2027 }
2028
2029 if (inend - in < trailing)
2030 break;
2031
2032 while (trailing--) {
2033 if (((d= *in++) & 0xC0) != 0x80) {
2034 *outlen = out - outstart;
2035 *inlen = processed - instart;
2036 return(-2);
2037 }
2038 c <<= 6;
2039 c |= d & 0x3F;
2040 }
2041
2042 /* assertion: c is a single UTF-4 value */
2043 if ((c < 0x80) && (c != (unsigned int) quoteChar) &&
2044 (c != '&') && (c != '<') && (c != '>')) {
2045 if (out >= outend)
2046 break;
2047 *out++ = c;
2048 } else {
2049 const htmlEntityDesc * ent;
2050 const char *cp;
2051 char nbuf[16];
2052 int len;
2053
2054 /*
2055 * Try to lookup a predefined HTML entity for it
2056 */
2057 ent = htmlEntityValueLookup(c);
2058 if (ent == NULL((void*)0)) {
2059 snprintf(nbuf, sizeof(nbuf), "#%u", c);
2060 cp = nbuf;
2061 }
2062 else
2063 cp = ent->name;
2064 len = strlen(cp);
2065 if (out + 2 + len > outend)
2066 break;
2067 *out++ = '&';
2068 memcpy(out, cp, len);
2069 out += len;
2070 *out++ = ';';
2071 }
2072 processed = in;
2073 }
2074 *outlen = out - outstart;
2075 *inlen = processed - instart;
2076 return(0);
2077}
2078
2079/************************************************************************
2080 * *
2081 * Commodity functions to handle streams *
2082 * *
2083 ************************************************************************/
2084
2085/**
2086 * htmlNewInputStream:
2087 * @ctxt: an HTML parser context
2088 *
2089 * Create a new input stream structure
2090 * Returns the new input stream or NULL
2091 */
2092static htmlParserInputPtr
2093htmlNewInputStream(htmlParserCtxtPtr ctxt) {
2094 htmlParserInputPtr input;
2095
2096 input = (xmlParserInputPtr) xmlMalloc(sizeof(htmlParserInput));
2097 if (input == NULL((void*)0)) {
2098 htmlErrMemory(ctxt, "couldn't allocate a new input stream\n");
2099 return(NULL((void*)0));
2100 }
2101 memset(input, 0, sizeof(htmlParserInput));
2102 input->filename = NULL((void*)0);
2103 input->directory = NULL((void*)0);
2104 input->base = NULL((void*)0);
2105 input->cur = NULL((void*)0);
2106 input->buf = NULL((void*)0);
2107 input->line = 1;
2108 input->col = 1;
2109 input->buf = NULL((void*)0);
2110 input->free = NULL((void*)0);
2111 input->version = NULL((void*)0);
2112 input->consumed = 0;
2113 input->length = 0;
2114 return(input);
2115}
2116
2117
2118/************************************************************************
2119 * *
2120 * Commodity functions, cleanup needed ? *
2121 * *
2122 ************************************************************************/
/*
 * Names of all the tags allowing PCDATA in the HTML 4.01 loose DTD.
 * NOTE: it might be more appropriate to integrate this information
 *       into the html40ElementTable array, but that would risk a
 *       binary incompatibility.
 */
static const char *allowPCData[] = {
    "a", "abbr", "acronym", "address", "applet",
    "b", "bdo", "big", "blockquote", "body", "button",
    "caption", "center", "cite", "code",
    "dd", "del", "dfn", "div", "dt",
    "em",
    "font", "form",
    "h1", "h2", "h3", "h4", "h5", "h6",
    "i", "iframe", "ins",
    "kbd",
    "label", "legend", "li",
    "noframes", "noscript",
    "object",
    "p", "pre",
    "q",
    "s", "samp", "small", "span", "strike", "strong",
    "td", "th", "tt",
    "u",
    "var"
};
2137
2138/**
2139 * areBlanks:
2140 * @ctxt: an HTML parser context
2141 * @str: a xmlChar *
2142 * @len: the size of @str
2143 *
2144 * Is this a sequence of blank chars that one can ignore ?
2145 *
2146 * Returns 1 if ignorable 0 otherwise.
2147 */
2148
2149static int areBlanks(htmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
2150 unsigned int i;
2151 int j;
2152 xmlNodePtr lastChild;
2153 xmlDtdPtr dtd;
2154
2155 for (j = 0;j < len;j++)
2156 if (!(IS_BLANK_CH(str[j])(((str[j]) == 0x20) || ((0x9 <= (str[j])) && ((str
[j]) <= 0xa)) || ((str[j]) == 0xd))
)) return(0);
2157
2158 if (CUR((int) (*ctxt->input->cur)) == 0) return(1);
2159 if (CUR((int) (*ctxt->input->cur)) != '<') return(0);
2160 if (ctxt->name == NULL((void*)0))
2161 return(1);
2162 if (xmlStrEqualxmlStrEqual__internal_alias(ctxt->name, BAD_CAST(xmlChar *)"html"))
2163 return(1);
2164 if (xmlStrEqualxmlStrEqual__internal_alias(ctxt->name, BAD_CAST(xmlChar *)"head"))
2165 return(1);
2166
2167 /* Only strip CDATA children of the body tag for strict HTML DTDs */
2168 if (xmlStrEqualxmlStrEqual__internal_alias(ctxt->name, BAD_CAST(xmlChar *) "body") && ctxt->myDoc != NULL((void*)0)) {
2169 dtd = xmlGetIntSubsetxmlGetIntSubset__internal_alias(ctxt->myDoc);
2170 if (dtd != NULL((void*)0) && dtd->ExternalID != NULL((void*)0)) {
2171 if (!xmlStrcasecmpxmlStrcasecmp__internal_alias(dtd->ExternalID, BAD_CAST(xmlChar *) "-//W3C//DTD HTML 4.01//EN") ||
2172 !xmlStrcasecmpxmlStrcasecmp__internal_alias(dtd->ExternalID, BAD_CAST(xmlChar *) "-//W3C//DTD HTML 4//EN"))
2173 return(1);
2174 }
2175 }
2176
2177 if (ctxt->node == NULL((void*)0)) return(0);
2178 lastChild = xmlGetLastChildxmlGetLastChild__internal_alias(ctxt->node);
2179 while ((lastChild) && (lastChild->type == XML_COMMENT_NODE))
2180 lastChild = lastChild->prev;
2181 if (lastChild == NULL((void*)0)) {
2182 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2183 (ctxt->node->content != NULL((void*)0))) return(0);
2184 /* keep ws in constructs like ...<b> </b>...
2185 for all tags "b" allowing PCDATA */
2186 for ( i = 0; i < sizeof(allowPCData)/sizeof(allowPCData[0]); i++ ) {
2187 if ( xmlStrEqualxmlStrEqual__internal_alias(ctxt->name, BAD_CAST(xmlChar *) allowPCData[i]) ) {
2188 return(0);
2189 }
2190 }
2191 } else if (xmlNodeIsTextxmlNodeIsText__internal_alias(lastChild)) {
2192 return(0);
2193 } else {
2194 /* keep ws in constructs like <p><b>xy</b> <i>z</i><p>
2195 for all tags "p" allowing PCDATA */
2196 for ( i = 0; i < sizeof(allowPCData)/sizeof(allowPCData[0]); i++ ) {
2197 if ( xmlStrEqualxmlStrEqual__internal_alias(lastChild->name, BAD_CAST(xmlChar *) allowPCData[i]) ) {
2198 return(0);
2199 }
2200 }
2201 }
2202 return(1);
2203}
2204
2205/**
2206 * htmlNewDocNoDtD:
2207 * @URI: URI for the dtd, or NULL
2208 * @ExternalID: the external ID of the DTD, or NULL
2209 *
2210 * Creates a new HTML document without a DTD node if @URI and @ExternalID
2211 * are NULL
2212 *
2213 * Returns a new document, do not initialize the DTD if not provided
2214 */
2215htmlDocPtr
2216htmlNewDocNoDtD(const xmlChar *URI, const xmlChar *ExternalID) {
2217 xmlDocPtr cur;
2218
2219 /*
2220 * Allocate a new document and fill the fields.
2221 */
2222 cur = (xmlDocPtr) xmlMalloc(sizeof(xmlDoc));
2223 if (cur == NULL((void*)0)) {
2224 htmlErrMemory(NULL((void*)0), "HTML document creation failed\n");
2225 return(NULL((void*)0));
2226 }
2227 memset(cur, 0, sizeof(xmlDoc));
2228
2229 cur->type = XML_HTML_DOCUMENT_NODE;
2230 cur->version = NULL((void*)0);
2231 cur->intSubset = NULL((void*)0);
2232 cur->doc = cur;
2233 cur->name = NULL((void*)0);
2234 cur->children = NULL((void*)0);
2235 cur->extSubset = NULL((void*)0);
2236 cur->oldNs = NULL((void*)0);
2237 cur->encoding = NULL((void*)0);
2238 cur->standalone = 1;
2239 cur->compression = 0;
2240 cur->ids = NULL((void*)0);
2241 cur->refs = NULL((void*)0);
2242 cur->_private = NULL((void*)0);
2243 cur->charset = XML_CHAR_ENCODING_UTF8;
2244 cur->properties = XML_DOC_HTML | XML_DOC_USERBUILT;
2245 if ((ExternalID != NULL((void*)0)) ||
2246 (URI != NULL((void*)0)))
2247 xmlCreateIntSubsetxmlCreateIntSubset__internal_alias(cur, BAD_CAST(xmlChar *) "html", ExternalID, URI);
2248 return(cur);
2249}
2250
2251/**
2252 * htmlNewDoc:
2253 * @URI: URI for the dtd, or NULL
2254 * @ExternalID: the external ID of the DTD, or NULL
2255 *
2256 * Creates a new HTML document
2257 *
2258 * Returns a new document
2259 */
2260htmlDocPtr
2261htmlNewDoc(const xmlChar *URI, const xmlChar *ExternalID) {
2262 if ((URI == NULL((void*)0)) && (ExternalID == NULL((void*)0)))
2263 return(htmlNewDocNoDtD(
2264 BAD_CAST(xmlChar *) "http://www.w3.org/TR/REC-html40/loose.dtd",
2265 BAD_CAST(xmlChar *) "-//W3C//DTD HTML 4.0 Transitional//EN"));
2266
2267 return(htmlNewDocNoDtD(URI, ExternalID));
2268}
2269
2270
2271/************************************************************************
2272 * *
2273 * The parser itself *
2274 * Relates to http://www.w3.org/TR/html40 *
2275 * *
2276 ************************************************************************/
2277
2278/************************************************************************
2279 * *
2280 * The parser itself *
2281 * *
2282 ************************************************************************/
2283
2284static const xmlChar * htmlParseNameComplex(xmlParserCtxtPtr ctxt);
2285
2286/**
2287 * htmlParseHTMLName:
2288 * @ctxt: an HTML parser context
2289 *
2290 * parse an HTML tag or attribute name, note that we convert it to lowercase
2291 * since HTML names are not case-sensitive.
2292 *
2293 * Returns the Tag Name parsed or NULL
2294 */
2295
2296static const xmlChar *
2297htmlParseHTMLName(htmlParserCtxtPtr ctxt) {
2298 int i = 0;
2299 xmlChar loc[HTML_PARSER_BUFFER_SIZE100];
2300
2301 if (!IS_ASCII_LETTER(CUR)(((0x41 <= (((int) (*ctxt->input->cur)))) &&
((((int) (*ctxt->input->cur))) <= 0x5a)) || ((0x61 <=
(((int) (*ctxt->input->cur)))) && ((((int) (*ctxt
->input->cur))) <= 0x7a)))
&& (CUR((int) (*ctxt->input->cur)) != '_') &&
2302 (CUR((int) (*ctxt->input->cur)) != ':') && (CUR((int) (*ctxt->input->cur)) != '.')) return(NULL((void*)0));
2303
2304 while ((i < HTML_PARSER_BUFFER_SIZE100) &&
2305 ((IS_ASCII_LETTER(CUR)(((0x41 <= (((int) (*ctxt->input->cur)))) &&
((((int) (*ctxt->input->cur))) <= 0x5a)) || ((0x61 <=
(((int) (*ctxt->input->cur)))) && ((((int) (*ctxt
->input->cur))) <= 0x7a)))
) || (IS_ASCII_DIGIT(CUR)((0x30 <= (((int) (*ctxt->input->cur)))) && (
(((int) (*ctxt->input->cur))) <= 0x39))
) ||
2306 (CUR((int) (*ctxt->input->cur)) == ':') || (CUR((int) (*ctxt->input->cur)) == '-') || (CUR((int) (*ctxt->input->cur)) == '_') ||
2307 (CUR((int) (*ctxt->input->cur)) == '.'))) {
2308 if ((CUR((int) (*ctxt->input->cur)) >= 'A') && (CUR((int) (*ctxt->input->cur)) <= 'Z')) loc[i] = CUR((int) (*ctxt->input->cur)) + 0x20;
2309 else loc[i] = CUR((int) (*ctxt->input->cur));
2310 i++;
2311
2312 NEXTxmlNextChar__internal_alias(ctxt);
2313 }
2314
2315 return(xmlDictLookupxmlDictLookup__internal_alias(ctxt->dict, loc, i));
2316}
2317
2318
2319/**
2320 * htmlParseHTMLName_nonInvasive:
2321 * @ctxt: an HTML parser context
2322 *
2323 * parse an HTML tag or attribute name, note that we convert it to lowercase
2324 * since HTML names are not case-sensitive, this doesn't consume the data
2325 * from the stream, it's a look-ahead
2326 *
2327 * Returns the Tag Name parsed or NULL
2328 */
2329
2330static const xmlChar *
2331htmlParseHTMLName_nonInvasive(htmlParserCtxtPtr ctxt) {
2332 int i = 0;
2333 xmlChar loc[HTML_PARSER_BUFFER_SIZE100];
2334
2335 if (!IS_ASCII_LETTER(NXT(1))(((0x41 <= (ctxt->input->cur[(1)])) && ((ctxt
->input->cur[(1)]) <= 0x5a)) || ((0x61 <= (ctxt->
input->cur[(1)])) && ((ctxt->input->cur[(1)]
) <= 0x7a)))
&& (NXT(1)ctxt->input->cur[(1)] != '_') &&
2336 (NXT(1)ctxt->input->cur[(1)] != ':')) return(NULL((void*)0));
2337
2338 while ((i < HTML_PARSER_BUFFER_SIZE100) &&
2339 ((IS_ASCII_LETTER(NXT(1+i))(((0x41 <= (ctxt->input->cur[(1 +i)])) && ((
ctxt->input->cur[(1 +i)]) <= 0x5a)) || ((0x61 <= (
ctxt->input->cur[(1 +i)])) && ((ctxt->input->
cur[(1 +i)]) <= 0x7a)))
) || (IS_ASCII_DIGIT(NXT(1+i))((0x30 <= (ctxt->input->cur[(1 +i)])) && ((ctxt
->input->cur[(1 +i)]) <= 0x39))
) ||
2340 (NXT(1+i)ctxt->input->cur[(1 +i)] == ':') || (NXT(1+i)ctxt->input->cur[(1 +i)] == '-') || (NXT(1+i)ctxt->input->cur[(1 +i)] == '_'))) {
2341 if ((NXT(1+i)ctxt->input->cur[(1 +i)] >= 'A') && (NXT(1+i)ctxt->input->cur[(1 +i)] <= 'Z')) loc[i] = NXT(1+i)ctxt->input->cur[(1 +i)] + 0x20;
2342 else loc[i] = NXT(1+i)ctxt->input->cur[(1 +i)];
2343 i++;
2344 }
2345
2346 return(xmlDictLookupxmlDictLookup__internal_alias(ctxt->dict, loc, i));
2347}
2348
2349
2350/**
2351 * htmlParseName:
2352 * @ctxt: an HTML parser context
2353 *
2354 * parse an HTML name, this routine is case sensitive.
2355 *
2356 * Returns the Name parsed or NULL
2357 */
2358
2359static const xmlChar *
2360htmlParseName(htmlParserCtxtPtr ctxt) {
2361 const xmlChar *in;
2362 const xmlChar *ret;
2363 int count = 0;
2364
2365 GROWif ((ctxt->progressive == 0) && (ctxt->input->
end - ctxt->input->cur < 250)) xmlParserInputGrow__internal_alias
(ctxt->input, 250)
;
2366
2367 /*
2368 * Accelerator for simple ASCII names
2369 */
2370 in = ctxt->input->cur;
2371 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2372 ((*in >= 0x41) && (*in <= 0x5A)) ||
2373 (*in == '_') || (*in == ':')) {
2374 in++;
2375 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2376 ((*in >= 0x41) && (*in <= 0x5A)) ||
2377 ((*in >= 0x30) && (*in <= 0x39)) ||
2378 (*in == '_') || (*in == '-') ||
2379 (*in == ':') || (*in == '.'))
2380 in++;
2381 if ((*in > 0) && (*in < 0x80)) {
2382 count = in - ctxt->input->cur;
2383 ret = xmlDictLookupxmlDictLookup__internal_alias(ctxt->dict, ctxt->input->cur, count);
2384 ctxt->input->cur = in;
2385 ctxt->nbChars += count;
2386 ctxt->input->col += count;
2387 return(ret);
2388 }
2389 }
2390 return(htmlParseNameComplex(ctxt));
2391}
2392
2393static const xmlChar *
2394htmlParseNameComplex(xmlParserCtxtPtr ctxt) {
2395 int len = 0, l;
2396 int c;
2397 int count = 0;
2398
2399 /*
2400 * Handler for more complex cases
2401 */
2402 GROWif ((ctxt->progressive == 0) && (ctxt->input->
end - ctxt->input->cur < 250)) xmlParserInputGrow__internal_alias
(ctxt->input, 250)
;
2403 c = CUR_CHAR(l)htmlCurrentChar(ctxt, &l);
2404 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2405 (!IS_LETTER(c)((((c) < 0x100) ? (((0x41 <= ((c))) && (((c)) <=
0x5a)) || ((0x61 <= ((c))) && (((c)) <= 0x7a))
|| ((0xc0 <= ((c))) && (((c)) <= 0xd6)) || ((0xd8
<= ((c))) && (((c)) <= 0xf6)) || (0xf8 <= (
(c)))) : xmlCharInRange__internal_alias((c), &xmlIsBaseCharGroup
)) || (((c) < 0x100) ? 0 : (((0x4e00 <= (c)) &&
((c) <= 0x9fa5)) || ((c) == 0x3007) || ((0x3021 <= (c)
) && ((c) <= 0x3029)))))
&& (c != '_') &&
2406 (c != ':'))) {
2407 return(NULL((void*)0));
2408 }
2409
2410 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
2411 ((IS_LETTER(c)((((c) < 0x100) ? (((0x41 <= ((c))) && (((c)) <=
0x5a)) || ((0x61 <= ((c))) && (((c)) <= 0x7a))
|| ((0xc0 <= ((c))) && (((c)) <= 0xd6)) || ((0xd8
<= ((c))) && (((c)) <= 0xf6)) || (0xf8 <= (
(c)))) : xmlCharInRange__internal_alias((c), &xmlIsBaseCharGroup
)) || (((c) < 0x100) ? 0 : (((0x4e00 <= (c)) &&
((c) <= 0x9fa5)) || ((c) == 0x3007) || ((0x3021 <= (c)
) && ((c) <= 0x3029)))))
) || (IS_DIGIT(c)(((c) < 0x100) ? (((0x30 <= ((c))) && (((c)) <=
0x39))) : xmlCharInRange__internal_alias((c), &xmlIsDigitGroup
))
) ||
2412 (c == '.') || (c == '-') ||
2413 (c == '_') || (c == ':') ||
2414 (IS_COMBINING(c)(((c) < 0x100) ? 0 : xmlCharInRange__internal_alias((c), &
xmlIsCombiningGroup))
) ||
2415 (IS_EXTENDER(c)(((c) < 0x100) ? ((((c)) == 0xb7)) : xmlCharInRange__internal_alias
((c), &xmlIsExtenderGroup))
))) {
2416 if (count++ > 100) {
2417 count = 0;
2418 GROWif ((ctxt->progressive == 0) && (ctxt->input->
end - ctxt->input->cur < 250)) xmlParserInputGrow__internal_alias
(ctxt->input, 250)
;
2419 }
2420 len += l;
2421 NEXTL(l)do { if (*(ctxt->input->cur) == '\n') { ctxt->input->
line++; ctxt->input->col = 1; } else ctxt->input->
col++; ctxt->token = 0; ctxt->input->cur += l; ctxt->
nbChars++; } while (0)
;
2422 c = CUR_CHAR(l)htmlCurrentChar(ctxt, &l);
2423 }
2424 return(xmlDictLookupxmlDictLookup__internal_alias(ctxt->dict, ctxt->input->cur - len, len));
2425}
2426
2427
2428/**
2429 * htmlParseHTMLAttribute:
2430 * @ctxt: an HTML parser context
2431 * @stop: a char stop value
2432 *
2433 * parse an HTML attribute value till the stop (quote), if
2434 * stop is 0 then it stops at the first space
2435 *
2436 * Returns the attribute parsed or NULL
2437 */
2438
2439static xmlChar *
2440htmlParseHTMLAttribute(htmlParserCtxtPtr ctxt, const xmlChar stop) {
2441 xmlChar *buffer = NULL((void*)0);
2442 int buffer_size = 0;
2443 xmlChar *out = NULL((void*)0);
2444 const xmlChar *name = NULL((void*)0);
2445 const xmlChar *cur = NULL((void*)0);
2446 const htmlEntityDesc * ent;
2447
2448 /*
2449 * allocate a translation buffer.
2450 */
2451 buffer_size = HTML_PARSER_BUFFER_SIZE100;
2452 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
2453 if (buffer == NULL((void*)0)) {
2454 htmlErrMemory(ctxt, "buffer allocation failed\n");
2455 return(NULL((void*)0));
2456 }
2457 out = buffer;
2458
2459 /*
2460 * Ok loop until we reach one of the ending chars
2461 */
2462 while ((CUR((int) (*ctxt->input->cur)) != 0) && (CUR((int) (*ctxt->input->cur)) != stop)) {
2463 if ((stop == 0) && (CUR((int) (*ctxt->input->cur)) == '>')) break;
2464 if ((stop == 0) && (IS_BLANK_CH(CUR)(((((int) (*ctxt->input->cur))) == 0x20) || ((0x9 <=
(((int) (*ctxt->input->cur)))) && ((((int) (*ctxt
->input->cur))) <= 0xa)) || ((((int) (*ctxt->input
->cur))) == 0xd))
)) break;
2465 if (CUR((int) (*ctxt->input->cur)) == '&') {
2466 if (NXT(1)ctxt->input->cur[(1)] == '#') {
2467 unsigned int c;
2468 int bits;
2469
2470 c = htmlParseCharRef(ctxt);
2471 if (c < 0x80)
2472 { *out++ = c; bits= -6; }
2473 else if (c < 0x800)
2474 { *out++ =((c >> 6) & 0x1F) | 0xC0; bits= 0; }
2475 else if (c < 0x10000)
2476 { *out++ =((c >> 12) & 0x0F) | 0xE0; bits= 6; }
2477 else
2478 { *out++ =((c >> 18) & 0x07) | 0xF0; bits= 12; }
2479
2480 for ( ; bits >= 0; bits-= 6) {
2481 *out++ = ((c >> bits) & 0x3F) | 0x80;
2482 }
2483
2484 if (out - buffer > buffer_size - 100) {
2485 int indx = out - buffer;
2486
2487 growBuffer(buffer){ xmlChar *tmp; buffer_size *= 2; tmp = (xmlChar *) xmlRealloc
(buffer, buffer_size * sizeof(xmlChar)); if (tmp == ((void*)0
)) { htmlErrMemory(ctxt, "growing buffer\n"); xmlFree(buffer)
; return(((void*)0)); } buffer = tmp; }
;
2488 out = &buffer[indx];
2489 }
2490 } else {
2491 ent = htmlParseEntityRef(ctxt, &name);
2492 if (name == NULL((void*)0)) {
2493 *out++ = '&';
2494 if (out - buffer > buffer_size - 100) {
2495 int indx = out - buffer;
2496
2497 growBuffer(buffer){ xmlChar *tmp; buffer_size *= 2; tmp = (xmlChar *) xmlRealloc
(buffer, buffer_size * sizeof(xmlChar)); if (tmp == ((void*)0
)) { htmlErrMemory(ctxt, "growing buffer\n"); xmlFree(buffer)
; return(((void*)0)); } buffer = tmp; }
;
2498 out = &buffer[indx];
2499 }
2500 } else if (ent == NULL((void*)0)) {
2501 *out++ = '&';
2502 cur = name;
2503 while (*cur != 0) {
2504 if (out - buffer > buffer_size - 100) {
2505 int indx = out - buffer;
2506
2507 growBuffer(buffer){ xmlChar *tmp; buffer_size *= 2; tmp = (xmlChar *) xmlRealloc
(buffer, buffer_size * sizeof(xmlChar)); if (tmp == ((void*)0
)) { htmlErrMemory(ctxt, "growing buffer\n"); xmlFree(buffer)
; return(((void*)0)); } buffer = tmp; }
;
2508 out = &buffer[indx];
2509 }
2510 *out++ = *cur++;
2511 }
2512 } else {
2513 unsigned int c;
2514 int bits;
2515
2516 if (out - buffer > buffer_size - 100) {
2517 int indx = out - buffer;
2518
2519 growBuffer(buffer){ xmlChar *tmp; buffer_size *= 2; tmp = (xmlChar *) xmlRealloc
(buffer, buffer_size * sizeof(xmlChar)); if (tmp == ((void*)0
)) { htmlErrMemory(ctxt, "growing buffer\n"); xmlFree(buffer)
; return(((void*)0)); } buffer = tmp; }
;
2520 out = &buffer[indx];
2521 }
2522 c = ent->value;
2523 if (c < 0x80)
2524 { *out++ = c; bits= -6; }
2525 else if (c < 0x800)
2526 { *out++ =((c >> 6) & 0x1F) | 0xC0; bits= 0; }
2527 else if (c < 0x10000)
2528 { *out++ =((c >> 12) & 0x0F) | 0xE0; bits= 6; }
2529 else
2530 { *out++ =((c >> 18) & 0x07) | 0xF0; bits= 12; }
2531
2532 for ( ; bits >= 0; bits-= 6) {
2533 *out++ = ((c >> bits) & 0x3F) | 0x80;
2534 }
2535 }
2536 }
2537 } else {
2538 unsigned int c;
2539 int bits, l;
2540
2541 if (out - buffer > buffer_size - 100) {
2542 int indx = out - buffer;
2543
2544 growBuffer(buffer){ xmlChar *tmp; buffer_size *= 2; tmp = (xmlChar *) xmlRealloc
(buffer, buffer_size * sizeof(xmlChar)); if (tmp == ((void*)0
)) { htmlErrMemory(ctxt, "growing buffer\n"); xmlFree(buffer)
; return(((void*)0)); } buffer = tmp; }
;
2545 out = &buffer[indx];
2546 }
2547 c = CUR_CHAR(l)htmlCurrentChar(ctxt, &l);
2548 if (c < 0x80)
2549 { *out++ = c; bits= -6; }
2550 else if (c < 0x800)
2551 { *out++ =((c >> 6) & 0x1F) | 0xC0; bits= 0; }
2552 else if (c < 0x10000)
2553 { *out++ =((c >> 12) & 0x0F) | 0xE0; bits= 6; }
2554 else
2555 { *out++ =((c >> 18) & 0x07) | 0xF0; bits= 12; }
2556
2557 for ( ; bits >= 0; bits-= 6) {
2558 *out++ = ((c >> bits) & 0x3F) | 0x80;
2559 }
2560 NEXTxmlNextChar__internal_alias(ctxt);
2561 }
2562 }
2563 *out++ = 0;
Value stored to 'out' is never read
2564 return(buffer);
2565}
2566
2567/**
2568 * htmlParseEntityRef:
2569 * @ctxt: an HTML parser context
2570 * @str: location to store the entity name
2571 *
2572 * parse an HTML ENTITY references
2573 *
2574 * [68] EntityRef ::= '&' Name ';'
2575 *
2576 * Returns the associated htmlEntityDescPtr if found, or NULL otherwise,
2577 * if non-NULL *str will have to be freed by the caller.
2578 */
2579const htmlEntityDesc *
2580htmlParseEntityRef(htmlParserCtxtPtr ctxt, const xmlChar **str) {
2581 const xmlChar *name;
2582 const htmlEntityDesc * ent = NULL((void*)0);
2583
2584 if (str != NULL((void*)0)) *str = NULL((void*)0);
2585 if ((ctxt == NULL((void*)0)) || (ctxt->input == NULL((void*)0))) return(NULL((void*)0));
2586
2587 if (CUR((int) (*ctxt->input->cur)) == '&') {
2588 NEXTxmlNextChar__internal_alias(ctxt);
2589 name = htmlParseName(ctxt);
2590 if (name == NULL((void*)0)) {
2591 htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
2592 "htmlParseEntityRef: no name\n", NULL((void*)0), NULL((void*)0));
2593 } else {
2594 GROWif ((ctxt->progressive == 0) && (ctxt->input->
end - ctxt->input->cur < 250)) xmlParserInputGrow__internal_alias
(ctxt->input, 250)
;
2595 if (CUR((int) (*ctxt->input->cur)) == ';') {
2596 if (str != NULL((void*)0))
2597 *str = name;
2598
2599 /*
2600 * Lookup the entity in the table.
2601 */
2602 ent = htmlEntityLookup(name);
2603 if (ent != NULL((void*)0)) /* OK that's ugly !!! */
2604 NEXTxmlNextChar__internal_alias(ctxt);
2605 } else {
2606 htmlParseErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING,
2607 "htmlParseEntityRef: expecting ';'\n",
2608 NULL((void*)0), NULL((void*)0));
2609 if (str != NULL((void*)0))
2610 *str = name;
2611 }
2612 }
2613 }
2614 return(ent);
2615}
2616
2617/**
2618 * htmlParseAttValue:
2619 * @ctxt: an HTML parser context
2620 *
2621 * parse a value for an attribute
2622 * Note: the parser won't do substitution of entities here, this
2623 * will be handled later in xmlStringGetNodeList, unless it was
2624 * asked for ctxt->replaceEntities != 0
2625 *
2626 * Returns the AttValue parsed or NULL.
2627 */
2628
2629static xmlChar *
2630htmlParseAttValue(htmlParserCtxtPtr ctxt) {
2631 xmlChar *ret = NULL((void*)0);
2632
2633 if (CUR((int) (*ctxt->input->cur)) == '"') {
2634 NEXTxmlNextChar__internal_alias(ctxt);
2635 ret = htmlParseHTMLAttribute(ctxt, '"');
2636 if (CUR((int) (*ctxt->input->cur)) != '"') {
2637 htmlParseErr(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
2638 "AttValue: \" expected\n", NULL((void*)0), NULL((void*)0));
2639 } else
2640 NEXTxmlNextChar__internal_alias(ctxt);
2641 } else if (CUR((int) (*ctxt->input->cur)) == '\'') {
2642 NEXTxmlNextChar__internal_alias(ctxt);
2643 ret = htmlParseHTMLAttribute(ctxt, '\'');
2644 if (CUR((int) (*ctxt->input->cur)) != '\'') {
2645 htmlParseErr(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
2646 "AttValue: ' expected\n", NULL((void*)0), NULL((void*)0));
2647 } else
2648 NEXTxmlNextChar__internal_alias(ctxt);
2649 } else {
2650 /*
2651 * That's an HTMLism, the attribute value may not be quoted
2652 */
2653 ret = htmlParseHTMLAttribute(ctxt, 0);
2654 if (ret == NULL((void*)0)) {
2655 htmlParseErr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
2656 "AttValue: no value found\n", NULL((void*)0), NULL((void*)0));
2657 }
2658 }
2659 return(ret);
2660}
2661
2662/**
2663 * htmlParseSystemLiteral:
2664 * @ctxt: an HTML parser context
2665 *
2666 * parse an HTML Literal
2667 *
2668 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2669 *
2670 * Returns the SystemLiteral parsed or NULL
2671 */
2672
2673static xmlChar *
2674htmlParseSystemLiteral(htmlParserCtxtPtr ctxt) {
2675 const xmlChar *q;
2676 xmlChar *ret = NULL((void*)0);
2677
2678 if (CUR((int) (*ctxt->input->cur)) == '"') {
2679 NEXTxmlNextChar__internal_alias(ctxt);
2680 q = CUR_PTRctxt->input->cur;
2681 while ((IS_CHAR_CH(CUR)(((0x9 <= (((int) (*ctxt->input->cur)))) && (
(((int) (*ctxt->input->cur))) <= 0xa)) || ((((int) (
*ctxt->input->cur))) == 0xd) || (0x20 <= (((int) (*ctxt
->input->cur)))))
) && (CUR((int) (*ctxt->input->cur)) != '"'))
2682 NEXTxmlNextChar__internal_alias(ctxt);
2683 if (!IS_CHAR_CH(CUR)(((0x9 <= (((int) (*ctxt->input->cur)))) && (
(((int) (*ctxt->input->cur))) <= 0xa)) || ((((int) (
*ctxt->input->cur))) == 0xd) || (0x20 <= (((int) (*ctxt
->input->cur)))))
) {
2684 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
2685 "Unfinished SystemLiteral\n", NULL((void*)0), NULL((void*)0));
2686 } else {
2687 ret = xmlStrndupxmlStrndup__internal_alias(q, CUR_PTRctxt->input->cur - q);
2688 NEXTxmlNextChar__internal_alias(ctxt);
2689 }
2690 } else if (CUR((int) (*ctxt->input->cur)) == '\'') {
2691 NEXTxmlNextChar__internal_alias(ctxt);
2692 q = CUR_PTRctxt->input->cur;
2693 while ((IS_CHAR_CH(CUR)(((0x9 <= (((int) (*ctxt->input->cur)))) && (
(((int) (*ctxt->input->cur))) <= 0xa)) || ((((int) (
*ctxt->input->cur))) == 0xd) || (0x20 <= (((int) (*ctxt
->input->cur)))))
) && (CUR((int) (*ctxt->input->cur)) != '\''))
2694 NEXTxmlNextChar__internal_alias(ctxt);
2695 if (!IS_CHAR_CH(CUR)(((0x9 <= (((int) (*ctxt->input->cur)))) && (
(((int) (*ctxt->input->cur))) <= 0xa)) || ((((int) (
*ctxt->input->cur))) == 0xd) || (0x20 <= (((int) (*ctxt
->input->cur)))))
) {
2696 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
2697 "Unfinished SystemLiteral\n", NULL((void*)0), NULL((void*)0));
2698 } else {
2699 ret = xmlStrndupxmlStrndup__internal_alias(q, CUR_PTRctxt->input->cur - q);
2700 NEXTxmlNextChar__internal_alias(ctxt);
2701 }
2702 } else {
2703 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_STARTED,
2704 " or ' expected\n", NULL((void*)0), NULL((void*)0));
2705 }
2706
2707 return(ret);
2708}
2709
2710/**
2711 * htmlParsePubidLiteral:
2712 * @ctxt: an HTML parser context
2713 *
2714 * parse an HTML public literal
2715 *
2716 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2717 *
2718 * Returns the PubidLiteral parsed or NULL.
2719 */
2720
2721static xmlChar *
2722htmlParsePubidLiteral(htmlParserCtxtPtr ctxt) {
2723 const xmlChar *q;
2724 xmlChar *ret = NULL((void*)0);
2725 /*
2726 * Name ::= (Letter | '_') (NameChar)*
2727 */
2728 if (CUR((int) (*ctxt->input->cur)) == '"') {
2729 NEXTxmlNextChar__internal_alias(ctxt);
2730 q = CUR_PTRctxt->input->cur;
2731 while (IS_PUBIDCHAR_CH(CUR)(xmlIsPubidChar_tab[(((int) (*ctxt->input->cur)))])) NEXTxmlNextChar__internal_alias(ctxt);
2732 if (CUR((int) (*ctxt->input->cur)) != '"') {
2733 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
2734 "Unfinished PubidLiteral\n", NULL((void*)0), NULL((void*)0));
2735 } else {
2736 ret = xmlStrndupxmlStrndup__internal_alias(q, CUR_PTRctxt->input->cur - q);
2737 NEXTxmlNextChar__internal_alias(ctxt);
2738 }
2739 } else if (CUR((int) (*ctxt->input->cur)) == '\'') {
2740 NEXTxmlNextChar__internal_alias(ctxt);
2741 q = CUR_PTRctxt->input->cur;
2742 while ((IS_PUBIDCHAR_CH(CUR)(xmlIsPubidChar_tab[(((int) (*ctxt->input->cur)))])) && (CUR((int) (*ctxt->input->cur)) != '\''))
2743 NEXTxmlNextChar__internal_alias(ctxt);
2744 if (CUR((int) (*ctxt->input->cur)) != '\'') {
2745 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
2746 "Unfinished PubidLiteral\n", NULL((void*)0), NULL((void*)0));
2747 } else {
2748 ret = xmlStrndupxmlStrndup__internal_alias(q, CUR_PTRctxt->input->cur - q);
2749 NEXTxmlNextChar__internal_alias(ctxt);
2750 }
2751 } else {
2752 htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_STARTED,
2753 "PubidLiteral \" or ' expected\n", NULL((void*)0), NULL((void*)0));
2754 }
2755
2756 return(ret);
2757}
2758
2759/**
2760 * htmlParseScript:
2761 * @ctxt: an HTML parser context
2762 *
2763 * parse the content of an HTML SCRIPT or STYLE element
2764 * http://www.w3.org/TR/html4/sgml/dtd.html#Script
2765 * http://www.w3.org/TR/html4/sgml/dtd.html#StyleSheet
2766 * http://www.w3.org/TR/html4/types.html#type-script
2767 * http://www.w3.org/TR/html4/types.html#h-6.15
2768 * http://www.w3.org/TR/html4/appendix/notes.html#h-B.3.2.1
2769 *
2770 * Script data ( %Script; in the DTD) can be the content of the SCRIPT
2771 * element and the value of intrinsic event attributes. User agents must
2772 * not evaluate script data as HTML markup but instead must pass it on as
2773 * data to a script engine.
2774 * NOTES:
2775 * - The content is passed like CDATA
2776 * - the attributes for style and scripting "onXXX" are also described
2777 * as CDATA but SGML allows entities references in attributes so their
2778 * processing is identical as other attributes
2779 */
2780static void
2781htmlParseScript(htmlParserCtxtPtr ctxt) {
2782 xmlChar buf[HTML_PARSER_BIG_BUFFER_SIZE1000 + 5];
2783 int nbchar = 0;
2784 int cur,l;
2785
2786 SHRINKif ((ctxt->input->cur - ctxt->input->base > 2 *
250) && (ctxt->input->end - ctxt->input->
cur < 2 * 250)) xmlParserInputShrink__internal_alias(ctxt->
input)
;
2787 cur = CUR_CHAR(l)htmlCurrentChar(ctxt, &l);
2788 while (IS_CHAR_CH(cur)(((0x9 <= (cur)) && ((cur) <= 0xa)) || ((cur) ==
0xd) || (0x20 <= (cur)))
) {
2789 if ((cur == '<') && (NXT(1)ctxt->input->cur[(1)] == '/')) {
2790 /*
2791 * One should break here, the specification is clear:
2792 * Authors should therefore escape "</" within the content.
2793 * Escape mechanisms are specific to each scripting or
2794 * style sheet language.
2795 *
2796 * In recovery mode, only break if end tag match the
2797 * current tag, effectively ignoring all tags inside the
2798 * script/style block and treating the entire block as
2799 * CDATA.
2800 */
2801 if (ctxt->recovery) {
2802 if (xmlStrncasecmpxmlStrncasecmp__internal_alias(ctxt->name, ctxt->input->cur+2,
2803 xmlStrlenxmlStrlen__internal_alias(ctxt->name)) == 0)
2804 {
2805 break; /* while */
2806 } else {
2807 htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
2808 "Element %s embeds close tag\n",
2809 ctxt->name, NULL((void*)0));
2810 }
2811 } else {
2812 if (((NXT(2)ctxt->input->cur[(2)] >= 'A') && (NXT(2)ctxt->input->cur[(2)] <= 'Z')) ||
2813 ((NXT(2)ctxt->input->cur[(2)] >= 'a') && (NXT(2)ctxt->input->cur[(2)] <= 'z')))
2814 {
2815 break; /* while */
2816 }
2817 }
2818 }
2819 COPY_BUF(l,buf,nbchar,cur)if (l == 1) buf[nbchar++] = (xmlChar) cur; else nbchar += xmlCopyChar__internal_alias
(l,&buf[nbchar],cur)
;
2820 if (nbchar >= HTML_PARSER_BIG_BUFFER_SIZE1000) {
2821 if (ctxt->sax->cdataBlock!= NULL((void*)0)) {
2822 /*
2823 * Insert as CDATA, which is the same as HTML_PRESERVE_NODE
2824 */
2825 ctxt->sax->cdataBlock(ctxt->userData, buf, nbchar);
2826 } else if (ctxt->sax->characters != NULL((void*)0)) {
2827 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2828 }
2829 nbchar = 0;
2830 }
2831 GROWif ((ctxt->progressive == 0) && (ctxt->input->
end - ctxt->input->cur < 250)) xmlParserInputGrow__internal_alias
(ctxt->input, 250)
;
2832 NEXTL(l)do { if (*(ctxt->input->cur) == '\n') { ctxt->input->
line++; ctxt->input->col = 1; } else ctxt->input->
col++; ctxt->token = 0; ctxt->input->cur += l; ctxt->
nbChars++; } while (0)
;
2833 cur = CUR_CHAR(l)htmlCurrentChar(ctxt, &l);
2834 }
2835
2836 if ((!(IS_CHAR_CH(cur)(((0x9 <= (cur)) && ((cur) <= 0xa)) || ((cur) ==
0xd) || (0x20 <= (cur)))
)) && (!((cur == 0) && (ctxt->progressive)))) {
2837 htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
2838 "Invalid char in CDATA 0x%X\n", cur);
2839 NEXTxmlNextChar__internal_alias(ctxt);
2840 }
2841
2842 if ((nbchar != 0) && (ctxt->sax != NULL((void*)0)) && (!ctxt->disableSAX)) {
2843 if (ctxt->sax->cdataBlock!= NULL((void*)0)) {
2844 /*
2845 * Insert as CDATA, which is the same as HTML_PRESERVE_NODE
2846 */
2847 ctxt->sax->cdataBlock(ctxt->userData, buf, nbchar);
2848 } else if (ctxt->sax->characters != NULL((void*)0)) {
2849 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2850 }
2851 }
2852}
2853
2854
2855/**
2856 * htmlParseCharData:
2857 * @ctxt: an HTML parser context
2858 *
2859 * parse a CharData section.
2860 * if we are within a CDATA section ']]>' marks an end of section.
2861 *
2862 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2863 */
2864
2865static void
2866htmlParseCharData(htmlParserCtxtPtr ctxt) {
2867 xmlChar buf[HTML_PARSER_BIG_BUFFER_SIZE1000 + 5];
2868 int nbchar = 0;
2869 int cur, l;
2870 int chunk = 0;
2871
2872 SHRINKif ((ctxt->input->cur - ctxt->input->base > 2 *
250) && (ctxt->input->end - ctxt->input->
cur < 2 * 250)) xmlParserInputShrink__internal_alias(ctxt->
input)
;
2873 cur = CUR_CHAR(l)htmlCurrentChar(ctxt, &l);
2874 while (((cur != '<') || (ctxt->token == '<')) &&
2875 ((cur != '&') || (ctxt->token == '&')) &&
2876 (cur != 0)) {
2877 if (!(IS_CHAR(cur)(((cur) < 0x100) ? (((0x9 <= ((cur))) && (((cur
)) <= 0xa)) || (((cur)) == 0xd) || (0x20 <= ((cur)))) :
(((0x100 <= (cur)) && ((cur) <= 0xd7ff)) || ((
0xe000 <= (cur)) && ((cur) <= 0xfffd)) || ((0x10000
<= (cur)) && ((cur) <= 0x10ffff))))
)) {
2878 htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
2879 "Invalid char in CDATA 0x%X\n", cur);
2880 } else {
2881 COPY_BUF(l,buf,nbchar,cur)if (l == 1) buf[nbchar++] = (xmlChar) cur; else nbchar += xmlCopyChar__internal_alias
(l,&buf[nbchar],cur)
;
2882 }
2883 if (nbchar >= HTML_PARSER_BIG_BUFFER_SIZE1000) {
2884 /*
2885 * Ok the segment is to be consumed as chars.
2886 */
2887 if ((ctxt->sax != NULL((void*)0)) && (!ctxt->disableSAX)) {
2888 if (areBlanks(ctxt, buf, nbchar)) {
2889 if (ctxt->sax->ignorableWhitespace != NULL((void*)0))
2890 ctxt->sax->ignorableWhitespace(ctxt->userData,
2891 buf, nbchar);
2892 } else {
2893 htmlCheckParagraph(ctxt);
2894 if (ctxt->sax->characters != NULL((void*)0))
2895 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2896 }
2897 }
2898 nbchar = 0;
2899 }
2900 NEXTL(l)do { if (*(ctxt->input->cur) == '\n') { ctxt->input->
line++; ctxt->input->col = 1; } else ctxt->input->
col++; ctxt->token = 0; ctxt->input->cur += l; ctxt->
nbChars++; } while (0)
;
2901 chunk++;
2902 if (chunk > HTML_PARSER_BUFFER_SIZE100) {
2903 chunk = 0;
2904 SHRINKif ((ctxt->input->cur - ctxt->input->base > 2 *
250) && (ctxt->input->end - ctxt->input->
cur < 2 * 250)) xmlParserInputShrink__internal_alias(ctxt->
input)
;
2905 GROWif ((ctxt->progressive == 0) && (ctxt->input->
end - ctxt->input->cur < 250)) xmlParserInputGrow__internal_alias
(ctxt->input, 250)
;
2906 }
2907 cur = CUR_CHAR(l)htmlCurrentChar(ctxt, &l);
2908 if (cur == 0) {
2909 SHRINKif ((ctxt->input->cur - ctxt->input->base > 2 *
250) && (ctxt->input->end - ctxt->input->
cur < 2 * 250)) xmlParserInputShrink__internal_alias(ctxt->
input)
;
2910 GROWif ((ctxt->progressive == 0) && (ctxt->input->
end - ctxt->input->cur < 250)) xmlParserInputGrow__internal_alias
(ctxt->input, 250)
;
2911 cur = CUR_CHAR(l)htmlCurrentChar(ctxt, &l);
2912 }
2913 }
2914 if (nbchar != 0) {
2915 buf[nbchar] = 0;
2916
2917 /*
2918 * Ok the segment is to be consumed as chars.
2919 */
2920 if ((ctxt->sax != NULL((void*)0)) && (!ctxt->disableSAX)) {
2921 if (areBlanks(ctxt, buf, nbchar)) {
2922 if (ctxt->sax->ignorableWhitespace != NULL((void*)0))
2923 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2924 } else {
2925 htmlCheckParagraph(ctxt);
2926 if (ctxt->sax->characters != NULL((void*)0))
2927 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2928 }
2929 }
2930 } else {
2931 /*
2932 * Loop detection
2933 */
2934 if (cur == 0)
2935 ctxt->instate = XML_PARSER_EOF;
2936 }
2937}
2938
2939/**
2940 * htmlParseExternalID:
2941 * @ctxt: an HTML parser context
2942 * @publicID: a xmlChar** receiving PubidLiteral
2943 *
2944 * Parse an External ID or a Public ID
2945 *
2946 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2947 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2948 *
2949 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2950 *
2951 * Returns the function returns SystemLiteral and in the second
2952 * case publicID receives PubidLiteral, is strict is off
2953 * it is possible to return NULL and have publicID set.
2954 */
2955
2956static xmlChar *
2957htmlParseExternalID(htmlParserCtxtPtr ctxt, xmlChar **publicID) {
2958 xmlChar *URI = NULL((void*)0);
2959
2960 if ((UPPER(toupper(*ctxt->input->cur)) == 'S') && (UPP(1)(toupper(ctxt->input->cur[(1)])) == 'Y') &&
2961 (UPP(2)(toupper(ctxt->input->cur[(2)])) == 'S') && (UPP(3)(toupper(ctxt->input->cur[(3)])) == 'T') &&
2962 (UPP(4)(toupper(ctxt->input->cur[(4)])) == 'E') && (UPP(5)(toupper(ctxt->input->cur[(5)])) == 'M')) {
2963 SKIP(6)ctxt->nbChars += (6),ctxt->input->cur += (6),ctxt->
input->col+=(6)
;
2964 if (!IS_BLANK_CH(CUR)(((((int) (*ctxt->input->cur))) == 0x20) || ((0x9 <=
(((int) (*ctxt->input->cur)))) && ((((int) (*ctxt
->input->cur))) <= 0xa)) || ((((int) (*ctxt->input
->cur))) == 0xd))
) {
2965 htmlParseErr(ctxt, XML_ERR_SPACE_REQUIRED,
2966 "Space required after 'SYSTEM'\n", NULL((void*)0), NULL((void*)0));
2967 }
2968 SKIP_BLANKShtmlSkipBlankChars(ctxt);
2969 URI = htmlParseSystemLiteral(ctxt);
2970 if (URI == NULL((void*)0)) {
2971 htmlParseErr(ctxt, XML_ERR_URI_REQUIRED,
2972 "htmlParseExternalID: SYSTEM, no URI\n", NULL((void*)0), NULL((void*)0));
2973 }
2974 } else if ((UPPER(toupper(*ctxt->input->cur)) == 'P') && (UPP(1)(toupper(ctxt->input->cur[(1)])) == 'U') &&
2975 (UPP(2)(toupper(ctxt->input->cur[(2)])) == 'B') && (UPP(3)(toupper(ctxt->input->cur[(3)])) == 'L') &&
2976 (UPP(4)(toupper(ctxt->input->cur[(4)])) == 'I') && (UPP(5)(toupper(ctxt->input->cur[(5)])) == 'C')) {
2977 SKIP(6)ctxt->nbChars += (6),ctxt->input->cur += (6),ctxt->
input->col+=(6)
;
2978 if (!IS_BLANK_CH(CUR)(((((int) (*ctxt->input->cur))) == 0x20) || ((0x9 <=
(((int) (*ctxt->input->cur)))) && ((((int) (*ctxt
->input->cur))) <= 0xa)) || ((((int) (*ctxt->input
->cur))) == 0xd))
) {
2979 htmlParseErr(ctxt, XML_ERR_SPACE_REQUIRED,
2980 "Space required after 'PUBLIC'\n", NULL((void*)0), NULL((void*)0));
2981 }
2982 SKIP_BLANKShtmlSkipBlankChars(ctxt);
2983 *publicID = htmlParsePubidLiteral(ctxt);
2984 if (*publicID == NULL((void*)0)) {
2985 htmlParseErr(ctxt, XML_ERR_PUBID_REQUIRED,
2986 "htmlParseExternalID: PUBLIC, no Public Identifier\n",
2987 NULL((void*)0), NULL((void*)0));
2988 }
2989 SKIP_BLANKShtmlSkipBlankChars(ctxt);
2990 if<