From 5d308865d0783d0cd70f7453c77980835ac5648e Mon Sep 17 00:00:00 2001 From: Angelo Mantellini Date: Wed, 21 Mar 2018 14:16:02 +0100 Subject: update android-sdk. Now it is possible to compile with clang Change-Id: I156aa48dd90467a2a7540eec11839c0111b13bd2 Signed-off-by: Angelo Mantellini --- external/libxml2_android/jni/libxml2/parser.c | 15783 ------------------------ 1 file changed, 15783 deletions(-) delete mode 100644 external/libxml2_android/jni/libxml2/parser.c (limited to 'external/libxml2_android/jni/libxml2/parser.c') diff --git a/external/libxml2_android/jni/libxml2/parser.c b/external/libxml2_android/jni/libxml2/parser.c deleted file mode 100644 index df2efa55..00000000 --- a/external/libxml2_android/jni/libxml2/parser.c +++ /dev/null @@ -1,15783 +0,0 @@ -/* - * parser.c : an XML 1.0 parser, namespaces and validity support are mostly - * implemented on top of the SAX interfaces - * - * References: - * The XML specification: - * http://www.w3.org/TR/REC-xml - * Original 1.0 version: - * http://www.w3.org/TR/1998/REC-xml-19980210 - * XML second edition working draft - * http://www.w3.org/TR/2000/WD-xml-2e-20000814 - * - * Okay this is a big file, the parser core is around 7000 lines, then it - * is followed by the progressive parser top routines, then the various - * high level APIs to call the parser and a few miscellaneous functions. - * A number of helper functions and deprecated ones have been moved to - * parserInternals.c to reduce this file size. - * As much as possible the functions are associated with their relative - * production in the XML specification. A few productions defining the - * different ranges of character are actually implanted either in - * parserInternals.h or parserInternals.c - * The DOM tree build is realized from the default SAX callbacks in - * the module SAX.c. - * The routines doing the validation checks are in valid.c and called either - * from the SAX callbacks or as standalone functions using a preparsed - * document. - * - * See Copyright for the status of this software. - * - * daniel@veillard.com - */ - -#define IN_LIBXML -#include "libxml.h" - -#if defined(WIN32) && !defined (__CYGWIN__) -#define XML_DIR_SEP '\\' -#else -#define XML_DIR_SEP '/' -#endif - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#ifdef LIBXML_CATALOG_ENABLED -#include -#endif -#ifdef LIBXML_SCHEMAS_ENABLED -#include -#include -#endif -#ifdef HAVE_CTYPE_H -#include -#endif -#ifdef HAVE_STDLIB_H -#include -#endif -#ifdef HAVE_SYS_STAT_H -#include -#endif -#ifdef HAVE_FCNTL_H -#include -#endif -#ifdef HAVE_UNISTD_H -#include -#endif -#ifdef HAVE_ZLIB_H -#include -#endif -#ifdef HAVE_LZMA_H -#include -#endif - -#include "buf.h" -#include "enc.h" - -static void -xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info); - -static xmlParserCtxtPtr -xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID, - const xmlChar *base, xmlParserCtxtPtr pctx); - -static void xmlHaltParser(xmlParserCtxtPtr ctxt); - -/************************************************************************ - * * - * Arbitrary limits set in the parser. See XML_PARSE_HUGE * - * * - ************************************************************************/ - -#define XML_PARSER_BIG_ENTITY 1000 -#define XML_PARSER_LOT_ENTITY 5000 - -/* - * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity - * replacement over the size in byte of the input indicates that you have - * and eponential behaviour. A value of 10 correspond to at least 3 entity - * replacement per byte of input. - */ -#define XML_PARSER_NON_LINEAR 10 - -/* - * xmlParserEntityCheck - * - * Function to check non-linear entity expansion behaviour - * This is here to detect and stop exponential linear entity expansion - * This is not a limitation of the parser but a safety - * boundary feature. It can be disabled with the XML_PARSE_HUGE - * parser option. - */ -static int -xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size, - xmlEntityPtr ent, size_t replacement) -{ - size_t consumed = 0; - - if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE)) - return (0); - if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP) - return (1); - - /* - * This may look absurd but is needed to detect - * entities problems - */ - if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && - (ent->content != NULL) && (ent->checked == 0) && - (ctxt->errNo != XML_ERR_ENTITY_LOOP)) { - unsigned long oldnbent = ctxt->nbentities; - xmlChar *rep; - - ent->checked = 1; - - ++ctxt->depth; - rep = xmlStringDecodeEntities(ctxt, ent->content, - XML_SUBSTITUTE_REF, 0, 0, 0); - --ctxt->depth; - if (ctxt->errNo == XML_ERR_ENTITY_LOOP) { - ent->content[0] = 0; - } - - ent->checked = (ctxt->nbentities - oldnbent + 1) * 2; - if (rep != NULL) { - if (xmlStrchr(rep, '<')) - ent->checked |= 1; - xmlFree(rep); - rep = NULL; - } - } - if (replacement != 0) { - if (replacement < XML_MAX_TEXT_LENGTH) - return(0); - - /* - * If the volume of entity copy reaches 10 times the - * amount of parsed data and over the large text threshold - * then that's very likely to be an abuse. - */ - if (ctxt->input != NULL) { - consumed = ctxt->input->consumed + - (ctxt->input->cur - ctxt->input->base); - } - consumed += ctxt->sizeentities; - - if (replacement < XML_PARSER_NON_LINEAR * consumed) - return(0); - } else if (size != 0) { - /* - * Do the check based on the replacement size of the entity - */ - if (size < XML_PARSER_BIG_ENTITY) - return(0); - - /* - * A limit on the amount of text data reasonably used - */ - if (ctxt->input != NULL) { - consumed = ctxt->input->consumed + - (ctxt->input->cur - ctxt->input->base); - } - consumed += ctxt->sizeentities; - - if ((size < XML_PARSER_NON_LINEAR * consumed) && - (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed)) - return (0); - } else if (ent != NULL) { - /* - * use the number of parsed entities in the replacement - */ - size = ent->checked / 2; - - /* - * The amount of data parsed counting entities size only once - */ - if (ctxt->input != NULL) { - consumed = ctxt->input->consumed + - (ctxt->input->cur - ctxt->input->base); - } - consumed += ctxt->sizeentities; - - /* - * Check the density of entities for the amount of data - * knowing an entity reference will take at least 3 bytes - */ - if (size * 3 < consumed * XML_PARSER_NON_LINEAR) - return (0); - } else { - /* - * strange we got no data for checking - */ - if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) && - (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) || - (ctxt->nbentities <= 10000)) - return (0); - } - xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); - return (1); -} - -/** - * xmlParserMaxDepth: - * - * arbitrary depth limit for the XML documents that we allow to - * process. This is not a limitation of the parser but a safety - * boundary feature. It can be disabled with the XML_PARSE_HUGE - * parser option. - */ -unsigned int xmlParserMaxDepth = 256; - - - -#define SAX2 1 -#define XML_PARSER_BIG_BUFFER_SIZE 300 -#define XML_PARSER_BUFFER_SIZE 100 -#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document" - -/** - * XML_PARSER_CHUNK_SIZE - * - * When calling GROW that's the minimal amount of data - * the parser expected to have received. It is not a hard - * limit but an optimization when reading strings like Names - * It is not strictly needed as long as inputs available characters - * are followed by 0, which should be provided by the I/O level - */ -#define XML_PARSER_CHUNK_SIZE 100 - -/* - * List of XML prefixed PI allowed by W3C specs - */ - -static const char *xmlW3CPIs[] = { - "xml-stylesheet", - "xml-model", - NULL -}; - - -/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */ -static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt, - const xmlChar **str); - -static xmlParserErrors -xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, - xmlSAXHandlerPtr sax, - void *user_data, int depth, const xmlChar *URL, - const xmlChar *ID, xmlNodePtr *list); - -static int -xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, - const char *encoding); -#ifdef LIBXML_LEGACY_ENABLED -static void -xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, - xmlNodePtr lastNode); -#endif /* LIBXML_LEGACY_ENABLED */ - -static xmlParserErrors -xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, - const xmlChar *string, void *user_data, xmlNodePtr *lst); - -static int -xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity); - -/************************************************************************ - * * - * Some factorized error routines * - * * - ************************************************************************/ - -/** - * xmlErrAttributeDup: - * @ctxt: an XML parser context - * @prefix: the attribute prefix - * @localname: the attribute localname - * - * Handle a redefinition of attribute error - */ -static void -xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix, - const xmlChar * localname) -{ - if ((ctxt != NULL) && (ctxt->disableSAX != 0) && - (ctxt->instate == XML_PARSER_EOF)) - return; - if (ctxt != NULL) - ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED; - - if (prefix == NULL) - __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, - XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0, - (const char *) localname, NULL, NULL, 0, 0, - "Attribute %s redefined\n", localname); - else - __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, - XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0, - (const char *) prefix, (const char *) localname, - NULL, 0, 0, "Attribute %s:%s redefined\n", prefix, - localname); - if (ctxt != NULL) { - ctxt->wellFormed = 0; - if (ctxt->recovery == 0) - ctxt->disableSAX = 1; - } -} - -/** - * xmlFatalErr: - * @ctxt: an XML parser context - * @error: the error number - * @extra: extra information string - * - * Handle a fatal parser error, i.e. violating Well-Formedness constraints - */ -static void -xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info) -{ - const char *errmsg; - - if ((ctxt != NULL) && (ctxt->disableSAX != 0) && - (ctxt->instate == XML_PARSER_EOF)) - return; - switch (error) { - case XML_ERR_INVALID_HEX_CHARREF: - errmsg = "CharRef: invalid hexadecimal value"; - break; - case XML_ERR_INVALID_DEC_CHARREF: - errmsg = "CharRef: invalid decimal value"; - break; - case XML_ERR_INVALID_CHARREF: - errmsg = "CharRef: invalid value"; - break; - case XML_ERR_INTERNAL_ERROR: - errmsg = "internal error"; - break; - case XML_ERR_PEREF_AT_EOF: - errmsg = "PEReference at end of document"; - break; - case XML_ERR_PEREF_IN_PROLOG: - errmsg = "PEReference in prolog"; - break; - case XML_ERR_PEREF_IN_EPILOG: - errmsg = "PEReference in epilog"; - break; - case XML_ERR_PEREF_NO_NAME: - errmsg = "PEReference: no name"; - break; - case XML_ERR_PEREF_SEMICOL_MISSING: - errmsg = "PEReference: expecting ';'"; - break; - case XML_ERR_ENTITY_LOOP: - errmsg = "Detected an entity reference loop"; - break; - case XML_ERR_ENTITY_NOT_STARTED: - errmsg = "EntityValue: \" or ' expected"; - break; - case XML_ERR_ENTITY_PE_INTERNAL: - errmsg = "PEReferences forbidden in internal subset"; - break; - case XML_ERR_ENTITY_NOT_FINISHED: - errmsg = "EntityValue: \" or ' expected"; - break; - case XML_ERR_ATTRIBUTE_NOT_STARTED: - errmsg = "AttValue: \" or ' expected"; - break; - case XML_ERR_LT_IN_ATTRIBUTE: - errmsg = "Unescaped '<' not allowed in attributes values"; - break; - case XML_ERR_LITERAL_NOT_STARTED: - errmsg = "SystemLiteral \" or ' expected"; - break; - case XML_ERR_LITERAL_NOT_FINISHED: - errmsg = "Unfinished System or Public ID \" or ' expected"; - break; - case XML_ERR_MISPLACED_CDATA_END: - errmsg = "Sequence ']]>' not allowed in content"; - break; - case XML_ERR_URI_REQUIRED: - errmsg = "SYSTEM or PUBLIC, the URI is missing"; - break; - case XML_ERR_PUBID_REQUIRED: - errmsg = "PUBLIC, the Public Identifier is missing"; - break; - case XML_ERR_HYPHEN_IN_COMMENT: - errmsg = "Comment must not contain '--' (double-hyphen)"; - break; - case XML_ERR_PI_NOT_STARTED: - errmsg = "xmlParsePI : no target name"; - break; - case XML_ERR_RESERVED_XML_NAME: - errmsg = "Invalid PI name"; - break; - case XML_ERR_NOTATION_NOT_STARTED: - errmsg = "NOTATION: Name expected here"; - break; - case XML_ERR_NOTATION_NOT_FINISHED: - errmsg = "'>' required to close NOTATION declaration"; - break; - case XML_ERR_VALUE_REQUIRED: - errmsg = "Entity value required"; - break; - case XML_ERR_URI_FRAGMENT: - errmsg = "Fragment not allowed"; - break; - case XML_ERR_ATTLIST_NOT_STARTED: - errmsg = "'(' required to start ATTLIST enumeration"; - break; - case XML_ERR_NMTOKEN_REQUIRED: - errmsg = "NmToken expected in ATTLIST enumeration"; - break; - case XML_ERR_ATTLIST_NOT_FINISHED: - errmsg = "')' required to finish ATTLIST enumeration"; - break; - case XML_ERR_MIXED_NOT_STARTED: - errmsg = "MixedContentDecl : '|' or ')*' expected"; - break; - case XML_ERR_PCDATA_REQUIRED: - errmsg = "MixedContentDecl : '#PCDATA' expected"; - break; - case XML_ERR_ELEMCONTENT_NOT_STARTED: - errmsg = "ContentDecl : Name or '(' expected"; - break; - case XML_ERR_ELEMCONTENT_NOT_FINISHED: - errmsg = "ContentDecl : ',' '|' or ')' expected"; - break; - case XML_ERR_PEREF_IN_INT_SUBSET: - errmsg = - "PEReference: forbidden within markup decl in internal subset"; - break; - case XML_ERR_GT_REQUIRED: - errmsg = "expected '>'"; - break; - case XML_ERR_CONDSEC_INVALID: - errmsg = "XML conditional section '[' expected"; - break; - case XML_ERR_EXT_SUBSET_NOT_FINISHED: - errmsg = "Content error in the external subset"; - break; - case XML_ERR_CONDSEC_INVALID_KEYWORD: - errmsg = - "conditional section INCLUDE or IGNORE keyword expected"; - break; - case XML_ERR_CONDSEC_NOT_FINISHED: - errmsg = "XML conditional section not closed"; - break; - case XML_ERR_XMLDECL_NOT_STARTED: - errmsg = "Text declaration '' expected"; - break; - case XML_ERR_EXT_ENTITY_STANDALONE: - errmsg = "external parsed entities cannot be standalone"; - break; - case XML_ERR_ENTITYREF_SEMICOL_MISSING: - errmsg = "EntityRef: expecting ';'"; - break; - case XML_ERR_DOCTYPE_NOT_FINISHED: - errmsg = "DOCTYPE improperly terminated"; - break; - case XML_ERR_LTSLASH_REQUIRED: - errmsg = "EndTag: 'errNo = error; - if (info == NULL) { - __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, - XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n", - errmsg); - } else { - __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, - XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n", - errmsg, info); - } - if (ctxt != NULL) { - ctxt->wellFormed = 0; - if (ctxt->recovery == 0) - ctxt->disableSAX = 1; - } -} - -/** - * xmlFatalErrMsg: - * @ctxt: an XML parser context - * @error: the error number - * @msg: the error message - * - * Handle a fatal parser error, i.e. violating Well-Formedness constraints - */ -static void LIBXML_ATTR_FORMAT(3,0) -xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, - const char *msg) -{ - if ((ctxt != NULL) && (ctxt->disableSAX != 0) && - (ctxt->instate == XML_PARSER_EOF)) - return; - if (ctxt != NULL) - ctxt->errNo = error; - __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, - XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg); - if (ctxt != NULL) { - ctxt->wellFormed = 0; - if (ctxt->recovery == 0) - ctxt->disableSAX = 1; - } -} - -/** - * xmlWarningMsg: - * @ctxt: an XML parser context - * @error: the error number - * @msg: the error message - * @str1: extra data - * @str2: extra data - * - * Handle a warning. - */ -static void LIBXML_ATTR_FORMAT(3,0) -xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, - const char *msg, const xmlChar *str1, const xmlChar *str2) -{ - xmlStructuredErrorFunc schannel = NULL; - - if ((ctxt != NULL) && (ctxt->disableSAX != 0) && - (ctxt->instate == XML_PARSER_EOF)) - return; - if ((ctxt != NULL) && (ctxt->sax != NULL) && - (ctxt->sax->initialized == XML_SAX2_MAGIC)) - schannel = ctxt->sax->serror; - if (ctxt != NULL) { - __xmlRaiseError(schannel, - (ctxt->sax) ? ctxt->sax->warning : NULL, - ctxt->userData, - ctxt, NULL, XML_FROM_PARSER, error, - XML_ERR_WARNING, NULL, 0, - (const char *) str1, (const char *) str2, NULL, 0, 0, - msg, (const char *) str1, (const char *) str2); - } else { - __xmlRaiseError(schannel, NULL, NULL, - ctxt, NULL, XML_FROM_PARSER, error, - XML_ERR_WARNING, NULL, 0, - (const char *) str1, (const char *) str2, NULL, 0, 0, - msg, (const char *) str1, (const char *) str2); - } -} - -/** - * xmlValidityError: - * @ctxt: an XML parser context - * @error: the error number - * @msg: the error message - * @str1: extra data - * - * Handle a validity error. - */ -static void LIBXML_ATTR_FORMAT(3,0) -xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error, - const char *msg, const xmlChar *str1, const xmlChar *str2) -{ - xmlStructuredErrorFunc schannel = NULL; - - if ((ctxt != NULL) && (ctxt->disableSAX != 0) && - (ctxt->instate == XML_PARSER_EOF)) - return; - if (ctxt != NULL) { - ctxt->errNo = error; - if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC)) - schannel = ctxt->sax->serror; - } - if (ctxt != NULL) { - __xmlRaiseError(schannel, - ctxt->vctxt.error, ctxt->vctxt.userData, - ctxt, NULL, XML_FROM_DTD, error, - XML_ERR_ERROR, NULL, 0, (const char *) str1, - (const char *) str2, NULL, 0, 0, - msg, (const char *) str1, (const char *) str2); - ctxt->valid = 0; - } else { - __xmlRaiseError(schannel, NULL, NULL, - ctxt, NULL, XML_FROM_DTD, error, - XML_ERR_ERROR, NULL, 0, (const char *) str1, - (const char *) str2, NULL, 0, 0, - msg, (const char *) str1, (const char *) str2); - } -} - -/** - * xmlFatalErrMsgInt: - * @ctxt: an XML parser context - * @error: the error number - * @msg: the error message - * @val: an integer value - * - * Handle a fatal parser error, i.e. violating Well-Formedness constraints - */ -static void LIBXML_ATTR_FORMAT(3,0) -xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, - const char *msg, int val) -{ - if ((ctxt != NULL) && (ctxt->disableSAX != 0) && - (ctxt->instate == XML_PARSER_EOF)) - return; - if (ctxt != NULL) - ctxt->errNo = error; - __xmlRaiseError(NULL, NULL, NULL, - ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, - NULL, 0, NULL, NULL, NULL, val, 0, msg, val); - if (ctxt != NULL) { - ctxt->wellFormed = 0; - if (ctxt->recovery == 0) - ctxt->disableSAX = 1; - } -} - -/** - * xmlFatalErrMsgStrIntStr: - * @ctxt: an XML parser context - * @error: the error number - * @msg: the error message - * @str1: an string info - * @val: an integer value - * @str2: an string info - * - * Handle a fatal parser error, i.e. violating Well-Formedness constraints - */ -static void LIBXML_ATTR_FORMAT(3,0) -xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, - const char *msg, const xmlChar *str1, int val, - const xmlChar *str2) -{ - if ((ctxt != NULL) && (ctxt->disableSAX != 0) && - (ctxt->instate == XML_PARSER_EOF)) - return; - if (ctxt != NULL) - ctxt->errNo = error; - __xmlRaiseError(NULL, NULL, NULL, - ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, - NULL, 0, (const char *) str1, (const char *) str2, - NULL, val, 0, msg, str1, val, str2); - if (ctxt != NULL) { - ctxt->wellFormed = 0; - if (ctxt->recovery == 0) - ctxt->disableSAX = 1; - } -} - -/** - * xmlFatalErrMsgStr: - * @ctxt: an XML parser context - * @error: the error number - * @msg: the error message - * @val: a string value - * - * Handle a fatal parser error, i.e. violating Well-Formedness constraints - */ -static void LIBXML_ATTR_FORMAT(3,0) -xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, - const char *msg, const xmlChar * val) -{ - if ((ctxt != NULL) && (ctxt->disableSAX != 0) && - (ctxt->instate == XML_PARSER_EOF)) - return; - if (ctxt != NULL) - ctxt->errNo = error; - __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, - XML_FROM_PARSER, error, XML_ERR_FATAL, - NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, - val); - if (ctxt != NULL) { - ctxt->wellFormed = 0; - if (ctxt->recovery == 0) - ctxt->disableSAX = 1; - } -} - -/** - * xmlErrMsgStr: - * @ctxt: an XML parser context - * @error: the error number - * @msg: the error message - * @val: a string value - * - * Handle a non fatal parser error - */ -static void LIBXML_ATTR_FORMAT(3,0) -xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, - const char *msg, const xmlChar * val) -{ - if ((ctxt != NULL) && (ctxt->disableSAX != 0) && - (ctxt->instate == XML_PARSER_EOF)) - return; - if (ctxt != NULL) - ctxt->errNo = error; - __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, - XML_FROM_PARSER, error, XML_ERR_ERROR, - NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, - val); -} - -/** - * xmlNsErr: - * @ctxt: an XML parser context - * @error: the error number - * @msg: the message - * @info1: extra information string - * @info2: extra information string - * - * Handle a fatal parser error, i.e. violating Well-Formedness constraints - */ -static void LIBXML_ATTR_FORMAT(3,0) -xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, - const char *msg, - const xmlChar * info1, const xmlChar * info2, - const xmlChar * info3) -{ - if ((ctxt != NULL) && (ctxt->disableSAX != 0) && - (ctxt->instate == XML_PARSER_EOF)) - return; - if (ctxt != NULL) - ctxt->errNo = error; - __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error, - XML_ERR_ERROR, NULL, 0, (const char *) info1, - (const char *) info2, (const char *) info3, 0, 0, msg, - info1, info2, info3); - if (ctxt != NULL) - ctxt->nsWellFormed = 0; -} - -/** - * xmlNsWarn - * @ctxt: an XML parser context - * @error: the error number - * @msg: the message - * @info1: extra information string - * @info2: extra information string - * - * Handle a namespace warning error - */ -static void LIBXML_ATTR_FORMAT(3,0) -xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error, - const char *msg, - const xmlChar * info1, const xmlChar * info2, - const xmlChar * info3) -{ - if ((ctxt != NULL) && (ctxt->disableSAX != 0) && - (ctxt->instate == XML_PARSER_EOF)) - return; - __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error, - XML_ERR_WARNING, NULL, 0, (const char *) info1, - (const char *) info2, (const char *) info3, 0, 0, msg, - info1, info2, info3); -} - -/************************************************************************ - * * - * Library wide options * - * * - ************************************************************************/ - -/** - * xmlHasFeature: - * @feature: the feature to be examined - * - * Examines if the library has been compiled with a given feature. - * - * Returns a non-zero value if the feature exist, otherwise zero. - * Returns zero (0) if the feature does not exist or an unknown - * unknown feature is requested, non-zero otherwise. - */ -int -xmlHasFeature(xmlFeature feature) -{ - switch (feature) { - case XML_WITH_THREAD: -#ifdef LIBXML_THREAD_ENABLED - return(1); -#else - return(0); -#endif - case XML_WITH_TREE: -#ifdef LIBXML_TREE_ENABLED - return(1); -#else - return(0); -#endif - case XML_WITH_OUTPUT: -#ifdef LIBXML_OUTPUT_ENABLED - return(1); -#else - return(0); -#endif - case XML_WITH_PUSH: -#ifdef LIBXML_PUSH_ENABLED - return(1); -#else - return(0); -#endif - case XML_WITH_READER: -#ifdef LIBXML_READER_ENABLED - return(1); -#else - return(0); -#endif - case XML_WITH_PATTERN: -#ifdef LIBXML_PATTERN_ENABLED - return(1); -#else - return(0); -#endif - case XML_WITH_WRITER: -#ifdef LIBXML_WRITER_ENABLED - return(1); -#else - return(0); -#endif - case XML_WITH_SAX1: -#ifdef LIBXML_SAX1_ENABLED - return(1); -#else - return(0); -#endif - case XML_WITH_FTP: -#ifdef LIBXML_FTP_ENABLED - return(1); -#else - return(0); -#endif - case XML_WITH_HTTP: -#ifdef LIBXML_HTTP_ENABLED - return(1); -#else - return(0); -#endif - case XML_WITH_VALID: -#ifdef LIBXML_VALID_ENABLED - return(1); -#else - return(0); -#endif - case XML_WITH_HTML: -#ifdef LIBXML_HTML_ENABLED - return(1); -#else - return(0); -#endif - case XML_WITH_LEGACY: -#ifdef LIBXML_LEGACY_ENABLED - return(1); -#else - return(0); -#endif - case XML_WITH_C14N: -#ifdef LIBXML_C14N_ENABLED - return(1); -#else - return(0); -#endif - case XML_WITH_CATALOG: -#ifdef LIBXML_CATALOG_ENABLED - return(1); -#else - return(0); -#endif - case XML_WITH_XPATH: -#ifdef LIBXML_XPATH_ENABLED - return(1); -#else - return(0); -#endif - case XML_WITH_XPTR: -#ifdef LIBXML_XPTR_ENABLED - return(1); -#else - return(0); -#endif - case XML_WITH_XINCLUDE: -#ifdef LIBXML_XINCLUDE_ENABLED - return(1); -#else - return(0); -#endif - case XML_WITH_ICONV: -#ifdef LIBXML_ICONV_ENABLED - return(1); -#else - return(0); -#endif - case XML_WITH_ISO8859X: -#ifdef LIBXML_ISO8859X_ENABLED - return(1); -#else - return(0); -#endif - case XML_WITH_UNICODE: -#ifdef LIBXML_UNICODE_ENABLED - return(1); -#else - return(0); -#endif - case XML_WITH_REGEXP: -#ifdef LIBXML_REGEXP_ENABLED - return(1); -#else - return(0); -#endif - case XML_WITH_AUTOMATA: -#ifdef LIBXML_AUTOMATA_ENABLED - return(1); -#else - return(0); -#endif - case XML_WITH_EXPR: -#ifdef LIBXML_EXPR_ENABLED - return(1); -#else - return(0); -#endif - case XML_WITH_SCHEMAS: -#ifdef LIBXML_SCHEMAS_ENABLED - return(1); -#else - return(0); -#endif - case XML_WITH_SCHEMATRON: -#ifdef LIBXML_SCHEMATRON_ENABLED - return(1); -#else - return(0); -#endif - case XML_WITH_MODULES: -#ifdef LIBXML_MODULES_ENABLED - return(1); -#else - return(0); -#endif - case XML_WITH_DEBUG: -#ifdef LIBXML_DEBUG_ENABLED - return(1); -#else - return(0); -#endif - case XML_WITH_DEBUG_MEM: -#ifdef DEBUG_MEMORY_LOCATION - return(1); -#else - return(0); -#endif - case XML_WITH_DEBUG_RUN: -#ifdef LIBXML_DEBUG_RUNTIME - return(1); -#else - return(0); -#endif - case XML_WITH_ZLIB: -#ifdef LIBXML_ZLIB_ENABLED - return(1); -#else - return(0); -#endif - case XML_WITH_LZMA: -#ifdef LIBXML_LZMA_ENABLED - return(1); -#else - return(0); -#endif - case XML_WITH_ICU: -#ifdef LIBXML_ICU_ENABLED - return(1); -#else - return(0); -#endif - default: - break; - } - return(0); -} - -/************************************************************************ - * * - * SAX2 defaulted attributes handling * - * * - ************************************************************************/ - -/** - * xmlDetectSAX2: - * @ctxt: an XML parser context - * - * Do the SAX2 detection and specific intialization - */ -static void -xmlDetectSAX2(xmlParserCtxtPtr ctxt) { - if (ctxt == NULL) return; -#ifdef LIBXML_SAX1_ENABLED - if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) && - ((ctxt->sax->startElementNs != NULL) || - (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1; -#else - ctxt->sax2 = 1; -#endif /* LIBXML_SAX1_ENABLED */ - - ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); - ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); - ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); - if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) || - (ctxt->str_xml_ns == NULL)) { - xmlErrMemory(ctxt, NULL); - } -} - -typedef struct _xmlDefAttrs xmlDefAttrs; -typedef xmlDefAttrs *xmlDefAttrsPtr; -struct _xmlDefAttrs { - int nbAttrs; /* number of defaulted attributes on that element */ - int maxAttrs; /* the size of the array */ -#if __STDC_VERSION__ >= 199901L - /* Using a C99 flexible array member avoids UBSan errors. */ - const xmlChar *values[]; /* array of localname/prefix/values/external */ -#else - const xmlChar *values[5]; -#endif -}; - -/** - * xmlAttrNormalizeSpace: - * @src: the source string - * @dst: the target string - * - * Normalize the space in non CDATA attribute values: - * If the attribute type is not CDATA, then the XML processor MUST further - * process the normalized attribute value by discarding any leading and - * trailing space (#x20) characters, and by replacing sequences of space - * (#x20) characters by a single space (#x20) character. - * Note that the size of dst need to be at least src, and if one doesn't need - * to preserve dst (and it doesn't come from a dictionary or read-only) then - * passing src as dst is just fine. - * - * Returns a pointer to the normalized value (dst) or NULL if no conversion - * is needed. - */ -static xmlChar * -xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst) -{ - if ((src == NULL) || (dst == NULL)) - return(NULL); - - while (*src == 0x20) src++; - while (*src != 0) { - if (*src == 0x20) { - while (*src == 0x20) src++; - if (*src != 0) - *dst++ = 0x20; - } else { - *dst++ = *src++; - } - } - *dst = 0; - if (dst == src) - return(NULL); - return(dst); -} - -/** - * xmlAttrNormalizeSpace2: - * @src: the source string - * - * Normalize the space in non CDATA attribute values, a slightly more complex - * front end to avoid allocation problems when running on attribute values - * coming from the input. - * - * Returns a pointer to the normalized value (dst) or NULL if no conversion - * is needed. - */ -static const xmlChar * -xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len) -{ - int i; - int remove_head = 0; - int need_realloc = 0; - const xmlChar *cur; - - if ((ctxt == NULL) || (src == NULL) || (len == NULL)) - return(NULL); - i = *len; - if (i <= 0) - return(NULL); - - cur = src; - while (*cur == 0x20) { - cur++; - remove_head++; - } - while (*cur != 0) { - if (*cur == 0x20) { - cur++; - if ((*cur == 0x20) || (*cur == 0)) { - need_realloc = 1; - break; - } - } else - cur++; - } - if (need_realloc) { - xmlChar *ret; - - ret = xmlStrndup(src + remove_head, i - remove_head + 1); - if (ret == NULL) { - xmlErrMemory(ctxt, NULL); - return(NULL); - } - xmlAttrNormalizeSpace(ret, ret); - *len = (int) strlen((const char *)ret); - return(ret); - } else if (remove_head) { - *len -= remove_head; - memmove(src, src + remove_head, 1 + *len); - return(src); - } - return(NULL); -} - -/** - * xmlAddDefAttrs: - * @ctxt: an XML parser context - * @fullname: the element fullname - * @fullattr: the attribute fullname - * @value: the attribute value - * - * Add a defaulted attribute for an element - */ -static void -xmlAddDefAttrs(xmlParserCtxtPtr ctxt, - const xmlChar *fullname, - const xmlChar *fullattr, - const xmlChar *value) { - xmlDefAttrsPtr defaults; - int len; - const xmlChar *name; - const xmlChar *prefix; - - /* - * Allows to detect attribute redefinitions - */ - if (ctxt->attsSpecial != NULL) { - if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL) - return; - } - - if (ctxt->attsDefault == NULL) { - ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict); - if (ctxt->attsDefault == NULL) - goto mem_error; - } - - /* - * split the element name into prefix:localname , the string found - * are within the DTD and then not associated to namespace names. - */ - name = xmlSplitQName3(fullname, &len); - if (name == NULL) { - name = xmlDictLookup(ctxt->dict, fullname, -1); - prefix = NULL; - } else { - name = xmlDictLookup(ctxt->dict, name, -1); - prefix = xmlDictLookup(ctxt->dict, fullname, len); - } - - /* - * make sure there is some storage - */ - defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix); - if (defaults == NULL) { - defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) + - (4 * 5) * sizeof(const xmlChar *)); - if (defaults == NULL) - goto mem_error; - defaults->nbAttrs = 0; - defaults->maxAttrs = 4; - if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, - defaults, NULL) < 0) { - xmlFree(defaults); - goto mem_error; - } - } else if (defaults->nbAttrs >= defaults->maxAttrs) { - xmlDefAttrsPtr temp; - - temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) + - (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *)); - if (temp == NULL) - goto mem_error; - defaults = temp; - defaults->maxAttrs *= 2; - if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, - defaults, NULL) < 0) { - xmlFree(defaults); - goto mem_error; - } - } - - /* - * Split the element name into prefix:localname , the string found - * are within the DTD and hen not associated to namespace names. - */ - name = xmlSplitQName3(fullattr, &len); - if (name == NULL) { - name = xmlDictLookup(ctxt->dict, fullattr, -1); - prefix = NULL; - } else { - name = xmlDictLookup(ctxt->dict, name, -1); - prefix = xmlDictLookup(ctxt->dict, fullattr, len); - } - - defaults->values[5 * defaults->nbAttrs] = name; - defaults->values[5 * defaults->nbAttrs + 1] = prefix; - /* intern the string and precompute the end */ - len = xmlStrlen(value); - value = xmlDictLookup(ctxt->dict, value, len); - defaults->values[5 * defaults->nbAttrs + 2] = value; - defaults->values[5 * defaults->nbAttrs + 3] = value + len; - if (ctxt->external) - defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external"; - else - defaults->values[5 * defaults->nbAttrs + 4] = NULL; - defaults->nbAttrs++; - - return; - -mem_error: - xmlErrMemory(ctxt, NULL); - return; -} - -/** - * xmlAddSpecialAttr: - * @ctxt: an XML parser context - * @fullname: the element fullname - * @fullattr: the attribute fullname - * @type: the attribute type - * - * Register this attribute type - */ -static void -xmlAddSpecialAttr(xmlParserCtxtPtr ctxt, - const xmlChar *fullname, - const xmlChar *fullattr, - int type) -{ - if (ctxt->attsSpecial == NULL) { - ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict); - if (ctxt->attsSpecial == NULL) - goto mem_error; - } - - if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL) - return; - - xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr, - (void *) (long) type); - return; - -mem_error: - xmlErrMemory(ctxt, NULL); - return; -} - -/** - * xmlCleanSpecialAttrCallback: - * - * Removes CDATA attributes from the special attribute table - */ -static void -xmlCleanSpecialAttrCallback(void *payload, void *data, - const xmlChar *fullname, const xmlChar *fullattr, - const xmlChar *unused ATTRIBUTE_UNUSED) { - xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data; - - if (((long) payload) == XML_ATTRIBUTE_CDATA) { - xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL); - } -} - -/** - * xmlCleanSpecialAttr: - * @ctxt: an XML parser context - * - * Trim the list of attributes defined to remove all those of type - * CDATA as they are not special. This call should be done when finishing - * to parse the DTD and before starting to parse the document root. - */ -static void -xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt) -{ - if (ctxt->attsSpecial == NULL) - return; - - xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt); - - if (xmlHashSize(ctxt->attsSpecial) == 0) { - xmlHashFree(ctxt->attsSpecial, NULL); - ctxt->attsSpecial = NULL; - } - return; -} - -/** - * xmlCheckLanguageID: - * @lang: pointer to the string value - * - * Checks that the value conforms to the LanguageID production: - * - * NOTE: this is somewhat deprecated, those productions were removed from - * the XML Second edition. - * - * [33] LanguageID ::= Langcode ('-' Subcode)* - * [34] Langcode ::= ISO639Code | IanaCode | UserCode - * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z]) - * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+ - * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+ - * [38] Subcode ::= ([a-z] | [A-Z])+ - * - * The current REC reference the sucessors of RFC 1766, currently 5646 - * - * http://www.rfc-editor.org/rfc/rfc5646.txt - * langtag = language - * ["-" script] - * ["-" region] - * *("-" variant) - * *("-" extension) - * ["-" privateuse] - * language = 2*3ALPHA ; shortest ISO 639 code - * ["-" extlang] ; sometimes followed by - * ; extended language subtags - * / 4ALPHA ; or reserved for future use - * / 5*8ALPHA ; or registered language subtag - * - * extlang = 3ALPHA ; selected ISO 639 codes - * *2("-" 3ALPHA) ; permanently reserved - * - * script = 4ALPHA ; ISO 15924 code - * - * region = 2ALPHA ; ISO 3166-1 code - * / 3DIGIT ; UN M.49 code - * - * variant = 5*8alphanum ; registered variants - * / (DIGIT 3alphanum) - * - * extension = singleton 1*("-" (2*8alphanum)) - * - * ; Single alphanumerics - * ; "x" reserved for private use - * singleton = DIGIT ; 0 - 9 - * / %x41-57 ; A - W - * / %x59-5A ; Y - Z - * / %x61-77 ; a - w - * / %x79-7A ; y - z - * - * it sounds right to still allow Irregular i-xxx IANA and user codes too - * The parser below doesn't try to cope with extension or privateuse - * that could be added but that's not interoperable anyway - * - * Returns 1 if correct 0 otherwise - **/ -int -xmlCheckLanguageID(const xmlChar * lang) -{ - const xmlChar *cur = lang, *nxt; - - if (cur == NULL) - return (0); - if (((cur[0] == 'i') && (cur[1] == '-')) || - ((cur[0] == 'I') && (cur[1] == '-')) || - ((cur[0] == 'x') && (cur[1] == '-')) || - ((cur[0] == 'X') && (cur[1] == '-'))) { - /* - * Still allow IANA code and user code which were coming - * from the previous version of the XML-1.0 specification - * it's deprecated but we should not fail - */ - cur += 2; - while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || - ((cur[0] >= 'a') && (cur[0] <= 'z'))) - cur++; - return(cur[0] == 0); - } - nxt = cur; - while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || - ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) - nxt++; - if (nxt - cur >= 4) { - /* - * Reserved - */ - if ((nxt - cur > 8) || (nxt[0] != 0)) - return(0); - return(1); - } - if (nxt - cur < 2) - return(0); - /* we got an ISO 639 code */ - if (nxt[0] == 0) - return(1); - if (nxt[0] != '-') - return(0); - - nxt++; - cur = nxt; - /* now we can have extlang or script or region or variant */ - if ((nxt[0] >= '0') && (nxt[0] <= '9')) - goto region_m49; - - while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || - ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) - nxt++; - if (nxt - cur == 4) - goto script; - if (nxt - cur == 2) - goto region; - if ((nxt - cur >= 5) && (nxt - cur <= 8)) - goto variant; - if (nxt - cur != 3) - return(0); - /* we parsed an extlang */ - if (nxt[0] == 0) - return(1); - if (nxt[0] != '-') - return(0); - - nxt++; - cur = nxt; - /* now we can have script or region or variant */ - if ((nxt[0] >= '0') && (nxt[0] <= '9')) - goto region_m49; - - while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || - ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) - nxt++; - if (nxt - cur == 2) - goto region; - if ((nxt - cur >= 5) && (nxt - cur <= 8)) - goto variant; - if (nxt - cur != 4) - return(0); - /* we parsed a script */ -script: - if (nxt[0] == 0) - return(1); - if (nxt[0] != '-') - return(0); - - nxt++; - cur = nxt; - /* now we can have region or variant */ - if ((nxt[0] >= '0') && (nxt[0] <= '9')) - goto region_m49; - - while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || - ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) - nxt++; - - if ((nxt - cur >= 5) && (nxt - cur <= 8)) - goto variant; - if (nxt - cur != 2) - return(0); - /* we parsed a region */ -region: - if (nxt[0] == 0) - return(1); - if (nxt[0] != '-') - return(0); - - nxt++; - cur = nxt; - /* now we can just have a variant */ - while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) || - ((nxt[0] >= 'a') && (nxt[0] <= 'z'))) - nxt++; - - if ((nxt - cur < 5) || (nxt - cur > 8)) - return(0); - - /* we parsed a variant */ -variant: - if (nxt[0] == 0) - return(1); - if (nxt[0] != '-') - return(0); - /* extensions and private use subtags not checked */ - return (1); - -region_m49: - if (((nxt[1] >= '0') && (nxt[1] <= '9')) && - ((nxt[2] >= '0') && (nxt[2] <= '9'))) { - nxt += 3; - goto region; - } - return(0); -} - -/************************************************************************ - * * - * Parser stacks related functions and macros * - * * - ************************************************************************/ - -static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, - const xmlChar ** str); - -#ifdef SAX2 -/** - * nsPush: - * @ctxt: an XML parser context - * @prefix: the namespace prefix or NULL - * @URL: the namespace name - * - * Pushes a new parser namespace on top of the ns stack - * - * Returns -1 in case of error, -2 if the namespace should be discarded - * and the index in the stack otherwise. - */ -static int -nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL) -{ - if (ctxt->options & XML_PARSE_NSCLEAN) { - int i; - for (i = ctxt->nsNr - 2;i >= 0;i -= 2) { - if (ctxt->nsTab[i] == prefix) { - /* in scope */ - if (ctxt->nsTab[i + 1] == URL) - return(-2); - /* out of scope keep it */ - break; - } - } - } - if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) { - ctxt->nsMax = 10; - ctxt->nsNr = 0; - ctxt->nsTab = (const xmlChar **) - xmlMalloc(ctxt->nsMax * sizeof(xmlChar *)); - if (ctxt->nsTab == NULL) { - xmlErrMemory(ctxt, NULL); - ctxt->nsMax = 0; - return (-1); - } - } else if (ctxt->nsNr >= ctxt->nsMax) { - const xmlChar ** tmp; - ctxt->nsMax *= 2; - tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab, - ctxt->nsMax * sizeof(ctxt->nsTab[0])); - if (tmp == NULL) { - xmlErrMemory(ctxt, NULL); - ctxt->nsMax /= 2; - return (-1); - } - ctxt->nsTab = tmp; - } - ctxt->nsTab[ctxt->nsNr++] = prefix; - ctxt->nsTab[ctxt->nsNr++] = URL; - return (ctxt->nsNr); -} -/** - * nsPop: - * @ctxt: an XML parser context - * @nr: the number to pop - * - * Pops the top @nr parser prefix/namespace from the ns stack - * - * Returns the number of namespaces removed - */ -static int -nsPop(xmlParserCtxtPtr ctxt, int nr) -{ - int i; - - if (ctxt->nsTab == NULL) return(0); - if (ctxt->nsNr < nr) { - xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr); - nr = ctxt->nsNr; - } - if (ctxt->nsNr <= 0) - return (0); - - for (i = 0;i < nr;i++) { - ctxt->nsNr--; - ctxt->nsTab[ctxt->nsNr] = NULL; - } - return(nr); -} -#endif - -static int -xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) { - const xmlChar **atts; - int *attallocs; - int maxatts; - - if (ctxt->atts == NULL) { - maxatts = 55; /* allow for 10 attrs by default */ - atts = (const xmlChar **) - xmlMalloc(maxatts * sizeof(xmlChar *)); - if (atts == NULL) goto mem_error; - ctxt->atts = atts; - attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int)); - if (attallocs == NULL) goto mem_error; - ctxt->attallocs = attallocs; - ctxt->maxatts = maxatts; - } else if (nr + 5 > ctxt->maxatts) { - maxatts = (nr + 5) * 2; - atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts, - maxatts * sizeof(const xmlChar *)); - if (atts == NULL) goto mem_error; - ctxt->atts = atts; - attallocs = (int *) xmlRealloc((void *) ctxt->attallocs, - (maxatts / 5) * sizeof(int)); - if (attallocs == NULL) goto mem_error; - ctxt->attallocs = attallocs; - ctxt->maxatts = maxatts; - } - return(ctxt->maxatts); -mem_error: - xmlErrMemory(ctxt, NULL); - return(-1); -} - -/** - * inputPush: - * @ctxt: an XML parser context - * @value: the parser input - * - * Pushes a new parser input on top of the input stack - * - * Returns -1 in case of error, the index in the stack otherwise - */ -int -inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value) -{ - if ((ctxt == NULL) || (value == NULL)) - return(-1); - if (ctxt->inputNr >= ctxt->inputMax) { - ctxt->inputMax *= 2; - ctxt->inputTab = - (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab, - ctxt->inputMax * - sizeof(ctxt->inputTab[0])); - if (ctxt->inputTab == NULL) { - xmlErrMemory(ctxt, NULL); - xmlFreeInputStream(value); - ctxt->inputMax /= 2; - value = NULL; - return (-1); - } - } - ctxt->inputTab[ctxt->inputNr] = value; - ctxt->input = value; - return (ctxt->inputNr++); -} -/** - * inputPop: - * @ctxt: an XML parser context - * - * Pops the top parser input from the input stack - * - * Returns the input just removed - */ -xmlParserInputPtr -inputPop(xmlParserCtxtPtr ctxt) -{ - xmlParserInputPtr ret; - - if (ctxt == NULL) - return(NULL); - if (ctxt->inputNr <= 0) - return (NULL); - ctxt->inputNr--; - if (ctxt->inputNr > 0) - ctxt->input = ctxt->inputTab[ctxt->inputNr - 1]; - else - ctxt->input = NULL; - ret = ctxt->inputTab[ctxt->inputNr]; - ctxt->inputTab[ctxt->inputNr] = NULL; - return (ret); -} -/** - * nodePush: - * @ctxt: an XML parser context - * @value: the element node - * - * Pushes a new element node on top of the node stack - * - * Returns -1 in case of error, the index in the stack otherwise - */ -int -nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value) -{ - if (ctxt == NULL) return(0); - if (ctxt->nodeNr >= ctxt->nodeMax) { - xmlNodePtr *tmp; - - tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab, - ctxt->nodeMax * 2 * - sizeof(ctxt->nodeTab[0])); - if (tmp == NULL) { - xmlErrMemory(ctxt, NULL); - return (-1); - } - ctxt->nodeTab = tmp; - ctxt->nodeMax *= 2; - } - if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) && - ((ctxt->options & XML_PARSE_HUGE) == 0)) { - xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, - "Excessive depth in document: %d use XML_PARSE_HUGE option\n", - xmlParserMaxDepth); - xmlHaltParser(ctxt); - return(-1); - } - ctxt->nodeTab[ctxt->nodeNr] = value; - ctxt->node = value; - return (ctxt->nodeNr++); -} - -/** - * nodePop: - * @ctxt: an XML parser context - * - * Pops the top element node from the node stack - * - * Returns the node just removed - */ -xmlNodePtr -nodePop(xmlParserCtxtPtr ctxt) -{ - xmlNodePtr ret; - - if (ctxt == NULL) return(NULL); - if (ctxt->nodeNr <= 0) - return (NULL); - ctxt->nodeNr--; - if (ctxt->nodeNr > 0) - ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1]; - else - ctxt->node = NULL; - ret = ctxt->nodeTab[ctxt->nodeNr]; - ctxt->nodeTab[ctxt->nodeNr] = NULL; - return (ret); -} - -#ifdef LIBXML_PUSH_ENABLED -/** - * nameNsPush: - * @ctxt: an XML parser context - * @value: the element name - * @prefix: the element prefix - * @URI: the element namespace name - * - * Pushes a new element name/prefix/URL on top of the name stack - * - * Returns -1 in case of error, the index in the stack otherwise - */ -static int -nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value, - const xmlChar *prefix, const xmlChar *URI, int nsNr) -{ - if (ctxt->nameNr >= ctxt->nameMax) { - const xmlChar * *tmp; - void **tmp2; - ctxt->nameMax *= 2; - tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, - ctxt->nameMax * - sizeof(ctxt->nameTab[0])); - if (tmp == NULL) { - ctxt->nameMax /= 2; - goto mem_error; - } - ctxt->nameTab = tmp; - tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab, - ctxt->nameMax * 3 * - sizeof(ctxt->pushTab[0])); - if (tmp2 == NULL) { - ctxt->nameMax /= 2; - goto mem_error; - } - ctxt->pushTab = tmp2; - } - ctxt->nameTab[ctxt->nameNr] = value; - ctxt->name = value; - ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix; - ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI; - ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr; - return (ctxt->nameNr++); -mem_error: - xmlErrMemory(ctxt, NULL); - return (-1); -} -/** - * nameNsPop: - * @ctxt: an XML parser context - * - * Pops the top element/prefix/URI name from the name stack - * - * Returns the name just removed - */ -static const xmlChar * -nameNsPop(xmlParserCtxtPtr ctxt) -{ - const xmlChar *ret; - - if (ctxt->nameNr <= 0) - return (NULL); - ctxt->nameNr--; - if (ctxt->nameNr > 0) - ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; - else - ctxt->name = NULL; - ret = ctxt->nameTab[ctxt->nameNr]; - ctxt->nameTab[ctxt->nameNr] = NULL; - return (ret); -} -#endif /* LIBXML_PUSH_ENABLED */ - -/** - * namePush: - * @ctxt: an XML parser context - * @value: the element name - * - * Pushes a new element name on top of the name stack - * - * Returns -1 in case of error, the index in the stack otherwise - */ -int -namePush(xmlParserCtxtPtr ctxt, const xmlChar * value) -{ - if (ctxt == NULL) return (-1); - - if (ctxt->nameNr >= ctxt->nameMax) { - const xmlChar * *tmp; - tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, - ctxt->nameMax * 2 * - sizeof(ctxt->nameTab[0])); - if (tmp == NULL) { - goto mem_error; - } - ctxt->nameTab = tmp; - ctxt->nameMax *= 2; - } - ctxt->nameTab[ctxt->nameNr] = value; - ctxt->name = value; - return (ctxt->nameNr++); -mem_error: - xmlErrMemory(ctxt, NULL); - return (-1); -} -/** - * namePop: - * @ctxt: an XML parser context - * - * Pops the top element name from the name stack - * - * Returns the name just removed - */ -const xmlChar * -namePop(xmlParserCtxtPtr ctxt) -{ - const xmlChar *ret; - - if ((ctxt == NULL) || (ctxt->nameNr <= 0)) - return (NULL); - ctxt->nameNr--; - if (ctxt->nameNr > 0) - ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; - else - ctxt->name = NULL; - ret = ctxt->nameTab[ctxt->nameNr]; - ctxt->nameTab[ctxt->nameNr] = NULL; - return (ret); -} - -static int spacePush(xmlParserCtxtPtr ctxt, int val) { - if (ctxt->spaceNr >= ctxt->spaceMax) { - int *tmp; - - ctxt->spaceMax *= 2; - tmp = (int *) xmlRealloc(ctxt->spaceTab, - ctxt->spaceMax * sizeof(ctxt->spaceTab[0])); - if (tmp == NULL) { - xmlErrMemory(ctxt, NULL); - ctxt->spaceMax /=2; - return(-1); - } - ctxt->spaceTab = tmp; - } - ctxt->spaceTab[ctxt->spaceNr] = val; - ctxt->space = &ctxt->spaceTab[ctxt->spaceNr]; - return(ctxt->spaceNr++); -} - -static int spacePop(xmlParserCtxtPtr ctxt) { - int ret; - if (ctxt->spaceNr <= 0) return(0); - ctxt->spaceNr--; - if (ctxt->spaceNr > 0) - ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1]; - else - ctxt->space = &ctxt->spaceTab[0]; - ret = ctxt->spaceTab[ctxt->spaceNr]; - ctxt->spaceTab[ctxt->spaceNr] = -1; - return(ret); -} - -/* - * Macros for accessing the content. Those should be used only by the parser, - * and not exported. - * - * Dirty macros, i.e. one often need to make assumption on the context to - * use them - * - * CUR_PTR return the current pointer to the xmlChar to be parsed. - * To be used with extreme caution since operations consuming - * characters may move the input buffer to a different location ! - * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled - * This should be used internally by the parser - * only to compare to ASCII values otherwise it would break when - * running with UTF-8 encoding. - * RAW same as CUR but in the input buffer, bypass any token - * extraction that may have been done - * NXT(n) returns the n'th next xmlChar. Same as CUR is should be used only - * to compare on ASCII based substring. - * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined - * strings without newlines within the parser. - * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII - * defined char within the parser. - * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding - * - * NEXT Skip to the next character, this does the proper decoding - * in UTF-8 mode. It also pop-up unfinished entities on the fly. - * NEXTL(l) Skip the current unicode character of l xmlChars long. - * CUR_CHAR(l) returns the current unicode character (int), set l - * to the number of xmlChars used for the encoding [0-5]. - * CUR_SCHAR same but operate on a string instead of the context - * COPY_BUF copy the current unicode char to the target buffer, increment - * the index - * GROW, SHRINK handling of input buffers - */ - -#define RAW (*ctxt->input->cur) -#define CUR (*ctxt->input->cur) -#define NXT(val) ctxt->input->cur[(val)] -#define CUR_PTR ctxt->input->cur -#define BASE_PTR ctxt->input->base - -#define CMP4( s, c1, c2, c3, c4 ) \ - ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \ - ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 ) -#define CMP5( s, c1, c2, c3, c4, c5 ) \ - ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 ) -#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \ - ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 ) -#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \ - ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 ) -#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \ - ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 ) -#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \ - ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \ - ((unsigned char *) s)[ 8 ] == c9 ) -#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \ - ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \ - ((unsigned char *) s)[ 9 ] == c10 ) - -#define SKIP(val) do { \ - ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \ - if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ - if ((*ctxt->input->cur == 0) && \ - (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \ - xmlPopInput(ctxt); \ - } while (0) - -#define SKIPL(val) do { \ - int skipl; \ - for(skipl=0; skiplinput->cur) == '\n') { \ - ctxt->input->line++; ctxt->input->col = 1; \ - } else ctxt->input->col++; \ - ctxt->nbChars++; \ - ctxt->input->cur++; \ - } \ - if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ - if ((*ctxt->input->cur == 0) && \ - (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \ - xmlPopInput(ctxt); \ - } while (0) - -#define SHRINK if ((ctxt->progressive == 0) && \ - (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \ - (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \ - xmlSHRINK (ctxt); - -static void xmlSHRINK (xmlParserCtxtPtr ctxt) { - xmlParserInputShrink(ctxt->input); - if ((*ctxt->input->cur == 0) && - (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) - xmlPopInput(ctxt); - } - -#define GROW if ((ctxt->progressive == 0) && \ - (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \ - xmlGROW (ctxt); - -static void xmlGROW (xmlParserCtxtPtr ctxt) { - unsigned long curEnd = ctxt->input->end - ctxt->input->cur; - unsigned long curBase = ctxt->input->cur - ctxt->input->base; - - if (((curEnd > (unsigned long) XML_MAX_LOOKUP_LIMIT) || - (curBase > (unsigned long) XML_MAX_LOOKUP_LIMIT)) && - ((ctxt->input->buf) && (ctxt->input->buf->readcallback != (xmlInputReadCallback) xmlNop)) && - ((ctxt->options & XML_PARSE_HUGE) == 0)) { - xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup"); - xmlHaltParser(ctxt); - return; - } - xmlParserInputGrow(ctxt->input, INPUT_CHUNK); - if ((ctxt->input->cur > ctxt->input->end) || - (ctxt->input->cur < ctxt->input->base)) { - xmlHaltParser(ctxt); - xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound"); - return; - } - if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) && - (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) - xmlPopInput(ctxt); -} - -#define SKIP_BLANKS xmlSkipBlankChars(ctxt) - -#define NEXT xmlNextChar(ctxt) - -#define NEXT1 { \ - ctxt->input->col++; \ - ctxt->input->cur++; \ - ctxt->nbChars++; \ - if (*ctxt->input->cur == 0) \ - xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \ - } - -#define NEXTL(l) do { \ - if (*(ctxt->input->cur) == '\n') { \ - ctxt->input->line++; ctxt->input->col = 1; \ - } else ctxt->input->col++; \ - ctxt->input->cur += l; \ - if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ - } while (0) - -#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l) -#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l) - -#define COPY_BUF(l,b,i,v) \ - if (l == 1) b[i++] = (xmlChar) v; \ - else i += xmlCopyCharMultiByte(&b[i],v) - -/** - * xmlSkipBlankChars: - * @ctxt: the XML parser context - * - * skip all blanks character found at that point in the input streams. - * It pops up finished entities in the process if allowable at that point. - * - * Returns the number of space chars skipped - */ - -int -xmlSkipBlankChars(xmlParserCtxtPtr ctxt) { - int res = 0; - - /* - * It's Okay to use CUR/NEXT here since all the blanks are on - * the ASCII range. - */ - if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) { - const xmlChar *cur; - /* - * if we are in the document content, go really fast - */ - cur = ctxt->input->cur; - while (IS_BLANK_CH(*cur)) { - if (*cur == '\n') { - ctxt->input->line++; ctxt->input->col = 1; - } else { - ctxt->input->col++; - } - cur++; - res++; - if (*cur == 0) { - ctxt->input->cur = cur; - xmlParserInputGrow(ctxt->input, INPUT_CHUNK); - cur = ctxt->input->cur; - } - } - ctxt->input->cur = cur; - } else { - int cur; - do { - cur = CUR; - while ((IS_BLANK_CH(cur) && /* CHECKED tstblanks.xml */ - (ctxt->instate != XML_PARSER_EOF))) { - NEXT; - cur = CUR; - res++; - } - while ((cur == 0) && (ctxt->inputNr > 1) && - (ctxt->instate != XML_PARSER_COMMENT)) { - xmlPopInput(ctxt); - cur = CUR; - } - /* - * Need to handle support of entities branching here - */ - if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); - } while ((IS_BLANK(cur)) && /* CHECKED tstblanks.xml */ - (ctxt->instate != XML_PARSER_EOF)); - } - return(res); -} - -/************************************************************************ - * * - * Commodity functions to handle entities * - * * - ************************************************************************/ - -/** - * xmlPopInput: - * @ctxt: an XML parser context - * - * xmlPopInput: the current input pointed by ctxt->input came to an end - * pop it and return the next char. - * - * Returns the current xmlChar in the parser context - */ -xmlChar -xmlPopInput(xmlParserCtxtPtr ctxt) { - if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0); - if (xmlParserDebugEntities) - xmlGenericError(xmlGenericErrorContext, - "Popping input %d\n", ctxt->inputNr); - xmlFreeInputStream(inputPop(ctxt)); - if ((*ctxt->input->cur == 0) && - (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) - return(xmlPopInput(ctxt)); - return(CUR); -} - -/** - * xmlPushInput: - * @ctxt: an XML parser context - * @input: an XML parser input fragment (entity, XML fragment ...). - * - * xmlPushInput: switch to a new input stream which is stacked on top - * of the previous one(s). - * Returns -1 in case of error or the index in the input stack - */ -int -xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) { - int ret; - if (input == NULL) return(-1); - - if (xmlParserDebugEntities) { - if ((ctxt->input != NULL) && (ctxt->input->filename)) - xmlGenericError(xmlGenericErrorContext, - "%s(%d): ", ctxt->input->filename, - ctxt->input->line); - xmlGenericError(xmlGenericErrorContext, - "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur); - } - ret = inputPush(ctxt, input); - if (ctxt->instate == XML_PARSER_EOF) - return(-1); - GROW; - return(ret); -} - -/** - * xmlParseCharRef: - * @ctxt: an XML parser context - * - * parse Reference declarations - * - * [66] CharRef ::= '&#' [0-9]+ ';' | - * '&#x' [0-9a-fA-F]+ ';' - * - * [ WFC: Legal Character ] - * Characters referred to using character references must match the - * production for Char. - * - * Returns the value parsed (as an int), 0 in case of error - */ -int -xmlParseCharRef(xmlParserCtxtPtr ctxt) { - unsigned int val = 0; - int count = 0; - unsigned int outofrange = 0; - - /* - * Using RAW/CUR/NEXT is okay since we are working on ASCII range here - */ - if ((RAW == '&') && (NXT(1) == '#') && - (NXT(2) == 'x')) { - SKIP(3); - GROW; - while (RAW != ';') { /* loop blocked by count */ - if (count++ > 20) { - count = 0; - GROW; - if (ctxt->instate == XML_PARSER_EOF) - return(0); - } - if ((RAW >= '0') && (RAW <= '9')) - val = val * 16 + (CUR - '0'); - else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20)) - val = val * 16 + (CUR - 'a') + 10; - else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20)) - val = val * 16 + (CUR - 'A') + 10; - else { - xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL); - val = 0; - break; - } - if (val > 0x10FFFF) - outofrange = val; - - NEXT; - count++; - } - if (RAW == ';') { - /* on purpose to avoid reentrancy problems with NEXT and SKIP */ - ctxt->input->col++; - ctxt->nbChars ++; - ctxt->input->cur++; - } - } else if ((RAW == '&') && (NXT(1) == '#')) { - SKIP(2); - GROW; - while (RAW != ';') { /* loop blocked by count */ - if (count++ > 20) { - count = 0; - GROW; - if (ctxt->instate == XML_PARSER_EOF) - return(0); - } - if ((RAW >= '0') && (RAW <= '9')) - val = val * 10 + (CUR - '0'); - else { - xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); - val = 0; - break; - } - if (val > 0x10FFFF) - outofrange = val; - - NEXT; - count++; - } - if (RAW == ';') { - /* on purpose to avoid reentrancy problems with NEXT and SKIP */ - ctxt->input->col++; - ctxt->nbChars ++; - ctxt->input->cur++; - } - } else { - xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL); - } - - /* - * [ WFC: Legal Character ] - * Characters referred to using character references must match the - * production for Char. - */ - if ((IS_CHAR(val) && (outofrange == 0))) { - return(val); - } else { - xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, - "xmlParseCharRef: invalid xmlChar value %d\n", - val); - } - return(0); -} - -/** - * xmlParseStringCharRef: - * @ctxt: an XML parser context - * @str: a pointer to an index in the string - * - * parse Reference declarations, variant parsing from a string rather - * than an an input flow. - * - * [66] CharRef ::= '&#' [0-9]+ ';' | - * '&#x' [0-9a-fA-F]+ ';' - * - * [ WFC: Legal Character ] - * Characters referred to using character references must match the - * production for Char. - * - * Returns the value parsed (as an int), 0 in case of error, str will be - * updated to the current value of the index - */ -static int -xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) { - const xmlChar *ptr; - xmlChar cur; - unsigned int val = 0; - unsigned int outofrange = 0; - - if ((str == NULL) || (*str == NULL)) return(0); - ptr = *str; - cur = *ptr; - if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) { - ptr += 3; - cur = *ptr; - while (cur != ';') { /* Non input consuming loop */ - if ((cur >= '0') && (cur <= '9')) - val = val * 16 + (cur - '0'); - else if ((cur >= 'a') && (cur <= 'f')) - val = val * 16 + (cur - 'a') + 10; - else if ((cur >= 'A') && (cur <= 'F')) - val = val * 16 + (cur - 'A') + 10; - else { - xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL); - val = 0; - break; - } - if (val > 0x10FFFF) - outofrange = val; - - ptr++; - cur = *ptr; - } - if (cur == ';') - ptr++; - } else if ((cur == '&') && (ptr[1] == '#')){ - ptr += 2; - cur = *ptr; - while (cur != ';') { /* Non input consuming loops */ - if ((cur >= '0') && (cur <= '9')) - val = val * 10 + (cur - '0'); - else { - xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); - val = 0; - break; - } - if (val > 0x10FFFF) - outofrange = val; - - ptr++; - cur = *ptr; - } - if (cur == ';') - ptr++; - } else { - xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL); - return(0); - } - *str = ptr; - - /* - * [ WFC: Legal Character ] - * Characters referred to using character references must match the - * production for Char. - */ - if ((IS_CHAR(val) && (outofrange == 0))) { - return(val); - } else { - xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, - "xmlParseStringCharRef: invalid xmlChar value %d\n", - val); - } - return(0); -} - -/** - * xmlNewBlanksWrapperInputStream: - * @ctxt: an XML parser context - * @entity: an Entity pointer - * - * Create a new input stream for wrapping - * blanks around a PEReference - * - * Returns the new input stream or NULL - */ - -static void deallocblankswrapper (xmlChar *str) {xmlFree(str);} - -static xmlParserInputPtr -xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { - xmlParserInputPtr input; - xmlChar *buffer; - size_t length; - if (entity == NULL) { - xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, - "xmlNewBlanksWrapperInputStream entity\n"); - return(NULL); - } - if (xmlParserDebugEntities) - xmlGenericError(xmlGenericErrorContext, - "new blanks wrapper for entity: %s\n", entity->name); - input = xmlNewInputStream(ctxt); - if (input == NULL) { - return(NULL); - } - length = xmlStrlen(entity->name) + 5; - buffer = xmlMallocAtomic(length); - if (buffer == NULL) { - xmlErrMemory(ctxt, NULL); - xmlFree(input); - return(NULL); - } - buffer [0] = ' '; - buffer [1] = '%'; - buffer [length-3] = ';'; - buffer [length-2] = ' '; - buffer [length-1] = 0; - memcpy(buffer + 2, entity->name, length - 5); - input->free = deallocblankswrapper; - input->base = buffer; - input->cur = buffer; - input->length = length; - input->end = &buffer[length]; - return(input); -} - -/** - * xmlParserHandlePEReference: - * @ctxt: the parser context - * - * [69] PEReference ::= '%' Name ';' - * - * [ WFC: No Recursion ] - * A parsed entity must not contain a recursive - * reference to itself, either directly or indirectly. - * - * [ WFC: Entity Declared ] - * In a document without any DTD, a document with only an internal DTD - * subset which contains no parameter entity references, or a document - * with "standalone='yes'", ... ... The declaration of a parameter - * entity must precede any reference to it... - * - * [ VC: Entity Declared ] - * In a document with an external subset or external parameter entities - * with "standalone='no'", ... ... The declaration of a parameter entity - * must precede any reference to it... - * - * [ WFC: In DTD ] - * Parameter-entity references may only appear in the DTD. - * NOTE: misleading but this is handled. - * - * A PEReference may have been detected in the current input stream - * the handling is done accordingly to - * http://www.w3.org/TR/REC-xml#entproc - * i.e. - * - Included in literal in entity values - * - Included as Parameter Entity reference within DTDs - */ -void -xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { - const xmlChar *name; - xmlEntityPtr entity = NULL; - xmlParserInputPtr input; - - if (RAW != '%') return; - switch(ctxt->instate) { - case XML_PARSER_CDATA_SECTION: - return; - case XML_PARSER_COMMENT: - return; - case XML_PARSER_START_TAG: - return; - case XML_PARSER_END_TAG: - return; - case XML_PARSER_EOF: - xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL); - return; - case XML_PARSER_PROLOG: - case XML_PARSER_START: - case XML_PARSER_MISC: - xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL); - return; - case XML_PARSER_ENTITY_DECL: - case XML_PARSER_CONTENT: - case XML_PARSER_ATTRIBUTE_VALUE: - case XML_PARSER_PI: - case XML_PARSER_SYSTEM_LITERAL: - case XML_PARSER_PUBLIC_LITERAL: - /* we just ignore it there */ - return; - case XML_PARSER_EPILOG: - xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL); - return; - case XML_PARSER_ENTITY_VALUE: - /* - * NOTE: in the case of entity values, we don't do the - * substitution here since we need the literal - * entity value to be able to save the internal - * subset of the document. - * This will be handled by xmlStringDecodeEntities - */ - return; - case XML_PARSER_DTD: - /* - * [WFC: Well-Formedness Constraint: PEs in Internal Subset] - * In the internal DTD subset, parameter-entity references - * can occur only where markup declarations can occur, not - * within markup declarations. - * In that case this is handled in xmlParseMarkupDecl - */ - if ((ctxt->external == 0) && (ctxt->inputNr == 1)) - return; - if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0) - return; - break; - case XML_PARSER_IGNORE: - return; - } - - NEXT; - name = xmlParseName(ctxt); - if (xmlParserDebugEntities) - xmlGenericError(xmlGenericErrorContext, - "PEReference: %s\n", name); - if (name == NULL) { - xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL); - } else { - if (RAW == ';') { - NEXT; - if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL)) - entity = ctxt->sax->getParameterEntity(ctxt->userData, name); - if (ctxt->instate == XML_PARSER_EOF) - return; - if (entity == NULL) { - - /* - * [ WFC: Entity Declared ] - * In a document without any DTD, a document with only an - * internal DTD subset which contains no parameter entity - * references, or a document with "standalone='yes'", ... - * ... The declaration of a parameter entity must precede - * any reference to it... - */ - if ((ctxt->standalone == 1) || - ((ctxt->hasExternalSubset == 0) && - (ctxt->hasPErefs == 0))) { - xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY, - "PEReference: %%%s; not found\n", name); - } else { - /* - * [ VC: Entity Declared ] - * In a document with an external subset or external - * parameter entities with "standalone='no'", ... - * ... The declaration of a parameter entity must precede - * any reference to it... - */ - if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) { - xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY, - "PEReference: %%%s; not found\n", - name, NULL); - } else - xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, - "PEReference: %%%s; not found\n", - name, NULL); - ctxt->valid = 0; - } - xmlParserEntityCheck(ctxt, 0, NULL, 0); - } else if (ctxt->input->free != deallocblankswrapper) { - input = xmlNewBlanksWrapperInputStream(ctxt, entity); - if (xmlPushInput(ctxt, input) < 0) - return; - } else { - if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) || - (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) { - xmlChar start[4]; - xmlCharEncoding enc; - - /* - * Note: external parameter entities will not be loaded, it - * is not required for a non-validating parser, unless the - * option of validating, or substituting entities were - * given. Doing so is far more secure as the parser will - * only process data coming from the document entity by - * default. - */ - if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && - ((ctxt->options & XML_PARSE_NOENT) == 0) && - ((ctxt->options & XML_PARSE_DTDVALID) == 0) && - ((ctxt->options & XML_PARSE_DTDLOAD) == 0) && - ((ctxt->options & XML_PARSE_DTDATTR) == 0) && - (ctxt->replaceEntities == 0) && - (ctxt->validate == 0)) - return; - - /* - * handle the extra spaces added before and after - * c.f. http://www.w3.org/TR/REC-xml#as-PE - * this is done independently. - */ - input = xmlNewEntityInputStream(ctxt, entity); - if (xmlPushInput(ctxt, input) < 0) - return; - - /* - * Get the 4 first bytes and decode the charset - * if enc != XML_CHAR_ENCODING_NONE - * plug some encoding conversion routines. - * Note that, since we may have some non-UTF8 - * encoding (like UTF16, bug 135229), the 'length' - * is not known, but we can calculate based upon - * the amount of data in the buffer. - */ - GROW - if (ctxt->instate == XML_PARSER_EOF) - return; - if ((ctxt->input->end - ctxt->input->cur)>=4) { - start[0] = RAW; - start[1] = NXT(1); - start[2] = NXT(2); - start[3] = NXT(3); - enc = xmlDetectCharEncoding(start, 4); - if (enc != XML_CHAR_ENCODING_NONE) { - xmlSwitchEncoding(ctxt, enc); - } - } - - if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && - (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) && - (IS_BLANK_CH(NXT(5)))) { - xmlParseTextDecl(ctxt); - } - } else { - xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER, - "PEReference: %s is not a parameter entity\n", - name); - } - } - } else { - xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL); - } - } -} - -/* - * Macro used to grow the current buffer. - * buffer##_size is expected to be a size_t - * mem_error: is expected to handle memory allocation failures - */ -#define growBuffer(buffer, n) { \ - xmlChar *tmp; \ - size_t new_size = buffer##_size * 2 + n; \ - if (new_size < buffer##_size) goto mem_error; \ - tmp = (xmlChar *) xmlRealloc(buffer, new_size); \ - if (tmp == NULL) goto mem_error; \ - buffer = tmp; \ - buffer##_size = new_size; \ -} - -/** - * xmlStringLenDecodeEntities: - * @ctxt: the parser context - * @str: the input string - * @len: the string length - * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF - * @end: an end marker xmlChar, 0 if none - * @end2: an end marker xmlChar, 0 if none - * @end3: an end marker xmlChar, 0 if none - * - * Takes a entity string content and process to do the adequate substitutions. - * - * [67] Reference ::= EntityRef | CharRef - * - * [69] PEReference ::= '%' Name ';' - * - * Returns A newly allocated string with the substitution done. The caller - * must deallocate it ! - */ -xmlChar * -xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, - int what, xmlChar end, xmlChar end2, xmlChar end3) { - xmlChar *buffer = NULL; - size_t buffer_size = 0; - size_t nbchars = 0; - - xmlChar *current = NULL; - xmlChar *rep = NULL; - const xmlChar *last; - xmlEntityPtr ent; - int c,l; - - if ((ctxt == NULL) || (str == NULL) || (len < 0)) - return(NULL); - last = str + len; - - if (((ctxt->depth > 40) && - ((ctxt->options & XML_PARSE_HUGE) == 0)) || - (ctxt->depth > 1024)) { - xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); - return(NULL); - } - - /* - * allocate a translation buffer. - */ - buffer_size = XML_PARSER_BIG_BUFFER_SIZE; - buffer = (xmlChar *) xmlMallocAtomic(buffer_size); - if (buffer == NULL) goto mem_error; - - /* - * OK loop until we reach one of the ending char or a size limit. - * we are operating on already parsed values. - */ - if (str < last) - c = CUR_SCHAR(str, l); - else - c = 0; - while ((c != 0) && (c != end) && /* non input consuming loop */ - (c != end2) && (c != end3)) { - - if (c == 0) break; - if ((c == '&') && (str[1] == '#')) { - int val = xmlParseStringCharRef(ctxt, &str); - if (val != 0) { - COPY_BUF(0,buffer,nbchars,val); - } - if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { - growBuffer(buffer, XML_PARSER_BUFFER_SIZE); - } - } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) { - if (xmlParserDebugEntities) - xmlGenericError(xmlGenericErrorContext, - "String decoding Entity Reference: %.30s\n", - str); - ent = xmlParseStringEntityRef(ctxt, &str); - if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) || - (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR)) - goto int_error; - xmlParserEntityCheck(ctxt, 0, ent, 0); - if (ent != NULL) - ctxt->nbentities += ent->checked / 2; - if ((ent != NULL) && - (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { - if (ent->content != NULL) { - COPY_BUF(0,buffer,nbchars,ent->content[0]); - if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { - growBuffer(buffer, XML_PARSER_BUFFER_SIZE); - } - } else { - xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, - "predefined entity has no content\n"); - } - } else if ((ent != NULL) && (ent->content != NULL)) { - ctxt->depth++; - rep = xmlStringDecodeEntities(ctxt, ent->content, what, - 0, 0, 0); - ctxt->depth--; - - if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) || - (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR)) - goto int_error; - - if (rep != NULL) { - current = rep; - while (*current != 0) { /* non input consuming loop */ - buffer[nbchars++] = *current++; - if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { - if (xmlParserEntityCheck(ctxt, nbchars, ent, 0)) - goto int_error; - growBuffer(buffer, XML_PARSER_BUFFER_SIZE); - } - } - xmlFree(rep); - rep = NULL; - } - } else if (ent != NULL) { - int i = xmlStrlen(ent->name); - const xmlChar *cur = ent->name; - - buffer[nbchars++] = '&'; - if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) { - growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE); - } - for (;i > 0;i--) - buffer[nbchars++] = *cur++; - buffer[nbchars++] = ';'; - } - } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) { - if (xmlParserDebugEntities) - xmlGenericError(xmlGenericErrorContext, - "String decoding PE Reference: %.30s\n", str); - ent = xmlParseStringPEReference(ctxt, &str); - if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP) - goto int_error; - xmlParserEntityCheck(ctxt, 0, ent, 0); - if (ent != NULL) - ctxt->nbentities += ent->checked / 2; - if (ent != NULL) { - if (ent->content == NULL) { - /* - * Note: external parsed entities will not be loaded, - * it is not required for a non-validating parser to - * complete external PEreferences coming from the - * internal subset - */ - if (((ctxt->options & XML_PARSE_NOENT) != 0) || - ((ctxt->options & XML_PARSE_DTDVALID) != 0) || - (ctxt->validate != 0)) { - xmlLoadEntityContent(ctxt, ent); - } else { - xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING, - "not validating will not read content for PE entity %s\n", - ent->name, NULL); - } - } - ctxt->depth++; - rep = xmlStringDecodeEntities(ctxt, ent->content, what, - 0, 0, 0); - ctxt->depth--; - if (rep != NULL) { - current = rep; - while (*current != 0) { /* non input consuming loop */ - buffer[nbchars++] = *current++; - if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { - if (xmlParserEntityCheck(ctxt, nbchars, ent, 0)) - goto int_error; - growBuffer(buffer, XML_PARSER_BUFFER_SIZE); - } - } - xmlFree(rep); - rep = NULL; - } - } - } else { - COPY_BUF(l,buffer,nbchars,c); - str += l; - if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) { - growBuffer(buffer, XML_PARSER_BUFFER_SIZE); - } - } - if (str < last) - c = CUR_SCHAR(str, l); - else - c = 0; - } - buffer[nbchars] = 0; - return(buffer); - -mem_error: - xmlErrMemory(ctxt, NULL); -int_error: - if (rep != NULL) - xmlFree(rep); - if (buffer != NULL) - xmlFree(buffer); - return(NULL); -} - -/** - * xmlStringDecodeEntities: - * @ctxt: the parser context - * @str: the input string - * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF - * @end: an end marker xmlChar, 0 if none - * @end2: an end marker xmlChar, 0 if none - * @end3: an end marker xmlChar, 0 if none - * - * Takes a entity string content and process to do the adequate substitutions. - * - * [67] Reference ::= EntityRef | CharRef - * - * [69] PEReference ::= '%' Name ';' - * - * Returns A newly allocated string with the substitution done. The caller - * must deallocate it ! - */ -xmlChar * -xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what, - xmlChar end, xmlChar end2, xmlChar end3) { - if ((ctxt == NULL) || (str == NULL)) return(NULL); - return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what, - end, end2, end3)); -} - -/************************************************************************ - * * - * Commodity functions, cleanup needed ? * - * * - ************************************************************************/ - -/** - * areBlanks: - * @ctxt: an XML parser context - * @str: a xmlChar * - * @len: the size of @str - * @blank_chars: we know the chars are blanks - * - * Is this a sequence of blank chars that one can ignore ? - * - * Returns 1 if ignorable 0 otherwise. - */ - -static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, - int blank_chars) { - int i, ret; - xmlNodePtr lastChild; - - /* - * Don't spend time trying to differentiate them, the same callback is - * used ! - */ - if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters) - return(0); - - /* - * Check for xml:space value. - */ - if ((ctxt->space == NULL) || (*(ctxt->space) == 1) || - (*(ctxt->space) == -2)) - return(0); - - /* - * Check that the string is made of blanks - */ - if (blank_chars == 0) { - for (i = 0;i < len;i++) - if (!(IS_BLANK_CH(str[i]))) return(0); - } - - /* - * Look if the element is mixed content in the DTD if available - */ - if (ctxt->node == NULL) return(0); - if (ctxt->myDoc != NULL) { - ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name); - if (ret == 0) return(1); - if (ret == 1) return(0); - } - - /* - * Otherwise, heuristic :-\ - */ - if ((RAW != '<') && (RAW != 0xD)) return(0); - if ((ctxt->node->children == NULL) && - (RAW == '<') && (NXT(1) == '/')) return(0); - - lastChild = xmlGetLastChild(ctxt->node); - if (lastChild == NULL) { - if ((ctxt->node->type != XML_ELEMENT_NODE) && - (ctxt->node->content != NULL)) return(0); - } else if (xmlNodeIsText(lastChild)) - return(0); - else if ((ctxt->node->children != NULL) && - (xmlNodeIsText(ctxt->node->children))) - return(0); - return(1); -} - -/************************************************************************ - * * - * Extra stuff for namespace support * - * Relates to http://www.w3.org/TR/WD-xml-names * - * * - ************************************************************************/ - -/** - * xmlSplitQName: - * @ctxt: an XML parser context - * @name: an XML parser context - * @prefix: a xmlChar ** - * - * parse an UTF8 encoded XML qualified name string - * - * [NS 5] QName ::= (Prefix ':')? LocalPart - * - * [NS 6] Prefix ::= NCName - * - * [NS 7] LocalPart ::= NCName - * - * Returns the local part, and prefix is updated - * to get the Prefix if any. - */ - -xmlChar * -xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) { - xmlChar buf[XML_MAX_NAMELEN + 5]; - xmlChar *buffer = NULL; - int len = 0; - int max = XML_MAX_NAMELEN; - xmlChar *ret = NULL; - const xmlChar *cur = name; - int c; - - if (prefix == NULL) return(NULL); - *prefix = NULL; - - if (cur == NULL) return(NULL); - -#ifndef XML_XML_NAMESPACE - /* xml: prefix is not really a namespace */ - if ((cur[0] == 'x') && (cur[1] == 'm') && - (cur[2] == 'l') && (cur[3] == ':')) - return(xmlStrdup(name)); -#endif - - /* nasty but well=formed */ - if (cur[0] == ':') - return(xmlStrdup(name)); - - c = *cur++; - while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */ - buf[len++] = c; - c = *cur++; - } - if (len >= max) { - /* - * Okay someone managed to make a huge name, so he's ready to pay - * for the processing speed. - */ - max = len * 2; - - buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); - if (buffer == NULL) { - xmlErrMemory(ctxt, NULL); - return(NULL); - } - memcpy(buffer, buf, len); - while ((c != 0) && (c != ':')) { /* tested bigname.xml */ - if (len + 10 > max) { - xmlChar *tmp; - - max *= 2; - tmp = (xmlChar *) xmlRealloc(buffer, - max * sizeof(xmlChar)); - if (tmp == NULL) { - xmlFree(buffer); - xmlErrMemory(ctxt, NULL); - return(NULL); - } - buffer = tmp; - } - buffer[len++] = c; - c = *cur++; - } - buffer[len] = 0; - } - - if ((c == ':') && (*cur == 0)) { - if (buffer != NULL) - xmlFree(buffer); - *prefix = NULL; - return(xmlStrdup(name)); - } - - if (buffer == NULL) - ret = xmlStrndup(buf, len); - else { - ret = buffer; - buffer = NULL; - max = XML_MAX_NAMELEN; - } - - - if (c == ':') { - c = *cur; - *prefix = ret; - if (c == 0) { - return(xmlStrndup(BAD_CAST "", 0)); - } - len = 0; - - /* - * Check that the first character is proper to start - * a new name - */ - if (!(((c >= 0x61) && (c <= 0x7A)) || - ((c >= 0x41) && (c <= 0x5A)) || - (c == '_') || (c == ':'))) { - int l; - int first = CUR_SCHAR(cur, l); - - if (!IS_LETTER(first) && (first != '_')) { - xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME, - "Name %s is not XML Namespace compliant\n", - name); - } - } - cur++; - - while ((c != 0) && (len < max)) { /* tested bigname2.xml */ - buf[len++] = c; - c = *cur++; - } - if (len >= max) { - /* - * Okay someone managed to make a huge name, so he's ready to pay - * for the processing speed. - */ - max = len * 2; - - buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); - if (buffer == NULL) { - xmlErrMemory(ctxt, NULL); - return(NULL); - } - memcpy(buffer, buf, len); - while (c != 0) { /* tested bigname2.xml */ - if (len + 10 > max) { - xmlChar *tmp; - - max *= 2; - tmp = (xmlChar *) xmlRealloc(buffer, - max * sizeof(xmlChar)); - if (tmp == NULL) { - xmlErrMemory(ctxt, NULL); - xmlFree(buffer); - return(NULL); - } - buffer = tmp; - } - buffer[len++] = c; - c = *cur++; - } - buffer[len] = 0; - } - - if (buffer == NULL) - ret = xmlStrndup(buf, len); - else { - ret = buffer; - } - } - - return(ret); -} - -/************************************************************************ - * * - * The parser itself * - * Relates to http://www.w3.org/TR/REC-xml * - * * - ************************************************************************/ - -/************************************************************************ - * * - * Routines to parse Name, NCName and NmToken * - * * - ************************************************************************/ -#ifdef DEBUG -static unsigned long nbParseName = 0; -static unsigned long nbParseNmToken = 0; -static unsigned long nbParseNCName = 0; -static unsigned long nbParseNCNameComplex = 0; -static unsigned long nbParseNameComplex = 0; -static unsigned long nbParseStringName = 0; -#endif - -/* - * The two following functions are related to the change of accepted - * characters for Name and NmToken in the Revision 5 of XML-1.0 - * They correspond to the modified production [4] and the new production [4a] - * changes in that revision. Also note that the macros used for the - * productions Letter, Digit, CombiningChar and Extender are not needed - * anymore. - * We still keep compatibility to pre-revision5 parsing semantic if the - * new XML_PARSE_OLD10 option is given to the parser. - */ -static int -xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) { - if ((ctxt->options & XML_PARSE_OLD10) == 0) { - /* - * Use the new checks of production [4] [4a] amd [5] of the - * Update 5 of XML-1.0 - */ - if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ - (((c >= 'a') && (c <= 'z')) || - ((c >= 'A') && (c <= 'Z')) || - (c == '_') || (c == ':') || - ((c >= 0xC0) && (c <= 0xD6)) || - ((c >= 0xD8) && (c <= 0xF6)) || - ((c >= 0xF8) && (c <= 0x2FF)) || - ((c >= 0x370) && (c <= 0x37D)) || - ((c >= 0x37F) && (c <= 0x1FFF)) || - ((c >= 0x200C) && (c <= 0x200D)) || - ((c >= 0x2070) && (c <= 0x218F)) || - ((c >= 0x2C00) && (c <= 0x2FEF)) || - ((c >= 0x3001) && (c <= 0xD7FF)) || - ((c >= 0xF900) && (c <= 0xFDCF)) || - ((c >= 0xFDF0) && (c <= 0xFFFD)) || - ((c >= 0x10000) && (c <= 0xEFFFF)))) - return(1); - } else { - if (IS_LETTER(c) || (c == '_') || (c == ':')) - return(1); - } - return(0); -} - -static int -xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) { - if ((ctxt->options & XML_PARSE_OLD10) == 0) { - /* - * Use the new checks of production [4] [4a] amd [5] of the - * Update 5 of XML-1.0 - */ - if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ - (((c >= 'a') && (c <= 'z')) || - ((c >= 'A') && (c <= 'Z')) || - ((c >= '0') && (c <= '9')) || /* !start */ - (c == '_') || (c == ':') || - (c == '-') || (c == '.') || (c == 0xB7) || /* !start */ - ((c >= 0xC0) && (c <= 0xD6)) || - ((c >= 0xD8) && (c <= 0xF6)) || - ((c >= 0xF8) && (c <= 0x2FF)) || - ((c >= 0x300) && (c <= 0x36F)) || /* !start */ - ((c >= 0x370) && (c <= 0x37D)) || - ((c >= 0x37F) && (c <= 0x1FFF)) || - ((c >= 0x200C) && (c <= 0x200D)) || - ((c >= 0x203F) && (c <= 0x2040)) || /* !start */ - ((c >= 0x2070) && (c <= 0x218F)) || - ((c >= 0x2C00) && (c <= 0x2FEF)) || - ((c >= 0x3001) && (c <= 0xD7FF)) || - ((c >= 0xF900) && (c <= 0xFDCF)) || - ((c >= 0xFDF0) && (c <= 0xFFFD)) || - ((c >= 0x10000) && (c <= 0xEFFFF)))) - return(1); - } else { - if ((IS_LETTER(c)) || (IS_DIGIT(c)) || - (c == '.') || (c == '-') || - (c == '_') || (c == ':') || - (IS_COMBINING(c)) || - (IS_EXTENDER(c))) - return(1); - } - return(0); -} - -static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, - int *len, int *alloc, int normalize); - -static const xmlChar * -xmlParseNameComplex(xmlParserCtxtPtr ctxt) { - int len = 0, l; - int c; - int count = 0; - -#ifdef DEBUG - nbParseNameComplex++; -#endif - - /* - * Handler for more complex cases - */ - GROW; - if (ctxt->instate == XML_PARSER_EOF) - return(NULL); - c = CUR_CHAR(l); - if ((ctxt->options & XML_PARSE_OLD10) == 0) { - /* - * Use the new checks of production [4] [4a] amd [5] of the - * Update 5 of XML-1.0 - */ - if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ - (!(((c >= 'a') && (c <= 'z')) || - ((c >= 'A') && (c <= 'Z')) || - (c == '_') || (c == ':') || - ((c >= 0xC0) && (c <= 0xD6)) || - ((c >= 0xD8) && (c <= 0xF6)) || - ((c >= 0xF8) && (c <= 0x2FF)) || - ((c >= 0x370) && (c <= 0x37D)) || - ((c >= 0x37F) && (c <= 0x1FFF)) || - ((c >= 0x200C) && (c <= 0x200D)) || - ((c >= 0x2070) && (c <= 0x218F)) || - ((c >= 0x2C00) && (c <= 0x2FEF)) || - ((c >= 0x3001) && (c <= 0xD7FF)) || - ((c >= 0xF900) && (c <= 0xFDCF)) || - ((c >= 0xFDF0) && (c <= 0xFFFD)) || - ((c >= 0x10000) && (c <= 0xEFFFF))))) { - return(NULL); - } - len += l; - NEXTL(l); - c = CUR_CHAR(l); - while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ - (((c >= 'a') && (c <= 'z')) || - ((c >= 'A') && (c <= 'Z')) || - ((c >= '0') && (c <= '9')) || /* !start */ - (c == '_') || (c == ':') || - (c == '-') || (c == '.') || (c == 0xB7) || /* !start */ - ((c >= 0xC0) && (c <= 0xD6)) || - ((c >= 0xD8) && (c <= 0xF6)) || - ((c >= 0xF8) && (c <= 0x2FF)) || - ((c >= 0x300) && (c <= 0x36F)) || /* !start */ - ((c >= 0x370) && (c <= 0x37D)) || - ((c >= 0x37F) && (c <= 0x1FFF)) || - ((c >= 0x200C) && (c <= 0x200D)) || - ((c >= 0x203F) && (c <= 0x2040)) || /* !start */ - ((c >= 0x2070) && (c <= 0x218F)) || - ((c >= 0x2C00) && (c <= 0x2FEF)) || - ((c >= 0x3001) && (c <= 0xD7FF)) || - ((c >= 0xF900) && (c <= 0xFDCF)) || - ((c >= 0xFDF0) && (c <= 0xFFFD)) || - ((c >= 0x10000) && (c <= 0xEFFFF)) - )) { - if (count++ > XML_PARSER_CHUNK_SIZE) { - count = 0; - GROW; - if (ctxt->instate == XML_PARSER_EOF) - return(NULL); - } - len += l; - NEXTL(l); - c = CUR_CHAR(l); - } - } else { - if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ - (!IS_LETTER(c) && (c != '_') && - (c != ':'))) { - return(NULL); - } - len += l; - NEXTL(l); - c = CUR_CHAR(l); - - while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ - ((IS_LETTER(c)) || (IS_DIGIT(c)) || - (c == '.') || (c == '-') || - (c == '_') || (c == ':') || - (IS_COMBINING(c)) || - (IS_EXTENDER(c)))) { - if (count++ > XML_PARSER_CHUNK_SIZE) { - count = 0; - GROW; - if (ctxt->instate == XML_PARSER_EOF) - return(NULL); - } - len += l; - NEXTL(l); - c = CUR_CHAR(l); - if (c == 0) { - count = 0; - GROW; - if (ctxt->instate == XML_PARSER_EOF) - return(NULL); - c = CUR_CHAR(l); - } - } - } - if ((len > XML_MAX_NAME_LENGTH) && - ((ctxt->options & XML_PARSE_HUGE) == 0)) { - xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name"); - return(NULL); - } - if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r')) - return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len)); - return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); -} - -/** - * xmlParseName: - * @ctxt: an XML parser context - * - * parse an XML name. - * - * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | - * CombiningChar | Extender - * - * [5] Name ::= (Letter | '_' | ':') (NameChar)* - * - * [6] Names ::= Name (#x20 Name)* - * - * Returns the Name parsed or NULL - */ - -const xmlChar * -xmlParseName(xmlParserCtxtPtr ctxt) { - const xmlChar *in; - const xmlChar *ret; - int count = 0; - - GROW; - -#ifdef DEBUG - nbParseName++; -#endif - - /* - * Accelerator for simple ASCII names - */ - in = ctxt->input->cur; - if (((*in >= 0x61) && (*in <= 0x7A)) || - ((*in >= 0x41) && (*in <= 0x5A)) || - (*in == '_') || (*in == ':')) { - in++; - while (((*in >= 0x61) && (*in <= 0x7A)) || - ((*in >= 0x41) && (*in <= 0x5A)) || - ((*in >= 0x30) && (*in <= 0x39)) || - (*in == '_') || (*in == '-') || - (*in == ':') || (*in == '.')) - in++; - if ((*in > 0) && (*in < 0x80)) { - count = in - ctxt->input->cur; - if ((count > XML_MAX_NAME_LENGTH) && - ((ctxt->options & XML_PARSE_HUGE) == 0)) { - xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name"); - return(NULL); - } - ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); - ctxt->input->cur = in; - ctxt->nbChars += count; - ctxt->input->col += count; - if (ret == NULL) - xmlErrMemory(ctxt, NULL); - return(ret); - } - } - /* accelerator for special cases */ - return(xmlParseNameComplex(ctxt)); -} - -static const xmlChar * -xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) { - int len = 0, l; - int c; - int count = 0; - size_t startPosition = 0; - -#ifdef DEBUG - nbParseNCNameComplex++; -#endif - - /* - * Handler for more complex cases - */ - GROW; - startPosition = CUR_PTR - BASE_PTR; - c = CUR_CHAR(l); - if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ - (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) { - return(NULL); - } - - while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ - (xmlIsNameChar(ctxt, c) && (c != ':'))) { - if (count++ > XML_PARSER_CHUNK_SIZE) { - if ((len > XML_MAX_NAME_LENGTH) && - ((ctxt->options & XML_PARSE_HUGE) == 0)) { - xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); - return(NULL); - } - count = 0; - GROW; - if (ctxt->instate == XML_PARSER_EOF) - return(NULL); - } - len += l; - NEXTL(l); - c = CUR_CHAR(l); - if (c == 0) { - count = 0; - /* - * when shrinking to extend the buffer we really need to preserve - * the part of the name we already parsed. Hence rolling back - * by current lenght. - */ - ctxt->input->cur -= l; - GROW; - ctxt->input->cur += l; - if (ctxt->instate == XML_PARSER_EOF) - return(NULL); - c = CUR_CHAR(l); - } - } - if ((len > XML_MAX_NAME_LENGTH) && - ((ctxt->options & XML_PARSE_HUGE) == 0)) { - xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); - return(NULL); - } - return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len)); -} - -/** - * xmlParseNCName: - * @ctxt: an XML parser context - * @len: length of the string parsed - * - * parse an XML name. - * - * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' | - * CombiningChar | Extender - * - * [5NS] NCName ::= (Letter | '_') (NCNameChar)* - * - * Returns the Name parsed or NULL - */ - -static const xmlChar * -xmlParseNCName(xmlParserCtxtPtr ctxt) { - const xmlChar *in, *e; - const xmlChar *ret; - int count = 0; - -#ifdef DEBUG - nbParseNCName++; -#endif - - /* - * Accelerator for simple ASCII names - */ - in = ctxt->input->cur; - e = ctxt->input->end; - if ((((*in >= 0x61) && (*in <= 0x7A)) || - ((*in >= 0x41) && (*in <= 0x5A)) || - (*in == '_')) && (in < e)) { - in++; - while ((((*in >= 0x61) && (*in <= 0x7A)) || - ((*in >= 0x41) && (*in <= 0x5A)) || - ((*in >= 0x30) && (*in <= 0x39)) || - (*in == '_') || (*in == '-') || - (*in == '.')) && (in < e)) - in++; - if (in >= e) - goto complex; - if ((*in > 0) && (*in < 0x80)) { - count = in - ctxt->input->cur; - if ((count > XML_MAX_NAME_LENGTH) && - ((ctxt->options & XML_PARSE_HUGE) == 0)) { - xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); - return(NULL); - } - ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); - ctxt->input->cur = in; - ctxt->nbChars += count; - ctxt->input->col += count; - if (ret == NULL) { - xmlErrMemory(ctxt, NULL); - } - return(ret); - } - } -complex: - return(xmlParseNCNameComplex(ctxt)); -} - -/** - * xmlParseNameAndCompare: - * @ctxt: an XML parser context - * - * parse an XML name and compares for match - * (specialized for endtag parsing) - * - * Returns NULL for an illegal name, (xmlChar*) 1 for success - * and the name for mismatch - */ - -static const xmlChar * -xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) { - register const xmlChar *cmp = other; - register const xmlChar *in; - const xmlChar *ret; - - GROW; - if (ctxt->instate == XML_PARSER_EOF) - return(NULL); - - in = ctxt->input->cur; - while (*in != 0 && *in == *cmp) { - ++in; - ++cmp; - ctxt->input->col++; - } - if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { - /* success */ - ctxt->input->cur = in; - return (const xmlChar*) 1; - } - /* failure (or end of input buffer), check with full function */ - ret = xmlParseName (ctxt); - /* strings coming from the dictionary direct compare possible */ - if (ret == other) { - return (const xmlChar*) 1; - } - return ret; -} - -/** - * xmlParseStringName: - * @ctxt: an XML parser context - * @str: a pointer to the string pointer (IN/OUT) - * - * parse an XML name. - * - * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | - * CombiningChar | Extender - * - * [5] Name ::= (Letter | '_' | ':') (NameChar)* - * - * [6] Names ::= Name (#x20 Name)* - * - * Returns the Name parsed or NULL. The @str pointer - * is updated to the current location in the string. - */ - -static xmlChar * -xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) { - xmlChar buf[XML_MAX_NAMELEN + 5]; - const xmlChar *cur = *str; - int len = 0, l; - int c; - -#ifdef DEBUG - nbParseStringName++; -#endif - - c = CUR_SCHAR(cur, l); - if (!xmlIsNameStartChar(ctxt, c)) { - return(NULL); - } - - COPY_BUF(l,buf,len,c); - cur += l; - c = CUR_SCHAR(cur, l); - while (xmlIsNameChar(ctxt, c)) { - COPY_BUF(l,buf,len,c); - cur += l; - c = CUR_SCHAR(cur, l); - if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */ - /* - * Okay someone managed to make a huge name, so he's ready to pay - * for the processing speed. - */ - xmlChar *buffer; - int max = len * 2; - - buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); - if (buffer == NULL) { - xmlErrMemory(ctxt, NULL); - return(NULL); - } - memcpy(buffer, buf, len); - while (xmlIsNameChar(ctxt, c)) { - if (len + 10 > max) { - xmlChar *tmp; - - if ((len > XML_MAX_NAME_LENGTH) && - ((ctxt->options & XML_PARSE_HUGE) == 0)) { - xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); - xmlFree(buffer); - return(NULL); - } - max *= 2; - tmp = (xmlChar *) xmlRealloc(buffer, - max * sizeof(xmlChar)); - if (tmp == NULL) { - xmlErrMemory(ctxt, NULL); - xmlFree(buffer); - return(NULL); - } - buffer = tmp; - } - COPY_BUF(l,buffer,len,c); - cur += l; - c = CUR_SCHAR(cur, l); - } - buffer[len] = 0; - *str = cur; - return(buffer); - } - } - if ((len > XML_MAX_NAME_LENGTH) && - ((ctxt->options & XML_PARSE_HUGE) == 0)) { - xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); - return(NULL); - } - *str = cur; - return(xmlStrndup(buf, len)); -} - -/** - * xmlParseNmtoken: - * @ctxt: an XML parser context - * - * parse an XML Nmtoken. - * - * [7] Nmtoken ::= (NameChar)+ - * - * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)* - * - * Returns the Nmtoken parsed or NULL - */ - -xmlChar * -xmlParseNmtoken(xmlParserCtxtPtr ctxt) { - xmlChar buf[XML_MAX_NAMELEN + 5]; - int len = 0, l; - int c; - int count = 0; - -#ifdef DEBUG - nbParseNmToken++; -#endif - - GROW; - if (ctxt->instate == XML_PARSER_EOF) - return(NULL); - c = CUR_CHAR(l); - - while (xmlIsNameChar(ctxt, c)) { - if (count++ > XML_PARSER_CHUNK_SIZE) { - count = 0; - GROW; - } - COPY_BUF(l,buf,len,c); - NEXTL(l); - c = CUR_CHAR(l); - if (c == 0) { - count = 0; - GROW; - if (ctxt->instate == XML_PARSER_EOF) - return(NULL); - c = CUR_CHAR(l); - } - if (len >= XML_MAX_NAMELEN) { - /* - * Okay someone managed to make a huge token, so he's ready to pay - * for the processing speed. - */ - xmlChar *buffer; - int max = len * 2; - - buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); - if (buffer == NULL) { - xmlErrMemory(ctxt, NULL); - return(NULL); - } - memcpy(buffer, buf, len); - while (xmlIsNameChar(ctxt, c)) { - if (count++ > XML_PARSER_CHUNK_SIZE) { - count = 0; - GROW; - if (ctxt->instate == XML_PARSER_EOF) { - xmlFree(buffer); - return(NULL); - } - } - if (len + 10 > max) { - xmlChar *tmp; - - if ((max > XML_MAX_NAME_LENGTH) && - ((ctxt->options & XML_PARSE_HUGE) == 0)) { - xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken"); - xmlFree(buffer); - return(NULL); - } - max *= 2; - tmp = (xmlChar *) xmlRealloc(buffer, - max * sizeof(xmlChar)); - if (tmp == NULL) { - xmlErrMemory(ctxt, NULL); - xmlFree(buffer); - return(NULL); - } - buffer = tmp; - } - COPY_BUF(l,buffer,len,c); - NEXTL(l); - c = CUR_CHAR(l); - } - buffer[len] = 0; - return(buffer); - } - } - if (len == 0) - return(NULL); - if ((len > XML_MAX_NAME_LENGTH) && - ((ctxt->options & XML_PARSE_HUGE) == 0)) { - xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken"); - return(NULL); - } - return(xmlStrndup(buf, len)); -} - -/** - * xmlParseEntityValue: - * @ctxt: an XML parser context - * @orig: if non-NULL store a copy of the original entity value - * - * parse a value for ENTITY declarations - * - * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | - * "'" ([^%&'] | PEReference | Reference)* "'" - * - * Returns the EntityValue parsed with reference substituted or NULL - */ - -xmlChar * -xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) { - xmlChar *buf = NULL; - int len = 0; - int size = XML_PARSER_BUFFER_SIZE; - int c, l; - xmlChar stop; - xmlChar *ret = NULL; - const xmlChar *cur = NULL; - xmlParserInputPtr input; - - if (RAW == '"') stop = '"'; - else if (RAW == '\'') stop = '\''; - else { - xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL); - return(NULL); - } - buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); - if (buf == NULL) { - xmlErrMemory(ctxt, NULL); - return(NULL); - } - - /* - * The content of the entity definition is copied in a buffer. - */ - - ctxt->instate = XML_PARSER_ENTITY_VALUE; - input = ctxt->input; - GROW; - if (ctxt->instate == XML_PARSER_EOF) { - xmlFree(buf); - return(NULL); - } - NEXT; - c = CUR_CHAR(l); - /* - * NOTE: 4.4.5 Included in Literal - * When a parameter entity reference appears in a literal entity - * value, ... a single or double quote character in the replacement - * text is always treated as a normal data character and will not - * terminate the literal. - * In practice it means we stop the loop only when back at parsing - * the initial entity and the quote is found - */ - while (((IS_CHAR(c)) && ((c != stop) || /* checked */ - (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) { - if (len + 5 >= size) { - xmlChar *tmp; - - size *= 2; - tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); - if (tmp == NULL) { - xmlErrMemory(ctxt, NULL); - xmlFree(buf); - return(NULL); - } - buf = tmp; - } - COPY_BUF(l,buf,len,c); - NEXTL(l); - /* - * Pop-up of finished entities. - */ - while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */ - xmlPopInput(ctxt); - - GROW; - c = CUR_CHAR(l); - if (c == 0) { - GROW; - c = CUR_CHAR(l); - } - } - buf[len] = 0; - if (ctxt->instate == XML_PARSER_EOF) { - xmlFree(buf); - return(NULL); - } - - /* - * Raise problem w.r.t. '&' and '%' being used in non-entities - * reference constructs. Note Charref will be handled in - * xmlStringDecodeEntities() - */ - cur = buf; - while (*cur != 0) { /* non input consuming */ - if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) { - xmlChar *name; - xmlChar tmp = *cur; - - cur++; - name = xmlParseStringName(ctxt, &cur); - if ((name == NULL) || (*cur != ';')) { - xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR, - "EntityValue: '%c' forbidden except for entities references\n", - tmp); - } - if ((tmp == '%') && (ctxt->inSubset == 1) && - (ctxt->inputNr == 1)) { - xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL); - } - if (name != NULL) - xmlFree(name); - if (*cur == 0) - break; - } - cur++; - } - - /* - * Then PEReference entities are substituted. - */ - if (c != stop) { - xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL); - xmlFree(buf); - } else { - NEXT; - /* - * NOTE: 4.4.7 Bypassed - * When a general entity reference appears in the EntityValue in - * an entity declaration, it is bypassed and left as is. - * so XML_SUBSTITUTE_REF is not set here. - */ - ++ctxt->depth; - ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF, - 0, 0, 0); - --ctxt->depth; - if (orig != NULL) - *orig = buf; - else - xmlFree(buf); - } - - return(ret); -} - -/** - * xmlParseAttValueComplex: - * @ctxt: an XML parser context - * @len: the resulting attribute len - * @normalize: wether to apply the inner normalization - * - * parse a value for an attribute, this is the fallback function - * of xmlParseAttValue() when the attribute parsing requires handling - * of non-ASCII characters, or normalization compaction. - * - * Returns the AttValue parsed or NULL. The value has to be freed by the caller. - */ -static xmlChar * -xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { - xmlChar limit = 0; - xmlChar *buf = NULL; - xmlChar *rep = NULL; - size_t len = 0; - size_t buf_size = 0; - int c, l, in_space = 0; - xmlChar *current = NULL; - xmlEntityPtr ent; - - if (NXT(0) == '"') { - ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; - limit = '"'; - NEXT; - } else if (NXT(0) == '\'') { - limit = '\''; - ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; - NEXT; - } else { - xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL); - return(NULL); - } - - /* - * allocate a translation buffer. - */ - buf_size = XML_PARSER_BUFFER_SIZE; - buf = (xmlChar *) xmlMallocAtomic(buf_size); - if (buf == NULL) goto mem_error; - - /* - * OK loop until we reach one of the ending char or a size limit. - */ - c = CUR_CHAR(l); - while (((NXT(0) != limit) && /* checked */ - (IS_CHAR(c)) && (c != '<')) && - (ctxt->instate != XML_PARSER_EOF)) { - /* - * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE - * special option is given - */ - if ((len > XML_MAX_TEXT_LENGTH) && - ((ctxt->options & XML_PARSE_HUGE) == 0)) { - xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, - "AttValue length too long\n"); - goto mem_error; - } - if (c == 0) break; - if (c == '&') { - in_space = 0; - if (NXT(1) == '#') { - int val = xmlParseCharRef(ctxt); - - if (val == '&') { - if (ctxt->replaceEntities) { - if (len + 10 > buf_size) { - growBuffer(buf, 10); - } - buf[len++] = '&'; - } else { - /* - * The reparsing will be done in xmlStringGetNodeList() - * called by the attribute() function in SAX.c - */ - if (len + 10 > buf_size) { - growBuffer(buf, 10); - } - buf[len++] = '&'; - buf[len++] = '#'; - buf[len++] = '3'; - buf[len++] = '8'; - buf[len++] = ';'; - } - } else if (val != 0) { - if (len + 10 > buf_size) { - growBuffer(buf, 10); - } - len += xmlCopyChar(0, &buf[len], val); - } - } else { - ent = xmlParseEntityRef(ctxt); - ctxt->nbentities++; - if (ent != NULL) - ctxt->nbentities += ent->owner; - if ((ent != NULL) && - (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { - if (len + 10 > buf_size) { - growBuffer(buf, 10); - } - if ((ctxt->replaceEntities == 0) && - (ent->content[0] == '&')) { - buf[len++] = '&'; - buf[len++] = '#'; - buf[len++] = '3'; - buf[len++] = '8'; - buf[len++] = ';'; - } else { - buf[len++] = ent->content[0]; - } - } else if ((ent != NULL) && - (ctxt->replaceEntities != 0)) { - if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) { - ++ctxt->depth; - rep = xmlStringDecodeEntities(ctxt, ent->content, - XML_SUBSTITUTE_REF, - 0, 0, 0); - --ctxt->depth; - if (rep != NULL) { - current = rep; - while (*current != 0) { /* non input consuming */ - if ((*current == 0xD) || (*current == 0xA) || - (*current == 0x9)) { - buf[len++] = 0x20; - current++; - } else - buf[len++] = *current++; - if (len + 10 > buf_size) { - growBuffer(buf, 10); - } - } - xmlFree(rep); - rep = NULL; - } - } else { - if (len + 10 > buf_size) { - growBuffer(buf, 10); - } - if (ent->content != NULL) - buf[len++] = ent->content[0]; - } - } else if (ent != NULL) { - int i = xmlStrlen(ent->name); - const xmlChar *cur = ent->name; - - /* - * This may look absurd but is needed to detect - * entities problems - */ - if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && - (ent->content != NULL) && (ent->checked == 0)) { - unsigned long oldnbent = ctxt->nbentities; - - ++ctxt->depth; - rep = xmlStringDecodeEntities(ctxt, ent->content, - XML_SUBSTITUTE_REF, 0, 0, 0); - --ctxt->depth; - - ent->checked = (ctxt->nbentities - oldnbent + 1) * 2; - if (rep != NULL) { - if (xmlStrchr(rep, '<')) - ent->checked |= 1; - xmlFree(rep); - rep = NULL; - } - } - - /* - * Just output the reference - */ - buf[len++] = '&'; - while (len + i + 10 > buf_size) { - growBuffer(buf, i + 10); - } - for (;i > 0;i--) - buf[len++] = *cur++; - buf[len++] = ';'; - } - } - } else { - if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) { - if ((len != 0) || (!normalize)) { - if ((!normalize) || (!in_space)) { - COPY_BUF(l,buf,len,0x20); - while (len + 10 > buf_size) { - growBuffer(buf, 10); - } - } - in_space = 1; - } - } else { - in_space = 0; - COPY_BUF(l,buf,len,c); - if (len + 10 > buf_size) { - growBuffer(buf, 10); - } - } - NEXTL(l); - } - GROW; - c = CUR_CHAR(l); - } - if (ctxt->instate == XML_PARSER_EOF) - goto error; - - if ((in_space) && (normalize)) { - while ((len > 0) && (buf[len - 1] == 0x20)) len--; - } - buf[len] = 0; - if (RAW == '<') { - xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL); - } else if (RAW != limit) { - if ((c != 0) && (!IS_CHAR(c))) { - xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, - "invalid character in attribute value\n"); - } else { - xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, - "AttValue: ' expected\n"); - } - } else - NEXT; - - /* - * There we potentially risk an overflow, don't allow attribute value of - * length more than INT_MAX it is a very reasonnable assumption ! - */ - if (len >= INT_MAX) { - xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, - "AttValue length too long\n"); - goto mem_error; - } - - if (attlen != NULL) *attlen = (int) len; - return(buf); - -mem_error: - xmlErrMemory(ctxt, NULL); -error: - if (buf != NULL) - xmlFree(buf); - if (rep != NULL) - xmlFree(rep); - return(NULL); -} - -/** - * xmlParseAttValue: - * @ctxt: an XML parser context - * - * parse a value for an attribute - * Note: the parser won't do substitution of entities here, this - * will be handled later in xmlStringGetNodeList - * - * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | - * "'" ([^<&'] | Reference)* "'" - * - * 3.3.3 Attribute-Value Normalization: - * Before the value of an attribute is passed to the application or - * checked for validity, the XML processor must normalize it as follows: - * - a character reference is processed by appending the referenced - * character to the attribute value - * - an entity reference is processed by recursively processing the - * replacement text of the entity - * - a whitespace character (#x20, #xD, #xA, #x9) is processed by - * appending #x20 to the normalized value, except that only a single - * #x20 is appended for a "#xD#xA" sequence that is part of an external - * parsed entity or the literal entity value of an internal parsed entity - * - other characters are processed by appending them to the normalized value - * If the declared value is not CDATA, then the XML processor must further - * process the normalized attribute value by discarding any leading and - * trailing space (#x20) characters, and by replacing sequences of space - * (#x20) characters by a single space (#x20) character. - * All attributes for which no declaration has been read should be treated - * by a non-validating parser as if declared CDATA. - * - * Returns the AttValue parsed or NULL. The value has to be freed by the caller. - */ - - -xmlChar * -xmlParseAttValue(xmlParserCtxtPtr ctxt) { - if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL); - return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0)); -} - -/** - * xmlParseSystemLiteral: - * @ctxt: an XML parser context - * - * parse an XML Literal - * - * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") - * - * Returns the SystemLiteral parsed or NULL - */ - -xmlChar * -xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { - xmlChar *buf = NULL; - int len = 0; - int size = XML_PARSER_BUFFER_SIZE; - int cur, l; - xmlChar stop; - int state = ctxt->instate; - int count = 0; - - SHRINK; - if (RAW == '"') { - NEXT; - stop = '"'; - } else if (RAW == '\'') { - NEXT; - stop = '\''; - } else { - xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); - return(NULL); - } - - buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); - if (buf == NULL) { - xmlErrMemory(ctxt, NULL); - return(NULL); - } - ctxt->instate = XML_PARSER_SYSTEM_LITERAL; - cur = CUR_CHAR(l); - while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */ - if (len + 5 >= size) { - xmlChar *tmp; - - if ((size > XML_MAX_NAME_LENGTH) && - ((ctxt->options & XML_PARSE_HUGE) == 0)) { - xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral"); - xmlFree(buf); - ctxt->instate = (xmlParserInputState) state; - return(NULL); - } - size *= 2; - tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); - if (tmp == NULL) { - xmlFree(buf); - xmlErrMemory(ctxt, NULL); - ctxt->instate = (xmlParserInputState) state; - return(NULL); - } - buf = tmp; - } - count++; - if (count > 50) { - GROW; - count = 0; - if (ctxt->instate == XML_PARSER_EOF) { - xmlFree(buf); - return(NULL); - } - } - COPY_BUF(l,buf,len,cur); - NEXTL(l); - cur = CUR_CHAR(l); - if (cur == 0) { - GROW; - SHRINK; - cur = CUR_CHAR(l); - } - } - buf[len] = 0; - ctxt->instate = (xmlParserInputState) state; - if (!IS_CHAR(cur)) { - xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL); - } else { - NEXT; - } - return(buf); -} - -/** - * xmlParsePubidLiteral: - * @ctxt: an XML parser context - * - * parse an XML public literal - * - * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" - * - * Returns the PubidLiteral parsed or NULL. - */ - -xmlChar * -xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { - xmlChar *buf = NULL; - int len = 0; - int size = XML_PARSER_BUFFER_SIZE; - xmlChar cur; - xmlChar stop; - int count = 0; - xmlParserInputState oldstate = ctxt->instate; - - SHRINK; - if (RAW == '"') { - NEXT; - stop = '"'; - } else if (RAW == '\'') { - NEXT; - stop = '\''; - } else { - xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); - return(NULL); - } - buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); - if (buf == NULL) { - xmlErrMemory(ctxt, NULL); - return(NULL); - } - ctxt->instate = XML_PARSER_PUBLIC_LITERAL; - cur = CUR; - while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */ - if (len + 1 >= size) { - xmlChar *tmp; - - if ((size > XML_MAX_NAME_LENGTH) && - ((ctxt->options & XML_PARSE_HUGE) == 0)) { - xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID"); - xmlFree(buf); - return(NULL); - } - size *= 2; - tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); - if (tmp == NULL) { - xmlErrMemory(ctxt, NULL); - xmlFree(buf); - return(NULL); - } - buf = tmp; - } - buf[len++] = cur; - count++; - if (count > 50) { - GROW; - count = 0; - if (ctxt->instate == XML_PARSER_EOF) { - xmlFree(buf); - return(NULL); - } - } - NEXT; - cur = CUR; - if (cur == 0) { - GROW; - SHRINK; - cur = CUR; - } - } - buf[len] = 0; - if (cur != stop) { - xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL); - } else { - NEXT; - } - ctxt->instate = oldstate; - return(buf); -} - -static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata); - -/* - * used for the test in the inner loop of the char data testing - */ -static const unsigned char test_char_data[256] = { - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */ - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */ - 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, - 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, - 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */ - 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, - 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, - 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, - 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */ - 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, - 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, - 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, - 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */ - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -}; - -/** - * xmlParseCharData: - * @ctxt: an XML parser context - * @cdata: int indicating whether we are within a CDATA section - * - * parse a CharData section. - * if we are within a CDATA section ']]>' marks an end of section. - * - * The right angle bracket (>) may be represented using the string ">", - * and must, for compatibility, be escaped using ">" or a character - * reference when it appears in the string "]]>" in content, when that - * string is not marking the end of a CDATA section. - * - * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) - */ - -void -xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) { - const xmlChar *in; - int nbchar = 0; - int line = ctxt->input->line; - int col = ctxt->input->col; - int ccol; - - SHRINK; - GROW; - /* - * Accelerated common case where input don't need to be - * modified before passing it to the handler. - */ - if (!cdata) { - in = ctxt->input->cur; - do { -get_more_space: - while (*in == 0x20) { in++; ctxt->input->col++; } - if (*in == 0xA) { - do { - ctxt->input->line++; ctxt->input->col = 1; - in++; - } while (*in == 0xA); - goto get_more_space; - } - if (*in == '<') { - nbchar = in - ctxt->input->cur; - if (nbchar > 0) { - const xmlChar *tmp = ctxt->input->cur; - ctxt->input->cur = in; - - if ((ctxt->sax != NULL) && - (ctxt->sax->ignorableWhitespace != - ctxt->sax->characters)) { - if (areBlanks(ctxt, tmp, nbchar, 1)) { - if (ctxt->sax->ignorableWhitespace != NULL) - ctxt->sax->ignorableWhitespace(ctxt->userData, - tmp, nbchar); - } else { - if (ctxt->sax->characters != NULL) - ctxt->sax->characters(ctxt->userData, - tmp, nbchar); - if (*ctxt->space == -1) - *ctxt->space = -2; - } - } else if ((ctxt->sax != NULL) && - (ctxt->sax->characters != NULL)) { - ctxt->sax->characters(ctxt->userData, - tmp, nbchar); - } - } - return; - } - -get_more: - ccol = ctxt->input->col; - while (test_char_data[*in]) { - in++; - ccol++; - } - ctxt->input->col = ccol; - if (*in == 0xA) { - do { - ctxt->input->line++; ctxt->input->col = 1; - in++; - } while (*in == 0xA); - goto get_more; - } - if (*in == ']') { - if ((in[1] == ']') && (in[2] == '>')) { - xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL); - ctxt->input->cur = in; - return; - } - in++; - ctxt->input->col++; - goto get_more; - } - nbchar = in - ctxt->input->cur; - if (nbchar > 0) { - if ((ctxt->sax != NULL) && - (ctxt->sax->ignorableWhitespace != - ctxt->sax->characters) && - (IS_BLANK_CH(*ctxt->input->cur))) { - const xmlChar *tmp = ctxt->input->cur; - ctxt->input->cur = in; - - if (areBlanks(ctxt, tmp, nbchar, 0)) { - if (ctxt->sax->ignorableWhitespace != NULL) - ctxt->sax->ignorableWhitespace(ctxt->userData, - tmp, nbchar); - } else { - if (ctxt->sax->characters != NULL) - ctxt->sax->characters(ctxt->userData, - tmp, nbchar); - if (*ctxt->space == -1) - *ctxt->space = -2; - } - line = ctxt->input->line; - col = ctxt->input->col; - } else if (ctxt->sax != NULL) { - if (ctxt->sax->characters != NULL) - ctxt->sax->characters(ctxt->userData, - ctxt->input->cur, nbchar); - line = ctxt->input->line; - col = ctxt->input->col; - } - /* something really bad happened in the SAX callback */ - if (ctxt->instate != XML_PARSER_CONTENT) - return; - } - ctxt->input->cur = in; - if (*in == 0xD) { - in++; - if (*in == 0xA) { - ctxt->input->cur = in; - in++; - ctxt->input->line++; ctxt->input->col = 1; - continue; /* while */ - } - in--; - } - if (*in == '<') { - return; - } - if (*in == '&') { - return; - } - SHRINK; - GROW; - if (ctxt->instate == XML_PARSER_EOF) - return; - in = ctxt->input->cur; - } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09)); - nbchar = 0; - } - ctxt->input->line = line; - ctxt->input->col = col; - xmlParseCharDataComplex(ctxt, cdata); -} - -/** - * xmlParseCharDataComplex: - * @ctxt: an XML parser context - * @cdata: int indicating whether we are within a CDATA section - * - * parse a CharData section.this is the fallback function - * of xmlParseCharData() when the parsing requires handling - * of non-ASCII characters. - */ -static void -xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) { - xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5]; - int nbchar = 0; - int cur, l; - int count = 0; - - SHRINK; - GROW; - cur = CUR_CHAR(l); - while ((cur != '<') && /* checked */ - (cur != '&') && - (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ { - if ((cur == ']') && (NXT(1) == ']') && - (NXT(2) == '>')) { - if (cdata) break; - else { - xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL); - } - } - COPY_BUF(l,buf,nbchar,cur); - if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) { - buf[nbchar] = 0; - - /* - * OK the segment is to be consumed as chars. - */ - if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { - if (areBlanks(ctxt, buf, nbchar, 0)) { - if (ctxt->sax->ignorableWhitespace != NULL) - ctxt->sax->ignorableWhitespace(ctxt->userData, - buf, nbchar); - } else { - if (ctxt->sax->characters != NULL) - ctxt->sax->characters(ctxt->userData, buf, nbchar); - if ((ctxt->sax->characters != - ctxt->sax->ignorableWhitespace) && - (*ctxt->space == -1)) - *ctxt->space = -2; - } - } - nbchar = 0; - /* something really bad happened in the SAX callback */ - if (ctxt->instate != XML_PARSER_CONTENT) - return; - } - count++; - if (count > 50) { - GROW; - count = 0; - if (ctxt->instate == XML_PARSER_EOF) - return; - } - NEXTL(l); - cur = CUR_CHAR(l); - } - if (nbchar != 0) { - buf[nbchar] = 0; - /* - * OK the segment is to be consumed as chars. - */ - if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { - if (areBlanks(ctxt, buf, nbchar, 0)) { - if (ctxt->sax->ignorableWhitespace != NULL) - ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar); - } else { - if (ctxt->sax->characters != NULL) - ctxt->sax->characters(ctxt->userData, buf, nbchar); - if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) && - (*ctxt->space == -1)) - *ctxt->space = -2; - } - } - } - if ((cur != 0) && (!IS_CHAR(cur))) { - /* Generate the error and skip the offending character */ - xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, - "PCDATA invalid Char value %d\n", - cur); - NEXTL(l); - } -} - -/** - * xmlParseExternalID: - * @ctxt: an XML parser context - * @publicID: a xmlChar** receiving PubidLiteral - * @strict: indicate whether we should restrict parsing to only - * production [75], see NOTE below - * - * Parse an External ID or a Public ID - * - * NOTE: Productions [75] and [83] interact badly since [75] can generate - * 'PUBLIC' S PubidLiteral S SystemLiteral - * - * [75] ExternalID ::= 'SYSTEM' S SystemLiteral - * | 'PUBLIC' S PubidLiteral S SystemLiteral - * - * [83] PublicID ::= 'PUBLIC' S PubidLiteral - * - * Returns the function returns SystemLiteral and in the second - * case publicID receives PubidLiteral, is strict is off - * it is possible to return NULL and have publicID set. - */ - -xmlChar * -xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) { - xmlChar *URI = NULL; - - SHRINK; - - *publicID = NULL; - if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) { - SKIP(6); - if (!IS_BLANK_CH(CUR)) { - xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, - "Space required after 'SYSTEM'\n"); - } - SKIP_BLANKS; - URI = xmlParseSystemLiteral(ctxt); - if (URI == NULL) { - xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); - } - } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) { - SKIP(6); - if (!IS_BLANK_CH(CUR)) { - xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, - "Space required after 'PUBLIC'\n"); - } - SKIP_BLANKS; - *publicID = xmlParsePubidLiteral(ctxt); - if (*publicID == NULL) { - xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL); - } - if (strict) { - /* - * We don't handle [83] so "S SystemLiteral" is required. - */ - if (!IS_BLANK_CH(CUR)) { - xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, - "Space required after the Public Identifier\n"); - } - } else { - /* - * We handle [83] so we return immediately, if - * "S SystemLiteral" is not detected. From a purely parsing - * point of view that's a nice mess. - */ - const xmlChar *ptr; - GROW; - - ptr = CUR_PTR; - if (!IS_BLANK_CH(*ptr)) return(NULL); - - while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */ - if ((*ptr != '\'') && (*ptr != '"')) return(NULL); - } - SKIP_BLANKS; - URI = xmlParseSystemLiteral(ctxt); - if (URI == NULL) { - xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); - } - } - return(URI); -} - -/** - * xmlParseCommentComplex: - * @ctxt: an XML parser context - * @buf: the already parsed part of the buffer - * @len: number of bytes filles in the buffer - * @size: allocated size of the buffer - * - * Skip an XML (SGML) comment - * The spec says that "For compatibility, the string "--" (double-hyphen) - * must not occur within comments. " - * This is the slow routine in case the accelerator for ascii didn't work - * - * [15] Comment ::= '' - */ -static void -xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, - size_t len, size_t size) { - int q, ql; - int r, rl; - int cur, l; - size_t count = 0; - int inputid; - - inputid = ctxt->input->id; - - if (buf == NULL) { - len = 0; - size = XML_PARSER_BUFFER_SIZE; - buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); - if (buf == NULL) { - xmlErrMemory(ctxt, NULL); - return; - } - } - GROW; /* Assure there's enough input data */ - q = CUR_CHAR(ql); - if (q == 0) - goto not_terminated; - if (!IS_CHAR(q)) { - xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, - "xmlParseComment: invalid xmlChar value %d\n", - q); - xmlFree (buf); - return; - } - NEXTL(ql); - r = CUR_CHAR(rl); - if (r == 0) - goto not_terminated; - if (!IS_CHAR(r)) { - xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, - "xmlParseComment: invalid xmlChar value %d\n", - q); - xmlFree (buf); - return; - } - NEXTL(rl); - cur = CUR_CHAR(l); - if (cur == 0) - goto not_terminated; - while (IS_CHAR(cur) && /* checked */ - ((cur != '>') || - (r != '-') || (q != '-'))) { - if ((r == '-') && (q == '-')) { - xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL); - } - if ((len > XML_MAX_TEXT_LENGTH) && - ((ctxt->options & XML_PARSE_HUGE) == 0)) { - xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, - "Comment too big found", NULL); - xmlFree (buf); - return; - } - if (len + 5 >= size) { - xmlChar *new_buf; - size_t new_size; - - new_size = size * 2; - new_buf = (xmlChar *) xmlRealloc(buf, new_size); - if (new_buf == NULL) { - xmlFree (buf); - xmlErrMemory(ctxt, NULL); - return; - } - buf = new_buf; - size = new_size; - } - COPY_BUF(ql,buf,len,q); - q = r; - ql = rl; - r = cur; - rl = l; - - count++; - if (count > 50) { - GROW; - count = 0; - if (ctxt->instate == XML_PARSER_EOF) { - xmlFree(buf); - return; - } - } - NEXTL(l); - cur = CUR_CHAR(l); - if (cur == 0) { - SHRINK; - GROW; - cur = CUR_CHAR(l); - } - } - buf[len] = 0; - if (cur == 0) { - xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, - "Comment not terminated \n - * The spec says that "For compatibility, the string "--" (double-hyphen) - * must not occur within comments. " - * - * [15] Comment ::= '' - */ -void -xmlParseComment(xmlParserCtxtPtr ctxt) { - xmlChar *buf = NULL; - size_t size = XML_PARSER_BUFFER_SIZE; - size_t len = 0; - xmlParserInputState state; - const xmlChar *in; - size_t nbchar = 0; - int ccol; - int inputid; - - /* - * Check that there is a comment right here. - */ - if ((RAW != '<') || (NXT(1) != '!') || - (NXT(2) != '-') || (NXT(3) != '-')) return; - state = ctxt->instate; - ctxt->instate = XML_PARSER_COMMENT; - inputid = ctxt->input->id; - SKIP(4); - SHRINK; - GROW; - - /* - * Accelerated common case where input don't need to be - * modified before passing it to the handler. - */ - in = ctxt->input->cur; - do { - if (*in == 0xA) { - do { - ctxt->input->line++; ctxt->input->col = 1; - in++; - } while (*in == 0xA); - } -get_more: - ccol = ctxt->input->col; - while (((*in > '-') && (*in <= 0x7F)) || - ((*in >= 0x20) && (*in < '-')) || - (*in == 0x09)) { - in++; - ccol++; - } - ctxt->input->col = ccol; - if (*in == 0xA) { - do { - ctxt->input->line++; ctxt->input->col = 1; - in++; - } while (*in == 0xA); - goto get_more; - } - nbchar = in - ctxt->input->cur; - /* - * save current set of data - */ - if (nbchar > 0) { - if ((ctxt->sax != NULL) && - (ctxt->sax->comment != NULL)) { - if (buf == NULL) { - if ((*in == '-') && (in[1] == '-')) - size = nbchar + 1; - else - size = XML_PARSER_BUFFER_SIZE + nbchar; - buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar)); - if (buf == NULL) { - xmlErrMemory(ctxt, NULL); - ctxt->instate = state; - return; - } - len = 0; - } else if (len + nbchar + 1 >= size) { - xmlChar *new_buf; - size += len + nbchar + XML_PARSER_BUFFER_SIZE; - new_buf = (xmlChar *) xmlRealloc(buf, - size * sizeof(xmlChar)); - if (new_buf == NULL) { - xmlFree (buf); - xmlErrMemory(ctxt, NULL); - ctxt->instate = state; - return; - } - buf = new_buf; - } - memcpy(&buf[len], ctxt->input->cur, nbchar); - len += nbchar; - buf[len] = 0; - } - } - if ((len > XML_MAX_TEXT_LENGTH) && - ((ctxt->options & XML_PARSE_HUGE) == 0)) { - xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, - "Comment too big found", NULL); - xmlFree (buf); - return; - } - ctxt->input->cur = in; - if (*in == 0xA) { - in++; - ctxt->input->line++; ctxt->input->col = 1; - } - if (*in == 0xD) { - in++; - if (*in == 0xA) { - ctxt->input->cur = in; - in++; - ctxt->input->line++; ctxt->input->col = 1; - continue; /* while */ - } - in--; - } - SHRINK; - GROW; - if (ctxt->instate == XML_PARSER_EOF) { - xmlFree(buf); - return; - } - in = ctxt->input->cur; - if (*in == '-') { - if (in[1] == '-') { - if (in[2] == '>') { - if (ctxt->input->id != inputid) { - xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, - "comment doesn't start and stop in the same entity\n"); - } - SKIP(3); - if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && - (!ctxt->disableSAX)) { - if (buf != NULL) - ctxt->sax->comment(ctxt->userData, buf); - else - ctxt->sax->comment(ctxt->userData, BAD_CAST ""); - } - if (buf != NULL) - xmlFree(buf); - if (ctxt->instate != XML_PARSER_EOF) - ctxt->instate = state; - return; - } - if (buf != NULL) { - xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, - "Double hyphen within comment: " - "