| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928 |
- /*
- Copyright (c) 2000-2002 Lee Thomason (www.grinninglizard.com)
- This software is provided 'as-is', without any express or implied
- warranty. In no event will the authors be held liable for any
- damages arising from the use of this software.
- Permission is granted to anyone to use this software for any
- purpose, including commercial applications, and to alter it and
- redistribute it freely, subject to the following restrictions:
- 1. The origin of this software must not be misrepresented; you must
- not claim that you wrote the original software. If you use this
- software in a product, an acknowledgment in the product documentation
- would be appreciated but is not required.
- 2. Altered source versions must be plainly marked as such, and
- must not be misrepresented as being the original software.
- 3. This notice may not be removed or altered from any source
- distribution.
- */
- #include "tinyxml.h"
- #include <ctype.h>
- #include <strstream>
- using namespace std;
- //#define DEBUG_PARSER
- TiXmlBase::Entity TiXmlBase::entity[ NUM_ENTITY ] =
- {
- { "&", 5, '&' },
- { "<", 4, '<' },
- { ">", 4, '>' },
- { """, 6, '\"' },
- { "'", 6, '\'' }
- };
- const char* TiXmlBase::SkipWhiteSpace( const char* p )
- {
- if ( !p || !*p )
- {
- return 0;
- }
- while ( p && *p )
- {
- if ( isspace( *p ) || *p == '\n' || *p =='\r' ) // Still using old rules for white space.
- ++p;
- else
- break;
- }
- return p;
- }
- /*static*/ bool TiXmlBase::StreamWhiteSpace( std::istream* in, std::string* tag )
- {
- for( ;; )
- {
- if ( !in->good() ) return false;
- int c = in->peek();
- if ( !IsWhiteSpace( c ) )
- return true;
- *tag += in->get();
- }
- }
- /*static*/ bool TiXmlBase::StreamTo( std::istream* in, int character, std::string* tag )
- {
- while ( in->good() )
- {
- int c = in->peek();
- if ( c == character )
- return true;
- in->get();
- *tag += c;
- }
- return false;
- }
- const char* TiXmlBase::ReadName( const char* p, string* name )
- {
- *name = "";
- assert( p );
- // Names start with letters or underscores.
- // After that, they can be letters, underscores, numbers,
- // hyphens, or colons. (Colons are valid ony for namespaces,
- // but tinyxml can't tell namespaces from names.)
- if ( p && *p
- && ( isalpha( (unsigned char) *p ) || *p == '_' ) )
- {
- while( p && *p
- && ( isalnum( (unsigned char ) *p )
- || *p == '_'
- || *p == '-'
- || *p == ':' ) )
- {
- (*name) += *p;
- ++p;
- }
- return p;
- }
- return 0;
- }
- const char* TiXmlBase::GetEntity( const char* p, char* value )
- {
- // Presume an entity, and pull it out.
- string ent;
- int i;
- // Ignore the &#x entities.
- if ( strncmp( "&#x", p, 3 ) == 0 )
- {
- *value = *p;
- return p+1;
- }
- // Now try to match it.
- for( i=0; i<NUM_ENTITY; ++i )
- {
- if ( strncmp( entity[i].str, p, entity[i].strLength ) == 0 )
- {
- assert( strlen( entity[i].str ) == entity[i].strLength );
- *value = entity[i].chr;
- return ( p + entity[i].strLength );
- }
- }
- // So it wasn't an entity, its unrecognized, or something like that.
- *value = *p; // Don't put back the last one, since we return it!
- return p+1;
- }
- bool TiXmlBase::StringEqual( const char* p,
- const char* tag,
- bool ignoreCase )
- {
- assert( p );
- if ( !p || !*p )
- {
- assert( 0 );
- return false;
- }
- if ( tolower( *p ) == tolower( *tag ) )
- {
- const char* q = p;
- if (ignoreCase)
- {
- while ( *q && *tag && *q == *tag )
- {
- ++q;
- ++tag;
- }
- if ( *tag == 0 ) // Have we found the end of the tag, and everything equal?
- {
- return true;
- }
- }
- else
- {
- while ( *q && *tag && tolower( *q ) == tolower( *tag ) )
- {
- ++q;
- ++tag;
- }
- if ( *tag == 0 )
- {
- return true;
- }
- }
- }
- return false;
- }
- const char* TiXmlBase::ReadText( const char* p,
- string* text,
- bool trimWhiteSpace,
- const char* endTag,
- bool caseInsensitive )
- {
- *text = "";
- if ( !trimWhiteSpace // certain tags always keep whitespace
- || !condenseWhiteSpace ) // if true, whitespace is always kept
- {
- // Keep all the white space.
- while ( p && *p
- && !StringEqual( p, endTag, caseInsensitive )
- )
- {
- char c;
- p = GetChar( p, &c );
- text->append( &c, 1 );
- }
- }
- else
- {
- bool whitespace = false;
- // Remove leading white space:
- p = SkipWhiteSpace( p );
- while ( p && *p
- && !StringEqual( p, endTag, caseInsensitive ) )
- {
- if ( *p == '\r' || *p == '\n' )
- {
- whitespace = true;
- ++p;
- }
- else if ( isspace( *p ) )
- {
- whitespace = true;
- ++p;
- }
- else
- {
- // If we've found whitespace, add it before the
- // new character. Any whitespace just becomes a space.
- if ( whitespace )
- {
- text->append( " ", 1 );
- whitespace = false;
- }
- char c;
- p = GetChar( p, &c );
- text->append( &c, 1 );
- }
- }
- }
- return p + strlen( endTag );
- }
- void TiXmlDocument::StreamIn( std::istream* in, std::string* tag )
- {
- // The basic issue with a document is that we don't know what we're
- // streaming. Read something presumed to be a tag (and hope), then
- // identify it, and call the appropriate stream method on the tag.
- //
- // This "pre-streaming" will never read the closing ">" so the
- // sub-tag can orient itself.
- if ( !StreamTo( in, '<', tag ) )
- {
- SetError( TIXML_ERROR_PARSING_EMPTY );
- return;
- }
- while ( in->good() )
- {
- int tagIndex = tag->length();
- while ( in->good() && in->peek() != '>' )
- {
- int c = in->get();
- (*tag) += (char) c;
- }
- if ( in->good() )
- {
- // We now have something we presume to be a node of
- // some sort. Identify it, and call the node to
- // continue streaming.
- TiXmlNode* node = Identify( tag->c_str() + tagIndex );
- if ( node )
- {
- node->StreamIn( in, tag );
- bool isElement = node->ToElement() != 0;
- delete node;
- node = 0;
- // If this is the root element, we're done. Parsing will be
- // done by the >> operator.
- if ( isElement )
- {
- return;
- }
- }
- else
- {
- SetError( TIXML_ERROR );
- return;
- }
- }
- }
- // We should have returned sooner.
- SetError( TIXML_ERROR );
- }
- const char* TiXmlDocument::Parse( const char* p )
- {
- // Parse away, at the document level. Since a document
- // contains nothing but other tags, most of what happens
- // here is skipping white space.
- //
- // In this variant (as opposed to stream and Parse) we
- // read everything we can.
- if ( !p || !*p || !( p = SkipWhiteSpace( p ) ) )
- {
- SetError( TIXML_ERROR_DOCUMENT_EMPTY );
- return false;
- }
-
- while ( p && *p )
- {
- TiXmlNode* node = Identify( p );
- if ( node )
- {
- p = node->Parse( p );
- LinkEndChild( node );
- }
- else
- {
- break;
- }
- p = SkipWhiteSpace( p );
- }
- // All is well.
- return p;
- }
- TiXmlNode* TiXmlNode::Identify( const char* p )
- {
- TiXmlNode* returnNode = 0;
- p = SkipWhiteSpace( p );
- if( !p || !*p || *p != '<' )
- {
- return 0;
- }
- TiXmlDocument* doc = GetDocument();
- p = SkipWhiteSpace( p );
- if ( !p || !*p )
- {
- return 0;
- }
- // What is this thing?
- // - Elements start with a letter or underscore, but xml is reserved.
- // - Comments: <!--
- // - Decleration: <?xml
- // - Everthing else is unknown to tinyxml.
- //
- const char* xmlHeader = { "<?xml" };
- const char* commentHeader = { "<!--" };
- if ( StringEqual( p, xmlHeader, true ) )
- {
- #ifdef DEBUG_PARSER
- TIXML_LOG( "XML parsing Declaration\n" );
- #endif
- returnNode = new TiXmlDeclaration();
- }
- else if ( isalpha( *(p+1) )
- || *(p+1) == '_' )
- {
- #ifdef DEBUG_PARSER
- TIXML_LOG( "XML parsing Element\n" );
- #endif
- returnNode = new TiXmlElement( "" );
- }
- else if ( StringEqual( p, commentHeader, false ) )
- {
- #ifdef DEBUG_PARSER
- TIXML_LOG( "XML parsing Comment\n" );
- #endif
- returnNode = new TiXmlComment();
- }
- else
- {
- #ifdef DEBUG_PARSER
- TIXML_LOG( "XML parsing Unknown\n" );
- #endif
- returnNode = new TiXmlUnknown();
- }
- if ( returnNode )
- {
- // Set the parent, so it can report errors
- returnNode->parent = this;
- //p = returnNode->Parse( p );
- }
- else
- {
- if ( doc )
- doc->SetError( TIXML_ERROR_OUT_OF_MEMORY );
- }
- return returnNode;
- }
- void TiXmlElement::StreamIn( std::istream* in, std::string* tag )
- {
- // We're called with some amount of pre-parsing. That is, some of "this"
- // element is in "tag". Go ahead and stream to the closing ">"
- while( in->good() )
- {
- int c = in->get();
- (*tag) += (char) c ;
-
- if ( c == '>' )
- break;
- }
- if ( tag->length() < 3 ) return;
- // Okay...if we are a "/>" tag, then we're done. We've read a complete tag.
- // If not, identify and stream.
- if ( tag->at( tag->length() - 1 ) == '>'
- && tag->at( tag->length() - 2 ) == '/' )
- {
- // All good!
- return;
- }
- else if ( tag->at( tag->length() - 1 ) == '>' )
- {
- // There is more. Could be:
- // text
- // closing tag
- // another node.
- for ( ;; )
- {
- StreamWhiteSpace( in, tag );
- // Do we have text?
- if ( in->peek() != '<' )
- {
- // Yep, text.
- TiXmlText text( "" );
- text.StreamIn( in, tag );
- // What follows text is a closing tag or another node.
- // Go around again and figure it out.
- continue;
- }
- // We now have either a closing tag...or another node.
- // We should be at a "<", regardless.
- if ( !in->good() ) return;
- assert( in->peek() == '<' );
- int tagIndex = tag->length();
- bool closingTag = false;
- bool firstCharFound = false;
- for( ;; )
- {
- if ( !in->good() )
- return;
- int c = in->peek();
-
- if ( c == '>' )
- break;
- *tag += c;
- in->get();
- if ( !firstCharFound && c != '<' && !IsWhiteSpace( c ) )
- {
- firstCharFound = true;
- if ( c == '/' )
- closingTag = true;
- }
- }
- // If it was a closing tag, then read in the closing '>' to clean up the input stream.
- // If it was not, the streaming will be done by the tag.
- if ( closingTag )
- {
- int c = in->get();
- assert( c == '>' );
- *tag += c;
- // We are done, once we've found our closing tag.
- return;
- }
- else
- {
- // If not a closing tag, id it, and stream.
- const char* tagloc = tag->c_str() + tagIndex;
- TiXmlNode* node = Identify( tagloc );
- if ( !node )
- return;
- node->StreamIn( in, tag );
- delete node;
- node = 0;
- // No return: go around from the beginning: text, closing tag, or node.
- }
- }
- }
- }
- const char* TiXmlElement::Parse( const char* p )
- {
- p = SkipWhiteSpace( p );
- TiXmlDocument* document = GetDocument();
- if ( !p || !*p || *p != '<' )
- {
- if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT );
- return false;
- }
- p = SkipWhiteSpace( p+1 );
- // Read the name.
- p = ReadName( p, &value );
- if ( !p || !*p )
- {
- if ( document ) document->SetError( TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME );
- return false;
- }
- string endTag = "</";
- endTag += value;
- endTag += ">";
- // Check for and read attributes. Also look for an empty
- // tag or an end tag.
- while ( p && *p )
- {
- p = SkipWhiteSpace( p );
- if ( !p || !*p )
- {
- if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES );
- return 0;
- }
- if ( *p == '/' )
- {
- ++p;
- // Empty tag.
- if ( *p != '>' )
- {
- if ( document ) document->SetError( TIXML_ERROR_PARSING_EMPTY );
- return 0;
- }
- return (p+1);
- }
- else if ( *p == '>' )
- {
- // Done with attributes (if there were any.)
- // Read the value -- which can include other
- // elements -- read the end tag, and return.
- ++p;
- p = ReadValue( p ); // Note this is an Element method, and will set the error if one happens.
- if ( !p || !*p )
- return 0;
- // We should find the end tag now
- if ( StringEqual( p, endTag.c_str(), false ) )
- {
- p += endTag.length();
- return p;
- }
- else
- {
- if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG );
- return 0;
- }
- }
- else
- {
- // Try to read an element:
- TiXmlAttribute attrib;
- attrib.SetDocument( document );
- p = attrib.Parse( p );
- if ( !p || !*p )
- {
- if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT );
- return 0;
- }
- SetAttribute( attrib.Name(), attrib.Value() );
- }
- }
- return p;
- }
- const char* TiXmlElement::ReadValue( const char* p )
- {
- TiXmlDocument* document = GetDocument();
- // Read in text and elements in any order.
- p = SkipWhiteSpace( p );
- while ( p && *p )
- {
- // string text;
- // while ( p && *p && *p != '<' )
- // {
- // text += (*p);
- // ++p;
- // }
- //
- // p = SkipWhiteSpace( p );
- if ( *p != '<' )
- {
- // Take what we have, make a text element.
- TiXmlText* textNode = new TiXmlText( "" );
- if ( !textNode )
- {
- if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY );
- return 0;
- }
- p = textNode->Parse( p );
- if ( !textNode->Blank() )
- LinkEndChild( textNode );
- else
- delete textNode;
- }
- else
- {
- // We hit a '<'
- // Have we hit a new element or an end tag?
- if ( StringEqual( p, "</", false ) )
- {
- return p;
- }
- else
- {
- TiXmlNode* node = Identify( p );
- if ( node )
- {
- p = node->Parse( p );
- LinkEndChild( node );
- }
- else
- {
- return 0;
- }
- }
- }
- p = SkipWhiteSpace( p );
- }
- if ( !p )
- {
- if ( document ) document->SetError( TIXML_ERROR_READING_ELEMENT_VALUE );
- }
- return p;
- }
- void TiXmlUnknown::StreamIn( std::istream* in, std::string* tag )
- {
- while ( in->good() )
- {
- int c = in->get();
- (*tag) += c;
- if ( c == '>' )
- {
- // All is well.
- return;
- }
- }
- }
- const char* TiXmlUnknown::Parse( const char* p )
- {
- TiXmlDocument* document = GetDocument();
- p = SkipWhiteSpace( p );
- if ( !p || !*p || *p != '<' )
- {
- if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN );
- return 0;
- }
- ++p;
- value = "";
- while ( p && *p && *p != '>' )
- {
- value += *p;
- ++p;
- }
- if ( !p )
- {
- if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN );
- }
- if ( *p == '>' )
- return p+1;
- return p;
- }
- void TiXmlComment::StreamIn( std::istream* in, std::string* tag )
- {
- while ( in->good() )
- {
- int c = in->get();
- (*tag) += c;
- if ( c == '>'
- && tag->at( tag->length() - 2 ) == '-'
- && tag->at( tag->length() - 3 ) == '-' )
- {
- // All is well.
- return;
- }
- }
- }
- const char* TiXmlComment::Parse( const char* p )
- {
- TiXmlDocument* document = GetDocument();
- value = "";
- p = SkipWhiteSpace( p );
- const char* startTag = "<!--";
- const char* endTag = "-->";
- if ( !StringEqual( p, startTag, false ) )
- {
- document->SetError( TIXML_ERROR_PARSING_COMMENT );
- return 0;
- }
- p += strlen( startTag );
- p = ReadText( p, &value, false, endTag, false );
- return p;
- }
- const char* TiXmlAttribute::Parse( const char* p )
- {
- p = SkipWhiteSpace( p );
- if ( !p || !*p ) return 0;
- // Read the name, the '=' and the value.
- p = ReadName( p, &name );
- if ( !p || !*p )
- {
- if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES );
- return 0;
- }
- p = SkipWhiteSpace( p );
- if ( !p || !*p || *p != '=' )
- {
- if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES );
- return 0;
- }
- ++p; // skip '='
- p = SkipWhiteSpace( p );
- if ( !p || !*p )
- {
- if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES );
- return 0;
- }
-
- const char* end;
- if ( *p == '\'' )
- {
- ++p;
- end = "\'";
- p = ReadText( p, &value, false, end, false );
- }
- else if ( *p == '"' )
- {
- ++p;
- end = "\"";
- p = ReadText( p, &value, false, end, false );
- }
- else
- {
- // All attribute values should be in single or double quotes.
- // But this is such a common error that the parser will try
- // its best, even without them.
- value = "";
- while ( p && *p // existence
- && !isspace( *p ) && *p != '\n' && *p != '\r' // whitespace
- && *p != '/' && *p != '>' ) // tag end
- {
- value += *p;
- ++p;
- }
- }
- return p;
- }
- void TiXmlText::StreamIn( std::istream* in, std::string* tag )
- {
- while ( in->good() )
- {
- int c = in->peek();
- if ( c == '<' )
- return;
- (*tag) += c;
- in->get();
- }
- }
- const char* TiXmlText::Parse( const char* p )
- {
- value = "";
- //TiXmlDocument* doc = GetDocument();
- bool ignoreWhite = true;
- // if ( doc && !doc->IgnoreWhiteSpace() ) ignoreWhite = false;
- const char* end = "<";
- p = ReadText( p, &value, ignoreWhite, end, false );
- if ( p )
- return p-1; // don't truncate the '<'
- return 0;
- }
- void TiXmlDeclaration::StreamIn( std::istream* in, std::string* tag )
- {
- while ( in->good() )
- {
- int c = in->get();
- (*tag) += c;
- if ( c == '>' )
- {
- // All is well.
- return;
- }
- }
- }
- const char* TiXmlDeclaration::Parse( const char* p )
- {
- p = SkipWhiteSpace( p );
- // Find the beginning, find the end, and look for
- // the stuff in-between.
- TiXmlDocument* document = GetDocument();
- if ( !p || !*p || !StringEqual( p, "<?xml", true ) )
- {
- if ( document ) document->SetError( TIXML_ERROR_PARSING_DECLARATION );
- return 0;
- }
- p += 5;
- // const char* start = p+5;
- // const char* end = strstr( start, "?>" );
- version = "";
- encoding = "";
- standalone = "";
- while ( p && *p )
- {
- if ( *p == '>' )
- {
- ++p;
- return p;
- }
- p = SkipWhiteSpace( p );
- if ( StringEqual( p, "version", true ) )
- {
- // p += 7;
- TiXmlAttribute attrib;
- p = attrib.Parse( p );
- version = attrib.Value();
- }
- else if ( StringEqual( p, "encoding", true ) )
- {
- // p += 8;
- TiXmlAttribute attrib;
- p = attrib.Parse( p );
- encoding = attrib.Value();
- }
- else if ( StringEqual( p, "standalone", true ) )
- {
- // p += 10;
- TiXmlAttribute attrib;
- p = attrib.Parse( p );
- standalone = attrib.Value();
- }
- else
- {
- // Read over whatever it is.
- while( p && *p && *p != '>' && !isspace( *p ) )
- ++p;
- }
- }
- return 0;
- }
- bool TiXmlText::Blank() const
- {
- for ( unsigned i=0; i<value.size(); i++ )
- if ( !isspace( value[i] ) )
- return false;
- return true;
- }
|