/*
* Copyright (C) 2008 Apple Inc. All Rights Reserved.
* Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/
* Copyright (C) 2010 Google, Inc. All Rights Reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "ios/third_party/blink/src/html_tokenizer.h"
#include "html_markup_tokenizer_inlines.h"
namespace WebCore {
// Shorthands that bind the generic state-machine macros (BEGIN_STATE,
// RECONSUME_IN, ADVANCE_TO, SWITCH_TO) to HTMLTokenizer by passing the class
// name as their first argument. The generic macros are not defined in this
// file; presumably they come from html_markup_tokenizer_inlines.h -- confirm
// there before relying on their exact control-flow behavior.
#define HTML_BEGIN_STATE(stateName) BEGIN_STATE(HTMLTokenizer, stateName)
#define HTML_RECONSUME_IN(stateName) RECONSUME_IN(HTMLTokenizer, stateName)
#define HTML_ADVANCE_TO(stateName) ADVANCE_TO(HTMLTokenizer, stateName)
#define HTML_SWITCH_TO(stateName) SWITCH_TO(HTMLTokenizer, stateName)
// Constructs a tokenizer whose state is DataState, with no token attached,
// m_additionalAllowedCharacter set to '\0', and an input stream preprocessor
// that is handed a pointer back to this tokenizer.
HTMLTokenizer::HTMLTokenizer()
    : m_state(HTMLTokenizer::DataState),
      m_token(nullptr),
      m_additionalAllowedCharacter('\0'),
      m_inputStreamPreprocessor(this)
{
}
// The destructor performs no work of its own; members are destroyed by their
// own destructors. Kept out of line so the definition stays in this file.
HTMLTokenizer::~HTMLTokenizer() = default;
// Restores the three fields the constructor initializes explicitly: the state
// goes back to DataState, the current-token pointer is cleared, and
// m_additionalAllowedCharacter is set to '\0'. m_inputStreamPreprocessor is
// left untouched.
void HTMLTokenizer::reset()
{
    m_state = HTMLTokenizer::DataState;
    // Use nullptr (not the integer literal 0) to match the constructor.
    m_token = nullptr;
    m_additionalAllowedCharacter = '\0';
}
// Consumes one character from |source| and reports whether the current token
// is a character token.
//
// Precondition (debug-checked): the current token is either a Character token
// or still Uninitialized.
//
// Returns true when the current token's type is HTMLToken::Character, false
// otherwise.
bool HTMLTokenizer::flushBufferedEndTag(CharacterProvider& source)
{
    ASSERT(m_token->type() == HTMLToken::Character || m_token->type() == HTMLToken::Uninitialized);
    source.next();
    return m_token->type() == HTMLToken::Character;
}
// Sets m_state to |stateName| and calls flushBufferedEndTag(source). If that
// call reports true, the enclosing function returns true. Otherwise, when the
// source is empty or the input stream preprocessor cannot peek it, the
// enclosing function returns haveBufferedCharacterToken(). Failing both, |cc|
// is reloaded from the preprocessor and control jumps to the |stateName|
// label.
//
// Expects |source| and |cc| to be in scope at the expansion site, and a label
// named |stateName| to exist in the enclosing function. This macro is not
// referenced anywhere in the visible portion of this file.
#define FLUSH_AND_ADVANCE_TO(stateName) \
do { \
m_state = HTMLTokenizer::stateName; \
if (flushBufferedEndTag(source)) \
return true; \
if (source.isEmpty() \
|| !m_inputStreamPreprocessor.peek(source)) \
return haveBufferedCharacterToken(); \
cc = m_inputStreamPreprocessor.nextInputCharacter(); \
goto stateName; \
} while (false)
// Runs the tokenizer state machine over |source|, recording progress in
// |token|.
//
// |token| is stored in m_token for the duration of the call. Processing starts
// in whatever state m_state currently holds and dispatches on the current
// input character |cc|.
//
// Return paths visible in this function:
//   - haveBufferedCharacterToken() when the input is empty or cannot be
//     peeked, or when a keyword lookahead ("--", "doctype", "public",
//     "system") does not have enough bytes available yet;
//   - true when '<' is seen in DataState while a character token is pending;
//   - the result of emitAndResumeIn(), emitAndReconsumeIn() or
//     emitEndOfFile().
//
// State transitions go through HTML_ADVANCE_TO / HTML_RECONSUME_IN /
// HTML_SWITCH_TO. Those expand to macros defined outside this file, so their
// exact behavior (including any additional return paths) must be confirmed
// there.
//
// What gets recorded on the token in this file: the token kind
// (ensureIsCharacterToken / beginStartTag / beginEndTag / beginComment /
// beginDOCTYPE) and tag-name characters (appendToName). None of the
// attribute, comment-body or DOCTYPE name/identifier states below store the
// characters they scan.
bool HTMLTokenizer::nextToken(CharacterProvider& source, HTMLToken& token)
{
// If we have a token in progress, then we're supposed to be called back
// with the same token so we can finish it.
ASSERT(!m_token || m_token == &token || token.type() == HTMLToken::Uninitialized);
m_token = &token;
// Nothing can be tokenized without at least one readable character.
if (source.isEmpty() || !m_inputStreamPreprocessor.peek(source))
return haveBufferedCharacterToken();
UChar cc = m_inputStreamPreprocessor.nextInputCharacter();
// Source: http://www.whatwg.org/specs/web-apps/current-work/#tokenisation0
switch (m_state) {
// ---- Character data ----
HTML_BEGIN_STATE(DataState) {
if (cc == '<') {
if (m_token->type() == HTMLToken::Character) {
// We have a bunch of character tokens queued up that we
// are emitting lazily here.
return true;
}
HTML_ADVANCE_TO(TagOpenState);
} else if (cc == kEndOfFileMarker)
return emitEndOfFile(source);
else {
m_token->ensureIsCharacterToken();
HTML_ADVANCE_TO(DataState);
}
}
END_STATE()
// ---- Tag open / tag name ----
// Upper-case ASCII letters are lower-cased before being stored in the tag
// name.
HTML_BEGIN_STATE(TagOpenState) {
if (cc == '!')
HTML_ADVANCE_TO(MarkupDeclarationOpenState);
else if (cc == '/')
HTML_ADVANCE_TO(EndTagOpenState);
else if (isASCIIUpper(cc)) {
m_token->beginStartTag(toLowerCase(cc));
HTML_ADVANCE_TO(TagNameState);
} else if (isASCIILower(cc)) {
m_token->beginStartTag(cc);
HTML_ADVANCE_TO(TagNameState);
} else if (cc == '?') {
parseError();
// The spec consumes the current character before switching
// to the bogus comment state, but it's easier to implement
// if we reconsume the current character.
HTML_RECONSUME_IN(BogusCommentState);
} else {
parseError();
m_token->ensureIsCharacterToken();
HTML_RECONSUME_IN(DataState);
}
}
END_STATE()
HTML_BEGIN_STATE(EndTagOpenState) {
if (isASCIIUpper(cc)) {
m_token->beginEndTag(static_cast<LChar>(toLowerCase(cc)));
HTML_ADVANCE_TO(TagNameState);
} else if (isASCIILower(cc)) {
m_token->beginEndTag(static_cast<LChar>(cc));
HTML_ADVANCE_TO(TagNameState);
} else if (cc == '>') {
parseError();
HTML_ADVANCE_TO(DataState);
} else if (cc == kEndOfFileMarker) {
parseError();
m_token->ensureIsCharacterToken();
HTML_RECONSUME_IN(DataState);
} else {
parseError();
HTML_RECONSUME_IN(BogusCommentState);
}
}
END_STATE()
HTML_BEGIN_STATE(TagNameState) {
if (isTokenizerWhitespace(cc))
HTML_ADVANCE_TO(BeforeAttributeNameState);
else if (cc == '/')
HTML_ADVANCE_TO(SelfClosingStartTagState);
else if (cc == '>')
return emitAndResumeIn(source, HTMLTokenizer::DataState);
else if (isASCIIUpper(cc)) {
m_token->appendToName(toLowerCase(cc));
HTML_ADVANCE_TO(TagNameState);
} else if (cc == kEndOfFileMarker) {
parseError();
HTML_RECONSUME_IN(DataState);
} else {
m_token->appendToName(cc);
HTML_ADVANCE_TO(TagNameState);
}
}
END_STATE()
// ---- Attributes ----
// The attribute states below only track where the scan is; they make no
// calls on m_token, so attribute names and values are skipped rather than
// stored.
HTML_BEGIN_STATE(BeforeAttributeNameState) {
if (isTokenizerWhitespace(cc))
HTML_ADVANCE_TO(BeforeAttributeNameState);
else if (cc == '/')
HTML_ADVANCE_TO(SelfClosingStartTagState);
else if (cc == '>')
return emitAndResumeIn(source, HTMLTokenizer::DataState);
else if (isASCIIUpper(cc)) {
HTML_ADVANCE_TO(AttributeNameState);
} else if (cc == kEndOfFileMarker) {
parseError();
HTML_RECONSUME_IN(DataState);
} else {
if (cc == '"' || cc == '\'' || cc == '<' || cc == '=')
parseError();
HTML_ADVANCE_TO(AttributeNameState);
}
}
END_STATE()
HTML_BEGIN_STATE(AttributeNameState) {
if (isTokenizerWhitespace(cc)) {
HTML_ADVANCE_TO(AfterAttributeNameState);
} else if (cc == '/') {
HTML_ADVANCE_TO(SelfClosingStartTagState);
} else if (cc == '=') {
HTML_ADVANCE_TO(BeforeAttributeValueState);
} else if (cc == '>') {
return emitAndResumeIn(source, HTMLTokenizer::DataState);
} else if (isASCIIUpper(cc)) {
HTML_ADVANCE_TO(AttributeNameState);
} else if (cc == kEndOfFileMarker) {
parseError();
HTML_RECONSUME_IN(DataState);
} else {
// NOTE(review): '=' is already handled by an earlier branch of this
// chain, so the cc == '=' test here cannot be true.
if (cc == '"' || cc == '\'' || cc == '<' || cc == '=')
parseError();
HTML_ADVANCE_TO(AttributeNameState);
}
}
END_STATE()
HTML_BEGIN_STATE(AfterAttributeNameState) {
if (isTokenizerWhitespace(cc))
HTML_ADVANCE_TO(AfterAttributeNameState);
else if (cc == '/')
HTML_ADVANCE_TO(SelfClosingStartTagState);
else if (cc == '=')
HTML_ADVANCE_TO(BeforeAttributeValueState);
else if (cc == '>')
return emitAndResumeIn(source, HTMLTokenizer::DataState);
else if (isASCIIUpper(cc)) {
HTML_ADVANCE_TO(AttributeNameState);
} else if (cc == kEndOfFileMarker) {
parseError();
HTML_RECONSUME_IN(DataState);
} else {
if (cc == '"' || cc == '\'' || cc == '<')
parseError();
HTML_ADVANCE_TO(AttributeNameState);
}
}
END_STATE()
HTML_BEGIN_STATE(BeforeAttributeValueState) {
if (isTokenizerWhitespace(cc))
HTML_ADVANCE_TO(BeforeAttributeValueState);
else if (cc == '"') {
HTML_ADVANCE_TO(AttributeValueDoubleQuotedState);
} else if (cc == '&') {
HTML_RECONSUME_IN(AttributeValueUnquotedState);
} else if (cc == '\'') {
HTML_ADVANCE_TO(AttributeValueSingleQuotedState);
} else if (cc == '>') {
parseError();
return emitAndResumeIn(source, HTMLTokenizer::DataState);
} else if (cc == kEndOfFileMarker) {
parseError();
HTML_RECONSUME_IN(DataState);
} else {
if (cc == '<' || cc == '=' || cc == '`')
parseError();
HTML_ADVANCE_TO(AttributeValueUnquotedState);
}
}
END_STATE()
HTML_BEGIN_STATE(AttributeValueDoubleQuotedState) {
if (cc == '"') {
HTML_ADVANCE_TO(AfterAttributeValueQuotedState);
} else if (cc == kEndOfFileMarker) {
parseError();
HTML_RECONSUME_IN(DataState);
} else {
HTML_ADVANCE_TO(AttributeValueDoubleQuotedState);
}
}
END_STATE()
HTML_BEGIN_STATE(AttributeValueSingleQuotedState) {
if (cc == '\'') {
HTML_ADVANCE_TO(AfterAttributeValueQuotedState);
} else if (cc == kEndOfFileMarker) {
parseError();
HTML_RECONSUME_IN(DataState);
} else {
HTML_ADVANCE_TO(AttributeValueSingleQuotedState);
}
}
END_STATE()
HTML_BEGIN_STATE(AttributeValueUnquotedState) {
if (isTokenizerWhitespace(cc)) {
HTML_ADVANCE_TO(BeforeAttributeNameState);
} else if (cc == '>') {
return emitAndResumeIn(source, HTMLTokenizer::DataState);
} else if (cc == kEndOfFileMarker) {
parseError();
HTML_RECONSUME_IN(DataState);
} else {
if (cc == '"' || cc == '\'' || cc == '<' || cc == '=' || cc == '`')
parseError();
HTML_ADVANCE_TO(AttributeValueUnquotedState);
}
}
END_STATE()
HTML_BEGIN_STATE(AfterAttributeValueQuotedState) {
if (isTokenizerWhitespace(cc))
HTML_ADVANCE_TO(BeforeAttributeNameState);
else if (cc == '/')
HTML_ADVANCE_TO(SelfClosingStartTagState);
else if (cc == '>')
return emitAndResumeIn(source, HTMLTokenizer::DataState);
else if (cc == kEndOfFileMarker) {
parseError();
HTML_RECONSUME_IN(DataState);
} else {
parseError();
HTML_RECONSUME_IN(BeforeAttributeNameState);
}
}
END_STATE()
HTML_BEGIN_STATE(SelfClosingStartTagState) {
if (cc == '>') {
return emitAndResumeIn(source, HTMLTokenizer::DataState);
} else if (cc == kEndOfFileMarker) {
parseError();
HTML_RECONSUME_IN(DataState);
} else {
parseError();
HTML_RECONSUME_IN(BeforeAttributeNameState);
}
}
END_STATE()
// ---- Bogus comments ----
// BogusCommentState only starts the comment token; the scan up to '>' or
// end-of-file happens in ContinueBogusCommentState.
HTML_BEGIN_STATE(BogusCommentState) {
m_token->beginComment();
HTML_RECONSUME_IN(ContinueBogusCommentState);
}
END_STATE()
HTML_BEGIN_STATE(ContinueBogusCommentState) {
if (cc == '>')
return emitAndResumeIn(source, HTMLTokenizer::DataState);
else if (cc == kEndOfFileMarker)
return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
else {
HTML_ADVANCE_TO(ContinueBogusCommentState);
}
}
END_STATE()
// ---- "<!" : comment or DOCTYPE ----
// Looks ahead for "--" or (case-insensitively) "doctype". If the lookahead
// cannot be decided because too few bytes remain, the function returns
// haveBufferedCharacterToken(). The trailing parseError() / bogus-comment
// path is for input that matches neither keyword; this relies on
// HTML_SWITCH_TO leaving the state block -- confirm against the macro
// definition.
HTML_BEGIN_STATE(MarkupDeclarationOpenState) {
DEFINE_STATIC_LOCAL_STRING(dashDashString, "--");
DEFINE_STATIC_LOCAL_STRING(doctypeString, "doctype");
if (cc == '-') {
if (source.startsWith(dashDashString, dashDashStringLength)) {
advanceAndASSERT(source, '-');
advanceAndASSERT(source, '-');
m_token->beginComment();
HTML_SWITCH_TO(CommentStartState);
} else if (source.remainingBytes() < dashDashStringLength)
return haveBufferedCharacterToken();
} else if (cc == 'D' || cc == 'd') {
if (source.startsWith(doctypeString, doctypeStringLength, true)) {
advanceStringAndASSERTIgnoringCase(source, doctypeString);
HTML_SWITCH_TO(DOCTYPEState);
} else if (source.remainingBytes() < doctypeStringLength)
return haveBufferedCharacterToken();
}
parseError();
HTML_RECONSUME_IN(BogusCommentState);
}
END_STATE()
// ---- Comments ----
// The comment states track dashes and the closing '>' only; no comment
// characters are appended to the token here.
HTML_BEGIN_STATE(CommentStartState) {
if (cc == '-')
HTML_ADVANCE_TO(CommentStartDashState);
else if (cc == '>') {
parseError();
return emitAndResumeIn(source, HTMLTokenizer::DataState);
} else if (cc == kEndOfFileMarker) {
parseError();
return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
} else {
HTML_ADVANCE_TO(CommentState);
}
}
END_STATE()
HTML_BEGIN_STATE(CommentStartDashState) {
if (cc == '-')
HTML_ADVANCE_TO(CommentEndState);
else if (cc == '>') {
parseError();
return emitAndResumeIn(source, HTMLTokenizer::DataState);
} else if (cc == kEndOfFileMarker) {
parseError();
return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
} else {
HTML_ADVANCE_TO(CommentState);
}
}
END_STATE()
HTML_BEGIN_STATE(CommentState) {
if (cc == '-')
HTML_ADVANCE_TO(CommentEndDashState);
else if (cc == kEndOfFileMarker) {
parseError();
return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
} else {
HTML_ADVANCE_TO(CommentState);
}
}
END_STATE()
HTML_BEGIN_STATE(CommentEndDashState) {
if (cc == '-')
HTML_ADVANCE_TO(CommentEndState);
else if (cc == kEndOfFileMarker) {
parseError();
return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
} else {
HTML_ADVANCE_TO(CommentState);
}
}
END_STATE()
HTML_BEGIN_STATE(CommentEndState) {
if (cc == '>')
return emitAndResumeIn(source, HTMLTokenizer::DataState);
else if (cc == '!') {
parseError();
HTML_ADVANCE_TO(CommentEndBangState);
} else if (cc == '-') {
parseError();
HTML_ADVANCE_TO(CommentEndState);
} else if (cc == kEndOfFileMarker) {
parseError();
return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
} else {
parseError();
HTML_ADVANCE_TO(CommentState);
}
}
END_STATE()
HTML_BEGIN_STATE(CommentEndBangState) {
if (cc == '-') {
HTML_ADVANCE_TO(CommentEndDashState);
} else if (cc == '>')
return emitAndResumeIn(source, HTMLTokenizer::DataState);
else if (cc == kEndOfFileMarker) {
parseError();
return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
} else {
HTML_ADVANCE_TO(CommentState);
}
}
END_STATE()
// ---- DOCTYPE ----
// The DOCTYPE states call beginDOCTYPE() on the token but do not store the
// name or the public/system identifiers they scan past.
HTML_BEGIN_STATE(DOCTYPEState) {
if (isTokenizerWhitespace(cc))
HTML_ADVANCE_TO(BeforeDOCTYPENameState);
else if (cc == kEndOfFileMarker) {
parseError();
m_token->beginDOCTYPE();
return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
} else {
parseError();
HTML_RECONSUME_IN(BeforeDOCTYPENameState);
}
}
END_STATE()
HTML_BEGIN_STATE(BeforeDOCTYPENameState) {
if (isTokenizerWhitespace(cc))
HTML_ADVANCE_TO(BeforeDOCTYPENameState);
else if (cc == '>') {
parseError();
m_token->beginDOCTYPE();
return emitAndResumeIn(source, HTMLTokenizer::DataState);
} else if (cc == kEndOfFileMarker) {
parseError();
m_token->beginDOCTYPE();
return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
} else {
m_token->beginDOCTYPE();
HTML_ADVANCE_TO(DOCTYPENameState);
}
}
END_STATE()
HTML_BEGIN_STATE(DOCTYPENameState) {
if (isTokenizerWhitespace(cc))
HTML_ADVANCE_TO(AfterDOCTYPENameState);
else if (cc == '>')
return emitAndResumeIn(source, HTMLTokenizer::DataState);
else if (cc == kEndOfFileMarker) {
parseError();
return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
} else {
HTML_ADVANCE_TO(DOCTYPENameState);
}
}
END_STATE()
HTML_BEGIN_STATE(AfterDOCTYPENameState) {
if (isTokenizerWhitespace(cc))
HTML_ADVANCE_TO(AfterDOCTYPENameState);
// NOTE(review): this `if` is not chained with `else` to the whitespace test
// above. That is only equivalent to an else-if chain if HTML_ADVANCE_TO
// always leaves the state block (goto/return) -- confirm against the macro
// definition.
if (cc == '>')
return emitAndResumeIn(source, HTMLTokenizer::DataState);
else if (cc == kEndOfFileMarker) {
parseError();
return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
} else {
// Case-insensitive lookahead for the "public" / "system" keywords; returns
// haveBufferedCharacterToken() when too few bytes remain to decide.
DEFINE_STATIC_LOCAL_STRING(publicString, "public");
DEFINE_STATIC_LOCAL_STRING(systemString, "system");
if (cc == 'P' || cc == 'p') {
if (source.startsWith(publicString, publicStringLength, true)) {
advanceStringAndASSERTIgnoringCase(source, publicString);
HTML_SWITCH_TO(AfterDOCTYPEPublicKeywordState);
} else if (source.remainingBytes() < publicStringLength)
return haveBufferedCharacterToken();
} else if (cc == 'S' || cc == 's') {
if (source.startsWith(systemString, systemStringLength, true)) {
advanceStringAndASSERTIgnoringCase(source, systemString);
HTML_SWITCH_TO(AfterDOCTYPESystemKeywordState);
} else if (source.remainingBytes() < systemStringLength)
return haveBufferedCharacterToken();
}
parseError();
HTML_ADVANCE_TO(BogusDOCTYPEState);
}
}
END_STATE()
HTML_BEGIN_STATE(AfterDOCTYPEPublicKeywordState) {
if (isTokenizerWhitespace(cc))
HTML_ADVANCE_TO(BeforeDOCTYPEPublicIdentifierState);
else if (cc == '"') {
parseError();
HTML_ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
} else if (cc == '\'') {
parseError();
HTML_ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
} else if (cc == '>') {
parseError();
return emitAndResumeIn(source, HTMLTokenizer::DataState);
} else if (cc == kEndOfFileMarker) {
parseError();
return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
} else {
parseError();
HTML_ADVANCE_TO(BogusDOCTYPEState);
}
}
END_STATE()
HTML_BEGIN_STATE(BeforeDOCTYPEPublicIdentifierState) {
if (isTokenizerWhitespace(cc))
HTML_ADVANCE_TO(BeforeDOCTYPEPublicIdentifierState);
else if (cc == '"') {
HTML_ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
} else if (cc == '\'') {
HTML_ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
} else if (cc == '>') {
parseError();
return emitAndResumeIn(source, HTMLTokenizer::DataState);
} else if (cc == kEndOfFileMarker) {
parseError();
return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
} else {
parseError();
HTML_ADVANCE_TO(BogusDOCTYPEState);
}
}
END_STATE()
HTML_BEGIN_STATE(DOCTYPEPublicIdentifierDoubleQuotedState) {
if (cc == '"')
HTML_ADVANCE_TO(AfterDOCTYPEPublicIdentifierState);
else if (cc == '>') {
parseError();
return emitAndResumeIn(source, HTMLTokenizer::DataState);
} else if (cc == kEndOfFileMarker) {
parseError();
return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
} else {
HTML_ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
}
}
END_STATE()
HTML_BEGIN_STATE(DOCTYPEPublicIdentifierSingleQuotedState) {
if (cc == '\'')
HTML_ADVANCE_TO(AfterDOCTYPEPublicIdentifierState);
else if (cc == '>') {
parseError();
return emitAndResumeIn(source, HTMLTokenizer::DataState);
} else if (cc == kEndOfFileMarker) {
parseError();
return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
} else {
HTML_ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
}
}
END_STATE()
HTML_BEGIN_STATE(AfterDOCTYPEPublicIdentifierState) {
if (isTokenizerWhitespace(cc))
HTML_ADVANCE_TO(BetweenDOCTYPEPublicAndSystemIdentifiersState);
else if (cc == '>')
return emitAndResumeIn(source, HTMLTokenizer::DataState);
else if (cc == '"') {
parseError();
HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
} else if (cc == '\'') {
parseError();
HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
} else if (cc == kEndOfFileMarker) {
parseError();
return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
} else {
parseError();
HTML_ADVANCE_TO(BogusDOCTYPEState);
}
}
END_STATE()
HTML_BEGIN_STATE(BetweenDOCTYPEPublicAndSystemIdentifiersState) {
if (isTokenizerWhitespace(cc))
HTML_ADVANCE_TO(BetweenDOCTYPEPublicAndSystemIdentifiersState);
else if (cc == '>')
return emitAndResumeIn(source, HTMLTokenizer::DataState);
else if (cc == '"') {
HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
} else if (cc == '\'') {
HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
} else if (cc == kEndOfFileMarker) {
parseError();
return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
} else {
parseError();
HTML_ADVANCE_TO(BogusDOCTYPEState);
}
}
END_STATE()
HTML_BEGIN_STATE(AfterDOCTYPESystemKeywordState) {
if (isTokenizerWhitespace(cc))
HTML_ADVANCE_TO(BeforeDOCTYPESystemIdentifierState);
else if (cc == '"') {
parseError();
HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
} else if (cc == '\'') {
parseError();
HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
} else if (cc == '>') {
parseError();
return emitAndResumeIn(source, HTMLTokenizer::DataState);
} else if (cc == kEndOfFileMarker) {
parseError();
return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
} else {
parseError();
HTML_ADVANCE_TO(BogusDOCTYPEState);
}
}
END_STATE()
HTML_BEGIN_STATE(BeforeDOCTYPESystemIdentifierState) {
if (isTokenizerWhitespace(cc))
HTML_ADVANCE_TO(BeforeDOCTYPESystemIdentifierState);
// NOTE(review): as in AfterDOCTYPENameState, this `if` is not chained with
// `else`; equivalence with an else-if chain depends on HTML_ADVANCE_TO
// always leaving the state block -- confirm against the macro definition.
if (cc == '"') {
HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
} else if (cc == '\'') {
HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
} else if (cc == '>') {
parseError();
return emitAndResumeIn(source, HTMLTokenizer::DataState);
} else if (cc == kEndOfFileMarker) {
parseError();
return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
} else {
parseError();
HTML_ADVANCE_TO(BogusDOCTYPEState);
}
}
END_STATE()
HTML_BEGIN_STATE(DOCTYPESystemIdentifierDoubleQuotedState) {
if (cc == '"')
HTML_ADVANCE_TO(AfterDOCTYPESystemIdentifierState);
else if (cc == '>') {
parseError();
return emitAndResumeIn(source, HTMLTokenizer::DataState);
} else if (cc == kEndOfFileMarker) {
parseError();
return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
} else {
HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
}
}
END_STATE()
HTML_BEGIN_STATE(DOCTYPESystemIdentifierSingleQuotedState) {
if (cc == '\'')
HTML_ADVANCE_TO(AfterDOCTYPESystemIdentifierState);
else if (cc == '>') {
parseError();
return emitAndResumeIn(source, HTMLTokenizer::DataState);
} else if (cc == kEndOfFileMarker) {
parseError();
return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
} else {
HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
}
}
END_STATE()
HTML_BEGIN_STATE(AfterDOCTYPESystemIdentifierState) {
if (isTokenizerWhitespace(cc))
HTML_ADVANCE_TO(AfterDOCTYPESystemIdentifierState);
else if (cc == '>')
return emitAndResumeIn(source, HTMLTokenizer::DataState);
else if (cc == kEndOfFileMarker) {
parseError();
return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
} else {
parseError();
HTML_ADVANCE_TO(BogusDOCTYPEState);
}
}
END_STATE()
HTML_BEGIN_STATE(BogusDOCTYPEState) {
if (cc == '>')
return emitAndResumeIn(source, HTMLTokenizer::DataState);
else if (cc == kEndOfFileMarker)
return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
HTML_ADVANCE_TO(BogusDOCTYPEState);
}
END_STATE()
// ---- CDATA sections ----
// NOTE(review): no other state in this function transitions into
// CDATASectionState, so it is presumably reachable only if m_state is set to
// it from outside this function -- verify against callers.
HTML_BEGIN_STATE(CDATASectionState) {
if (cc == ']')
HTML_ADVANCE_TO(CDATASectionRightSquareBracketState);
else if (cc == kEndOfFileMarker)
HTML_RECONSUME_IN(DataState);
else {
m_token->ensureIsCharacterToken();
HTML_ADVANCE_TO(CDATASectionState);
}
}
END_STATE()
HTML_BEGIN_STATE(CDATASectionRightSquareBracketState) {
if (cc == ']')
HTML_ADVANCE_TO(CDATASectionDoubleRightSquareBracketState);
else {
m_token->ensureIsCharacterToken();
HTML_RECONSUME_IN(CDATASectionState);
}
}
END_STATE()
HTML_BEGIN_STATE(CDATASectionDoubleRightSquareBracketState) {
if (cc == '>')
HTML_ADVANCE_TO(DataState);
else {
m_token->ensureIsCharacterToken();
HTML_RECONSUME_IN(CDATASectionState);
}
}
END_STATE()
}
// Every state above is expected to leave via return or a transition macro.
ASSERT_NOT_REACHED();
return false;
}
// Hook invoked by nextToken() wherever it detects malformed input. It
// currently does nothing beyond calling notImplemented().
inline void HTMLTokenizer::parseError()
{
    notImplemented();
}
}