chromium/ios/third_party/blink/src/html_tokenizer.mm

/*
 * Copyright (C) 2008 Apple Inc. All Rights Reserved.
 * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/
 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "ios/third_party/blink/src/html_tokenizer.h"

#include "html_markup_tokenizer_inlines.h"

namespace WebCore {

#define HTML_BEGIN_STATE(stateName) BEGIN_STATE(HTMLTokenizer, stateName)
#define HTML_RECONSUME_IN(stateName) RECONSUME_IN(HTMLTokenizer, stateName)
#define HTML_ADVANCE_TO(stateName) ADVANCE_TO(HTMLTokenizer, stateName)
#define HTML_SWITCH_TO(stateName) SWITCH_TO(HTMLTokenizer, stateName)

HTMLTokenizer::HTMLTokenizer()
    : m_state(HTMLTokenizer::DataState)
    , m_token(nullptr)
    , m_additionalAllowedCharacter('\0')
    , m_inputStreamPreprocessor(this)
{
}

HTMLTokenizer::~HTMLTokenizer()
{
}

void HTMLTokenizer::reset()
{
    m_state = HTMLTokenizer::DataState;
    m_token = 0;
    m_additionalAllowedCharacter = '\0';
}

bool HTMLTokenizer::flushBufferedEndTag(CharacterProvider& source)
{
    ASSERT(m_token->type() == HTMLToken::Character || m_token->type() == HTMLToken::Uninitialized);
    source.next();
    if (m_token->type() == HTMLToken::Character)
        return true;

    return false;
}

#define FLUSH_AND_ADVANCE_TO(stateName)                                    \
    do {                                                                   \
        m_state = HTMLTokenizer::stateName;                           \
        if (flushBufferedEndTag(source))                                   \
            return true;                                                   \
        if (source.isEmpty()                                               \
            || !m_inputStreamPreprocessor.peek(source))                    \
            return haveBufferedCharacterToken();                           \
        cc = m_inputStreamPreprocessor.nextInputCharacter();               \
        goto stateName;                                                    \
    } while (false)

bool HTMLTokenizer::nextToken(CharacterProvider& source, HTMLToken& token)
{
    // If we have a token in progress, then we're supposed to be called back
    // with the same token so we can finish it.
    ASSERT(!m_token || m_token == &token || token.type() == HTMLToken::Uninitialized);
    m_token = &token;

    if (source.isEmpty() || !m_inputStreamPreprocessor.peek(source))
        return haveBufferedCharacterToken();
    UChar cc = m_inputStreamPreprocessor.nextInputCharacter();

    // Source: http://www.whatwg.org/specs/web-apps/current-work/#tokenisation0
    switch (m_state) {
    HTML_BEGIN_STATE(DataState) {
        if (cc == '<') {
            if (m_token->type() == HTMLToken::Character) {
                // We have a bunch of character tokens queued up that we
                // are emitting lazily here.
                return true;
            }
            HTML_ADVANCE_TO(TagOpenState);
        } else if (cc == kEndOfFileMarker)
            return emitEndOfFile(source);
        else {
            m_token->ensureIsCharacterToken();
            HTML_ADVANCE_TO(DataState);
        }
    }
    END_STATE()

    HTML_BEGIN_STATE(TagOpenState) {
        if (cc == '!')
            HTML_ADVANCE_TO(MarkupDeclarationOpenState);
        else if (cc == '/')
            HTML_ADVANCE_TO(EndTagOpenState);
        else if (isASCIIUpper(cc)) {
            m_token->beginStartTag(toLowerCase(cc));
            HTML_ADVANCE_TO(TagNameState);
        } else if (isASCIILower(cc)) {
            m_token->beginStartTag(cc);
            HTML_ADVANCE_TO(TagNameState);
        } else if (cc == '?') {
            parseError();
            // The spec consumes the current character before switching
            // to the bogus comment state, but it's easier to implement
            // if we reconsume the current character.
            HTML_RECONSUME_IN(BogusCommentState);
        } else {
            parseError();
            m_token->ensureIsCharacterToken();
            HTML_RECONSUME_IN(DataState);
        }
    }
    END_STATE()

    HTML_BEGIN_STATE(EndTagOpenState) {
        if (isASCIIUpper(cc)) {
            m_token->beginEndTag(static_cast<LChar>(toLowerCase(cc)));
            HTML_ADVANCE_TO(TagNameState);
        } else if (isASCIILower(cc)) {
            m_token->beginEndTag(static_cast<LChar>(cc));
            HTML_ADVANCE_TO(TagNameState);
        } else if (cc == '>') {
            parseError();
            HTML_ADVANCE_TO(DataState);
        } else if (cc == kEndOfFileMarker) {
            parseError();
            m_token->ensureIsCharacterToken();
            HTML_RECONSUME_IN(DataState);
        } else {
            parseError();
            HTML_RECONSUME_IN(BogusCommentState);
        }
    }
    END_STATE()

    HTML_BEGIN_STATE(TagNameState) {
        if (isTokenizerWhitespace(cc))
            HTML_ADVANCE_TO(BeforeAttributeNameState);
        else if (cc == '/')
            HTML_ADVANCE_TO(SelfClosingStartTagState);
        else if (cc == '>')
            return emitAndResumeIn(source, HTMLTokenizer::DataState);
        else if (isASCIIUpper(cc)) {
            m_token->appendToName(toLowerCase(cc));
            HTML_ADVANCE_TO(TagNameState);
        } else if (cc == kEndOfFileMarker) {
            parseError();
            HTML_RECONSUME_IN(DataState);
        } else {
            m_token->appendToName(cc);
            HTML_ADVANCE_TO(TagNameState);
        }
    }
    END_STATE()

    HTML_BEGIN_STATE(BeforeAttributeNameState) {
        if (isTokenizerWhitespace(cc))
            HTML_ADVANCE_TO(BeforeAttributeNameState);
        else if (cc == '/')
            HTML_ADVANCE_TO(SelfClosingStartTagState);
        else if (cc == '>')
            return emitAndResumeIn(source, HTMLTokenizer::DataState);
        else if (isASCIIUpper(cc)) {
            HTML_ADVANCE_TO(AttributeNameState);
        } else if (cc == kEndOfFileMarker) {
            parseError();
            HTML_RECONSUME_IN(DataState);
        } else {
            if (cc == '"' || cc == '\'' || cc == '<' || cc == '=')
                parseError();
            HTML_ADVANCE_TO(AttributeNameState);
        }
    }
    END_STATE()

    HTML_BEGIN_STATE(AttributeNameState) {
        if (isTokenizerWhitespace(cc)) {
            HTML_ADVANCE_TO(AfterAttributeNameState);
        } else if (cc == '/') {
            HTML_ADVANCE_TO(SelfClosingStartTagState);
        } else if (cc == '=') {
            HTML_ADVANCE_TO(BeforeAttributeValueState);
        } else if (cc == '>') {
            return emitAndResumeIn(source, HTMLTokenizer::DataState);
        } else if (isASCIIUpper(cc)) {
            HTML_ADVANCE_TO(AttributeNameState);
        } else if (cc == kEndOfFileMarker) {
            parseError();
            HTML_RECONSUME_IN(DataState);
        } else {
            if (cc == '"' || cc == '\'' || cc == '<' || cc == '=')
                parseError();
            HTML_ADVANCE_TO(AttributeNameState);
        }
    }
    END_STATE()

    HTML_BEGIN_STATE(AfterAttributeNameState) {
        if (isTokenizerWhitespace(cc))
            HTML_ADVANCE_TO(AfterAttributeNameState);
        else if (cc == '/')
            HTML_ADVANCE_TO(SelfClosingStartTagState);
        else if (cc == '=')
            HTML_ADVANCE_TO(BeforeAttributeValueState);
        else if (cc == '>')
            return emitAndResumeIn(source, HTMLTokenizer::DataState);
        else if (isASCIIUpper(cc)) {
            HTML_ADVANCE_TO(AttributeNameState);
        } else if (cc == kEndOfFileMarker) {
            parseError();
            HTML_RECONSUME_IN(DataState);
        } else {
            if (cc == '"' || cc == '\'' || cc == '<')
                parseError();
            HTML_ADVANCE_TO(AttributeNameState);
        }
    }
    END_STATE()

    HTML_BEGIN_STATE(BeforeAttributeValueState) {
        if (isTokenizerWhitespace(cc))
            HTML_ADVANCE_TO(BeforeAttributeValueState);
        else if (cc == '"') {
            HTML_ADVANCE_TO(AttributeValueDoubleQuotedState);
        } else if (cc == '&') {
            HTML_RECONSUME_IN(AttributeValueUnquotedState);
        } else if (cc == '\'') {
            HTML_ADVANCE_TO(AttributeValueSingleQuotedState);
        } else if (cc == '>') {
            parseError();
            return emitAndResumeIn(source, HTMLTokenizer::DataState);
        } else if (cc == kEndOfFileMarker) {
            parseError();
            HTML_RECONSUME_IN(DataState);
        } else {
            if (cc == '<' || cc == '=' || cc == '`')
                parseError();
            HTML_ADVANCE_TO(AttributeValueUnquotedState);
        }
    }
    END_STATE()

    HTML_BEGIN_STATE(AttributeValueDoubleQuotedState) {
        if (cc == '"') {
            HTML_ADVANCE_TO(AfterAttributeValueQuotedState);
        } else if (cc == kEndOfFileMarker) {
            parseError();
            HTML_RECONSUME_IN(DataState);
        } else {
            HTML_ADVANCE_TO(AttributeValueDoubleQuotedState);
        }
    }
    END_STATE()

    HTML_BEGIN_STATE(AttributeValueSingleQuotedState) {
        if (cc == '\'') {
            HTML_ADVANCE_TO(AfterAttributeValueQuotedState);
        } else if (cc == kEndOfFileMarker) {
            parseError();
            HTML_RECONSUME_IN(DataState);
        } else {
            HTML_ADVANCE_TO(AttributeValueSingleQuotedState);
        }
    }
    END_STATE()

    HTML_BEGIN_STATE(AttributeValueUnquotedState) {
        if (isTokenizerWhitespace(cc)) {
            HTML_ADVANCE_TO(BeforeAttributeNameState);
        } else if (cc == '>') {
            return emitAndResumeIn(source, HTMLTokenizer::DataState);
        } else if (cc == kEndOfFileMarker) {
            parseError();
            HTML_RECONSUME_IN(DataState);
        } else {
            if (cc == '"' || cc == '\'' || cc == '<' || cc == '=' || cc == '`')
                parseError();
            HTML_ADVANCE_TO(AttributeValueUnquotedState);
        }
    }
    END_STATE()

    HTML_BEGIN_STATE(AfterAttributeValueQuotedState) {
        if (isTokenizerWhitespace(cc))
            HTML_ADVANCE_TO(BeforeAttributeNameState);
        else if (cc == '/')
            HTML_ADVANCE_TO(SelfClosingStartTagState);
        else if (cc == '>')
            return emitAndResumeIn(source, HTMLTokenizer::DataState);
        else if (cc == kEndOfFileMarker) {
            parseError();
            HTML_RECONSUME_IN(DataState);
        } else {
            parseError();
            HTML_RECONSUME_IN(BeforeAttributeNameState);
        }
    }
    END_STATE()

    HTML_BEGIN_STATE(SelfClosingStartTagState) {
        if (cc == '>') {
            return emitAndResumeIn(source, HTMLTokenizer::DataState);
        } else if (cc == kEndOfFileMarker) {
            parseError();
            HTML_RECONSUME_IN(DataState);
        } else {
            parseError();
            HTML_RECONSUME_IN(BeforeAttributeNameState);
        }
    }
    END_STATE()

    HTML_BEGIN_STATE(BogusCommentState) {
        m_token->beginComment();
        HTML_RECONSUME_IN(ContinueBogusCommentState);
    }
    END_STATE()

    HTML_BEGIN_STATE(ContinueBogusCommentState) {
        if (cc == '>')
            return emitAndResumeIn(source, HTMLTokenizer::DataState);
        else if (cc == kEndOfFileMarker)
            return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
        else {
            HTML_ADVANCE_TO(ContinueBogusCommentState);
        }
    }
    END_STATE()

    HTML_BEGIN_STATE(MarkupDeclarationOpenState) {
        DEFINE_STATIC_LOCAL_STRING(dashDashString, "--");
        DEFINE_STATIC_LOCAL_STRING(doctypeString, "doctype");
        if (cc == '-') {
            if (source.startsWith(dashDashString, dashDashStringLength)) {
                advanceAndASSERT(source, '-');
                advanceAndASSERT(source, '-');
                m_token->beginComment();
                HTML_SWITCH_TO(CommentStartState);
            } else if (source.remainingBytes() < dashDashStringLength)
                return haveBufferedCharacterToken();
        } else if (cc == 'D' || cc == 'd') {
            if (source.startsWith(doctypeString, doctypeStringLength, true)) {
                advanceStringAndASSERTIgnoringCase(source, doctypeString);
                HTML_SWITCH_TO(DOCTYPEState);
            } else if (source.remainingBytes() < doctypeStringLength)
                return haveBufferedCharacterToken();
        }
        parseError();
        HTML_RECONSUME_IN(BogusCommentState);
    }
    END_STATE()

    HTML_BEGIN_STATE(CommentStartState) {
        if (cc == '-')
            HTML_ADVANCE_TO(CommentStartDashState);
        else if (cc == '>') {
            parseError();
            return emitAndResumeIn(source, HTMLTokenizer::DataState);
        } else if (cc == kEndOfFileMarker) {
            parseError();
            return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
        } else {
            HTML_ADVANCE_TO(CommentState);
        }
    }
    END_STATE()

    HTML_BEGIN_STATE(CommentStartDashState) {
        if (cc == '-')
            HTML_ADVANCE_TO(CommentEndState);
        else if (cc == '>') {
            parseError();
            return emitAndResumeIn(source, HTMLTokenizer::DataState);
        } else if (cc == kEndOfFileMarker) {
            parseError();
            return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
        } else {
            HTML_ADVANCE_TO(CommentState);
        }
    }
    END_STATE()

    HTML_BEGIN_STATE(CommentState) {
        if (cc == '-')
            HTML_ADVANCE_TO(CommentEndDashState);
        else if (cc == kEndOfFileMarker) {
            parseError();
            return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
        } else {
            HTML_ADVANCE_TO(CommentState);
        }
    }
    END_STATE()

    HTML_BEGIN_STATE(CommentEndDashState) {
        if (cc == '-')
            HTML_ADVANCE_TO(CommentEndState);
        else if (cc == kEndOfFileMarker) {
            parseError();
            return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
        } else {
            HTML_ADVANCE_TO(CommentState);
        }
    }
    END_STATE()

    HTML_BEGIN_STATE(CommentEndState) {
        if (cc == '>')
            return emitAndResumeIn(source, HTMLTokenizer::DataState);
        else if (cc == '!') {
            parseError();
            HTML_ADVANCE_TO(CommentEndBangState);
        } else if (cc == '-') {
            parseError();
            HTML_ADVANCE_TO(CommentEndState);
        } else if (cc == kEndOfFileMarker) {
            parseError();
            return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
        } else {
            parseError();
            HTML_ADVANCE_TO(CommentState);
        }
    }
    END_STATE()

    HTML_BEGIN_STATE(CommentEndBangState) {
        if (cc == '-') {
            HTML_ADVANCE_TO(CommentEndDashState);
        } else if (cc == '>')
            return emitAndResumeIn(source, HTMLTokenizer::DataState);
        else if (cc == kEndOfFileMarker) {
            parseError();
            return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
        } else {
            HTML_ADVANCE_TO(CommentState);
        }
    }
    END_STATE()

    HTML_BEGIN_STATE(DOCTYPEState) {
        if (isTokenizerWhitespace(cc))
            HTML_ADVANCE_TO(BeforeDOCTYPENameState);
        else if (cc == kEndOfFileMarker) {
            parseError();
            m_token->beginDOCTYPE();
            return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
        } else {
            parseError();
            HTML_RECONSUME_IN(BeforeDOCTYPENameState);
        }
    }
    END_STATE()

    HTML_BEGIN_STATE(BeforeDOCTYPENameState) {
        if (isTokenizerWhitespace(cc))
            HTML_ADVANCE_TO(BeforeDOCTYPENameState);
        else if (cc == '>') {
            parseError();
            m_token->beginDOCTYPE();
            return emitAndResumeIn(source, HTMLTokenizer::DataState);
        } else if (cc == kEndOfFileMarker) {
            parseError();
            m_token->beginDOCTYPE();
            return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
        } else {
            m_token->beginDOCTYPE();
            HTML_ADVANCE_TO(DOCTYPENameState);
        }
    }
    END_STATE()

    HTML_BEGIN_STATE(DOCTYPENameState) {
        if (isTokenizerWhitespace(cc))
            HTML_ADVANCE_TO(AfterDOCTYPENameState);
        else if (cc == '>')
            return emitAndResumeIn(source, HTMLTokenizer::DataState);
        else if (cc == kEndOfFileMarker) {
            parseError();
            return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
        } else {
            HTML_ADVANCE_TO(DOCTYPENameState);
        }
    }
    END_STATE()

    HTML_BEGIN_STATE(AfterDOCTYPENameState) {
        if (isTokenizerWhitespace(cc))
            HTML_ADVANCE_TO(AfterDOCTYPENameState);
        if (cc == '>')
            return emitAndResumeIn(source, HTMLTokenizer::DataState);
        else if (cc == kEndOfFileMarker) {
            parseError();
            return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
        } else {
            DEFINE_STATIC_LOCAL_STRING(publicString, "public");
            DEFINE_STATIC_LOCAL_STRING(systemString, "system");
            if (cc == 'P' || cc == 'p') {
                if (source.startsWith(publicString, publicStringLength, true)) {
                    advanceStringAndASSERTIgnoringCase(source, publicString);
                    HTML_SWITCH_TO(AfterDOCTYPEPublicKeywordState);
                } else if (source.remainingBytes() < publicStringLength)
                    return haveBufferedCharacterToken();
            } else if (cc == 'S' || cc == 's') {
                if (source.startsWith(systemString, systemStringLength, true)) {
                    advanceStringAndASSERTIgnoringCase(source, systemString);
                    HTML_SWITCH_TO(AfterDOCTYPESystemKeywordState);
                } else if (source.remainingBytes() < systemStringLength)
                    return haveBufferedCharacterToken();
            }
            parseError();
            HTML_ADVANCE_TO(BogusDOCTYPEState);
        }
    }
    END_STATE()

    HTML_BEGIN_STATE(AfterDOCTYPEPublicKeywordState) {
        if (isTokenizerWhitespace(cc))
            HTML_ADVANCE_TO(BeforeDOCTYPEPublicIdentifierState);
        else if (cc == '"') {
            parseError();
            HTML_ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
        } else if (cc == '\'') {
            parseError();
            HTML_ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
        } else if (cc == '>') {
            parseError();
            return emitAndResumeIn(source, HTMLTokenizer::DataState);
        } else if (cc == kEndOfFileMarker) {
            parseError();
            return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
        } else {
            parseError();
            HTML_ADVANCE_TO(BogusDOCTYPEState);
        }
    }
    END_STATE()

    HTML_BEGIN_STATE(BeforeDOCTYPEPublicIdentifierState) {
        if (isTokenizerWhitespace(cc))
            HTML_ADVANCE_TO(BeforeDOCTYPEPublicIdentifierState);
        else if (cc == '"') {
            HTML_ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
        } else if (cc == '\'') {
            HTML_ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
        } else if (cc == '>') {
            parseError();
            return emitAndResumeIn(source, HTMLTokenizer::DataState);
        } else if (cc == kEndOfFileMarker) {
            parseError();
            return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
        } else {
            parseError();
            HTML_ADVANCE_TO(BogusDOCTYPEState);
        }
    }
    END_STATE()

    HTML_BEGIN_STATE(DOCTYPEPublicIdentifierDoubleQuotedState) {
        if (cc == '"')
            HTML_ADVANCE_TO(AfterDOCTYPEPublicIdentifierState);
        else if (cc == '>') {
            parseError();
            return emitAndResumeIn(source, HTMLTokenizer::DataState);
        } else if (cc == kEndOfFileMarker) {
            parseError();
            return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
        } else {
            HTML_ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
        }
    }
    END_STATE()

    HTML_BEGIN_STATE(DOCTYPEPublicIdentifierSingleQuotedState) {
        if (cc == '\'')
            HTML_ADVANCE_TO(AfterDOCTYPEPublicIdentifierState);
        else if (cc == '>') {
            parseError();
            return emitAndResumeIn(source, HTMLTokenizer::DataState);
        } else if (cc == kEndOfFileMarker) {
            parseError();
            return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
        } else {
            HTML_ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
        }
    }
    END_STATE()

    HTML_BEGIN_STATE(AfterDOCTYPEPublicIdentifierState) {
        if (isTokenizerWhitespace(cc))
            HTML_ADVANCE_TO(BetweenDOCTYPEPublicAndSystemIdentifiersState);
        else if (cc == '>')
            return emitAndResumeIn(source, HTMLTokenizer::DataState);
        else if (cc == '"') {
            parseError();
            HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
        } else if (cc == '\'') {
            parseError();
            HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
        } else if (cc == kEndOfFileMarker) {
            parseError();
            return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
        } else {
            parseError();
            HTML_ADVANCE_TO(BogusDOCTYPEState);
        }
    }
    END_STATE()

    HTML_BEGIN_STATE(BetweenDOCTYPEPublicAndSystemIdentifiersState) {
        if (isTokenizerWhitespace(cc))
            HTML_ADVANCE_TO(BetweenDOCTYPEPublicAndSystemIdentifiersState);
        else if (cc == '>')
            return emitAndResumeIn(source, HTMLTokenizer::DataState);
        else if (cc == '"') {
            HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
        } else if (cc == '\'') {
            HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
        } else if (cc == kEndOfFileMarker) {
            parseError();
            return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
        } else {
            parseError();
            HTML_ADVANCE_TO(BogusDOCTYPEState);
        }
    }
    END_STATE()

    HTML_BEGIN_STATE(AfterDOCTYPESystemKeywordState) {
        if (isTokenizerWhitespace(cc))
            HTML_ADVANCE_TO(BeforeDOCTYPESystemIdentifierState);
        else if (cc == '"') {
            parseError();
            HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
        } else if (cc == '\'') {
            parseError();
            HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
        } else if (cc == '>') {
            parseError();
            return emitAndResumeIn(source, HTMLTokenizer::DataState);
        } else if (cc == kEndOfFileMarker) {
            parseError();
            return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
        } else {
            parseError();
            HTML_ADVANCE_TO(BogusDOCTYPEState);
        }
    }
    END_STATE()

    HTML_BEGIN_STATE(BeforeDOCTYPESystemIdentifierState) {
        if (isTokenizerWhitespace(cc))
            HTML_ADVANCE_TO(BeforeDOCTYPESystemIdentifierState);
        if (cc == '"') {
            HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
        } else if (cc == '\'') {
            HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
        } else if (cc == '>') {
            parseError();
            return emitAndResumeIn(source, HTMLTokenizer::DataState);
        } else if (cc == kEndOfFileMarker) {
            parseError();
            return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
        } else {
            parseError();
            HTML_ADVANCE_TO(BogusDOCTYPEState);
        }
    }
    END_STATE()

    HTML_BEGIN_STATE(DOCTYPESystemIdentifierDoubleQuotedState) {
        if (cc == '"')
            HTML_ADVANCE_TO(AfterDOCTYPESystemIdentifierState);
        else if (cc == '>') {
            parseError();
            return emitAndResumeIn(source, HTMLTokenizer::DataState);
        } else if (cc == kEndOfFileMarker) {
            parseError();
            return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
        } else {
            HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
        }
    }
    END_STATE()

    HTML_BEGIN_STATE(DOCTYPESystemIdentifierSingleQuotedState) {
        if (cc == '\'')
            HTML_ADVANCE_TO(AfterDOCTYPESystemIdentifierState);
        else if (cc == '>') {
            parseError();
            return emitAndResumeIn(source, HTMLTokenizer::DataState);
        } else if (cc == kEndOfFileMarker) {
            parseError();
            return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
        } else {
            HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
        }
    }
    END_STATE()

    HTML_BEGIN_STATE(AfterDOCTYPESystemIdentifierState) {
        if (isTokenizerWhitespace(cc))
            HTML_ADVANCE_TO(AfterDOCTYPESystemIdentifierState);
        else if (cc == '>')
            return emitAndResumeIn(source, HTMLTokenizer::DataState);
        else if (cc == kEndOfFileMarker) {
            parseError();
            return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
        } else {
            parseError();
            HTML_ADVANCE_TO(BogusDOCTYPEState);
        }
    }
    END_STATE()

    HTML_BEGIN_STATE(BogusDOCTYPEState) {
        if (cc == '>')
            return emitAndResumeIn(source, HTMLTokenizer::DataState);
        else if (cc == kEndOfFileMarker)
            return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
        HTML_ADVANCE_TO(BogusDOCTYPEState);
    }
    END_STATE()

    HTML_BEGIN_STATE(CDATASectionState) {
        if (cc == ']')
            HTML_ADVANCE_TO(CDATASectionRightSquareBracketState);
        else if (cc == kEndOfFileMarker)
            HTML_RECONSUME_IN(DataState);
        else {
            m_token->ensureIsCharacterToken();
            HTML_ADVANCE_TO(CDATASectionState);
        }
    }
    END_STATE()

    HTML_BEGIN_STATE(CDATASectionRightSquareBracketState) {
        if (cc == ']')
            HTML_ADVANCE_TO(CDATASectionDoubleRightSquareBracketState);
        else {
            m_token->ensureIsCharacterToken();
            HTML_RECONSUME_IN(CDATASectionState);
        }
    }
    END_STATE()

    HTML_BEGIN_STATE(CDATASectionDoubleRightSquareBracketState) {
        if (cc == '>')
            HTML_ADVANCE_TO(DataState);
        else {
            m_token->ensureIsCharacterToken();
            HTML_RECONSUME_IN(CDATASectionState);
        }
    }
    END_STATE()

    }

    ASSERT_NOT_REACHED();
    return false;
}

inline void HTMLTokenizer::parseError()
{
    notImplemented();
}

}