/* * Copyright (C) 2009 Google Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following disclaimer * in the documentation and/or other materials provided with the * distribution. * * Neither the name of Google Inc. nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // How we handle the base tag better. // Current status: // Currently, the normal way we handle the base tag is: // a) For those links which have corresponding local saved files, such as // savable CSS, JavaScript files, they will be rewritten as relative URLs which // point to the local saved files. 
Those links cannot be resolved as absolute // file URLs because, if they were, moving the saved files from one directory to // another would turn those file URLs into dead links. // b) For those links which have no corresponding local saved files, such as // links in A, AREA tags, they will be resolved as absolute URLs. // c) We comment out all base tags when serializing DOM for the page. // FireFox also uses the above way to handle the base tag. // // Problem: // This approach cannot handle the following situation: // the base tag is written by JavaScript. // For example, the page "www.yahoo.com" uses // "document.write('<base href="http://www.yahoo.com/"...');" to set up the base URL // of the page when loading it. So when saving the page as completed-HTML, assume // that we save "www.yahoo.com" to "c:\yahoo.htm". When we later load the saved // completed-HTML page, the JavaScript will insert a base tag // <base href="http://www.yahoo.com/"...> into the DOM, so all URLs which point to // local saved resource files will be resolved as // "http://www.yahoo.com/yahoo_files/...", which means none of the saved resource // files can be loaded correctly. The page will also be rendered badly since // all saved sub-resource files (such as CSS, JavaScript files) and sub-frame // files cannot be fetched. // Currently FireFox, IE and WebKit based browsers all have this problem. // // Solution: // My solution is that we comment out the old base tag and write a new base tag: // <base href="." ...> after the previously commented base tag. In WebKit, it // always uses the latest "href" attribute of base tag to set document's base // URL. Based on this behavior, when we encounter a base tag, we comment it out and // write a new base tag <base href="."> after the previously commented base tag. // The newly added base tag helps the engine locate the correct base URL for // correctly loading local saved resource files. 
Also I think we need to inherit // the base target value from the document object when appending the new base tag. // If there are multiple base tags in the original document, we will comment out all old // base tags and append a new base tag after each old base tag because we do not // know whether those old base tags are original content or were added by JavaScript. If // they were added by JavaScript, it means that when loading the saved page, the script(s) // will still insert base tag(s) into the DOM, so the newly added base tag(s) can // override the incorrect base URL and make sure we always load the correct local // saved resource files. #include "third_party/blink/renderer/core/frame/web_frame_serializer_impl.h" #include "third_party/blink/public/platform/web_vector.h" #include "third_party/blink/renderer/core/dom/document.h" #include "third_party/blink/renderer/core/dom/document_type.h" #include "third_party/blink/renderer/core/dom/element.h" #include "third_party/blink/renderer/core/editing/serializers/serialization.h" #include "third_party/blink/renderer/core/frame/frame_serializer.h" #include "third_party/blink/renderer/core/frame/web_local_frame_impl.h" #include "third_party/blink/renderer/core/html/forms/html_form_element.h" #include "third_party/blink/renderer/core/html/html_all_collection.h" #include "third_party/blink/renderer/core/html/html_base_element.h" #include "third_party/blink/renderer/core/html/html_document.h" #include "third_party/blink/renderer/core/html/html_element.h" #include "third_party/blink/renderer/core/html/html_frame_element_base.h" #include "third_party/blink/renderer/core/html/html_frame_owner_element.h" #include "third_party/blink/renderer/core/html/html_head_element.h" #include "third_party/blink/renderer/core/html/html_html_element.h" #include "third_party/blink/renderer/core/html/html_iframe_element.h" #include "third_party/blink/renderer/core/html/html_meta_element.h" #include "third_party/blink/renderer/core/html_names.h" #include 
"third_party/blink/renderer/core/loader/document_loader.h" #include "third_party/blink/renderer/core/loader/frame_loader.h" #include "third_party/blink/renderer/platform/wtf/text/text_encoding.h" namespace blink { namespace { // Generate the default base tag declaration. /* Takes the base target as |base_target| and returns the declaration as a String. */ String GenerateBaseTagDeclaration(const String& base_target) { … } } // namespace // Maximum length of the data buffer which is used to temporarily save generated // html content data. This is a soft limit which might be exceeded if a very large // contiguous string is found in the html document. static const unsigned kDataBufferCapacity = …; /* Constructs the parameter object from a URL, a text encoding and a Document; the helpers below receive it as |param|. */ WebFrameSerializerImpl::SerializeDomParam::SerializeDomParam( const KURL& url, const WTF::TextEncoding& text_encoding, Document* document) : … { … } /* Hook for the point before |element|'s open tag is serialized; returns a String and takes a writable |need_skip| flag. NOTE(review): body not visible here - presumably the returned text is emitted ahead of the tag and |*need_skip| tells the caller to omit the tag; confirm. */ String WebFrameSerializerImpl::PreActionBeforeSerializeOpenTag( const Element* element, SerializeDomParam* param, bool* need_skip) { … } /* Hook for the point after |element|'s open tag is serialized; returns a String. NOTE(review): presumably extra markup to append right after the open tag - confirm. */ String WebFrameSerializerImpl::PostActionAfterSerializeOpenTag( const Element* element, SerializeDomParam* param) { … } /* Hook for the point before |element|'s end tag is serialized; returns a String and takes a writable |need_skip| flag. NOTE(review): presumably mirrors the open-tag pre-action, with |*need_skip| suppressing the end tag - confirm. */ String WebFrameSerializerImpl::PreActionBeforeSerializeEndTag( const Element* element, SerializeDomParam* param, bool* need_skip) { … } // After we finish serializing the end tag of an element, we give the target // element a chance to do some post work to add some additional data. String WebFrameSerializerImpl::PostActionAfterSerializeEndTag( const Element* element, SerializeDomParam* param) { … } /* Takes generated markup in |result| together with the serialization state in |param|. NOTE(review): going by the name and the kDataBufferCapacity comment, presumably buffers |result| and flushes once the soft limit is passed - confirm. */ void WebFrameSerializerImpl::SaveHTMLContentToBuffer(const String& result, SerializeDomParam* param) { … } /* Takes a serialization |status| and a |flush_option| along with |param|. NOTE(review): presumably encodes the buffered text with the document's text encoding and hands it to the client - confirm. */ void WebFrameSerializerImpl::EncodeAndFlushBuffer( WebFrameSerializerClient::FrameSerializationStatus status, SerializeDomParam* param, FlushOption flush_option) { … } // TODO(yosin): We should utilize |MarkupFormatter| here to share code, // especially escaping attribute values, done by |WebEntities| |m_htmlEntities| // and |m_xmlEntities|. 
/* Takes one attribute as |attr_name| / |attr_value|, a StringBuilder |result| passed by non-const reference, and an |is_html_document| flag. NOTE(review): body not visible here - presumably appends the escaped attribute to |result|, choosing HTML or XML entity escaping from the flag; confirm. */ void WebFrameSerializerImpl::AppendAttribute(StringBuilder& result, bool is_html_document, const String& attr_name, const String& attr_value) { … } /* Open-tag counterpart of EndTagToString below. NOTE(review): presumably serializes |element|'s open tag and attributes using the state in |param| - confirm. */ void WebFrameSerializerImpl::OpenTagToString(Element* element, SerializeDomParam* param) { … } // Serialize end tag of a specified element. void WebFrameSerializerImpl::EndTagToString(Element* element, SerializeDomParam* param) { … } /* Takes a ShadowRoot and the serialization state. NOTE(review): presumably emits markup that represents |shadow_root| - confirm. */ void WebFrameSerializerImpl::ShadowRootTagToString(ShadowRoot* shadow_root, SerializeDomParam* param) { … } /* Takes a DOM |node| and the serialization state. NOTE(review): presumably the per-node entry point that dispatches on node type and walks children - confirm. */ void WebFrameSerializerImpl::BuildContentForNode(Node* node, SerializeDomParam* param) { … } /* Constructs the serializer from the |frame| to serialize, the |client|, a link-rewriting |delegate| and the |save_with_empty_url| flag. NOTE(review): ownership and nullability of the pointer arguments are not visible here - confirm against the header. */ WebFrameSerializerImpl::WebFrameSerializerImpl( WebLocalFrame* frame, WebFrameSerializerClient* client, WebFrameSerializer::LinkRewritingDelegate* delegate, bool save_with_empty_url) : … { … } /* Entry point with no arguments; returns a bool. NOTE(review): presumably runs the serialization and reports success - confirm what false means for callers. */ bool WebFrameSerializerImpl::Serialize() { … } } // namespace blink