// Copyright 2016 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Fuzzer for content/renderer
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <memory>
#include <random>
#include "base/json/json_reader.h"
#include "base/json/json_writer.h"
#include "base/logging.h"
#include "base/memory/raw_ptr.h"
#include "base/notreached.h"
#include "base/values.h"
#include "content/test/fuzzer/fuzzer_support.h"
#include "testing/libfuzzer/libfuzzer_exports.h"
extern "C" size_t LLVMFuzzerMutate(uint8_t* Data, size_t Size, size_t MaxSize);
namespace content {
using Random = std::mt19937;
// Pick random element out of stream.
template <typename T>
class ReservoirSampler {
public:
explicit ReservoirSampler(Random* rnd) : rnd_(rnd), n_(0) {}
ReservoirSampler(Random* rnd, const T& t) : rnd_(rnd), t_(t), n_(0) {}
inline bool empty() const { return !n_; }
inline const T& get() const { return t_; }
inline void operator()(const T& t) {
n_++;
// take with 1/n probability.
if ((*rnd_)() % n_ == 0) {
t_ = t;
}
}
private:
const raw_ptr<Random> rnd_;
T t_;
int n_;
};
// Invoke libFuzzer's mutate on std::string.
static size_t MutateString(std::string* str, size_t maxLen) {
static std::vector<uint8_t> v;
v.resize(maxLen);
size_t inLen = std::min(maxLen, str->length());
memcpy(v.data(), str->data(), inLen);
size_t len =
LLVMFuzzerMutate(reinterpret_cast<uint8_t*>(v.data()), inLen, maxLen);
if (!len)
return 0;
*str = std::string(reinterpret_cast<char*>(v.data()), len);
return len;
}
class NodeList;
class Element;
class Text;
class Node {
public:
Node(const Node&) = delete;
Node& operator=(const Node&) = delete;
virtual ~Node() {}
static std::unique_ptr<Node> CreateRandom(Random* rnd);
virtual bool IsElement() const { return false; }
virtual bool IsText() const { return false; }
Element* AsElement() {
CHECK(IsElement());
return reinterpret_cast<Element*>(this);
}
Text* AsText() {
CHECK(IsText());
return reinterpret_cast<Text*>(this);
}
protected:
Node() {}
virtual base::Value ToJson() = 0;
virtual void ParseJson(const base::Value::Dict& dict) = 0;
virtual void WriteHtml(std::string* out) = 0;
private:
friend class NodeList;
static std::unique_ptr<Node> FromValue(const base::Value& value);
};
using Attrs = std::map<std::string, std::string>;
using AttrPosition = std::pair<Attrs*, Attrs::iterator>;
class NodeList : public std::vector<std::unique_ptr<Node>> {
public:
// Position inside the tree. Iterator could be end().
using NodePosition = std::pair<NodeList*, NodeList::iterator>;
NodeList() {}
NodeList(const NodeList&) = delete;
NodeList& operator=(const NodeList&) = delete;
static std::unique_ptr<NodeList> ParseJsonString(const uint8_t* data,
size_t size) {
auto nodes = std::make_unique<NodeList>();
std::optional<base::Value> value(base::JSONReader::Read(
std::string(reinterpret_cast<const char*>(data), size)));
if (value && value->is_list())
nodes->ParseJsonList(value->GetList());
return nodes;
}
base::Value ToJson() const {
base::Value::List result;
for (const auto& node : *this) {
result.Append(node->ToJson());
}
return base::Value(std::move(result));
}
void ToJsonString(std::string* out) const {
base::Value json = ToJson();
bool succ = base::JSONWriter::Write(json, out);
CHECK(succ);
}
void WriteHtml(std::string* out) const {
for (const auto& node : *this) {
node->WriteHtml(out);
}
}
// Traverses the tree an invokes fn on all NodePositions.
template <typename Fn>
void WalkTree(Fn fn);
template <typename FilterFn>
NodePosition PickRandomPos(Random* rnd, FilterFn filter);
AttrPosition PickRandomAttribute(Random* rnd);
void ParseJsonList(const base::Value::List& list) {
for (const auto& item : list) {
std::unique_ptr<Node> node(Node::FromValue(item));
if (node) {
push_back(std::move(node));
}
}
}
private:
friend class Element;
};
class Element : public Node {
public:
static std::unique_ptr<Node> CreateRandom(Random* rnd) {
static std::vector<std::string> tagNames;
if (tagNames.empty()) {
tagNames.insert(
tagNames.begin(),
{"a", "abbr", "address", "area", "article",
"aside", "audio", "b", "base", "bdi",
"bdo", "blockquote", "body", "br", "button",
"canvas", "caption", "cite", "code", "col",
"colgroup", "command", "datalist", "dd", "del",
"details", "dfn", "div", "dl", "dt",
"em", "embed", "fieldset", "figcaption", "figure",
"footer", "form", "h1", "h2", "h3",
"h4", "h5", "h6", "head", "header",
"hgroup", "hr", "html", "i", "iframe",
"img", "input", "ins", "kbd", "keygen",
"label", "legend", "li", "link", "map",
"mark", "menu", "meta", "meter", "nav",
"noscript", "object", "ol", "optgroup", "option",
"output", "p", "param", "pre", "progress",
"q", "rp", "rt", "ruby", "s",
"samp", "script", "section", "small", "source",
"span", "strong", "style", "sub", "summary",
"sup", "table", "tbody", "td", "textarea",
"tfoot", "th", "thead", "time", "title",
"tr", "track", "u", "ul", "var",
"video", "wbr"});
}
return std::unique_ptr<Element>(
new Element(tagNames[(*rnd)() % tagNames.size()]));
}
Element(const Element&) = delete;
Element& operator=(const Element&) = delete;
bool IsElement() const override { return true; }
void SetAttr(const std::string& attr, const std::string& value) {
attrs_[attr] = value;
}
protected:
void WriteHtml(std::string* out) override {
*out += "<" + tag_name_;
if (!attrs_.empty()) {
for (auto p : attrs_) {
*out += " ";
*out += p.first;
*out += "='";
*out += p.second;
*out += "'";
}
}
*out += ">";
children_.WriteHtml(out);
*out += "</" + tag_name_ + ">";
}
base::Value ToJson() override {
base::Value::Dict dict;
dict.Set("e", tag_name_);
if (!children_.empty())
dict.Set("c", children_.ToJson());
if (!attrs_.empty()) {
base::Value::Dict attrs_dict;
for (const auto& pair : attrs_) {
attrs_dict.Set(pair.first, pair.second);
}
dict.Set("a", std::move(attrs_dict));
}
return base::Value(std::move(dict));
}
protected:
void ParseJson(const base::Value::Dict& dict) override {
const base::Value* e_value = dict.Find("e");
CHECK(e_value);
const std::string* e_str = e_value->GetIfString();
if (e_str) {
tag_name_ = *e_str;
}
const base::Value::List* c_list = dict.FindList("c");
if (c_list)
children_.ParseJsonList(*c_list);
const base::Value::Dict* a_dict = dict.FindDict("a");
if (a_dict) {
for (const auto item : *a_dict) {
if (item.second.is_string())
attrs_[item.first] = item.second.GetString();
}
}
}
private:
friend class Node;
friend class NodeList;
Element() : tag_name_("__invalid__") {}
explicit Element(const std::string& tag_name) : tag_name_(tag_name) {}
std::string tag_name_;
NodeList children_;
Attrs attrs_;
};
class Text : public Node {
public:
static std::unique_ptr<Node> CreateRandom(Random* rnd) {
return std::unique_ptr<Node>(new Text);
}
Text(const Text&) = delete;
Text& operator=(const Text&) = delete;
size_t MutateText() {
// TODO(aizatsky): constant?
return MutateString(&text_, 32);
}
protected:
void WriteHtml(std::string* out) override { *out += text_; }
base::Value ToJson() override {
base::Value::Dict result;
result.Set("t", text_);
return base::Value(std::move(result));
}
void ParseJson(const base::Value::Dict& dict) override {
const base::Value* t_value = dict.Find("t");
CHECK(t_value);
const std::string* t_str = t_value->GetIfString();
if (t_str) {
text_ = *t_str;
}
}
bool IsText() const override { return true; }
private:
friend class Node;
Text() {}
explicit Text(std::string text) : text_(text) {}
std::string text_;
};
template <typename Fn>
void NodeList::WalkTree(Fn fn) {
for (iterator i = begin(); i != end(); ++i) {
fn(NodePosition(this, i));
value_type& node = *i;
if (node->IsElement()) {
node->AsElement()->children_.WalkTree(fn);
}
}
fn(NodePosition(this, end()));
}
template <typename FilterFn>
NodeList::NodePosition NodeList::PickRandomPos(Random* rnd,
FilterFn filter_fn) {
ReservoirSampler<NodePosition> sampler(rnd, NodePosition(nullptr, end()));
WalkTree([&filter_fn, &sampler](const NodePosition& p) {
if (filter_fn(p))
sampler(p);
});
return sampler.get();
}
AttrPosition NodeList::PickRandomAttribute(Random* rnd) {
ReservoirSampler<AttrPosition> sampler(
rnd, AttrPosition(nullptr, Attrs::iterator()));
WalkTree([&sampler](const NodePosition& p) {
if (p.second == p.first->end() || !(*p.second)->IsElement())
return;
Element* e = (*p.second)->AsElement();
for (Attrs::iterator i = e->attrs_.begin(); i != e->attrs_.end(); ++i) {
sampler(make_pair(&e->attrs_, i));
}
});
return sampler.get();
}
std::unique_ptr<Node> Node::CreateRandom(Random* rnd) {
switch ((*rnd)() % 2) {
case 0:
return Element::CreateRandom(rnd);
case 1:
return Text::CreateRandom(rnd);
default:
NOTREACHED() << "SHOULD NOT HAPPEN";
}
}
std::unique_ptr<Node> Node::FromValue(const base::Value& value) {
if (!value.is_dict()) {
return nullptr;
}
const base::Value::Dict& dict = value.GetDict();
std::unique_ptr<Node> node;
if (dict.Find("t")) {
node.reset(new Text());
} else if (dict.Find("e")) {
node.reset(new Element());
} else {
LOG(ERROR) << "Bad node";
return nullptr;
}
node->ParseJson(dict);
return node;
}
static bool Mutate_InsertNode(NodeList* nodes, Random* rnd) {
auto [list, pos] = nodes->PickRandomPos(
rnd, [](const NodeList::NodePosition&) { return true; });
list->insert(pos, Node::CreateRandom(rnd));
return true;
}
static bool Mutate_Text(NodeList* nodes, Random* rnd) {
auto [list, pos] =
nodes->PickRandomPos(rnd, [](const NodeList::NodePosition& p) {
return p.second != p.first->end() && (*p.second)->IsText();
});
if (!list)
return false;
return (*pos)->AsText()->MutateText();
}
static bool Mutate_DeleteNode(NodeList* nodes, Random* rnd) {
auto [list, pos] =
nodes->PickRandomPos(rnd, [](const NodeList::NodePosition& p) {
return p.second != p.first->end();
});
if (list == nullptr)
return false;
list->erase(pos);
return true;
}
static bool Mutate_AddAttribute(NodeList* nodes, Random* rnd) {
static std::vector<std::string> attrNames;
if (attrNames.empty()) {
attrNames.insert(attrNames.begin(), {"accept", "accept-charset",
"accesskey", "action",
"align", "alt",
"async", "autocomplete",
"autofocus", "autoplay",
"autosave", "bgcolor",
"border", "buffered",
"challenge", "charset",
"checked", "cite",
"class", "code",
"codebase", "color",
"cols", "colspan",
"content", "cointenteditable",
"contextmenu", "controls",
"data", "data-a",
"data-b", "data-c",
"datetime", "default",
"defer", "dir",
"dirname", "disabled",
"download", "draggable",
"dropzone", "enctype",
"for", "form",
"formaction", "headers",
"height", "hidden",
"high", "href",
"hreflang", "http-equiv",
"icon", "id",
"ismap", "itemprop",
"keytype", "kind",
"label", "lang",
"language", "list",
"loop", "low",
"manifest", "max",
"maxlength", "media",
"method", "min",
"multiple", "muted",
"name", "novalidate",
"open", "optimum",
"pattern", "ping",
"placeholder", "poster",
"preload", "radiogroup",
"readonly", "rel",
"required", "reversed",
"rows", "rowspan",
"sandbox", "scope",
"scoped", "seamless",
"selected", "shape",
"size", "sizes",
"span", "spellcheck",
"src", "srcdoc",
"srclang", "srcset",
"start", "step",
"style", "summary",
"tabindex", "Target",
"title", "type",
"usemap", "value",
"width", "wrap"});
}
auto [list, pos] =
nodes->PickRandomPos(rnd, [](const NodeList::NodePosition& p) {
return p.second != p.first->end() && (*p.second)->IsElement();
});
if (list == nullptr)
return false;
std::string name = attrNames[(*rnd)() % attrNames.size()];
(*pos)->AsElement()->SetAttr(name, "");
return true;
}
static bool Mutate_DeleteAttribute(NodeList* nodes, Random* rnd) {
auto [attrs, pos] = nodes->PickRandomAttribute(rnd);
if (attrs == nullptr)
return false;
attrs->erase(pos);
return true;
}
static bool Mutate_AttributeValue(NodeList* nodes, Random* rnd) {
auto [attrs, pos] = nodes->PickRandomAttribute(rnd);
if (attrs == nullptr)
return false;
// TODO(aizatsky): constant
return MutateString(&pos->second, 32);
}
extern "C" size_t LLVMFuzzerCustomMutator(uint8_t* data,
size_t size,
size_t max_size,
unsigned int Seed) {
using NodeMutator = bool (*)(NodeList * list, Random * rnd);
static std::vector<NodeMutator> mutators;
if (mutators.empty()) {
mutators.push_back(&Mutate_InsertNode);
mutators.push_back(&Mutate_DeleteNode);
mutators.push_back(&Mutate_Text);
mutators.push_back(&Mutate_AddAttribute);
mutators.push_back(&Mutate_DeleteAttribute);
mutators.push_back(&Mutate_AttributeValue);
}
auto nodes = NodeList::ParseJsonString(data, size);
Random rnd(Seed);
NodeMutator mutator = mutators[rnd() % mutators.size()];
if (!mutator(nodes.get(), &rnd)) {
return 0;
}
std::string result;
nodes->ToJsonString(&result);
VLOG(1) << "mutatedjson:'" << result << "'";
if (result.size() > max_size) {
VLOG(1) << "**** OVERFLOW : " << result.size();
return 0;
}
memcpy(data, result.data(), result.size());
return result.size();
}
static Env* env = nullptr;
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
// Environment has to be initialized in the same thread.
if (env == nullptr)
env = new Env();
auto nodes = NodeList::ParseJsonString(data, size);
std::string html;
nodes->WriteHtml(&html);
env->adapter->LoadHTML(html, "http://www.example.org");
return 0;
}
} // namespace content