// Copyright 2011 The Chromium Authors // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. // // Common types and constants for extracting and evaluating features in the // client-side phishing detection model. A feature is simply a string and an // associated floating-point value between 0 and 1. The phishing // classification model contains rules which give an appropriate weight to each // feature or combination of features. These values can then be summed to // compute a final phishiness score. // // Some features are boolean features. If these features are set, they always // have a value of 0.0 or 1.0. In practice, the features are only set if the // value is true (1.0). // // We also use token features. These features have a unique name that is // constructed from the URL or page contents that we are classifying, for // example, "UrlDomain=chromium". These features are also always set to 1.0 // if they are present. // // The intermediate storage of the features for a URL is a FeatureMap, which is // just a thin wrapper around a map of feature name to value. The entire set // of features for a URL is extracted before we do any scoring. #ifndef COMPONENTS_SAFE_BROWSING_CONTENT_RENDERER_PHISHING_CLASSIFIER_FEATURES_H_ #define COMPONENTS_SAFE_BROWSING_CONTENT_RENDERER_PHISHING_CLASSIFIER_FEATURES_H_ #include <stddef.h> #include <string> #include <unordered_map> #include "base/feature_list.h" #include "base/metrics/field_trial_params.h" safe_browsing // namespace safe_browsing #endif // COMPONENTS_SAFE_BROWSING_CONTENT_RENDERER_PHISHING_CLASSIFIER_FEATURES_H_