chromium/components/assist_ranker/proto/generic_logistic_regression_model.proto

// Copyright 2017 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// This proto defines the parameters and weights for a logistic regression
// model.

syntax = "proto2";

option optimize_for = LITE_RUNTIME;

import "example_preprocessor.proto";

package assist_ranker;

// Weights for a feature whose elements are identified by name, together with
// a fallback weight for elements that have no entry in the map. Used by both
// the |one_hot| and |sparse| members of FeatureWeight.
message SparseWeights {
  // Map of weights keyed by sparse feature element name.
  // E.g. a sparse feature named 'user_language' has elements
  // named 'fr', 'en', 'de', etc.
  map<string, float> weights = 1;
  // If a sparse feature element is not present in |weights|, use this weight
  // instead. Defaults to 0.
  optional float default_weight = 2;
}

// Weights for a numeric feature that is split into ranges (buckets), with one
// weight per bucket.
message BucketizedWeights {
  // Bucket boundaries. Buckets include the left boundary, and exclude the
  // right boundary. Namely, `boundaries = [0., 1., 2.]` corresponds to buckets
  // `(-inf, 0.)`, `[0., 1.)`, `[1., 2.)`, and `[2., +inf)`.
  // NOTE(review): the example implies boundaries are listed in ascending
  // order; confirm whether consumers require this.
  repeated float boundaries = 1 [packed = true];
  // Weights, one per bucket, ordered by bucket value. This contains one more
  // element than |boundaries| (N boundaries delimit N + 1 buckets).
  repeated float weights = 2 [packed = true];

  // If the feature is missing in the input, use the default_weight.
  optional float default_weight = 3;
}

// Weight definition for a single feature. The member of |feature_type| that is
// set determines how the feature's input value is interpreted; at most one
// member can be set at a time.
message FeatureWeight {
  oneof feature_type {
    // Scalar input features are defined by a single number value. If the
    // feature is missing, the weight defaults to zero.
    float scalar = 1;
    // One-hot features are binary vectors where one and only one element is
    // true. The input is represented by a string corresponding to the active
    // element. If no element is specified in the input example, or if the
    // active element is not in the weight map, we use the default weight.
    SparseWeights one_hot = 2;
    // Sparse features are vectors where a few values are non-zero while the
    // majority of the elements are zero. The input is represented as a
    // string -> float map of the non-zero elements. Contrary to one-hot
    // vectors, if no elements are specified, no weight is applied. If
    // specified elements are not in the weight map, we apply the default
    // weight for each of the unknown elements.
    SparseWeights sparse = 3;
    // Bucketized features are int or float input features that are converted to
    // a one-hot vector where elements correspond to ranges of the value.
    BucketizedWeights bucketized = 4;
  }
}

// Defines the weights, bias and decision threshold of a Logistic Regression
// Model.
message GenericLogisticRegressionModel {
  // Decision threshold. If not defined, use 0.5.
  optional float threshold = 1;

  // When computing the activation score, the bias is added to the
  // weight-feature dot product before the non-linearity (sigmoid) is applied.
  optional float bias = 2;

  // Map of weights keyed by feature name. Features can be scalar, one-hot,
  // sparse or bucketized.
  map<string, FeatureWeight> weights = 3;

  // If this is a preprocessed model, then |preprocessor_config| is used to
  // preprocess the input and |fullname_weights| is used to calculate the
  // score.
  optional bool is_preprocessed_model = 4;

  // Map from feature fullname to its weight.
  map<string, float> fullname_weights = 5;

  // Config for the preprocessor (without feature_indices; there is no need for
  // vectorization, since the inference model uses ExampleFloatIterator
  // instead).
  optional ExamplePreprocessorConfig preprocessor_config = 6;
}