// Copyright 2023 The Chromium Authors // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. // The purpose of this file is to perform end-to-end form classifications for // form structures that were recorded as JSON files. These tests currently don't // produce 100% fidelity compared to real browsing. E.g. we don't support // invisible fields, yet. Still, they are helpful for tuning heuristics and // rationalization against a corpus of observed forms. // // Test files are located in components/test/data/autofill/heuristics-json/ // and describe the status quo. Not necessarily the correct, expected behavior. // If the classification changes (for the better or worse), a new file is // written into this directory and the test fails. You can compare the results // with `diff -U5 $old_file $new_file`. // // The structure of the input files is as follows: // { // "config": { // // 2 letter country code, used to mock the user's current location. // "country": "US", // // 2 letter language code, used to mock the website's language. // "language": "en", // // List of fields for which the expected type is verified. Fields not // // listed here are presented to the local heuristics but the outputs // // are not checked and mismatches are not reported. // "fields_in_scope": [ // "UNKNOWN_TYPE", // "ADDRESS_HOME_CITY", // ... // ], // }, // "sites": [ // { // // URL of the website from which the form was recorded, useful for // // debugging. // "site_url": "https://www.example.com", // // List of forms recorded for the website (e.g. a website can have an // // address form and a payment form). // "forms": [ // { // "form_signature": "1234567", // "fields": [ // { // // "{form_sig}_{field_sig}_{field_rank_in_signature_group}" // "id": "15461699092647468671_1855613035_0", // "field_signature": "1855613035", // // Absolute position of the field in the form. Fields should // // be sorted by "field_position". 
Fields are presented to the // // heuristics in the order they appear in the JSON file. // // This field is only used for debugging purposes. // "field_position": 0, // // <label>{label_attr} // // <input id="{id_attr}" name="{name_attr}" // // type="{type_attr}" // // autocomplete="{autocomplete_attr}"> // // </label> // "id_attr": "first", // "name_attr": "firstName", // "label_attr": "First name", // "type_attr": "text", // "autocomplete_attr": "given-name", // // The field types a human tester considered correct. // // Currently only the first type is considered. // "tester_type": [ // "NAME_FIRST" // ], // // Correctness of the last classification. The value can be // // one of: // // - "correct" if the last classification matched the first // // "tester_type". // // - "not_recognized: {tester_type}, chosen_instead: {type2}" // // if {tester_type} was not recognized by the heuristics // // but classification and rationalization produced // // {type2} instead. // // - "ignored: {tester_type}" if the field type is not in // // scope of the test. // "last_correctness": "correct|not_recognized: ...", // // ^^^^^^ THIS GETS UPDATED BY RUNNING THE TEST. // // The last field type predicted by the heuristics and // // rationalization. // "last_classification": "NAME_FIRST" // // ^^^^^^ THIS GETS UPDATED BY RUNNING THE TEST. // } // ] // } // ] // } // ], // // Summary of the classification. // "stats": { // // ^^^^^^ THIS GETS UPDATED BY RUNNING THE TEST // "high_level_stats": { // // Which fraction of fields had the heuristic type match the tester // // type. // "fraction_machtes": 0.7258244384259996, // // Number of fields for which the heuristic type matched the tester // // type or did not match. // "matches": 9112, // "mismatches": 3442 // }, // // Same statistics as above, drilled down by tester type. // "per_type_stats": { // "{tester_type}": { // "fraction_machtes": 0.9132743362831859, // "matches": 1032, // "mismatches": 98 // }, // ... 
// }, // "ignored_types_stats": { // "{tester_type}": 1 // } // } // } #include <iomanip> #include <sstream> #include <string_view> #include "base/command_line.h" #include "base/containers/contains.h" #include "base/containers/flat_map.h" #include "base/containers/flat_set.h" #include "base/feature_list.h" #include "base/files/file_enumerator.h" #include "base/files/file_path.h" #include "base/files/file_util.h" #include "base/json/json_reader.h" #include "base/json/json_writer.h" #include "base/logging.h" #include "base/no_destructor.h" #include "base/path_service.h" #include "base/strings/string_number_conversions.h" #include "base/strings/string_util.h" #include "base/strings/utf_string_conversions.h" #include "base/test/scoped_command_line.h" #include "base/test/scoped_feature_list.h" #include "base/test/test_timeouts.h" #include "base/values.h" #include "build/build_config.h" #include "components/autofill/core/browser/form_structure.h" #include "components/autofill/core/browser/heuristic_source.h" #include "components/autofill/core/browser/logging/log_manager.h" #include "components/autofill/core/browser/logging/log_router.h" #include "components/autofill/core/common/autocomplete_parsing_util.h" #include "components/autofill/core/common/autofill_features.h" #include "components/autofill/core/common/autofill_test_utils.h" #include "components/autofill/core/common/form_data_test_api.h" #include "components/autofill/core/common/language_code.h" #include "components/variations/variations_switches.h" #include "testing/gmock/include/gmock/gmock.h" #include "testing/gtest/include/gtest/gtest.h" #if BUILDFLAG(IS_MAC) #include "base/apple/foundation_util.h" #endif AssertionFailure; AssertionResult; AssertionSuccess; namespace autofill { namespace { // Helper class that aggregates metrics and diagnostic data about field // classifications that matched or mismatched the expectations. 
class ResultAnalyzer { … }; void ResultAnalyzer::AnalyzeClassification(const FormStructure& form_structure, base::Value::Dict& form_dict) { … } base::Value ResultAnalyzer::GetResult() { … } // Returns the path containing test input files, // components/test/data/autofill/heuristics-json/. const base::FilePath& GetInputDir() { … } // Returns all "*.json" files in `GetInputDir()`. std::vector<base::FilePath> GetTestFiles() { … } // Extracts data of a single field from `field_dict` using the Form `form_data` // as contextual information. // `field_dict` corresponds to an entry of `.sites[].forms[].fields[]` in the // JSON input file in jq syntax (https://jqlang.github.io/jq/). FormFieldData ParseFieldFromJsonDict(const base::Value::Dict& field_dict, const FormData& form_data) { … } [[nodiscard]] AssertionResult ParseFormFromJsonDict( const base::Value::Dict& form_dict, const std::string& site_url, FormData& form_data) { … } // Tests classifications of a site. The returned test result expresses whether // the test data could be parsed and the fields could be classified. It does // not make an assessment of whether the heuristics generated the expected data. // That is recorded via `result_analyzer`. // Test field classification results are updated in `site` in the // `.sites[].forms[].fields[].last_classification` field. This is why the `site` // is a mutable parameter. // `site` corresponds to an entry of `.sites[]` in the JSON input file in jq // syntax (https://jqlang.github.io/jq/). [[nodiscard]] AssertionResult ClassifyFieldsOfSite( base::Value::Dict& site, const GeoIpCountryCode& client_country, LanguageCode page_language, ResultAnalyzer& result_analyzer, LogManager* log_manager) { … } // Creates a textual description of the statistics. This is good for a quick // view in the delta for an EXPECT_EQ(). 
[[nodiscard]] std::string SummarizeStatistics( const base::Value::Dict& json_file) { … } class HeuristicClassificationTests : public testing::Test, public testing::WithParamInterface<base::FilePath> { … }; void HeuristicClassificationTests::SetUp() { … } TEST_P(HeuristicClassificationTests, EndToEnd) { … } // Maps a test file name to a short string that is used in the test name. // E.g. a file "internal/DE.json" becomes "DE" such that the test is called // AllForms/HeuristicClassificationTests.EndToEnd/DE. std::string GenerateTestName( const testing::TestParamInfo<base::FilePath>& info) { … } INSTANTIATE_TEST_SUITE_P(…); } // namespace } // namespace autofill