chromium/tools/crates/gnrt/lib/crates.rs

// Copyright 2022 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

//! Utilities to handle vendored third-party crates.

use crate::config::BuildConfig;
use crate::deps;
use crate::manifest;

use std::fmt::{self, Display};
use std::fs;
use std::hash::Hash;
use std::io;
use std::path::{Path, PathBuf};
use std::str::FromStr;

use anyhow::Context;
use log::error;
use semver::Version;
use serde::{Deserialize, Serialize};

/// Which kinds of build targets are allowed to depend on a crate.
///
/// The default is `ThirdParty`, which is the most conservative option and
/// generally what we want if one isn't explicitly computed.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum Visibility {
    /// The crate can be used by any build targets.
    Public,
    /// The crate can be used by only third-party crates.
    #[default]
    ThirdParty,
    /// The crate can be used by any test target, and in production by
    /// third-party crates.
    TestOnlyAndThirdParty,
}

/// A normalized version as used in third_party/rust crate paths.
///
/// A crate's epoch is identified by the major version, if it's >= 1, or the
/// minor version, if the major version is 0. There is a many-to-one
/// relationship between crate versions and epochs.
///
/// `Epoch` is serialized as a version string: e.g. "1" or "0.2". The serde
/// attribute below routes both serialization and deserialization through the
/// `EpochString` proxy type.
///
/// The derived ordering follows variant declaration order, so every `Minor`
/// epoch compares less than every `Major` epoch.
#[derive(Clone, Copy, Debug, Deserialize, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize)]
#[serde(from = "EpochString", into = "EpochString")]
pub enum Epoch {
    /// Epoch with major version == 0. The field is the minor version. It is an
    /// error to use 0: methods may panic in this case.
    Minor(u64),
    /// Epoch with major version >= 1. The field is the major version. It is
    /// an error to use 0: methods may panic in this case.
    Major(u64),
}

impl Epoch {
    /// Render this epoch as a semver-style version prefix: `"{major}"` for
    /// `Epoch::Major` and `"0.{minor}"` for `Epoch::Minor`. Note this differs
    /// from Epoch's `fmt::Display` impl.
    ///
    /// # Panics
    ///
    /// Panics if the wrapped number is 0, which is not a valid epoch.
    pub fn to_version_string(&self) -> String {
        match self {
            Self::Minor(minor) => {
                assert_ne!(*minor, 0);
                format!("0.{minor}")
            }
            Self::Major(major) => {
                assert_ne!(*major, 0);
                major.to_string()
            }
        }
    }

    /// Build a `semver::VersionReq` that matches any version of this epoch.
    ///
    /// The request consists of a single caret comparator: `^{major}` for a
    /// major epoch, or `^0.{minor}` for a minor epoch.
    pub fn to_version_req(&self) -> semver::VersionReq {
        let (major, minor) = match *self {
            Self::Minor(minor) => (0, Some(minor)),
            Self::Major(major) => (major, None),
        };
        let comparator = semver::Comparator {
            // "^1" is the same as "1" in Cargo.toml.
            op: semver::Op::Caret,
            major,
            minor,
            patch: None,
            pre: semver::Prerelease::EMPTY,
        };
        semver::VersionReq { comparators: vec![comparator] }
    }

    /// Compute the Epoch from a `semver::Version`. This is useful since we can
    /// parse versions from `cargo_metadata` and in Cargo.toml files using the
    /// `semver` library.
    ///
    /// A major version of 0 selects `Epoch::Minor` with the version's minor
    /// component; anything else selects `Epoch::Major`.
    pub fn from_version(version: &Version) -> Self {
        if version.major == 0 {
            Self::Minor(version.minor)
        } else {
            Self::Major(version.major)
        }
    }

    /// Get the requested epoch from a supported dependency version string.
    /// `req` should be a version request as used in Cargo.toml's [dependencies]
    /// section.
    ///
    /// `req` must use the default strategy as defined in
    /// https://doc.rust-lang.org/cargo/reference/specifying-dependencies.html#specifying-dependencies-from-cratesio
    ///
    /// # Panics
    ///
    /// Panics if `req` does not parse, does not consist of exactly one caret
    /// comparator, or names version `0` / `0.0`.
    pub fn from_version_req_str(req: &str) -> Self {
        // semver::VersionReq is far more expressive than we need, but it saves
        // us from writing our own parser.
        let req: semver::VersionReq = req.parse().unwrap();
        // Exactly one comparator is supported, and it must use the default
        // strategy, which semver calls "caret".
        assert_eq!(req.comparators.len(), 1);
        let comp: &semver::Comparator = &req.comparators[0];
        assert_eq!(comp.op, semver::Op::Caret);

        if comp.major != 0 {
            return Self::Major(comp.major);
        }
        match comp.minor {
            Some(minor) if minor != 0 => Self::Minor(minor),
            _ => panic!("invalid version req {req}"),
        }
    }
}

// This gives us a ToString implementation for free.
impl Display for Epoch {
    /// Writes the epoch in its path form: `v{major}` for `Epoch::Major` and
    /// `v0_{minor}` for `Epoch::Minor`.
    ///
    /// Errors reported by the underlying formatter are propagated to the
    /// caller rather than turned into a panic: formatting the integer itself
    /// cannot fail, but the sink being written to can.
    ///
    /// # Panics
    ///
    /// Panics if the wrapped number is 0, which is not a valid epoch.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match *self {
            Epoch::Minor(minor) => {
                assert_ne!(minor, 0);
                write!(f, "v0_{minor}")
            }
            Epoch::Major(major) => {
                assert_ne!(major, 0);
                write!(f, "v{major}")
            }
        }
    }
}

impl FromStr for Epoch {
    type Err = EpochParseError;

    /// Parses an epoch from its path form.
    ///
    /// A valid input string is of the form:
    /// * "v{i}", where i >= 1, or
    /// * "v0_{i}", where i >= 1
    ///
    /// Any other string is invalid. In particular, a string is rejected if
    /// the "v" prefix is missing, if there are extra underscore-separated
    /// components, if a single number is 0 ("v0"), or if there are two
    /// numbers that are either both 0 or have a non-zero major ("v0_0",
    /// "v1_0", "v1_1").
    ///
    /// # Errors
    ///
    /// * `EpochParseError::BadFormat` if the prefix is wrong or there are
    ///   trailing components after an otherwise valid epoch.
    /// * `EpochParseError::InvalidInt` if a component is not a valid `u64`.
    /// * `EpochParseError::BadVersion` if the numbers do not form a valid
    ///   epoch.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        // Split off the "v" prefix. `split_inclusive` keeps the delimiter at
        // the end of each piece, so a well-formed input yields exactly "v"
        // followed by the numeric remainder.
        let mut iter = s.split_inclusive('v');
        if iter.next() != Some("v") {
            return Err(EpochParseError::BadFormat);
        }
        let s = iter.next().ok_or(EpochParseError::BadFormat)?;
        if iter.next().is_some() {
            return Err(EpochParseError::BadFormat);
        }

        // Split the major and minor version numbers.
        let mut parts = s.split('_');
        let major: Option<u64> =
            parts.next().map(|s| s.parse().map_err(EpochParseError::InvalidInt)).transpose()?;
        let minor: Option<u64> =
            parts.next().map(|s| s.parse().map_err(EpochParseError::InvalidInt)).transpose()?;

        // Get the final epoch, checking that the (major, minor) pair is valid.
        let result = match (major, minor) {
            // "v0" and "v0_0" name no epoch at all. Accepting "v0" would
            // produce `Epoch::Major(0)`, which other methods panic on.
            (Some(0), None | Some(0)) => Err(EpochParseError::BadVersion),
            (Some(0), Some(minor)) => Ok(Epoch::Minor(minor)),
            (Some(major), None) => Ok(Epoch::Major(major)),
            (Some(_), Some(_)) => Err(EpochParseError::BadVersion),
            // `str::split` always yields at least one piece, so `major` is
            // never `None` in practice; treat it as a format error regardless.
            (None, _) => Err(EpochParseError::BadFormat),
        }?;

        // Ensure there's no remaining parts.
        if parts.next().is_none() { Ok(result) } else { Err(EpochParseError::BadFormat) }
    }
}

/// Reasons an epoch string could not be parsed.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum EpochParseError {
    /// An integer could not be parsed where expected.
    InvalidInt(std::num::ParseIntError),
    /// The string was not formatted correctly. It was missing the 'v' prefix,
    /// was missing the '_' separator, or had a tail after the last integer.
    BadFormat,
    /// The epoch had an invalid combination of versions: e.g. "v0_0", "v1_0",
    /// "v1_1".
    BadVersion,
}

impl Display for EpochParseError {
    /// Writes a short human-readable description. Integer parse failures
    /// defer to the wrapped error's own message.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let message = match self {
            Self::InvalidInt(parse_int_error) => return Display::fmt(parse_int_error, f),
            Self::BadFormat => "epoch string had incorrect format",
            Self::BadVersion => "epoch string had invalid version",
        };
        f.write_str(message)
    }
}

impl std::error::Error for EpochParseError {}

/// A crate name normalized to the format we use in //third_party.
#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct NormalizedName(String);

impl NormalizedName {
    /// Wrap an already-normalized name. Returns `None` if normalizing
    /// `normalized_name` would change it, i.e. it is not in normalized form.
    pub fn new(normalized_name: &str) -> Option<NormalizedName> {
        Some(Self::from_crate_name(normalized_name)).filter(|name| name.0 == normalized_name)
    }

    /// Normalize a crate name by replacing every `-` and `.` with `_`.
    /// `crate_name` is the name Cargo uses to refer to the crate.
    pub fn from_crate_name(crate_name: &str) -> NormalizedName {
        NormalizedName(crate_name.replace(|c: char| matches!(c, '-' | '.'), "_"))
    }

    /// Borrow the normalized name as a string slice.
    pub fn as_str(&self) -> &str {
        self.0.as_str()
    }
}

impl fmt::Display for NormalizedName {
    /// Writes the normalized name verbatim.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(self.as_str())
    }
}

/// Identifies a crate available in some vendored source. Each crate is uniquely
/// identified by its Cargo.toml package name and version.
#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct VendoredCrate {
    /// The package name.
    pub name: String,
    /// The package version.
    pub version: Version,
}

impl fmt::Display for VendoredCrate {
    /// Writes the crate as `"{name} {version}"`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { name, version } = self;
        write!(f, "{name} {version}")
    }
}

impl VendoredCrate {
    /// The package name converted to its normalized form.
    pub fn normalized_name(&self) -> NormalizedName {
        let cargo_name: &str = &self.name;
        NormalizedName::from_crate_name(cargo_name)
    }
}

/// The files belonging to a crate, grouped by how they are used.
pub struct CrateFiles {
    /// The list of all source files that are part of the crate and may be used
    /// by rustc when building any part of the crate, as absolute paths. These
    /// files are those found under the crate root.
    pub sources: Vec<PathBuf>,
    /// The list of all input files that are part of the crate and may be used
    /// by rustc when building any part of the crate, as absolute paths. This
    /// may contain .rs files as well that are part of other crates and which
    /// may be include()'d or used through module paths.
    pub inputs: Vec<PathBuf>,
    /// The list of all native lib files that are part of the crate and may be
    /// depended on through `#[link]` directives. These files are those found
    /// under the crate root.
    pub native_libs: Vec<PathBuf>,
    /// Like `sources` but for the crate's build script.
    pub build_script_sources: Vec<PathBuf>,
    /// Like `inputs` but for the crate's build script.
    pub build_script_inputs: Vec<PathBuf>,
}

impl CrateFiles {
    /// Creates a `CrateFiles` in which every list is empty.
    fn new() -> Self {
        Self {
            sources: Vec::new(),
            inputs: Vec::new(),
            native_libs: Vec::new(),
            build_script_sources: Vec::new(),
            build_script_inputs: Vec::new(),
        }
    }

    /// Sorts every list in place so that the output is deterministic.
    fn sort(&mut self) {
        let Self { sources, inputs, native_libs, build_script_sources, build_script_inputs } =
            self;
        for list in [sources, inputs, native_libs, build_script_sources, build_script_inputs] {
            list.sort_unstable();
        }
    }
}

/// Get the subdir name containing `id` in a `cargo vendor` directory.
pub fn std_crate_path(id: &VendoredCrate) -> PathBuf {
    format!("{}-{}", id.name, id.version).into()
}

/// Selects which of a package's targets contribute root directories when
/// collecting crate files in `collect_crate_files`.
#[derive(Debug, PartialEq, Eq)]
pub enum IncludeCrateTargets {
    /// Only the library target's directory (and the extra roots configured
    /// relative to it) is scanned.
    LibOnly,
    /// In addition to everything `LibOnly` scans, the directory of every
    /// binary target is scanned as well.
    LibAndBin,
}

/// Collect the source and input files (i.e. `CrateFiles`) for the package `p`.
///
/// The directory of the library target is always scanned when the package has
/// one, together with every extra root listed for this crate and for all
/// crates in `config`; extra roots are resolved relative to the library
/// target's directory. When `include_targets` is `LibAndBin`, the directory of
/// each binary target is scanned too.
///
/// Returns the crate's identity paired with its files, each list sorted.
/// Fails if any root cannot be traversed.
pub fn collect_crate_files(
    p: &deps::Package,
    config: &BuildConfig,
    include_targets: IncludeCrateTargets,
) -> anyhow::Result<(VendoredCrate, CrateFiles)> {
    let per_crate = config.per_crate_config.get(&p.crate_id().name);
    let shared = &config.all_config;

    // Every directory to scan, paired with how the files found in it should be
    // classified.
    let mut roots: Vec<(PathBuf, CollectCrateFiles)> = Vec::new();

    if let Some(lib_target) = p.lib_target.as_ref() {
        let lib_root = lib_target.root.parent().expect("lib target has no directory in its path");
        roots.push((lib_root.to_owned(), CollectCrateFiles::Internal));

        // For each kind of extra root, the per-crate entries come first and
        // the entries that apply to all crates follow.
        let extra_src =
            per_crate.iter().flat_map(|c| &c.extra_src_roots).chain(&shared.extra_src_roots);
        for rel in extra_src {
            roots.push((lib_root.join(rel), CollectCrateFiles::ExternalSourcesAndInputs));
        }

        let extra_input =
            per_crate.iter().flat_map(|c| &c.extra_input_roots).chain(&shared.extra_input_roots);
        for rel in extra_input {
            roots.push((lib_root.join(rel), CollectCrateFiles::ExternalInputsOnly));
        }

        let build_script_src = per_crate
            .iter()
            .flat_map(|c| &c.extra_build_script_src_roots)
            .chain(&shared.extra_build_script_src_roots);
        for rel in build_script_src {
            roots.push((
                lib_root.join(rel),
                CollectCrateFiles::BuildScriptExternalSourcesAndInputs,
            ));
        }

        let build_script_input = per_crate
            .iter()
            .flat_map(|c| &c.extra_build_script_input_roots)
            .chain(&shared.extra_build_script_input_roots);
        for rel in build_script_input {
            roots.push((lib_root.join(rel), CollectCrateFiles::BuildScriptExternalInputsOnly));
        }

        let native_libs =
            per_crate.iter().flat_map(|c| &c.native_libs_roots).chain(&shared.native_libs_roots);
        for rel in native_libs {
            roots.push((lib_root.join(rel), CollectCrateFiles::LibsOnly));
        }
    }

    if matches!(include_targets, IncludeCrateTargets::LibAndBin) {
        for bin in &p.bin_targets {
            let bin_root = bin.root.parent().expect("bin target has no directory in its path");
            roots.push((bin_root.to_owned(), CollectCrateFiles::Internal));
        }
    }

    let mut files = CrateFiles::new();
    for (root, mode) in roots {
        recurse_crate_files(&root, &mut |filepath| collect_crate_file(&mut files, mode, filepath))?;
    }
    files.sort();

    let crate_id = VendoredCrate { name: p.package_name.clone(), version: p.version.clone() };
    Ok((crate_id, files))
}

/// Traverse vendored third-party crates in the Rust source package and return
/// the identity (package name and version) of each one, as read from its
/// `Cargo.toml`. The returned list is in unspecified order.
///
/// Only subdirectories of `vendor_path` are considered; other entries are
/// skipped.
///
/// # Errors
///
/// Returns an error if `vendor_path` or one of its entries cannot be read, if
/// a manifest cannot be loaded, or if a directory's name matches neither
/// `{package_name}-{version}` nor `{package_name}` for the package it
/// contains.
///
/// # Panics
///
/// Panics (after logging an error) if a subdirectory has no `Cargo.toml`.
pub fn collect_std_vendored_crates(vendor_path: &Path) -> io::Result<Vec<VendoredCrate>> {
    let mut crates = Vec::new();

    for vendored_crate in fs::read_dir(vendor_path)? {
        let vendored_crate: fs::DirEntry = vendored_crate?;
        if !vendored_crate.file_type()?.is_dir() {
            continue;
        }

        let Some(crate_id) = get_vendored_crate_id(&vendored_crate.path())? else {
            error!(
                "Cargo.toml not found at {}. cargo vendor would not do that to us.",
                vendored_crate.path().to_string_lossy()
            );
            panic!()
        };

        // Vendored crate directories can be named "{package_name}" or
        // "{package_name}-{version}"; accept either. The unversioned form is
        // compared against the package name itself rather than derived by
        // trimming the versioned name at its last '-', because a version may
        // contain '-' too (e.g. a pre-release such as "1.0.0-beta.1").
        let dir_name = vendored_crate.file_name().to_string_lossy().into_owned();
        let versioned_name = std_crate_path(&crate_id).to_string_lossy().into_owned();
        if dir_name != versioned_name && dir_name != crate_id.name {
            return Err(io::Error::new(
                io::ErrorKind::Other,
                format!(
                    "directory name {dir_name} does not match package information for {crate_id:?}"
                ),
            ));
        }
        crates.push(crate_id);
    }

    Ok(crates)
}

/// How the files found under a root directory are classified when they are
/// added to a `CrateFiles`. The mode is consumed by `collect_crate_file`.
#[derive(Copy, Clone, PartialEq, Eq)]
enum CollectCrateFiles {
    /// Collect .rs files and store them as `sources` and other files as
    /// `inputs`. These are part of the crate directly.
    Internal,
    /// Collect .rs files, .md files and other file types that may be
    /// include!()'d into the crate, and store them as `inputs`. These are not
    /// directly part of the crate.
    ExternalSourcesAndInputs,
    /// Like ExternalSourcesAndInputs but excludes .rs files.
    ExternalInputsOnly,
    /// Like `ExternalSourcesAndInputs` but for build scripts: matching files
    /// are stored as `build_script_inputs`.
    BuildScriptExternalSourcesAndInputs,
    /// Like `ExternalInputsOnly` but for build scripts: matching files are
    /// stored as `build_script_inputs`.
    BuildScriptExternalInputsOnly,
    /// Collect .lib files and store them as `native_libs`. These can be
    /// depended on by the crate through `#[link]` directives.
    LibsOnly,
}

// Adds `filepath` to the list in `files` selected by the file's extension and
// the collection `mode`. Files whose extension/mode combination is not
// collected are ignored.
fn collect_crate_file(files: &mut CrateFiles, mode: CollectCrateFiles, filepath: &Path) {
    use CollectCrateFiles::*;

    let extension = filepath.extension().and_then(|ext| ext.to_str());
    let destination = match (extension, mode) {
        // Rust sources.
        (Some("rs"), Internal) => Some(&mut files.sources),
        (Some("rs"), ExternalSourcesAndInputs) => Some(&mut files.inputs),
        (Some("rs"), BuildScriptExternalSourcesAndInputs) => Some(&mut files.build_script_inputs),
        (Some("rs"), ExternalInputsOnly | BuildScriptExternalInputsOnly | LibsOnly) => None,

        // md: Markdown files are commonly include!()'d into source code as docs.
        // h: cxxbridge_cmd include!()'s its .h file into it.
        // json: json files are include!()'d into source code in the wycheproof crate
        (Some("md" | "h" | "json"), Internal | ExternalSourcesAndInputs | ExternalInputsOnly) => {
            Some(&mut files.inputs)
        }
        (
            Some("md" | "h" | "json"),
            BuildScriptExternalSourcesAndInputs | BuildScriptExternalInputsOnly,
        ) => Some(&mut files.build_script_inputs),
        (Some("md" | "h" | "json"), LibsOnly) => None,

        // Native libraries are only picked up from dedicated roots.
        (Some("lib"), LibsOnly) => Some(&mut files.native_libs),

        _ => None,
    };

    if let Some(list) = destination {
        list.push(filepath.to_owned());
    }
}

/// Recursively visits all files under `path` and calls `f` on each one.
///
/// The `path` may be a single file or a directory. Anything located under
/// `.devcontainer`, `.vscode` or `target` directly below `path` is ignored.
///
/// # Errors
///
/// Returns an error if a visited path cannot be stat'ed or a visited
/// directory cannot be listed. Ignored subtrees are never touched, so
/// problems inside them (e.g. dangling symlinks) do not cause failures.
pub fn recurse_crate_files(path: &Path, f: &mut dyn FnMut(&Path)) -> anyhow::Result<()> {
    fn recurse(path: &Path, root: &Path, f: &mut dyn FnMut(&Path)) -> anyhow::Result<()> {
        // Working locally can produce files in tree that should not be considered, and
        // which are not part of the git repository.
        //
        // * `.devcontainer/` may contain .md files such as a README.md that are never
        //   part of the build.
        // * `.vscode/` may contain .md files such as a README.md generated there.
        // * `target/` may contain .rs files generated by build scripts when compiling
        //   the crate with cargo or rust-analyzer.
        //
        // Ideally we should just include files that are listed in `git ls-files`.
        const SKIP_PREFIXES: [&str; 3] = [".devcontainer", ".vscode", "target"];

        // Prune before touching the filesystem: no file below a skipped
        // prefix is ever reported, so there is no reason to stat or list
        // those (potentially very large) trees.
        if SKIP_PREFIXES.iter().any(|skip| path.starts_with(root.join(skip))) {
            return Ok(());
        }

        let meta = std::fs::metadata(path).with_context(|| format!("missing path {:?}", path))?;
        if !meta.is_dir() {
            f(path);
            return Ok(());
        }

        for r in std::fs::read_dir(path).with_context(|| format!("dir at {:?}", path))? {
            let entry = r?;
            recurse(&entry.path(), root, f)?;
        }
        Ok(())
    }
    recurse(path, path, f)
}

/// Get a crate's ID (package name and version) from the `Cargo.toml` in
/// `package_path`.
///
/// Returns `Ok(None)` if there was no Cargo.toml.
///
/// # Errors
///
/// Returns `Err(_)` for other IO errors while reading the manifest, and an
/// error of kind `InvalidData` if the manifest cannot be parsed.
fn get_vendored_crate_id(package_path: &Path) -> io::Result<Option<VendoredCrate>> {
    let manifest_path = package_path.join("Cargo.toml");
    let manifest_file = match fs::read_to_string(&manifest_path) {
        Ok(f) => f,
        Err(e) if e.kind() == io::ErrorKind::NotFound => return Ok(None),
        Err(e) => return Err(e),
    };

    // A malformed manifest is bad input, not a broken invariant: report it
    // through the `io::Result` the caller already handles instead of
    // panicking.
    let manifest: manifest::CargoManifest = toml::de::from_str(&manifest_file).map_err(|e| {
        io::Error::new(
            io::ErrorKind::InvalidData,
            format!("failed to parse {}: {e}", manifest_path.display()),
        )
    })?;
    let crate_id = VendoredCrate {
        name: manifest.package.name.as_str().into(),
        version: manifest.package.version.clone(),
    };
    Ok(Some(crate_id))
}

/// Proxy for [de]serializing epochs to/from strings. This uses the "1" or "0.1"
/// format rather than the `Display` format for `Epoch`.
#[derive(Debug, Deserialize, Serialize)]
struct EpochString(String);

impl From<Epoch> for EpochString {
    fn from(epoch: Epoch) -> Self {
        Self(epoch.to_version_string())
    }
}

impl From<EpochString> for Epoch {
    fn from(epoch: EpochString) -> Self {
        Epoch::from_version_req_str(&epoch.0)
    }
}

#[cfg(test)]
mod tests {
    use super::Epoch::*;
    use super::*;

    /// Parsing of the `v{major}` / `v0_{minor}` path form.
    #[test]
    fn epoch_from_str() {
        use EpochParseError::*;

        // Inputs with an exactly known outcome.
        let exact: [(&str, Result<Epoch, EpochParseError>); 8] = [
            ("v1", Ok(Major(1))),
            ("v2", Ok(Major(2))),
            ("v0_3", Ok(Minor(3))),
            ("0_1", Err(BadFormat)),
            ("v1_9", Err(BadVersion)),
            ("v0_0", Err(BadVersion)),
            ("v0_1_2", Err(BadFormat)),
            ("v1_0", Err(BadVersion)),
        ];
        for (input, expected) in exact {
            assert_eq!(input.parse::<Epoch>(), expected, "input: {input:?}");
        }

        // Inputs where only the error kind is checked.
        for input in ["v1_0foo", "vx_1"] {
            assert!(matches!(input.parse::<Epoch>(), Err(InvalidInt(_))), "input: {input:?}");
        }
    }

    /// `Display` produces the path form.
    #[test]
    fn epoch_to_string() {
        for (epoch, expected) in [(Major(1), "v1"), (Major(2), "v2"), (Minor(3), "v0_3")] {
            assert_eq!(epoch.to_string(), expected);
        }
    }

    /// Conversion from a full semver version.
    #[test]
    fn epoch_from_version() {
        use semver::Version;

        let cases = [(Version::new(0, 1, 0), Minor(1)), (Version::new(1, 2, 0), Major(1))];
        for (version, expected) in cases {
            assert_eq!(Epoch::from_version(&version), expected);
        }
    }

    /// Conversion from a Cargo.toml-style version request.
    #[test]
    fn epoch_from_version_req_string() {
        for (req, expected) in [("0.1.0", Minor(1)), ("1.0.0", Major(1)), ("2.3.0", Major(2))] {
            assert_eq!(Epoch::from_version_req_str(req), expected, "req: {req:?}");
        }
    }
}