chromium/third_party/rust/chromium_crates_io/vendor/read-fonts-0.20.0/src/tables/gsub/closure.rs

//! Computing the closure over a set of glyphs
//!
//! This means taking a set of glyphs and updating it to include any other glyphs
//! reachable from those glyphs via substitution, recursively.

use std::collections::HashSet;

use font_types::GlyphId16;

use crate::{
    tables::layout::{
        ChainedSequenceContextFormat1, ChainedSequenceContextFormat2,
        ChainedSequenceContextFormat3, ExtensionLookup, SequenceContextFormat1,
        SequenceContextFormat2, SequenceContextFormat3, Subtables,
    },
    FontRead, ReadError,
};

use super::{
    AlternateSubstFormat1, ChainedSequenceContext, ClassDef, Gsub, LigatureSubstFormat1,
    MultipleSubstFormat1, ReverseChainSingleSubstFormat1, SequenceContext, SingleSubst,
    SingleSubstFormat1, SingleSubstFormat2, SubstitutionSubtables,
};

/// A trait for tables which participate in closure
pub(crate) trait GlyphClosure {
    /// Update the set of glyphs with any glyphs reachable via substitution.
    fn add_reachable_glyphs(&self, glyphs: &mut HashSet<GlyphId16>) -> Result<(), ReadError>;
}

impl<'a> Gsub<'a> {
    /// Return the set of glyphs reachable from the input set via any substitution.
    pub fn closure_glyphs(
        &self,
        mut glyphs: HashSet<GlyphId16>,
    ) -> Result<HashSet<GlyphId16>, ReadError> {
        // we need to do this iteratively, since any glyph found in one pass
        // over the lookups could also be the target of substitutions.

        // we always call this once, and then keep calling if it produces
        // additional glyphs
        let mut prev_glyph_count = glyphs.len();
        self.closure_glyphs_once(&mut glyphs)?;
        let mut new_glyph_count = glyphs.len();

        while prev_glyph_count != new_glyph_count {
            prev_glyph_count = new_glyph_count;
            self.closure_glyphs_once(&mut glyphs)?;
            new_glyph_count = glyphs.len();
        }

        Ok(glyphs)
    }

    fn closure_glyphs_once(&self, glyphs: &mut HashSet<GlyphId16>) -> Result<(), ReadError> {
        let lookups_to_use = self.find_reachable_lookups(glyphs)?;
        let lookup_list = self.lookup_list()?;
        for (i, lookup) in lookup_list.lookups().iter().enumerate() {
            if !lookups_to_use.contains(&(i as u16)) {
                continue;
            }
            let subtables = lookup?.subtables()?;
            subtables.add_reachable_glyphs(glyphs)?;
        }
        Ok(())
    }

    fn find_reachable_lookups(
        &self,
        glyphs: &HashSet<GlyphId16>,
    ) -> Result<HashSet<u16>, ReadError> {
        let feature_list = self.feature_list()?;
        let lookup_list = self.lookup_list()?;
        // first we want to get the lookups that are directly referenced by a feature
        // (including in a feature variation table)
        let mut lookup_ids = HashSet::with_capacity(lookup_list.lookup_count() as _);
        let feature_variations = self
            .feature_variations()
            .transpose()?
            .map(|vars| {
                let data = vars.offset_data();
                vars.feature_variation_records()
                    .iter()
                    .filter_map(move |rec| {
                        rec.feature_table_substitution(data)
                            .transpose()
                            .ok()
                            .flatten()
                    })
                    .flat_map(|subs| {
                        subs.substitutions()
                            .iter()
                            .map(move |sub| sub.alternate_feature(subs.offset_data()))
                    })
            })
            .into_iter()
            .flatten();
        for feature in feature_list
            .feature_records()
            .iter()
            .map(|rec| rec.feature(feature_list.offset_data()))
            .chain(feature_variations)
        {
            lookup_ids.extend(feature?.lookup_list_indices().iter().map(|idx| idx.get()));
        }

        // and now we need to add lookups referenced by contextual lookups,
        // IFF they are reachable via the current set of glyphs:
        for lookup in lookup_list.lookups().iter() {
            let subtables = lookup?.subtables()?;
            match subtables {
                SubstitutionSubtables::Contextual(tables) => tables
                    .iter()
                    .try_for_each(|t| t?.add_reachable_lookups(glyphs, &mut lookup_ids)),
                SubstitutionSubtables::ChainContextual(tables) => tables
                    .iter()
                    .try_for_each(|t| t?.add_reachable_lookups(glyphs, &mut lookup_ids)),
                _ => Ok(()),
            }?;
        }
        Ok(lookup_ids)
    }
}

impl<'a> GlyphClosure for SubstitutionSubtables<'a> {
    fn add_reachable_glyphs(&self, glyphs: &mut HashSet<GlyphId16>) -> Result<(), ReadError> {
        match self {
            SubstitutionSubtables::Single(tables) => tables.add_reachable_glyphs(glyphs),
            SubstitutionSubtables::Multiple(tables) => tables.add_reachable_glyphs(glyphs),
            SubstitutionSubtables::Alternate(tables) => tables.add_reachable_glyphs(glyphs),
            SubstitutionSubtables::Ligature(tables) => tables.add_reachable_glyphs(glyphs),
            SubstitutionSubtables::Reverse(tables) => tables.add_reachable_glyphs(glyphs),
            _ => Ok(()),
        }
    }
}

impl<'a, T: FontRead<'a> + GlyphClosure + 'a, Ext: ExtensionLookup<'a, T> + 'a> GlyphClosure
    for Subtables<'a, T, Ext>
{
    fn add_reachable_glyphs(&self, glyphs: &mut HashSet<GlyphId16>) -> Result<(), ReadError> {
        self.iter()
            .try_for_each(|t| t?.add_reachable_glyphs(glyphs))
    }
}

impl<'a> GlyphClosure for SingleSubst<'a> {
    fn add_reachable_glyphs(&self, glyphs: &mut HashSet<GlyphId16>) -> Result<(), ReadError> {
        for (target, replacement) in self.iter_subs()? {
            if glyphs.contains(&target) {
                glyphs.insert(replacement);
            }
        }
        Ok(())
    }
}

impl<'a> SingleSubst<'a> {
    fn iter_subs(&self) -> Result<impl Iterator<Item = (GlyphId16, GlyphId16)> + '_, ReadError> {
        let (left, right) = match self {
            SingleSubst::Format1(t) => (Some(t.iter_subs()?), None),
            SingleSubst::Format2(t) => (None, Some(t.iter_subs()?)),
        };
        Ok(left
            .into_iter()
            .flatten()
            .chain(right.into_iter().flatten()))
    }
}

impl<'a> SingleSubstFormat1<'a> {
    fn iter_subs(&self) -> Result<impl Iterator<Item = (GlyphId16, GlyphId16)> + '_, ReadError> {
        let delta = self.delta_glyph_id();
        let coverage = self.coverage()?;
        Ok(coverage.iter().filter_map(move |gid| {
            let raw = (gid.to_u16() as i32).checked_add(delta as i32);
            let raw = raw.and_then(|raw| u16::try_from(raw).ok())?;
            Some((gid, GlyphId16::new(raw)))
        }))
    }
}

impl<'a> SingleSubstFormat2<'a> {
    fn iter_subs(&self) -> Result<impl Iterator<Item = (GlyphId16, GlyphId16)> + '_, ReadError> {
        let coverage = self.coverage()?;
        let subs = self.substitute_glyph_ids();
        Ok(coverage.iter().zip(subs.iter().map(|id| id.get())))
    }
}

impl<'a> GlyphClosure for MultipleSubstFormat1<'a> {
    fn add_reachable_glyphs(&self, glyphs: &mut HashSet<GlyphId16>) -> Result<(), ReadError> {
        let coverage = self.coverage()?;
        let sequences = self.sequences();
        for (gid, replacements) in coverage.iter().zip(sequences.iter()) {
            let replacements = replacements?;
            if glyphs.contains(&gid) {
                glyphs.extend(
                    replacements
                        .substitute_glyph_ids()
                        .iter()
                        .map(|gid| gid.get()),
                );
            }
        }
        Ok(())
    }
}

impl<'a> GlyphClosure for AlternateSubstFormat1<'a> {
    fn add_reachable_glyphs(&self, glyphs: &mut HashSet<GlyphId16>) -> Result<(), ReadError> {
        let coverage = self.coverage()?;
        let alts = self.alternate_sets();
        for (gid, alts) in coverage.iter().zip(alts.iter()) {
            let alts = alts?;
            if glyphs.contains(&gid) {
                glyphs.extend(alts.alternate_glyph_ids().iter().map(|gid| gid.get()));
            }
        }
        Ok(())
    }
}

impl<'a> GlyphClosure for LigatureSubstFormat1<'a> {
    fn add_reachable_glyphs(&self, glyphs: &mut HashSet<GlyphId16>) -> Result<(), ReadError> {
        let coverage = self.coverage()?;
        let ligs = self.ligature_sets();
        for (gid, lig_set) in coverage.iter().zip(ligs.iter()) {
            let lig_set = lig_set?;
            if glyphs.contains(&gid) {
                for lig in lig_set.ligatures().iter() {
                    let lig = lig?;
                    if lig
                        .component_glyph_ids()
                        .iter()
                        .all(|gid| glyphs.contains(&gid.get()))
                    {
                        glyphs.insert(lig.ligature_glyph());
                    }
                }
            }
        }
        Ok(())
    }
}

impl GlyphClosure for ReverseChainSingleSubstFormat1<'_> {
    fn add_reachable_glyphs(&self, glyphs: &mut HashSet<GlyphId16>) -> Result<(), ReadError> {
        for coverage in self
            .backtrack_coverages()
            .iter()
            .chain(self.lookahead_coverages().iter())
        {
            if !coverage?.iter().any(|gid| glyphs.contains(&gid)) {
                return Ok(());
            }
        }

        for (gid, sub) in self.coverage()?.iter().zip(self.substitute_glyph_ids()) {
            if glyphs.contains(&gid) {
                glyphs.insert(sub.get());
            }
        }

        Ok(())
    }
}

impl SequenceContext<'_> {
    fn add_reachable_lookups(
        &self,
        glyphs: &HashSet<GlyphId16>,
        lookups: &mut HashSet<u16>,
    ) -> Result<(), ReadError> {
        match self {
            SequenceContext::Format1(table) => table.add_reachable_lookups(glyphs, lookups),
            SequenceContext::Format2(table) => table.add_reachable_lookups(glyphs, lookups),
            SequenceContext::Format3(table) => table.add_reachable_lookups(glyphs, lookups),
        }
    }
}

impl SequenceContextFormat1<'_> {
    fn add_reachable_lookups(
        &self,
        glyphs: &HashSet<GlyphId16>,
        lookups: &mut HashSet<u16>,
    ) -> Result<(), ReadError> {
        let coverage = self.coverage()?;
        for seq in coverage
            .iter()
            .zip(self.seq_rule_sets().iter())
            .filter_map(|(gid, seq)| seq.filter(|_| glyphs.contains(&gid)))
        {
            for rule in seq?.seq_rules().iter() {
                let rule = rule?;
                if rule
                    .input_sequence()
                    .iter()
                    .all(|gid| glyphs.contains(&gid.get()))
                {
                    lookups.extend(
                        rule.seq_lookup_records()
                            .iter()
                            .map(|rec| rec.lookup_list_index()),
                    );
                }
            }
        }
        Ok(())
    }
}

impl SequenceContextFormat2<'_> {
    fn add_reachable_lookups(
        &self,
        glyphs: &HashSet<GlyphId16>,
        lookups: &mut HashSet<u16>,
    ) -> Result<(), ReadError> {
        let classdef = self.class_def()?;
        let our_classes = make_class_set(glyphs, &classdef);
        for seq in self
            .class_seq_rule_sets()
            .iter()
            .enumerate()
            .filter_map(|(i, seq)| seq.filter(|_| our_classes.contains(&(i as u16))))
        {
            for rule in seq?.class_seq_rules().iter() {
                let rule = rule?;
                if rule
                    .input_sequence()
                    .iter()
                    .all(|class_id| our_classes.contains(&class_id.get()))
                {
                    lookups.extend(
                        rule.seq_lookup_records()
                            .iter()
                            .map(|rec| rec.lookup_list_index()),
                    )
                }
            }
        }
        Ok(())
    }
}

impl SequenceContextFormat3<'_> {
    fn add_reachable_lookups(
        &self,
        glyphs: &HashSet<GlyphId16>,
        lookups: &mut HashSet<u16>,
    ) -> Result<(), ReadError> {
        for coverage in self.coverages().iter() {
            if !coverage?.iter().any(|gid| glyphs.contains(&gid)) {
                return Ok(());
            }
        }
        lookups.extend(
            self.seq_lookup_records()
                .iter()
                .map(|rec| rec.lookup_list_index()),
        );
        Ok(())
    }
}

impl ChainedSequenceContext<'_> {
    fn add_reachable_lookups(
        &self,
        glyphs: &HashSet<GlyphId16>,
        lookups: &mut HashSet<u16>,
    ) -> Result<(), ReadError> {
        match self {
            ChainedSequenceContext::Format1(table) => table.add_reachable_lookups(glyphs, lookups),
            ChainedSequenceContext::Format2(table) => table.add_reachable_lookups(glyphs, lookups),
            ChainedSequenceContext::Format3(table) => table.add_reachable_lookups(glyphs, lookups),
        }
    }
}

impl ChainedSequenceContextFormat1<'_> {
    fn add_reachable_lookups(
        &self,
        glyphs: &HashSet<GlyphId16>,
        lookups: &mut HashSet<u16>,
    ) -> Result<(), ReadError> {
        let coverage = self.coverage()?;
        for seq in coverage
            .iter()
            .zip(self.chained_seq_rule_sets().iter())
            .filter_map(|(gid, seq)| seq.filter(|_| glyphs.contains(&gid)))
        {
            for rule in seq?.chained_seq_rules().iter() {
                let rule = rule?;
                if rule
                    .input_sequence()
                    .iter()
                    .chain(rule.backtrack_sequence())
                    .chain(rule.lookahead_sequence())
                    .all(|gid| glyphs.contains(&gid.get()))
                {
                    lookups.extend(
                        rule.seq_lookup_records()
                            .iter()
                            .map(|rec| rec.lookup_list_index()),
                    );
                }
            }
        }
        Ok(())
    }
}

impl ChainedSequenceContextFormat2<'_> {
    fn add_reachable_lookups(
        &self,
        glyphs: &HashSet<GlyphId16>,
        lookups: &mut HashSet<u16>,
    ) -> Result<(), ReadError> {
        let input = self.input_class_def()?;
        let backtrack = self.backtrack_class_def()?;
        let lookahead = self.lookahead_class_def()?;

        let input_classes = make_class_set(glyphs, &input);
        let backtrack_classes = make_class_set(glyphs, &backtrack);
        let lookahead_classes = make_class_set(glyphs, &lookahead);
        for seq in self
            .chained_class_seq_rule_sets()
            .iter()
            .enumerate()
            .filter_map(|(i, seq)| seq.filter(|_| input_classes.contains(&(i as u16))))
        {
            for rule in seq?.chained_class_seq_rules().iter() {
                let rule = rule?;
                if rule
                    .input_sequence()
                    .iter()
                    .all(|cls| input_classes.contains(&cls.get()))
                    && rule
                        .backtrack_sequence()
                        .iter()
                        .all(|cls| backtrack_classes.contains(&cls.get()))
                    && rule
                        .lookahead_sequence()
                        .iter()
                        .all(|cls| lookahead_classes.contains(&cls.get()))
                {
                    lookups.extend(
                        rule.seq_lookup_records()
                            .iter()
                            .map(|rec| rec.lookup_list_index()),
                    )
                }
            }
        }
        Ok(())
    }
}

impl ChainedSequenceContextFormat3<'_> {
    fn add_reachable_lookups(
        &self,
        glyphs: &HashSet<GlyphId16>,
        lookups: &mut HashSet<u16>,
    ) -> Result<(), ReadError> {
        for coverage in self
            .backtrack_coverages()
            .iter()
            .chain(self.input_coverages().iter())
            .chain(self.lookahead_coverages().iter())
        {
            if !coverage?.iter().any(|gid| glyphs.contains(&gid)) {
                return Ok(());
            }
        }
        lookups.extend(
            self.seq_lookup_records()
                .iter()
                .map(|rec| rec.lookup_list_index()),
        );
        Ok(())
    }
}

fn make_class_set(glyphs: &HashSet<GlyphId16>, classdef: &ClassDef) -> HashSet<u16> {
    glyphs.iter().map(|gid| classdef.get(*gid)).collect()
}

#[cfg(test)]
mod tests {
    use std::collections::HashMap;

    use crate::{FontRef, TableProvider};

    use super::*;
    use font_test_data::closure as test_data;

    struct GlyphMap {
        to_gid: HashMap<&'static str, GlyphId16>,
        from_gid: HashMap<GlyphId16, &'static str>,
    }

    impl GlyphMap {
        fn new(raw_order: &'static str) -> GlyphMap {
            let to_gid: HashMap<_, _> = raw_order
                .split('\n')
                .map(|line| line.trim())
                .filter(|line| !(line.starts_with('#') || line.is_empty()))
                .enumerate()
                .map(|(gid, name)| (name, GlyphId16::new(gid.try_into().unwrap())))
                .collect();
            let from_gid = to_gid.iter().map(|(name, gid)| (*gid, *name)).collect();
            GlyphMap { from_gid, to_gid }
        }

        fn get_gid(&self, name: &str) -> Option<GlyphId16> {
            self.to_gid.get(name).copied()
        }

        fn get_name(&self, gid: GlyphId16) -> Option<&str> {
            self.from_gid.get(&gid).copied()
        }
    }

    fn get_gsub(test_data: &'static [u8]) -> Gsub<'_> {
        let font = FontRef::new(test_data).unwrap();
        font.gsub().unwrap()
    }

    fn compute_closure(gsub: &Gsub, glyph_map: &GlyphMap, input: &[&str]) -> HashSet<GlyphId16> {
        let input_glyphs = input
            .iter()
            .map(|name| glyph_map.get_gid(name).unwrap())
            .collect();
        gsub.closure_glyphs(input_glyphs).unwrap()
    }

    /// assert a set of glyph ids matches a slice of names
    macro_rules! assert_closure_result {
        ($glyph_map:expr, $result:expr, $expected:expr) => {
            let result = $result
                .iter()
                .map(|gid| $glyph_map.get_name(*gid).unwrap())
                .collect::<HashSet<_>>();
            let expected = $expected.iter().copied().collect::<HashSet<_>>();
            if expected != result {
                let in_output = result.difference(&expected).collect::<Vec<_>>();
                let in_expected = expected.difference(&result).collect::<Vec<_>>();
                let mut msg = format!("Closure output does not match\n");
                if !in_expected.is_empty() {
                    msg.push_str(format!("missing {in_expected:?}\n").as_str());
                }
                if !in_output.is_empty() {
                    msg.push_str(format!("unexpected {in_output:?}").as_str());
                }
                panic!("{msg}")
            }
        };
    }

    #[test]
    fn smoke_test() {
        // tests various lookup types.
        // test input is font-test-data/test_data/fea/simple_closure.fea
        let gsub = get_gsub(test_data::SIMPLE);
        let glyph_map = GlyphMap::new(test_data::SIMPLE_GLYPHS);
        let result = compute_closure(&gsub, &glyph_map, &["a"]);

        assert_closure_result!(
            glyph_map,
            result,
            &["a", "A", "b", "c", "d", "a_a", "a.1", "a.2", "a.3"]
        );
    }

    #[test]
    fn recursive() {
        // a scenario in which one substitution adds glyphs that trigger additional
        // substitutions.
        //
        // test input is font-test-data/test_data/fea/recursive_closure.fea
        let gsub = get_gsub(test_data::RECURSIVE);
        let glyph_map = GlyphMap::new(test_data::RECURSIVE_GLYPHS);
        let result = compute_closure(&gsub, &glyph_map, &["a"]);
        assert_closure_result!(glyph_map, result, &["a", "b", "c", "d"]);
    }

    #[test]
    fn contextual_lookups() {
        let gsub = get_gsub(test_data::CONTEXTUAL);
        let glyph_map = GlyphMap::new(test_data::CONTEXTUAL_GLYPHS);

        // these match the lookups but not the context
        let nop = compute_closure(&gsub, &glyph_map, &["three", "four", "e", "f"]);
        assert_closure_result!(glyph_map, nop, &["three", "four", "e", "f"]);

        let gsub6f1 = compute_closure(
            &gsub,
            &glyph_map,
            &["one", "two", "three", "four", "five", "six", "seven"],
        );
        assert_closure_result!(
            glyph_map,
            gsub6f1,
            &["one", "two", "three", "four", "five", "six", "seven", "X", "Y"]
        );

        let gsub6f3 = compute_closure(&gsub, &glyph_map, &["space", "e"]);
        assert_closure_result!(glyph_map, gsub6f3, &["space", "e", "e.2"]);

        let gsub5f3 = compute_closure(&gsub, &glyph_map, &["f", "g"]);
        assert_closure_result!(glyph_map, gsub5f3, &["f", "g", "f.2"]);
    }

    #[test]
    fn recursive_context() {
        let gsub = get_gsub(test_data::RECURSIVE_CONTEXTUAL);
        let glyph_map = GlyphMap::new(test_data::RECURSIVE_CONTEXTUAL_GLYPHS);

        let nop = compute_closure(&gsub, &glyph_map, &["b", "B"]);
        assert_closure_result!(glyph_map, nop, &["b", "B"]);

        let full = compute_closure(&gsub, &glyph_map, &["a", "b", "c"]);
        assert_closure_result!(glyph_map, full, &["a", "b", "c", "B", "B.2", "B.3"]);

        let intermediate = compute_closure(&gsub, &glyph_map, &["a", "B.2"]);
        assert_closure_result!(glyph_map, intermediate, &["a", "B.2", "B.3"]);
    }

    #[test]
    fn feature_variations() {
        let gsub = get_gsub(test_data::VARIATIONS_CLOSURE);
        let glyph_map = GlyphMap::new(test_data::VARIATIONS_GLYPHS);

        let input = compute_closure(&gsub, &glyph_map, &["a"]);
        assert_closure_result!(glyph_map, input, &["a", "b", "c"]);
    }
}