chromium/chrome/renderer/resources/cart/cart-product-extraction.js

// Copyright 2021 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Debug verbosity for this file. Code below logs summaries when
// `verbose > 0` and per-candidate details when `verbose > 1`.
var verbose = 0;

// Building blocks for the price regexes.
// Aliexpress uses 'US $12.34' format in the price.
// Macy's uses "$12.34 to 56.78" format.
// Label words that may precede a price; |priceCleanupRegex| strips them.
var priceCleanupPrefix = 'total price|sale price|price|sale|' +
    'with offer|only|our price|now|starting at';
// Per-unit suffix ('/each' or '/set') that may follow a price.
var priceCleanupPostfix = '(/(each|set))';
// Optional label, optional 'N /' quantity, optional 'US'/'USD', then a dollar
// amount that may be a range ('to', '-' or an en dash), then the optional
// per-unit suffix.
var priceRegexTemplate = '((reg|regular|orig|from|' + priceCleanupPrefix +
    ')\\s+)?' +
    '(\\d+\\s*/\\s*)?(US(D)?\\s*)?' +
    '\\$\\s*[\\d.,]+(\\s+(to|-|–)\\s+(\\$)?[\\d.,]+)?' +
    priceCleanupPostfix + '?';
// Anchored variant: the whole string must be a price (optionally ending in
// ' ea'). Used by extractPrice() to accept a candidate.
var priceRegexFull = new RegExp('^' + priceRegexTemplate + '( ea)?$', 'i');
// Unanchored variant: finds a price anywhere in a longer text. Used by
// extractOneItem() as a cheap "does this element mention a price" filter.
var priceRegex = new RegExp(priceRegexTemplate, 'i');
// Matches either a leading label followed by whitespace or a trailing
// per-unit suffix. It is not global, so a single replace() removes only the
// first match.
var priceCleanupRegex = new RegExp(
    '^((' + priceCleanupPrefix + ')\\s+)|' + priceCleanupPostfix + '$', 'i');
// Markup hint for cart items (e.g. 'cart-item', 'basket_line_product');
// isCartItem() tests it against outerHTML.
var cartItemHTMLRegex = new RegExp(
    '(cart|basket|bundle)[-_]?((\\w+)[-_])?(item|product)', 'i');
// Text of the action links usually shown next to a cart item.
var cartItemTextRegex = new RegExp(
    'remove|delete|save for later|move to (favo(u?)rite|list|wish( ?)list)s?',
    'i');
// Quantity label used as a cart item hint.
var cartItemQtyRegex = new RegExp('qty', 'i');
// The next three patterns mark elements that are NOT cart items in
// isCartItem(): saved/wishlist rows, upsell rows and order summary rows.
var moveToCartTextRegex = new RegExp('move to (cart|bag)', 'i');
var addToCartTextRegex = new RegExp('add to cart', 'i');
var cartPriceTextRegex = new RegExp('((estimated (sales )?)|(sales ))tax', 'i');
// Mini-cart markup, excluded by isCartItem() on a few specific sites.
var minicartHTMLRegex = new RegExp('mini-cart-product', 'i');
// NOTE(review): the two product id patterns below are not referenced in the
// visible part of this file; presumably they are kept for another consumer.
var productIdHTMLRegex = new RegExp('<a href="#modal-(\\w+)', 'i');
var productIdURLRegex = new RegExp(
    '((\\w+)-\\d+-medium)|(images.cymax.com/Images/\\d+/(\\w+)-)', 'i');
// Used by isInSavedForLater() to recognize rows that still offer the
// 'save for later' action.
var saveForLaterRegex = new RegExp('save for later', 'i');

// Returns the http(s) URL stored in one of the known lazy-loading attributes
// of |image|, or null when none of them holds a usable URL.
// Protocol-relative values ('//host/path') are upgraded to https.
function getLazyLoadingURL(image) {
  // FIXME: some lazy images in Nordstrom and Staples don't have URLs in the
  // DOM.
  // TODO: add more lazy-loading attributes.
  const lazyAttributes =
      ['data-src', 'data-img-url', 'data-config-src', 'data-echo', 'data-lazy'];
  for (const attribute of lazyAttributes) {
    let url = image.getAttribute(attribute);
    if (url == null)
      continue;
    if (url.startsWith('//'))
      url = 'https:' + url;
    // Skip relative paths, data: URLs and other non-http values.
    if (!url.startsWith('http'))
      continue;
    return url;
  }
  // Callers compare the result with `!= null`, so an explicit null is
  // equivalent to the former implicit undefined.
  return null;
}

// Collects the image-like elements under |root| that are at least |atLeast|
// pixels tall and not hidden via CSS visibility.
// Looks for <img> first, then <amp-img>, then '.bg-img' elements.
// With |relaxed| set, a shorter image is still kept when it is aria-hidden,
// has a lazy-loading URL, or sits inside a <picture>.
// Returns an array of elements (possibly empty).
function getLargeImages(root, atLeast, relaxed = false) {
  let candidates = root.querySelectorAll('img');
  if (candidates.length == 0) {
    // Aliexpress
    candidates = root.querySelectorAll('amp-img');
  }
  if (candidates.length == 0) {
    // Google store
    candidates = root.querySelectorAll('.bg-img');
  }
  // Declared locally: the previous undeclared assignment leaked a global and
  // throws in strict mode.
  const images = [];
  function shouldStillKeep(image) {
    if (!relaxed)
      return false;
    if (image.getAttribute('aria-hidden') == 'true')
      return true;
    if (getLazyLoadingURL(image) != null)
      return true;
    // For test files on target.com the images aren't preserved for
    // some products so we need to look for the images in the parent
    // picture tag.
    if (image.parentElement.tagName == 'PICTURE')
      return true;
    return false;
  }
  for (const image of candidates) {
    if (verbose > 1)
      console.log('offsetHeight', image, image.offsetHeight);
    if (image.offsetHeight < atLeast) {
      if (!shouldStillKeep(image))
        continue;
    }
    if (window.getComputedStyle(image)['visibility'] == 'hidden')
      continue;
    images.push(image);
  }
  return images;
}

// Returns the elements of |list| that occupy layout space, i.e. whose
// offsetHeight and offsetWidth are both non-zero.
function getVisibleElements(list) {
  // Declared locally: the previous undeclared assignment leaked a global.
  const visible = [];
  for (const ele of list) {
    // The original tested offsetHeight twice; the second test was evidently
    // meant to cover the width.
    if (ele.offsetHeight == 0 || ele.offsetWidth == 0)
      continue;
    visible.push(ele);
  }
  return visible;
}

// Tells whether the current page belongs to a site where a single product may
// legitimately show several large images (e.g. CraigsList).
function multipleImagesSupported() {
  // When saving target.com to mhtml, the color selecting images become very
  // large and are picked up. Listing target.com here is a workaround for this
  // problem. In target we only get one image per product.
  const multiImageDomains = [
    'craigslist.org',
    'target.com',
    'zazzle.com',
    'ashleyfurniture.com',
    'chewy.com',
  ];
  const pageHost = new URL(document.baseURI).hostname;
  return multiImageDomains.some((domain) => pageHost.endsWith(domain));
}

// Returns the product image URL for |item|, or null when no single suitable
// image can be determined.
function extractImage(item) {
  const pageHost = new URL(document.baseURI).hostname;

  // Some merchant sites have product images as background of a div element.
  // Those are looked up by a site-specific selector.
  let backgroundSelector = null;
  if (pageHost.endsWith('americastire.com') ||
      pageHost.endsWith('discounttire.com')) {
    backgroundSelector = '.product-image__image-block';
  } else if (pageHost.endsWith('discounttiredirect.com')) {
    backgroundSelector = '.cart-item__product-image';
  }
  if (backgroundSelector !== null) {
    const holder = item.querySelector(backgroundSelector);
    return holder == null ? null : extractImageUrl(holder);
  }

  // Sometimes an item contains small icons, which need to be filtered out.
  // TODO: two pass getLargeImages() is probably too slow.
  let found = getLargeImages(item, 40);
  if (found.length == 0) {
    // Second, more forgiving pass.
    found = getLargeImages(item, 30, true);
  }
  if (found.length == 0) {
    return null;
  }

  // Unless the site is known to show several images per product, anything
  // other than exactly one image is treated as an extraction failure.
  if (!multipleImagesSupported()) {
    if (verbose > 0)
      console.assert(
          found.length == 1, 'image extraction error', item, found);
    if (found.length != 1) {
      return null;
    }
  }

  // Only chewy.com pages try every image; elsewhere just the first one.
  const toTry = document.URL.includes('chewy.com') ? found : found.slice(0, 1);
  for (const candidate of toTry) {
    const candidateUrl = extractImageUrl(candidate);
    if (candidateUrl !== null) {
      return candidateUrl;
    }
  }
  return null;
}

// Returns the URL of |image|, or null when none can be determined.
// Sources are tried in this order:
//   1. a lazy-loading attribute (see getLazyLoadingURL),
//   2. the CSS background image, for the known background-image elements,
//   3. the src property/attribute, unless it is a data: URL,
//   4. the first entry of a source set, taken from the
//      'data-search-image-source-set' attribute or from the first <source>
//      of a parent <picture>.
function extractImageUrl(image) {
  const lazyUrl = getLazyLoadingURL(image);
  if (lazyUrl != null)
    return lazyUrl;

  // Special handling for Google store, America's Tire and Discount
  // Tire Direct.
  if (image.className === "bg-img"
    || image.className.includes("product-image__image-block")
    || image.className.includes("cart-item__product-image")) {
    if (image.style.backgroundImage == undefined) {
      return null;
    }
    // Pull the quoted URL out of `url("...")`.
    const matches = image.style.backgroundImage.match('[\"\'](.*)[\"\']');
    if (matches === null) {
      return null;
    } else {
      return matches[1];
    }
  }
  // If |image| is <amp-img>, image.src won't work.
  const src = image.src || image.getAttribute('src');
  if (verbose > 1)
    console.log('image src', src);
  if (src != null) {
    // data: images are usually placeholders.
    // Even if it's valid, we prefer http(s) URLs.
    if (!src.startsWith('data:')) {
      // Get absolute URL in case it's <amp-img>.
      return (new URL(src, document.location)).href;
    }
  }
  let sourceSet = image.getAttribute('data-search-image-source-set');
  if (sourceSet == null && image.parentElement.tagName == 'PICTURE') {
    const sources = image.parentElement.querySelectorAll('source');
    if (sources.length >= 1) {
      sourceSet = getAbsoluteUrlOfSrcSet(sources[0]);
    }
  }
  if (sourceSet == null)
    return null;
  console.assert(sourceSet.includes(' '), 'image extraction error', image);
  // TODO: Pick the one with right pixel density?
  // Declared locally: the previous undeclared assignment leaked a global and
  // throws in strict mode.
  const imageUrl = sourceSet.split(' ')[0];
  console.assert(imageUrl.length > 0, 'image extraction error', sourceSet);
  return imageUrl;
}

// Resolves the srcset value of |image| into an absolute URL by temporarily
// assigning it to the src property and reading it back (the "self assigning
// trick"), see
// https://github.com/chromium/dom-distiller/blob/ccfe233400cc214717ccc80973be431ab0e33cf7/java/org/chromium/distiller/DomUtil.java#L438
// The original src value is written back before returning.
function getAbsoluteUrlOfSrcSet(image) {
  const originalSrc = image.src;
  // Write the srcset into src, then read back what src now reports.
  image.src = image.srcset;
  const absoluteUrl = image.src;
  // Put the original value back.
  image.src = originalSrc;
  return absoluteUrl;
}

// Returns the product page URL for |item|:
//   - "" on sites known not to expose product links,
//   - the href of the first usable anchor in DOM order otherwise,
//   - null when no usable anchor exists.
// |item| may itself be an <a> element.
function extractUrl(item) {
  // Some sites don't use an <a> tag or don't explicitly state href. E.g.
  // ae.com shows a side panel after clicking on each item instead of
  // directing to the product page, and some sites might trigger JS to
  // initiate navigation instead of <a>.
  const sitesWithoutProductLinks = [
    "ae.com",
    "kiehls.com",
    "discounttiredirect.com",
    "shutterfly.com",
    "bkstr.com",
  ];
  if (sitesWithoutProductLinks.some((site) => document.URL.includes(site))) {
    return "";
  }
  const anchors = item.tagName == 'A' ? [item] : item.querySelectorAll('a');
  console.assert(anchors.length >= 1, 'url extraction error', item);
  for (const anchor of anchors) {
    // Skip in-page placeholder links.
    if (anchor.href.match(/\/#$/))
      continue;
    // href="javascript:" would be sanitized when serialized to MHTML.
    if (anchor.href.match(/^javascript:/))
      continue;
    if (anchor.href == '') {
      // For Sears
      let href = anchor.getAttribute('bot-href');
      if (href != null && href.length > 0) {
        // Resolve to absolute URL by round-tripping through the href
        // property, then restore the anchor.
        anchor.href = href;
        href = anchor.href;
        anchor.removeAttribute('href');
        if (href != '')
          return href;
      }
      continue;
    }
    // TODO: This returns the first URL in DOM order.
    //       Use the one with largest area instead?
    // The former largest-area selection after this loop was unreachable
    // because of this return, and has been removed.
    return anchor.href;
  }
  return null;
}

// Tells whether the computed CSS display of |element| contains 'inline'
// (so 'inline', 'inline-block', 'inline-flex', ... all count).
function isInlineDisplay(element) {
  const computedDisplay = window.getComputedStyle(element)['display'];
  return computedDisplay.includes('inline');
}

// Counts the direct children of |element| whose display is not inline.
function childElementCountExcludingInline(element) {
  let count = 0;
  for (const child of element.children) {
    // The original condition was inverted: it counted the inline children,
    // contradicting the function name.
    if (!isInlineDisplay(child))
      count += 1;
  }
  return count;
}

// Tells whether |element| itself, or anything below it, has a non-inline
// display.
function hasNonInlineDescendentsInclusive(element) {
  // The element itself counts; descendants are only checked for inline
  // elements.
  return !isInlineDisplay(element) || hasNonInlineDescendents(element);
}

// Tells whether any descendant of |element| (not |element| itself) has a
// non-inline display.
function hasNonInlineDescendents(element) {
  return Array.from(element.children)
      .some((child) => hasNonInlineDescendentsInclusive(child));
}

// Tells whether |element| has at least one direct text node child that
// contains something other than whitespace.
function hasNonWhiteTextNodes(element) {
  return Array.from(element.childNodes).some((node) => {
    if (node.nodeType != document.TEXT_NODE)
      return false;
    return node.nodeValue.trim() != '';
  });
}

// Concatenates className and id of |element| and of its ancestors into one
// string. Climbing stops after |maxDepth| levels, at the top of the tree, or
// just before an ancestor that contains |excludingElement|.
// |element| itself is always included, even if |excludingElement| is one of
// its descendants.
// |maxDepth| counts the current level, so maxDepth = 1 means just |element|.
// maxDepth >= 3 causes error in Walmart deals if not deducting "price".
function ancestorIdAndClassNames(element, excludingElement, maxDepth = 3) {
  let collected = '';
  let current = element;
  let level = 0;
  do {
    collected += current.className + current.id;
    current = current.parentElement;
    level += 1;
  } while (level < maxDepth && current &&
           !current.contains(excludingElement));
  return collected;
}

/*
  Picks the most title-like element of |elementArray| by pairwise comparison.
  Criteria, with decreasing priority:
  - text of at least 2 characters
  - score based on whether ancestorIdAndClassNames contains "title", "price",
  etc.
  - largest area
  - largest font size
  - longest text (on a full tie the later element wins)
 */
function chooseTitle(elementArray) {
  const titleRegex = /name|title|truncate|desc|brand/i;
  const negativeRegex = /price|model/i;

  // Titles are typically 2 characters or more.
  const isLongEnough = (element) => element.innerText.trim().length >= 2;
  // +1 for a title-like name, -1 for a price/model-like name.
  const scoreNames = (names) =>
      (names.match(titleRegex) != null) - (names.match(negativeRegex) != null);
  // Use getBoundingClientRect() to avoid int rounding error in
  // offsetHeight/Width.
  const areaOf = (element) =>
      element.getBoundingClientRect().width *
      element.getBoundingClientRect().height;
  const fontSizeOf = (element) =>
      parseFloat(window.getComputedStyle(element)['font-size']);

  return elementArray.reduce((best, challenger) => {
    const bestLongEnough = isLongEnough(best);
    const challengerLongEnough = isLongEnough(challenger);
    if (bestLongEnough != challengerLongEnough) {
      return bestLongEnough ? best : challenger;
    }

    const bestNames = ancestorIdAndClassNames(best, challenger);
    const challengerNames = ancestorIdAndClassNames(challenger, best);
    const bestScore = scoreNames(bestNames);
    const challengerScore = scoreNames(challengerNames);
    if (verbose > 1)
      console.log(
          'className score', bestScore, challengerScore, bestNames,
          challengerNames, best, challenger);
    if (bestScore != challengerScore) {
      return bestScore > challengerScore ? best : challenger;
    }

    const bestArea = areaOf(best);
    const challengerArea = areaOf(challenger);
    if (verbose > 1)
      console.log(
          'getBoundingClientRect', best.getBoundingClientRect(),
          challenger.getBoundingClientRect(), best, challenger);
    if (bestArea != challengerArea) {
      return bestArea > challengerArea ? best : challenger;
    }

    const bestFontSize = fontSizeOf(best);
    const challengerFontSize = fontSizeOf(challenger);
    if (verbose > 1)
      console.log(
          'font size', bestFontSize, challengerFontSize, best, challenger);
    if (bestFontSize != challengerFontSize) {
      return bestFontSize > challengerFontSize ? best : challenger;
    }

    if (best.innerText.length > challenger.innerText.length) {
      return best;
    }
    return challenger;
  });
}

// Returns the trimmed title text of the product shown in |item|, or null when
// no title candidate is found.
// Candidates are common text-bearing elements; containers without own text,
// tiny or empty elements, 'sponsored' labels and parents that merely wrap a
// single child with the same text are filtered out. If several candidates
// remain, chooseTitle() picks one.
function extractTitle(item) {
  const possible_titles =
      item.querySelectorAll('a, span, p, div, h1, h2, h3, h4, h5, strong');
  let titles = [];
  for (const candidate of possible_titles) {
    // Pure layout containers: block-level descendants but no own text.
    if (hasNonInlineDescendents(candidate) &&
        !hasNonWhiteTextNodes(candidate)) {
      continue;
    }
    // Too small to be a title.
    if (candidate.offsetWidth <= 1 || candidate.offsetHeight <= 1)
      continue;
    if (candidate.innerText.trim() == '')
      continue;
    if (candidate.innerText.trim().toLowerCase() == 'sponsored')
      continue;
    if (candidate.childElementCount > 0) {
      // Skip a parent whose text is entirely that of its first or last child;
      // the child is a candidate of its own.
      if (candidate.textContent.trim() ==
              candidate.lastElementChild.textContent.trim() ||
          candidate.textContent.trim() ==
              candidate.firstElementChild.textContent.trim()) {
        continue;
      }
    }
    // Aliexpress has many items without title. Without the following filter,
    // the title would be the price.
    // if (title.innerText.trim().match(priceRegexFull)) continue;
    titles.push(candidate);
  }
  if (titles.length > 1) {
    if (verbose > 1)
      console.log('all generic titles', item, titles);
    titles = [chooseTitle(titles)];
  }

  if (verbose > 0)
    console.log('titles', item, titles);
  console.assert(titles.length == 1, 'titles extraction error', item, titles);
  if (titles.length != 1)
    return null;
  // Returned directly: the previous undeclared `title = ...` assignment
  // leaked a global and throws in strict mode.
  return titles[0].innerText.trim();
}

// Handles prices whose cents are rendered in a separate child element without
// a decimal point (e.g. '$12' followed by '99').
// Returns the price text with a '.' inserted before the cents, or null when
// |priceElement| does not match that layout.
function adjustBeautifiedCents(priceElement) {
  const text = priceElement.innerText.trim().replace(/\/(each|set)$/i, '');
  // The cents are expected in the last child that has text and is not a
  // '/each'-style suffix.
  let cents;
  const children = priceElement.children;
  for (let i = children.length - 1; i >= 0; i--) {
    const t = children[i].innerText.trim();
    if (t == '')
      continue;
    if (t.indexOf('/') != -1)
      continue;
    cents = t;
    break;
  }
  if (cents == null)
    return null;
  if (verbose > 0)
    console.log('cents', cents, priceElement);
  // Accept only exactly two trailing characters that are preceded by a digit.
  if (cents.length == 2 && cents == text.slice(-cents.length) &&
      text.slice(-3, -2).match(/\d/)) {
    // slice() replaces the deprecated substr().
    return text.slice(0, text.length - cents.length) + '.' + cents;
  }
  // The caller checks `!= null`, so this equals the former implicit
  // undefined.
  return null;
}

// Tells whether |element| or one of its nearest ancestors is rendered with a
// line-through text decoration (a typical marker of a struck-out old price).
// At most |maxDepth| levels are inspected, starting with |element| itself
// (which is always inspected), and the walk stops at <body>.
function anyLineThroughInAncentry(element, maxDepth = 2) {
  let node = element;
  let inspected = 0;
  while (node != null && node.tagName != 'BODY') {
    const decoration = window.getComputedStyle(node)['text-decoration'];
    if (decoration.includes('line-through'))
      return true;
    inspected += 1;
    if (inspected >= maxDepth)
      return false;
    node = node.parentElement;
  }
  return false;
}

// Parses the numeric value of a price string after dropping a label or
// per-unit suffix matched by |priceCleanupRegex| and any leading '$' signs.
// Returns NaN when no number follows.
function forgivingParseFloat(str) {
  const withoutLabel = str.replace(priceCleanupRegex, '');
  const withoutCurrencySign = withoutLabel.replace(/^[$]*/, '');
  return parseFloat(withoutCurrencySign);
}

// Picks the most plausible current price out of |priceArray| (price strings)
// and returns it with label/suffix stripped by |priceCleanupRegex|.
// Returns null for an empty array.
// Preference order when comparing two candidates:
//   1. the one tagged 'with offer', then 'sale', then 'now',
//   2. the one NOT tagged '/set', then '/each',
//   3. the one with the smaller numerical value.
function choosePrice(priceArray) {
  if (priceArray.length == 0)
    return null;

  const positiveTags = ['with offer', 'sale', 'now'];
  const negativeTags = ['/set', '/each'];
  const mentions = (text, tag) => text.toLowerCase().indexOf(tag) != -1;

  function preferred(current, next) {
    for (const tag of positiveTags) {
      const inCurrent = mentions(current, tag);
      const inNext = mentions(next, tag);
      if (inCurrent != inNext) {
        return inCurrent ? current : next;
      }
    }
    for (const tag of negativeTags) {
      const inCurrent = mentions(current, tag);
      const inNext = mentions(next, tag);
      if (inCurrent != inNext) {
        return inCurrent ? next : current;
      }
    }
    // Guess the smallest numerical value.
    // The tags like "now" don't always fall inside element boundary.
    // See Nordstrom/homepage-eager.mhtml.
    if (forgivingParseFloat(current) > forgivingParseFloat(next)) {
      return next;
    }
    return current;
  }

  let winner = priceArray[0];
  for (let index = 1; index < priceArray.length; index++) {
    winner = preferred(winner, priceArray[index]);
  }
  return winner.replace(priceCleanupRegex, '');
}

// Returns the price text of the product shown in |item|:
//   - "" on shein.com, where prices are not extracted,
//   - the text of a single '.currency-value' element when present,
//   - otherwise the best candidate among generic text elements whose text is
//     a full price (see |priceRegexFull| and choosePrice()),
//   - null when no candidate is found.
function extractPrice(item) {
  const hostname = new URL(document.baseURI).hostname;
  // shein.com shows price by one element per digit and it's challenging
  // to decide based on textContent.
  if (hostname.endsWith("shein.com")) {
    return "";
  }
  // Etsy mobile
  const prices = item.querySelectorAll(`
      .currency-value
  `);
  if (prices.length == 1) {
    let ans = prices[0].textContent.trim();
    if (ans.match(/^\d/))
      ans = '$' + ans;  // for Etsy
    if (ans != '')
      return ans;
  }
  // Generic heuristic to search for price elements.
  const captured_prices = [];
  for (const price of item.querySelectorAll(
    'span, b, p, div, h3, td, li, em, strong, ins')) {
    let candidate = price.innerText.trim();
    // Some sites render several lines in the price element; pick the line
    // that holds the price.
    if (hostname.endsWith("urbanoutfitters.com") ||
        hostname.endsWith("freepeople.com")) {
      // Declared locally: the previous undeclared assignment leaked a global
      // and throws in strict mode.
      const priceParts = candidate.split("\n");
      if (priceParts.length >= 2){
        candidate = priceParts[1];
      }
    } else if (hostname.endsWith("thecompanystore.com") ||
        hostname.endsWith("childrensplace.com") ||
        hostname.endsWith("chewy.com")) {
      candidate = candidate.split("\n")[0];
    }
    if (!candidate.match(priceRegexFull))
      continue;
    if (verbose > 1)
      console.log('price candidate', candidate, price);
    if (price.childElementCount > 0) {
      // Avoid matching the parent element of the real price element.
      // Otherwise adjustBeautifiedCents would break.
      if (price.innerText.trim() == price.lastElementChild.innerText.trim() ||
          price.innerText.trim() == price.firstElementChild.innerText.trim()) {
        // If the wanted child is not scanned, change the querySelectorAll
        // string.
        if (verbose > 1)
          console.log('skip redundant parent', price);
        continue;
      }
    }
    // Struck-out prices are old prices.
    // TODO: check child elements recursively.
    if (anyLineThroughInAncentry(price)) {
      if (verbose > 1)
        console.log('line-through', price);
      continue;
    }
    // for Amazon and HomeDepot
    if (candidate.indexOf('.') == -1 && price.lastElementChild != null) {
      const adjusted = adjustBeautifiedCents(price);
      if (adjusted != null)
        return adjusted;
    }
    captured_prices.push(candidate);
  }
  if (verbose > 0)
    console.log('captured_prices', captured_prices);
  return choosePrice(captured_prices);
}

// Picks the product id out of a regex match result.
// Returns null when |productIdMatches| is null. With |matchIndex| given, the
// entry at that index is returned as is. Otherwise the last entry that is not
// undefined is returned, or null when there is none.
function getProductIdFromMatches(productIdMatches, matchIndex = undefined) {
  if (productIdMatches === null) {
    return null;
  }
  if (matchIndex !== undefined) {
    return productIdMatches[matchIndex];
  }
  // Walk backwards to find the last capture group that participated.
  let index = productIdMatches.length;
  while (index-- > 0) {
    const captured = productIdMatches[index];
    if (captured !== undefined) {
      return captured;
    }
  }
  return null;
}

// Extracts a product id using the host-specific patterns in |patternMap|.
// |sourceMap| maps a source name to the text to search. |patternMap| maps a
// source name to an object keyed by hostname, whose value is either a regex
// string or a [regex string, match index] pair.
// Only the first source that has a pattern for the current hostname is
// consulted; its result (possibly null) is returned. Returns null when no
// source has a pattern for this hostname.
function getProductIdWithPattern(sourceMap, patternMap) {
  const currentHost = window.location.hostname;
  for (const [sourceName, source] of Object.entries(sourceMap)) {
    const hostPatterns = patternMap[sourceName];
    if (hostPatterns === undefined || !(currentHost in hostPatterns)) {
      continue;
    }
    const heuristic = hostPatterns[currentHost];
    // Normalize both accepted shapes to (pattern, matchIndex).
    const [pattern, matchIndex] =
        Array.isArray(heuristic) ? heuristic : [heuristic, undefined];
    const matches = source.match(new RegExp(pattern, 'i'));
    return getProductIdFromMatches(matches, matchIndex);
  }
  return null;
}

// Returns the product id for a product, or null when none can be extracted.
// The globals |idExtractionMap| and |couponIdExtractionMap| are optional;
// when both are missing no extraction is attempted. |idExtractionMap| is
// consulted first, |couponIdExtractionMap| only when the first yields null.
// The searchable sources are the product URL, the image URL and the outer
// HTML of |item|.
function extractProductId(url, imageUrl, item) {
  const extractionMaps = [];
  if (typeof idExtractionMap !== 'undefined') {
    extractionMaps.push(idExtractionMap);
  }
  if (typeof couponIdExtractionMap !== 'undefined') {
    extractionMaps.push(couponIdExtractionMap);
  }
  if (extractionMaps.length == 0) {
    return null;
  }

  const sourceMap = {
    "product_url": url,
    "product_image_url": imageUrl,
    "product_element": item.outerHTML,
  };
  for (const extractionMap of extractionMaps) {
    const productId = getProductIdWithPattern(sourceMap, extractionMap);
    if (productId !== null) {
      return productId;
    }
  }
  return null;
}

// Extracts the product shown in |item|.
// Returns {url, imageUrl, title, price} plus an optional productId, or null
// as soon as the image, URL, title or price cannot be determined.
function extractItem(item) {
  // All intermediate values are local. The previous undeclared assignments
  // to imageUrl/url/title leaked globals and throw in strict mode.
  const imageUrl = extractImage(item);
  if (imageUrl == null) {
    if (verbose > 0)
      console.warn('no images found', item);
    return null;
  }
  const url = extractUrl(item);
  // Some items in Sears and Staples only have ng-click or onclick handlers,
  // so it's impossible to extract URL.
  if (url == null) {
    if (verbose > 0)
      console.warn('no url found', item);
    return null;
  }
  const title = extractTitle(item);
  if (title == null) {
    if (verbose > 0)
      console.warn('no title found', item);
    return null;
  }
  const price = extractPrice(item);
  // eBay "You may also like" and "Guides" are not product items.
  // Not having price is one hint.
  // FIXME: "Also viewed" items in Gap don't have prices.
  if (price == null) {
    if (verbose > 0)
      console.warn('no price found', item);
    return null;
  }
  const extractionResult =
      {'url': url, 'imageUrl': imageUrl, 'title': title, 'price': price};
  // productId is an optional field for extraction.
  const productId = extractProductId(url, imageUrl, item);
  if (productId !== null) {
    extractionResult['productId'] = productId;
  }
  return extractionResult;
}

// Returns the nearest element, starting at |a| and climbing through its
// parents, that contains |b|. Returns |a| itself when it already contains
// |b|.
function commonAncestor(a, b) {
  for (let ancestor = a;; ancestor = ancestor.parentElement) {
    if (ancestor.contains(b)) {
      return ancestor;
    }
  }
}

// Returns the common ancestor of all elements in |list| by folding
// commonAncestor() over the list from left to right. |list| must not be
// empty.
function commonAncestorList(list) {
  return list.reduce(
      (sharedAncestor, element) => commonAncestor(sharedAncestor, element));
}

// Tells whether |target| is nested with any element of |list|, in either
// direction (one contains the other).
function hasOverlap(target, list) {
  return list.some(
      (element) => element.contains(target) || target.contains(element));
}

// Matches |pattern| against the lower-cased text content (when |matchText| is
// truthy) or the lower-cased outer HTML of |item|.
// Returns false for a null |item|, otherwise the result of String.match():
// a match array, or null when nothing matches.
function matchPattern(item, pattern, matchText) {
  if (item === null) {
    return false;
  }
  const haystack = (matchText ? item.textContent : item.outerHTML).toLowerCase();
  return haystack.match(pattern);
}

// Heuristically decides whether |item| is a product row of the cart.
// Returns false when an exclusion rule fires; otherwise the result of the
// first positive hint that matches (a match array), or null when none does.
// Callers only rely on the truthiness of the result.
function isCartItem(item) {
  // TODO: Improve the heuristic here to accommodate more formats of cart item.
  const textMatch = (regex) => matchPattern(item, regex, true);
  const htmlMatch = (regex) => matchPattern(item, regex, false);
  const onSite = (domain) => document.URL.includes(domain);

  // Rows offering 'move to cart' belong to saved/wishlist sections.
  if (textMatch(moveToCartTextRegex)) {
    return false;
  }
  // Walmart has 'move to cart' outside of the div.cart-item.
  if (matchPattern(item.parentElement, moveToCartTextRegex, true)) {
    return false;
  }
  // Tax lines belong to the order summary.
  if (textMatch(cartPriceTextRegex)) {
    return false;
  }
  // Item element in bestbuy.com contains "add to cart" for things
  // like protection plans, so the rule is skipped there (and on
  // orientaltrading.com).
  const allowsAddToCart = onSite('bestbuy.com') || onSite('orientaltrading.com');
  if (!allowsAddToCart && textMatch(addToCartTextRegex)) {
    return false;
  }
  const excludesMinicart = onSite('ashleyfurniture.com') ||
      onSite('gnc.com') || onSite('bathandbodyworks.com');
  if (excludesMinicart && htmlMatch(minicartHTMLRegex)) {
    return false;
  }
  // On ashleyfurniture.com a cart item must show a quantity.
  if (onSite('ashleyfurniture.com') && textMatch(cartItemQtyRegex) === null) {
    return false;
  }
  return textMatch(cartItemTextRegex) || textMatch(cartItemQtyRegex) ||
      htmlMatch(cartItemHTMLRegex);
}

// Tries to extract a product from the candidate element |item|.
// On success the result is stored in |output| (a Map keyed by element) and
// the element is appended to |extracted_items|.
// With |skipFiltering| set, |item| comes from a site-specific selector and is
// extracted directly. Otherwise a series of cheap filters is applied first;
// |processed| (a Set) remembers elements already examined and
// |savedForLaterSection| (element or null) marks the start of the saved
// items.
function extractOneItem(item, extracted_items, processed, output,
  savedForLaterSection, skipFiltering) {
  // Runs the extraction on |element| and records a successful result.
  function recordExtraction(element) {
    const extraction = extractItem(element);
    if (extraction != null) {
      output.set(element, extraction);
      extracted_items.push(element);
    }
  }

  if (skipFiltering) {
    recordExtraction(item);
    return;
  }
  if (verbose > 1) {
    console.log('trying', item);
  }
  let target = item;
  if (target.childElementCount == 0) {
    // Amazon store page uses overlay <a>: evaluate the parent of a leaf
    // element instead, unless that parent is <body>.
    const parent = target.parentElement;
    // The original read parent.tagName before its null check, which made
    // that check unreachable and threw for a parentless leaf.
    if (parent == null)
      return;
    if (parent.tagName != 'BODY')
      target = parent;
  }
  if (processed.has(target)) {
    if (verbose > 0)
      console.log('processed', target);
    return;
  }
  processed.add(target);
  if (target.scrollHeight > 1000) {
    if (verbose > 0)
      console.log('too tall', target);
    return;
  }
  // Skip elements nested with something that was already extracted.
  if (hasOverlap(target, extracted_items)) {
    if (verbose > 0)
      console.log('overlap', target);
    return;
  }
  // scrollHeight could be 0 while getBoundingClientRect().height > 0.
  const bounding_rect = target.getBoundingClientRect();
  if (bounding_rect.height < 50) {
    if (verbose > 0)
      console.log('too short', target);
    return;
  }
  // Area limit relative to the viewport width.
  if (bounding_rect.height * bounding_rect.width > 800 * window.innerWidth) {
    if (verbose > 0)
      console.log('too tall', target);
    return;
  }
  if (target.querySelectorAll('img, amp-img, .bg-img').length == 0) {
    if (verbose > 0)
      console.log('no image', target);
    return;
  }
  if (!target.textContent.match(priceRegex)) {
    if (verbose > 0)
      console.log('no price', target);
    return;
  }
  if (bounding_rect.top <= 10 &&
      (document.URL.includes('partycity.com') ||
       document.URL.includes('chewy.com'))) {
    if (verbose > 0)
      console.log('likely cart page header', target);
    return;
  }
  if (isInSavedForLater(target, savedForLaterSection)) {
    if (verbose > 0)
      console.log('in save for later', target);
    return;
  }
  if (!isCartItem(target)) {
    if (verbose > 0)
      console.log('not cart item', target);
    return;
  }
  if (verbose > 0)
    console.log('try extracting', target);
  recordExtraction(target);
}

// Tells whether |item| belongs to the saved-for-later part of the page: a
// saved-for-later section exists, |item| is positioned below its top edge,
// and the text of |item| does not offer 'save for later' itself.
function isInSavedForLater(item, savedForLaterSection) {
  if (savedForLaterSection === null) {
    return false;
  }
  const sectionTop = savedForLaterSection.getBoundingClientRect().top;
  const itemTop = item.getBoundingClientRect().top;
  if (!(sectionTop < itemTop)) {
    return false;
  }
  return !item.textContent.toLowerCase().match(saveForLaterRegex);
}

// Locates the heading of the "saved for later" style section of the page.
// Returns the last element (in the order the XPath iterator yields them)
// that has layout size and whose own text contains one of the known
// headings, or null when there is none.
function getSavedForLaterSection() {
  // Cheap pre-check on the page text to avoid the XPath query on most pages.
  // This regex should match the XPath pattern below.
  const shortCutRegex = new RegExp(
      '(your saved items)|(saved for later)|(my saved items)|(wishlist items)',
      'i');
  if (!document.body.innerText.match(shortCutRegex))
    return null;

  // translate() lower-cases the text so the comparison ignores case.
  const headingXPath =
    "//*[contains(translate(" +
    "text(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), " +
    "'your saved items')" +
    "or contains(translate(" +
    "text(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz'), " +
    "'saved for later')" +
    "or contains(translate(" +
    "text(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz'), " +
    "'my saved items')" +
    "or contains(translate(" +
    "text(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz'), " +
    "'wishlist items')]";
  const matches = document.evaluate(
      headingXPath, document, null, XPathResult.ORDERED_NODE_ITERATOR_TYPE,
      null);

  let section = null;
  for (let candidate = matches.iterateNext(); candidate;
       candidate = matches.iterateNext()) {
    // Keep the last match that has layout size.
    if (candidate.offsetHeight >= 1 && candidate.offsetWidth >= 1) {
      section = candidate;
    }
  }
  return section;
}

// Reads the optional global flag |isImprovementEnabled|.
// Returns its value when it is defined as a boolean, and false otherwise.
function isHeuristicsImprovementEnabled() {
  return typeof isImprovementEnabled === 'boolean' ? isImprovementEnabled :
                                                     false;
}

// Sort comparator that orders nodes by their position in the document.
// Returns -1 when |b| follows |a| or is contained by it, 1 when |b| precedes
// |a| or contains it, and 0 when both are the same node or neither relation
// is reported.
function documentPositionComparator(a, b) {
  if (a === b)
    return 0;

  const relation = a.compareDocumentPosition(b);
  const bComesLater =
      Node.DOCUMENT_POSITION_FOLLOWING | Node.DOCUMENT_POSITION_CONTAINED_BY;
  const bComesEarlier =
      Node.DOCUMENT_POSITION_PRECEDING | Node.DOCUMENT_POSITION_CONTAINS;

  if (relation & bComesLater)
    return -1;
  if (relation & bComesEarlier)
    return 1;
  return 0;
}

// Drops products whose URL was already seen, keeping the first occurrence.
// Only applied on sourcebmx.com; on every other site |output| is returned
// unchanged.
function deduplicateResults(output) {
  if (!document.URL.includes("sourcebmx.com")) {
    return output;
  }
  const seenUrls = new Set();
  return output.filter((product) => {
    const productUrl = product["url"];
    if (seenUrls.has(productUrl)) {
      return false;
    }
    seenUrls.add(productUrl);
    return true;
  });
}

// Defined only once, so that evaluating this script again in the same context
// does not fail on the class definition.
if (typeof Sleeper === 'undefined') {
  // Cooperative scheduling helper: callers invoke maybeSleep() between units
  // of work, and it yields via a timer once enough work time has accumulated.
  var Sleeper = class {
    constructor() {
      // 99.9th percentile of the individual task execution times should be
      // < 50ms.
      // The task time is defined as exclusive CPU usage, from last time
      // sleeping is done to the beginning of the next sleep.
      // The optional global kSleeperMinTaskTimeMs overrides the default.
      this.min_task_time =
          typeof kSleeperMinTaskTimeMs !== 'undefined' ? kSleeperMinTaskTimeMs :
                                                         10;

      // Avoid monopolizing JavaScript main thread execution time.
      // The optional global kSleeperDutyCycle overrides the default; the
      // value is clamped to [0.01, 1].
      const requestedDutyCycle =
          typeof kSleeperDutyCycle !== 'undefined' ? kSleeperDutyCycle : 0.05;
      this.duty_cycle = Math.max(0.01, Math.min(requestedDutyCycle, 1));

      this.last_sleep = performance.now();
      this.start = performance.now();
      this.longest_task = 0;
      this.total_tasks_time = 0;
    }

    // Sleeps when more than |min_task_time| ms of work have passed since the
    // last sleep; returns right away otherwise.
    async maybeSleep() {
      const workedFor = performance.now() - this.last_sleep;
      if (workedFor <= this.min_task_time)
        return;
      this.longest_task = Math.max(this.longest_task, workedFor);
      this.total_tasks_time += workedFor;
      if (verbose > 1) {
        console.log('longest task', this.longest_task);
      }

      // Calculate the delay aiming for the target duty cycle.
      // duty_cycle = (working time) / (working time + sleeping time)
      //            = elapsed / (elapsed + delay)
      const delay = workedFor * (1 - this.duty_cycle) / this.duty_cycle;
      await new Promise((resolve) => setTimeout(resolve, delay));
      this.last_sleep = performance.now();
    }

    // Longest task so far, taking the task in progress into account.
    get longestTask() {
      const currentTask = performance.now() - this.last_sleep;
      return Math.max(this.longest_task, currentTask);
    }

    // Accumulated task time, including the task in progress.
    get totalTasksTime() {
      const currentTask = performance.now() - this.last_sleep;
      return this.total_tasks_time + currentTask;
    }

    // Wall-clock time since construction, sleeping included.
    get elapsed() {
      return performance.now() - this.start;
    }
  };
}

// Extracts all cart products found under |root|.
// Resolves to:
//   - [] when |root| is null and the document has finished loading,
//   - false when |root| is null and the document is still loading,
//   - otherwise {products, longest_task_ms, total_tasks_ms, elapsed_ms,
//     timedout}, with products in document order.
// Work is interleaved with sleeps (see Sleeper) and aborted once the
// accumulated task time exceeds the timeout (global kTimeoutMs, 250 ms by
// default).
async function extractAllItems(root) {
  const timeout = typeof kTimeoutMs !== 'undefined' ? kTimeoutMs : 250;
  const sleeper = new Sleeper();

  // Root element being null could be due to the
  // fact that the cart is emptied, or the cart
  // element has not been loaded yet.
  if (root == null) {
    return document.readyState == 'complete' ? [] : false;
  }

  // Sites whose cart rows are found with a dedicated selector; those rows
  // are extracted without the generic filtering.
  const siteSelectors = [
    {sites: ['kiehls.com', 'laroche-posay.us'], selector: '.c-product-table__row'},
    {sites: ['americastire.com', 'discounttire.com'], selector: '[role="listitem"]'},
    {sites: ['discounttiredirect.com'], selector: '.cart-item'},
    {sites: ['shutterfly.com'], selector: '.cartitem'},
  ];
  const siteRule = siteSelectors.find(
      (rule) => rule.sites.some((site) => document.URL.includes(site)));
  const skipFiltering = siteRule !== undefined;

  let items;
  if (skipFiltering) {
    items = root.querySelectorAll(siteRule.selector);
  } else {
    // Generic pattern: start from the links, then consider their ancestors.
    const anchors = root.querySelectorAll('a');

    // Group the links by target URL.
    const anchorsByHref = new Map();
    for (const anchor of anchors) {
      let group = anchorsByHref.get(anchor.href);
      if (group === undefined) {
        group = new Set();
        anchorsByHref.set(anchor.href, group);
      }
      group.add(anchor);
    }

    // Candidates: the common ancestor of each group, then every link.
    const candidates = new Set();
    for (const group of anchorsByHref.values()) {
      candidates.add(commonAncestorList(Array.from(group)));
    }
    for (const anchor of anchors) {
      candidates.add(anchor);
    }

    // Add the candidates and up to 7 levels of their ancestors, level by
    // level. The insertion order determines the processing order below.
    // TODO: optimize this part.
    const ancestors = new Set();
    for (let depth = 0; depth < 8; depth++) {
      for (const candidate of candidates) {
        let node = candidate;
        for (let step = 0; step < depth && node; step++) {
          node = node.parentElement;
        }
        if (node)
          ancestors.add(node);
      }
    }
    items = Array.from(ancestors);
  }
  await sleeper.maybeSleep();

  if (verbose > 0)
    console.log(items);
  const outputMap = new Map();
  const processed = new Set();
  const extracted_items = [];
  let savedForLaterSection = null;
  if (isHeuristicsImprovementEnabled()) {
    savedForLaterSection = getSavedForLaterSection();
    if (verbose > 0)
      console.log(savedForLaterSection);
    await sleeper.maybeSleep();
  }

  let visited = 0;
  let early_abort = false;
  for (const item of items) {
    extractOneItem(item, extracted_items, processed, outputMap,
      savedForLaterSection, skipFiltering);
    // Checking for every item is too slow, so only every 10th item (starting
    // with the first) is a checkpoint.
    const atCheckpoint = visited % 10 == 0;
    visited += 1;
    if (!atCheckpoint)
      continue;
    await sleeper.maybeSleep();
    if (sleeper.totalTasksTime > timeout) {
      if (verbose > 0) {
        console.log('aborted due to timeout');
      }
      early_abort = true;
      break;
    }
  }

  // Report the products in document order.
  const output = Array.from(outputMap.keys())
                     .sort(documentPositionComparator)
                     .map((element) => outputMap.get(element));
  await sleeper.maybeSleep();
  return {
    'products': deduplicateResults(output),
    'longest_task_ms': sleeper.longestTask,
    'total_tasks_ms': sleeper.totalTasksTime,
    'elapsed_ms': sleeper.elapsed,
    'timedout': early_abort,
  };
}

// Script entry point: starts extraction over the whole document and keeps the
// resulting promise.
// NOTE(review): the name is assigned without a declaration, which creates a
// global in sloppy mode. Presumably the embedder reads this promise (or the
// value of this final expression), so confirm that before adding a
// declaration keyword.
extracted_results_promise = extractAllItems(document);