chromium/chrome/browser/ash/app_list/search/local_image_search/image_annotation_worker.h

// Copyright 2022 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef CHROME_BROWSER_ASH_APP_LIST_SEARCH_LOCAL_IMAGE_SEARCH_IMAGE_ANNOTATION_WORKER_H_
#define CHROME_BROWSER_ASH_APP_LIST_SEARCH_LOCAL_IMAGE_SEARCH_IMAGE_ANNOTATION_WORKER_H_

#include <memory>
#include <optional>
#include <vector>

#include "base/containers/queue.h"
#include "base/files/file_path.h"
#include "base/files/file_path_watcher.h"
#include "base/memory/raw_ptr.h"
#include "base/memory/weak_ptr.h"
#include "base/sequence_checker.h"
#include "base/task/sequenced_task_runner.h"
#include "base/time/time.h"
#include "base/timer/timer.h"
#include "chrome/browser/ash/app_list/search/local_image_search/annotation_storage.h"
#include "chrome/browser/ash/app_list/search/local_image_search/image_content_annotator.h"
#include "chrome/browser/screen_ai/public/optical_character_recognizer.h"

class Profile;

namespace base {
class FilePathWatcher;
}

namespace gfx {
class ImageSkia;
}

namespace app_list {
struct ImageInfo;

// The worker watches `root_path_` for any image changes, runs ICA on every
// change, and saves the annotation to the AnnotationStorage.
// It can be created on any sequence but must be initialized on the same
// sequence as AnnotationStorage. It runs IO heavy tasks on a background
// task runner.
// The worker supports on-device Optical Character Recognition (OCR) and
// Image Content-based Annotation (ICA) via DLCs.
// The class is neither copy-constructible nor copy-assignable.
// TODO(b/260646344): Revisit the use of a FilePathWatcher for MyFiles
//  if needed. (It may hit the folder limit.)
class ImageAnnotationWorker {
 public:
  // `root_path` is the directory to watch and `excluded_paths` holds path
  // prefixes that are excluded from it.
  // NOTE(review): `use_file_watchers`, `use_ocr` and `use_ica` presumably
  // populate the `use_*_` members of the same name; confirm in the .cc file.
  explicit ImageAnnotationWorker(
      const base::FilePath& root_path,
      const std::vector<base::FilePath>& excluded_paths,
      Profile* profile,
      bool use_file_watchers,
      bool use_ocr,
      bool use_ica);
  ~ImageAnnotationWorker();
  ImageAnnotationWorker(const ImageAnnotationWorker&) = delete;
  ImageAnnotationWorker& operator=(const ImageAnnotationWorker&) = delete;

  // Initializes a file watcher, connects to ICA and performs a file system
  // scan for new images. It must be called on the same sequence as
  // AnnotationStorage is bound to.
  void Initialize(AnnotationStorage* annotation_storage);

  // Deterministically triggers the event instead of using file watchers, which
  // cannot be awaited by `RunUntilIdle()` and introduce unwanted flakiness.
  void TriggerOnFileChangeForTests(const base::FilePath& path, bool error);

  // Test-only setter for the fake delay applied to the image processing
  // callback.
  void set_image_processing_delay_for_testing(
      base::TimeDelta image_processing_delay) {
    image_processing_delay_for_test_ = image_processing_delay;
  }

 private:
  // Takes the same `(path, error)` arguments as
  // `TriggerOnFileChangeForTests()`.
  // NOTE(review): presumably the handler behind `on_file_change_callback_`;
  // confirm in the .cc file.
  void OnFileChange(const base::FilePath& path, bool error);

  // Processes the items from the `files_to_process_` queue. This is done in a
  // non-recursive way, as recursion can lead to one stack frame per file and
  // crash chrome if there is a long list of non-image files in the queue.
  void ProcessItems();

  // This function should be called from the image processing callbacks, at
  // which point item processing has stopped. Restarts the item processing from
  // the `files_to_process_` queue if `file_path` matches the head of the
  // queue.
  // If `timeout_timer_` has been started, pass `use_timer` as true; the timer
  // is then also stopped when `file_path` matches the head of the queue.
  // An image processing callback can return after `timeout_timer_` has timed
  // out, which starts a new sequence. That results in multiple sequences
  // executing on a single queue, with certain files skipped and certain files
  // computed multiple times. Thus, we should check if the callback is still
  // up-to-date before we restart the item processing.
  void MaybeProcessNextItem(const base::FilePath& file_path,
                            bool use_timer = false);

  // Processes the next directory from the `files_to_process_` queue.
  void ProcessNextDirectory();

  // Processes the next image from the `files_to_process_` queue. Returns true
  // if the image needs to be decoded, and false otherwise.
  bool ProcessNextImage();

  // Removes all the files from a deleted directory.
  void RemoveOldDirectory();

  // Removes deleted images from the annotation storage.
  // `images` is taken by value. The top-level `const` is intentionally left
  // out of this declaration: it is not part of the function type, so the
  // out-of-line definition may still declare its parameter `const`.
  void FindAndRemoveDeletedFiles(std::vector<base::FilePath> images);

  // For testing. File name annotator.
  void RunFakeImageAnnotator(ImageInfo image_info);

  // NOTE(review): by its name, makes sure `optical_character_recognizer_` is
  // connected before use; confirm in the .cc file.
  void EnsureOcrAnnotatorIsConnected();

  // Initializes the `file_watcher_` and does initial data checks.
  void OnDlcInstalled();

  // Image processing callbacks (see `MaybeProcessNextItem()`).
  // NOTE(review): by their names these receive, respectively, the decoded
  // image, the ICA result and the OCR result for `image_info`; confirm in the
  // .cc file.
  void OnDecodeImageFile(ImageInfo image_info,
                         const gfx::ImageSkia& image_skia);

  void OnPerformIca(
      ImageInfo image_info,
      chromeos::machine_learning::mojom::ImageAnnotationResultPtr ptr);

  void OnPerformOcr(ImageInfo image_info,
                    screen_ai::mojom::VisualAnnotationPtr visual_annotation);

  // NOTE(review): presumably run when `timeout_timer_` fires; confirm in the
  // .cc file.
  void OnImageProcessTimeout();

  std::unique_ptr<base::FilePathWatcher> file_watcher_;
  base::FilePath root_path_;
  // Excludes any path matching the prefixes.
  std::vector<base::FilePath> excluded_paths_;

  base::FilePathWatcher::Callback on_file_change_callback_;

  // AnnotationStorage owns this ImageAnnotationWorker. All the methods must
  // be called from the main sequence.
  raw_ptr<AnnotationStorage, DanglingUntriaged> annotation_storage_;

  // ML models used as DLCs.
  ImageContentAnnotator image_content_annotator_;
  scoped_refptr<screen_ai::OpticalCharacterRecognizer>
      optical_character_recognizer_;

  const bool use_file_watchers_;
  const bool use_ica_;
  const bool use_ocr_;
  // Queue of pending paths; holds both directories and images (see
  // `ProcessNextDirectory()` and `ProcessNextImage()`).
  base::queue<base::FilePath> files_to_process_;
  // NOTE(review): retry counter; what is being retried is not visible in this
  // header.
  int num_retries_passed_ = 0;

  // Indexing limit params.
  const int indexing_limit_;
  int num_indexing_images_ = 0;

  // Fake delay for image processing callback. Used in tests only.
  std::optional<base::TimeDelta> image_processing_delay_for_test_ =
      std::nullopt;

  // Timer guarding image processing; see `MaybeProcessNextItem()` and
  // `OnImageProcessTimeout()`.
  base::OneShotTimer timeout_timer_;
  // NOTE(review): presumably the time at which processing of
  // `files_to_process_` started; confirm in the .cc file.
  base::TimeTicks queue_processing_start_time_;
  // Owned by this class.
  const scoped_refptr<base::SequencedTaskRunner> task_runner_;
  SEQUENCE_CHECKER(sequence_checker_);
  // Declared last so that it is destroyed before the other members.
  base::WeakPtrFactory<ImageAnnotationWorker> weak_ptr_factory_{this};
};
}  // namespace app_list

#endif  // CHROME_BROWSER_ASH_APP_LIST_SEARCH_LOCAL_IMAGE_SEARCH_IMAGE_ANNOTATION_WORKER_H_