// filedownloader.js | Explore in Territory

/**
 * @license
 * Copyright The Closure Library Authors.
 * SPDX-License-Identifier: Apache-2.0
 */

/**
 * @fileoverview A class for downloading remote files and storing them
 * locally using the HTML5 FileSystem API.
 *
 * The directory structure is of the form /HASH/URL/BASENAME:
 *
 * The HASH portion is a three-character slice of the hash of the URL. Since the
 * filesystem has a limit of about 5000 files per directory, this divides the
 * downloads roughly evenly among up to 4096 (16^3) directories, thus allowing
 * for on the order of 4096 * 5000 downloads.
 *
 * The URL portion is the (sanitized) full URL used for downloading the file.
 * This is used to ensure that each file ends up in a different location, even
 * if the HASH and BASENAME are the same.
 *
 * The BASENAME portion is the basename of the URL. It's used for the filename
 * proper so that the local filesystem: URL will be downloaded to a file with a
 * recognizable name.
 */

goog.provide('goog.net.FileDownloader');
goog.provide('goog.net.FileDownloader.Error');

goog.require('goog.Disposable');
goog.require('goog.asserts');
goog.require('goog.async.Deferred');
goog.require('goog.crypt.hash32');
goog.require('goog.debug.Error');
goog.require('goog.dispose');
goog.require('goog.events');
goog.require('goog.events.EventHandler');
goog.require('goog.fs');
goog.require('goog.fs.DirectoryEntry');
goog.require('goog.fs.Error');
goog.require('goog.fs.FileSaver');
goog.require('goog.fs.blob');
goog.require('goog.net.EventType');
goog.require('goog.net.XhrIo');
goog.require('goog.net.XhrIoPool');
goog.require('goog.object');
goog.requireType('goog.fs.FileEntry');
goog.requireType('goog.fs.FileWriter');
goog.requireType('goog.net.ErrorCode');



/**
 * Downloads remote files and persists them locally through the HTML5
 * filesystem API.
 *
 * @param {!goog.fs.DirectoryEntry} dir The directory in which the downloaded
 *     files are stored. This directory should be solely managed by
 *     FileDownloader.
 * @param {goog.net.XhrIoPool=} opt_pool The pool of XhrIo objects to use for
 *     downloading files. A fresh pool is created when none is supplied.
 * @constructor
 * @extends {goog.Disposable}
 * @final
 */
goog.net.FileDownloader = function(dir, opt_pool) {
  'use strict';
  goog.net.FileDownloader.base(this, 'constructor');

  /**
   * Root directory under which every downloaded file is saved.
   * @type {!goog.fs.DirectoryEntry}
   * @private
   */
  this.dir_ = dir;

  /**
   * Source of the XhrIo objects that perform the HTTP requests.
   * @type {!goog.net.XhrIoPool}
   * @private
   */
  this.pool_ = opt_pool ? opt_pool : new goog.net.XhrIoPool();

  /**
   * In-flight downloads, keyed by the URL string they were requested with.
   * @type {!Object<!goog.net.FileDownloader.Download_>}
   * @private
   */
  this.downloads_ = {};

  /**
   * Registers the XHR and file-writer event listeners used by downloads.
   * @type {!goog.events.EventHandler<!goog.net.FileDownloader>}
   * @private
   */
  this.eventHandler_ = new goog.events.EventHandler(this);
};
goog.inherits(goog.net.FileDownloader, goog.Disposable);


/**
 * Fetch a remote file and store its contents in the filesystem. Files are
 * identified purely by their URL string, so a relative and an absolute URL
 * that point at the same resource count as two different files.
 *
 * The returned Deferred receives the downloaded blob. On failure its errback
 * is passed a {@link goog.net.FileDownloader.Error}.
 *
 * If a download for the URL is already running, the result is another branch
 * of that download's Deferred. If the URL has already been downloaded, this
 * will fail once it tries to save the downloaded blob.
 *
 * Every Deferred handed out for one download is a branch of a single parent.
 * If all such branches are cancelled, or if one is cancelled with
 * opt_deepCancel set, then the download itself is cancelled too.
 *
 * @param {string} url The URL of the file to download.
 * @return {!goog.async.Deferred} The deferred result blob.
 */
goog.net.FileDownloader.prototype.download = function(url) {
  'use strict';
  let active;
  if (this.isDownloading(url)) {
    // Reuse the running download rather than issuing a second request.
    active = this.downloads_[url];
  } else {
    const started = new goog.net.FileDownloader.Download_(url, this);
    this.downloads_[url] = started;
    // The request itself starts once the pool hands over an XhrIo.
    this.pool_.getObject((xhr) => {
      this.gotXhr_(started, xhr);
    });
    active = started;
  }
  return active.deferred.branch(true /* opt_propagateCancel */);
};


/**
 * Return a Deferred that fires once no download is active for a given URL.
 * When nothing is being downloaded for that URL the Deferred fires right
 * away; otherwise it fires when the running download finishes, regardless of
 * whether that download succeeded or failed.
 *
 * @param {string} url The URL of the download to wait for.
 * @return {!goog.async.Deferred} The Deferred that will fire when the download
 *     is complete. It is always fired with null.
 */
goog.net.FileDownloader.prototype.waitForDownload = function(url) {
  'use strict';
  const finished = new goog.async.Deferred();
  const signalDone = () => {
    finished.callback(null);
  };

  if (!this.isDownloading(url)) {
    signalDone();
    return finished;
  }

  // addBoth: the waiter is notified on success and on failure alike.
  this.downloads_[url].deferred.addBoth(signalDone, this);
  return finished;
};


/**
 * Returns whether or not there is an active download for a given URL.
 *
 * Only entries stored directly on the downloads map count. A plain `in` test
 * would also match names inherited from Object.prototype (for example the
 * relative URLs "constructor" or "toString"), which would make callers
 * dereference a non-download value.
 *
 * @param {string} url The URL of the download to check.
 * @return {boolean} Whether or not there is an active download for the URL.
 */
goog.net.FileDownloader.prototype.isDownloading = function(url) {
  'use strict';
  return Object.prototype.hasOwnProperty.call(this.downloads_, url);
};


/**
 * Load a previously downloaded blob from the filesystem. The returned
 * Deferred fires an error if the URL has not been downloaded yet.
 *
 * @param {string} url The URL of the blob to load.
 * @return {!goog.async.Deferred} The deferred Blob object. The callback will be
 *     passed the blob. If a file API error occurs while loading the blob, that
 *     error will be passed to the errback.
 */
goog.net.FileDownloader.prototype.getDownloadedBlob = function(url) {
  'use strict';
  const entryDeferred = this.getFile_(url);
  // Swap the file entry for the result of reading it.
  return entryDeferred.addCallback((entry) => entry.file());
};


/**
 * Get the local filesystem: URL for a downloaded file. This is not the same as
 * the blob: URL obtainable through getDownloadedBlob(). When the end user
 * opens the filesystem: URL, the file name shown is the download's file name
 * rather than an arbitrary GUID. The filesystem: URL is also tied to a
 * location in the filesystem, so it stops being valid once the download is
 * removed.
 *
 * Warning: in Chrome 12, some filesystem: URLs are opened inline. This means
 * that e.g. HTML pages given to the user via filesystem: URLs will be opened
 * and processed by the browser.
 *
 * @param {string} url The URL of the file to get the URL of.
 * @return {!goog.async.Deferred} The deferred filesystem: URL. The callback
 *     will be passed the URL. If a file API error occurs while loading the
 *     blob, that error will be passed to the errback.
 */
goog.net.FileDownloader.prototype.getLocalUrl = function(url) {
  'use strict';
  const entryDeferred = this.getFile_(url);
  // Swap the file entry for its filesystem: URL.
  return entryDeferred.addCallback((entry) => entry.toUrl());
};


/**
 * Report (through a Deferred) whether a URL has been downloaded. The check is
 * made by trying to load the downloaded blob: success means true, a NOT_FOUND
 * file error means false, and any other error is forwarded to the errback.
 *
 * @param {string} url The URL to check.
 * @return {!goog.async.Deferred} The deferred boolean. The callback will be
 *     passed the boolean. If a file API error occurs while checking the
 *     existence of the downloaded URL, that error will be passed to the
 *     errback.
 */
goog.net.FileDownloader.prototype.isDownloaded = function(url) {
  'use strict';
  const answer = new goog.async.Deferred();

  const onLoaded = () => {
    answer.callback(true);
  };
  const onFailed = (err) => {
    if (err.name == goog.fs.Error.ErrorName.NOT_FOUND) {
      // A missing file is an ordinary "no", not a failure.
      answer.callback(false);
      return;
    }
    answer.errback(err);
  };

  this.getDownloadedBlob(url).addCallback(onLoaded).addErrback(onFailed);
  return answer;
};


/**
 * Remove a URL from the FileDownloader by recursively deleting the directory
 * that holds its downloaded file.
 *
 * This returns a Deferred. If the removal is completed successfully, its
 * callback will be called without any value. If the removal fails, its errback
 * will be called with the {@link goog.fs.Error}.
 *
 * @param {string} url The URL to remove.
 * @return {!goog.async.Deferred} The deferred used for registering callbacks on
 *     success or on error.
 */
goog.net.FileDownloader.prototype.remove = function(url) {
  'use strict';
  const lookup = goog.fs.DirectoryEntry.Behavior.DEFAULT;
  const dirDeferred = this.getDir_(url, lookup);
  return dirDeferred.addCallback((urlDir) => urlDir.removeRecursively());
};


/**
 * Save a blob for a given URL. This works just as though the blob were
 * downloaded from that URL, except that you supply the blob and no HTTP
 * request is made.
 *
 * If the URL is currently being downloaded, it's indeterminate whether the blob
 * being set or the blob being downloaded will end up in the filesystem.
 * Whichever one doesn't get saved will have an error. To ensure that one or the
 * other takes precedence, use {@link #waitForDownload} to allow the download to
 * complete before setting the blob.
 *
 * @param {string} url The URL at which to set the blob.
 * @param {!Blob} blob The blob to set.
 * @param {string=} opt_name The name of the file. If this isn't given, it's
 *     determined from the URL.
 * @return {!goog.async.Deferred} The deferred used for registering callbacks on
 *     success or on error. This can be cancelled just like a {@link #download}
 *     Deferred. The objects passed to the errback will be
 *     {@link goog.net.FileDownloader.Error}s.
 */
goog.net.FileDownloader.prototype.setBlob = function(url, blob, opt_name) {
  'use strict';
  const fileName = this.sanitize_(opt_name || this.urlToName_(url));

  // Track the save like a download so it can be waited on and cancelled.
  const pending = new goog.net.FileDownloader.Download_(url, this);
  this.downloads_[url] = pending;
  pending.blob = blob;

  // Exclusive creation makes the save fail if the URL was already stored.
  const exclusive = goog.fs.DirectoryEntry.Behavior.CREATE_EXCLUSIVE;
  const chain = this.getDir_(pending.url, exclusive);
  chain.addCallback((urlDir) => urlDir.getFile(fileName, exclusive));
  chain.addCallback((file) => this.fileSuccess_(pending, file));
  chain.addErrback((err) => this.error_(pending, err));

  return pending.deferred.branch(true /* opt_propagateCancel */);
};


/**
 * Invoked when the XHR pool hands over an XhrIo for a download. Wires up the
 * success, failure and ready listeners and sends the request, unless the
 * download was cancelled in the meantime, in which case the XhrIo is released
 * straight back to the pool.
 *
 * @param {!goog.net.FileDownloader.Download_} download The download object for
 *     this download.
 * @param {!goog.net.XhrIo} xhr The XhrIo object for downloading the page.
 * @private
 */
goog.net.FileDownloader.prototype.gotXhr_ = function(download, xhr) {
  'use strict';
  if (download.cancelled) {
    this.freeXhr_(xhr);
    return;
  }

  const handler = this.eventHandler_;
  const xhrEvents = goog.net.EventType;

  handler.listen(xhr, xhrEvents.SUCCESS, () => {
    this.xhrSuccess_(download);
  });
  handler.listen(xhr, [xhrEvents.ERROR, xhrEvents.ABORT], (event) => {
    this.error_(download, event);
  });
  // READY is used as the signal to return the XhrIo to the pool.
  handler.listen(xhr, xhrEvents.READY, () => {
    this.freeXhr_(xhr);
  });

  download.xhr = xhr;
  xhr.setResponseType(goog.net.XhrIo.ResponseType.ARRAY_BUFFER);
  xhr.send(download.url);
};


/**
 * Invoked when an XHR has successfully fetched a remote file. Turns the
 * response into a blob and starts creating the file it will be written to.
 *
 * @param {!goog.net.FileDownloader.Download_} download The download object for
 *     this download.
 * @private
 */
goog.net.FileDownloader.prototype.xhrSuccess_ = function(download) {
  'use strict';
  if (download.cancelled) {
    return;
  }

  const xhr = /** @type {!goog.net.XhrIo} */ (download.xhr);
  const fileName = this.sanitize_(this.getName_(xhr));
  const payload = /** @type {ArrayBuffer} */ (xhr.getResponse());
  if (!payload) {
    // Not expected: a missing response means the XHR hasn't completed, has
    // failed or has been cleaned up. Should a bug ever lead here, null must
    // not reach getBlob - it's invalid input and crashes some versions of
    // WebKit.
    this.error_(download);
    return;
  }

  download.blob = goog.fs.blob.getBlob(payload);
  delete download.xhr;

  // Exclusive creation makes the save fail if the URL was already stored.
  const exclusive = goog.fs.DirectoryEntry.Behavior.CREATE_EXCLUSIVE;
  this.getDir_(download.url, exclusive)
      .addCallback((urlDir) => urlDir.getFile(fileName, exclusive))
      .addCallback((file) => this.fileSuccess_(download, file))
      .addErrback((err) => this.error_(download, err));
};


/**
 * Invoked once the file that will hold the download has been opened. Requests
 * a writer for it, or removes the file again if the download was cancelled.
 *
 * @param {!goog.net.FileDownloader.Download_} download The download object for
 *     this download.
 * @param {!goog.fs.FileEntry} file The newly-opened file object.
 * @private
 */
goog.net.FileDownloader.prototype.fileSuccess_ = function(download, file) {
  'use strict';
  if (download.cancelled) {
    // Nothing will be written, so don't leave the file behind.
    file.remove();
    return;
  }

  download.file = file;
  const writerDeferred = file.createWriter();
  writerDeferred.addCallback(
      (writer) => this.fileWriterSuccess_(download, writer));
  writerDeferred.addErrback((err) => this.error_(download, err));
};


/**
 * Invoked once a file writer has been created for the download's file. Starts
 * writing the blob and listens for the end of the write, or removes the file
 * if the download was cancelled.
 *
 * @param {!goog.net.FileDownloader.Download_} download The download object for
 *     this download.
 * @param {!goog.fs.FileWriter} writer The newly-created file writer object.
 * @private
 */
goog.net.FileDownloader.prototype.fileWriterSuccess_ = function(
    download, writer) {
  'use strict';
  if (download.cancelled) {
    download.file.remove();
    return;
  }

  download.writer = writer;
  const contents = /** @type {!Blob} */ (download.blob);
  writer.write(contents);

  const writeEndType = goog.fs.FileSaver.EventType.WRITE_END;
  this.eventHandler_.listenOnce(writer, writeEndType, () => {
    this.writeEnd_(download);
  });
};


/**
 * Invoked when writing the file ends, successfully or not. A cancelled
 * download or a writer error is routed to the error handler; otherwise the
 * download is unregistered and its Deferred fires with the blob.
 *
 * @param {!goog.net.FileDownloader.Download_} download The download object for
 *     this download.
 * @private
 */
goog.net.FileDownloader.prototype.writeEnd_ = function(download) {
  'use strict';
  const succeeded = !download.cancelled && !download.writer.getError();
  if (succeeded) {
    delete this.downloads_[download.url];
    download.deferred.callback(download.blob);
    return;
  }

  this.error_(download, download.writer.getError());
};


/**
 * The error callback shared by all asynchronous stages of a download. Removes
 * any file already created for the download and, unless the download was
 * cancelled, unregisters it and fires its Deferred's errback with a
 * {@link goog.net.FileDownloader.Error}.
 *
 * @param {!goog.net.FileDownloader.Download_} download The download object for
 *     this download.
 * @param {goog.fs.Error=} opt_err The file error object. Only defined if the
 *     error was raised by the file API.
 * @private
 */
goog.net.FileDownloader.prototype.error_ = function(download, opt_err) {
  'use strict';
  const createdFile = download.file;
  if (createdFile) {
    createdFile.remove();
  }

  if (!download.cancelled) {
    delete this.downloads_[download.url];
    const failure = new goog.net.FileDownloader.Error(download, opt_err);
    download.deferred.errback(failure);
  }
};


/**
 * Abort a download: dispose the download object, then drop it from the map of
 * active downloads. This is installed as the canceller of each download's
 * Deferred.
 *
 * @param {!goog.net.FileDownloader.Download_} download The download to abort.
 * @private
 */
goog.net.FileDownloader.prototype.cancel_ = function(download) {
  'use strict';
  const activeDownloads = this.downloads_;
  goog.dispose(download);
  delete activeDownloads[download.url];
};


/**
 * Get the directory for a given URL. If the directory already exists when this
 * is called, it will contain exactly one file: the downloaded file.
 *
 * This not only calls the FileSystem API's getDirectory method, but also
 * spreads the files out so that they don't overload the filesystem. The spec
 * says directories can't contain more than 5000 files
 * (http://www.w3.org/TR/file-system-api/#directories), so each URL directory
 * is placed inside a parent directory named after a 32-bit hash of the URL.
 * The parent directory is always created if it is missing; `behavior` only
 * applies to the URL directory itself.
 *
 * @param {string} url The URL corresponding to the directory to get.
 * @param {goog.fs.DirectoryEntry.Behavior} behavior The behavior to pass to the
 *     underlying method.
 * @return {!goog.async.Deferred} The deferred DirectoryEntry object.
 * @private
 */
goog.net.FileDownloader.prototype.getDir_ = function(url, behavior) {
  'use strict';
  // 3 hex digits provide 16**3 = 4096 different possible dirnames, which is
  // less than the maximum of 5000 entries. The hash function should spread
  // downloads roughly evenly over those directories, allowing many more than
  // 5000 files to be downloaded.
  const hashHex = Math.abs(goog.crypt.hash32.encodeString(url)).toString(16);

  // The leading ` ensures that no illegal dirnames are accidentally used. % was
  // previously used, but Chrome has a bug (as of 12.0.725.0 dev) where
  // filenames are URL-decoded before checking their validity, so filenames
  // containing e.g. '%3f' (the URL-encoding of ?, an invalid character) are
  // rejected.
  const bucketName = '`' + hashHex.substring(0, 3);

  const bucketDeferred = this.dir_.getDirectory(
      bucketName, goog.fs.DirectoryEntry.Behavior.CREATE);
  return bucketDeferred.addCallback(
      (bucket) => bucket.getDirectory(this.sanitize_(url), behavior));
};


/**
 * Get the file for a given URL. Only files that have already been saved can be
 * retrieved this way; it must not be used to create the file in the first
 * place, because the filename isn't necessarily determined by the URL, but by
 * the headers of the XHR response.
 *
 * @param {string} url The URL corresponding to the file to get.
 * @return {!goog.async.Deferred} The deferred FileEntry object.
 * @private
 */
goog.net.FileDownloader.prototype.getFile_ = function(url) {
  'use strict';
  const lookup = goog.fs.DirectoryEntry.Behavior.DEFAULT;
  return this.getDir_(url, lookup).addCallback((urlDir) => {
    const listing = urlDir.listDirectory();
    return listing.addCallback((entries) => {
      goog.asserts.assert(entries.length == 1);
      // If the filesystem somehow gets corrupted and the directory turns out
      // to be empty, asking for a file that isn't there yields the normal
      // file-not-found error.
      return entries[0] || urlDir.getFile('file');
    });
  });
};


/**
 * Sanitize a string so it can be safely used as a file or directory name for
 * the FileSystem API.
 *
 * The result is the input prefixed with '`', with each of the characters
 * / \ < > : ? * " | % ` replaced by '`' followed by the two hex digits of its
 * character code (e.g. ':' becomes '`3A').
 *
 * @param {string} str The string to sanitize.
 * @return {string} The sanitized string.
 * @private
 */
goog.net.FileDownloader.prototype.sanitize_ = function(str) {
  'use strict';
  /**
   * Percent-encodes a single reserved character.
   * @param {string} ch The matched character.
   * @return {string} The '%XX' escape for the character.
   */
  const percentEncode = function(ch) {
    'use strict';
    // encodeURIComponent leaves '*' untouched, so it has to be encoded by
    // hand; otherwise it would reach the filesystem unescaped even though it
    // is listed among the characters to escape.
    return ch === '*' ? '%2A' : encodeURIComponent(ch);
  };

  // Add a prefix, since certain prefixes are disallowed for paths. None of the
  // disallowed prefixes start with '`'. We use ` rather than % for escaping the
  // filename due to a Chrome bug (as of 12.0.725.0 dev) where filenames are
  // URL-decoded before checking their validity, so filenames containing e.g.
  // '%3f' (the URL-encoding of ?, an invalid character) are rejected.
  return '`' +
      str.replace(/[\/\\<>:?*"|%`]/g, percentEncode).replace(/%/g, '`');
};


/**
 * Gets the filename specified by the XHR. A quoted filename in an
 * "attachment" Content-Disposition header wins; when there is none, the name
 * is derived from the URL that was requested.
 *
 * @param {!goog.net.XhrIo} xhr The XHR containing the response headers.
 * @return {string} The filename.
 * @private
 */
goog.net.FileDownloader.prototype.getName_ = function(xhr) {
  'use strict';
  const header = xhr.getResponseHeader('Content-Disposition');
  const parsed =
      header && header.match(/^attachment *; *filename="(.*)"$/i);
  if (!parsed) {
    return this.urlToName_(xhr.getLastUri());
  }

  // The Content-Disposition header allows for arbitrary backslash-escaped
  // characters (usually " and \). Drop the escaping backslashes before the
  // value is used as a filename.
  const quotedName = parsed[1];
  return quotedName.replace(/\\(.)/g, '$1');
};


/**
 * Extracts the basename from a URL: everything after the last '/', or the
 * whole string when it contains no '/'.
 *
 * @param {string} url The URL.
 * @return {string} The basename.
 * @private
 */
goog.net.FileDownloader.prototype.urlToName_ = function(url) {
  'use strict';
  // lastIndexOf yields -1 when there is no slash, making the start index 0.
  const basenameStart = url.lastIndexOf('/') + 1;
  return url.substring(basenameStart);
};


/**
 * Strip every event listener from an XHR and hand it back to the pool.
 *
 * @param {!goog.net.XhrIo} xhr The XHR to free.
 * @private
 */
goog.net.FileDownloader.prototype.freeXhr_ = function(xhr) {
  'use strict';
  const xhrPool = this.pool_;
  // Listeners go first so the next user of the XHR doesn't inherit them.
  goog.events.removeAll(xhr);
  xhrPool.addFreeObject(xhr);
};


/**
 * Releases the directory, event handler, active downloads and XHR pool. Every
 * download still registered has its Deferred cancelled.
 * @override
 */
goog.net.FileDownloader.prototype.disposeInternal = function() {
  'use strict';
  delete this.dir_;

  goog.dispose(this.eventHandler_);
  delete this.eventHandler_;

  // The map must remain in place while cancelling: each cancellation removes
  // its own entry from it.
  goog.object.forEach(this.downloads_, (activeDownload) => {
    activeDownload.deferred.cancel();
  }, this);
  delete this.downloads_;

  goog.dispose(this.pool_);
  delete this.pool_;

  goog.net.FileDownloader.base(this, 'disposeInternal');
};



/**
 * The error object for FileDownloader download errors. The message always
 * names the URL; details are appended for an XHR failure (when the download
 * still holds its XHR) or else for a file API failure (when a file error is
 * given).
 *
 * @param {!goog.net.FileDownloader.Download_} download The download object for
 *     the download in question.
 * @param {goog.fs.Error=} opt_fsErr The file error object, if this was a file
 *     error.
 *
 * @constructor
 * @extends {goog.debug.Error}
 * @final
 */
goog.net.FileDownloader.Error = function(download, opt_fsErr) {
  'use strict';
  goog.net.FileDownloader.Error.base(
      this, 'constructor', 'Error capturing URL ' + download.url);

  /**
   * The URL of the download that failed.
   * @type {string}
   */
  this.url = download.url;

  const failedXhr = download.xhr;
  if (failedXhr) {
    this.xhrStatus = failedXhr.getStatus();
    this.xhrErrorCode = failedXhr.getLastErrorCode();
    this.message += ': XHR failed with status ' + this.xhrStatus +
        ' (error code ' + this.xhrErrorCode + ')';
    return;
  }

  if (opt_fsErr) {
    this.fileError = opt_fsErr;
    this.message += ': file API failed (' + opt_fsErr.message + ')';
  }
};
goog.inherits(goog.net.FileDownloader.Error, goog.debug.Error);


/**
 * The XHR's status. Present only when an XHR failure caused the error.
 * @type {number|undefined}
 */
goog.net.FileDownloader.Error.prototype.xhrStatus;


/**
 * The XHR's last error code. Present only when an XHR failure caused the
 * error.
 * @type {goog.net.ErrorCode|undefined}
 */
goog.net.FileDownloader.Error.prototype.xhrErrorCode;


/**
 * The file API error. Present only when the file API caused the error.
 * @type {goog.fs.Error|undefined}
 */
goog.net.FileDownloader.Error.prototype.fileError;



/**
 * A struct holding the state of a single download.
 *
 * @param {string} url The URL for the file being downloaded.
 * @param {!goog.net.FileDownloader} downloader The parent FileDownloader.
 * @extends {goog.Disposable}
 * @constructor
 * @private
 */
goog.net.FileDownloader.Download_ = function(url, downloader) {
  'use strict';
  goog.net.FileDownloader.Download_.base(this, 'constructor');

  /**
   * The URL for the file being downloaded.
   * @type {string}
   */
  this.url = url;

  /**
   * The Deferred that will be fired when the download is complete. Cancelling
   * it asks the parent downloader to abort this download.
   * @type {!goog.async.Deferred}
   */
  this.deferred = new goog.async.Deferred(() => {
    downloader.cancel_(this);
  });

  /**
   * Whether this download has been cancelled by the user.
   * @type {boolean}
   */
  this.cancelled = false;

  /**
   * The XhrIo object for downloading the file. Only set once it's been
   * retrieved from the pool.
   * @type {?goog.net.XhrIo}
   */
  this.xhr = null;

  /**
   * The name of the blob being downloaded. Only set once the XHR has completed,
   * if it completed successfully.
   * @type {?string}
   */
  this.name = null;

  /**
   * The downloaded blob. Only set once the XHR has completed, if it completed
   * successfully.
   * @type {?Blob}
   */
  this.blob = null;

  /**
   * The file entry where the blob is to be stored. Only set once it's been
   * loaded from the filesystem.
   * @type {?goog.fs.FileEntry}
   */
  this.file = null;

  /**
   * The file writer for writing the blob to the filesystem. Only set once it's
   * been loaded from the filesystem.
   * @type {?goog.fs.FileWriter}
   */
  this.writer = null;
};
goog.inherits(goog.net.FileDownloader.Download_, goog.Disposable);


/**
 * Marks the download as cancelled and aborts whichever stage is in flight:
 * the XHR if one is attached, otherwise the file writer if it is currently
 * writing.
 * @override
 */
goog.net.FileDownloader.Download_.prototype.disposeInternal = function() {
  'use strict';
  this.cancelled = true;

  const activeXhr = this.xhr;
  const activeWriter = this.writer;
  if (activeXhr) {
    activeXhr.abort();
  } else if (activeWriter) {
    const writing = goog.fs.FileSaver.ReadyState.WRITING;
    if (activeWriter.getReadyState() == writing) {
      activeWriter.abort();
    }
  }

  goog.net.FileDownloader.Download_.base(this, 'disposeInternal');
};
// chromium/third_party/google-closure-library/closure/goog/net/filedownloader.js