Home Reference Source

lib/network/NetworkIdleWatcher.js

/// This file is from https://github.com/N0taN3rd/Squidwarc Copyright John Berlin <n0tan3rd@gmail.com> Apache 2.0
const EventEmitter = require('eventemitter3')
const Events = require('../Events')
const { helper } = require('../helper')

/**
 * Monitors the HTTP requests made by a page and emits the 'network-idle' event when it has been determined the network is idle
 * @extends {EventEmitter}
 * @since chrome-remote-interface-extra
 */
class NetIdleWatcher extends EventEmitter {
  /**
   * @param {NetworkManager} networkManager - NetworkManager whose request events are monitored
   * @param {?NetIdleOptions} [options = {}] - Optional options to fine tune network idle determination
   */
  constructor (networkManager, options = {}) {
    super()

    // The default parameter only covers `undefined`; the documented type also
    // allows an explicit `null`, which must not blow up on property access.
    const opts = options || {}

    /**
     * Maximum amount of time, in milliseconds, to wait for network idle before giving up
     * @type {number}
     * @private
     */
    this._timeout = opts.globalWait || 40000

    /**
     * The amount of time, in milliseconds, the idle timer runs before the network-idle event is emitted
     * @type {number}
     * @private
     */
    this._idleTime = opts.inflightIdle || 1500

    /**
     * The maximum number of in-flight requests tolerated while the network-idle timer runs.
     * An explicit 0 is honored (idle only when nothing is in flight); any other falsy value falls back to 2.
     * @type {number}
     * @private
     */
    this._idleInflight = opts.numInflight === 0 ? 0 : opts.numInflight || 2

    /**
     * Set of the ids of the in-flight HTTP requests, used for tracking network-idle
     * @type {Set<string>}
     * @private
     */
    this._requestIds = new Set()

    /**
     * The id of the setTimeout for the network-idle timer, null when the timer is not running
     * @type {?number}
     * @private
     */
    this._idleTimer = null

    /**
     * Flag indicating that tracking has finished; request events are ignored while it is true
     * @type {boolean}
     * @private
     */
    this._doneTimers = false

    /**
     * The id of the global wait setTimeout timer, null when the timer is not running
     * @type {?number}
     * @private
     */
    this._globalWaitTimer = null

    /**
     * The NetworkManager whose events are being monitored
     * @type {NetworkManager}
     */
    this._networkManager = networkManager

    /**
     * The listeners registered on the NetworkManager by start()
     * @type {Object[]}
     * @private
     */
    this._pageListenrs = []

    // These methods are handed out as event/timer callbacks, so pin `this`
    this.reqFinished = this.reqFinished.bind(this)
    this.reqStarted = this.reqStarted.bind(this)
    this._networkIdled = this._networkIdled.bind(this)
    this._globalNetworkTimeout = this._globalNetworkTimeout.bind(this)
    this._clearTimers = this._clearTimers.bind(this)
    this._emitNetIdle = this._emitNetIdle.bind(this)
  }

  /**
   * Start monitoring the network and receive a Promise that resolves once network idle occurred or the global wait time has been reached
   * @param {NetworkManager} networkManager - NetworkManager object for the page being crawled
   * @param {?NetIdleOptions} [options = {}] - Optional options to fine tune network idle determination
   * @return {Promise<void>}
   */
  static idlePromise (networkManager, options) {
    const im = new NetIdleWatcher(networkManager, options)
    return new Promise(resolve => {
      // A promise settles only once, so a one-shot listener is sufficient;
      // it is attached before monitoring begins.
      im.once(Events.NetworkIdleMonitor.NetworkIdle, resolve)
      im.start()
    })
  }

  /**
   * Setup the necessary listeners and start the global wait timer.
   * Calling start again releases the listeners and timers of the previous run before registering new ones.
   */
  start () {
    // Without this a second start() would overwrite the bookkeeping below and
    // leak the earlier listeners and the earlier global wait timer.
    if (this._pageListenrs.length > 0) {
      helper.removeEventListeners(this._pageListenrs)
    }
    this._clearTimers()

    this._pageListenrs = [
      helper.addEventListener(
        this._networkManager,
        Events.NetworkManager.Request,
        this.reqStarted
      ),
      helper.addEventListener(
        this._networkManager,
        Events.NetworkManager.Response,
        this.reqFinished
      ),
      helper.addEventListener(
        this._networkManager,
        Events.NetworkManager.RequestFailed,
        this.reqFinished
      )
    ]
    this._requestIds.clear()
    this._doneTimers = false
    this._globalWaitTimer = setTimeout(
      this._globalNetworkTimeout,
      this._timeout
    )
  }

  /**
   * Indicate that a request was made
   * @param {Request} info - Request object exposing requestId()
   */
  reqStarted (info) {
    if (this._doneTimers) return
    this._requestIds.add(info.requestId())
    // Too many requests in flight: the network is not idle, stop the idle countdown
    if (this._requestIds.size > this._idleInflight) {
      clearTimeout(this._idleTimer)
      this._idleTimer = null
    }
  }

  /**
   * Indicate that a request has finished (a response was received or the request failed)
   * @param {Response | Request} info - Request or Response object
   */
  reqFinished (info) {
    if (this._doneTimers) return
    // Prefer the object's own id; objects that do not expose requestId(), or
    // return a falsy id, are resolved through the request they belong to.
    let requestId =
      typeof info.requestId === 'function' ? info.requestId() : null
    if (!requestId) {
      requestId = info.request().requestId()
    }
    this._requestIds.delete(requestId)
    // Start the idle countdown once few enough requests remain, unless it is already running
    if (this._requestIds.size <= this._idleInflight && !this._idleTimer) {
      this._idleTimer = setTimeout(this._networkIdled, this._idleTime)
    }
  }

  /**
   * Called when the global time limit was hit
   * @private
   */
  _globalNetworkTimeout () {
    this._finishTracking()
  }

  /**
   * Called when the network idle has been determined
   * @private
   */
  _networkIdled () {
    this._finishTracking()
  }

  /**
   * Stop tracking requests, cancel both timers and schedule the network-idle event on the next tick
   * @private
   */
  _finishTracking () {
    this._doneTimers = true
    this._clearTimers()
    process.nextTick(this._emitNetIdle)
  }

  /**
   * Remove the registered listeners and emit the network-idle event
   * @private
   */
  _emitNetIdle () {
    helper.removeEventListeners(this._pageListenrs)
    this.emit(Events.NetworkIdleMonitor.NetworkIdle)
  }

  /**
   * Clear all timers
   * @private
   */
  _clearTimers () {
    if (this._globalWaitTimer) {
      clearTimeout(this._globalWaitTimer)
      this._globalWaitTimer = null
    }
    if (this._idleTimer) {
      clearTimeout(this._idleTimer)
      this._idleTimer = null
    }
  }
}

/**
 * The NetIdleWatcher class is this module's sole export
 * @type {NetIdleWatcher}
 */
module.exports = NetIdleWatcher

/**
 * @typedef {Object} NetIdleOptions
 * @property {number} [globalWait = 40000]  - Maximum amount of time, in milliseconds, to wait for network idle to occur
 * @property {number} [numInflight = 2]     - The maximum number of inflight requests (requests with no response) that may exist for the inflightIdle timer to start
 * @property {number} [inflightIdle = 1500] - Amount of time, in milliseconds, that must elapse with no more than numInflight requests in flight for network idle to be determined
 */