Reference Source

lib/launcher/chrome.js

/*
 Squidwarc  Copyright (C) 2017-present  John Berlin <n0tan3rd@gmail.com>

 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation, either version 3 of the License, or
 (at your option) any later version.

 Squidwarc is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this Squidwarc.  If not, see <http://www.gnu.org/licenses/>
 */

const os = require('os')
const path = require('path')
const cp = require('child_process')
const fs = require('fs-extra')
const CRI = require('chrome-remote-interface')
const readline = require('readline')
const DEFAULT_ARGS = require('./defaultArgs')
const ChromeFinder = require('./chromeFinder')
const H = require('../crawler/helper')
const { delay } = require('../utils/promises')

/**
 * @desc Fill in the connection defaults on a (possibly missing) options object.
 * The object that was passed in is mutated and returned; a fresh object is
 * only created when `options` is `null` or `undefined`.
 *
 * Defaults applied when the value is `null` or `undefined`:
 *  - `port`: 9222
 *  - `host`: 'localhost'
 *  - `local`: false
 *
 * NOTE(review): a truthy `local` is also overwritten with `false`, so callers
 * cannot switch it on through this function. This looks deliberate but should
 * be confirmed against how chrome-remote-interface uses `local`.
 * @param {?ChromeOptions} [options = {}]
 * @return {ChromeOptions}
 */
function ensureOptions (options = {}) {
  // The default parameter only covers `undefined`; the documented type is
  // nullable, so an explicit `null` must be handled here as well
  if (options === null) {
    options = {}
  }
  // `== null` matches both null and undefined while keeping values such as 0
  if (options.port == null) {
    options.port = 9222
  }
  if (options.host == null) {
    options.host = 'localhost'
  }
  if (options.local == null || options.local) {
    options.local = false
  }
  return options
}

/**
 * @desc Build the command line arguments Chrome is started with: a copy of
 * the shared default arguments, the profile directory and remote debugging
 * port, optional headless and in-Docker flags, and finally `about:blank` as
 * the page to open.
 * @param {ChromeOptions} options - `port` and `headless` are read
 * @param {string} userDataDir - Directory passed to `--user-data-dir`
 * @return {string[]}
 */
function chromeArgs (options, userDataDir) {
  const headlessFlags = []
  if (options.headless) {
    headlessFlags.push('--headless', '--hide-scrollbars')
    // the GPU is only switched off for headless runs on Windows
    if (os.platform() === 'win32') {
      headlessFlags.push('--disable-gpu')
    }
  }

  // the INDOCKER environment variable switches on the sandbox-less flags
  const dockerFlags = process.env.INDOCKER
    ? ['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage']
    : []

  return [
    ...DEFAULT_ARGS,
    `--user-data-dir=${userDataDir}`,
    `--remote-debugging-port=${options.port}`,
    ...headlessFlags,
    ...dockerFlags,
    'about:blank'
  ]
}

/**
 * @desc Path prefix inside the OS temporary directory. It is handed to
 * `fs.mkdtemp` when a temporary Chrome profile directory has to be created
 * because the caller did not supply a `userDataDir`.
 * @type {string}
 */
const CHROME_PROFILE_PATH = path.join(os.tmpdir(), 'squidwarc_profile-')

/**
 * @desc Spawn a Chrome/Chromium process with remote debugging enabled, hook up
 * the cleanup handlers and wait (up to 30 seconds) until Chrome prints its
 * DevTools WebSocket endpoint on stderr. The endpoint is logged to stdout.
 *
 * When `options.executable` is not set it is filled in via
 * `ChromeFinder.findChrome()`. When `options.userDataDir` is not set a
 * temporary profile directory is created and removed again once Chrome exits
 * or is killed.
 * @param {ChromeOptions} options - Options already passed through ensureOptions
 * @return {Promise<function(): void>} Function that kills the spawned Chrome
 * (at most once) and removes the temporary profile directory, if any
 */
async function spawnChrome (options) {
  if (options.executable == null) {
    options.executable = await ChromeFinder.findChrome()
  }

  // Only a profile directory created here is ours to delete afterwards
  const ownsUserDataDir = !options.userDataDir
  const userDataDir = ownsUserDataDir
    ? await fs.mkdtemp(CHROME_PROFILE_PATH)
    : options.userDataDir

  const chromeProcess = cp.spawn(
    options.executable,
    chromeArgs(options, userDataDir),
    {
      // stderr is piped because the WS endpoint is read from it
      stdio: ['ignore', 'ignore', 'pipe'],
      env: process.env,
      // detached on non-Windows so Chrome can be killed via its process group
      detached: process.platform !== 'win32'
    }
  )

  let killed = false

  function maybeRemoveUDataDir () {
    if (!ownsUserDataDir) {
      return
    }
    try {
      fs.removeSync(userDataDir)
    } catch (e) {
      // best-effort cleanup that may run while the process is exiting
    }
  }

  function killChrome () {
    if (killed) {
      return
    }
    killed = true
    try {
      if (process.platform === 'win32') {
        cp.execSync(`taskkill /pid ${chromeProcess.pid} /T /F`)
      } else {
        // a negative pid addresses the whole process group
        process.kill(-chromeProcess.pid, 'SIGKILL')
      }
    } catch (e) {
      // Chrome may already be gone; nothing useful can be done here
    }

    maybeRemoveUDataDir()
  }

  process.on('exit', killChrome)
  chromeProcess.once('exit', maybeRemoveUDataDir)

  process.on('SIGINT', () => {
    killChrome()
    // 130 = 128 + SIGINT(2), the conventional status for an interrupted process
    process.exit(130)
  })
  process.once('SIGTERM', killChrome)
  process.once('SIGHUP', killChrome)

  try {
    const listeningOn = await waitForWSEndpoint(chromeProcess, 30 * 1000)
    console.log(listeningOn)
  } catch (e) {
    killChrome()
    throw e
  }

  return killChrome
}

/**
 * @desc Utility class for launching or connecting to a Chrome/Chromium instance
 */
class ChromeLauncher {
  /**
   * @desc Launch Chrome by finding an acceptable executable on the host system
   * and connect to it. If connecting fails the launched Chrome is killed and
   * the error is rethrown.
   * @param {?ChromeOptions} [options = {}]
   * @return {Promise<CRI>}
   */
  static async launch (options = {}) {
    options = ensureOptions(options)
    const killChrome = await spawnChrome(options)
    try {
      // a fixed pause between Chrome reporting its endpoint and connecting
      await delay(3000)
      return await CRI(options)
    } catch (e) {
      killChrome()
      throw e
    }
  }

  /**
   * @desc Launch Chrome by finding an acceptable executable on the host system but do not connect to it.
   * The promise resolves once Chrome has reported its DevTools endpoint.
   * @param {?ChromeOptions} [options = {}]
   * @return {Promise<void>}
   */
  static async launchNoConnect (options = {}) {
    await spawnChrome(ensureOptions(options))
  }

  /**
   * @desc Connect to a running instance of Chrome
   * @param {?ChromeOptions} [options]
   * @return {Promise<CRI>}
   */
  static connect (options = {}) {
    return CRI(ensureOptions(options))
  }

  /**
   * @desc Create and connect to a new tab of a running Chrome instance
   * @param {?ChromeOptions} [options = {}]
   * @return {Promise<CRI>}
   * @public
   */
  static async newTab (options = {}) {
    options = ensureOptions(options)
    const target = await CRI.New(options)
    return CRI({ ...options, target })
  }

  /**
   * @desc Receive the protocol definition of the remote Chrome
   * @param {?ChromeOptions} [options = {}]
   * @return {Promise<Object>}
   * @public
   */
  static async getProtocolDef (options = {}) {
    options = ensureOptions(options)
    return CRI.Protocol(options)
  }
}

/**
 * @desc Wait for Chrome to announce its DevTools WebSocket endpoint. Chrome's
 * stderr is read line by line until a line of the form
 * `DevTools listening on ws://...` shows up.
 *
 * The returned promise rejects when stderr closes, when the process exits or
 * emits `error` before the endpoint was seen, or when the timeout elapses.
 * The rejection message for a failed launch contains the stderr output
 * collected so far and, for an `error` event, that error's message.
 * @param {ChildProcess} chromeProcess - Spawned Chrome process with a piped stderr
 * @param {number} timeout - Maximum time to wait in milliseconds, a falsy value disables the timeout
 * @return {Promise<string>} Resolves with the `ws://` endpoint URL
 */
function waitForWSEndpoint (chromeProcess, timeout) {
  return new Promise((resolve, reject) => {
    const rl = readline.createInterface({ input: chromeProcess.stderr })
    let stderr = ''
    const listeners = [
      H.addEventListener(rl, 'line', onLine),
      // wrapped so the event arguments (e.g. the exit code) are not mistaken
      // for an error by onClose
      H.addEventListener(rl, 'close', () => onClose()),
      H.addEventListener(chromeProcess, 'exit', () => onClose()),
      H.addEventListener(chromeProcess, 'error', error => onClose(error))
    ]
    const timeoutId = timeout ? setTimeout(onTimeout, timeout) : 0

    /**
     * @param {Error} [error] - Only supplied for the process `error` event
     */
    function onClose (error) {
      cleanup()
      const parts = ['Failed to launch chrome!']
      if (error) {
        // keep the cause (e.g. a bad executable path) visible to the caller
        parts.push(error.message)
      }
      parts.push(stderr)
      reject(new Error(parts.join('\n')))
    }

    function onTimeout () {
      cleanup()
      reject(
        new Error(`Timed out after ${timeout} ms while trying to connect to Chrome!`)
      )
    }

    /**
     * @param {string} line
     */
    function onLine (line) {
      // everything read is retained so it can be reported on failure
      stderr += line + '\n'
      const match = line.match(/^DevTools listening on (ws:\/\/.*)$/)
      if (!match) {
        return
      }
      cleanup()
      resolve(match[1])
    }

    function cleanup () {
      if (timeoutId) {
        clearTimeout(timeoutId)
      }
      H.removeEventListeners(listeners)
    }
  })
}

/**
 * @type {ChromeLauncher}
 */
module.exports = ChromeLauncher

/**
 * @external {CRI} https://github.com/cyrus-and/chrome-remote-interface
 */