Home Reference Source

lib/writers/requestLib.js

const WARCWriterBase = require('./warcWriterBase')
const { STATUS_CODES } = require('http')
const { URL } = require('url')
const { CRLF } = require('./warcFields')
const {
  stringifyHeaders,
  stringifyRequestHeaders
} = require('../utils/headerSerializers')
const { noGZ, replaceContentLen } = require('./constants')

/**
 * @desc WARC Generator for use with request. Turns a full response object
 * resolved by the request library into a WARC request record followed by a
 * WARC response record.
 * @see https://github.com/request/request-promise
 * @extends {WARCWriterBase}
 */
class RequestLibWARCGenerator extends WARCWriterBase {
  /**
   * @desc Create a new Request lib WARC generator
   * @param {?WARCFileOpts} [defaultOpts]
   */
  constructor (defaultOpts) {
    super(defaultOpts)
    /**
     * Reusable URL parser; it is re-pointed at the requested URL on every
     * call to {@link generateWarcEntry}.
     * @type {URL}
     * @private
     */
    this._UP = new URL('about:blank')
  }

  /**
   * @desc Generates a WARC request record and a WARC response record for one
   * request/response exchange. Needs the following request lib defaults:
   * rp.defaults({
   *   resolveWithFullResponse: true,
   *   simple: false
   * })
   * @param {Object} resp - the full response; this method reads
   * `resp.request` (`href`, `method`, `headers`, `body`), `resp.statusCode`,
   * `resp.statusMessage`, `resp.headers` and `resp.body`
   * @returns {Promise<void>} settles once both records have been handed to
   * the writer
   */
  async generateWarcEntry (resp) {
    const { request } = resp

    // Everything needed from the shared URL parser is read before the first
    // await: `this._UP` is shared by every call made on this instance, so an
    // overlapping call may re-point it while this one is suspended.
    this._UP.href = request.href
    const targetURI = this._UP.href
    const { pathname, search, host } = this._UP

    // generate the HTTP request to put in the WARC headers; a non-empty
    // query is re-serialized through searchParams rather than copied verbatim
    let requestTarget = pathname
    if (search !== '') {
      requestTarget += `${search[0]}${this._UP.searchParams}`
    }
    const reqHTTP =
      `${request.method} ${requestTarget} HTTP/1.1${CRLF}` +
      stringifyRequestHeaders(request.headers, host)

    // only POST requests have their body recorded with the request record
    const pd = request.method === 'POST' ? request.body : null

    // now, write our request record
    await this.writeRequestRecord(targetURI, reqHTTP, pd)

    // Non-registered status codes are missing from STATUS_CODES; fall back to
    // the reason phrase carried by the response (or nothing at all) instead
    // of emitting the literal text "undefined".
    const reason = STATUS_CODES[resp.statusCode] || resp.statusMessage || ''
    // NOTE(review): the space before CRLF is kept so status lines stay
    // byte-identical to those in previously written records.
    let resHTTP = `HTTP/1.1 ${resp.statusCode} ${reason} ${CRLF}${stringifyHeaders(
      resp.headers
    )}`
    const body = resp.body

    if (body) {
      // presumably noGZ strips the content-encoding header because the body
      // we hold is already decoded - confirm against ./constants
      resHTTP = resHTTP.replace(noGZ, '')
      // make the recorded Content-Length agree with the body we will write
      resHTTP = resHTTP.replace(
        replaceContentLen,
        `Content-Length: ${Buffer.byteLength(body, 'utf8')}${CRLF}`
      )
    } else {
      // indicate that this record has 0 content
      resHTTP = resHTTP.replace(replaceContentLen, `Content-Length: 0${CRLF}`)
    }
    await this.writeResponseRecord(targetURI, resHTTP, body)
  }
}

// The generator class is this module's sole export.
module.exports = RequestLibWARCGenerator