Home Reference Source

lib/warcRecord/record.js

'use strict'
const ContentParser = require('./warcContentParsers')
const { WARCTypes } = require('../writers/warcFields')

/**
 * @desc Resolve the key under which a WARC named field is stored in a parsed header.
 * Keys that already start with ``WARC`` are used verbatim. Any other key is first
 * tried with the ``WARC-`` prefix; only when that prefixed field is absent and the
 * key, exactly as supplied, is an own property of the header (e.g. ``Content-Length``
 * or ``Content-Type``, which carry no ``WARC-`` prefix) is the un-prefixed key used.
 * @param {Object} warcHeader - The parsed WARC header
 * @param {string} headerKey - The WARC header key (named field), with or without the WARC- prefix
 * @return {string} The key to use when indexing ``warcHeader``
 */
function resolveWARCHeaderKey (warcHeader, headerKey) {
  if (headerKey.startsWith('WARC')) {
    return headerKey
  }
  const prefixedKey = `WARC-${headerKey}`
  // The own-property check keeps inherited members (constructor, toString, ...)
  // from being mistaken for header fields when falling back to the raw key.
  if (
    warcHeader[prefixedKey] == null &&
    Object.prototype.hasOwnProperty.call(warcHeader, headerKey)
  ) {
    return headerKey
  }
  return prefixedKey
}

/**
 * @desc WARC record class. The WARC named fields are exposed as getters on this object
 * and the raw parsed header is available as {@link WARCRecord#warcHeader}
 */
class WARCRecord {
  /**
   * @desc Create a new WARCRecord
   * @param {{header: Buffer[], c1: Buffer[], c2: Buffer[]}} warcParts - The parts of a warc record
   */
  constructor (warcParts) {
    /**
     * @desc An object containing the parsed WARC header
     * @type {Object}
     */
    this.warcHeader = ContentParser.parseWarcRecordHeader(warcParts.header)

    /**
     * @desc An object containing the parsed request or response records HTTP information.
     * Remains null for record types other than request, response and revisit
     * @type {?RequestHTTP|?ResponseHTTP}
     */
    this.httpInfo = null

    /**
     * @desc The content of the record. Remains null for revisit records
     * @type {?Buffer}
     */
    this.content = null

    const wt = this.warcType
    switch (wt) {
      case WARCTypes.request:
      case WARCTypes.response:
        // c1 is handed to the HTTP parser (second argument is true for
        // request records) and c2 becomes the record content
        this.httpInfo = ContentParser.parseHTTPPortion(
          warcParts.c1,
          wt === WARCTypes.request
        )
        this.content = Buffer.concat(warcParts.c2 || [])
        break
      case WARCTypes.revisit:
        // only the HTTP information is kept for revisit records
        this.httpInfo = ContentParser.parseResHTTP(warcParts.c1)
        break
      default:
        // every other record type: c1 is the record content
        this.content = Buffer.concat(warcParts.c1 || [])
        break
    }
  }

  /**
   * @desc Determine if this warc record header has the supplied WARC named field.
   * If the supplied header key does not start with WARC then the ``WARC-`` prefix is added.
   * Named fields that are stored without the prefix (``Content-Length``, ``Content-Type``)
   * are matched by their exact name when no prefixed field exists.
   * @example
   *  // both ways are equivalent to determine if the record has the WARC-Type field
   *  record.hasWARCHeader('WARC-Type')
   *  record.hasWARCHeader('Type')
   * @example
   *  // fields without the WARC- prefix are looked up as is
   *  record.hasWARCHeader('Content-Length')
   * @param {!string} headerKey - The WARC header key (named field)
   * @return {boolean}
   */
  hasWARCHeader (headerKey) {
    return this.warcHeader[resolveWARCHeaderKey(this.warcHeader, headerKey)] != null
  }

  /**
   * @desc Retrieve the value of this records header field.
   * If the supplied header key does not start with WARC then the ``WARC-`` prefix is added.
   * Named fields that are stored without the prefix (``Content-Length``, ``Content-Type``)
   * are matched by their exact name when no prefixed field exists.
   * @example
   *  // both ways are equivalent to retrieve WARC-Type field
   *  record.getWARCHeader('WARC-Type')
   *  record.getWARCHeader('Type')
   * @example
   *  // fields without the WARC- prefix are looked up as is
   *  record.getWARCHeader('Content-Length')
   * @param {!string} headerKey - The WARC header key (named field)
   * @return {?string} The field's value, undefined when the header does not contain it
   */
  getWARCHeader (headerKey) {
    return this.warcHeader[resolveWARCHeaderKey(this.warcHeader, headerKey)]
  }

  /**
   * @desc The value of the WARC-Type header field
   * @return {?string}
   */
  get warcType () {
    return this.warcHeader['WARC-Type']
  }

  /**
   * @desc The value of the WARC-Record-ID header field
   * @return {?string}
   */
  get warcRecordID () {
    return this.warcHeader['WARC-Record-ID']
  }

  /**
   * @desc The value of the WARC-Date header field
   * @return {?string}
   */
  get warcDate () {
    return this.warcHeader['WARC-Date']
  }

  /**
   * @desc The value of the Content-Length header field (stored without the WARC- prefix)
   * @return {?string}
   */
  get warcContentLength () {
    return this.warcHeader['Content-Length']
  }

  /**
   * @desc The value of the Content-Type header field (stored without the WARC- prefix)
   * @return {?string}
   */
  get warcContentType () {
    return this.warcHeader['Content-Type']
  }

  /**
   * @desc The value of the WARC-Concurrent-To header field
   * @return {?string}
   */
  get warcConcurrentTo () {
    return this.warcHeader['WARC-Concurrent-To']
  }

  /**
   * @desc The value of the WARC-Block-Digest header field
   * @return {?string}
   */
  get warcBlockDigest () {
    return this.warcHeader['WARC-Block-Digest']
  }

  /**
   * @desc The value of the WARC-Payload-Digest header field
   * @return {?string}
   */
  get warcPayloadDigest () {
    return this.warcHeader['WARC-Payload-Digest']
  }

  /**
   * @desc The value of the WARC-IP-Address header field
   * @return {?string}
   */
  get warcIPAddress () {
    return this.warcHeader['WARC-IP-Address']
  }

  /**
   * @desc The value of the WARC-Refers-To header field
   * @return {?string}
   */
  get warcRefersTo () {
    return this.warcHeader['WARC-Refers-To']
  }

  /**
   * @desc The value of the WARC-Refers-To-Target-URI header field
   * @return {?string}
   */
  get warcRefersToTargetURI () {
    return this.warcHeader['WARC-Refers-To-Target-URI']
  }

  /**
   * @desc The value of the WARC-Refers-To-Date header field
   * @return {?string}
   */
  get warcRefersToDate () {
    return this.warcHeader['WARC-Refers-To-Date']
  }

  /**
   * @desc The value of the WARC-Target-URI header field
   * @return {?string}
   */
  get warcTargetURI () {
    return this.warcHeader['WARC-Target-URI']
  }

  /**
   * @desc The value of the WARC-Truncated header field
   * @return {?string}
   */
  get warcTruncated () {
    return this.warcHeader['WARC-Truncated']
  }

  /**
   * @desc The value of the WARC-Warcinfo-ID header field
   * @return {?string}
   */
  get warcWarcinfoID () {
    return this.warcHeader['WARC-Warcinfo-ID']
  }

  /**
   * @desc The value of the WARC-Filename header field
   * @return {?string}
   */
  get warcFilename () {
    return this.warcHeader['WARC-Filename']
  }

  /**
   * @desc The value of the WARC-Profile header field
   * @return {?string}
   */
  get warcProfile () {
    return this.warcHeader['WARC-Profile']
  }

  /**
   * @desc The value of the WARC-Identified-Payload-Type header field
   * @return {?string}
   */
  get warcIdentifiedPayloadType () {
    return this.warcHeader['WARC-Identified-Payload-Type']
  }

  /**
   * @desc The value of the WARC-Segment-Origin-ID header field
   * @return {?string}
   */
  get warcSegmentOriginID () {
    return this.warcHeader['WARC-Segment-Origin-ID']
  }

  /**
   * @desc The value of the WARC-Segment-Number header field
   * @return {?string}
   */
  get warcSegmentNumber () {
    return this.warcHeader['WARC-Segment-Number']
  }

  /**
   * @desc The value of the WARC-Segment-Total-Length header field
   * @return {?string}
   */
  get warcSegmentTotalLength () {
    return this.warcHeader['WARC-Segment-Total-Length']
  }
}

/**
 * @desc The WARCRecord class is the sole export of this module
 * @type {WARCRecord}
 */
module.exports = WARCRecord