// lib/warcRecord/builder.js
'use strict'
const WARCRecord = require('./record')
const { crlf, begin } = require('./fieldIdentifiers')
/**
 * States of the line-oriented state machine in RecordBuilder#consumeLine.
 *
 * `header`, `content1` and `content2` mean lines are being collected into the
 * matching section; each `consumeCRLF*` state means the CRLF-only separator
 * that ended that section has been seen. Every state is a unique Symbol, so
 * states can only be compared by identity. The object is frozen so that a
 * state cannot be reassigned by accident.
 *
 * @type {Readonly<{header: symbol, content1: symbol, content2: symbol, consumeCRLFHeader: symbol, consumeCRLFContent1: symbol, consumeCRLFContent2: symbol}>}
 */
const parsingStates = Object.freeze({
  header: Symbol('warc-parsing-header'),
  content1: Symbol('warc-parsing-content1'),
  content2: Symbol('warc-parsing-content2'),
  // The descriptions below are debug labels only; "comsume" was a typo.
  consumeCRLFHeader: Symbol('warc-parsing-consume-crlf-header'),
  consumeCRLFContent1: Symbol('warc-parsing-consume-crlf-c1'),
  consumeCRLFContent2: Symbol('warc-parsing-consume-crlf-c2')
})
/**
 * Length of the `begin` marker imported from ./fieldIdentifiers, read once at
 * module load. isWARCRevisionLine compares this many leading elements of a
 * line against `begin`.
 * @type {number}
 */
const WFIBeginLen = begin.length
/**
 * Tests whether a line is a bare separator, i.e. exactly two bytes long with
 * both bytes equal to the first two elements of `crlf`.
 * @param {Buffer} line - The line to inspect
 * @returns {boolean} true only for a two-byte line matching `crlf`
 */
function isJustCRLF (line) {
  return line.length === 2 && line[0] === crlf[0] && line[1] === crlf[1]
}
/**
 * Tests whether a line opens a new record: it must be at most 11 bytes long
 * and its first `WFIBeginLen` bytes must equal the `begin` marker.
 *
 * NOTE(review): the 11-byte cap presumably corresponds to the longest expected
 * version line (e.g. `WARC/0.18` plus CRLF) - confirm against the supported
 * WARC revisions.
 *
 * @param {Buffer} line - The line to inspect
 * @returns {boolean} true when the line is short enough and starts with `begin`
 */
function isWARCRevisionLine (line) {
  if (line.length <= 11) {
    // A line shorter than the marker yields undefined here and is rejected.
    for (let idx = 0; idx < WFIBeginLen; idx += 1) {
      if (line[idx] !== begin[idx]) return false
    }
    return true
  }
  return false
}
/**
 * @desc Progressively builds WARC records by consuming the file line by line.
 *
 * Lines are routed into three sections - `header`, `c1` and `c2` - each ended
 * by a CRLF-only line. The CRLF-only lines themselves are never stored, and
 * once the separator after `c2` has been seen every further line is ignored
 * until the next WARC revision line starts a new record.
 * NOTE(review): `c1`/`c2` presumably hold e.g. the HTTP headers and body of a
 * response record - confirm against WARCRecord.
 */
class RecordBuilder {
  /**
   * @desc Create a new RecordBuilder with empty sections, positioned in the
   * header-parsing state
   */
  constructor () {
    /**
     * Lines collected so far for the record currently being assembled
     * @type {{header: Buffer[], c1: Buffer[], c2: Buffer[]}}
     * @private
     */
    this._parts = { header: [], c1: [], c2: [] }
    /**
     * Current state of the line state machine (one of `parsingStates`)
     * @type {symbol}
     * @private
     */
    this._parsingState = parsingStates.header
  }

  /**
   * @desc Returns a new WARC record built from the lines collected so far, or
   * null when no header lines have been collected. The caller must invoke this
   * once after the final line, since consumeLine only emits a record when the
   * NEXT record's revision line arrives.
   * @returns {?WARCRecord}
   */
  buildRecord () {
    if (this._parts.header.length === 0) return null
    const newRecord = new WARCRecord(this._parts)
    // Swap in a fresh container instead of clearing the one just handed to
    // WARCRecord: if the record keeps a reference to it, resetting it in
    // place would wipe the record's data and let later lines leak into it.
    this._parts = { header: [], c1: [], c2: [] }
    return newRecord
  }

  /**
   * @desc Consumes a line of a WARC file.
   * If the line is a WARC revision line, the record collected so far (if any)
   * is built and returned, and the line becomes the first header line of the
   * next record; otherwise null is returned.
   * @param {Buffer} line - The line to be consumed
   * @returns {?WARCRecord}
   */
  consumeLine (line) {
    let newRecord = null
    if (isWARCRevisionLine(line)) {
      // A revision line always starts a new record, whatever state we were in.
      this._parsingState = parsingStates.header
      newRecord = this.buildRecord()
    }
    const isSep = isJustCRLF(line)
    switch (this._parsingState) {
      case parsingStates.header:
        if (!isSep) {
          this._parts.header.push(line)
        } else {
          this._parsingState = parsingStates.consumeCRLFHeader
        }
        break
      case parsingStates.consumeCRLFHeader:
        // Skip any further separators; the first real line opens section c1.
        if (!isSep) {
          this._parts.c1.push(line)
          this._parsingState = parsingStates.content1
        }
        break
      case parsingStates.content1:
        if (!isSep) {
          this._parts.c1.push(line)
        } else {
          this._parsingState = parsingStates.consumeCRLFContent1
        }
        break
      case parsingStates.consumeCRLFContent1:
        // Skip any further separators; the first real line opens section c2.
        if (!isSep) {
          this._parts.c2.push(line)
          this._parsingState = parsingStates.content2
        }
        break
      case parsingStates.content2:
        if (!isSep) {
          this._parts.c2.push(line)
        } else {
          this._parsingState = parsingStates.consumeCRLFContent2
        }
        break
      case parsingStates.consumeCRLFContent2:
        // Record is complete: ignore everything until the next revision line.
        break
    }
    return newRecord
  }
}
/**
 * The module's sole export is the RecordBuilder class itself (the constructor,
 * not an instance).
 * @type {typeof RecordBuilder}
 */
module.exports = RecordBuilder