// src/parser.js
// Copyright (c) 2014-2019, Quentin Rossetti <quentin.rossetti@gmail.com>
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
import normalizePath from 'normalize-path'
import { INFOS, BODY_PROGRESS, BODY_SYMBOL_FILE, BODY_HASH, INFOS_SPLIT, INFOS_PATH, END_OF_STAGE_HYPHEN, END_OF_TECH_INFOS_HEADERS } from './regexp.js'
import { SYMBOL_OPERATIONS } from './references.js'
// Infos about the operation are given by 7z on the stdout. They can be:
// - colon-separated: `Creating archive: DirNew/BaseExt.7z`
// - equal-separated: `Method = LZMA2:12`
// - two on one line: `Prop 1: Data 1, # Prop 2: Data 2`
// - in the HEADERS or in the FOOTERS
// This function matches if the current line contains some infos. A **Map** is
// used to store infos in the stream.
/**
 * Collect every property/value pair found on one line of 7z output.
 *
 * The line is cut with `INFOS_SPLIT` and each piece is tested against `INFOS`;
 * the `property` and `value` named groups of every matching piece are stored.
 *
 * @param {object} stream - not read by this function
 * @param {string} line - a single line of output
 * @returns {Map<string, string>|null} property -> value pairs, or `null` when
 *   no piece of the line matched. If a property appears twice, the last value
 *   is kept.
 */
export function matchInfos (stream, line) {
  const infos = new Map()
  for (const chunk of line.split(INFOS_SPLIT)) {
    const found = chunk.match(INFOS)
    if (found) {
      infos.set(found.groups.property, found.groups.value)
    }
  }
  // An empty Map means that nothing on the line looked like an info
  return infos.size > 0 ? infos : null
}
// Most 7z commands don't output a specific marker for the end of headers.
// Instead we can check if the line is a match for body.
// The end of headers for commands that use `matchBodySymbol()` is only
// delimited by a `matchBodySymbol() === true`. Returning a truthy value would
// cause the current loop iteration to end and, in doing so, fail to push the
// current line to the stream, so we have to push in here.
/**
 * Delegate the end-of-headers check to the body matcher held by the stream.
 *
 * @param {object} stream - object exposing a `_matchBodyData(stream, line)`
 *   method; it is invoked as a method, so `this` is the stream
 * @param {string} line - a single line of output
 * @returns {*} whatever `stream._matchBodyData` returned, unchanged
 */
export function matchEndOfHeadersSymbol (stream, line) {
  const bodyMatch = stream._matchBodyData(stream, line)
  return bodyMatch
}
// Some 7z commands use a `--- -----` like string as a marker for the end of
// headers and the end of body. The positions of spaces are saved in the
// stream to be exploited by the `matchBodyList()` function
/**
 * Detect a hyphen ruler line and remember where its spaces are.
 *
 * @param {object} stream - receives `_columnsPositions`, the index of every
 *   space character of `line`, when the line matches; left untouched otherwise
 * @param {string} line - a single line of output
 * @returns {string|null} `line` itself when it matches `END_OF_STAGE_HYPHEN`,
 *   `null` otherwise
 */
export function matchEndOfHeadersHyphen (stream, line) {
  if (!END_OF_STAGE_HYPHEN.test(line)) {
    return null
  }
  const spaces = []
  Array.from(line).forEach((char, index) => {
    // `getSpacesPosition` answers `null` for anything that is not a space
    const position = getSpacesPosition(char, index)
    if (Number.isInteger(position)) {
      spaces.push(position)
    }
  })
  stream._columnsPositions = spaces
  return line
}
/**
 * Detect the line that closes the headers of a technical-information listing.
 *
 * @param {object} stream - not read by this function
 * @param {string} line - a single line of output
 * @returns {string|null} `line` itself when it matches
 *   `END_OF_TECH_INFOS_HEADERS`, `null` otherwise
 */
export function matchEndOfHeadersTechInfo (stream, line) {
  return END_OF_TECH_INFOS_HEADERS.test(line) ? line : null
}
// Progress as a percentage is only displayed to stdout when the `-bsp1` switch
// is specified. Progress can have several forms:
// - only percent: ` 0%`
// - with file count: ` 23% 4`
// - with file name: ` 23% 4 file.txt`
/**
 * Parse a progress line.
 *
 * @param {object} stream - not read by this function
 * @param {string} line - a single line of output
 * @returns {{percent: number, fileCount: number, file: string}|null} the
 *   parsed progress, or `null` when the line is blank or does not match
 *   `BODY_PROGRESS`. `percent` and `fileCount` are parsed as base-10 integers
 *   and are `NaN` when the corresponding group captured nothing; `file` is the
 *   captured name passed through `normalizePath`.
 */
export function matchProgress (stream, line) {
  if (isEmpty(line)) {
    return null
  }
  const match = line.match(BODY_PROGRESS)
  if (!match) {
    return null
  }
  const { percent, fileCount, file } = match.groups
  return {
    // Explicit radix: these are decimal counters, never to be guessed
    percent: Number.parseInt(percent, 10),
    fileCount: Number.parseInt(fileCount, 10),
    file: normalizePath(file)
  }
}
// Most 7z commands output body as a symbol-filename pair. The symbol is a
// unique character that represents the state of the operation applied by the
// command to the file. E.g.:
// - testing file: `T file/to/test.txt`
// - adding file to archive: `+ file/to/add.txt`
/**
 * Parse a symbol/file body line.
 *
 * The captured file name goes through `normalizePath` exactly once and the
 * regexp match object is left untouched.
 *
 * @param {object} stream - not read by this function
 * @param {string} line - a single line of output
 * @returns {{symbol: string, file: string, status: *}|null} the parsed entry,
 *   or `null` when the line does not match `BODY_SYMBOL_FILE`. `status` is the
 *   `SYMBOL_OPERATIONS` entry for the symbol, `undefined` when there is none.
 */
export function matchBodySymbol (stream, line) {
  const match = line.match(BODY_SYMBOL_FILE)
  if (!match) {
    return null
  }
  const { symbol, file } = match.groups
  return {
    symbol,
    file: normalizePath(file),
    status: SYMBOL_OPERATIONS[symbol]
  }
}
// 7z list command outputs lines that look like this:
// 2018-09-29 09:06:15 ....A 9 24 DirHex/42550418a4ef9
// The caveat is that each value can be empty. So we don't use a Regexp but
// the positions where the columns are to split the line into an object.
/**
 * Cut a listing line into its columns using the positions saved on the stream.
 *
 * @param {object} stream - `_columnsPositions` is read; its first four entries
 *   are the cut points between date/time, attributes, size, compressed size
 *   and file name
 * @param {string} line - a single line of output
 * @returns {{datetime: (Date|undefined), attributes: (string|undefined),
 *   size: (number|undefined), sizeCompressed: (number|undefined),
 *   file: (string|undefined)}|null} one entry per column, `undefined` for a
 *   blank column; `null` when the column positions are not available or the
 *   line is not a string.
 */
export function matchBodyList (stream, line) {
  // Explicit precondition instead of a blanket try/catch: without column
  // positions (or without a string) there is nothing to cut.
  if (!stream || stream._columnsPositions == null || typeof line !== 'string') {
    return null
  }
  const columns = stream._columnsPositions
  const raw = {
    datetime: line.substring(0, columns[0]),
    attributes: line.substring(columns[0], columns[1]),
    size: line.substring(columns[1], columns[2]),
    sizeCompressed: line.substring(columns[2], columns[3]),
    file: line.substring(columns[3])
  }
  return {
    datetime: isEmpty(raw.datetime) ? undefined : new Date(raw.datetime),
    attributes: isEmpty(raw.attributes) ? undefined : raw.attributes.trim(),
    // Sizes are decimal byte counts, hence the explicit radix
    size: isEmpty(raw.size) ? undefined : Number.parseInt(raw.size, 10),
    sizeCompressed: isEmpty(raw.sizeCompressed) ? undefined : Number.parseInt(raw.sizeCompressed, 10),
    file: isEmpty(raw.file) ? undefined : normalizePath(raw.file.trim())
  }
}
// Hash command outputs body as a hash-size-filename trio. Some data can be
// empty. E.g.:
// - hash with all info: `hebdf6 43 hashed/file.txt`
// - hash with some info: `hebdf6 hashed/file.txt`
// - hash for directories: ` hashed/file.txt`
/**
 * Parse a hash/size/file body line.
 *
 * @param {object} stream - not read by this function
 * @param {string} line - a single line of output
 * @returns {{hash: (string|undefined), size: number, file: string}|null} the
 *   parsed entry, or `null` when the line is blank or does not match
 *   `BODY_HASH`. `size` is parsed as a base-10 integer and is `NaN` when the
 *   size group captured nothing; `file` is passed through `normalizePath`.
 */
export function matchBodyHash (stream, line) {
  if (isEmpty(line)) {
    return null
  }
  const match = line.match(BODY_HASH)
  if (!match) {
    return null
  }
  const { hash, size, file } = match.groups
  return {
    hash,
    // Explicit radix: the size column is a decimal byte count
    size: Number.parseInt(size, 10),
    file: normalizePath(file)
  }
}
// List command with -slt switch. This command outputs multiple lines per
// file. E.g.:
// Path = DirImages/LICENSE
// Size = 37
// Packed Size = 18292718
// Modified = 2018-10-02 21:45:49
// Attributes = A_ -rw-r--r--
// CRC = F303F60C
// Encrypted = -
// Method = LZMA2:24
// Block = 0
// *Path* is the first and *Block* is the last so we use that to mark the end
// of data. The end of the output is marked by 2 empty lines
/**
 * Accumulate the `property = value` lines of one file and emit the record when
 * the blank line that follows it is reached.
 *
 * State kept on the stream:
 * - `_lastLines`: the current line and the one before it
 * - `_lastTechInfo`: Map of the record being built; a `Path` line starts a
 *   fresh Map
 *
 * Lines arriving before any `Path` line are ignored instead of crashing on the
 * missing Map.
 *
 * @param {object} stream - carries the state described above
 * @param {string} line - a single line of output
 * @returns {{file: string, techInfo: Map<string, string>}|null} the record,
 *   on a blank line that directly follows a non-blank line while a record
 *   exists; `null` in every other case. `techInfo` is the very Map stored on
 *   the stream, not a copy.
 */
export function matchBodyTechInfo (stream, line) {
  if (!stream._lastLines) {
    stream._lastLines = ['', '']
  }
  stream._lastLines[1] = stream._lastLines[0]
  stream._lastLines[0] = line

  if (isEmpty(line)) {
    // Second blank line in a row, or no record was ever started
    if (isEmpty(stream._lastLines[1]) || !stream._lastTechInfo) {
      return null
    }
    return {
      file: stream._lastTechInfo.get('Path'),
      techInfo: stream._lastTechInfo
    }
  }

  const match = line.match(INFOS)
  if (!match) {
    return null
  }
  const { property } = match.groups
  let { value } = match.groups
  if (property === 'Path') {
    // `Path` opens a new record
    stream._lastTechInfo = new Map()
    value = normalizePath(value)
  } else if (!stream._lastTechInfo) {
    // Property outside of any record: nowhere to store it
    return null
  }
  stream._lastTechInfo.set(property, value)
  return null
}
// This function determines if the end of the body section has been reached,
// an empty line is emitted by 7z at the end of the body, so this function
// uses this as an indicator.
// When the progress switch is activated the `formatByLine()` method adds
// additional empty lines: By adding a marker to the `SevenZipStream` object
// the function can detect two empty lines in a row.
/**
 * Tell whether a blank line closes the body section.
 *
 * @param {object} stream - `_isProgressFlag` is read; `_lastLineEmpty` is read
 *   and updated to remember whether the previous line was blank
 * @param {string} line - a single line of output
 * @returns {true|null} `true` on a blank line when `_isProgressFlag` is falsy,
 *   or on a blank line that follows another blank line; `null` otherwise
 */
export function matchEndOfBodySymbol (stream, line) {
  if (!isEmpty(line)) {
    stream._lastLineEmpty = false
    return null
  }
  // Blank line: final unless progress is on and it is the first blank in a row
  if (!stream._isProgressFlag || stream._lastLineEmpty) {
    return true
  }
  stream._lastLineEmpty = true
  return null
}
/**
 * Mapping callback: keep the index of space characters only.
 *
 * @param {string} char - one character
 * @param {number} indexOfChar - position of that character
 * @returns {number|null} `indexOfChar` when `char` is a single space,
 *   `null` for any other character
 */
function getSpacesPosition (char, indexOfChar) {
  if (char === ' ') {
    return indexOfChar
  }
  return null
}
/**
 * Tell whether a string holds nothing but whitespace.
 *
 * @param {string} string - text to inspect
 * @returns {boolean} `true` when nothing is left after trimming
 */
function isEmpty (string) {
  const trimmed = string.trim()
  return trimmed === ''
}
// Given a command, the formatting of STAGES HEADERS, BODY and FOOTERS differs,
// so each command has its particular set of parser functions. ie:
// - To identify the end of the BODY stage a list command outputs a
// `---- -- --- ---` line.
// - An extract command outputs the FOOTERS after an empty line.
/**
 * Look up one parser setting of a command.
 *
 * @param {string} command - one of `add`, `delete`, `extract`, `extractFull`,
 *   `hash`, `list`, `listTechInfo`, `rename`, `test`, `update`
 * @param {string} parser - which setting to read: `bodyData`, `endOfHeaders`,
 *   `endOfBody` or `dataType`
 * @returns {Function|string|undefined} the matcher function (or, for
 *   `dataType`, the type name) registered for that command
 * @throws {TypeError} when `command` is not a known key, because the lookup
 *   then reads a property of `undefined`
 */
export const fetch = (command, parser) => {
  // Shared by every command whose body is made of symbol/file lines
  const symbolParsers = {
    bodyData: matchBodySymbol,
    endOfHeaders: matchEndOfHeadersSymbol,
    endOfBody: matchEndOfBodySymbol,
    dataType: 'symbol'
  }
  // Table outputs are framed by the same hyphen ruler before and after the body
  const tableParsers = (bodyData) => ({
    bodyData,
    endOfHeaders: matchEndOfHeadersHyphen,
    endOfBody: matchEndOfHeadersHyphen,
    dataType: 'table'
  })
  const parsersByCommand = {
    add: symbolParsers,
    delete: symbolParsers,
    extract: symbolParsers,
    extractFull: symbolParsers,
    hash: tableParsers(matchBodyHash),
    list: tableParsers(matchBodyList),
    listTechInfo: {
      bodyData: matchBodyTechInfo,
      endOfHeaders: matchEndOfHeadersTechInfo,
      endOfBody: matchEndOfHeadersHyphen,
      dataType: 'showTechInfo'
    },
    rename: symbolParsers,
    test: symbolParsers,
    update: symbolParsers
  }
  return parsersByCommand[command][parser]
}
// Default export: the parser functions of this module gathered on a single
// object, in addition to their individual named exports.
export default {
matchInfos,
matchEndOfHeadersSymbol,
matchEndOfHeadersHyphen,
matchEndOfHeadersTechInfo,
matchProgress,
matchBodySymbol,
matchBodyList,
matchBodyHash,
matchBodyTechInfo,
matchEndOfBodySymbol,
fetch
}