src/types/files/archives/sevenZip/sevenZip.ts
import path from 'node:path';
import _7z, { Result } from '7zip-min';
import async, { AsyncResultCallback } from 'async';
import { Mutex } from 'async-mutex';
import Defaults from '../../../../globals/defaults.js';
import Temp from '../../../../globals/temp.js';
import fsPoly from '../../../../polyfill/fsPoly.js';
import ExpectedError from '../../../expectedError.js';
import Archive from '../archive.js';
import ArchiveEntry from '../archiveEntry.js';
export default class SevenZip extends Archive {
private static readonly LIST_MUTEX = new Mutex();
// eslint-disable-next-line class-methods-use-this
protected new(filePath: string): Archive {
return new SevenZip(filePath);
}
static getExtensions(): string[] {
return ['.7z'];
}
// eslint-disable-next-line class-methods-use-this
getExtension(): string {
return SevenZip.getExtensions()[0];
}
async getArchiveEntries(checksumBitmask: number): Promise<ArchiveEntry<Archive>[]> {
/**
* WARN(cemmer): even with the above mutex, {@link _7z.list} will still sometimes return no
* entries. This seems to happen more on older Node.js versions (v16, v18) and specific OSes
* (Linux). Most archives contain at least one file, so assume this is wrong and attempt again
* up to 5 times total.
*/
const maxAttempts = 5;
for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
const archiveEntries = await this.getArchiveEntriesNotCached(checksumBitmask);
if (archiveEntries.length > 0) {
return archiveEntries;
}
// Backoff with jitter
if (attempt >= maxAttempts) {
break;
}
await new Promise((resolve) => {
setTimeout(resolve, Math.random() * (2 ** (attempt - 1) * 10));
});
}
return [];
}
private async getArchiveEntriesNotCached(checksumBitmask: number): Promise<ArchiveEntry<this>[]> {
/**
* WARN(cemmer): {@link _7z.list} seems to have issues with any amount of real concurrency,
* it will return no files but also no error. Try to prevent that behavior.
*/
const filesIn7z = await SevenZip.LIST_MUTEX.runExclusive(
async () =>
new Promise<Result[]>((resolve, reject) => {
_7z.list(this.getFilePath(), (err, result) => {
if (err) {
const msg = err.toString().replace(/\n\n+/g, '\n').replace(/^/gm, ' ').trim();
reject(msg);
} else {
// https://github.com/onikienko/7zip-min/issues/70
// If `7zip-min.list()` failed to parse the entry name then ignore it
resolve(result.filter((entry) => entry.name));
}
});
}),
);
return async.mapLimit(
filesIn7z.filter((result) => !result.attr?.startsWith('D')),
Defaults.ARCHIVE_ENTRY_SCANNER_THREADS_PER_ARCHIVE,
async (result, callback: AsyncResultCallback<ArchiveEntry<this>, Error>) => {
const archiveEntry = await ArchiveEntry.entryOf(
{
archive: this,
entryPath: result.name,
size: Number.parseInt(result.size, 10),
crc32: result.crc,
// If MD5, SHA1, or SHA256 is desired, this file will need to be extracted to calculate
},
checksumBitmask,
);
callback(undefined, archiveEntry);
},
);
}
async extractEntryToFile(entryPath: string, extractedFilePath: string): Promise<void> {
const tempDir = await fsPoly.mkdtemp(path.join(Temp.getTempDir(), '7z'));
try {
let tempFile = path.join(tempDir, entryPath);
await new Promise<void>((resolve, reject) => {
_7z.cmd(
[
// _7z.unpack() flags
'x',
this.getFilePath(),
'-y',
`-o${tempDir}`,
// https://github.com/onikienko/7zip-min/issues/71
// Extract only the single archive entry
entryPath,
'-r',
],
(err) => {
if (err) {
reject(err);
} else {
resolve();
}
},
);
});
// https://github.com/onikienko/7zip-min/issues/86
// Fix `7zip-min.list()` returning unicode entry names as � on Windows
if (process.platform === 'win32' && !(await fsPoly.exists(tempFile))) {
const files = await fsPoly.walk(tempDir);
if (files.length === 0) {
throw new ExpectedError('failed to extract any files');
} else if (files.length > 1) {
throw new ExpectedError('extracted too many files');
}
[tempFile] = files;
}
await fsPoly.mv(tempFile, extractedFilePath);
} finally {
await fsPoly.rm(tempDir, { recursive: true, force: true });
}
}
}