src/connectors/Excel.ts
/*
* Copyright (c) 2018 Rain Agency <contact@rain.agency>
* Author: Rain Agency <contact@rain.agency>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy of
* this software and associated documentation files (the "Software"), to deal in
* the Software without restriction, including without limitation the rights to
* use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
* the Software, and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
* FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
* COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
* IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
import fs from "fs-extra";
import _ from "lodash";
import xlsx from "node-xlsx";
import path from "path";
import { IVoxaSheet, SheetTypes } from "../VoxaSheet";
import { findSheetType, rowFormatted } from "./utils";
import { filterSheets } from "../Processor";
function findLocalFiles(spreadsheet: string): string[] {
const fsStats = fs.lstatSync(spreadsheet);
if (fsStats.isDirectory()) {
return fs
.readdirSync(spreadsheet)
.filter(f => _.endsWith(f, ".xlsx") || _.endsWith(f, ".ods") || _.endsWith(f, ".fods"))
.map(f => path.join(spreadsheet, f));
}
return [spreadsheet];
}
function readFileCreateWorkbook(f: string) {
const workbook = xlsx.parse(f);
const spreadsheetTitle = _.last(f.split("/")) as string;
const spreadsheetId = f;
if (!_.isString(spreadsheetTitle)) {
return undefined;
}
return workbook.map(book => ({
spreadsheetId,
spreadsheetTitle,
sheetTitle: book.name,
type: "none",
data: processBookData(book.data).filter(row => row.length)
}));
}
function refactorExcelData(sheet: IVoxaSheet) {
const reestrictFormat = _.isEmpty(filterSheets([sheet], [SheetTypes.UTTERANCE]));
sheet.data = (_.chain(sheet).get("data") as any)
.map((next: any, index: number, arr: any) => {
if (index === 0) {
return next;
}
const head = arr[0];
const extraColumns = head.length - next.length;
if (extraColumns > 0) {
next = _.concat(next, _.fill(Array(extraColumns), undefined));
}
return next;
})
.reduce(
(acc: any[], next: any, iindex: number, arr: any[]) =>
rowFormatted(acc, next, iindex, arr, reestrictFormat),
[] as any[]
)
.drop()
.value();
return sheet;
}
export async function buildFromLocalExcel(
options: any,
spreadsheetKey: string
): Promise<IVoxaSheet[]> {
const vsheet = ((_.chain(options).get(spreadsheetKey) as any)
.map((f: string) => (f.indexOf("/") === 0 ? f : path.join(options.rootPath, f)))
.filter((spreadsheet: string) => fs.pathExistsSync(spreadsheet))
.map(findLocalFiles)
.flatten()
.map(readFileCreateWorkbook)
.flatten()
.compact()
.map(findSheetType)
.filter((sheet: IVoxaSheet) => _.get(sheet, "type") !== "none" && !_.isEmpty(sheet))
.map(refactorExcelData)
.value() as unknown) as IVoxaSheet[];
return vsheet;
}
function processBookData(data: string[][]) {
// this is all because of a bug in node-xlsx where the first cell of the first tab get's a lot
// of garbage appended to it's content
const titleRow = _.get(data, "[0]", "");
const firstColumnTitle = _.get(titleRow, "[0]", "");
const split = firstColumnTitle.split("\n");
if (split.length > 1) {
titleRow[0] = split[split.length - 1];
}
return data;
}