iterative/vscode-dvc

View on GitHub
extension/src/fileSystem/index.ts

Summary

Maintainability
B
4 hrs
Test Coverage
A
96%
import {
  basename,
  dirname,
  extname,
  format,
  isAbsolute,
  join,
  parse,
  relative,
  resolve,
  sep
} from 'path'
import {
  appendFileSync,
  ensureFileSync,
  existsSync,
  lstatSync,
  readdir,
  readFileSync,
  removeSync,
  writeFileSync
} from 'fs-extra'
import { Uri, workspace, window, commands, ViewColumn } from 'vscode'
import { csv2json, json2csv } from 'json-2-csv'
import yaml from 'yaml'
import { standardizePath } from './path'
import { findFiles } from './workspace'
import { definedAndNonEmpty, sortCollectedArray } from '../util/array'
import { Logger } from '../common/logger'
import { gitPath } from '../cli/git/constants'
import { createValidInteger } from '../util/number'
import { processExists } from '../process/execution'
import { getFirstWorkspaceFolder } from '../vscode/workspaceFolders'
import { DOT_DVC, FULLY_NESTED_DVC } from '../cli/dvc/constants'
import { delay } from '../util/time'
import { PlotConfigData, PlotConfigDataAxis } from '../pipeline/quickPick'

/**
 * Reports whether an entry exists at the given file-system path.
 *
 * @param path - Path to probe.
 * @returns `true` when `existsSync` finds something at `path`.
 */
export const exists = (path: string): boolean => {
  const found = existsSync(path)
  return found
}

/**
 * Runs one of the `lstat` type predicates against a path.
 *
 * @param path - Path to inspect.
 * @param check - Name of the stats predicate to call.
 * @returns The predicate's result, or `false` when reading the stats throws
 * (for example because nothing exists at `path`).
 */
const checkStats = (path: string, check: 'isDirectory' | 'isFile'): boolean => {
  let matches = false
  try {
    const stats = lstatSync(path)
    matches = stats[check]()
  } catch {
    // Unreadable or missing path: report "no" instead of throwing.
  }
  return matches
}

/**
 * Tells whether `path` is a directory according to its `lstat` result.
 *
 * @param path - Path to inspect.
 * @returns `false` when the path is not a directory or cannot be read.
 */
export const isDirectory = (path: string): boolean => {
  return checkStats(path, 'isDirectory')
}

/**
 * Tells whether `path` is a regular file according to its `lstat` result.
 *
 * @param path - Path to inspect.
 * @returns `false` when the path is not a file or cannot be read.
 */
export const isFile = (path: string): boolean => {
  return checkStats(path, 'isFile')
}

/**
 * Reads the modification time of a path.
 *
 * @param path - Path to inspect.
 * @returns The `mtime` as epoch milliseconds, or `-1` when the path does not
 * exist.
 */
export const getModifiedTime = (path: string): number => {
  if (!exists(path)) {
    return -1
  }

  const { mtime } = lstatSync(path)
  return mtime.getTime()
}

/**
 * Lists the direct children of `cwd` that contain a `dotDir` directory.
 *
 * @param cwd - Directory whose immediate children are scanned.
 * @param dotDir - Name of the marker directory to look for inside each child.
 * @returns The standardized paths of the matching children.
 */
export const findSubRootPaths = async (
  cwd: string,
  dotDir: string
): Promise<string[] | undefined> => {
  const subRoots: string[] = []

  for (const child of await readdir(cwd)) {
    const childPath = join(cwd, child)
    if (isDirectory(join(childPath, dotDir))) {
      subRoots.push(standardizePath(childPath))
    }
  }

  return subRoots
}

/**
 * Collects DVC roots from the files matched by `FULLY_NESTED_DVC`.
 *
 * Each root is taken as the grandparent directory of a matched file.
 *
 * @returns The standardized roots, de-duplicated in a `Set` after being passed
 * through `sortCollectedArray`.
 */
export const findDvcRootPaths = async (): Promise<Set<string>> => {
  const nestedFiles = await findFiles(FULLY_NESTED_DVC)

  const roots = definedAndNonEmpty(nestedFiles)
    ? nestedFiles.map(nestedFile =>
        standardizePath(dirname(dirname(nestedFile)))
      )
    : []

  return new Set(sortCollectedArray(roots))
}

/**
 * Turns a (possibly pending) relative DVC root into an absolute path.
 *
 * @param cwd - Directory the relative path is resolved against.
 * @param relativePathPromise - Promise for the relative root path.
 * @returns The standardized absolute path, or `undefined` when the promise
 * yields nothing (or an empty string).
 */
export const findAbsoluteDvcRootPath = async (
  cwd: string,
  relativePathPromise: Promise<string | undefined>
): Promise<string | undefined> => {
  const relativePath = await relativePathPromise

  return relativePath
    ? standardizePath(resolve(cwd, relativePath))
    : undefined
}

// `.git` is a file rather than a directory inside a submodule (and inside a
// linked worktree). It holds a pointer line such as
// `gitdir: ../.git/modules/demo` (relative) or
// `gitdir: /repo/.git/worktrees/demo` (absolute).
/**
 * Locates the real git directory for a repository root.
 *
 * @param gitRoot - Root directory of the git working tree.
 * @returns The `.git` path itself when it is not a file; otherwise the
 * location named by the file's `gitdir: ` line, resolved against `gitRoot`.
 */
const findDotGitDir = (gitRoot: string) => {
  const dotGitPath = join(gitRoot, gitPath.DOT_GIT)

  if (!isFile(dotGitPath)) {
    return dotGitPath
  }

  const gitDirPrefix = 'gitdir: '
  const gitDirLine = readFileSync(dotGitPath, 'utf8')
    .split(/\r?\n/)
    .find(line => line.startsWith(gitDirPrefix))

  if (!gitDirLine) {
    // No pointer line: keep the previous fallback of the resolved root.
    return resolve(gitRoot)
  }

  // Hand the pointer to `resolve` in one piece. Splitting it on '/' turned the
  // leading slash of an absolute pointer into an empty segment, which
  // `resolve` ignores, so absolute pointers were wrongly placed under gitRoot.
  return resolve(gitRoot, gitDirLine.slice(gitDirPrefix.length))
}

// Cache of resolved git directories, keyed by git root.
const gitRootGitDir: { [key: string]: string } = {}

/**
 * Builds a path inside the git directory that belongs to `gitRoot`.
 *
 * The git directory is looked up once per root and then served from the cache.
 *
 * @param gitRoot - Root directory of the git working tree.
 * @param path - Path relative to the git directory, or `gitPath.DOT_GIT` to
 * request the git directory itself.
 * @returns The git directory, or `path` joined onto it.
 */
export const getGitPath = (gitRoot: string, path: string) => {
  let gitDir = gitRootGitDir[gitRoot]
  if (!gitDir) {
    gitDir = findDotGitDir(gitRoot)
  }
  gitRootGitDir[gitRoot] = gitDir

  return path === gitPath.DOT_GIT ? gitDir : join(gitDir, path)
}

/**
 * Tells whether `path` is `root` itself or lies somewhere beneath it.
 *
 * @param root - Candidate ancestor directory.
 * @param path - Path to test.
 * @returns `true` when `path` equals `root` or is nested inside it.
 */
export const isSameOrChild = (root: string, path: string) => {
  const rel = relative(root, path)

  // Only a whole leading ".." segment means the path climbs out of root. A
  // child whose name merely starts with ".." (e.g. "..cache") is still inside.
  const escapesRoot = rel === '..' || rel.startsWith(`..${sep}`)

  // `relative` returns an absolute path when no relative route exists (e.g.
  // different drives on Windows); such a path is never inside root.
  return !escapesRoot && !isAbsolute(rel)
}

/**
 * Tells whether `path` is one of the sub-project roots or lies inside one.
 *
 * A root only matches on a path-segment boundary, so `/repo/sub` does not
 * claim `/repo/sub2/file`.
 *
 * @param path - Path to test.
 * @param subProjects - Root paths of the sub-projects.
 * @returns `true` when at least one sub-project contains `path`.
 */
export const isPathInSubProject = (
  path: string,
  subProjects: string[]
): boolean => {
  // Accept '/' as well as the platform separator because the incoming paths
  // may have been normalized to forward slashes.
  const isSeparator = (char: string | undefined): boolean =>
    char === '/' || char === sep

  return subProjects.some(dvcRoot => {
    if (!path.startsWith(dvcRoot)) {
      return false
    }
    if (path.length === dvcRoot.length) {
      return true
    }
    return (
      isSeparator(dvcRoot[dvcRoot.length - 1]) ||
      isSeparator(path[dvcRoot.length])
    )
  })
}

/**
 * Tells whether `path` lies strictly inside `dvcRoot` without belonging to
 * one of its sub-projects.
 *
 * The root only matches on a path-segment boundary, so `/repo` does not claim
 * `/repo2/file`.
 *
 * @param path - Path to test; `undefined` or empty yields `false`.
 * @param dvcRoot - Root path of the project.
 * @param subProjects - Root paths of the sub-projects to exclude.
 * @returns `true` when the path is below the root (not the root itself) and
 * is not in any sub-project.
 */
export const isPathInProject = (
  path: string | undefined,
  dvcRoot: string,
  subProjects: string[]
): boolean => {
  if (!path || path === dvcRoot || !path.startsWith(dvcRoot)) {
    return false
  }

  // Accept '/' as well as the platform separator because the incoming paths
  // may have been normalized to forward slashes.
  const rootEndsWithSeparator = dvcRoot.endsWith('/') || dvcRoot.endsWith(sep)
  const charAfterRoot = path[dvcRoot.length]
  const isBelowRoot =
    rootEndsWithSeparator || charAfterRoot === '/' || charAfterRoot === sep

  return isBelowRoot && !isPathInSubProject(path, subProjects)
}

/**
 * One entry of a stage's `outs` list: either a plain string, or an object
 * keyed by string whose values carry optional `checkpoint` / `cache` flags.
 */
export type Out =
  | string
  | { [key: string]: { checkpoint?: boolean; cache?: boolean } }

/**
 * The subset of a parsed `dvc.yaml` modelled here: stages keyed by name, each
 * with an optional list of outputs.
 */
export type PartialDvcYaml = {
  stages: Record<string, { outs?: Out[] }>
}

/**
 * Recognizes DVC metadata files by name.
 *
 * @param path - Path to test; may be omitted.
 * @returns `true` when the extension equals `DOT_DVC` or the base name is
 * `dvc.lock` or `dvc.yaml`; `false` for a missing or empty path.
 */
export const isAnyDvcYaml = (path?: string): boolean => {
  if (!path) {
    return false
  }

  if (extname(path) === DOT_DVC) {
    return true
  }

  const fileName = basename(path)
  return fileName === 'dvc.lock' || fileName === 'dvc.yaml'
}

/**
 * Opens a file as a text document and shows it in an editor.
 *
 * @param filePath - File-system path of the file to open.
 * @returns The opened text document, once it has been shown.
 */
export const openFileInEditor = async (filePath: string) => {
  const fileUri = Uri.file(filePath)
  const textDocument = await workspace.openTextDocument(fileUri)

  await window.showTextDocument(textDocument)

  return textDocument
}

/**
 * Opens an image through the `vscode.open` command in the column beside the
 * active one.
 *
 * @param imagePath - File-system path of the image.
 * @returns Whatever the `vscode.open` command resolves to.
 */
export const openImageFileInEditor = async (imagePath: string) => {
  const showOptions = { viewColumn: ViewColumn.Beside }

  return await commands.executeCommand(
    'vscode.open',
    Uri.file(imagePath),
    showOptions
  )
}

/**
 * Checks whether a `dvc.yaml` exists directly inside `cwd`.
 *
 * @param cwd - Directory to look in.
 * @returns `true` when `<cwd>/dvc.yaml` exists.
 */
export const hasDvcYamlFile = (cwd: string) => {
  const dvcYamlPath = `${cwd}/dvc.yaml`
  return existsSync(dvcYamlPath)
}

/**
 * Makes sure `<cwd>/dvc.yaml` exists and appends a single-stage pipeline that
 * runs the training script.
 *
 * The file is opened in the editor without waiting for the editor to finish.
 *
 * @param cwd - Directory that holds (or will hold) `dvc.yaml`.
 * @param trainingScript - Path of the script the stage runs and depends on.
 * @param stageName - Name of the stage to add.
 * @param command - Command that precedes the script path in `cmd`.
 * @param applyRelativePath - When `true` the script path is written relative
 * to `cwd`; otherwise it is written as given (round-tripped through
 * `parse`/`format`).
 */
export const findOrCreateDvcYamlFile = (
  cwd: string,
  trainingScript: string,
  stageName: string,
  command: string,
  applyRelativePath: boolean
) => {
  const yamlFile = `${cwd}/dvc.yaml`
  ensureFileSync(yamlFile)

  let script: string
  if (applyRelativePath) {
    script = relative(cwd, trainingScript)
  } else {
    script = format(parse(trainingScript))
  }

  const stageYaml = `# Type dvc-help in this file and hit enter to get more information on how the extension can help to setup pipelines
stages:
  ${stageName}:
    cmd: ${command} ${script}
    deps:
      - ${script}
`

  void openFileInEditor(yamlFile)
  return appendFileSync(yamlFile, stageYaml)
}

/**
 * Parses a YAML file into a document while tracking line offsets.
 *
 * @param path - Path of the YAML file.
 * @returns The parsed document together with the line counter that was fed
 * during parsing, or `undefined` (after logging) when reading or parsing
 * throws.
 */
const loadYamlAsDoc = (
  path: string
): { doc: yaml.Document; lineCounter: yaml.LineCounter } | undefined => {
  try {
    const lineCounter = new yaml.LineCounter()
    const contents = readFileSync(path, 'utf8')
    const doc = yaml.parseDocument(contents, { lineCounter })

    return { doc, lineCounter }
  } catch {
    Logger.error(`failed to load yaml ${path}`)
  }
}

/**
 * Shapes an axis definition for YAML output.
 *
 * @param axis - Field lists keyed by file.
 * @returns A new object with the same keys, where a list holding exactly one
 * field is collapsed to that field and every other list is kept as is.
 */
const formatPlotYamlObjAxis = (axis: PlotConfigDataAxis) => {
  const formattedAxis: { [file: string]: string | string[] } = {}

  for (const file of Object.keys(axis)) {
    const fields = axis[file]
    formattedAxis[file] = fields.length === 1 ? fields[0] : fields
  }

  return formattedAxis
}

/**
 * Builds the object that represents one plot entry, keyed by the plot title.
 *
 * When x and y each reference exactly one file and it is the same file, `x`
 * is written as that file's first x field alone; otherwise `x` uses the same
 * per-file format as `y`.
 *
 * @param plot - Plot configuration to convert.
 * @returns `{ [title]: { template, x, y } }`.
 */
const getPlotYamlObj = (plot: PlotConfigData) => {
  const { x, y, template, title } = plot

  const xFiles = Object.keys(x)
  const yFiles = Object.keys(y)
  const [firstXFile] = xFiles

  const sharesSingleFile =
    xFiles.length === 1 && yFiles.length === 1 && yFiles[0] === firstXFile

  const xValue = sharesSingleFile
    ? x[firstXFile][0]
    : formatPlotYamlObjAxis(x)

  return {
    [title]: {
      template,
      x: xValue,
      y: formatPlotYamlObjAxis(y)
    }
  }
}

/**
 * Renders a plot as the lines of a YAML `plots:` list, mimicking the
 * indentation found in `indentSearchLines`.
 *
 * @param plotObj - Plot configuration to render.
 * @param indentSearchLines - Existing lines used to detect the indent width
 * and whether list items sit at column zero.
 * @returns The generated YAML split into lines.
 */
const getPlotsYaml = (plotObj: PlotConfigData, indentSearchLines: string[]) => {
  const indentReg = /^( +)[^ ]/

  // The first space-indented line decides the indent width; default to 2.
  let spaces = 2
  for (const line of indentSearchLines) {
    const indentMatch = indentReg.exec(line)
    if (indentMatch) {
      spaces = indentMatch[1].length
      break
    }
  }

  const plotsYaml = yaml.stringify(
    { plots: [getPlotYamlObj(plotObj)] },
    { indent: spaces }
  )
  const newPlotLines = plotsYaml.split('\n')

  const listItemsHaveNoIndent = indentSearchLines.some(line =>
    line.startsWith('-')
  )
  if (!listItemsHaveNoIndent) {
    return newPlotLines
  }

  // Existing list items start at column zero: shift indented lines left.
  return newPlotLines.map(line =>
    line.startsWith(' ') ? line.slice(spaces) : line
  )
}

/**
 * Adds a plot definition to `<cwd>/dvc.yaml` and opens the file in the editor.
 *
 * If the document has no `plots` node (or the node has no range), a complete
 * `plots:` section is appended to the end of the file. Otherwise the new plot
 * is spliced in at the end of the existing `plots` node.
 *
 * Nothing is written when the file cannot be parsed as YAML.
 *
 * @param cwd - Directory that holds (or will hold) `dvc.yaml`.
 * @param plotObj - Plot configuration to add.
 */
export const addPlotToDvcYamlFile = (cwd: string, plotObj: PlotConfigData) => {
  const dvcYamlFile = `${cwd}/dvc.yaml`
  // Called before reading so that a missing file does not fail the load.
  ensureFileSync(dvcYamlFile)
  const dvcYamlDoc = loadYamlAsDoc(dvcYamlFile)

  if (!dvcYamlDoc) {
    return
  }

  const { doc, lineCounter } = dvcYamlDoc

  // The file is read a second time as raw lines; edits are made on these
  // lines rather than by re-serializing the parsed document.
  const dvcYamlLines = readFileSync(dvcYamlFile, 'utf8').split('\n')
  const plots = doc.get('plots', true) as yaml.YAMLSeq | undefined

  if (!plots?.range) {
    // No usable `plots` node: append the whole generated section, using the
    // entire file to detect indentation.
    const plotYaml = getPlotsYaml(plotObj, dvcYamlLines)
    dvcYamlLines.push(...plotYaml)

    // Fire-and-forget: the editor is not awaited before writing.
    void openFileInEditor(dvcYamlFile)
    return writeFileSync(dvcYamlFile, dvcYamlLines.join('\n'))
  }

  // NOTE(review): assumes `linePos(...).line` is 1-based and `range[2]` is the
  // node's end offset, per the yaml library — confirm against its docs.
  const plotsEndPos = lineCounter.linePos(plots.range[2]).line
  // True when the node ends on the last line and that line is not blank.
  const arePlotsAtBottomOfFile =
    plotsEndPos === dvcYamlLines.length &&
    dvcYamlLines[dvcYamlLines.length - 1].trim() !== ''
  const insertLineNum = arePlotsAtBottomOfFile ? plotsEndPos : plotsEndPos - 1

  const plotsStartPos = lineCounter.linePos(plots.range[0]).line - 1
  // Only the lines of the existing `plots` node are used to detect indentation.
  const plotYaml = getPlotsYaml(
    plotObj,
    dvcYamlLines.slice(plotsStartPos, insertLineNum)
  )
  // The first generated line is skipped — presumably the `plots:` key, which
  // the file already contains.
  dvcYamlLines.splice(insertLineNum, 0, ...plotYaml.slice(1))

  // Fire-and-forget: the editor is not awaited before writing.
  void openFileInEditor(dvcYamlFile)
  return writeFileSync(dvcYamlFile, dvcYamlLines.join('\n'))
}

/**
 * Writes `contents` to `path`, calling `ensureFileSync` on the path first.
 *
 * @param path - Destination file.
 * @param contents - Text to write.
 */
export const writeFile = (path: string, contents: string): void => {
  ensureFileSync(path)
  writeFileSync(path, contents)
}

/**
 * Extracts the extension of a file path, including the leading dot.
 *
 * @param filePath - Path to inspect.
 * @returns The `ext` part reported by `path.parse` (empty when there is none).
 */
export const getFileExtension = (filePath: string) => {
  const { ext } = parse(filePath)
  return ext
}

/**
 * Computes the path of a URI relative to a DVC root.
 *
 * @param dvcRoot - Root the result is relative to.
 * @param uri - URI whose `fsPath` is used.
 * @returns The relative path from `dvcRoot` to `uri.fsPath`.
 */
export const relativeWithUri = (dvcRoot: string, uri: Uri) => {
  const { fsPath } = uri
  return relative(dvcRoot, fsPath)
}

/**
 * Removes whatever is at `path` by delegating to `removeSync`.
 *
 * @param path - Path to remove.
 */
export const removeDir = (path: string): void => {
  removeSync(path)
}

/**
 * Reads and parses a YAML file.
 *
 * @param path - Path of the YAML file.
 * @returns The parsed value cast to `T` (not validated), or `undefined` after
 * logging when reading or parsing throws.
 */
const loadYaml = <T>(path: string): T | undefined => {
  try {
    const contents = readFileSync(path, 'utf8')
    return yaml.parse(contents) as T
  } catch {
    Logger.error(`failed to load yaml ${path}`)
  }
}

/**
 * Reads and parses a JSON file.
 *
 * @param path - Path of the JSON file.
 * @returns The parsed value cast to `T` (not validated), or `undefined` after
 * logging when reading or parsing throws.
 */
export const loadJson = <T>(path: string): T | undefined => {
  try {
    const raw = readFileSync(path).toString()
    return JSON.parse(raw) as T
  } catch {
    Logger.error(`failed to load JSON from ${path}`)
  }
}

/**
 * Reads a CSV file and converts it with `csv2json`.
 *
 * @param path - Path of the CSV file.
 * @returns The converted data, or `undefined` after logging when reading or
 * converting throws.
 */
const loadCsv = (path: string) => {
  try {
    const csvText = readFileSync(path).toString()
    return csv2json(csvText)
  } catch {
    Logger.error(`failed to load CSV from ${path}`)
  }
}

/**
 * Reads a TSV file and converts it with `csv2json`, using a tab as the field
 * delimiter.
 *
 * @param path - Path of the TSV file.
 * @returns The converted data, or `undefined` after logging when reading or
 * converting throws.
 */
const loadTsv = (path: string) => {
  try {
    const tsvText = readFileSync(path).toString()
    const tabDelimited = { delimiter: { field: '\t' } }
    return csv2json(tsvText, tabDelimited)
  } catch {
    Logger.error(`failed to load TSV from ${path}`)
  }
}

/**
 * Loads a data file with the loader that matches its extension.
 *
 * @param file - Path of the data file.
 * @returns The loaded data for `.csv`, `.json`, `.tsv` and `.yaml` files;
 * `undefined` for any other extension.
 */
const loadDataFile = (file: string): unknown => {
  const ext = getFileExtension(file)

  if (ext === '.csv') {
    return loadCsv(file)
  }
  if (ext === '.json') {
    return loadJson<Record<string, unknown> | unknown[]>(file)
  }
  if (ext === '.tsv') {
    return loadTsv(file)
  }
  if (ext === '.yaml') {
    return loadYaml<Record<string, unknown>>(file)
  }
  return undefined
}

/**
 * Loads several data files one after another.
 *
 * @param files - Paths of the files to load.
 * @returns One `{ data, file }` entry per input path, in input order.
 */
export const loadDataFiles = async (
  files: string[]
): Promise<{ file: string; data: unknown }[]> => {
  const loaded: { file: string; data: unknown }[] = []

  for (let index = 0; index < files.length; index++) {
    const file = files[index]
    const data = await loadDataFile(file)
    loaded.push({ data, file })
  }

  return loaded
}

/**
 * Serializes a value as JSON and writes it to `path`.
 *
 * @param path - Destination file.
 * @param obj - Object or array of objects to serialize.
 * @param format - When `true` the JSON is indented with 4 spaces; otherwise
 * it is written compactly.
 */
export const writeJson = <
  T extends Record<string, unknown> | Array<Record<string, unknown>>
>(
  path: string,
  obj: T,
  format = false
): void => {
  const indent = format ? 4 : undefined
  return writeFile(path, JSON.stringify(obj, null, indent))
}

/**
 * Converts records with `json2csv` and writes the result to `path`.
 *
 * @param path - Destination file.
 * @param arr - Records to convert.
 */
export const writeCsv = (path: string, arr: Array<Record<string, unknown>>) => {
  const csvText = json2csv(arr)
  return writeFile(path, csvText)
}

/**
 * Converts records with `json2csv`, using a tab as the field delimiter, and
 * writes the result to `path`.
 *
 * @param path - Destination file.
 * @param arr - Records to convert.
 */
export const writeTsv = (path: string, arr: Array<Record<string, unknown>>) => {
  const tabDelimited = { delimiter: { field: '\t' } }
  const tsvText = json2csv(arr, tabDelimited)
  return writeFile(path, tsvText)
}

/**
 * Extracts a process id from the contents of a signal file.
 *
 * The contents are first tried as JSON with a truthy `pid` property. When
 * that does not apply (or anything in that attempt throws), the raw contents
 * are passed to `createValidInteger` instead.
 *
 * @param contents - Raw file contents.
 * @returns Whatever `createValidInteger` produces for the chosen value.
 */
const getPid = (contents: string): number | undefined => {
  try {
    const parsed = JSON.parse(contents) as { pid?: string }
    const jsonPid = parsed.pid
    if (jsonPid) {
      return createValidInteger(jsonPid)
    }
  } catch {
    // Fall back to the raw contents below.
  }

  return createValidInteger(contents)
}

/**
 * Reads a process id from a file and confirms the process still exists.
 *
 * The file is removed when it holds no usable pid or when `processExists`
 * reports that the process is gone.
 *
 * @param path - Path of the file holding the pid.
 * @returns The pid of the existing process, or `undefined` when the file is
 * missing or was removed.
 */
export const getPidFromFile = async (
  path: string
): Promise<number | undefined> => {
  if (!exists(path)) {
    return undefined
  }

  const pid = getPid(readFileSync(path).toString())
  const isRunning = !!pid && (await processExists(pid))

  if (isRunning) {
    return pid
  }

  removeSync(path)
  return undefined
}

/**
 * Reads a single top-level entry from a JSON file.
 *
 * @param path - Path of the JSON file.
 * @param key - Property to look up on the parsed value.
 * @returns The value stored under `key` (not validated to be a string), or
 * `undefined` when the file cannot be loaded, parses to a falsy value, or the
 * lookup throws.
 */
export const getEntryFromJsonFile = (
  path: string,
  key: string
): string | undefined => {
  const entries = loadJson<{ [key: string]: string }>(path)
  if (!entries) {
    return undefined
  }

  try {
    return entries[key]
  } catch {}
}

/**
 * Checks whether a signal file yields a pid via `getPidFromFile`.
 *
 * @param path - Path of the signal file.
 * @returns `true` when `getPidFromFile` resolves to a truthy pid.
 */
export const checkSignalFile = async (path: string): Promise<boolean> => {
  const pid = await getPidFromFile(path)
  return Boolean(pid)
}

/**
 * Waits until a signal file stops being valid, then invokes `callback`.
 *
 * The file is checked after every `ms` delay; the first check happens only
 * after one full delay has elapsed.
 *
 * @param path - Path of the signal file to poll.
 * @param callback - Invoked once `checkSignalFile` reports `false`.
 * @param ms - Delay between checks in milliseconds.
 */
export const pollSignalFileForProcess = async (
  path: string,
  callback: () => void,
  ms = 5000
): Promise<void> => {
  let signalIsValid = true

  while (signalIsValid) {
    await delay(ms)
    signalIsValid = await checkSignalFile(path)
  }

  return callback()
}

/**
 * Produces display text for a binary path.
 *
 * @param path - Path to display.
 * @returns `undefined` for a missing or empty path; `.` + separator + the
 * path relative to the first workspace folder when the path is that folder or
 * lies inside it; otherwise the path unchanged.
 */
export const getBinDisplayText = (
  path: string | undefined
): string | undefined => {
  if (!path) {
    return undefined
  }

  const workspaceRoot = getFirstWorkspaceFolder()
  if (!workspaceRoot || !isSameOrChild(workspaceRoot, path)) {
    return path
  }

  return '.' + sep + relative(workspaceRoot, path)
}

/**
 * Shows a save dialog pre-filled with a file name and limited to a single
 * extension.
 *
 * @param fileName - Path used as the dialog's default URI.
 * @param extname - Extension offered by the filter; its upper-cased form is
 * the filter's label.
 * @returns The result of `window.showSaveDialog`.
 */
export const showSaveDialog = (fileName: string, extname: string) => {
  const filterLabel = extname.toUpperCase()

  return window.showSaveDialog({
    defaultUri: Uri.file(fileName),
    filters: { [filterLabel]: [extname] }
  })
}