trufflesuite/truffle

View on GitHub
packages/codec/lib/contexts/utils.ts

Summary

Maintainability
A
0 mins
Test Coverage
import debugModule from "debug";
const debug = debugModule("codec:contexts:utils");

import * as Evm from "@truffle/codec/evm";
import type * as Compilations from "@truffle/codec/compilations";
import type * as Ast from "@truffle/codec/ast";
import * as Conversion from "@truffle/codec/conversion";
import type { CompilerVersion } from "@truffle/codec/compiler";
import type { Context, Contexts } from "./types";
import escapeRegExp from "lodash/escapeRegExp";
import * as cbor from "cbor";
import { Shims } from "@truffle/compile-common";
import * as Abi from "@truffle/abi-utils";
import type * as Common from "@truffle/codec/common";
import * as AbiDataUtils from "@truffle/codec/abi-data/utils";

//finds the context matching the given bytecode, or returns null if
//there is none.  when several contexts match, we prefer one that is
//not derived from any other matching context in the same compilation;
//if that still leaves several, the first one encountered wins.
export function findContext(
  contexts: Contexts,
  binary: string
): Context | null {
  //collect every context whose binary matches
  const candidates: Context[] = [];
  for (const candidate of Object.values(contexts)) {
    if (matchContext(candidate, binary)) {
      candidates.push(candidate);
    }
  }

  //is `base` a *proper* base contract of `derived`?
  const isProperBaseOf = (base: Context, derived: Context): boolean => {
    if (derived.compilationId !== base.compilationId) {
      //contract IDs are only comparable within a single compilation
      return false;
    }
    const lineage = derived.linearizedBaseContracts;
    if (!lineage || base.contractId === undefined) {
      return false;
    }
    //the first entry is skipped because every contract is listed as
    //its own base there; only the remaining entries are proper bases
    return lineage.slice(1).includes(base.contractId);
  };

  //return the first candidate that has no proper base among the candidates
  for (const candidate of candidates) {
    const hasMatchingBase = candidates.some(other =>
      isProperBaseOf(other, candidate)
    );
    if (!hasMatchingBase) {
      return candidate;
    }
  }
  return null;
}

//checks whether the given bytecode (a hex string) matches the context's
//binary.  a "." in the context's binary acts as a wildcard matching any
//character; all other characters are compared case-insensitively.
export function matchContext(context: Context, givenBinary: string): boolean {
  const { binary, compiler, isConstructor } = context;
  const extraLength = givenBinary.length - binary.length;

  //constructors and Vyper contracts may have additional data appended;
  //anything else must be exactly the same length
  const mayHaveTrailingData = isConstructor || compiler?.name === "vyper";
  if (!mayHaveTrailingData && extraLength !== 0) {
    return false;
  }
  //the given binary can never be shorter than the context's binary...
  if (extraLength < 0) {
    return false;
  }
  //...and any additional data must be a whole number of words
  //(each word being 2 * WORD_SIZE hex digits)
  if (extraLength % (2 * Evm.Utils.WORD_SIZE) !== 0) {
    return false;
  }

  //character-by-character comparison; this is safe because hex strings
  //are plain ASCII.  the comparison is case-insensitive because Solidity
  //writes addresses in checksum case in the compiled bytecode.
  for (let position = 0; position < binary.length; position++) {
    const expected = binary.charAt(position);
    if (expected === ".") {
      continue; //wildcard
    }
    const actual = givenBinary.charAt(position);
    if (expected.toLowerCase() !== actual.toLowerCase()) {
      return false;
    }
  }
  return true;
}

//returns a normalized copy of the given contexts, in which every part of
//each binary that may legitimately vary between compile time and what's
//actually on chain has been replaced by "." wildcard characters.
//the steps, in order, are:
//1. clone the contexts (each context object is shallow-copied)
//2. wildcard out link references (both placeholder formats)
//3. wildcard out the zeroed self-address in libraries' deployed bytecode
//4. wildcard out immutable references
//5. fill in missing compiler info from the CBOR section, where possible
//6. wildcard out CBOR sections that contain a metadata hash
//the order matters, since each step rewrites the binaries that the later
//steps then read.  the input object and its contexts are not reassigned;
//only the clones' binary and compiler fields are.
export function normalizeContexts(contexts: Contexts): Contexts {
  //unfortunately, due to our current link references format, we can't
  //really use the binary from the artifact directly -- neither for purposes
  //of matching, nor for purposes of decoding internal functions.  So, we
  //need to perform this normalization step on our contexts before using
  //them.  Once we have truffle-db, this step should largely go away.

  debug("normalizing contexts");

  //first, let's clone the input
  //(let's do a 2-deep clone because we'll be altering binary & compiler;
  //anything nested more deeply is still shared with the input)
  let newContexts: Contexts = Object.assign(
    {},
    ...Object.entries(contexts).map(([contextHash, context]) => ({
      [contextHash]: { ...context }
    }))
  );

  debug("contexts cloned");

  //next, we get all the library names and sort them descending by length.
  //We're going to want to go in descending order of length so that we
  //don't run into problems when one name is a substring of another.
  //For simplicity, we'll exclude short names, because we can
  //handle these with our more general check for link references at the end
  //NOTE(review): this comment used to say names of length <38 are excluded,
  //but the filter below keeps names of length >= fillerLength - 3 (i.e. 37
  //when fillerLength is 40) -- confirm which threshold is intended
  const fillerLength = 2 * Evm.Utils.ADDRESS_SIZE;
  let names = Object.values(newContexts)
    .filter(context => context.contractKind === "library")
    .map(context => context.contractName)
    .filter(name => name.length >= fillerLength - 3)
    //the -3 is for 2 leading underscores and 1 trailing
    .sort((name1, name2) => name2.length - name1.length);

  debug("names sorted");

  //now, we need to turn all these names into regular expressions, because,
  //unfortunately, str.replace() will only replace all if you use a /g regexp;
  //note that because names may contain '$', we need to escape them
  //(also we prepend "__" because that's the placeholder format)
  let regexps = names.map(name => new RegExp(escapeRegExp("__" + name), "g"));

  debug("regexps prepared");

  //having done so, we can do the replace for these names!
  //(every occurrence, in every context, becomes a run of fillerLength dots)
  const replacement = ".".repeat(fillerLength);
  for (let regexp of regexps) {
    for (let context of Object.values(newContexts)) {
      context.binary = context.binary.replace(regexp, replacement);
    }
  }

  debug("long replacements complete");

  //now we can do a generic replace that will catch all names of length
  //<40, while also catching the Solidity compiler's link reference format
  //as well as Truffle's.  Hooray!
  const genericRegexp = new RegExp("_.{" + (fillerLength - 2) + "}_", "g");
  //we're constructing the regexp /_.{38}_/g, but without hardcoding the
  //38; it matches any fillerLength-character run that starts and ends
  //with an underscore
  for (let context of Object.values(newContexts)) {
    context.binary = context.binary.replace(genericRegexp, replacement);
  }

  debug("short replacements complete");
  //now we must handle the delegatecall guard -- libraries' deployedBytecode will include
  //0s in place of their own address instead of a link reference at the
  //beginning, so we need to account for that too
  //(note this is a plain string replace, so only the first occurrence is
  //affected; and since the search string begins with "0x", that occurrence
  //is expected to be at the very start of the binary)
  const pushAddressInstruction = (0x5f + Evm.Utils.ADDRESS_SIZE).toString(16); //"73"
  for (let context of Object.values(newContexts)) {
    if (context.contractKind === "library" && !context.isConstructor) {
      context.binary = context.binary.replace(
        "0x" + pushAddressInstruction + "00".repeat(Evm.Utils.ADDRESS_SIZE),
        "0x" + pushAddressInstruction + replacement
      );
    }
  }

  debug("extra library replacements complete");

  //now let's handle immutable references
  //(these are much nicer than link references due to not having to deal with the old format)
  for (let context of Object.values(newContexts)) {
    if (context.immutableReferences) {
      for (let variable of Object.values(context.immutableReferences)) {
        for (let { start, length } of <{ start: number; length: number }[]>(
          variable
        )) {
          //(the cast above is presumably only there to satisfy the type checker)
          //start and length are in bytes, so we double them to get hex digits,
          //and add 2 to skip over the leading "0x"
          let lowerStringIndex = 2 + 2 * start;
          let upperStringIndex = 2 + 2 * (start + length);
          context.binary =
            context.binary.slice(0, lowerStringIndex) +
            "..".repeat(length) +
            context.binary.slice(upperStringIndex);
        }
      }
    }
  }

  debug("immutables complete");

  //now: extract & decode all the cbor's.  we're going to use these for
  //two different purposes, so let's just get them all upfront.
  let cborInfos: { [contextHash: string]: CborInfo } = {};
  let decodedCbors: { [contextHash: string]: any } = {};
  //note: invalid cbor will be indicated in decodedCbors by the lack of an entry,
  //*not* by undefined or null, since there exists cbor for those :P
  //(cborInfos, by contrast, gets an entry for every context, which will be
  //null when no cbor section could be located)

  for (const [contextHash, context] of Object.entries(newContexts)) {
    const cborInfo = extractCborInfo(context.binary);
    cborInfos[contextHash] = cborInfo;
    if (cborInfo) {
      try {
        //note this *will* throw if there's data left over,
        //which is what we want it to do
        const decoded: any = cbor.decodeFirstSync(cborInfo.cbor);
        decodedCbors[contextHash] = decoded;
      } catch {
        //just don't add it
      }
    }
  }

  debug("intial cbor processing complete");

  //now: if a context lacks a compiler, but a version can be found in the
  //cbor, add it.
  //(detectCompilerInfo may return undefined, in which case nothing changes)
  for (let [contextHash, context] of Object.entries(newContexts)) {
    if (!context.compiler && contextHash in decodedCbors) {
      context.compiler = detectCompilerInfo(decodedCbors[contextHash]);
    }
  }

  debug("versions complete");

  //one last step: where there's CBOR with a metadata hash, we'll allow the
  //CBOR to vary, aside from the length (note: ideally here we would *only*
  //dot-out the metadata hash part of the CBOR, but, well, it's not worth the
  //trouble to detect that; doing that could potentially get pretty involved)
  //note that if the code isn't Solidity, that's fine -- we just won't get
  //valid CBOR and will not end up adding to our list of regular expressions
  const externalCborInfos = Object.entries(cborInfos)
    .filter(
      ([contextHash, _cborInfo]) =>
        contextHash in decodedCbors &&
        isObjectWithHash(decodedCbors[contextHash])
    )
    .map(([_contextHash, cborInfo]) => cborInfo);
  //each CBOR segment (CBOR plus length suffix) gets replaced by two dots per
  //CBOR byte, followed by the unchanged length suffix
  const cborRegexps = externalCborInfos.map(cborInfo => ({
    input: new RegExp(cborInfo.cborSegment, "g"), //hex string so no need for escape
    output: "..".repeat(cborInfo.cborLength) + cborInfo.cborLengthHex
  }));
  //HACK: we will replace *every* occurrence of *every* external CBOR occurring
  //in *every* context, in order to cover created contracts (including if there
  //are multiple or recursive ones)
  for (let context of Object.values(newContexts)) {
    for (let { input, output } of cborRegexps) {
      context.binary = context.binary.replace(input, output);
    }
  }

  debug("external wildcards complete");

  //finally, return the normalized clones
  return newContexts;
}

//describes where the trailing CBOR section sits within a bytecode hex
//string.  all indices are positions in the hex string, counting the
//leading "0x"; lengths in bytes are noted as such.
interface CborInfo {
  //index in the hex string at which the CBOR begins
  cborStart: number;
  //length of the CBOR in bytes, as read from the bytecode's final two bytes
  cborLength: number;
  //index in the hex string just past the CBOR, i.e. where the
  //two-byte length suffix begins
  cborEnd: number;
  //the two-byte length suffix, as 4 hex digits
  cborLengthHex: string;
  //the CBOR itself, as hex digits (no "0x" prefix)
  cbor: string;
  //the CBOR followed by its length suffix (cbor + cborLengthHex)
  cborSegment: string;
}

//returns cbor info if cbor section is found, null if it is not.
//the cbor section is located purely by position: the final two bytes of
//the bytecode are read as a big-endian length N, and the N bytes just
//before them are taken to be the cbor.  no attempt is made here to check
//that those bytes actually *are* valid cbor; that's up to the caller.
//null is returned if:
//- the bytecode has fewer than two bytes,
//- the final two bytes are not 4 hex digits (e.g. they contain "."
//  wildcards), or
//- the claimed length would run past the start of the bytecode.
//note that it does not account for Vyper 0.3.4's idiosyncratic format
//and so will return null on that.  but that's OK, because Vyper 0.3.4's
//CBOR section is always fixed, so there isn't a need to normalize it here
function extractCborInfo(binary: string): CborInfo | null {
  debug("extracting cbor segement of %s", binary);
  const suffixDigits = 2 * 2; //2 bytes * 2 for hex
  //we strip the "0x" before taking the suffix so that, if the contract
  //has less than two bytes in its bytecode, we get something too short
  //rather than something containing part of the "0x"
  //(that won't happen with Solidity, but let's be certain)
  const lastTwoBytes = binary.slice(2).slice(-suffixDigits);
  //the suffix must be exactly 4 hex digits.  this covers both the
  //too-short case and the non-hex case; the latter matters because
  //parseInt would otherwise yield NaN (or silently parse only a prefix),
  //and NaN slips through the sanity check below since NaN < 2 is false
  if (!/^[0-9a-fA-F]{4}$/.test(lastTwoBytes)) {
    return null; //don't try to handle this case!
  }
  const cborLength: number = parseInt(lastTwoBytes, 16);
  const cborEnd = binary.length - suffixDigits;
  const cborStart = cborEnd - cborLength * 2;
  //sanity check: the cbor can't begin before the end of the "0x"
  if (cborStart < 2) {
    return null; //don't try to handle this case!
  }
  const cbor = binary.slice(cborStart, cborEnd);
  return {
    cborStart,
    cborLength,
    cborEnd,
    cborLengthHex: lastTwoBytes,
    cbor,
    cborSegment: cbor + lastTwoBytes
  };
}

//determines whether decoded cbor is a map (or plain object) that has
//at least one of the known metadata hash keys
function isObjectWithHash(decoded: any): boolean {
  if (decoded === null || typeof decoded !== "object") {
    return false;
  }
  //decoded cbor may arrive as either a Map or a plain object (at least
  //that was the case with borc; it hasn't been rechecked for cbor), so
  //we put everything in Map form before looking anything up
  const fields: Map<any, any> =
    decoded instanceof Map ? decoded : new Map(Object.entries(decoded));
  for (const hashKey of ["bzzr0", "bzzr1", "ipfs"]) {
    if (fields.has(hashKey)) {
      return true;
    }
  }
  return false;
}

//reads the compiler name & version out of decoded cbor metadata.
//the result is all-or-nothing: if there's no "solc" field, or it's there
//but not in a recognized format, we return undefined rather than a
//partial result.
function detectCompilerInfo(decoded: any): CompilerVersion | undefined {
  if (decoded === null || typeof decoded !== "object") {
    return undefined;
  }
  //decoded cbor may arrive as either a Map or a plain object,
  //so we put everything in Map form before looking anything up
  const fields: Map<any, any> =
    decoded instanceof Map ? decoded : new Map(Object.entries(decoded));
  if (!fields.has("solc")) {
    //no solc version field; this is what you get for versions <0.5.9.
    //(no other language currently attaches cbor info, so there's
    //nothing else to look for)
    return undefined;
  }
  const rawVersion = fields.get("solc");
  let version: string | undefined;
  if (typeof rawVersion === "string") {
    //prerelease versions store the version as a string; use it as-is
    version = rawVersion;
  } else if (rawVersion instanceof Uint8Array && rawVersion.length === 3) {
    //release versions store a 3-byte bytestring (major, minor, patch);
    //joining with "." yields a version string, albeit one lacking the
    //precise commit & etc
    version = rawVersion.join(".");
  }
  //any other format leaves version unset
  return version === undefined ? undefined : { name: "solc", version };
}

//builds the context for a contract.  by default this is the context for
//its deployed bytecode; pass isConstructor = true to get the one for its
//constructor bytecode instead.  the node, if given, is the contract's AST
//node; it supplies the contract ID and the linearized base contracts.
//the context's identifying hash is the keccak256 of the legacy-format
//binary, treated as a string.
export function makeContext(
  contract: Compilations.Contract,
  node: Ast.AstNode | undefined,
  compilation: Compilations.Compilation,
  isConstructor = false
): Context {
  const abi = Abi.normalize(contract.abi);

  //pick the relevant bytecode & convert it to legacy (string) format
  const binary: string = Shims.NewToLegacy.forBytecode(
    isConstructor ? contract.bytecode : contract.deployedBytecode
  );
  const hash = Conversion.toHexString(
    Evm.Utils.keccak256({ type: "string", value: binary })
  );
  debug("hash: %s", hash);

  //find the fallback & receive entries, if any
  //(the casts are there because TS can't infer the narrowed types)
  const fallback =
    (abi.find(entry => entry.type === "fallback") as
      | Abi.FallbackEntry
      | undefined) ?? null;
  const receive =
    (abi.find(entry => entry.type === "receive") as
      | Abi.ReceiveEntry
      | undefined) ?? null;

  return {
    context: hash,
    contractName: contract.contractName,
    binary,
    contractId: node?.id,
    linearizedBaseContracts: node?.linearizedBaseContracts,
    contractKind: contractKind(contract, node),
    //constructor contexts don't get immutable references
    immutableReferences: isConstructor
      ? undefined
      : contract.immutableReferences,
    isConstructor,
    abi: AbiDataUtils.computeSelectors(abi),
    payable: AbiDataUtils.abiHasPayableFallback(abi),
    fallbackAbi: { fallback, receive },
    //the compilation's compiler takes priority over the contract's
    compiler: compilation.compiler || contract.compiler,
    compilationId: compilation.id
  };
}

//works out the contract kind for a contract, using (in order of
//preference) its AST node, or failing that a heuristic on its deployed
//bytecode, or failing that just assuming it's an ordinary contract
function contractKind(
  contract: Compilations.Contract,
  node?: Ast.AstNode
): Common.ContractKind {
  //if there's a node, the node says what kind it is; use that
  if (node) {
    return node.contractKind;
  }
  //(a contract kind field on the contract object itself would be the
  //next thing to check, but no such thing is implemented yet)

  //with no deployed bytecode we've got nothing to go on,
  //so we assume an ordinary contract
  if (!contract.deployedBytecode) {
    return "contract";
  }
  //otherwise we resort to a HACK: a library's deployed bytecode begins
  //with PUSH20 followed by 20 zero bytes, so we take anything starting
  //that way to be a library and anything else to be an ordinary contract
  //(note: this will fail to detect libraries from before Solidity 0.4.20)
  const deployedBytecode = Shims.NewToLegacy.forBytecode(
    contract.deployedBytecode
  );
  const push20 = (0x5f + Evm.Utils.ADDRESS_SIZE).toString(16); //"73"
  const libraryPrefix = `0x${push20}${"00".repeat(Evm.Utils.ADDRESS_SIZE)}`;
  if (deployedBytecode.startsWith(libraryPrefix)) {
    return "library";
  }
  return "contract";
}