cloudfoundry-community/bosh-cloudstack-cpi
go_agent/src/code.google.com/p/go.tools/ssa/lift.go

// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ssa

// This file defines the lifting pass which tries to "lift" Alloc
// cells (new/local variables) into SSA registers, replacing loads
// with the dominating stored value, eliminating loads and stores, and
// inserting φ-nodes as needed.
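//
// For example (an illustrative sketch, not literal printed SSA), a
// body that reads and writes a local cell
//
//     t0 = local int (x)      ; Alloc
//     *t0 = 1:int             ; Store
//     t1 = *t0                ; Load
//     t2 = t1 + 2:int
//
// is rewritten so that uses of the load refer directly to the
// dominating stored value
//
//     t2 = 1:int + 2:int
//
// with the Alloc, Store and Load deleted, and with φ-nodes inserted
// at control-flow joins reached by more than one store.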

// Cited papers and resources:
//
// Ron Cytron et al. 1991. Efficiently computing SSA form...
// http://doi.acm.org/10.1145/115372.115320
//
// Cooper, Harvey, Kennedy.  2001.  A Simple, Fast Dominance Algorithm.
// Software Practice and Experience 2001, 4:1-10.
// http://www.hipersoft.rice.edu/grads/publications/dom14.pdf
//
// Daniel Berlin, llvmdev mailing list, 2012.
// http://lists.cs.uiuc.edu/pipermail/llvmdev/2012-January/046638.html
// (Be sure to expand the whole thread.)

// TODO(adonovan): opt: there are many optimizations worth evaluating, and
// the conventional wisdom for SSA construction is that a simple
// algorithm well engineered often beats those of better asymptotic
// complexity on all but the most egregious inputs.
//
// Danny Berlin suggests that the Cooper et al. algorithm for
// computing the dominance frontier is superior to Cytron et al.
// Furthermore he recommends that rather than computing the DF for the
// whole function then renaming all alloc cells, it may be cheaper to
// compute the DF for each alloc cell separately and throw it away.
//
// Consider exploiting liveness information to avoid creating dead
// φ-nodes which we then immediately remove.
//
// Integrate lifting with scalar replacement of aggregates (SRA) since
// the two are synergistic.
//
// Also see many other "TODO: opt" suggestions in the code.

import (
    "fmt"
    "go/token"
    "math/big"
    "os"

    "code.google.com/p/go.tools/go/types"
)

// If true, perform sanity checking and show diagnostic information at
// each step of lifting.  Very verbose.
const debugLifting = false

// domFrontier maps each block to the set of blocks in its dominance
// frontier.  The outer slice is conceptually a map keyed by
// Block.Index.  The inner slice is conceptually a set, possibly
// containing duplicates.
//
// TODO(adonovan): opt: measure impact of dups; consider a packed bit
// representation, e.g. big.Int, and bitwise parallel operations for
// the union step in the Children loop.
//
// domFrontier's methods mutate the slice's elements but not its
// length, so their receivers needn't be pointers.
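//
// (Recall that DF(b) is the set of blocks y such that b dominates
// some predecessor of y but does not strictly dominate y itself.
// In a diamond CFG entry→{then,else}→join, for instance,
// DF(then) = DF(else) = {join}: each branch dominates a predecessor
// of join but not join itself, so join is where φ-nodes are placed.)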
//
type domFrontier [][]*BasicBlock

func (df domFrontier) add(u, v *domNode) {
    p := &df[u.Block.Index]
    *p = append(*p, v.Block)
}

// build builds the dominance frontier df for the dominator (sub)tree
// rooted at u, using the Cytron et al. algorithm.
//
// TODO(adonovan): opt: consider Berlin approach, computing pruned SSA
// by pruning the entire IDF computation, rather than merely pruning
// the DF -> IDF step.
func (df domFrontier) build(u *domNode) {
    // Encounter each node u in postorder of dom tree.
    for _, child := range u.Children {
        df.build(child)
    }
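    // DF_local: successors of u.Block whose immediate dominator
    // is not u belong to u's dominance frontier.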
    for _, vb := range u.Block.Succs {
        if v := vb.dom; v.Idom != u {
            df.add(u, v)
        }
    }
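    // DF_up: propagate entries from each dominated child w; blocks
    // in DF(w) whose immediate dominator is not u also belong to DF(u).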
    for _, w := range u.Children {
        for _, vb := range df[w.Block.Index] {
            // TODO(adonovan): opt: use word-parallel bitwise union.
            if v := vb.dom; v.Idom != u {
                df.add(u, v)
            }
        }
    }
}

func buildDomFrontier(fn *Function) domFrontier {
    df := make(domFrontier, len(fn.Blocks))
    df.build(fn.Blocks[0].dom)
    if fn.Recover != nil {
        df.build(fn.Recover.dom)
    }
    return df
}

// lift attempts to replace local and new Allocs accessed only with
// load/store by SSA registers, inserting φ-nodes where necessary.
// The result is a program in classical pruned SSA form.
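// (Pruning here is achieved by discarding φ-nodes found to be dead
// after renaming, rather than by a prior liveness analysis; see the
// newPhis compression loop below.)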
//
// Preconditions:
// - fn has no dead blocks (blockopt has run).
// - Def/use info (Operands and Referrers) is up-to-date.
//
func lift(fn *Function) {
    // TODO(adonovan): opt: lots of little optimizations may be
    // worthwhile here, especially if they cause us to avoid
    // buildDomTree.  For example:
    //
    // - Alloc never loaded?  Eliminate.
    // - Alloc never stored?  Replace all loads with a zero constant.
    // - Alloc stored once?  Replace loads with dominating store;
    //   don't forget that an Alloc is itself an effective store
    //   of zero.
    // - Alloc used only within a single block?
    //   Use degenerate algorithm avoiding φ-nodes.
    // - Consider synergy with scalar replacement of aggregates (SRA).
    //   e.g. *(&x.f) where x is an Alloc.
    //   Perhaps we'd get better results if we generated this as x.f
    //   i.e. Field(x, .f) instead of Load(FieldIndex(x, .f)).
    //   Unclear.
    //
    // But we will start with the simplest correct code.

    buildDomTree(fn)

    df := buildDomFrontier(fn)

    if debugLifting {
        title := false
        for i, blocks := range df {
            if blocks != nil {
                if !title {
                    fmt.Fprintf(os.Stderr, "Dominance frontier of %s:\n", fn)
                    title = true
                }
                fmt.Fprintf(os.Stderr, "\t%s: %s\n", fn.Blocks[i], blocks)
            }
        }
    }

    newPhis := make(newPhiMap)

    // During this pass we will replace some BasicBlock.Instrs
    // (allocs, loads and stores) with nil, keeping a count in
    // BasicBlock.gaps.  At the end we will reset Instrs to the
    // concatenation of all non-dead newPhis and non-nil Instrs
    // for the block, reusing the original array if space permits.

    // While we're here, we also eliminate 'rundefers'
    // instructions in functions that contain no 'defer'
    // instructions.
    usesDefer := false

    // Determine which allocs we can lift and number them densely.
    // The renaming phase uses this numbering for compact maps.
    numAllocs := 0
    for _, b := range fn.Blocks {
        b.gaps = 0
        b.rundefers = 0
        for _, instr := range b.Instrs {
            switch instr := instr.(type) {
            case *Alloc:
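                // Liftable allocs receive a dense non-negative
                // index, used to key the renaming slice; allocs
                // that cannot be lifted keep index -1 and are
                // later retained in fn.Locals.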
                index := -1
                if liftAlloc(df, instr, newPhis) {
                    index = numAllocs
                    numAllocs++
                }
                instr.index = index
            case *Defer:
                usesDefer = true
            case *RunDefers:
                b.rundefers++
            }
        }
    }

    // renaming maps an alloc (keyed by index) to its replacement
    // value.  Initially the renaming contains nil, signifying the
    // zero constant of the appropriate type; we construct the
    // Const lazily at most once on each path through the domtree.
    // TODO(adonovan): opt: cache per-function not per subtree.
    renaming := make([]Value, numAllocs)

    // Renaming.
    rename(fn.Blocks[0], renaming, newPhis)

    // Eliminate dead new phis, then prepend the live ones to each block.
    for _, b := range fn.Blocks {

        // Compress the newPhis slice to eliminate unused phis.
        // TODO(adonovan): opt: compute liveness to avoid
        // placing phis in blocks for which the alloc cell is
        // not live.
        nps := newPhis[b]
        j := 0
        for _, np := range nps {
            if !phiIsLive(np.phi) {
                continue // discard it
            }
            nps[j] = np
            j++
        }
        nps = nps[:j]

        rundefersToKill := b.rundefers
        if usesDefer {
            rundefersToKill = 0
        }

        if j+b.gaps+rundefersToKill == 0 {
            continue // fast path: no new phis or gaps
        }

        // Compact nps + non-nil Instrs into a new slice.
        // TODO(adonovan): opt: compact in situ if there is
        // sufficient space or slack in the slice.
        dst := make([]Instruction, len(b.Instrs)+j-b.gaps-rundefersToKill)
        for i, np := range nps {
            dst[i] = np.phi
        }
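        // Here j == len(nps): the live φ-nodes occupy dst[:j], and
        // the surviving original instructions are appended after them.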
        for _, instr := range b.Instrs {
            if instr == nil {
                continue
            }
            if !usesDefer {
                if _, ok := instr.(*RunDefers); ok {
                    continue
                }
            }
            dst[j] = instr
            j++
        }
        b.Instrs = dst
    }

    // Remove any fn.Locals that were lifted.
    j := 0
    for _, l := range fn.Locals {
        if l.index < 0 {
            fn.Locals[j] = l
            j++
        }
    }
    // Nil out fn.Locals[j:] to aid GC.
    for i := j; i < len(fn.Locals); i++ {
        fn.Locals[i] = nil
    }
    fn.Locals = fn.Locals[:j]
}

func phiIsLive(phi *Phi) bool {
    for _, instr := range *phi.Referrers() {
        if instr == phi {
            continue // self-refs don't count
        }
        if _, ok := instr.(*DebugRef); ok {
            continue // debug refs don't count
        }
        return true
    }
    return false
}

type blockSet struct{ big.Int } // (inherit methods from Int)

// add adds b to the set and returns true if the set changed.
func (s *blockSet) add(b *BasicBlock) bool {
    i := b.Index
    if s.Bit(i) != 0 {
        return false
    }
    s.SetBit(&s.Int, i, 1)
    return true
}

// take removes an arbitrary element from a set s and
// returns its index, or returns -1 if empty.
func (s *blockSet) take() int {
    l := s.BitLen()
    for i := 0; i < l; i++ {
        if s.Bit(i) == 1 {
            s.SetBit(&s.Int, i, 0)
            return i
        }
    }
    return -1
}

// newPhi is a pair of a newly introduced φ-node and the lifted Alloc
// it replaces.
type newPhi struct {
    phi   *Phi
    alloc *Alloc
}

// newPhiMap records, for each basic block, the set of newPhis that
// must be prepended to the block.
type newPhiMap map[*BasicBlock][]newPhi

// liftAlloc determines whether alloc can be lifted into registers,
// and if so, it populates newPhis with all the φ-nodes it may require
// and returns true.
//
func liftAlloc(df domFrontier, alloc *Alloc, newPhis newPhiMap) bool {
    // Don't lift aggregates into registers, because we don't have
    // a way to express their zero-constants.
    switch deref(alloc.Type()).Underlying().(type) {
    case *types.Array, *types.Struct:
        return false
    }

    // Don't lift named return values in functions that defer
    // calls that may recover from panic.
    if fn := alloc.Parent(); fn.Recover != nil {
        for _, nr := range fn.namedResults {
            if nr == alloc {
                return false
            }
        }
    }

    // Compute defblocks, the set of blocks containing a
    // definition of the alloc cell.
    var defblocks blockSet
    for _, instr := range *alloc.Referrers() {
        // Bail out if we discover the alloc is not liftable;
        // the only operations permitted to use the alloc are
        // loads/stores into the cell, and DebugRef.
        switch instr := instr.(type) {
        case *Store:
            if instr.Val == alloc {
                return false // address used as value
            }
            if instr.Addr != alloc {
                panic("Alloc.Referrers is inconsistent")
            }
            defblocks.add(instr.Block())
        case *UnOp:
            if instr.Op != token.MUL {
                return false // not a load
            }
            if instr.X != alloc {
                panic("Alloc.Referrers is inconsistent")
            }
        case *DebugRef:
            // ok
        default:
            return false // some other instruction
        }
    }
    // The Alloc itself counts as a (zero) definition of the cell.
    defblocks.add(alloc.Block())

    if debugLifting {
        fmt.Fprintln(os.Stderr, "\tlifting ", alloc, alloc.Name())
    }

    fn := alloc.Parent()

    // Φ-insertion.
    //
    // What follows is the body of the main loop of the insert-φ
    // function described by Cytron et al, but instead of using
    // counter tricks, we just reset the 'hasAlready' and 'work'
    // sets each iteration.  These are bitmaps so it's pretty cheap.
    //
    // TODO(adonovan): opt: recycle slice storage for W,
    // hasAlready, defBlocks across liftAlloc calls.
    var hasAlready blockSet

    // Initialize W and work to defblocks.
    var work blockSet = defblocks // blocks seen
    var W blockSet                // blocks to do
    W.Set(&defblocks.Int)

    // Traverse iterated dominance frontier, inserting φ-nodes.
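    // (The iterated dominance frontier of defblocks is the limit of
    // DF(S), DF(S ∪ DF(S)), ...; each block in it is a join point
    // reached by distinct definitions of the cell and so may need a
    // φ-node.)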
    for i := W.take(); i != -1; i = W.take() {
        u := fn.Blocks[i]
        for _, v := range df[u.Index] {
            if hasAlready.add(v) {
                // Create φ-node.
                // It will be prepended to v.Instrs later, if needed.
                phi := &Phi{
                    Edges:   make([]Value, len(v.Preds)),
                    Comment: alloc.Comment,
                }
                phi.pos = alloc.Pos()
                phi.setType(deref(alloc.Type()))
                phi.block = v
                if debugLifting {
                    fmt.Fprintf(os.Stderr, "\tplace %s = %s at block %s\n", phi.Name(), phi, v)
                }
                newPhis[v] = append(newPhis[v], newPhi{phi, alloc})

                if work.add(v) {
                    W.add(v)
                }
            }
        }
    }

    return true
}

// replaceAll replaces all intraprocedural uses of x with y,
// updating x.Referrers and y.Referrers.
// Precondition: x.Referrers() != nil, i.e. x must be local to some function.
//
func replaceAll(x, y Value) {
    var rands []*Value
    pxrefs := x.Referrers()
    pyrefs := y.Referrers()
    for _, instr := range *pxrefs {
        rands = instr.Operands(rands[:0]) // recycle storage
        for _, rand := range rands {
            if *rand != nil {
                if *rand == x {
                    *rand = y
                }
            }
        }
        if pyrefs != nil {
            *pyrefs = append(*pyrefs, instr) // dups ok
        }
    }
    *pxrefs = nil // x is now unreferenced
}

// renamed returns the value to which alloc is being renamed,
// constructing it lazily if it's the implicit zero initialization.
//
func renamed(renaming []Value, alloc *Alloc) Value {
    v := renaming[alloc.index]
    if v == nil {
        v = zeroConst(deref(alloc.Type()))
        renaming[alloc.index] = v
    }
    return v
}

// rename implements the (Cytron et al) SSA renaming algorithm, a
// preorder traversal of the dominator tree replacing all loads of
// Alloc cells with the value stored to that cell by the dominating
// store instruction.  For lifting, we need only consider loads,
// stores and φ-nodes.
//
// renaming is a map from *Alloc (keyed by index number) to its
// dominating stored value; newPhis[x] is the set of new φ-nodes to be
// prepended to block x.
//
func rename(u *BasicBlock, renaming []Value, newPhis newPhiMap) {
    // Each φ-node becomes the new name for its associated Alloc.
    for _, np := range newPhis[u] {
        phi := np.phi
        alloc := np.alloc
        renaming[alloc.index] = phi
    }

    // Rename loads and stores of allocs.
    for i, instr := range u.Instrs {
        switch instr := instr.(type) {
        case *Alloc:
            if instr.index >= 0 { // store of zero to Alloc cell
                // Replace dominated loads by the zero value.
                renaming[instr.index] = nil
                if debugLifting {
                    fmt.Fprintf(os.Stderr, "\tkill alloc %s\n", instr)
                }
                // Delete the Alloc.
                u.Instrs[i] = nil
                u.gaps++
            }

        case *Store:
            if alloc, ok := instr.Addr.(*Alloc); ok && alloc.index >= 0 { // store to Alloc cell
                // Replace dominated loads by the stored value.
                renaming[alloc.index] = instr.Val
                if debugLifting {
                    fmt.Fprintf(os.Stderr, "\tkill store %s; new value: %s\n",
                        instr, instr.Val.Name())
                }
                // Delete the Store.
                u.Instrs[i] = nil
                u.gaps++
            }

        case *UnOp:
            if instr.Op == token.MUL {
                if alloc, ok := instr.X.(*Alloc); ok && alloc.index >= 0 { // load of Alloc cell
                    newval := renamed(renaming, alloc)
                    if debugLifting {
                        fmt.Fprintf(os.Stderr, "\tupdate load %s = %s with %s\n",
                            instr.Name(), instr, newval.Name())
                    }
                    // Replace all references to
                    // the loaded value by the
                    // dominating stored value.
                    replaceAll(instr, newval)
                    // Delete the Load.
                    u.Instrs[i] = nil
                    u.gaps++
                }
            }

        case *DebugRef:
            if alloc, ok := instr.X.(*Alloc); ok && alloc.index >= 0 { // ref of Alloc cell
                if instr.IsAddr {
                    instr.X = renamed(renaming, alloc)
                    instr.IsAddr = false
                } else {
                    // A source expression denotes the address
                    // of an Alloc that was optimized away.
                    instr.X = nil

                    // Delete the DebugRef.
                    u.Instrs[i] = nil
                    u.gaps++
                }
            }
        }
    }

    // For each φ-node in a CFG successor, rename the edge.
    for _, v := range u.Succs {
        phis := newPhis[v]
        if len(phis) == 0 {
            continue
        }
        i := v.predIndex(u)
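        // i is u's position among v's predecessors; φ-edge i carries
        // the value flowing along the u→v edge.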
        for _, np := range phis {
            phi := np.phi
            alloc := np.alloc
            newval := renamed(renaming, alloc)
            if debugLifting {
                fmt.Fprintf(os.Stderr, "\tsetphi %s edge %s -> %s (#%d) (alloc=%s) := %s\n",
                    phi.Name(), u, v, i, alloc.Name(), newval.Name())
            }
            phi.Edges[i] = newval
            if prefs := newval.Referrers(); prefs != nil {
                *prefs = append(*prefs, phi)
            }
        }
    }

    // Continue depth-first recursion over domtree, pushing a
    // fresh copy of the renaming map for each subtree.
    for _, v := range u.dom.Children {
        // TODO(adonovan): opt: avoid copy on final iteration; use destructive update.
        r := make([]Value, len(renaming))
        copy(r, renaming)
        rename(v.Block, r, newPhis)
    }
}