synapsecns/sanguine
ethergo/submitter/queue.go

package submitter

import (
    "context"
    "fmt"
    "math/big"
    "sync"
    "time"

    "github.com/ethereum/go-ethereum/core/types"
    "github.com/lmittmann/w3"
    "github.com/lmittmann/w3/module/eth"
    "github.com/lmittmann/w3/w3types"
    "github.com/synapsecns/sanguine/core/metrics"
    "github.com/synapsecns/sanguine/ethergo/client"
    "github.com/synapsecns/sanguine/ethergo/submitter/db"
    "go.opentelemetry.io/otel/attribute"
    "go.opentelemetry.io/otel/trace"
)

// runSelector runs a single iteration of the selector loop: it waits for the retry
// interval (or an explicit retryNow signal) and then processes the queue.
func (t *txSubmitterImpl) runSelector(parentCtx context.Context, i int) (shouldExit bool, err error) {
    ctx, span := t.metrics.Tracer().Start(parentCtx, "submitter.Start", trace.WithAttributes(attribute.Int("i", i)))
    defer func() {
        metrics.EndSpanWithErr(span, err)
    }()

    select {
    case <-ctx.Done():
        return true, fmt.Errorf("context done: %w", ctx.Err())
    case <-time.After(t.GetRetryInterval()):
        err = t.processQueue(ctx)
    case <-t.retryNow:
        err = t.processQueue(ctx)
    }
    return false, err
}
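
// A minimal sketch of the expected driver (hypothetical; the real loop lives elsewhere
// in this package): call runSelector repeatedly until it reports shouldExit, surfacing
// per-iteration errors without tearing the worker down.
//
//     for {
//         shouldExit, err := t.runSelector(ctx, i)
//         if err != nil {
//             logger.Warn("selector iteration failed", "error", err)
//         }
//         if shouldExit {
//             return
//         }
//     }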

// processQueue processes the queue of transactions: it kicks off confirmed-queue
// processing in the background, then resubmits pending txes for each chain with work queued.
func (t *txSubmitterImpl) processQueue(parentCtx context.Context) (err error) {
    // TODO: this might be too short of a deadline depending on the number of pendingTxes in the queue
    deadlineCtx, cancel := context.WithTimeout(parentCtx, time.Second*60)
    defer cancel()

    ctx, span := t.metrics.Tracer().Start(deadlineCtx, "submitter.ProcessQueue")
    defer func() {
        metrics.EndSpanWithErr(span, err)
    }()

    // TODO: consider a per-chain locker for resubmission (fan-out by chain ID happens below)
    var wg sync.WaitGroup
    wg.Add(1)

    go func() {
        defer wg.Done()
        err := t.processConfirmedQueue(ctx)
        if err != nil {
            span.AddEvent("processConfirmedQueue error", trace.WithAttributes(
                attribute.String("error", err.Error())))
        }
    }()

    pendingChainIDs, err := t.db.GetChainIDsByStatus(ctx, t.signer.Address(), db.Stored, db.Pending, db.FailedSubmit, db.Submitted)
    if err != nil {
        return fmt.Errorf("could not get pendingChainIDs: %w", err)
    }

    t.distinctChainIDMux.RLock()
    noOpChainIDs := outersection(pendingChainIDs, t.distinctChainIDs)
    t.distinctChainIDMux.RUnlock()

    pendingChainIDs64 := make([]int64, len(pendingChainIDs))
    for i, chainID := range pendingChainIDs {
        pendingChainIDs64[i] = chainID.Int64()
    }
    span.SetAttributes(attribute.Int64Slice("pending_chain_ids", pendingChainIDs64))

    wg.Add(len(pendingChainIDs))

    for _, chainID := range pendingChainIDs {
        go func(chainID *big.Int) {
            defer wg.Done()

            // get all the pendingTxes in the queue
            pendingTxes, err := t.db.GetTXS(ctx, t.signer.Address(), chainID, db.WithStatuses(db.Stored, db.Pending, db.FailedSubmit, db.Submitted))
            if err != nil {
                span.AddEvent("could not get pendingTxes", trace.WithAttributes(
                    attribute.String("error", err.Error()), attribute.Int64("chainID", chainID.Int64()),
                ))
                return
            }

            err = t.chainPendingQueue(ctx, chainID, pendingTxes)
            if err != nil {
                span.AddEvent("chainPendingQueue error", trace.WithAttributes(
                    attribute.String("error", err.Error()), attribute.Int64("chainID", chainID.Int64())))
            }
        }(chainID)
    }

    for _, chainID := range noOpChainIDs {
        t.otelRecorder.RecordOldestPendingTx(uint32(chainID.Int64()), 0)
        t.otelRecorder.RecordNumPendingTxes(uint32(chainID.Int64()), 0)

        if !t.otelRecorder.HasNonceForChain(uint32(chainID.Int64())) {
            wg.Add(1)
            // pass chainID as an argument so the goroutine doesn't capture the loop variable
            go func(chainID *big.Int) {
                defer wg.Done()
                evmClient, err := t.fetcher.GetClient(ctx, chainID)
                if err != nil {
                    logger.Warn("could not get client", "error", err)
                    return
                }
                nonce, err := evmClient.NonceAt(ctx, t.signer.Address(), nil)
                if err != nil {
                    logger.Warn("could not get nonce", "error", err)
                    return
                }
                t.otelRecorder.RecordNonceForChain(uint32(chainID.Int64()), nonce)
            }(chainID)
        }

        if !t.otelRecorder.HasGasBalanceForChain(uint32(chainID.Int64())) {
            wg.Add(1)
            go func(chainID *big.Int) {
                defer wg.Done()
                evmClient, err := t.fetcher.GetClient(ctx, chainID)
                if err != nil {
                    logger.Warn("could not get client", "error", err)
                    return
                }
                balance, err := evmClient.BalanceAt(ctx, t.signer.Address(), nil)
                if err != nil {
                    logger.Warn("could not get balance", "error", err)
                    return
                }
                t.otelRecorder.RecordGasBalanceForChain(uint32(chainID.Int64()), balance)
            }(chainID)
        }
    }

    wg.Wait()

    return nil
}
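
// outersection is defined elsewhere in this package; the sketch below is a hypothetical
// reconstruction consistent with its call sites above: it returns every chain ID in
// superset that is absent from set, i.e. the known chains with nothing queued.
func outersection(set, superset []*big.Int) []*big.Int {
    seen := make(map[uint64]struct{}, len(set))
    for _, chainID := range set {
        seen[chainID.Uint64()] = struct{}{}
    }

    var out []*big.Int
    for _, chainID := range superset {
        if _, ok := seen[chainID.Uint64()]; !ok {
            out = append(out, chainID)
        }
    }
    return out
}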

// maxTxesPerChain caps how many ReplacedOrConfirmed txes are handled per chain in one pass.
const maxTxesPerChain = 100

// processConfirmedQueue fetches txes already marked ReplacedOrConfirmed and resolves each
// to a final Replaced or Confirmed status, fanning out by chain.
func (t *txSubmitterImpl) processConfirmedQueue(parentCtx context.Context) (err error) {
    ctx, span := t.metrics.Tracer().Start(parentCtx, "submitter.processConfirmedQueue")
    defer func() {
        metrics.EndSpanWithErr(span, err)
    }()

    txs, err := t.db.GetAllTXAttemptByStatus(ctx, t.signer.Address(), nil, db.WithMaxResults(1000), db.WithStatuses(db.ReplacedOrConfirmed))
    if err != nil {
        return fmt.Errorf("could not get txs: %w", err)
    }

    sortedTXsByChainID := sortTxesByChainID(txs, maxTxesPerChain)

    t.distinctChainIDMux.RLock()
    noOpChainIDs := outersection(mapToBigIntSlice(sortedTXsByChainID), t.distinctChainIDs)
    t.distinctChainIDMux.RUnlock()

    var wg sync.WaitGroup
    wg.Add(len(sortedTXsByChainID))

    for chainID := range sortedTXsByChainID {
        go func(chainID uint64) {
            defer wg.Done()
            err := t.chainConfirmQueue(ctx, new(big.Int).SetUint64(chainID), sortedTXsByChainID[chainID])
            if err != nil {
                span.AddEvent("chainPendingQueue error", trace.WithAttributes(
                    attribute.String("error", err.Error()), attribute.Int64("chainID", int64(chainID))))
            }
        }(chainID)
    }

    for _, chainID := range noOpChainIDs {
        t.otelRecorder.RecordConfirmedQueue(uint32(chainID.Int64()), 0)
    }

    wg.Wait()
    return nil
}
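
// sortTxesByChainID and mapToBigIntSlice are defined elsewhere in this package; below are
// hypothetical sketches consistent with their call sites above. sortTxesByChainID buckets
// txes by chain ID and caps each bucket at maxTxes (the real version may also order each
// bucket, e.g. by nonce); mapToBigIntSlice lifts the map keys back into a *big.Int slice.
// Both assume db.TX exposes the embedded transaction's ChainId, as it does Hash above.
func sortTxesByChainID(txs []db.TX, maxTxes int) map[uint64][]db.TX {
    sorted := make(map[uint64][]db.TX)
    for _, tx := range txs {
        chainID := tx.ChainId().Uint64()
        if len(sorted[chainID]) < maxTxes {
            sorted[chainID] = append(sorted[chainID], tx)
        }
    }
    return sorted
}

func mapToBigIntSlice(m map[uint64][]db.TX) []*big.Int {
    out := make([]*big.Int, 0, len(m))
    for chainID := range m {
        out = append(out, new(big.Int).SetUint64(chainID))
    }
    return out
}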

// chainConfirmQueue resolves the ReplacedOrConfirmed txes for a single chain, checking
// receipts one nonce at a time.
func (t *txSubmitterImpl) chainConfirmQueue(parentCtx context.Context, chainID *big.Int, txes []db.TX) (err error) {
    ctx, span := t.metrics.Tracer().Start(parentCtx, "submitter.chainConfirmQueue")
    defer func() {
        metrics.EndSpanWithErr(span, err)
    }()

    t.otelRecorder.RecordConfirmedQueue(uint32(chainID.Int64()), len(txes))

    // chainClient is the client for the chain we're working on
    chainClient, err := t.fetcher.GetClient(ctx, chainID)
    if err != nil {
        return fmt.Errorf("could not get client: %w", err)
    }

    nonceMap := groupTxesByNonce(txes)
    for nonce := range nonceMap {
        err = t.checkAndSetConfirmation(ctx, chainClient, nonceMap[nonce])
        if err != nil {
            return fmt.Errorf("could not check and set confirmation: %w", err)
        }
    }
    return nil
}
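
// groupTxesByNonce is defined elsewhere in this package; a hypothetical sketch consistent
// with its use above: it buckets txes by nonce, so each bucket holds the competing
// attempts (the original tx plus any gas bumps) for one nonce.
func groupTxesByNonce(txes []db.TX) map[uint64][]db.TX {
    grouped := make(map[uint64][]db.TX)
    for _, tx := range txes {
        grouped[tx.Nonce()] = append(grouped[tx.Nonce()], tx)
    }
    return grouped
}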

// checkAndSetConfirmation checks if the tx is confirmed and sets the status accordingly.
// note: assumes all txes have the same nonce.
func (t *txSubmitterImpl) checkAndSetConfirmation(ctx context.Context, chainClient client.EVM, txes []db.TX) error {
    // nothing to do
    if len(txes) == 0 {
        return nil
    }

    // we're going to take every tx for this nonce and get a receipt for it. Since only one
    // transaction per nonce can be mined, as soon as we know which tx has a receipt we can
    // assume all the others were replaced.
    //
    // There are a few constraints on the logic below as it's currently implemented. Namely, the
    // number of txes can't be bigger than the batch size.
    //
    // The other constraint is that we treat all errors as "tx not found" errors. This is safe
    // because the updated statuses are only persisted once a successful tx has been found for
    // this nonce (see foundSuccessfulTX below).
    calls := make([]w3types.Caller, len(txes))
    receipts := make([]types.Receipt, len(txes))
    for i := range calls {
        calls[i] = eth.TxReceipt(txes[i].Hash()).Returns(&receipts[i])
    }

    err := chainClient.BatchWithContext(ctx, calls...)
    foundSuccessfulTX := false
    if err != nil {
        // there's no way around this type assertion: w3 reports per-call failures
        // as a concrete w3.CallErrors value
        //nolint: errorlint
        callErr, ok := err.(w3.CallErrors)
        if !ok {
            //nolint: errorlint
            return fmt.Errorf("unexpected error type: %T", err)
        }

        for i := range callErr {
            if callErr[i] != nil {
                txes[i].Status = db.Replaced
            } else {
                foundSuccessfulTX = true
                txes[i].Status = db.Confirmed
            }
        }
    } else if receipts[0].TxHash == txes[0].Hash() {
        // a nil batch error means every lookup returned a receipt, which should only happen
        // when there is a single tx for this nonce, so checking the first one suffices
        // TODO: handle the case where there is more than one
        txes[0].Status = db.Confirmed
        foundSuccessfulTX = true
    }

    if foundSuccessfulTX {
        err = t.db.PutTXS(ctx, txes...)
        if err != nil {
            return fmt.Errorf("could not put txes: %w", err)
        }
    }

    return nil
}
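
// receiptsForTxes is a hypothetical helper (not in the original file) that isolates the
// w3 batching pattern checkAndSetConfirmation relies on: one eth_getTransactionReceipt
// per tx, issued as a single RPC batch. Per-call failures (e.g. a replaced tx with no
// receipt) come back as non-nil entries of w3.CallErrors at the matching index rather
// than failing the whole batch.
func receiptsForTxes(ctx context.Context, chainClient client.EVM, txes []db.TX) ([]types.Receipt, w3.CallErrors, error) {
    calls := make([]w3types.Caller, len(txes))
    receipts := make([]types.Receipt, len(txes))
    for i := range calls {
        calls[i] = eth.TxReceipt(txes[i].Hash()).Returns(&receipts[i])
    }

    err := chainClient.BatchWithContext(ctx, calls...)
    if err == nil {
        return receipts, nil, nil
    }

    //nolint: errorlint
    callErrs, ok := err.(w3.CallErrors)
    if !ok {
        return nil, nil, fmt.Errorf("unexpected batch error type: %T", err)
    }
    return receipts, callErrs, nil
}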