ipfs-search/ipfs-search

View on GitHub
components/protocol/ipfs/ls.go

Summary

Maintainability
A
1 hr
Test Coverage
B
86%
package ipfs

import (
    "context"
    "encoding/json"
    "errors"
    "fmt"
    "io"

    unixfs "github.com/ipfs/go-unixfs"
    unixfs_pb "github.com/ipfs/go-unixfs/pb"

    t "github.com/ipfs-search/ipfs-search/types"
)

// Sentinel errors returned by decodeLink when a decoded ls response does not
// contain exactly one object, or when that object does not contain exactly one link.
var (
    errUnexpectedObjectsLen = errors.New("unexpected Objects len")
    errUnexpectedLinksLen   = errors.New("unexpected Links len")
)

// lsLink is a single directory entry as decoded from the JSON output of the ls API.
//
// Note: copied from https://github.com/ipfs/go-ipfs-http-client/blob/6062f4dc5c9edafa6f1b8301e420b8439588f2fa/unixfs.go#L133
type lsLink struct {
    // Name is the name of the entry; Ls uses it as the reference name.
    Name string
    // Hash identifies the linked object; Ls uses it as the resource ID.
    Hash string
    // Size is copied verbatim into the Stat of the listed resource.
    Size uint64
    // Type is the unixfs protobuf data type; see typeFromPb for how it is interpreted.
    Type unixfs_pb.Data_DataType
    // Target is not read anywhere in this file.
    // NOTE(review): presumably the symlink target, as in the upstream struct - confirm.
    Target string
}

// lsObject is one element of lsOutput.Objects: a hash together with its links.
// decodeLink expects every decoded object to carry exactly one link.
type lsObject struct {
    Hash  string
    Links []lsLink
}

// lsOutput is the top-level JSON value decoded from the ls response stream.
// decodeLink expects every decoded value to carry exactly one object.
type lsOutput struct {
    Objects []lsObject
}

// typeFromPb translates the unixfs protobuf data type of a link into a ResourceType.
//
// Mapping:
//
//    Raw                            -> UndefinedType
//    File                           -> FileType
//    Directory, HAMTShard, Metadata -> DirectoryType
//    anything else                  -> UnsupportedType
//
// Background on why Raw is treated as undefined:
//
// Even though both resolve type and size are set to false, it seems that object types
// are resolved anyway. This might be a bug in the underlying implementation. Hence we
// should not expect returned objects to have a type defined. When they do not, they
// default to the unixfs_pb zero type, which is Raw.
//
// Performance-wise, not resolving here is strongly preferable (otherwise, referred
// blocks need to be fetched).
//
// Current trace analysis (a real prize-winning implementation!):
//
//  1. HTTP API returns a numeric type based on unixfs_pb.
//     The zero value of Type (0) is Raw.
//
//     https://github.com/ipfs/go-unixfs/blob/0faf57387de7e336a68a7ed5a9c35308cb98f576/pb/unixfs.proto
//     https://docs.ipfs.io/reference/http/api/#api-v0-ls
//
//  2. go-ipfs core API
//     Maps DirEntry.Type from the interface-go-ipfs-core interface to unixfs_pb:
//
//     iface.TFile -> unixfs.TFile
//     iface.TDirectory -> unixfs.TDirectory
//     iface.TUnknown -> not mapped (unixfs_pb 0 value of Raw)
//
//     https://github.com/ipfs/interface-go-ipfs-core/blob/master/unixfs.go#L50
//     https://github.com/ipfs/go-ipfs/blob/5ec98e14016950510d8004c7acf306876c7ef4c0/core/commands/ls.go#L146
//
//  3. go-ipfs unixfs core API
//     Implements interface-go-ipfs-core. Maps unixfs_pb to DirEntry.Type (!):
//
//     unixfs.TFile, unixfs.TRaw -> iface.TFile
//     unixfs.THAMTShard, unixfs.TDirectory, unixfs.TMetadata -> iface.TDirectory
//
//     But only when ResolveChildren is true. If not, for DagProtobuf the lnk.Type is
//     not set, causing it to default to the type defined in the core interface, being
//     iface.TUnknown.
//
//     For Raw leaf nodes, the type is set to iface.TFile.
//
//     https://github.com/ipfs/go-ipfs/blob/5ec98e14016950510d8004c7acf306876c7ef4c0/core/commands/ls.go#L135
//
// Hence, if `resolve-type` and `size` are both `false`, `ResolveChildren` *should* be
// `false` as well and the UnixFS implementation of go-ipfs should have `Type = TUnknown`.
// The core API should map this to unixfs_pb type Raw, which causes the HTTP API to
// return 0.
//
// However, if `ResolveChildren` is *not* `false`, as seems to be the case, unixfs.TRaw
// is mapped to iface.TFile and then back to unixfs.TFile.
//
// Hence, we probably want to map unixfs `Raw` to `UndefinedType`.
//
// Note that it *seems* that HAMT sharded directories *include* type information in the
// directory and hence do not rely on protobuf types. Hence, type and size information
// will be included at no additional cost, while normal directories will always have
// type and size set to their zero value.
func typeFromPb(pbType unixfs_pb.Data_DataType) t.ResourceType {
    if pbType == unixfs.TRaw {
        // Raw may be an actual file, but it is also what an unresolved type looks like.
        return t.UndefinedType
    }

    if pbType == unixfs.TFile {
        return t.FileType
    }

    // Everything go-ipfs itself reports as a directory.
    isDirectory := pbType == unixfs.THAMTShard ||
        pbType == unixfs.TDirectory ||
        pbType == unixfs.TMetadata
    if isDirectory {
        return t.DirectoryType
    }

    return t.UnsupportedType
}

// decodeLink reads the next JSON value from dec as an lsOutput and returns the one
// link it contains.
//
// Decoder errors are wrapped with %w, so callers can still detect the end of the
// stream with errors.Is(err, io.EOF). errUnexpectedObjectsLen or errUnexpectedLinksLen
// is returned when the decoded value does not hold exactly one object with exactly
// one link.
func decodeLink(dec *json.Decoder) (*lsLink, error) {
    var output lsOutput

    err := dec.Decode(&output)
    if err != nil {
        return nil, fmt.Errorf("decoding json: %w", err)
    }

    objects := output.Objects
    if len(objects) != 1 {
        return nil, errUnexpectedObjectsLen
    }

    links := objects[0].Links
    if len(links) != 1 {
        return nil, errUnexpectedLinksLen
    }

    return &links[0], nil
}

// Ls lists the entries of r and sends one AnnotatedResource per entry to out.
//
// The "ls" command is requested with type and size resolution disabled and with
// streaming enabled; every streamed link is decoded with decodeLink. For each link the
// emitted resource has r.Resource as parent, DirectorySource as source, the type
// derived through typeFromPb and the size as reported by the API.
//
// Return values:
//   - nil once the response stream has been fully consumed (io.EOF);
//   - the error from sending the request, unmodified;
//   - an error wrapping t.ErrInvalidResource when the API error is classified as such
//     by isInvalidResourceErr;
//   - any other API or decoding error, which is also recorded on the tracing span;
//   - ctx.Err() when ctx is done while waiting to send on out.
func (i *IPFS) Ls(ctx context.Context, r *t.AnnotatedResource, out chan<- *t.AnnotatedResource) error {
    ctx, span := i.Tracer.Start(ctx, "protocol.ipfs.Ls")
    defer span.End()

    request := i.shell.Request("ls", absolutePath(r)).
        Option("resolve-type", false).
        Option("size", false).
        Option("stream", true)

    resp, err := request.Send(ctx)
    if err != nil {
        return err
    }

    // Only close after a successful Send: when Send fails, resp might be nil and
    // cannot be closed.
    defer resp.Close()

    if respErr := resp.Error; respErr != nil {
        if !isInvalidResourceErr(respErr) {
            span.RecordError(respErr)
            return respErr
        }

        // Wrap the original error with ErrInvalidResource.
        return fmt.Errorf("%w: %v", t.ErrInvalidResource, respErr)
    }

    decoder := json.NewDecoder(resp.Output)

    for {
        link, err := decodeLink(decoder)

        // Any decoding error terminates the loop.
        //
        // TODO: Consider using an error channel here; don't abort on individual decoding errors?
        // Alternatively: propagate an InvalidType object instead and log the error without propagating.
        // Needs real world testing. How many directories with invalid entries are there,
        // and should we care about them?
        if errors.Is(err, io.EOF) {
            // End of stream: we are done and there is nothing to report.
            return nil
        }
        if err != nil {
            span.RecordError(err)
            return err
        }

        entry := &t.AnnotatedResource{
            Resource: &t.Resource{
                Protocol: t.IPFSProtocol,
                ID:       link.Hash,
            },
            Source: t.DirectorySource,
            Reference: t.Reference{
                Parent: r.Resource,
                Name:   link.Name,
            },
            Stat: t.Stat{
                Type: typeFromPb(link.Type),
                Size: link.Size,
            },
        }

        // Hand the entry over, unless the context ends first.
        select {
        case <-ctx.Done():
            return ctx.Err()
        case out <- entry:
        }
    }
}