Bnei-Baruch/mdb

View on GitHub
storage/missing_sha1.go

Summary

Maintainability
A
35 mins
Test Coverage
package storage

import (
    "bufio"
    "database/sql"
    "encoding/csv"
    "io"
    "os"
    "strconv"
    "strings"
    "time"

    log "github.com/Sirupsen/logrus"
    "github.com/pkg/errors"
    "github.com/spf13/viper"
    "github.com/volatiletech/sqlboiler/v4/boil"
    "github.com/volatiletech/sqlboiler/v4/queries"

    "github.com/Bnei-Baruch/mdb/utils"
)

// MissingSha1Analysis reconciles the SHA1 values stored for MDB files with
// the kmedia CSV dumps.
//
// It opens the Postgres connection configured under "mdb.url", loads the
// kmedia_id -> sha1 map of files that already have a SHA1 in MDB, and then
// runs analyzeSha1CSV followed by analyzeMissingSha1CSV against that map.
// Failures are not returned: they are passed to utils.Must (presumably
// panicking on a non-nil error - confirm against utils) or panic directly.
func MissingSha1Analysis() {
    clock := time.Now()

    log.SetFormatter(&log.TextFormatter{FullTimestamp: true})
    //log.SetLevel(log.WarnLevel)

    log.Info("Starting import of storage status")

    log.Info("Setting up connection to MDB")
    mdb, err := sql.Open("postgres", viper.GetString("mdb.url"))
    utils.Must(err)
    // Register the close before pinging so the handle is released even when
    // the ping fails and the function unwinds.
    defer mdb.Close()
    utils.Must(mdb.Ping())
    boil.SetDB(mdb)
    //boil.DebugMode = true

    fileMap, err := loadFilesByKMID(mdb)
    if err != nil {
        panic(errors.Wrap(err, "Load files from MDB"))
    }

    utils.Must(analyzeSha1CSV(fileMap, mdb))
    utils.Must(analyzeMissingSha1CSV(fileMap, mdb))

    log.Info("Success")
    log.Infof("Total run time: %s", time.Since(clock).String())
}

// loadFilesByKMID builds a map from kmedia_id to hex-encoded SHA1 for every
// row of the files table that has a non-NULL sha1 and a 'kmedia_id' key in
// its properties.
//
// It returns the populated map, or nil and a wrapped error if the query,
// a row scan, the iteration, or closing the result set fails.
func loadFilesByKMID(db *sql.DB) (map[int64]string, error) {
    fileMap := make(map[int64]string, 500000)

    rows, err := queries.Raw(`SELECT (properties->>'kmedia_id')::int, encode(sha1, 'hex') FROM files WHERE sha1 IS NOT NULL AND properties ? 'kmedia_id'`).
        Query(db)
    if err != nil {
        return nil, errors.Wrap(err, "Load files")
    }
    // Safety net for the early returns below. Rows.Close is idempotent, so
    // the explicit Close further down (whose error we report) stays valid.
    defer rows.Close()

    for rows.Next() {
        var kmID int64
        var sha1 string
        if err := rows.Scan(&kmID, &sha1); err != nil {
            return nil, errors.Wrap(err, "Scan row")
        }

        fileMap[kmID] = sha1
    }
    if err := rows.Err(); err != nil {
        return nil, errors.Wrap(err, "Iterate rows")
    }
    if err := rows.Close(); err != nil {
        return nil, errors.Wrap(err, "Close rows")
    }

    log.Info("Loaded files from MDB")
    return fileMap, nil
}

// analyzeSha1CSV walks the kmedia SHA1 CSV dump and overwrites the sha1 of
// any MDB file whose stored value differs from the one in the dump.
//
// Each record must have at least 5 fields: field 0 is the kmedia id and
// field 4 is the hex-encoded SHA1. Records whose kmedia id is not a key of
// fileMap are skipped.
//
// It returns an error when the data file cannot be opened, a record cannot
// be read, a record is too short, or a kmedia id is not an integer. A failed
// UPDATE is only logged, so one bad row does not abort the whole run.
func analyzeSha1CSV(fileMap map[int64]string, mdb *sql.DB) error {
    input := "/home/edos/projects/kmedia/kmedia_files_sha1.csv"
    log.Infof("Processing data file: %s", input)
    f, err := os.Open(input)
    if err != nil {
        return errors.Wrap(err, "Open data file")
    }
    defer f.Close()

    // sha1Field is the zero-based index of the SHA1 column in the dump.
    const sha1Field = 4

    i := 0
    csvReader := csv.NewReader(f)
    for {
        r, err := csvReader.Read()
        if err == io.EOF {
            break
        }
        if err != nil {
            return errors.Wrapf(err, "csv.Read() %d", i)
        }

        i++

        // Guard the index accesses below: a record without the SHA1 column
        // would otherwise panic with an index-out-of-range.
        if len(r) <= sha1Field {
            return errors.Errorf("Short record line %d: got %d fields, need at least %d", i, len(r), sha1Field+1)
        }

        kmid, err := strconv.Atoi(r[0])
        if err != nil {
            return errors.Wrapf(err, "Bad KMID %s line %d", r[0], i)
        }
        sha1 := r[sha1Field]

        dbSha1, ok := fileMap[int64(kmid)]
        if !ok {
            //log.Infof("Unknown kmid %d", kmid)
            continue
        }

        if sha1 == dbSha1 {
            continue
        }

        log.Infof("SHA1 mismatch: %d, %s != %s", kmid, sha1, dbSha1)
        _, err = queries.Raw("UPDATE files set sha1=decode($1,'hex') where (properties->>'kmedia_id')::int = $2", sha1, kmid).Exec(mdb)
        if err != nil {
            // Best effort: report the failure and keep processing the rest.
            log.Errorf("Update sha1 %d line %d: %s", kmid, i, err.Error())
        }
    }

    return nil
}

// analyzeMissingSha1CSV walks the kmedia "missing files" dump and sets sha1
// to NULL for every MDB file whose kmedia id is listed there but is a key of
// fileMap.
//
// The file is read line by line; blank lines are skipped and the text before
// the first comma of each line is parsed as the kmedia id.
//
// It returns an error when the data file cannot be opened or read, a kmedia
// id is not an integer, or an UPDATE fails.
func analyzeMissingSha1CSV(fileMap map[int64]string, mdb *sql.DB) error {
    input := "/home/edos/projects/kmedia/kmedia_files_missing.csv"
    log.Infof("Processing data file: %s", input)
    f, err := os.Open(input)
    if err != nil {
        return errors.Wrap(err, "Open data file")
    }
    defer f.Close()

    i := 0
    scanner := bufio.NewScanner(f)
    for scanner.Scan() {
        i++
        line := strings.TrimSpace(scanner.Text())
        if line == "" {
            continue
        }

        r := strings.Split(line, ",")
        kmid, err := strconv.Atoi(r[0])
        if err != nil {
            return errors.Wrapf(err, "Bad KMID %s line %d", r[0], i)
        }

        if _, ok := fileMap[int64(kmid)]; !ok {
            continue
        }

        log.Infof("SHA1 should be null kmid %d", kmid)
        _, err = queries.Raw("UPDATE files SET sha1=NULL WHERE (properties->>'kmedia_id')::int = $1", kmid).Exec(mdb)
        if err != nil {
            return errors.Wrapf(err, "Update sha1 %d", kmid)
        }
    }
    // Scan returns false on both EOF and failure (I/O error, over-long line);
    // without this check a truncated read would be reported as success.
    if err := scanner.Err(); err != nil {
        return errors.Wrapf(err, "Scan data file after line %d", i)
    }

    return nil
}