pkg/tdglib/todogenerator.go

Summary

Maintainability
A
3 hrs
Test Coverage
package tdglib

import (
    "bufio"
    "crypto/md5"
    "encoding/hex"
    "errors"
    "io"
    "log"
    "os"
    "path/filepath"
    "regexp"
    "strconv"
    "strings"
    "sync"
    "unicode"

    "github.com/zieckey/goini"
)

const (
    // estimateEpsilon is the threshold below which a parsed estimate is
    // treated as "not set" when validating an ini properties line.
    estimateEpsilon = 0.01
    // minTitleWords is compared against the byte length of every title word
    // in countTitleWords: only words longer than this value are counted.
    // NOTE(review): despite the name it acts as a word-length threshold.
    minTitleWords   = 2
)

var (
    // commentPrefixes lists the upper-case markers that start a TODO-like
    // comment; matching against them is case-insensitive (see startsWith).
    commentPrefixes        = [...]string{"TODO", "FIXME", "BUG", "HACK"}
    // emptyRunes backs the non-nil empty slice returned for empty comments.
    emptyRunes             = [...]rune{}
    // Keys recognised in the optional properties line of a comment.
    categoryIniKey         = "category"
    issueIniKey            = "issue"
    estimateIniKey         = "estimate"
    authorIniKey           = "author"
    // errCannotParseIni is returned when a line holds no usable properties.
    errCannotParseIni      = errors.New("Cannot parse ini properties")
    // errCannotParseEstimate is returned for a malformed time estimate.
    errCannotParseEstimate = errors.New("Cannot parse time estimate")
)

// ToDoComment is a task parsed from a TODO-like source comment.
// Optional fields are omitted from the JSON output when they are zero.
type ToDoComment struct {
    // Type is the marker that introduced the comment (one of commentPrefixes).
    Type     string  `json:"type"`
    // Title is the text that follows the marker on the first comment line.
    Title    string  `json:"title"`
    // Body holds the remaining comment lines joined with "\n" and trimmed.
    Body     string  `json:"body"`
    // File is the path of the source file as passed to NewComment.
    File     string  `json:"file"`
    // Line is the line number passed to NewComment (the title line when
    // created by the generator, counted from 1).
    Line     int     `json:"line"`
    // Issue is the value of the "issue" property, when it is an integer.
    Issue    int     `json:"issue,omitempty"`
    // Author comes from the "MARKER(author)" form or the "author" property.
    Author   string  `json:"author,omitempty"`
    // Category is the value of the "category" property.
    Category string  `json:"category,omitempty"`
    // Estimate is the value of the "estimate" property, in hours.
    Estimate float64 `json:"estimate,omitempty"`
}

// ToDoGenerator is responsible for parsing a code base into ToDoComments.
// It contains a WaitGroup and a Mutex, so it must be used through a pointer.
type ToDoGenerator struct {
    // root is the directory that is walked by Generate.
    root       string
    // include and exclude are the compiled path filters.
    include    []*regexp.Regexp
    exclude    []*regexp.Regexp
    // commentsWG tracks the running parseFile and addComment goroutines.
    commentsWG sync.WaitGroup
    // comments accumulates the accepted comments.
    comments   []*ToDoComment
    // minWords and minChars are the title thresholds checked in addComment.
    minWords   int
    minChars   int
    // addedMap records digests of already added comments to skip duplicates.
    addedMap   map[string]bool
    // commentMux guards comments and addedMap.
    commentMux sync.Mutex
}

// NewToDoGenerator creates a generator for the source tree rooted at root.
//
// include and exclude are regular expressions matched against file paths;
// an invalid pattern makes regexp.MustCompile panic. minWords and minChars
// are the title thresholds a comment has to reach to be kept. The root is
// converted to an absolute path; when that fails the error is logged and
// root is used as given.
func NewToDoGenerator(root string, include []string, exclude []string, minWords, minChars int) *ToDoGenerator {
    log.Printf("Using source code root %v", root)

    log.Printf("Using %v include filters", include)
    includeFilters := make([]*regexp.Regexp, len(include))
    for i, pattern := range include {
        includeFilters[i] = regexp.MustCompile(pattern)
    }

    log.Printf("Using %v exclude filters", exclude)
    excludeFilters := make([]*regexp.Regexp, len(exclude))
    for i, pattern := range exclude {
        excludeFilters[i] = regexp.MustCompile(pattern)
    }

    resolvedRoot := root
    if abs, err := filepath.Abs(root); err != nil {
        log.Printf("Error setting generator root: %v", err)
    } else {
        resolvedRoot = abs
    }

    generator := &ToDoGenerator{
        root:     resolvedRoot,
        include:  includeFilters,
        exclude:  excludeFilters,
        minWords: minWords,
        minChars: minChars,
        comments: []*ToDoComment{},
        addedMap: map[string]bool{},
    }

    return generator
}

// Root returns the source root the generator walks, as stored by
// NewToDoGenerator.
func (td *ToDoGenerator) Root() string {
    return td.root
}

// Includes reports whether path passes the include filters. A generator
// without include filters accepts every path; otherwise at least one filter
// has to match.
func (td *ToDoGenerator) Includes(path string) bool {
    if len(td.include) == 0 {
        return true
    }

    for _, filter := range td.include {
        if filter.MatchString(path) {
            return true
        }
    }

    return false
}

// Excludes reports whether path matches at least one exclude filter.
func (td *ToDoGenerator) Excludes(path string) bool {
    for _, filter := range td.exclude {
        if filter.MatchString(path) {
            return true
        }
    }

    return false
}

// Generate is the entry point to comment generation.
//
// It walks the source root, starts one parseFile goroutine for every regular
// file that passes the include filters and is not excluded, waits for all
// parsing to finish and returns the collected comments. When the walk itself
// fails, the error is returned, but only after the goroutines that were
// already started have completed.
func (td *ToDoGenerator) Generate() ([]*ToDoComment, error) {
    matchesCount := 0
    totalFiles := 0
    err := filepath.Walk(td.root, func(path string, info os.FileInfo, err error) error {
        if err != nil {
            return err
        }

        if !info.Mode().IsRegular() {
            return nil
        }

        totalFiles++

        if !td.Includes(path) || td.Excludes(path) {
            return nil
        }

        matchesCount++
        td.commentsWG.Add(1)
        go td.parseFile(path)

        return nil
    })

    if err != nil {
        // do not leave parsers running behind the caller's back
        td.commentsWG.Wait()
        return nil, err
    }

    log.Printf("Scanned files: %v", totalFiles)
    log.Printf("Matched files: %v", matchesCount)
    td.commentsWG.Wait()
    log.Printf("Found comments: %v", len(td.comments))

    return td.comments, nil
}

// countTitleWords returns the number of whitespace-separated words in s
// whose byte length is greater than minTitleWords.
func countTitleWords(s string) int {
    total := 0

    for _, word := range strings.Fields(s) {
        if len(word) <= minTitleWords {
            continue
        }
        total++
    }

    return total
}

// addComment stores c unless an identical comment (same file, title and
// body) was stored before or its title is too short. A title is accepted
// when it has at least minWords counted words or at least minChars bytes.
// It marks one unit of work on the generator's WaitGroup as done.
func (td *ToDoGenerator) addComment(c *ToDoComment) {
    defer td.commentsWG.Done()

    // digest of file+title+body identifies duplicates
    hasher := md5.New()
    for _, part := range [...]string{c.File, c.Title, c.Body} {
        io.WriteString(hasher, part)
    }
    key := hex.EncodeToString(hasher.Sum(nil))

    td.commentMux.Lock()
    defer td.commentMux.Unlock()

    if _, seen := td.addedMap[key]; seen {
        log.Printf("Skipping comment duplicate in %v:%v", c.File, c.Line)
        return
    }

    titleAccepted := countTitleWords(c.Title) >= td.minWords || len(c.Title) >= td.minChars
    if !titleAccepted {
        log.Printf("Ignoring comment in %v:%v", c.File, c.Line)
        return
    }

    td.addedMap[key] = true
    td.comments = append(td.comments, c)
}

// isCommentRune reports whether r is one of the characters treated as a
// comment marker: '/', '#', '%', ';' or '*'.
func isCommentRune(r rune) bool {
    switch r {
    case '/', '#', '%', ';', '*':
        return true
    default:
        return false
    }
}

// parseComment tries to extract the comment text from a source line.
//
// Leading whitespace, a run of comment marker characters (see
// isCommentRune) and the whitespace around the remaining text are removed.
// It returns nil when the line does not start with a comment marker, a
// non-nil empty slice when the comment has no text, and the trimmed text
// otherwise. A body consisting of a single character is returned as is.
func parseComment(line string) []rune {
    runes := []rune(line)
    size := len(runes)

    // skip prefix whitespace
    i := 0
    for i < size && unicode.IsSpace(runes[i]) {
        i++
    }

    // skip comment symbols themselves
    markerStart := i
    for i < size && isCommentRune(runes[i]) {
        i++
    }
    if i == markerStart {
        return nil
    }

    // and skip space again
    for i < size && unicode.IsSpace(runes[i]) {
        i++
    }

    // empty comment: nothing but markers and whitespace
    if i == size {
        return []rune{}
    }

    // skip suffix whitespace; runes[i] is not a space, so j stays above i
    j := size
    for j > i && unicode.IsSpace(runes[j-1]) {
        j--
    }

    return runes[i:j]
}

// startsWith reports whether s begins with pr, ignoring the case of s.
// pr is expected to be upper-case already: every rune of s is upper-cased
// before the comparison, pr is compared as is. A slice shorter than the
// prefix never matches.
func startsWith(s, pr []rune) bool {
    if len(s) < len(pr) {
        return false
    }

    for i, p := range pr {
        if unicode.ToUpper(s[i]) != p {
            return false
        }
    }

    return true
}

// parseToDoTitle recognises a TODO-like comment line of the form
// "MARKER: title" or "MARKER(author): title", where MARKER is one of
// commentPrefixes in any letter case.
//
// It returns the marker, the title and the author (nil when no parentheses
// follow the marker). All three results are nil when the line does not start
// with a marker, when the marker is directly followed by a letter or when
// there is no title text.
func parseToDoTitle(line []rune) (ctype, title, author []rune) {
    size := len(line)
    if size == 0 {
        return nil, nil, nil
    }

    for _, prefix := range commentPrefixes {
        marker := []rune(prefix)
        pos := len(prefix)

        if size <= pos || !startsWith(line, marker) {
            continue
        }

        // "TODOS" and the like are ordinary words, not markers
        if unicode.IsLetter(line[pos]) {
            continue
        }

        var who []rune
        if line[pos] == '(' {
            for pos < size && line[pos] != ')' {
                pos++
            }
            who = line[len(prefix)+1 : pos]
        }

        // skip whatever is glued to the marker up to a separator
        for pos < size && line[pos] != ':' && !unicode.IsSpace(line[pos]) {
            pos++
        }

        // skip the separators themselves
        for pos < size && (line[pos] == ':' || unicode.IsSpace(line[pos])) {
            pos++
        }

        if pos < size {
            return marker, line[pos:], who
        }
    }

    return nil, nil, nil
}

// parseEstimate converts a human-readable estimate into hours.
//
// The value is a decimal number optionally followed by a unit: 'h' for
// hours or 'm' for minutes; a number without a unit is taken as hours.
// errCannotParseEstimate is returned for an empty string, for any other
// trailing letter and for a number strconv.ParseFloat rejects.
func parseEstimate(estimate string) (float64, error) {
    if estimate == "" {
        return 0, errCannotParseEstimate
    }

    number := estimate
    inMinutes := false

    // the unit, if any, is the last byte of the string
    unit := rune(estimate[len(estimate)-1])
    if unicode.IsLetter(unit) {
        switch unit {
        case 'h':
        case 'm':
            inMinutes = true
        default:
            return 0, errCannotParseEstimate
        }
        number = estimate[:len(estimate)-1]
    }

    value, err := strconv.ParseFloat(number, 64)
    if err != nil {
        return 0, errCannotParseEstimate
    }

    if inMinutes {
        return value / 60.0, nil
    }
    return value, nil
}

// parseIniProperties reads optional "key=value" properties from line and
// stores the recognised ones (category, author, issue, estimate) in t.
// The line is handed to goini with " " and "=" as separators, presumably
// meaning space-separated pairs; verify against the goini documentation.
//
// An author given in the properties never overrides one that is already
// set. Values of issue and estimate that cannot be parsed are ignored.
// errCannotParseIni is returned when the line has no "=" or when, after
// parsing, neither category, issue nor a noticeable estimate is set.
// NOTE(review): the author is not part of that final check, so a line with
// only an author sets t.Author and still reports an error.
func (t *ToDoComment) parseIniProperties(line string) error {
    if !strings.Contains(line, "=") {
        return errCannotParseIni
    }

    ini := goini.New()
    if err := ini.Parse([]byte(line), " ", "="); err != nil {
        return err
    }

    if category, found := ini.Get(categoryIniKey); found {
        t.Category = category
    }

    if author, found := ini.Get(authorIniKey); found && len(t.Author) == 0 {
        t.Author = author
    }

    if raw, found := ini.Get(issueIniKey); found {
        if issue, err := strconv.Atoi(raw); err == nil {
            t.Issue = issue
        }
    }

    if raw, found := ini.Get(estimateIniKey); found {
        if hours, err := parseEstimate(raw); err == nil {
            t.Estimate = hours
        }
    }

    nothingParsed := len(t.Category) == 0 &&
        t.Issue == 0 &&
        t.Estimate < estimateEpsilon
    if nothingParsed {
        return errCannotParseIni
    }

    return nil
}

// NewComment creates a new task from parsed comment lines.
//
// body[0] becomes the title. When the second line holds ini properties they
// are applied to the task and the line is left out of the body; all other
// lines are joined with "\n" and trimmed to form the body. It returns nil
// when body is empty.
func NewComment(path string, lineNumber int, ctype, author string, body []string) *ToDoComment {
    if len(body) == 0 {
        return nil
    }

    comment := &ToDoComment{
        Type:   ctype,
        Title:  body[0],
        File:   path,
        Line:   lineNumber,
        Author: author,
    }

    if len(body) == 1 {
        return comment
    }

    rest := body[1:]
    if comment.parseIniProperties(rest[0]) == nil {
        // the properties line is metadata, not part of the description
        rest = rest[1:]
    }
    comment.Body = strings.TrimSpace(strings.Join(rest, "\n"))

    return comment
}

// accountComment builds a comment from the collected lines and hands it to
// addComment in a new goroutine tracked by the generator's WaitGroup.
// The file path is stored relative to the generator root when possible and
// as given otherwise. Nothing happens when NewComment yields no comment.
func (td *ToDoGenerator) accountComment(path string, lineNumber int, ctype, author string, body []string) {
    displayPath := path
    if rel, err := filepath.Rel(td.root, path); err == nil {
        displayPath = rel
    }

    comment := NewComment(displayPath, lineNumber, ctype, author, body)
    if comment == nil {
        return
    }

    td.commentsWG.Add(1)
    go td.addComment(comment)
}

// parseFile scans the file at path line by line and accounts every
// TODO-like comment it finds.
//
// A comment starts at a comment line recognised by parseToDoTitle and
// continues over the directly following comment lines; it ends at the first
// non-comment line, at the next TODO-like title or at the end of the file.
// Errors opening or reading the file are logged; comments collected before a
// read error are still accounted. It marks one unit of work on the
// generator's WaitGroup as done.
func (td *ToDoGenerator) parseFile(path string) {
    defer td.commentsWG.Done()

    f, err := os.Open(path)
    if err != nil {
        log.Print(err)
        return
    }
    defer f.Close()

    var (
        todo       []string
        lastType   string
        lastAuthor string
        lastLine   int
    )

    // finalize hands over the comment collected so far, if there is one
    finalize := func() {
        if lastType == "" {
            return
        }
        td.accountComment(path, lastLine, lastType, lastAuthor, todo)
        lastType = ""
    }

    scanner := bufio.NewScanner(f)
    lineNumber := 0
    for scanner.Scan() {
        lineNumber++

        c := parseComment(scanner.Text())
        if c == nil {
            // not a comment anymore: finalize
            finalize()
            continue
        }

        ctype, title, author := parseToDoTitle(c)
        switch {
        case title != nil:
            // a new TODO-like comment closes the previous one
            finalize()
            lastAuthor = string(author)
            lastType = string(ctype)
            lastLine = lineNumber
            todo = []string{string(title)}
        case lastType != "":
            // continue consecutive comment line
            todo = append(todo, string(c))
        }
    }

    // the scanner stops silently on read errors and on over-long lines
    if err := scanner.Err(); err != nil {
        log.Printf("Error reading %v: %v", path, err)
    }

    // detect todo item at the end of the file
    finalize()
}