elliotchance/gedcom

View on GitHub
date_range.go

Summary

Maintainability
A
0 mins
Test Coverage
package gedcom

import (
    "fmt"
    "math"
    "regexp"
    "time"
)

// A DateRange represents a period of time.
//
// The minimum possible period is 1 day and ranges only have a resolution of a
// single day.
//
// DateRanges should be considered immutable and are passed by value because of
// this. You should create a new DateRange to represent a new range rather than
// mutating an existing DateRange.
type DateRange struct {
    start, end     Date
    originalString string
}

func NewZeroDateRange() DateRange {
    return DateRange{}
}

func NewDateRangeWithString(s string) (dr DateRange) {
    defer func(originalString string) {
        dr.originalString = originalString
    }(s)

    dateString := CleanSpace(s)

    // Try to match a range first.
    parts := dateRangeRegexp.FindStringSubmatch(dateString)
    if len(parts) > 0 {
        datePart1 := parseDateParts(parts[2], false)
        datePart2 := parseDateParts(parts[4], true)
        dateRange := NewDateRange(
            datePart1,
            datePart2,
        )

        return dateRange
    }

    // Single date.
    datePart1 := parseDateParts(dateString, false)
    datePart2 := parseDateParts(dateString, true)

    return NewDateRange(
        datePart1,
        datePart2,
    )
}

// NewDateRange creates a new date range between two provided dates. It is
// expected that the start date be less than or equal to the end date.
func NewDateRange(start, end Date) DateRange {
    start.IsEndOfRange = false
    end.IsEndOfRange = true

    return DateRange{
        start: start,
        end:   end,
    }
}

// Describes the matrix of possible ranges where each letter represents Before,
// Equal or After. A lower-case letter refers to the lower boundary. Conversely
// an upper-case letter refers to the upper boundary.
var dateRangeCompareMatrix = map[string]DateRangeComparison{
    "bb": DateRangeComparisonEntirelyBefore,
    "be": DateRangeComparisonBefore,
    "ba": DateRangeComparisonPartiallyBefore,
    "bB": DateRangeComparisonPartiallyBefore,
    "bE": DateRangeComparisonOutsideEnd,
    "bA": DateRangeComparisonOutside,
    "eb": DateRangeComparisonInvalid,
    "ee": DateRangeComparisonInsideStart,
    "ea": DateRangeComparisonInsideStart,
    "eB": DateRangeComparisonInsideStart,
    "eE": DateRangeComparisonEqual,
    "eA": DateRangeComparisonOutsideStart,
    "ab": DateRangeComparisonInvalid,
    "ae": DateRangeComparisonInvalid,
    "aa": DateRangeComparisonInside,
    "aB": DateRangeComparisonInside,
    "aE": DateRangeComparisonInsideEnd,
    "aA": DateRangeComparisonPartiallyAfter,
    // Bx is the same as ax.
    "Eb": DateRangeComparisonInvalid,
    "Ee": DateRangeComparisonInvalid,
    "Ea": DateRangeComparisonInvalid,
    "EB": DateRangeComparisonInvalid,
    "EE": DateRangeComparisonInsideEnd,
    "EA": DateRangeComparisonAfter,
    "Ab": DateRangeComparisonInvalid,
    "Ae": DateRangeComparisonInvalid,
    "Aa": DateRangeComparisonInvalid,
    "AB": DateRangeComparisonInvalid,
    "AE": DateRangeComparisonInvalid,
    "AA": DateRangeComparisonEntirelyAfter,
}

func compareDatesForLetter(value, start, end Date) string {
    // We only deal with whole days. This is needed for dates that are ending
    // dates so we don't get the 23:59:59.999 part.
    valueTime := value.Time().Truncate(24 * time.Hour)
    startTime := start.Time().Truncate(24 * time.Hour)
    endTime := end.Time().Truncate(24 * time.Hour)

    switch {
    case valueTime.Equal(startTime):
        return "e"

    case valueTime.Equal(endTime):
        return "E"

    case valueTime.Before(startTime):
        return "b"

    case valueTime.After(endTime):
        return "A"
    }

    // a and B would be the same thing.
    return "a"
}

func (dr DateRange) Compare(dr2 DateRange) DateRangeComparison {
    start := compareDatesForLetter(dr.start, dr2.start, dr2.end)
    end := compareDatesForLetter(dr.end, dr2.start, dr2.end)

    return dateRangeCompareMatrix[start+end]
}

// Start is the lower boundary of the date range.
func (dr DateRange) StartDate() Date {
    return dr.start
}

// End is the upper boundary of the date range.
func (dr DateRange) EndDate() Date {
    return dr.end
}

// Before returns true if the start date is before the other start date.
//
// The idea of "before" in the context of overlapping date ranges is ambiguous.
// The simplest way to think treat all these situations is to only look at the
// start date for each range. No matter when the end dates are or how much of
// each other then end up overlapping.
func (dr DateRange) IsBefore(dr2 DateRange) bool {
    return dr.start.IsBefore(dr2.start)
}

// After returns true if the end date is after the other end date.
//
// See Before for a more detailed explanation.
func (dr DateRange) IsAfter(dr2 DateRange) bool {
    return dr.end.IsAfter(dr2.end)
}

var dateRangeRegexp = regexp.MustCompile(
    fmt.Sprintf(`(?i)^(%s) (.+) (%s) (.+)$`, DateWordsBetween, DateWordsAnd))

// Years works in a similar way to Date.Years() but also takes into
// consideration the StartDate() and EndDate() values of a whole date range,
// like "Bet. 1943 and 1945". It does this by averaging out the Years() value of
// the StartDate() and EndDate() values.
//
// If the DateNode has a single date, like "Mar 1937" then Years will return the
// same value as the Years on the start or end date (no average will be used.)
//
// You can read the specific conversion rules in Date.Years() but be aware that
// the returned value is an approximation and should not be used in date
// calculations.
func (dr DateRange) Years() float64 {
    return (dr.StartDate().Years() + dr.EndDate().Years()) / 2.0
}

// Similarity returns a value from 0.0 to 1.0 to identify how similar two dates
// (or date ranges) are to each other. 1.0 would mean that the dates are exactly
// the same, whereas 0.0 would mean that they are not similar at all.
//
// Similarity is safe to use when either date is nil. If either side is nil then
// 0.5 is returned. Not because they are similar but because there is not enough
// information to make the distinction either way. This is important when using
// date comparisons in combination or part of larger calculations where missing
// data on both sides does not lead to very low scores unnecessarily.
//
// The returned value is calculated on a parabola that awards higher values to
// dates that are proportionally closer to each other. That is, dates that are
// twice as close will have more than twice the score. This attempts to satisfy
// a usable comparison values for close specific dates as well as more relaxed
// values (such as those that one provide an approximate year).
//
// Only the difference between dates is used in the calculation so it is not
// affected by time lines. That is to say that the difference between the years
// 500 and 502 would return the same similarity as the years 2000 to 2002.
//
// The maxYears allows the error margin to be adjusted. Dates that are further
// apart (in any direction) than maxYears will always return 0.0.
//
// A greater maxYears can be used when dates are less exact (such as ancient
// dates that could be commonly off by 10 years or more) or a smaller value when
// dealing with recent dates that are provided in a more exact form.
//
// A sensible default value for maxYears is provided with
// DefaultMaxYearsForSimilarity. You should use this if you are unsure. There is
// also more explanation on the constant.
func (dr DateRange) Similarity(dr2 DateRange, maxYears float64) float64 {
    leftYears := dr.Years()
    rightYears := dr2.Years()
    yearsApart := leftYears - rightYears
    similarity := math.Pow(yearsApart/maxYears, 2)

    // When one date is invalid the similarity will go asymptotic.
    if similarity > 1 {
        return 0
    }

    return 1 - similarity
}

// Equals compares the values of two dates taking into consideration the date
// constraint.
//
// If either date is nil then false is always returned. Even if both dates are
// nil.
//
// A DateNode is considered to be equal only when its StartDate() and EndDate()
// both equal their respective values in the other DateNode.
//
// The comparisons of dates is quite complicated. See the documentation for
// Date.Equals for a full explanation.
func (dr DateRange) Equals(dr2 DateRange) bool {
    // Phrases can only be compared to themselves and they must be the exact
    // same value to be considered equal.
    if dr.IsPhrase() && dr2.IsPhrase() && dr.originalString == dr2.originalString {
        return true
    }

    // Invalid dates follow the same rules as phrases.
    if !dr.IsValid() && !dr2.IsValid() && dr.originalString == dr2.originalString {
        return true
    }

    // Compare dates by value range.
    matchStartDate := dr.StartDate().Equals(dr2.StartDate())
    matchEndDate := dr.EndDate().Equals(dr2.EndDate())

    return matchStartDate && matchEndDate
}

func (dr DateRange) StartAndEndDates() (Date, Date) {
    return dr.StartDate(), dr.EndDate()
}

// IsValid returns true only when the node is not nil and the start and end date
// are non-zero.
//
// A "zero date" (Date.IsZero) is a date that is missing the year, month and
// day. Even if there is other associated information this date is considered to
// be useless for most purposes.
//
// It is safe and completely valid to use IsValid on a nil node.
func (dr DateRange) IsValid() bool {
    start, end := dr.StartAndEndDates()

    return !start.IsZero() && !end.IsZero()
}

// IsExact will return true if the date range represents a single day with an
// exact constraint.
//
// See Date.IsExact for more information.
func (dr DateRange) IsExact() bool {
    start, end := dr.StartAndEndDates()
    startIsExact := start.IsExact()
    endIsExact := end.IsExact()

    return startIsExact && endIsExact
}

// IsPhrase returns true if the date value is a phrase.
//
// A phrase is any statement offered as a date when the year is not
// recognizable to a date parser, but which gives information about when an
// event occurred. The date phrase is enclosed in matching parentheses.
//
// IsPhrase is safe to use on a nil DateNode, and will return false.
func (dr DateRange) IsPhrase() bool {
    if len(dr.originalString) == 0 {
        return false
    }

    firstLetter := dr.originalString[0]

    // ghost:ignore
    lastLetter := dr.originalString[len(dr.originalString)-1]

    return firstLetter == '(' && lastLetter == ')'
}

func (dr DateRange) ParseError() error {
    if err := dr.StartDate().ParseError; err != nil {
        return err
    }

    if err := dr.EndDate().ParseError; err != nil {
        return err
    }

    return nil
}

func (dr DateRange) String() string {
    start, end := dr.StartAndEndDates()
    if start.Equals(end) {
        return start.String()
    }

    return fmt.Sprintf("Bet. %s and %s", start, end)
}

func (dr DateRange) Sub(dr2 DateRange) DurationRange {
    start := dr.StartDate().Sub(dr2.StartDate())
    end := dr.EndDate().Sub(dr2.EndDate())

    return NewDurationRange(start, end)
}

func (dr DateRange) Duration() Duration {
    start := dr.StartDate()
    end := dr.EndDate()

    return end.Sub(start)
}