elliotchance/gedcom

View on GitHub
filter.go

Summary

Maintainability
A
35 mins
Test Coverage
// Filtering and Tree Walking
//
// The Filter function recursively removes or manipulates nodes with a
// FilterFunction:
//
//   newNodes := gedcom.Filter(node, document, func(node gedcom.Node) (gedcom.Node, bool) {
//     if node.Tag().Is(gedcom.TagIndividual) {
//       // false means it will not traverse children, since an
//       // individual can never be inside of another individual.
//       return node, false
//     }
//
//     return nil, false
//   })
//
//   // Remove all tags that are not official.
//   newNodes := gedcom.Filter(node, document, gedcom.OfficialTagFilter())
//
// Some examples of Filter functions include BlacklistTagFilter,
// OfficialTagFilter, SimpleNameFilter and WhitelistTagFilter.
package gedcom

// FilterFunction is the callback used with the Filter function.
//
// The node (as passed in through the parameter) will be replaced with newNode;
// a shallow copy of newNode is what ends up in the filtered output. You should
// return the same node argument if you do not want the node to be changed.
//
// The traverseChildren result decides if the traversal should continue through
// the children of node. If the traversal continues (traverseChildren is true)
// the FilterFunction will always receive the children of the node, and not the
// children (if any) of newNode. If the traversal does not continue
// (traverseChildren is false) the children of newNode are attached to the
// output as they are, without being passed to the FilterFunction.
//
// If the newNode is nil then it will be removed and the children will not be
// traversed, regardless of the traverseChildren value.
type FilterFunction func(node Node) (newNode Node, traverseChildren bool)

// Filter builds a new nested node structure by applying fn to root and,
// recursively, to its children.
//
// See FilterFunction for how the values returned by fn are interpreted. If fn
// removes root itself then nil is returned.
//
// Several ready-made filters are available, including WhitelistTagFilter,
// BlacklistTagFilter and OfficialTagFilter.
//
// Some nodes, such as an IndividualNode or FamilyNode, cannot be created
// without being attached to a document. The supplied document is the one
// passed along whenever Filter creates the copy of a node.
func Filter(root Node, document *Document, fn FilterFunction) Node {
    // Each top-level call starts with a fresh entity map and no family.
    return filter(root, fn, entityMap{}, document, nil)
}

// filter is the recursive worker behind Filter.
//
// fn is applied to root first. A nil replacement (as reported by IsNil) drops
// root together with everything below it. Otherwise the replacement is
// shallow-copied and children are attached to that copy in one of two ways:
//
//   - When fn asks for traversal, every child of the original root is
//     filtered recursively and each non-nil result is added.
//   - When fn declines traversal, the children of the replacement are added
//     as they are, without being filtered.
//
// family is handed to shallowCopyNode for root and passed down to the
// children. It is swapped out whenever the replacement is a FamilyNoder.
//
// NOTE(review): entityMap is shared across the whole recursion, presumably so
// that one source family always resolves to the same family in document;
// confirm against entityMap.GetOrAssign.
func filter(root Node, fn FilterFunction, entityMap entityMap, document *Document, family *FamilyNode) Node {
    replacement, descend := fn(root)
    if IsNil(replacement) {
        return nil
    }

    if noder, isFamilyNoder := replacement.(FamilyNoder); isFamilyNoder {
        source := noder.Family()
        assigned := entityMap.GetOrAssign(source, func() interface{} {
            return document.AddFamily(source.Pointer())
        })
        family = assigned.(*FamilyNode)
    }

    copied := shallowCopyNode(replacement, document, family)

    if !descend {
        // Traversal was declined: carry the replacement's own children over
        // untouched.
        for _, child := range replacement.Nodes() {
            copied.AddNode(child)
        }

        return copied
    }

    // Traversal always walks the children of the original node, never those
    // of the replacement.
    for _, child := range root.Nodes() {
        if filtered := filter(child, fn, entityMap, document, family); filtered != nil {
            copied.AddNode(filtered)
        }
    }

    return copied
}

// shallowCopyNode creates a new node with the same tag, value and pointer as
// node, but without any of its children.
//
// Some nodes need a document (such as an IndividualNode) or a family (such as
// a ChildNode) in order to be created. Those must be supplied by the caller so
// that the copy is not attached to the entities of the original node.
//
// Take care to reuse the same document and family across related copies.
// Creating new ones for every copied node would leave the result fractured and
// not in a state that can be traversed as expected.
func shallowCopyNode(node Node, document *Document, family *FamilyNode) Node {
    return newNode(document, family, node.Tag(), node.Value(), node.Pointer())
}

// WhitelistTagFilter keeps only the nodes whose tag is one of the provided
// tags. A kept node is returned unchanged and its children are traversed; any
// other node is removed.
//
// This is the opposite of BlacklistTagFilter.
//
// See the Filter function.
func WhitelistTagFilter(tags ...Tag) FilterFunction {
    // allowed reports whether the tag of node matches any whitelisted tag.
    allowed := func(node Node) bool {
        for _, candidate := range tags {
            if candidate.Is(node.Tag()) {
                return true
            }
        }

        return false
    }

    return func(node Node) (Node, bool) {
        if allowed(node) {
            return node, true
        }

        return nil, false
    }
}

// BlacklistTagFilter removes every node whose tag is one of the provided tags.
// Any other node is returned unchanged and its children are traversed.
//
// This is the opposite of WhitelistTagFilter.
//
// See the Filter function.
func BlacklistTagFilter(tags ...Tag) FilterFunction {
    // blocked reports whether the tag of node matches any blacklisted tag.
    blocked := func(node Node) bool {
        for _, candidate := range tags {
            if candidate.Is(node.Tag()) {
                return true
            }
        }

        return false
    }

    return func(node Node) (Node, bool) {
        if blocked(node) {
            return nil, false
        }

        return node, true
    }
}

// OfficialTagFilter keeps the nodes that have an official tag. See
// Tag.IsOfficial for more information.
//
// The children of a node are only traversed when its tag is official.
//
// See the Filter function.
func OfficialTagFilter() FilterFunction {
    return func(node Node) (kept Node, traverse bool) {
        // The same flag selects the returned node and decides the traversal.
        traverse = node.Tag().IsOfficial()
        kept = NodeCondition(traverse, node, nil)

        return kept, traverse
    }
}

// SimpleNameFilter flattens NAME nodes using the provided format.
//
// Each NameNode is replaced with a new NAME node whose value is the formatted
// name and whose pointer is the same as the original. The children of the
// original are not traversed. All other nodes are returned unchanged.
//
// This is useful for comparing names when the components of the name (title,
// suffix, etc) are less important than the name itself.
func SimpleNameFilter(format NameFormat) FilterFunction {
    return func(node Node) (Node, bool) {
        name, isName := node.(*NameNode)
        if !isName {
            return node, true
        }

        // The replacement is created without a document or family.
        formatted := name.Format(format)

        return newNode(nil, nil, TagName, formatted, name.Pointer()), false
    }
}

// OnlyVitalsTagFilter removes all tags except for vital individual information.
//
// The vital nodes are (or multiples in the same individual of): name, birth,
// baptism, death and burial. Beneath those, the name parts, title, date and
// place tags are retained.
//
// The whitelist is matched on the tag alone, so a listed tag is accepted
// wherever it appears beneath another accepted node.
func OnlyVitalsTagFilter() FilterFunction {
    vitalTags := []Tag{
        // Level 0: The individual has to be allowed so that its children
        // can be reached.
        TagIndividual,

        // Level 1.
        TagName, TagBirth, TagBaptism, TagDeath, TagBurial,

        // Level 2: These should only ever appear as direct children of the
        // level 1 tags.
        TagGivenName, TagSurname, TagSurnamePrefix, TagNamePrefix,
        TagNameSuffix, TagTitle, TagDate, TagPlace,
    }

    return WhitelistTagFilter(vitalTags...)
}

// RemoveEmptyDeathTagFilter removes any Death (DEAT) events that do not have
// any child nodes (which would otherwise hold information like the date or
// place).
//
// This is because some applications use the death tag as a marker without any
// further information, which can cause problems when comparing individuals.
func RemoveEmptyDeathTagFilter() FilterFunction {
    return func(node Node) (Node, bool) {
        death, isDeath := node.(*DeathNode)

        // Anything that is not a death node, or is a death node that has
        // children, is kept and traversed.
        if !isDeath || len(death.Nodes()) != 0 {
            return node, true
        }

        return nil, false
    }
}

// RemoveDuplicateNamesFilter removes repeated names from individuals.
//
// Two NameNode children are duplicates when their String() values are equal.
// The first occurrence is kept and later ones are skipped. All other children
// are carried over in their original order.
//
// An IndividualNode is replaced with a new individual created with the same
// document and pointer, and its children are not traversed. Every other node
// is returned unchanged with traversal enabled.
func RemoveDuplicateNamesFilter() FilterFunction {
    return func(node Node) (Node, bool) {
        individual, isIndividual := node.(*IndividualNode)
        if !isIndividual {
            // Not an individual: leave it untouched and let the traversal
            // continue.
            return node, true
        }

        deduped := newIndividualNode(individual.Document(), individual.Pointer())
        seen := map[string]bool{}

        for _, child := range individual.children {
            if name, isName := child.(*NameNode); isName {
                key := name.String()
                if seen[key] {
                    continue
                }

                seen[key] = true
            }

            deduped.AddNode(child)
        }

        // Traversal stops here; the children were already added above.
        return deduped, false
    }
}