query/query.go
package query
import (
"fmt"
"regexp"
"strings"
"github.com/dictyBase/arangomanager"
"github.com/emirpasic/gods/lists/arraylist"
"github.com/jinzhu/now"
)
const (
// logicIdx is the offset subtracted from the statement count in
// addClosingParen to reach the entry that precedes the most recently
// added one.
logicIdx = 2
// charSet holds the lower and upper case ASCII letters. It is not
// referenced in the visible part of this file.
// NOTE(review): presumably the alphabet for random identifiers — confirm.
charSet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
// filterStrLen is the submatch length (full match plus four capture
// groups) that ParseFilterString checks before reading the logic group.
filterStrLen = 5
// strSeedLen is the length passed to arangomanager.FixedLenRandomString
// when naming the LET variable of an array filter.
strSeedLen = 10
// arrQualMatchTmpl is the LET sub-query for a substring match against the
// elements of an array. Arguments: LET variable name, qualified array
// field, value to search for.
arrQualMatchTmpl = `
LET %s = (
FOR x IN %s[*]
FILTER CONTAINS(x, LOWER('%s'))
LIMIT 1
RETURN 1
)
`
// arrMatchTmpl is the same substring match with the field addressed as
// <variable>.<field>. Arguments: LET variable name, loop variable, field,
// value to search for.
arrMatchTmpl = `
LET %s = (
FOR x IN %s.%s[*]
FILTER CONTAINS(x, LOWER('%s'))
LIMIT 1
RETURN 1
)
`
// arrQualEqualTmpl is the LET sub-query testing that a value is an element
// of an array. Arguments: LET variable name, value, qualified array field.
arrQualEqualTmpl = `
LET %s = (
FILTER '%s' IN %s[*]
RETURN 1
)
`
// arrEqualTmpl is the membership test with the field addressed as
// <variable>.<field>. Arguments: LET variable name, value, loop variable,
// field.
arrEqualTmpl = `
LET %s = (
FILTER '%s' IN %s.%s[*]
RETURN 1
)
`
// arrNotEqualTmpl is the negated membership test with the field addressed
// as <variable>.<field>. Arguments: LET variable name, value, loop
// variable, field.
arrNotEqualTmpl = `
LET %s = (
FILTER '%s' NOT IN %s.%s[*]
RETURN 1
)
`
// arrQualNotEqualTmpl is the negated membership test against a qualified
// array field. Arguments: LET variable name, value, qualified array field.
arrQualNotEqualTmpl = `
LET %s = (
FILTER '%s' NOT IN %s[*]
RETURN 1
)
`
// dateTmpl compares a field with a date literal wrapped in DATE_ISO8601.
// Arguments: loop variable, field, comparison operator, date string.
dateTmpl = "%s.%s %s DATE_ISO8601('%s')"
)
// startPrefixRegxp matches a literal opening parenthesis; isBalancedParens
// counts its matches in the rendered statement list.
var startPrefixRegxp = regexp.MustCompile(`\(`)
// endPrefixRegxp matches a literal closing parenthesis; isBalancedParens
// counts its matches in the rendered statement list.
var endPrefixRegxp = regexp.MustCompile(`\)`)
// Filter is a container for the parameters of a single filter expression, as
// produced by ParseFilterString.
type Filter struct {
// Field of the object on which the filter will be applied. The statement
// generators use it as the lookup key into the field map.
Field string
// Type of filter for matching or exclusion, kept exactly as written in
// the filter string (for example "==", "$>=" or "@=~").
Operator string
// The value to match or exclude
Value string
// Logic for combining this filter with the next one. ParseFilterString
// stores the separator captured from the filter string ("," or ";", empty
// when absent); addLogic passes it through getLogic to obtain the keyword
// written into the statement.
Logic string
}
// StatementParameters is a container for elements needed in the AQL statement
// built by GenAQLFilterStatement.
type StatementParameters struct {
// Map of filter field names (Filter.Field) to database fields
Fmap map[string]string
// Slice of Filter structs, contains all necessary items for AQL statement
Filters []*Filter
// The variable used for looping inside a collection (i.e. the "s" in "FOR s IN stock")
Doc string
// The variable used for looping inside a graph (i.e. the "v" in "FOR v IN 1..1 OUTBOUND s GRAPH 'xyz'").
// When non-empty, GenAQLFilterStatement prefixes date and plain
// comparison fields with it instead of Doc.
Vert string
}
// buildFilter compiles the regular expression that recognizes one filter
// expression inside a filter string. Each match exposes four capture groups:
// the field name, the operator, the value and an optional trailing separator
// ("," or ";").
func buildFilter() (*regexp.Regexp, error) {
	// The alternatives keep their original order: Go's regexp prefers the
	// earliest alternative that still lets the whole pattern match, so the
	// order is part of the pattern's behavior.
	// NOTE(review): the list contains `=<` but no `<=` — confirm which
	// spelling the filter specification intends.
	operators := []string{
		`\=\=`, `\!\=`, `\=\=\=`, `\!\=\=`,
		`\=\~`, `\!\~`, `>`, `<`, `>\=`,
		`\=<`, `\$\=\=`, `\$\>`,
		`\$\>\=`, `\$\<`, `\$\<\=`,
		`\@\=\=`, `\@\!\=`,
		`\@\!\~`, `\@\=\~`,
	}
	pattern := `(\w+)` +
		`(` + strings.Join(operators, `|`) + `)` +
		`([\w-@.\s]+)(\,|\;)?`
	compiled, err := regexp.Compile(pattern)
	if err != nil {
		return compiled, fmt.Errorf("error in compiling regexp %s", err)
	}
	return compiled, nil
}
// buildDate compiles the regular expression that accepts the supported date
// spellings: a four digit year, year-month, or year-month-day. Every
// alternative is anchored at both ends, so only a whole-string match counts.
// https://play.golang.org/p/NzeBmlQh13v
func buildDate() (*regexp.Regexp, error) {
	const (
		year  = `^\d{4}`
		month = `(0[1-9]|1[012])`
		day   = `(0[1-9]|[12][0-9]|3[01])`
	)
	alternatives := []string{
		year + `\-` + month + `$`,
		year + `$`,
		year + `\-` + month + `\-` + day + `$`,
	}
	compiled, err := regexp.Compile(strings.Join(alternatives, "|"))
	if err != nil {
		return compiled, fmt.Errorf("error in compiling regexp %s", err)
	}
	return compiled, nil
}
// ParseFilterString converts a filter string into a slice of Filter values,
// one per expression found by the filter regexp. The filter string
// specification is defined in the corresponding protocol buffer definition.
//
// A string without any recognizable expression yields an empty slice and no
// error. An operator missing from the operator map stops parsing; the filters
// collected so far are returned together with the error.
func ParseFilterString(fstr string) ([]*Filter, error) {
	parsed := make([]*Filter, 0)
	filterRe, err := buildFilter()
	if err != nil {
		return parsed, err
	}
	matches := filterRe.FindAllStringSubmatch(fstr, -1)
	if len(matches) == 0 {
		return parsed, nil
	}
	allowed := getOperatorMap()
	for _, groups := range matches {
		operator := groups[2]
		if _, known := allowed[operator]; !known {
			return parsed, fmt.Errorf("filter operator %s not allowed", operator)
		}
		// The logic separator is the last capture group; it is empty when the
		// expression is not followed by "," or ";".
		var logic string
		if len(groups) == filterStrLen {
			logic = groups[4]
		}
		parsed = append(parsed, &Filter{
			Field:    groups[1],
			Operator: operator,
			Value:    groups[3],
			Logic:    logic,
		})
	}
	return parsed, nil
}
// GenQualifiedAQLFilterStatement generates an AQL (ArangoDB query language)
// compatible filter statement. Unlike GenAQLFilterStatement, the fields map
// is expected to hold fully qualified (namespaced) database fields, e.g.
//
//	{
//		tag:  "doc.label",
//		name: "doc.level.identifier"
//	}
//
// so the mapped value is written into the statement as is.
//
// Array operators produce a LET sub-query bound to a random variable name
// plus a "LENGTH(<name>) > 0" condition; date operators are validated and
// wrapped in DATE_ISO8601; remaining operators become plain comparisons. All
// LET sub-queries are emitted before the single FILTER clause. Filter values
// are interpolated into the statement without escaping.
//
// An error is returned for an invalid date value or an operator that none of
// the operator checks recognizes.
func GenQualifiedAQLFilterStatement(
	fmap map[string]string,
	filters []*Filter,
) (string, error) {
	lets := arraylist.New()
	conds := arraylist.New()
	for _, flt := range filters {
		field := fmap[flt.Field]
		switch {
		case hasArrayOperator(flt.Operator):
			name := arangomanager.FixedLenRandomString(strSeedLen)
			// Each LET goes to the front of its list; an array operator
			// outside the three handled below adds no LET at all.
			switch getArrayOpertaor(flt.Operator) {
			case "=~":
				lets.Insert(0, fmt.Sprintf(arrQualMatchTmpl, name, field, flt.Value))
			case "==":
				lets.Insert(0, fmt.Sprintf(arrQualEqualTmpl, name, flt.Value, field))
			case "!=":
				lets.Insert(0, fmt.Sprintf(arrQualNotEqualTmpl, name, flt.Value, field))
			}
			conds.Add(fmt.Sprintf("LENGTH(%s) > 0", name))
		case hasDateOperator(flt.Operator):
			if err := dateValidator(flt.Value); err != nil {
				return "", err
			}
			conds.Add(fmt.Sprintf(
				"%s %s DATE_ISO8601('%s')",
				field, getOperator(flt.Operator), flt.Value,
			))
		case hasOperator(flt.Operator):
			conds.Add(fmt.Sprintf(
				"%s %s %s",
				field, getOperator(flt.Operator),
				addQuoteToStrings(flt.Operator, flt.Value),
			))
		default:
			return "", fmt.Errorf(
				"unknown opertaor for parsing %s",
				flt.Operator,
			)
		}
		// Append the connective (and any grouping parenthesis) that follows
		// this filter.
		addLogic(conds, flt)
	}
	return toFullStatement(map[string]*arraylist.List{
		"let":    lets,
		"nonlet": conds,
	}), nil
}
// handleArrayOpertaor renders the LET sub-query for a filter that carries an
// array operator.
//
// prms supplies the field map and the loop variable, flt is the filter being
// rendered and randStr is the name given to the LET variable. The field is
// addressed as <variable>.<field>, where the variable is prms.Vert when it is
// set and prms.Doc otherwise — the same choice GenAQLFilterStatement makes
// for date and plain comparison filters, so every condition of one statement
// refers to the same variable.
//
// The returned string is empty when the array operator is none of "=~", "=="
// or "!=".
func handleArrayOpertaor(
	prms *StatementParameters,
	flt *Filter,
	randStr string,
) string {
	inner := prms.Doc
	if len(prms.Vert) > 0 {
		inner = prms.Vert
	}
	field := prms.Fmap[flt.Field]
	switch getArrayOpertaor(flt.Operator) {
	case "=~":
		return fmt.Sprintf(arrMatchTmpl, randStr, inner, field, flt.Value)
	case "==":
		return fmt.Sprintf(arrEqualTmpl, randStr, flt.Value, inner, field)
	case "!=":
		return fmt.Sprintf(arrNotEqualTmpl, randStr, flt.Value, inner, field)
	}
	return ""
}
// GenAQLFilterStatement generates an AQL (ArangoDB query language) compatible
// filter statement from the given parameters.
//
// Date and plain comparison fields are written as <variable>.<field>, where
// the variable is prms.Vert when it is set and prms.Doc otherwise, and the
// field is looked up in prms.Fmap. Array operators are delegated to
// handleArrayOpertaor, whose LET sub-query is placed at the front of the
// statement list and paired with a "LENGTH(<name>) > 0" condition. Filter
// values are interpolated into the statement without escaping.
//
// An error is returned for an invalid date value or an operator that none of
// the operator checks recognizes.
func GenAQLFilterStatement(prms *StatementParameters) (string, error) {
	target := prms.Vert
	if len(target) == 0 {
		target = prms.Doc
	}
	stmts := arraylist.New()
	for _, flt := range prms.Filters {
		field := prms.Fmap[flt.Field]
		switch {
		case hasArrayOperator(flt.Operator):
			name := arangomanager.FixedLenRandomString(strSeedLen)
			stmts.Insert(0, handleArrayOpertaor(prms, flt, name))
			stmts.Add(fmt.Sprintf("LENGTH(%s) > 0", name))
		case hasDateOperator(flt.Operator):
			if err := dateValidator(flt.Value); err != nil {
				return "", err
			}
			// The validated date is wrapped in DATE_ISO8601 by the template.
			stmts.Add(fmt.Sprintf(
				dateTmpl,
				target, field, getOperator(flt.Operator), flt.Value,
			))
		case hasOperator(flt.Operator):
			stmts.Add(fmt.Sprintf(
				"%s.%s %s %s",
				target, field, getOperator(flt.Operator),
				addQuoteToStrings(flt.Operator, flt.Value),
			))
		default:
			return "", fmt.Errorf(
				"unknown opertaor for parsing %s",
				flt.Operator,
			)
		}
		// Append the connective (and any grouping parenthesis) that follows
		// this filter.
		addLogic(stmts, flt)
	}
	return toString(stmts), nil
}
// addLogic appends what has to follow the condition just added for flt.
//
// Without a logic value only a pending parenthesis group may be closed. With
// one, the value is translated by getLogic: "OR" may open a group in front of
// the latest condition, "AND" may close a pending group, and in both cases the
// connective itself is appended afterwards.
func addLogic(stmts *arraylist.List, flt *Filter) {
	// Captured up front: the paren helpers index relative to the list as it
	// was when the condition had just been added.
	size := stmts.Size()
	if flt.Logic == "" {
		addClosingParen(stmts, size)
		return
	}
	connective := getLogic(flt.Logic)
	if connective == "OR" {
		addStartingParen(stmts, size)
	} else if connective == "AND" {
		addClosingParen(stmts, size)
	}
	stmts.Add(fmt.Sprintf("\n %s ", connective))
}
// addStartingParen inserts an opening parenthesis in front of the last entry
// (index currSize-1), but only while the parentheses already in the list are
// balanced, i.e. no group is currently open.
func addStartingParen(stmts *arraylist.List, currSize int) {
	if isBalancedParens(stmts) {
		stmts.Insert(currSize-1, " ( ")
	}
}
// addClosingParen appends a closing parenthesis when a group is open
// (unbalanced parentheses) and the entry at index currSize-logicIdx, the one
// preceding the latest entry, is an "OR" connective.
func addClosingParen(stmts *arraylist.List, currSize int) {
	if isBalancedParens(stmts) {
		return
	}
	// The lookup's found flag is deliberately not checked: anything that is
	// not a string is rejected by the type assertion below.
	prev, _ := stmts.Get(currSize - logicIdx)
	text, isString := prev.(string)
	if !isString {
		return
	}
	if strings.TrimSpace(text) == "OR" {
		stmts.Add(" ) ")
	}
}
// isBalancedParens reports whether the string rendering of the list contains
// as many "(" as ")" characters. Only the counts are compared, not nesting.
func isBalancedParens(stmts *arraylist.List) bool {
	rendered := stmts.String()
	opened := startPrefixRegxp.FindAllStringIndex(rendered, -1)
	closed := endPrefixRegxp.FindAllStringIndex(rendered, -1)
	return len(opened) == len(closed)
}
// toFullStatement concatenates the statement lists into the final clause:
// every entry stored under "let", then the literal "FILTER ", then every entry
// stored under "nonlet". A key missing from the map contributes nothing. All
// entries are expected to be strings.
func toFullStatement(mst map[string]*arraylist.List) string {
	var out strings.Builder
	// appendAll writes the entries stored under key in list order.
	appendAll := func(key string) {
		lst, found := mst[key]
		if !found {
			return
		}
		for it := lst.Iterator(); it.Next(); {
			out.WriteString(it.Value().(string))
		}
	}
	appendAll("let")
	out.WriteString("FILTER ")
	appendAll("nonlet")
	return out.String()
}
// toString renders the statement list as a single clause: all LET sub-queries
// first, then the literal "FILTER " followed by every other entry, each group
// in list order. All entries are expected to be strings.
//
// An entry counts as a LET sub-query only when, ignoring leading whitespace,
// it starts with the LET keyword. Looking for "LET " anywhere in the entry
// would also catch an ordinary condition whose value merely contains that
// text (for example a value of "OUTLET MALL") and move it in front of FILTER.
func toString(l *arraylist.List) string {
	var lets, conds strings.Builder
	itr := l.Iterator()
	for itr.Next() {
		stmt := itr.Value().(string)
		if strings.HasPrefix(strings.TrimSpace(stmt), "LET ") {
			lets.WriteString(stmt)
			continue
		}
		conds.WriteString(stmt)
	}
	return lets.String() + "FILTER " + conds.String()
}
// addQuoteToStrings wraps value in single quotes when ops is one of the
// string operators ("==", "===", "!=", "=~", "!~"); for any other operator the
// value is returned unchanged. The value is not escaped.
func addQuoteToStrings(ops, value string) string {
	switch ops {
	case "==", "===", "!=", "=~", "!~":
		return fmt.Sprintf("'%s'", value)
	default:
		return value
	}
}
// dateValidator checks that str is one of the date spellings accepted by the
// buildDate regexp and that now.Parse can turn it into a time value. It
// returns nil for a valid date and a descriptive error otherwise.
func dateValidator(str string) error {
	dateRe, err := buildDate()
	if err != nil {
		return err
	}
	matched := dateRe.FindString(str)
	if matched == "" {
		return fmt.Errorf("error in validating date %s", str)
	}
	// The shape is right; make sure it also parses as an actual date.
	_, parseErr := now.Parse(matched)
	if parseErr != nil {
		return fmt.Errorf("could not parse date %s %s", str, parseErr)
	}
	return nil
}