Ananto30/ask-hadith

View on GitHub
api/search.go

Summary

Maintainability
A
1 hr
Test Coverage
/*
Package handler provides the HTTP handlers for the serverless API.
This file contains the handler for the /search endpoint.
*/
package handler

import (
    "context"
    "encoding/base64"
    "encoding/json"
    "fmt"
    "net/http"
    "os"
    "regexp"
    "strconv"
    "strings"
    "time"

    lru "github.com/hashicorp/golang-lru"
    "go.mongodb.org/mongo-driver/bson"
    "go.mongodb.org/mongo-driver/mongo"
    "go.mongodb.org/mongo-driver/mongo/options"
)

const (
    // pageSize is the value given to the $limit stage of the aggregation
    // pipeline built by pipelineQueryGroupByCollection.
    pageSize = 500
)

var (
    // mongoClient is the shared MongoDB client. It starts out nil and is
    // set by getMongoClient on its first successful connection.
    mongoClient *mongo.Client
    // cache is the shared LRU cache of responses keyed by sanitized query.
    // It starts out nil and is created by getCache on first use.
    cache       *lru.Cache

    // shortNames lists the collection identifiers that isSpecificHadith
    // accepts as the first word of a "<collection> <number>" lookup.
    shortNames         = []string{"bukhari", "abudawud", "nasai", "tirmidhi", "ibnmajah", "muslim"}
    // simpleEnglishRegex matches strings made up solely of ASCII letters,
    // digits and whitespace; validateQuery rejects anything else.
    simpleEnglishRegex = regexp.MustCompile(`^[a-zA-Z0-9\s]+$`)
)

// SearchHadith returns a list of hadiths that match the search query.
// SearchHadith is the HTTP handler for the search endpoint.
//
// It reads the "search" query parameter, rejects it with 400 when
// validateQuery fails, and otherwise sanitizes it and uses the result as the
// cache key. A cached value is returned as-is; on a miss the query is run
// through searchInMongo, converted to a SearchResponse, cached, and sent as
// JSON. Cache-construction and search errors are reported with a 500.
func SearchHadith(w http.ResponseWriter, r *http.Request) {
    rawQuery := r.URL.Query().Get("search")
    reason, valid := validateQuery(rawQuery)
    if !valid {
        sendBadRequestResp(w, reason)
        return
    }
    key := sanitizeQuery(rawQuery)

    // Named respCache so the package-level cache variable is not shadowed.
    respCache, err := getCache()
    if err != nil {
        sendServerErrorResp(w, err)
        return
    }

    if cached, hit := respCache.Get(key); hit {
        fmt.Println("cache hit")
        sendResp(w, cached)
        return
    }

    colResps, err := searchInMongo(r.Context(), key)
    if err != nil {
        sendServerErrorResp(w, err)
        return
    }

    resp := colRespToSearchResp(colResps)
    respCache.Add(key, resp)
    fmt.Println("cache added")

    sendResp(w, resp)
}

// searchInMongo resolves a sanitized query to per-collection results.
//
// A query of the form "<collection short name> <number>" (see
// isSpecificHadith) is answered by a direct lookup through getHadith; any
// other query goes through the full-text aggregation in compoundSearch.
//
// On success the returned slice is always non-nil, so it encodes to a JSON
// array even when nothing matched. On failure it returns nil and the error.
//
// This function deliberately does not touch the cache. SearchHadith stores
// the final SearchResponse under the query key; also storing the
// intermediate []CollectionResponse here under that same key would put a
// second value type in the cache and would dereference the package-level
// cache variable, which is nil unless getCache has already run.
func searchInMongo(ctx context.Context, query string) ([]CollectionResponse, error) {
    words := strings.Fields(query)
    if isSpecificHadith(words) {
        hadiths, err := getHadith(ctx, strings.ToLower(words[0]), words[1])
        if err != nil {
            return nil, err
        }

        // Copy into a fresh slice so the result is non-nil regardless of
        // what getHadith returned.
        result := make([]CollectionResponse, 0, len(hadiths))
        return append(result, hadiths...), nil
    }

    hadithGroups, err := compoundSearch(ctx, query)
    if err != nil {
        return nil, err
    }

    result := make([]CollectionResponse, 0, len(hadithGroups))
    for _, group := range hadithGroups {
        result = append(result, mongoResultToColResp(group))
    }
    return result, nil
}

// compoundSearch runs the aggregation pipeline produced by
// pipelineQueryGroupByCollection against the "hadiths" collection of the
// "hadith" database and decodes every resulting document into a
// MongoGroupByResult. Any client, aggregation or decoding error is returned
// unchanged with a nil slice.
func compoundSearch(ctx context.Context, query string) ([]MongoGroupByResult, error) {
    client, err := getMongoClient()
    if err != nil {
        return nil, err
    }

    hadiths := client.Database("hadith").Collection("hadiths")
    cursor, err := hadiths.Aggregate(ctx, pipelineQueryGroupByCollection(query))
    if err != nil {
        return nil, err
    }

    var groups []MongoGroupByResult
    if err := cursor.All(ctx, &groups); err != nil {
        return nil, err
    }
    return groups, nil
}

// pipelineQueryGroupByCollection builds the aggregation pipeline for a
// free-text search. The stages are, in order:
//
//  1. $search: a compound query with a single "should" phrase clause over
//     body_en and chapter_en (slop 5), with highlighting on the same paths.
//  2. $group: one document per collection_id carrying the collection name,
//     the number of matches, and the matched hadiths together with their
//     search score and highlights.
//  3. $limit: pageSize.
//  4. $sort: descending on "hadiths.score".
func pipelineQueryGroupByCollection(query string) []bson.M {
    // A fuzzy "must" text clause used to sit alongside "should" and is
    // currently disabled:
    //
    //     "must": bson.A{bson.M{"text": bson.M{
    //         "query": query,
    //         "path":  bson.A{"body_en", "chapter_en"},
    //         "fuzzy": bson.M{"maxEdits": 1, "maxExpansions": 10},
    //     }}},
    phraseClause := bson.M{
        "phrase": bson.M{
            "query": query,
            "path":  bson.A{"body_en", "chapter_en"},
            "slop":  5,
        },
    }
    searchStage := bson.M{
        "$search": bson.M{
            "compound": bson.M{
                "should": bson.A{phraseClause},
            },
            "highlight": bson.M{
                "path": bson.A{"body_en", "chapter_en"},
            },
        },
    }

    // Every stored field is copied through under its own name; the two
    // search metadata values are added on top.
    pushedHadith := bson.M{
        "score":      bson.M{"$meta": "searchScore"},
        "highlights": bson.M{"$meta": "searchHighlights"},
    }
    storedFields := []string{
        "collection_id", "collection", "hadith_no", "book_no", "book_en",
        "chapter_no", "chapter_en", "narrator_en", "body_en", "book_ref_no",
        "hadith_grade",
    }
    for _, field := range storedFields {
        pushedHadith[field] = "$" + field
    }

    groupStage := bson.M{
        "$group": bson.M{
            "_id":        "$collection_id",
            "collection": bson.M{"$first": "$collection"},
            "count":      bson.M{"$sum": 1},
            "hadiths":    bson.M{"$push": pushedHadith},
        },
    }

    // NOTE(review): $limit runs after $group, so it appears to cap the number
    // of collection groups rather than the number of hadiths — confirm this
    // is the intended placement.
    limitStage := bson.M{"$limit": pageSize}
    sortStage := bson.M{"$sort": bson.M{"hadiths.score": -1}}

    return []bson.M{searchStage, groupStage, limitStage, sortStage}
}

// getHadith looks up a hadith by collection identifier and hadith number in
// the "hadiths" collection of the "hadith" database.
//
// The number is matched with the regular expression hadithNo + ".*" and no
// regex options. All matching documents are decoded, but only the first one
// is returned, wrapped in a single CollectionResponse whose Count is 1. When
// nothing matches, a non-nil empty slice is returned.
//
// NOTE(review): the pattern is unanchored, so it presumably also matches
// numbers that merely contain hadithNo; which document comes first then
// depends on the order the server returns them in — confirm.
func getHadith(ctx context.Context, hadithName, hadithNo string) ([]CollectionResponse, error) {
    client, err := getMongoClient()
    if err != nil {
        return nil, err
    }

    filter := bson.M{
        "collection_id": hadithName,
        "hadith_no": bson.M{
            "$regex":   hadithNo + ".*",
            "$options": "",
        },
    }
    cursor, err := client.Database("hadith").Collection("hadiths").Find(ctx, filter)
    if err != nil {
        return nil, err
    }

    var matches []MongoHadith
    if err := cursor.All(ctx, &matches); err != nil {
        return nil, err
    }
    if len(matches) == 0 {
        return make([]CollectionResponse, 0), nil
    }

    // usually there is only one hadith, so only the first match is reported
    top := matches[0]
    return []CollectionResponse{
        {
            GroupByResult: &GroupByResult{
                ID:         top.CollectionID,
                Count:      1,
                Collection: top.Collection,
            },
            Hadiths: []HadithResponse{mongoHadithToResp(top)},
        },
    }, nil
}

// validateQuery checks whether a raw search query is acceptable.
//
// It returns ("", true) for a valid query, or a human-readable reason and
// false otherwise. A query is valid when it has at least 3 characters once
// leading and trailing whitespace is ignored, and consists only of ASCII
// letters, digits and whitespace (simpleEnglishRegex).
func validateQuery(query string) (string, bool) {
    // Measure the trimmed length: whitespace matches simpleEnglishRegex, so
    // without trimming a blank query such as "   " would pass validation and
    // be sent to the search backend.
    if len(strings.TrimSpace(query)) < 3 {
        return "Query must be at least 3 characters", false
    }

    if !simpleEnglishRegex.MatchString(query) {
        return "Query must be in simple English and numbers", false
    }

    return "", true
}

// sanitizeQuery normalizes a query by removing every apostrophe and
// lower-casing the remainder.
func sanitizeQuery(query string) string {
    withoutApostrophes := strings.ReplaceAll(query, "'", "")
    return strings.ToLower(withoutApostrophes)
}

// isSpecificHadith reports whether words has exactly two elements where the
// first is one of shortNames and the second parses as an integer, i.e. the
// query names a single hadith such as "bukhari 1".
func isSpecificHadith(words []string) bool {
    if len(words) != 2 {
        return false
    }
    name, number := words[0], words[1]
    return contains(shortNames, name) && isNumber(number)
}

// sendResp writes r to w as a JSON document with status 200 and a
// Content-Type of application/json. The body is the JSON encoding of r
// followed by a newline.
//
// r is marshalled before anything is written, so that a value that cannot be
// encoded yields a 500 carrying the error text instead of a 200 with a
// missing body.
func sendResp(w http.ResponseWriter, r interface{}) {
    payload, err := json.Marshal(r)
    if err != nil {
        w.WriteHeader(http.StatusInternalServerError)
        // A failed write means the client is gone; nothing more can be done.
        _, _ = w.Write([]byte(err.Error()))
        return
    }

    w.Header().Set("Content-Type", "application/json")
    w.WriteHeader(http.StatusOK)
    // The trailing newline keeps the body identical to json.Encoder output.
    // A failed write means the client is gone; nothing more can be done.
    _, _ = w.Write(append(payload, '\n'))
}

// sendBadRequestResp replies with status 400 and reason as the raw response
// body.
func sendBadRequestResp(w http.ResponseWriter, reason string) {
    body := []byte(reason)
    w.WriteHeader(http.StatusBadRequest)
    _, _ = w.Write(body)
}

// sendServerErrorResp replies with status 500 and the text of err as the raw
// response body. err must be non-nil.
func sendServerErrorResp(w http.ResponseWriter, err error) {
    w.WriteHeader(http.StatusInternalServerError)
    message := err.Error()
    _, _ = w.Write([]byte(message))
}

// getMongoClient returns the shared MongoDB client, connecting on first use.
//
// The connection string is read from the MONGO_URI environment variable and
// the connection attempt is bounded by a 10 second timeout. On success the
// client is stored in the package-level mongoClient and reused by later
// calls; on failure nothing is stored, so the next call tries again.
//
// NOTE(review): mongoClient is read and written without synchronization
// here; confirm whether concurrent first calls are possible in deployment.
func getMongoClient() (*mongo.Client, error) {
    if mongoClient != nil {
        return mongoClient, nil
    }

    ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
    defer cancel()

    // mongo.Connect creates and connects the client in one step, replacing
    // the deprecated mongo.NewClient + Client.Connect pair.
    client, err := mongo.Connect(ctx, options.Client().ApplyURI(os.Getenv("MONGO_URI")))
    if err != nil {
        return nil, err
    }

    mongoClient = client
    return mongoClient, nil
}

// getCache returns the shared LRU cache, creating it with room for 1000
// entries on first use. A construction error is returned without storing
// anything.
func getCache() (*lru.Cache, error) {
    if cache == nil {
        created, err := lru.New(1000)
        if err != nil {
            return nil, err
        }
        cache = created
    }
    return cache, nil
}

// contains reports whether e is equal to any element of s.
func contains(s []string, e string) bool {
    for i := 0; i < len(s); i++ {
        if s[i] != e {
            continue
        }
        return true
    }
    return false
}

// isNumber reports whether s can be parsed by strconv.Atoi, i.e. whether it
// is a base-10 integer that fits in an int.
func isNumber(s string) bool {
    if _, err := strconv.Atoi(s); err != nil {
        return false
    }
    return true
}

// Hadith holds the fields of one hadith document, with matching BSON and
// JSON field names. Every field except Score is a string.
type Hadith struct {
    CollectionID string  `bson:"collection_id" json:"collection_id"`
    Collection   string  `bson:"collection" json:"collection"`
    HadithNo     string  `bson:"hadith_no" json:"hadith_no"`
    BookNo       string  `bson:"book_no" json:"book_no"`
    BookEn       string  `bson:"book_en" json:"book_en"`
    ChapterNo    string  `bson:"chapter_no" json:"chapter_no"`
    ChapterEn    string  `bson:"chapter_en" json:"chapter_en"`
    NarratorEn   string  `bson:"narrator_en" json:"narrator_en"`
    BodyEn       string  `bson:"body_en" json:"body_en"`
    BookRefNo    string  `bson:"book_ref_no" json:"book_ref_no"`
    HadithGrade  string  `bson:"hadith_grade" json:"hadith_grade"`
    // Score is filled from the "score" field, which the search pipeline sets
    // to the searchScore metadata value.
    Score        float64 `bson:"score" json:"score"`
}

// MongoHadith is a hadith as decoded from MongoDB: the Hadith fields inlined
// at the same level, plus the structured search highlights.
type MongoHadith struct {
    *Hadith    `bson:",inline" json:",inline"`
    // Highlights is decoded from the "highlights" field, which the search
    // pipeline sets to the searchHighlights metadata value.
    Highlights []MongoHighlight `bson:"highlights" json:"highlights"`
}

// HadithResponse is the API representation of a hadith: the Hadith fields
// plus the highlights reduced to plain strings (see mongoHadithToResp).
type HadithResponse struct {
    *Hadith    `bson:",inline" json:",inline"`
    // Highlights holds the distinct highlighted hit texts.
    Highlights []string `json:"highlights"`
}

// MongoHighlight is one element of a hadith's "highlights" array as decoded
// from MongoDB: a path, a score, and the text fragments belonging to it.
type MongoHighlight struct {
    Path  string      `bson:"path" json:"path"`
    Score float64     `bson:"score" json:"score"`
    Texts []MongoText `bson:"texts" json:"texts"`
}

// MongoText is a single text fragment inside a MongoHighlight.
type MongoText struct {
    // Type classifies the fragment; mongoHighlightToSimpleHighlight keeps
    // only fragments whose Type is "hit".
    Type  string `bson:"type" json:"type"`
    // Value is the fragment's text.
    Value string `bson:"value" json:"value"`
}

// GroupByResult holds the per-group fields produced by the $group stage of
// the search pipeline: the group key, the number of matches, and the
// collection name.
type GroupByResult struct {
    // ID is the group key; the pipeline groups by collection_id.
    ID         string `bson:"_id" json:"_id"`
    Count      int    `bson:"count" json:"count"`
    Collection string `bson:"collection" json:"collection"`
}

// MongoGroupByResult is one grouped document as decoded from the search
// aggregation: the GroupByResult fields inlined, plus the group's hadiths in
// their MongoDB form.
type MongoGroupByResult struct {
    *GroupByResult `bson:",inline" json:",inline"`
    Hadiths        []MongoHadith `bson:"hadiths" json:"hadiths"`
}

// CollectionResponse is the API representation of one collection's results:
// the GroupByResult fields plus the hadiths converted to HadithResponse.
type CollectionResponse struct {
    *GroupByResult `bson:",inline" json:",inline"`
    Hadiths        []HadithResponse `bson:"hadiths" json:"hadiths"`
}

// SearchResponse is the JSON body returned by the search endpoint.
type SearchResponse struct {
    // Data holds the results, one entry per collection.
    Data              []CollectionResponse `json:"data"`
    // FirstHadithBase64 is the URL-safe base64 of the JSON encoding of the
    // first hadith of the first collection, or empty when there is none
    // (see colRespToSearchResp).
    FirstHadithBase64 string               `json:"first_hadith_base64"`
}

// mongoHighlightToSimpleHighlight returns, in order, the Value of every text
// fragment in highlight whose Type is "hit". The result is nil when there
// are no such fragments.
func mongoHighlightToSimpleHighlight(highlight MongoHighlight) []string {
    var hits []string
    for i := range highlight.Texts {
        fragment := highlight.Texts[i]
        if fragment.Type != "hit" {
            continue
        }
        hits = append(hits, fragment.Value)
    }
    return hits
}

// mongoHadithToResp converts a MongoHadith into its API form. The hadith
// fields are carried over by sharing the embedded *Hadith, and the hit texts
// of all highlights are collected into a Set so that each distinct text
// appears once in the response.
func mongoHadithToResp(hadith MongoHadith) HadithResponse {
    distinctHits := NewSet()
    for i := range hadith.Highlights {
        distinctHits.AddMulti(mongoHighlightToSimpleHighlight(hadith.Highlights[i]))
    }
    return HadithResponse{
        Hadith:     hadith.Hadith,
        Highlights: distinctHits.ToSlice(),
    }
}

// mongoResultToColResp converts one grouped MongoDB result into its API
// form, converting each hadith with mongoHadithToResp and reusing the
// group's embedded *GroupByResult. The Hadiths slice is nil when the group
// has no hadiths.
func mongoResultToColResp(group MongoGroupByResult) CollectionResponse {
    // Left as a nil slice on purpose: pre-allocating would turn an empty
    // group's result into a non-nil empty slice.
    var converted []HadithResponse
    for _, h := range group.Hadiths {
        converted = append(converted, mongoHadithToResp(h))
    }
    return CollectionResponse{
        GroupByResult: group.GroupByResult,
        Hadiths:       converted,
    }
}

// colRespToSearchResp wraps colResps in a SearchResponse. When the first
// collection has at least one hadith, FirstHadithBase64 is set from that
// hadith via hadithToBase64; otherwise it stays empty.
func colRespToSearchResp(colResps []CollectionResponse) SearchResponse {
    resp := SearchResponse{Data: colResps}
    if len(colResps) == 0 || len(colResps[0].Hadiths) == 0 {
        return resp
    }
    resp.FirstHadithBase64 = hadithToBase64(colResps[0].Hadiths[0])
    return resp
}

// hadithToBase64 returns the URL-safe base64 encoding of hadith's JSON
// representation, or the empty string if the hadith cannot be marshalled.
func hadithToBase64(hadith HadithResponse) string {
    encoded, err := json.Marshal(hadith)
    if err == nil {
        return base64.URLEncoding.EncodeToString(encoded)
    }
    return ""
}

// Set is a set of strings backed by a map with empty-struct values.
type Set map[string]struct{}

// NewSet returns an empty, ready-to-use Set.
func NewSet() Set {
    return Set{}
}

// Add inserts v into the set; adding a value that is already present has no
// effect.
func (s Set) Add(v string) {
    s[v] = struct{}{}
}

// AddMulti inserts every element of v into the set.
func (s Set) AddMulti(v []string) {
    for i := range v {
        s[v[i]] = struct{}{}
    }
}

// ToSlice returns the members of the set in map iteration order, which is
// unspecified. It returns nil for an empty set.
func (s Set) ToSlice() []string {
    if len(s) == 0 {
        return nil
    }
    members := make([]string, 0, len(s))
    for member := range s {
        members = append(members, member)
    }
    return members
}