mezis/blurrily

View on GitHub
ext/blurrily/storage.h

Summary

Maintainability
Test Coverage
/*
  
  storage.h --

  Trigram map creation, persistence, and qurying.

*/
#ifndef __STORAGE_H__
#define __STORAGE_H__

#include <inttypes.h>
#include "tokeniser.h"
#include "blurrily.h"

struct trigram_map_t;
typedef struct trigram_map_t* trigram_map;

struct BR_PACKED_STRUCT trigram_match_t {
  uint32_t reference;
  uint32_t matches;
  uint32_t weight;
};
typedef struct trigram_match_t trigram_match_t;
typedef struct trigram_match_t* trigram_match;

typedef struct trigram_stat_t {
  uint32_t references;
  uint32_t trigrams;

} trigram_stat_t;


/* 
  Create a new trigram map, resident in memory.
*/
int blurrily_storage_new(trigram_map* haystack);

/* 
  Load an existing trigram map from disk.
*/
int blurrily_storage_load(trigram_map* haystack, const char* path);

/* 
  Release resources claimed by <new> or <open>.
*/
int blurrily_storage_close(trigram_map* haystack);

/* 
  Mark resources managed by Ruby GC.
*/
void blurrily_storage_mark(trigram_map haystack);


/* 
  Persist to disk what <blurrily_storage_new> or <blurrily_storage_open>
  gave you.
*/
int blurrily_storage_save(trigram_map haystack, const char* path);

/*
  Add a new string to the map. <reference> is your identifier for that
  string, <weight> will be using to discriminate entries that match "as
  well" when searching.

  If <weight> is zero, it will be replaced by the number of characters in
  the <needle>.

  Returns positive on success, negative on failure.
*/
int blurrily_storage_put(trigram_map haystack, const char* needle, uint32_t reference, uint32_t weight);

/*
  Check the map for an existing <reference>.

  Returns < 0 on error, 0 if the reference is not found, the number of trigrams
  for that reference otherwise.

  If <weight> is not NULL, will be set to the weight value passed to the put
  method on return (is the reference is found).

  If <trigrams> is not NULL, it should point an array <nb_trigrams> long,
  and up to <nb_trigrams> will be copied into it matching the <needle>
  originally passed to the put method.

  Not that this is a O(n) method: the whole map will be read.
*/
// int blurrily_storage_get(trigram_map haystack, uint32_t reference, uint32_t* weight, int nb_trigrams, trigram_t* trigrams);

/*
  Remove a <reference> from the map.

  Note that this is very innefective.

  Returns positive on success, negative on failure.
*/
int blurrily_storage_delete(trigram_map haystack, uint32_t reference);

/*
  Return at most <limit> entries matching <needle> from the <haystack>.

  Results are written to <results>. The first results are the ones entries
  sharing the most trigrams with the <needle>. Amongst entries with the same
  number of matches, the lightest ones (lowest <weight>) will be returned
  first.

  <results> should be allocated by the caller.

  Returns number of matches on success, negative on failure.
*/
int blurrily_storage_find(trigram_map haystack, const char* needle, uint16_t limit, trigram_match results);

/*
  Copies metadata into <stats>

  Returns positive on success, negative on failure.
*/
int blurrily_storage_stats(trigram_map haystack, trigram_stat_t* stats);

#endif