cozylife/hackfastalgos

View on GitHub
lib/datastructure/bloomfilter.php

Summary

Maintainability
A
0 mins
Test Coverage
<?HH
/**
 * Hack Fast Algos
 *
 * Implementation of a Bloom Filter
 * Learm more @link https://en.wikipedia.org/wiki/Bloom_filter
 * @link https://github.com/bitcoin/bitcoin/blob/219b916545f3be194eb53801bfb8d0694978fb00/src/bloom.cpp
 */

namespace HackFastAlgos\DataStructure;

newtype BloomMap = Map<int,int>;

class BloomFilter
{
    private BloomMap $bloomData = Map{};
    private BloomMap $removedBloomData = Map{};

    public function __construct(private int $size, private int $ddosSeed = 0){}

    /**
     * Runs in O(k*n) where "k" is the number of items in the vector, and n is the length of the item getting hashed.
     * Omega(n) best case
     */
    public function insert<T>(Vector<T> $items)
    {
        $this->addItemsToFilter($items, $this->bloomData);
    }

    /**
     * Runs in O(k*n) where "k" is the number of items in the vector, and n is the length of the item getting hashed.
     * Omega(n) best case
     */
    public function delete<T>(Vector<T> $items)
    {
        $this->addItemsToFilter($items, $this->removedBloomData);
    }

    /**
     * Runs in O(k*n) where "k" is the number of items in the vector, and n is the length of the item getting hashed.
     * Omega(n) best case
     */
    public function exists<T>(Vector<T> $items) : bool
    {
        $totalItems = $items->count();
        for ($i = 0; $i < $totalItems; $i++) {
            $location = $this->getLocationForItemAtPos($items[$i], $i+1);
            if (!$this->isInsertedItem($location)) {
                return false;
            }
        }

        return true;
    }

    /**
     * Runs in O(k*n) where "k" is the number of items in the vector, and n is the length of the item getting hashed.
     * Omega(n) best case
     */
    private function addItemsToFilter<T>(Vector<T> $items, BloomMap $filter)
    {
        $totalItems = $items->count();
        for ($i = 0; $i < $totalItems; $i++) {
            $location = $this->getLocationForItemAtPos($items[$i], $i+1);
            $filter[$location] = 1;
        }
    }

    private function getLocationForItemAtPos<T>(T $item, int $position) : int
    {
        return $this->hash($item, $position);
    }

    /**
     * Runs in Theta(n) time.
     */
    private function hash<T>(T $item, int $position) : int
    {
        $murmur = new \HackFastAlgos\MurmurHash3();
        $hash = $murmur->hash($item, $this->ddosSeed);
        return $hash % ($this->size * $position * 2);
    }

    private function isInsertedItem(int $location) : bool
    {
        if ($this->bloomData->containsKey($location) && !$this->removedBloomData->containsKey($location)) {
            return true;
        }

        return false;
    }
}