sdbf  3.3
 All Classes Functions Variables Friends
sdbf/bloom_filter.h
00001 // Header file declaring the bloom_filter class
00002 //
00003 #ifndef _BLOOM_FILTER_H 
00004 #define _BLOOM_FILTER_H
00005 
00006 
00007 #include <stdint.h>
00008 #include <string>
00009 //#include <strings.h>
00010 
00011 using namespace std;
00012 
00013 /**
00014     bloom_filter:  a Bloom filter class. Declares SHA1 insert/query, fold/add of filters, and output to an .idx file.
00015 */
00016 /// bloom_filter class (declarations only; member definitions are not part of this header)
00017 class bloom_filter {
00018 
00019 public:
00020     /// base constructor: takes filter size, hash-function count, max element count and max false-positive rate (size presumably in bytes, cf. bf_size -- confirm)
00021     bloom_filter(uint64_t size, uint16_t hash_count, uint64_t max_elem, double max_fp); 
00022 
00023     /// construct from the file named by indexfilename; per the original note, does not add to master or fold up
00024     bloom_filter(string indexfilename);
00025 
00026     /// construct bloom filter from a caller-supplied buffer (data, size) plus id, element count and hamming weight; NOTE(review): buffer ownership is not visible here (see 'created')
00027     bloom_filter(uint8_t* data,uint64_t size,int id, int bf_elem_ct, uint16_t hamming);
00028     
00029     /// destructor
00030     ~bloom_filter();
00031 
00032     /// insert SHA1 hash, passed as a pointer to 32-bit words; meaning of the bool result is defined in the implementation
00033     bool insert_sha1(uint32_t *sha1);
00034     
00035     /// query SHA1 hash, passed as a pointer to 32-bit words; presumably true means "possibly present" -- confirm in implementation
00036     bool query_sha1(uint32_t *sha1);
00037 
00038     /// return element count (presumably bf_elem_count -- confirm)
00039     uint64_t elem_count();
00040     /// return estimate of false positive rate
00041     double est_fp_rate();    
00042     /// return bits per element
00043     double bits_per_elem();
00044  
00045     /// return the name associated with this bloom filter
00046     string name() const;
00047     /// change the name associated with this bloom filter
00048     void set_name(string name);
00049     /// fold a large bloom filter onto itself; 'times' is presumably the number of folds -- confirm
00050     void fold(uint32_t times);
00051     /// add another same-sized bloom filter to this one; meaning of the int result is defined in the implementation
00052     int add(bloom_filter *other);
00053     /// write bloom filter to the .idx file named by filename; meaning of the int result is defined in the implementation
00054     int write_out(string filename);
00055 
00056     /// return the id associated with this bloom filter (used for grouping)
00057     int bloom_id();  
00058     void set_bloom_id(int id); ///< presumably changes the id returned by bloom_id() -- confirm
00059 
00060 private:
00061     /// actual query/insert function; mode_set presumably selects insert (set bits) vs. query only -- confirm
00062     bool query_and_set(uint32_t *sha1, bool mode_set);
00063     /// compress blob; NOTE(review): ownership of the returned char* is not visible here
00064     char* compress() ;
00065     /// decompress blob from src and assign to bf; meaning of the int32_t result is defined in the implementation
00066     int32_t decompress(char* src);
00067 public:
00068     static const uint32_t BIT_MASKS_32[]; // constant table; declared here, defined outside this header
00069     static const uint32_t BITS[]; // constant table; declared here, defined outside this header
00070 
00071     uint8_t  *bf;            // Beginning of the BF (publicly accessible raw pointer)
00072     uint16_t  hamming;        // Hamming weight of this bf (presumably count of set bits -- confirm)
00073 private:
00074     uint64_t  max_elem;      // Max number of elements
00075     double    max_fp;        // Max FP (false positive) rate
00076     
00077     uint64_t  bf_size;       // BF size in bytes (==m/8)
00078     uint64_t  bf_elem_count; // Actual number of elements inserted
00079     uint16_t  hash_count;    // Number of hash functions used (k)
00080     uint64_t  bit_mask;      // Bit mask
00081     uint64_t  comp_size;     // size of compressed bf to be read
00082     string    setname;       // name associated with bloom filter
00083     bool      created;       // set if we allocated the bloom filter ourselves
00084     int          bl_id; // presumably the id exposed via bloom_id()/set_bloom_id() -- confirm
00085 
00086 };
00087 
00088 #endif