sdbf
3.3
|
// Header file for bloom_filter object
//
#ifndef _BLOOM_FILTER_H
#define _BLOOM_FILTER_H


#include <stdint.h>
#include <string>
//#include <strings.h>

// Kept only for backward compatibility: translation units that include this
// header may rely on unqualified standard-library names. The declarations
// below spell out std:: so the header itself no longer depends on it.
using namespace std;

/**
    bloom_filter:  a Bloom filter class.

    The filter bits live in the raw buffer `bf`; the `created` flag records
    whether this object allocated that buffer itself. Because of that
    ownership tracking, objects of this class cannot be copied (see the
    private copy operations at the end of the class).
*/
/// bloom_filter class
class bloom_filter {

public:
    /// base constructor
    /// (size presumably in bytes, hash_count = number of hash functions,
    /// max_elem / max_fp = element and false-positive limits -- confirm
    /// against the implementation)
    bloom_filter(uint64_t size, uint16_t hash_count, uint64_t max_elem, double max_fp);

    /// construct from file - not add to master or fold up.
    bloom_filter(std::string indexfilename);

    /// construct bloom filter from buffer
    bloom_filter(uint8_t* data, uint64_t size, int id, int bf_elem_ct, uint16_t hamming);

    /// destructor
    ~bloom_filter();

    /// insert SHA1 hash
    bool insert_sha1(uint32_t *sha1);

    /// query SHA1 hash
    bool query_sha1(uint32_t *sha1);

    /// return element count
    uint64_t elem_count();
    /// return estimate of false positive rate
    double est_fp_rate();
    /// return bits per element
    double bits_per_elem();

    /// name associated with bloom filter
    std::string name() const;
    /// change name associated with bloom filter
    void set_name(std::string name);
    /// fold a large bloom filter onto itself
    void fold(uint32_t times);
    /// add another same-sized bloom filter to this one
    int add(bloom_filter *other);
    /// write bloom filter to .idx file
    int write_out(std::string filename);

    /// id associated with bloom filter (used for grouping)
    int bloom_id();
    /// change id associated with bloom filter
    void set_bloom_id(int id);

private:
    /// actual query/insert function
    /// (mode_set presumably selects insert vs. query -- confirm in the
    /// implementation)
    bool query_and_set(uint32_t *sha1, bool mode_set);
    /// compress blob
    char* compress();
    /// decompress blob and assign to bf
    int32_t decompress(char* src);

public:
    static const uint32_t BIT_MASKS_32[];
    static const uint32_t BITS[];

    uint8_t  *bf;               // Beginning of the BF
    uint16_t  hamming;          // weight of this bf

private:
    uint64_t  max_elem;         // Max number of elements
    double    max_fp;           // Max FP rate

    uint64_t  bf_size;          // BF size in bytes (==m/8)
    uint64_t  bf_elem_count;    // Actual number of elements inserted
    uint16_t  hash_count;       // Number of hash functions used (k)
    uint64_t  bit_mask;         // Bit mask
    uint64_t  comp_size;        // size of compressed bf to be read
    std::string setname;        // name associated with bloom filter
    bool      created;          // set if we allocated the bloom filter ourselves
    int       bl_id;            // id associated with bloom filter

    // Copying is disabled. An implicit member-wise copy would duplicate the
    // raw `bf` pointer together with the `created` flag, so two objects
    // could end up treating the same buffer as their own. Both operations
    // are declared private and intentionally left undefined (pre-C++11
    // idiom): outside code fails to compile, member code fails to link.
    bloom_filter(const bloom_filter&);
    bloom_filter& operator=(const bloom_filter&);
};

#endif