sdbf
3.3
|
00001 /** 00002 * sdbf.h: libsdbf header file 00003 * author: Vassil Roussev 00004 */ 00005 #include <assert.h> 00006 #include <errno.h> 00007 #include <fcntl.h> 00008 #include <math.h> 00009 #include <openssl/bio.h> 00010 #include <openssl/evp.h> 00011 #include <openssl/sha.h> 00012 #include <stdint.h> 00013 #include <stdio.h> 00014 #include <stdlib.h> 00015 #include <string.h> 00016 #include <sys/types.h> 00017 #include <sys/stat.h> 00018 #include <time.h> 00019 #include <ctype.h> 00020 00021 #include "sdbf_class.h" 00022 #include "sdbf_conf.h" 00023 #include "sdbf_set.h" 00024 #include "util.h" 00025 #include "index_info.h" 00026 00027 #include <vector> 00028 00029 #include <boost/interprocess/sync/interprocess_semaphore.hpp> 00030 00031 #ifndef __SDBF_DEF_H 00032 #define __SDBF_DEF_H 00033 00034 #define _MAX_ELEM_COUNT 160 00035 #define _MAX_ELEM_COUNT_DD 192 00036 #define _FP_THRESHOLD 4 00037 00038 // Command-line related 00039 #define DELIM_CHAR ':' 00040 #define DELIM_STRING ":" 00041 #define MAGIC_DD "sdbf-dd" 00042 #define MAGIC_STREAM "sdbf" 00043 #define MAX_MAGIC_HEADER 512 00044 00045 #define FLAG_OFF 0x00 00046 #define FLAG_ON 0x01 00047 00048 00049 // System parameters 00050 #define BF_SIZE 256 00051 #define BINS 1000 00052 #define ENTR_POWER 10 00053 #define ENTR_SCALE (BINS*(1 << ENTR_POWER)) 00054 #define MAX_FILES 1000000 00055 #define MAX_THREADS 512 00056 #define MIN_FILE_SIZE 512 00057 // changing 6 to 16, 3/5/13 00058 #define MIN_ELEM_COUNT 16 00059 #define MIN_REF_ELEM_COUNT 64 00060 #define POP_WIN_SIZE 64 00061 #define SD_SCORE_SCALE 0.3 00062 #define SYNC_SIZE 16384 00063 00064 // ugly ugly cpuid check. have to include it for OS X/Linux on same compile 00065 00066 #ifndef _WIN32 00067 00068 #define local_cpuid(func,ax,bx,cx,dx)\ 00069 __asm__ __volatile__ ("cpuid":\ 00070 "=a" (ax), "=b" (bx), "=c" (cx), "=d" (dx) : "a" (func)); 00071 00072 #endif 00073 00074 // P-threading task spesicification structure for matching SDBFs 00075 typedef struct { 00076 uint32_t tid; // Thread id 00077 uint32_t tcount; // Total thread count for the job 00078 boost::interprocess::interprocess_semaphore sem_start;// Starting semaphore (allows thread to enter iteration) 00079 boost::interprocess::interprocess_semaphore sem_end;// Ending semaphore (signals the end of an iteration) 00080 class sdbf *ref_sdbf; // Reference SDBF 00081 uint32_t ref_index; // Index of the reference BF 00082 class sdbf *tgt_sdbf; // Target SDBF 00083 double result; // Result: max score for the task 00084 uint32_t done; // Are we finished 00085 } sdbf_task_t; 00086 00087 // P-threading task specification structure for block hashing 00088 typedef struct { 00089 uint32_t tid; // Thread id 00090 uint32_t tcount; // Total thread count for the job 00091 uint8_t *buffer; // File buffer to be hashed 00092 uint64_t file_size; // File size (for the buffer) 00093 uint64_t block_size; // Block size 00094 class sdbf *sdbf; // Result SDBF 00095 } blockhash_task_t; 00096 00097 00098 // P-threading task specification file-parallel stream hashing 00099 typedef struct { 00100 uint32_t tid; // Thread id 00101 uint32_t tcount; // Total thread count for the job 00102 char **filenames; // Files to be hashed 00103 uint32_t file_count; // Total number of files 00104 sdbf_set *addset; // where to add the result to 00105 index_info *info; // indexes to query against 00106 } filehash_task_t; 00107 00108 00109 00110 // bf_utils.c: bit manipulation 00111 // ---------------------------- 00112 uint32_t bf_bitcount( uint8_t *bfilter_1, uint8_t *bfilter_2, uint32_t bf_size); 00113 uint32_t bf_bitcount_cut_256( uint8_t *bfilter_1, uint8_t *bfilter_2, uint32_t cut_off, int32_t slack); 00114 uint32_t bf_bitcount_cut_256_asm( uint8_t *bfilter_1, uint8_t *bfilter_2, uint32_t cut_off, int32_t slack); 00115 uint32_t bf_sha1_insert( uint8_t *bf, uint8_t bf_class, uint32_t *sha1_hash); 00116 uint32_t bf_match_est( uint32_t m, uint32_t k, uint32_t s1, uint32_t s2, uint32_t common); 00117 int32_t get_elem_count(class sdbf *sdbf, uint64_t index); 00118 void bf_merge( uint32_t *base, uint32_t *overlay, uint32_t size); 00119 00120 // base64.c: Base64 encoding/decoding 00121 // ---------------------------------- 00122 char *b64encode(const char *input, int length); 00123 char *b64decode(char *input, int length, int *decoded_len); 00124 uint64_t b64decode_into( const uint8_t *input, uint64_t length, uint8_t *output); 00125 00126 #endif