sdbf  3.3
 All Classes Functions Variables Friends
sdbf/sdbf_defines.h
00001 /**
00002  * sdbf_defines.h: libsdbf shared definitions header (constants, task structures, utility prototypes)
00003  * author: Vassil Roussev
00004  */
00005 #include <assert.h>
00006 #include <errno.h>
00007 #include <fcntl.h>
00008 #include <math.h>
00009 #include <openssl/bio.h>
00010 #include <openssl/evp.h>
00011 #include <openssl/sha.h>
00012 #include <stdint.h>
00013 #include <stdio.h>
00014 #include <stdlib.h>
00015 #include <string.h>
00016 #include <sys/types.h>
00017 #include <sys/stat.h>
00018 #include <time.h>
00019 #include <ctype.h>
00020 
00021 #include "sdbf_class.h"
00022 #include "sdbf_conf.h"
00023 #include "sdbf_set.h"
00024 #include "util.h"
00025 #include "index_info.h"
00026 
00027 #include <vector>
00028 
00029 #include <boost/interprocess/sync/interprocess_semaphore.hpp>
00030 
00031 #ifndef __SDBF_DEF_H  // Include guard. NOTE(review): it opens only after the #include lines above, and names containing "__" are reserved to the implementation
00032 #define __SDBF_DEF_H
00033 
00034 #define _MAX_ELEM_COUNT  160     // NOTE(review): presumably the per-filter element cap in stream mode -- confirm at use sites
00035 #define _MAX_ELEM_COUNT_DD  192  // NOTE(review): presumably the "dd" mode counterpart of _MAX_ELEM_COUNT -- confirm
00036 #define _FP_THRESHOLD 4          // NOTE(review): meaning is not visible in this header -- confirm at use sites
00037 
00038 // Command-line related: field delimiter, magic strings, and generic on/off flag values
00039 #define DELIM_CHAR       ':'        // Delimiter as a character constant
00040 #define DELIM_STRING     ":"        // Same delimiter as a string literal
00041 #define MAGIC_DD        "sdbf-dd"   // NOTE(review): presumably the magic tag for dd-mode digests -- confirm
00042 #define MAGIC_STREAM    "sdbf"      // NOTE(review): presumably the magic tag for stream-mode digests -- confirm
00043 #define MAX_MAGIC_HEADER 512        // NOTE(review): presumably an upper bound on header length; unit not visible here
00044 
00045 #define FLAG_OFF      0x00
00046 #define FLAG_ON       0x01
00047 
00048 
00049 // System parameters (compile-time tuning constants; units are not stated in this header)
00050 #define BF_SIZE                256
00051 #define BINS                1000
00052 #define ENTR_POWER            10        
00053 #define ENTR_SCALE            (BINS*(1 << ENTR_POWER))  // = 1000 * 2^10 = 1,024,000
00054 #define MAX_FILES           1000000
00055 #define MAX_THREADS         512
00056 #define MIN_FILE_SIZE        512
00057 // MIN_ELEM_COUNT was changed from 6 to 16 on 3/5/13
00058 #define MIN_ELEM_COUNT     16
00059 #define MIN_REF_ELEM_COUNT  64
00060 #define POP_WIN_SIZE        64
00061 #define SD_SCORE_SCALE      0.3   // The only floating-point constant in this group
00062 #define SYNC_SIZE           16384
00063 
00064 // cpuid check, compiled on every non-Windows target (OS X/Linux share this path): local_cpuid runs CPUID with EAX=func and stores EAX/EBX/ECX/EDX into ax/bx/cx/dx. NOTE(review): the expansion already ends in ';' and ECX is not set on input -- confirm callers only query leaves that ignore ECX.
00065 
00066 #ifndef _WIN32
00067 
00068 #define local_cpuid(func,ax,bx,cx,dx)\
00069     __asm__ __volatile__ ("cpuid":\
00070     "=a" (ax), "=b" (bx), "=c" (cx), "=d" (dx) : "a" (func));
00071 
00072 #endif
00073 
00074 // P-threading task specification structure for matching SDBFs. NOTE(review): Boost's interprocess_semaphore takes an initial count at construction, so this struct presumably cannot be default-constructed -- confirm how tasks are created.
00075 typedef struct {
00076     uint32_t  tid;            // Thread id
00077     uint32_t  tcount;        // Total thread count for the job
00078     boost::interprocess::interprocess_semaphore sem_start;// Start semaphore: lets the thread enter an iteration
00079     boost::interprocess::interprocess_semaphore sem_end;// End semaphore: signals that an iteration has finished
00080     class sdbf   *ref_sdbf;      // Reference SDBF
00081     uint32_t  ref_index;    // Index of the reference BF (within ref_sdbf, presumably -- confirm)
00082     class sdbf   *tgt_sdbf;        // Target SDBF
00083     double       result;        // Result: max score for the task
00084     uint32_t  done;        // Completion flag ("are we finished"); exact values not visible here
00085 } sdbf_task_t; 
00086 
00087 // P-threading task specification structure for block hashing: one per worker thread, describing the buffer to hash and where the result goes
00088 typedef struct {
00089     uint32_t  tid;            // Thread id
00090     uint32_t  tcount;        // Total thread count for the job
00091     uint8_t  *buffer;       // File buffer to be hashed (ownership not visible in this header)
00092     uint64_t  file_size;    // Size of the file held in buffer
00093     uint64_t  block_size;   // Block size
00094     class    sdbf   *sdbf;            // Result SDBF
00095 } blockhash_task_t; 
00096 
00097 
00098 // P-threading task specification structure for file-parallel stream hashing
00099 typedef struct {
00100     uint32_t  tid;          // Thread id
00101     uint32_t  tcount;       // Total thread count for the job
00102     char    **filenames;    // Names of the files to be hashed
00103     uint32_t  file_count;   // Total number of files
00104     sdbf_set *addset;               // Set that results are added to
00105     index_info *info;         // Indexes to query against
00106 } filehash_task_t;
00107 
00108 
00109 
00110 // bf_utils.c: bit manipulation (declarations only; "bf" presumably = Bloom filter -- see bf_utils.c for the contracts)
00111 // ----------------------------
00112 uint32_t bf_bitcount( uint8_t *bfilter_1, uint8_t *bfilter_2, uint32_t bf_size);  // Takes two byte buffers plus an explicit size
00113 uint32_t bf_bitcount_cut_256( uint8_t *bfilter_1, uint8_t *bfilter_2, uint32_t cut_off, int32_t slack);  // No size argument; the "_256" suffix presumably fixes it -- confirm
00114 uint32_t bf_bitcount_cut_256_asm( uint8_t *bfilter_1, uint8_t *bfilter_2, uint32_t cut_off, int32_t slack);  // Same signature as bf_bitcount_cut_256
00115 uint32_t bf_sha1_insert( uint8_t *bf, uint8_t bf_class, uint32_t *sha1_hash);  // Hash is passed as 32-bit words; word count not visible here
00116 uint32_t bf_match_est( uint32_t m, uint32_t k, uint32_t s1, uint32_t s2, uint32_t common);
00117 int32_t  get_elem_count(class sdbf *sdbf, uint64_t index);  // Signed return, unlike its neighbours -- NOTE(review): negative may signal an error; confirm
00118 void     bf_merge( uint32_t *base, uint32_t *overlay, uint32_t size);  // Works on uint32_t words, not bytes; unit of size not visible here
00119 
00120 // base64.c: Base64 encoding/decoding (declarations only; ownership of returned buffers is not visible here -- check base64.c)
00121 // ----------------------------------
00122 char     *b64encode(const char *input, int length);  // Input is const-qualified
00123 char     *b64decode(char *input, int length, int *decoded_len);  // Input is not const; decoded_len is presumably an out-parameter -- confirm
00124 uint64_t  b64decode_into( const uint8_t *input, uint64_t length, uint8_t *output);  // Writes into caller-supplied output; required capacity not visible here
00125 
00126 #endif