fmtbloom.c (2289B)
1 #include "stdinc.h" 2 #include "dat.h" 3 #include "fns.h" 4 5 Bloom b; 6 7 void 8 usage(void) 9 { 10 fprint(2, "usage: fmtbloom [-n nblocks | -N nhash] [-s size] file\n"); 11 threadexitsall(0); 12 } 13 14 void 15 threadmain(int argc, char *argv[]) 16 { 17 Part *part; 18 char *file; 19 vlong bits, size, size2; 20 int nhash; 21 vlong nblocks; 22 23 ventifmtinstall(); 24 statsinit(); 25 26 size = 0; 27 nhash = 0; 28 nblocks = 0; 29 ARGBEGIN{ 30 case 'n': 31 if(nhash || nblocks) 32 usage(); 33 nblocks = unittoull(EARGF(usage())); 34 break; 35 case 'N': 36 if(nhash || nblocks) 37 usage(); 38 nhash = unittoull(EARGF(usage())); 39 if(nhash > BloomMaxHash){ 40 fprint(2, "maximum possible is -N %d", BloomMaxHash); 41 usage(); 42 } 43 break; 44 case 's': 45 size = unittoull(ARGF()); 46 if(size == ~0) 47 usage(); 48 break; 49 default: 50 usage(); 51 break; 52 }ARGEND 53 54 if(argc != 1) 55 usage(); 56 57 file = argv[0]; 58 59 part = initpart(file, ORDWR|ODIRECT); 60 if(part == nil) 61 sysfatal("can't open partition %s: %r", file); 62 63 if(size == 0) 64 size = part->size; 65 66 if(size < 1024*1024) 67 sysfatal("bloom filter too small"); 68 69 if(size > MaxBloomSize){ 70 fprint(2, "warning: not using entire %,lld bytes; using only %,lld bytes\n", 71 size, (vlong)MaxBloomSize); 72 size = MaxBloomSize; 73 } 74 if(size&(size-1)){ 75 for(size2=1; size2<size; size2*=2) 76 ; 77 size = size2/2; 78 fprint(2, "warning: size not a power of 2; only using %lldMB\n", size/1024/1024); 79 } 80 81 if(nblocks){ 82 /* 83 * no use for more than 32 bits per block 84 * shoot for less than 64 bits per block 85 */ 86 size2 = size; 87 while(size2*8 >= nblocks*64) 88 size2 >>= 1; 89 if(size2 != size){ 90 size = size2; 91 fprint(2, "warning: using only %lldMB - not enough blocks to warrant more\n", 92 size/1024/1024); 93 } 94 95 /* 96 * optimal is to use ln 2 times as many hash functions as we have bits per blocks. 97 */ 98 bits = (8*size)/nblocks; 99 nhash = bits*7/10; 100 if(nhash > BloomMaxHash) 101 nhash = BloomMaxHash; 102 } 103 if(!nhash) 104 nhash = BloomMaxHash; 105 if(bloominit(&b, size, nil) < 0) 106 sysfatal("bloominit: %r"); 107 b.nhash = nhash; 108 bits = nhash*10/7; 109 nblocks = (8*size)/bits; 110 fprint(2, "fmtbloom: using %lldMB, %d hashes/score, best up to %,lld blocks\n", size/1024/1024, nhash, nblocks); 111 b.data = vtmallocz(size); 112 b.part = part; 113 if(writebloom(&b) < 0) 114 sysfatal("writing %s: %r", file); 115 threadexitsall(0); 116 }