plan9port

fork of plan9port with libvec, libstr and libsdb
Log | Files | Refs | README | LICENSE

fmtbloom.c (2289B)


      1 #include "stdinc.h"
      2 #include "dat.h"
      3 #include "fns.h"
      4 
      5 Bloom b;
      6 
      7 void
      8 usage(void)
      9 {
     10 	fprint(2, "usage: fmtbloom [-n nblocks | -N nhash] [-s size] file\n");
     11 	threadexitsall(0);
     12 }
     13 
     14 void
     15 threadmain(int argc, char *argv[])
     16 {
     17 	Part *part;
     18 	char *file;
     19 	vlong bits, size, size2;
     20 	int nhash;
     21 	vlong nblocks;
     22 
     23 	ventifmtinstall();
     24 	statsinit();
     25 
     26 	size = 0;
     27 	nhash = 0;
     28 	nblocks = 0;
     29 	ARGBEGIN{
     30 	case 'n':
     31 		if(nhash || nblocks)
     32 			usage();
     33 		nblocks = unittoull(EARGF(usage()));
     34 		break;
     35 	case 'N':
     36 		if(nhash || nblocks)
     37 			usage();
     38 		nhash = unittoull(EARGF(usage()));
     39 		if(nhash > BloomMaxHash){
     40 			fprint(2, "maximum possible is -N %d", BloomMaxHash);
     41 			usage();
     42 		}
     43 		break;
     44 	case 's':
     45 		size = unittoull(ARGF());
     46 		if(size == ~0)
     47 			usage();
     48 		break;
     49 	default:
     50 		usage();
     51 		break;
     52 	}ARGEND
     53 
     54 	if(argc != 1)
     55 		usage();
     56 
     57 	file = argv[0];
     58 
     59 	part = initpart(file, ORDWR|ODIRECT);
     60 	if(part == nil)
     61 		sysfatal("can't open partition %s: %r", file);
     62 
     63 	if(size == 0)
     64 		size = part->size;
     65 
     66 	if(size < 1024*1024)
     67 		sysfatal("bloom filter too small");
     68 
     69 	if(size > MaxBloomSize){
     70 		fprint(2, "warning: not using entire %,lld bytes; using only %,lld bytes\n",
     71 			size, (vlong)MaxBloomSize);
     72 		size = MaxBloomSize;
     73 	}
     74 	if(size&(size-1)){
     75 		for(size2=1; size2<size; size2*=2)
     76 			;
     77 		size = size2/2;
     78 		fprint(2, "warning: size not a power of 2; only using %lldMB\n", size/1024/1024);
     79 	}
     80 
     81 	if(nblocks){
     82 		/*
     83 		 * no use for more than 32 bits per block
     84 		 * shoot for less than 64 bits per block
     85 		 */
     86 		size2 = size;
     87 		while(size2*8 >= nblocks*64)
     88 			size2 >>= 1;
     89 		if(size2 != size){
     90 			size = size2;
     91 			fprint(2, "warning: using only %lldMB - not enough blocks to warrant more\n",
     92 				size/1024/1024);
     93 		}
     94 
     95 		/*
     96 		 * optimal is to use ln 2 times as many hash functions as we have bits per blocks.
     97 		 */
     98 		bits = (8*size)/nblocks;
     99 		nhash = bits*7/10;
    100 		if(nhash > BloomMaxHash)
    101 			nhash = BloomMaxHash;
    102 	}
    103 	if(!nhash)
    104 		nhash = BloomMaxHash;
    105 	if(bloominit(&b, size, nil) < 0)
    106 		sysfatal("bloominit: %r");
    107 	b.nhash = nhash;
    108 	bits = nhash*10/7;
    109 	nblocks = (8*size)/bits;
    110 	fprint(2, "fmtbloom: using %lldMB, %d hashes/score, best up to %,lld blocks\n", size/1024/1024, nhash, nblocks);
    111 	b.data = vtmallocz(size);
    112 	b.part = part;
    113 	if(writebloom(&b) < 0)
    114 		sysfatal("writing %s: %r", file);
    115 	threadexitsall(0);
    116 }