fixarenas.c (40556B)

/*
 * Check and fix an arena partition.
 *
 * This is a lot grittier than the rest of Venti because
 * it can't just give up if a byte here or there is wrong.
 *
 * The rule here (hopefully followed!) is that block corruption
 * only ever has a local effect -- there are no blocks that you
 * can wipe out that will cause large portions of
 * uncorrupted data blocks to be useless.
 */

#include "stdinc.h"
#include "dat.h"
#include "fns.h"
#include "whack.h"

#define ROUNDUP(x,n) (((x)+(n)-1)&~((n)-1))

#pragma varargck type "z" uvlong
#pragma varargck type "z" vlong
#pragma varargck type "t" uint

enum
{
	K = 1024,
	M = 1024*1024,
	G = 1024*1024*1024,

	Block = 4096,
};

int debugsha1;

int verbose;
Part *part;
char *file;
char *basename;
char *dumpbase;
int fix;
int badreads;
int unseal;
uchar zero[MaxDiskBlock];

Arena lastarena;
ArenaPart ap;
uvlong arenasize;
int nbadread;
int nbad;
uvlong partend;
void checkarena(vlong, int);

void
usage(void)
{
	fprint(2, "usage: fixarenas [-fv] [-a arenasize] [-b blocksize] file [ranges]\n");
	threadexitsall(0);
}

/*
 * Format number in simplest way that is okay with unittoull.
 */
static int
zfmt(Fmt *fmt)
{
	vlong x;

	x = va_arg(fmt->args, vlong);
	if(x == 0)
		return fmtstrcpy(fmt, "0");
	if(x%G == 0)
		return fmtprint(fmt, "%lldG", x/G);
	if(x%M == 0)
		return fmtprint(fmt, "%lldM", x/M);
	if(x%K == 0)
		return fmtprint(fmt, "%lldK", x/K);
	return fmtprint(fmt, "%lld", x);
}

/*
 * Format time like ctime without newline.
 */
static int
tfmt(Fmt *fmt)
{
	uint t;
	char buf[30];

	t = va_arg(fmt->args, uint);
	strcpy(buf, ctime(t));
	buf[28] = 0;
	return fmtstrcpy(fmt, buf);
}

/*
 * Coalesce messages about unreadable sectors into larger ranges.
 * bad(nil, 0, 0) flushes the buffer.
 */
static void
bad(char *msg, vlong o, int len)
{
	static vlong lb0, lb1;
	static char *lmsg;

	if(msg == nil)
		msg = lmsg;
	if(o == -1){
		lmsg = nil;
		lb0 = 0;
		lb1 = 0;
		return;
	}
	if(lb1 != o || (msg && lmsg && strcmp(msg, lmsg) != 0)){
		if(lb0 != lb1)
			print("%s %#llux+%#llux (%,lld+%,lld)\n",
				lmsg, lb0, lb1-lb0, lb0, lb1-lb0);
		lb0 = o;
	}
	lmsg = msg;
	lb1 = o+len;
}

/*
 * Read in the len bytes of data at the offset.  If we can't for whatever reason,
 * fill it with garbage but print an error.
 */
static uchar*
readdisk(uchar *buf, vlong offset, int len)
{
	int i, j, k, n;

	if(offset >= partend){
		memset(buf, 0xFB, len);
		return buf;
	}

	if(offset+len > partend){
		memset(buf, 0xFB, len);
		len = partend - offset;
	}

	if(readpart(part, offset, buf, len) >= 0)
		return buf;

	/*
	 * The read failed.  Clear the buffer to nonsense, and
	 * then try reading in smaller pieces.  If that fails,
	 * read in even smaller pieces.  And so on down to sectors.
	 */
	memset(buf, 0xFD, len);
	for(i=0; i<len; i+=64*K){
		n = 64*K;
		if(i+n > len)
			n = len-i;
		if(readpart(part, offset+i, buf+i, n) >= 0)
			continue;
		for(j=i; j<len && j<i+64*K; j+=4*K){
			n = 4*K;
			if(j+n > len)
				n = len-j;
			if(readpart(part, offset+j, buf+j, n) >= 0)
				continue;
			for(k=j; k<len && k<j+4*K; k+=512){
				if(readpart(part, offset+k, buf+k, 512) >= 0)
					continue;
				bad("disk read failed at", k, 512);
				badreads++;
			}
		}
	}
	bad(nil, 0, 0);
	return buf;
}

/*
 * Buffer to support running SHA1 hash of the disk.
 */
typedef struct Shabuf Shabuf;
struct Shabuf
{
	int fd;
	vlong offset;
	DigestState state;
	int rollback;
	vlong r0;
	DigestState *hist;
	int nhist;
};

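/*
 * Optionally divert a copy of the data being hashed
 * to the named file, for debugging.
 */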
void
sbdebug(Shabuf *sb, char *file)
{
	int fd;

	if(sb->fd > 0){
		close(sb->fd);
		sb->fd = 0;
	}
	if((fd = create(file, OWRITE, 0666)) < 0)
		return;
	if(fd == 0){
		fd = dup(fd, -1);
		close(0);
	}
	sb->fd = fd;
}

void
sbupdate(Shabuf *sb, uchar *p, vlong offset, int len)
{
	int n, x;
	vlong o;

	if(sb->rollback && !sb->hist){
		sb->r0 = offset;
		sb->nhist = 1;
		sb->hist = vtmalloc(sb->nhist*sizeof *sb->hist);
		memset(sb->hist, 0, sizeof sb->hist[0]);
	}
	if(sb->r0 == 0)
		sb->r0 = offset;

	if(sb->offset < offset || sb->offset >= offset+len){
		if(0) print("sbupdate %p %#llux+%d but offset=%#llux\n",
			p, offset, len, sb->offset);
		return;
	}
	x = sb->offset - offset;
	if(0) print("sbupdate %p %#llux+%d skip %d\n",
		sb, offset, len, x);
	if(x){
		p += x;
		offset += x;
		len -= x;
	}
	assert(sb->offset == offset);

	if(sb->fd > 0)
		pwrite(sb->fd, p, len, offset - sb->r0);

	if(!sb->rollback){
		sha1(p, len, nil, &sb->state);
		sb->offset += len;
		return;
	}

	/* save state every 4M so we can roll back quickly */
	o = offset - sb->r0;
	while(len > 0){
		n = 4*M - o%(4*M);
		if(n > len)
			n = len;
		sha1(p, n, nil, &sb->state);
		sb->offset += n;
		o += n;
		p += n;
		len -= n;
		if(o%(4*M) == 0){
			x = o/(4*M);
			if(x >= sb->nhist){
				if(x != sb->nhist)
					print("oops! x=%d nhist=%d\n", x, sb->nhist);
				sb->nhist += 32;
				sb->hist = vtrealloc(sb->hist, sb->nhist*sizeof *sb->hist);
			}
			sb->hist[x] = sb->state;
		}
	}
}

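/*
 * Advance the running hash over the disk contents
 * from the current offset up to eoffset, reading in 4MB chunks.
 */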
void
sbdiskhash(Shabuf *sb, vlong eoffset)
{
	static uchar dbuf[4*M];
	int n;

	while(sb->offset < eoffset){
		n = sizeof dbuf;
		if(sb->offset+n > eoffset)
			n = eoffset - sb->offset;
		readdisk(dbuf, sb->offset, n);
		sbupdate(sb, dbuf, sb->offset, n);
	}
}

void
sbrollback(Shabuf *sb, vlong offset)
{
	int x;
	vlong o;
	Dir d;

	if(!sb->rollback || !sb->r0){
		print("cannot rollback sha\n");
		return;
	}
	if(offset >= sb->offset)
		return;
	o = offset - sb->r0;
	x = o/(4*M);
	if(x >= sb->nhist){
		print("cannot rollback sha\n");
		return;
	}
	sb->state = sb->hist[x];
	sb->offset = sb->r0 + x*4*M;
	assert(sb->offset <= offset);

	if(sb->fd > 0){
		nulldir(&d);
		d.length = sb->offset - sb->r0;
		dirfwstat(sb->fd, &d);
	}
}

void
sbscore(Shabuf *sb, uchar *score)
{
	if(sb->hist){
		free(sb->hist);
		sb->hist = nil;
	}
	sha1(nil, 0, score, &sb->state);
}

/*
 * If we're fixing arenas, then editing this memory edits the disk!
 * It will be written back out as new data is paged in.
 */
uchar buf[4*M];
uchar sbuf[4*M];
vlong bufoffset;
int buflen;

static void pageout(void);
static uchar*
pagein(vlong offset, int len)
{
	pageout();
	if(offset >= partend){
		memset(buf, 0xFB, sizeof buf);
		return buf;
	}

	if(offset+len > partend){
		memset(buf, 0xFB, sizeof buf);
		len = partend - offset;
	}
	bufoffset = offset;
	buflen = len;
	readdisk(buf, offset, len);
	memmove(sbuf, buf, len);
	return buf;
}

static void
pageout(void)
{
	if(buflen==0 || !fix || memcmp(buf, sbuf, buflen) == 0){
		buflen = 0;
		return;
	}
	if(writepart(part, bufoffset, buf, buflen) < 0)
		print("disk write failed at %#llux+%#ux (%,lld+%,d)\n",
			bufoffset, buflen, bufoffset, buflen);
	buflen = 0;
}

static void
zerorange(vlong offset, int len)
{
	int i;
	vlong ooff;
	int olen;
	enum { MinBlock = 4*K, MaxBlock = 8*K };

	if(0)
	if(bufoffset <= offset && offset+len <= bufoffset+buflen){
		memset(buf+(offset-bufoffset), 0, len);
		return;
	}

	ooff = bufoffset;
	olen = buflen;

	i = offset%MinBlock;
	if(i+len < MaxBlock){
		pagein(offset-i, (len+MinBlock-1)&~(MinBlock-1));
		memset(buf+i, 0, len);
	}else{
		pagein(offset-i, MaxBlock);
		memset(buf+i, 0, MaxBlock-i);
		offset += MaxBlock-i;
		len -= MaxBlock-i;
		while(len >= MaxBlock){
			pagein(offset, MaxBlock);
			memset(buf, 0, MaxBlock);
			offset += MaxBlock;
			len -= MaxBlock;
		}
		pagein(offset, (len+MinBlock-1)&~(MinBlock-1));
		memset(buf, 0, len);
	}
	pagein(ooff, olen);
}

/*
 * read/write integers
 *
static void
p16(uchar *p, u16int u)
{
	p[0] = (u>>8) & 0xFF;
	p[1] = u & 0xFF;
}
*/

static u16int
u16(uchar *p)
{
	return (p[0]<<8)|p[1];
}

static void
p32(uchar *p, u32int u)
{
	p[0] = (u>>24) & 0xFF;
	p[1] = (u>>16) & 0xFF;
	p[2] = (u>>8) & 0xFF;
	p[3] = u & 0xFF;
}

static u32int
u32(uchar *p)
{
	return (p[0]<<24)|(p[1]<<16)|(p[2]<<8)|p[3];
}

/*
static void
p64(uchar *p, u64int u)
{
	p32(p, u>>32);
	p32(p, u);
}
*/

static u64int
u64(uchar *p)
{
	return ((u64int)u32(p)<<32) | u32(p+4);
}

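/*
 * qsort comparison for vlong offsets and spacings.
 */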
static int
vlongcmp(const void *va, const void *vb)
{
	vlong a, b;

	a = *(vlong*)va;
	b = *(vlong*)vb;
	if(a < b)
		return -1;
	if(a > b)
		return 1;
	return 0;
}

/* D and S are in draw.h */
#define D VD
#define S VS

enum
{
	D = 0x10000,
	Z = 0x20000,
	S = 0x30000,
	T = 0x40000,
	N = 0xFFFF
};
typedef struct Info Info;
struct Info
{
	int len;
	char *name;
};

Info partinfo[] = {
	4, "magic",
	D|4, "version",
	Z|4, "blocksize",
	4, "arenabase",
	0
};

Info headinfo4[] = {
	4, "magic",
	D|4, "version",
	S|ANameSize, "name",
	Z|4, "blocksize",
	Z|8, "size",
	0
};

Info headinfo5[] = {
	4, "magic",
	D|4, "version",
	S|ANameSize, "name",
	Z|4, "blocksize",
	Z|8, "size",
	4, "clumpmagic",
	0
};

Info tailinfo4[] = {
	4, "magic",
	D|4, "version",
	S|ANameSize, "name",
	D|4, "clumps",
	D|4, "cclumps",
	T|4, "ctime",
	T|4, "wtime",
	D|8, "used",
	D|8, "uncsize",
	1, "sealed",
	0
};

Info tailinfo4a[] = {
	/* tailinfo 4 */
	4, "magic",
	D|4, "version",
	S|ANameSize, "name",
	D|4, "clumps",
	D|4, "cclumps",
	T|4, "ctime",
	T|4, "wtime",
	D|8, "used",
	D|8, "uncsize",
	1, "sealed",

	/* mem stats */
	1, "extension",
	D|4, "mem.clumps",
	D|4, "mem.cclumps",
	D|8, "mem.used",
	D|8, "mem.uncsize",
	1, "mem.sealed",
	0
};

Info tailinfo5[] = {
	4, "magic",
	D|4, "version",
	S|ANameSize, "name",
	D|4, "clumps",
	D|4, "cclumps",
	T|4, "ctime",
	T|4, "wtime",
	4, "clumpmagic",
	D|8, "used",
	D|8, "uncsize",
	1, "sealed",
	0
};

Info tailinfo5a[] = {
	/* tailinfo 5 */
	4, "magic",
	D|4, "version",
	S|ANameSize, "name",
	D|4, "clumps",
	D|4, "cclumps",
	T|4, "ctime",
	T|4, "wtime",
	4, "clumpmagic",
	D|8, "used",
	D|8, "uncsize",
	1, "sealed",

	/* mem stats */
	1, "extension",
	D|4, "mem.clumps",
	D|4, "mem.cclumps",
	D|8, "mem.used",
	D|8, "mem.uncsize",
	1, "mem.sealed",
	0
};

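/*
 * Print the fields that differ between the expected (want)
 * and on-disk (have) copies of a structure, as described by info.
 */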
void
showdiffs(uchar *want, uchar *have, int len, Info *info)
{
	int n;

	while(len > 0 && (n=info->len&N) > 0){
		if(memcmp(have, want, n) != 0){
			switch(info->len){
			case 1:
				print("\t%s: correct=%d disk=%d\n",
					info->name, *want, *have);
				break;
			case 4:
				print("\t%s: correct=%#ux disk=%#ux\n",
					info->name, u32(want), u32(have));
				break;
			case D|4:
				print("\t%s: correct=%,ud disk=%,ud\n",
					info->name, u32(want), u32(have));
				break;
			case T|4:
				print("\t%s: correct=%t\n\t\tdisk=%t\n",
					info->name, u32(want), u32(have));
				break;
			case Z|4:
				print("\t%s: correct=%z disk=%z\n",
					info->name, (uvlong)u32(want), (uvlong)u32(have));
				break;
			case D|8:
				print("\t%s: correct=%,lld disk=%,lld\n",
					info->name, u64(want), u64(have));
				break;
			case Z|8:
				print("\t%s: correct=%z disk=%z\n",
					info->name, u64(want), u64(have));
				break;
			case S|ANameSize:
				print("\t%s: correct=%s disk=%.*s\n",
					info->name, (char*)want,
					utfnlen((char*)have, ANameSize-1),
					(char*)have);
				break;
			default:
				print("\t%s: correct=%.*H disk=%.*H\n",
					info->name, n, want, n, have);
				break;
			}
		}
		have += n;
		want += n;
		len -= n;
		info++;
	}
	if(len > 0 && memcmp(have, want, len) != 0){
		if(memcmp(want, zero, len) != 0)
			print("!!\textra want data in showdiffs (bug in fixarenas)\n");
		else
			print("\tnon-zero data on disk after structure\n");
		if(verbose > 1){
			print("want: %.*H\n", len, want);
			print("have: %.*H\n", len, have);
		}
	}
}

/*
 * Does part begin with an arena?
 */
int
isonearena(void)
{
	return u32(pagein(0, Block)) == ArenaHeadMagic;
}

static int tabsizes[] = { 16*1024, 64*1024, 512*1024, 768*1024, };
/*
 * Poke around on the disk to guess what the ArenaPart numbers are.
 */
void
guessgeometry(void)
{
	int i, j, n, bestn, ndiff, nhead, ntail;
	uchar *p, *ep, *sp;
	u64int diff[100], head[20], tail[20];
	u64int offset, bestdiff;

	ap.version = ArenaPartVersion;

	if(arenasize == 0 || ap.blocksize == 0){
		/*
		 * The ArenaPart block at offset PartBlank may be corrupt or just wrong.
		 * Instead, look for the individual arena headers and tails, which there
		 * are many of, and once we've seen enough, infer the spacing.
		 *
		 * Of course, nothing in the file format requires that arenas be evenly
		 * spaced, but fmtarenas always does that for us.
		 */
		nhead = 0;
		ntail = 0;
		for(offset=PartBlank; offset<partend; offset+=4*M){
			p = pagein(offset, 4*M);
			for(sp=p, ep=p+4*M; p<ep; p+=K){
				if(u32(p) == ArenaHeadMagic && nhead < nelem(head)){
					if(verbose)
						print("arena head at %#llx\n", offset+(p-sp));
					head[nhead++] = offset+(p-sp);
				}
				if(u32(p) == ArenaMagic && ntail < nelem(tail)){
					tail[ntail++] = offset+(p-sp);
					if(verbose)
						print("arena tail at %#llx\n", offset+(p-sp));
				}
			}
			if(nhead == nelem(head) && ntail == nelem(tail))
				break;
		}
		if(nhead < 3 && ntail < 3)
			sysfatal("too few intact arenas: %d heads, %d tails", nhead, ntail);

		/*
		 * Arena size is likely the most common
		 * inter-head or inter-tail spacing.
		 */
		ndiff = 0;
		for(i=1; i<nhead; i++)
			diff[ndiff++] = head[i] - head[i-1];
		for(i=1; i<ntail; i++)
			diff[ndiff++] = tail[i] - tail[i-1];
		qsort(diff, ndiff, sizeof diff[0], vlongcmp);
		bestn = 0;
		bestdiff = 0;
		for(i=1, n=1; i<=ndiff; i++, n++){
			if(i==ndiff || diff[i] != diff[i-1]){
				if(n > bestn){
					bestn = n;
					bestdiff = diff[i-1];
				}
				n = 0;
			}
		}
		print("arena size likely %z (%d of %d)\n", bestdiff, bestn, ndiff);
		if(arenasize != 0 && arenasize != bestdiff)
			print("using user-specified size %z instead\n", arenasize);
		else
			arenasize = bestdiff;

		/*
		 * The arena tail for an arena is arenasize-blocksize from the head.
		 */
		ndiff = 0;
		for(i=j=0; i<nhead && j<ntail; ){
			if(tail[j] < head[i]){
				j++;
				continue;
			}
			if(tail[j] < head[i]+arenasize){
				diff[ndiff++] = head[i]+arenasize - tail[j];
				j++;
				continue;
			}
			i++;
		}
		if(ndiff < 3)
			sysfatal("too few intact arenas: %d head, tail pairs", ndiff);
		qsort(diff, ndiff, sizeof diff[0], vlongcmp);
		bestn = 0;
		bestdiff = 0;
		for(i=1, n=1; i<=ndiff; i++, n++){
			if(i==ndiff || diff[i] != diff[i-1]){
				if(n > bestn){
					bestn = n;
					bestdiff = diff[i-1];
				}
				n = 0;
			}
		}
		print("block size likely %z (%d of %d)\n", bestdiff, bestn, ndiff);
		if(ap.blocksize != 0 && ap.blocksize != bestdiff)
			print("using user-specified size %z instead\n", (vlong)ap.blocksize);
		else
			ap.blocksize = bestdiff;
		if(ap.blocksize == 0 || ap.blocksize&(ap.blocksize-1))
			sysfatal("block size not a power of two");
		if(ap.blocksize > MaxDiskBlock)
			sysfatal("block size too big (max=%d)", MaxDiskBlock);

		/*
		 * Use head/tail information to deduce arena base.
		 */
		ndiff = 0;
		for(i=0; i<nhead; i++)
			diff[ndiff++] = head[i]%arenasize;
		for(i=0; i<ntail; i++)
			diff[ndiff++] = (tail[i]+ap.blocksize)%arenasize;
		qsort(diff, ndiff, sizeof diff[0], vlongcmp);
		bestn = 0;
		bestdiff = 0;
		for(i=1, n=1; i<=ndiff; i++, n++){
			if(i==ndiff || diff[i] != diff[i-1]){
				if(n > bestn){
					bestn = n;
					bestdiff = diff[i-1];
				}
				n = 0;
			}
		}
		ap.arenabase = bestdiff;
	}

	ap.tabbase = ROUNDUP(PartBlank+HeadSize, ap.blocksize);
	/*
	 * XXX pick up table, check arenabase.
	 * XXX pick up table, record base name.
	 */

	/*
	 * Somewhat standard computation.
	 * Fmtarenas used to use 64k tab, now uses 512k tab.
	 */
	if(ap.arenabase == 0){
		print("trying standard arena bases...\n");
		for(i=0; i<nelem(tabsizes); i++){
			ap.arenabase = ROUNDUP(PartBlank+HeadSize+tabsizes[i], ap.blocksize);
			p = pagein(ap.arenabase, Block);
			if(u32(p) == ArenaHeadMagic)
				break;
		}
	}
	p = pagein(ap.arenabase, Block);
	print("arena base likely %z%s\n", (vlong)ap.arenabase,
		u32(p)!=ArenaHeadMagic ? " (but no arena head there)" : "");

	ap.tabsize = ap.arenabase - ap.tabbase;
}

/*
 * Check the arena partition blocks and then the arenas listed in range.
 */
void
checkarenas(char *range)
{
	char *s, *t;
	int i, lo, hi, narena;
	uchar dbuf[HeadSize];
	uchar *p;

	guessgeometry();

	partend -= partend%ap.blocksize;

	memset(dbuf, 0, sizeof dbuf);
	packarenapart(&ap, dbuf);
	p = pagein(PartBlank, Block);
	if(memcmp(p, dbuf, HeadSize) != 0){
		print("on-disk arena part superblock incorrect\n");
		showdiffs(dbuf, p, HeadSize, partinfo);
	}
	memmove(p, dbuf, HeadSize);

	narena = (partend-ap.arenabase + arenasize-1)/arenasize;
	if(range == nil){
		for(i=0; i<narena; i++)
			checkarena(ap.arenabase+(vlong)i*arenasize, i);
	}else if(strcmp(range, "none") == 0){
		/* nothing */
	}else{
		/* parse, e.g., -4,8-9,10- */
		for(s=range; *s; s=t){
			t = strchr(s, ',');
			if(t)
				*t++ = 0;
			else
				t = s+strlen(s);
			if(*s == '-')
				lo = 0;
			else
				lo = strtol(s, &s, 0);
			hi = lo;
			if(*s == '-'){
				s++;
				if(*s == 0)
					hi = narena-1;
				else
					hi = strtol(s, &s, 0);
			}
			if(*s != 0){
				print("bad arena range: %s\n", s);
				continue;
			}
			for(i=lo; i<=hi; i++)
				checkarena(ap.arenabase+(vlong)i*arenasize, i);
		}
	}
}

/*
 * Is there a clump here at p?
 */
static int
isclump(uchar *p, Clump *cl, u32int *pmagic)
{
	int n;
	u32int magic;
	uchar score[VtScoreSize], *bp;
	Unwhack uw;
	uchar ubuf[70*1024];

	bp = p;
	magic = u32(p);
	if(magic == 0)
		return 0;
	p += U32Size;

	cl->info.type = vtfromdisktype(*p);
	if(cl->info.type == 0xFF)
		return 0;
	p++;
	cl->info.size = u16(p);
	p += U16Size;
	cl->info.uncsize = u16(p);
	if(cl->info.size > cl->info.uncsize)
		return 0;
	p += U16Size;
	scorecp(cl->info.score, p);
	p += VtScoreSize;
	cl->encoding = *p;
	p++;
	cl->creator = u32(p);
	p += U32Size;
	cl->time = u32(p);
	p += U32Size;

	switch(cl->encoding){
	case ClumpENone:
		if(cl->info.size != cl->info.uncsize)
			return 0;
		scoremem(score, p, cl->info.size);
		if(scorecmp(score, cl->info.score) != 0)
			return 0;
		break;
	case ClumpECompress:
		if(cl->info.size >= cl->info.uncsize)
			return 0;
		unwhackinit(&uw);
		n = unwhack(&uw, ubuf, cl->info.uncsize, p, cl->info.size);
		if(n != cl->info.uncsize)
			return 0;
		scoremem(score, ubuf, cl->info.uncsize);
		if(scorecmp(score, cl->info.score) != 0)
			return 0;
		break;
	default:
		return 0;
	}
	p += cl->info.size;

	/* it all worked out in the end */
	*pmagic = magic;
	return p - bp;
}

/*
 * All ClumpInfos seen in this arena.
 * Kept in binary tree so we can look up by score.
 */
typedef struct Cit Cit;
struct Cit
{
	int left;
	int right;
	vlong corrupt;
	ClumpInfo ci;
};
Cit *cibuf;
int ciroot;
int ncibuf, mcibuf;

void
resetcibuf(void)
{
	ncibuf = 0;
	ciroot = -1;
}

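/*
 * Walk the binary tree of clump infos, returning a pointer to the
 * link where score is (or would be inserted).
 */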
int*
ltreewalk(int *p, uchar *score)
{
	int i;

	for(;;){
		if(*p == -1)
			return p;
		i = scorecmp(cibuf[*p].ci.score, score);
		if(i == 0)
			return p;
		if(i < 0)
			p = &cibuf[*p].right;
		else
			p = &cibuf[*p].left;
	}
}

void
addcibuf(ClumpInfo *ci, vlong corrupt)
{
	Cit *cit;

	if(ncibuf == mcibuf){
		mcibuf += 131072;
		cibuf = vtrealloc(cibuf, mcibuf*sizeof cibuf[0]);
	}
	cit = &cibuf[ncibuf];
	cit->ci = *ci;
	cit->left = -1;
	cit->right = -1;
	cit->corrupt = corrupt;
	if(!corrupt)
		*ltreewalk(&ciroot, ci->score) = ncibuf;
	ncibuf++;
}

void
addcicorrupt(vlong len)
{
	static ClumpInfo zci;

	addcibuf(&zci, len);
}

int
haveclump(uchar *score)
{
	int i;
	int p;

	p = ciroot;
	for(;;){
		if(p == -1)
			return 0;
		i = scorecmp(cibuf[p].ci.score, score);
		if(i == 0)
			return 1;
		if(i < 0)
			p = cibuf[p].right;
		else
			p = cibuf[p].left;
	}
}

int
matchci(ClumpInfo *ci, uchar *p)
{
	if(ci->type != vtfromdisktype(p[0]))
		return 0;
	if(ci->size != u16(p+1))
		return 0;
	if(ci->uncsize != u16(p+3))
		return 0;
	if(scorecmp(ci->score, p+5) != 0)
		return 0;
	return 1;
}

int
sealedarena(uchar *p, int blocksize)
{
	int v, n;

	v = u32(p+4);
	switch(v){
	default:
		return 0;
	case ArenaVersion4:
		n = ArenaSize4;
		break;
	case ArenaVersion5:
		n = ArenaSize5;
		break;
	}
	if(p[n-1] != 1){
		print("arena tail says not sealed\n");
		return 0;
	}
	if(memcmp(p+n, zero, blocksize-VtScoreSize-n) != 0){
		print("arena tail followed by non-zero data\n");
		return 0;
	}
	if(memcmp(p+blocksize-VtScoreSize, zero, VtScoreSize) == 0){
		print("arena score zero\n");
		return 0;
	}
	return 1;
}

int
okayname(char *name, int n)
{
	char buf[20];

	if(nameok(name) < 0)
		return 0;
	sprint(buf, "%d", n);
	if(n == 0)
		buf[0] = 0;
	if(strlen(name) < strlen(buf)
	|| strcmp(name+strlen(name)-strlen(buf), buf) != 0)
		return 0;
	return 1;
}

int
clumpinfocmp(ClumpInfo *a, ClumpInfo *b)
{
	if(a->type != b->type)
		return a->type - b->type;
	if(a->size != b->size)
		return a->size - b->size;
	if(a->uncsize != b->uncsize)
		return a->uncsize - b->uncsize;
	return scorecmp(a->score, b->score);
}

ClumpInfo*
loadci(vlong offset, Arena *arena, int nci)
{
	int i, j, per;
	uchar *p, *sp;
	ClumpInfo *bci, *ci;

	per = arena->blocksize/ClumpInfoSize;
	bci = vtmalloc(nci*sizeof bci[0]);
	ci = bci;
	offset += arena->size - arena->blocksize;
	p = sp = nil;
	for(i=0; i<nci; i+=per){
		if(p == sp){
			sp = pagein(offset-4*M, 4*M);
			p = sp+4*M;
		}
		p -= arena->blocksize;
		offset -= arena->blocksize;
		for(j=0; j<per && i+j<nci; j++)
			unpackclumpinfo(ci++, p+j*ClumpInfoSize);
	}
	return bci;
}

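/*
 * Write the directory of clump infos back to the end of the arena,
 * returning the offset of the first directory block.
 */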
vlong
writeci(vlong offset, Arena *arena, ClumpInfo *ci, int nci)
{
	int i, j, per;
	uchar *p, *sp;

	per = arena->blocksize/ClumpInfoSize;
	offset += arena->size - arena->blocksize;
	p = sp = nil;
	for(i=0; i<nci; i+=per){
		if(p == sp){
			sp = pagein(offset-4*M, 4*M);
			p = sp+4*M;
		}
		p -= arena->blocksize;
		offset -= arena->blocksize;
		memset(p, 0, arena->blocksize);
		for(j=0; j<per && i+j<nci; j++)
			packclumpinfo(ci++, p+j*ClumpInfoSize);
	}
	pageout();
	return offset;
}

void
loadarenabasics(vlong offset0, int anum, ArenaHead *head, Arena *arena)
{
	char dname[ANameSize];
	static char lastbase[ANameSize];
	uchar *p;
	Arena oarena;
	ArenaHead ohead;

	/*
	 * Fmtarenas makes all arenas the same size
	 * except the last, which may be smaller.
	 * It uses the same block size for arenas as for
	 * the arena partition blocks.
	 */
	arena->size = arenasize;
	if(offset0+arena->size > partend)
		arena->size = partend - offset0;
	head->size = arena->size;

	arena->blocksize = ap.blocksize;
	head->blocksize = arena->blocksize;

	/*
	 * Look for clump magic and name in head/tail blocks.
	 * All the other info we will reconstruct just in case.
	 */
	p = pagein(offset0, arena->blocksize);
	memset(&ohead, 0, sizeof ohead);
	if(unpackarenahead(&ohead, p) >= 0){
		head->version = ohead.version;
		head->clumpmagic = ohead.clumpmagic;
		if(okayname(ohead.name, anum))
			strcpy(head->name, ohead.name);
	}

	p = pagein(offset0+arena->size-arena->blocksize,
		arena->blocksize);
	memset(&oarena, 0, sizeof oarena);
	if(unpackarena(&oarena, p) >= 0){
		arena->version = oarena.version;
		arena->clumpmagic = oarena.clumpmagic;
		if(okayname(oarena.name, anum))
			strcpy(arena->name, oarena.name);
		arena->diskstats.clumps = oarena.diskstats.clumps;
		print("old arena: sealed=%d\n", oarena.diskstats.sealed);
		arena->diskstats.sealed = oarena.diskstats.sealed;
	}

	/* Head trumps arena. */
	if(head->version){
		arena->version = head->version;
		arena->clumpmagic = head->clumpmagic;
	}
	if(arena->version == 0)
		arena->version = ArenaVersion5;
	if(basename){
		if(anum == -1)
			snprint(arena->name, ANameSize, "%s", basename);
		else
			snprint(arena->name, ANameSize, "%s%d", basename, anum);
	}else if(lastbase[0])
		snprint(arena->name, ANameSize, "%s%d", lastbase, anum);
	else if(head->name[0])
		strcpy(arena->name, head->name);
	else if(arena->name[0] == 0)
		sysfatal("cannot determine base name for arena; use -n");
	strcpy(lastbase, arena->name);
	sprint(dname, "%d", anum);
	lastbase[strlen(lastbase)-strlen(dname)] = 0;

	/* Was working in arena, now copy to head. */
	head->version = arena->version;
	memmove(head->name, arena->name, sizeof head->name);
	head->blocksize = arena->blocksize;
	head->size = arena->size;
}

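/*
 * Seed the seal hash with a freshly packed copy of the arena head block.
 */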
void
shahead(Shabuf *sb, vlong offset0, ArenaHead *head)
{
	uchar headbuf[MaxDiskBlock];

	sb->offset = offset0;
	memset(headbuf, 0, sizeof headbuf);
	packarenahead(head, headbuf);
	sbupdate(sb, headbuf, offset0, head->blocksize);
}

u32int
newclumpmagic(int version)
{
	u32int m;

	if(version == ArenaVersion4)
		return _ClumpMagic;
	do{
		m = fastrand();
	}while(m==0 || m == _ClumpMagic);
	return m;
}

/*
 * Poke around in the arena to find the clump data
 * and compute the relevant statistics.
 */
void
guessarena(vlong offset0, int anum, ArenaHead *head, Arena *arena,
	uchar *oldscore, uchar *score)
{
	uchar dbuf[MaxDiskBlock];
	int needtozero, clumps, nb1, nb2, minclumps;
	int inbad, n, ncib, printed, sealing, smart;
	u32int magic;
	uchar *sp, *ep, *p;
	vlong boffset, eoffset, lastclumpend, leaked;
	vlong offset, toffset, totalcorrupt, v;
	Clump cl;
	ClumpInfo *bci, *ci, *eci, *xci;
	Cit *bcit, *cit, *ecit;
	Shabuf oldsha, newsha;

	/*
	 * We expect to find an arena, with data, between offset
	 * and offset+arenasize.  With any luck, the data starts at
	 * offset+ap.blocksize.  The blocks have variable size and
	 * aren't padded at all, which doesn't give us any alignment
	 * constraints.  The blocks are compressed or high entropy,
	 * but the headers are pretty low entropy (except the score):
	 *
	 *	type[1] (range 0 thru 9, 13)
	 *	size[2]
	 *	uncsize[2] (<= size)
	 *
	 * so we can look for these.  We check the scores as we go,
	 * so we can't make any wrong turns.  If we find ourselves
	 * in a dead end, scan forward looking for a new start.
	 */

	resetcibuf();
	memset(head, 0, sizeof *head);
	memset(arena, 0, sizeof *arena);
	memset(oldscore, 0, VtScoreSize);
	memset(score, 0, VtScoreSize);
	memset(&oldsha, 0, sizeof oldsha);
	memset(&newsha, 0, sizeof newsha);
	newsha.rollback = 1;

	if(0){
		sbdebug(&oldsha, "old.sha");
		sbdebug(&newsha, "new.sha");
	}

	loadarenabasics(offset0, anum, head, arena);

	/* start the clump hunt */

	clumps = 0;
	totalcorrupt = 0;
	sealing = 1;
	boffset = offset0 + arena->blocksize;
	offset = boffset;
	eoffset = offset0+arena->size - arena->blocksize;
	toffset = eoffset;
	sp = pagein(offset0, 4*M);

	if(arena->diskstats.sealed){
		oldsha.offset = offset0;
		sbupdate(&oldsha, sp, offset0, 4*M);
	}
	ep = sp+4*M;
	p = sp + (boffset - offset0);
	ncib = arena->blocksize / ClumpInfoSize;	/* ci per block in index */
	lastclumpend = offset;
	nbad = 0;
	inbad = 0;
	needtozero = 0;
	minclumps = 0;
	while(offset < eoffset){
		/*
		 * Shift buffer if we're running out of room.
		 */
		if(p+70*K >= ep){
			/*
			 * Start the post SHA1 buffer.  By now we should know the
			 * clumpmagic and arena version, so we can create a
			 * correct head block to get things going.
			 */
			if(sealing && fix && newsha.offset == 0){
				newsha.offset = offset0;
				if(arena->clumpmagic == 0){
					if(arena->version == 0)
						arena->version = ArenaVersion5;
					arena->clumpmagic = newclumpmagic(arena->version);
				}
				head->clumpmagic = arena->clumpmagic;
				shahead(&newsha, offset0, head);
			}
			n = 4*M-256*K;
			if(sealing && fix){
				sbdiskhash(&newsha, bufoffset);
				sbupdate(&newsha, buf, bufoffset, 4*M-256*K);
			}
			pagein(bufoffset+n, 4*M);
			p -= n;
			if(arena->diskstats.sealed)
				sbupdate(&oldsha, buf, bufoffset, 4*M);
		}

		/*
		 * Check for a clump at p, which is at offset in the disk.
		 * Duplicate clumps happen in corrupted disks
		 * (the same pattern gets written many times in a row)
		 * and should never happen during regular use.
		 */
		magic = 0;
		if((n = isclump(p, &cl, &magic)) > 0){
			/*
			 * If we were in the middle of some corrupted data,
			 * flush a warning about it and then add any clump
			 * info blocks as necessary.
			 */
			if(inbad){
				inbad = 0;
				v = offset-lastclumpend;
				if(needtozero){
					zerorange(lastclumpend, v);
					sbrollback(&newsha, lastclumpend);
					print("corrupt clump data - %#llux+%#llux (%,llud bytes)\n",
						lastclumpend, v, v);
				}
				addcicorrupt(v);
				totalcorrupt += v;
				nb1 = (minclumps+ncib-1)/ncib;
				minclumps += (v+ClumpSize+VtMaxLumpSize-1)/(ClumpSize+VtMaxLumpSize);
				nb2 = (minclumps+ncib-1)/ncib;
				eoffset -= (nb2-nb1)*arena->blocksize;
			}

			if(haveclump(cl.info.score))
				print("warning: duplicate clump %d %V at %#llux+%#d\n", cl.info.type, cl.info.score, offset, n);

			/*
			 * If clumps use different magic numbers, we don't care.
			 * We'll just use the first one we find and make the others
			 * follow suit.
			 */
			if(arena->clumpmagic == 0){
				print("clump type %d size %d score %V magic %x\n",
					cl.info.type, cl.info.size, cl.info.score, magic);
				arena->clumpmagic = magic;
				if(magic == _ClumpMagic)
					arena->version = ArenaVersion4;
				else
					arena->version = ArenaVersion5;
			}
			if(magic != arena->clumpmagic)
				p32(p, arena->clumpmagic);
			if(clumps == 0)
				arena->ctime = cl.time;

			/*
			 * Record the clump, update arena stats,
			 * grow clump info blocks if needed.
			 */
			if(verbose > 1)
				print("\tclump %d: %d %V at %#llux+%#ux (%d)\n",
					clumps, cl.info.type, cl.info.score, offset, n, n);
			addcibuf(&cl.info, 0);
			if(minclumps%ncib == 0)
				eoffset -= arena->blocksize;
			minclumps++;
			clumps++;
			if(cl.encoding != ClumpENone)
				arena->diskstats.cclumps++;
			arena->diskstats.uncsize += cl.info.uncsize;
			arena->wtime = cl.time;

			/*
			 * Move to next clump.
			 */
			offset += n;
			p += n;
			lastclumpend = offset;
		}else{
			/*
			 * Overwrite malformed clump data with zeros later.
			 * For now, just record whether it needs to be overwritten.
			 * Bad regions must be of size at least ClumpSize.
			 * Postponing the overwriting keeps us from writing past
			 * the end of the arena data (which might be directory data)
			 * with zeros.
			 */
			if(!inbad){
				inbad = 1;
				needtozero = 0;
				if(memcmp(p, zero, ClumpSize) != 0)
					needtozero = 1;
				p += ClumpSize;
				offset += ClumpSize;
				nbad++;
			}else{
				if(*p != 0)
					needtozero = 1;
				p++;
				offset++;
			}
		}
	}
	pageout();

	if(verbose)
		print("readable clumps: %d; min. directory entries: %d\n",
			clumps, minclumps);
	arena->diskstats.used = lastclumpend - boffset;
	leaked = eoffset - lastclumpend;
	if(verbose)
		print("used from %#llux to %#llux = %,lld (%,lld unused)\n",
			boffset, lastclumpend, arena->diskstats.used, leaked);

	/*
	 * Finish the SHA1 of the old data.
	 */
	if(arena->diskstats.sealed){
		sbdiskhash(&oldsha, toffset);
		readdisk(dbuf, toffset, arena->blocksize);
		scorecp(dbuf+arena->blocksize-VtScoreSize, zero);
		sbupdate(&oldsha, dbuf, toffset, arena->blocksize);
		sbscore(&oldsha, oldscore);
	}

	/*
	 * If we still don't know the clump magic, the arena
	 * must be empty.  It still needs a value, so make
	 * something up.
	 */
	if(arena->version == 0)
		arena->version = ArenaVersion5;
	if(arena->clumpmagic == 0){
		if(arena->version == ArenaVersion4)
			arena->clumpmagic = _ClumpMagic;
		else{
			do
				arena->clumpmagic = fastrand();
			while(arena->clumpmagic==_ClumpMagic
				||arena->clumpmagic==0);
		}
		head->clumpmagic = arena->clumpmagic;
	}

	/*
	 * Guess at number of clumpinfo blocks to load.
	 * If we guess high, it's no big deal.  If we guess low,
	 * we'll be forced into rewriting the whole directory.
	 * Still not such a big deal.
	 */
	if(clumps == 0 || arena->diskstats.used == totalcorrupt)
		goto Nocib;
	if(clumps < arena->diskstats.clumps)
		clumps = arena->diskstats.clumps;
	if(clumps < ncibuf)
		clumps = ncibuf;
	clumps += totalcorrupt/
		((arena->diskstats.used - totalcorrupt)/clumps);
	clumps += totalcorrupt/2000;
	if(clumps < minclumps)
		clumps = minclumps;
	clumps += ncib-1;
	clumps -= clumps%ncib;

	/*
	 * Can't write into the actual data.
	 */
	v = offset0 + arena->size - arena->blocksize;
	v -= (clumps+ncib-1)/ncib * arena->blocksize;
	if(v < lastclumpend){
		v = offset0 + arena->size - arena->blocksize;
		clumps = (v-lastclumpend)/arena->blocksize * ncib;
	}

	if(clumps < minclumps)
		print("cannot happen?\n");

	/*
	 * Check clumpinfo blocks against directory we created.
	 * The tricky part is handling the corrupt sections of arena.
	 * If possible, we remark just the affected directory entries
	 * rather than slide everything down.
	 *
	 * Allocate clumps+1 blocks and check that we don't need
	 * the last one at the end.
	 */
	bci = loadci(offset0, arena, clumps+1);
	eci = bci+clumps+1;
	bcit = cibuf;
	ecit = cibuf+ncibuf;

	smart = 0;	/* Somehow the smart code doesn't do corrupt clumps right. */
Again:
	nbad = 0;
	ci = bci;
	for(cit=bcit; cit<ecit && ci<eci; cit++){
		if(cit->corrupt){
			vlong n, m;
			if(smart){
				/*
				 * If we can, just mark existing entries as corrupt.
				 */
				n = cit->corrupt;
				for(xci=ci; n>0 && xci<eci; xci++)
					n -= ClumpSize+xci->size;
				if(n > 0 || xci >= eci)
					goto Dumb;
				printed = 0;
				for(; ci<xci; ci++){
					if(verbose && ci->type != VtCorruptType){
						if(!printed){
							print("marking directory %d-%d as corrupt\n",
								(int)(ci-bci), (int)(xci-bci));
							printed = 1;
						}
						print("\ttype=%d size=%d uncsize=%d score=%V\n",
							ci->type, ci->size, ci->uncsize, ci->score);
					}
					ci->type = VtCorruptType;
				}
			}else{
			Dumb:
				print("\trewriting clump directory\n");
				/*
				 * Otherwise, blaze a new trail.
				 */
				n = cit->corrupt;
				while(n > 0 && ci < eci){
					if(n < ClumpSize)
						sysfatal("bad math in clump corrupt");
					if(n <= VtMaxLumpSize+ClumpSize)
						m = n;
					else{
						m = VtMaxLumpSize+ClumpSize;
						if(n-m < ClumpSize)
							m -= ClumpSize;
					}
					ci->type = VtCorruptType;
					ci->size = m-ClumpSize;
					ci->uncsize = m-ClumpSize;
					memset(ci->score, 0, VtScoreSize);
					ci++;
					n -= m;
				}
			}
			continue;
		}
		if(clumpinfocmp(&cit->ci, ci) != 0){
			if(verbose && (smart || verbose>1)){
				print("clumpinfo %d\n", (int)(ci-bci));
				print("\twant: %d %d %d %V\n",
					cit->ci.type, cit->ci.size,
					cit->ci.uncsize, cit->ci.score);
				print("\thave: %d %d %d %V\n",
					ci->type, ci->size,
					ci->uncsize, ci->score);
			}
			*ci = cit->ci;
			nbad++;
		}
		ci++;
	}
	if(ci >= eci || cit < ecit){
		print("ran out of space editing existing directory; rewriting\n");
		print("# eci %ld ci %ld ecit %ld cit %ld\n", eci-bci, ci-bci, ecit-bcit, cit-bcit);
		assert(smart);	/* can't happen second time thru */
		smart = 0;
		goto Again;
	}

	assert(ci <= eci);
	arena->diskstats.clumps = ci-bci;
	eoffset = writeci(offset0, arena, bci, ci-bci);
	if(sealing && fix)
		sbrollback(&newsha, v);
	print("eoffset=%lld lastclumpend=%lld diff=%lld unseal=%d\n", eoffset, lastclumpend, eoffset-lastclumpend, unseal);
	if(lastclumpend > eoffset)
		print("arena directory overwrote blocks! cannot happen!\n");
	free(bci);
	if(smart && nbad)
		print("arena directory has %d bad or missing entries\n", nbad);
Nocib:
	if(eoffset - lastclumpend > 64*1024 && (!arena->diskstats.sealed || unseal)){
		if(arena->diskstats.sealed)
			print("unsealing arena\n");
		sealing = 0;
		memset(oldscore, 0, VtScoreSize);
	}

	/*
	 * Finish the SHA1 of the new data - only meaningful
	 * if we've been writing to disk (`fix').
	 */
	arena->diskstats.sealed = sealing;
	arena->memstats = arena->diskstats;
	if(sealing && fix){
		uchar tbuf[MaxDiskBlock];

		sbdiskhash(&newsha, toffset);
		memset(tbuf, 0, sizeof tbuf);
		packarena(arena, tbuf);
		sbupdate(&newsha, tbuf, toffset, arena->blocksize);
		sbscore(&newsha, score);
	}
}

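/*
 * Dump the raw arena contents to the file dumpbase.anum (-x option).
 */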
void
dumparena(vlong offset, int anum, Arena *arena)
{
	char buf[1000];
	vlong o, e;
	int fd, n;

	snprint(buf, sizeof buf, "%s.%d", dumpbase, anum);
	if((fd = create(buf, OWRITE, 0666)) < 0){
		fprint(2, "create %s: %r\n", buf);
		return;
	}
	e = offset+arena->size;
	for(o=offset; o<e; o+=n){
		n = 4*M;
		if(o+n > e)
			n = e-o;
		if(pwrite(fd, pagein(o, n), n, o-offset) != n){
			fprint(2, "write %s at %#llux: %r\n", buf, o-offset);
			return;
		}
	}
}

void
checkarena(vlong offset, int anum)
{
	uchar dbuf[MaxDiskBlock];
	uchar *p, oldscore[VtScoreSize], score[VtScoreSize];
	Arena arena, oarena;
	ArenaHead head;
	Info *fmt, *fmta;
	int sz;

	print("# arena %d: offset %#llux\n", anum, offset);

	if(offset >= partend){
		print("arena offset out of bounds\n");
		return;
	}

	guessarena(offset, anum, &head, &arena, oldscore, score);

	if(verbose){
		print("#\tversion=%d name=%s blocksize=%d size=%z",
			head.version, head.name, head.blocksize, head.size);
		if(head.clumpmagic)
			print(" clumpmagic=%#.8ux", head.clumpmagic);
		print("\n#\tclumps=%d cclumps=%d used=%,lld uncsize=%,lld\n",
			arena.diskstats.clumps, arena.diskstats.cclumps,
			arena.diskstats.used, arena.diskstats.uncsize);
		print("#\tctime=%t\n", arena.ctime);
		print("#\twtime=%t\n", arena.wtime);
		if(arena.diskstats.sealed)
			print("#\tsealed score=%V\n", score);
	}

	if(dumpbase){
		dumparena(offset, anum, &arena);
		return;
	}

	memset(dbuf, 0, sizeof dbuf);
	packarenahead(&head, dbuf);
	p = pagein(offset, arena.blocksize);
	if(memcmp(dbuf, p, arena.blocksize) != 0){
		print("on-disk arena header incorrect\n");
		showdiffs(dbuf, p, arena.blocksize,
			arena.version==ArenaVersion4 ? headinfo4 : headinfo5);
	}
	memmove(p, dbuf, arena.blocksize);

	memset(dbuf, 0, sizeof dbuf);
	packarena(&arena, dbuf);
	if(arena.diskstats.sealed)
		scorecp(dbuf+arena.blocksize-VtScoreSize, score);
	p = pagein(offset+arena.size-arena.blocksize, arena.blocksize);
	memset(&oarena, 0, sizeof oarena);
	unpackarena(&oarena, p);
	if(arena.version == ArenaVersion4){
		sz = ArenaSize4;
		fmt = tailinfo4;
		fmta = tailinfo4a;
	}else{
		sz = ArenaSize5;
		fmt = tailinfo5;
		fmta = tailinfo5a;
	}
	if(p[sz] == 1){
		fmt = fmta;
		if(oarena.diskstats.sealed){
			/*
			 * some arenas were sealed with the extension
			 * before we adopted the convention that if it didn't
			 * add new information it gets dropped.
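/*
 * Scan the partition for arena heads and build the arena map.
 */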
			 */
			_packarena(&arena, dbuf, 1);
		}
	}
	if(memcmp(dbuf, p, arena.blocksize-VtScoreSize) != 0){
		print("on-disk arena tail incorrect\n");
		showdiffs(dbuf, p, arena.blocksize-VtScoreSize, fmt);
	}
	if(arena.diskstats.sealed){
		if(oarena.diskstats.sealed)
		if(scorecmp(p+arena.blocksize-VtScoreSize, oldscore) != 0){
			print("on-disk arena seal score incorrect\n");
			print("\tcorrect=%V\n", oldscore);
			print("\t disk=%V\n", p+arena.blocksize-VtScoreSize);
		}
		if(fix && scorecmp(p+arena.blocksize-VtScoreSize, score) != 0){
			print("%ssealing arena%s: %V\n",
				oarena.diskstats.sealed ? "re" : "",
				scorecmp(oldscore, score) == 0 ?
					"" : " after changes", score);
		}
	}
	memmove(p, dbuf, arena.blocksize);

	pageout();
}

AMapN*
buildamap(void)
{
	uchar *p;
	vlong o;
	ArenaHead h;
	AMapN *an;
	AMap *m;

	an = vtmallocz(sizeof *an);
	for(o=ap.arenabase; o<partend; o+=arenasize){
		p = pagein(o, Block);
		if(unpackarenahead(&h, p) >= 0){
			an->map = vtrealloc(an->map, (an->n+1)*sizeof an->map[0]);
			m = &an->map[an->n++];
			m->start = o;
			m->stop = o+h.size;
			strcpy(m->name, h.name);
		}
	}
	return an;
}

void
checkmap(void)
{
	char *s;
	uchar *p;
	int i, len;
	AMapN *an;
	Fmt fmt;

	an = buildamap();
	fmtstrinit(&fmt);
	fmtprint(&fmt, "%ud\n", an->n);
	for(i=0; i<an->n; i++)
		fmtprint(&fmt, "%s\t%lld\t%lld\n",
			an->map[i].name, an->map[i].start, an->map[i].stop);
	s = fmtstrflush(&fmt);
	len = strlen(s);
	if(len > ap.tabsize){
		print("arena partition map too long: need %z bytes have %z\n",
			(vlong)len, (vlong)ap.tabsize);
		len = ap.tabsize;
	}

	if(ap.tabsize >= 4*M){	/* can't happen - max arenas is 2000 */
		print("arena partition map *way* too long\n");
		return;
	}

	p = pagein(ap.tabbase, ap.tabsize);
	if(memcmp(p, s, len) != 0){
		print("arena partition map incorrect; rewriting.\n");
		memmove(p, s, len);
	}
	pageout();
}

int mainstacksize = 512*1024;

void
threadmain(int argc, char **argv)
{
	int mode;

	mode = OREAD;
	readonly = 1;
	ARGBEGIN{
	case 'U':
		unseal = 1;
		break;
	case 'a':
		arenasize = unittoull(EARGF(usage()));
		break;
	case 'b':
		ap.blocksize = unittoull(EARGF(usage()));
		break;
	case 'f':
		fix = 1;
		mode = ORDWR;
		readonly = 0;
		break;
	case 'n':
		basename = EARGF(usage());
		break;
	case 'v':
		verbose++;
		break;
	case 'x':
		dumpbase = EARGF(usage());
		break;
	default:
		usage();
	}ARGEND

	if(argc != 1 && argc != 2)
		usage();

	file = argv[0];

	ventifmtinstall();
	fmtinstall('z', zfmt);
	fmtinstall('t', tfmt);
	quotefmtinstall();

	part = initpart(file, mode|ODIRECT);
	if(part == nil)
		sysfatal("can't open %s: %r", file);
	partend = part->size;

	if(isonearena()){
		checkarena(0, -1);
		threadexitsall(nil);
	}
	checkarenas(argc > 1 ? argv[1] : nil);
	checkmap();
	threadexitsall(nil);
}