plan9port

fork of plan9port with libvec, libstr and libsdb
Log | Files | Refs | README | LICENSE

part.c (12618B)


      1 #ifdef PLAN9PORT	/* SORRY! */
      2 #	include <u.h>
      3 #	include <sys/types.h>
      4 #	ifdef __linux__	/* REALLY SORRY! */
      5 #		define CANBLOCKSIZE 1
      6 #		include <sys/vfs.h>
      7 #	elif defined(__FreeBSD__)
      8 #		define CANBLOCKSIZE 1
      9 #		include <sys/param.h>
     10 #		include <sys/stat.h>
     11 #		include <sys/mount.h>
     12 #	endif
     13 #endif
     14 #include "stdinc.h"
     15 #include <ctype.h>
     16 #include "dat.h"
     17 #include "fns.h"
     18 
     19 u32int	maxblocksize;	/* largest block size passed to partblocksize so far */
     20 int	readonly;	/* when nonzero, initpart rewrites the open mode to OREAD */
     21 
     22 int findsubpart(Part *part, char *name);	/* defined at the end of this file; declared here because initpart calls it */
     23 
     24 static int
     25 strtoullsuf(char *p, char **pp, int rad, u64int *u)
     26 {
     27 	u64int v;
     28 
     29 	if(!isdigit((uchar)*p))
     30 		return -1;
     31 	v = strtoull(p, &p, rad);
     32 	switch(*p){
     33 	case 'k':
     34 	case 'K':
     35 		v *= 1024;
     36 		p++;
     37 		break;
     38 	case 'm':
     39 	case 'M':
     40 		v *= 1024*1024;
     41 		p++;
     42 		break;
     43 	case 'g':
     44 	case 'G':
     45 		v *= 1024*1024*1024;
     46 		p++;
     47 		break;
     48 	case 't':
     49 	case 'T':
     50 		v *= 1024*1024;
     51 		v *= 1024*1024;
     52 		p++;
     53 		break;
     54 	}
     55 	*pp = p;
     56 	*u = v;
     57 	return 0;
     58 }
     59 
     60 static int
     61 parsepart(char *name, char **file, char **subpart, u64int *lo, u64int *hi)
     62 {
     63 	char *p;
     64 
     65 	*file = estrdup(name);
     66 	*lo = 0;
     67 	*hi = 0;
     68 	*subpart = nil;
     69 	if((p = strrchr(*file, ':')) == nil)
     70 		return 0;
     71 	*p++ = 0;
     72 	if(isalpha(*p)){
     73 		*subpart = p;
     74 		return 0;
     75 	}
     76 	if(*p == '-')
     77 		*lo = 0;
     78 	else{
     79 		if(strtoullsuf(p, &p, 0, lo) < 0){
     80 			free(*file);
     81 			return -1;
     82 		}
     83 	}
     84 	if(*p == '-')
     85 		p++;
     86 	if(*p == 0){
     87 		*hi = 0;
     88 		return 0;
     89 	}
     90 	if(strtoullsuf(p, &p, 0, hi) < 0 || *p != 0){
     91 		free(*file);
     92 		return -1;
     93 	}
     94 	return 0;
     95 }
     96 
     97 #undef min
     98 #define min(a, b) ((a) < (b) ? (a) : (b))
     99 Part*
    100 initpart(char *name, int mode)
    101 {
    102 	Part *part;
    103 	Dir *dir;
    104 	char *file, *subname;
    105 	u64int lo, hi;
    106 
    107 	if(parsepart(name, &file, &subname, &lo, &hi) < 0){
    108 		werrstr("cannot parse name %s", name);
    109 		return nil;
    110 	}
    111 	trace(TraceDisk, "initpart %s file %s lo 0x%llx hi 0x%llx", name, file, lo, hi);
    112 	part = MKZ(Part);
    113 	part->name = estrdup(name);
    114 	part->filename = estrdup(file);
    115 	if(readonly){
    116 		mode &= ~(OREAD|OWRITE|ORDWR);
    117 		mode |= OREAD;
    118 	}
    119 #ifdef __linux__	/* sorry, but linus made O_DIRECT unusable! */
    120 	mode &= ~ODIRECT;
    121 #endif
    122 	part->fd = open(file, mode);
    123 	if(part->fd < 0){
    124 		if((mode&(OREAD|OWRITE|ORDWR)) == ORDWR)
    125 			part->fd = open(file, (mode&~ORDWR)|OREAD);
    126 		if(part->fd < 0){
    127 			freepart(part);
    128 			fprint(2, "can't open partition='%s': %r\n", file);
    129 			seterr(EOk, "can't open partition='%s': %r", file);
    130 			fprint(2, "%r\n");
    131 			free(file);
    132 			return nil;
    133 		}
    134 		fprint(2, "warning: %s opened for reading only\n", name);
    135 	}
    136 	part->offset = lo;
    137 	dir = dirfstat(part->fd);
    138 	if(dir == nil){
    139 		freepart(part);
    140 		seterr(EOk, "can't stat partition='%s': %r", file);
    141 		free(file);
    142 		return nil;
    143 	}
    144 	if(dir->length == 0){
    145 		free(dir);
    146 		dir = dirstat(file);
    147 		if(dir == nil || dir->length == 0) {
    148 			freepart(part);
    149 			seterr(EOk, "can't determine size of partition %s", file);
    150 			free(file);
    151 			return nil;
    152 		}
    153 	}
    154 	if(dir->length < hi || dir->length < lo){
    155 		freepart(part);
    156 		seterr(EOk, "partition '%s': bounds out of range (max %lld)", name, dir->length);
    157 		free(dir);
    158 		free(file);
    159 		return nil;
    160 	}
    161 	if(hi == 0)
    162 		hi = dir->length;
    163 	part->size = hi - part->offset;
    164 #ifdef CANBLOCKSIZE
    165 	{
    166 		struct statfs sfs;
    167 		if(fstatfs(part->fd, &sfs) >= 0 && sfs.f_bsize > 512)
    168 			part->fsblocksize = sfs.f_bsize;
    169 	}
    170 #endif
    171 
    172 	part->fsblocksize = min(part->fsblocksize, MaxIo);
    173 
    174 	if(subname && findsubpart(part, subname) < 0){
    175 		werrstr("cannot find subpartition %s", subname);
    176 		freepart(part);
    177 		return nil;
    178 	}
    179 	free(dir);
    180 	return part;
    181 }
    182 
    183 int
    184 flushpart(Part *part)
    185 {
    186 	USED(part);
    187 #ifdef __linux__	/* grrr! */
    188 	if(fsync(part->fd) < 0){
    189 		logerr(EAdmin, "flushpart %s: %r", part->name);
    190 		return -1;
    191 	}
    192 	posix_fadvise(part->fd, 0, 0, POSIX_FADV_DONTNEED);
    193 #endif
    194 	return 0;
    195 }
    196 
    197 void
    198 freepart(Part *part)
    199 {
    200 	if(part == nil)
    201 		return;
    202 	if(part->fd >= 0)
    203 		close(part->fd);
    204 	free(part->name);
    205 	free(part);
    206 }
    207 
    208 void
    209 partblocksize(Part *part, u32int blocksize)
    210 {
    211 	if(part->blocksize)
    212 		sysfatal("resetting partition=%s's block size", part->name);
    213 	part->blocksize = blocksize;
    214 	if(blocksize > maxblocksize)
    215 		maxblocksize = blocksize;
    216 }
    217 
    218 /*
    219  * Read/write some amount of data between a block device or file and a memory buffer.
    220  *
    221  * Most Unix systems require that when accessing a block device directly,
    222  * the buffer, offset, and count are all multiples of the device block size,
    223  * making this a lot more complicated than it otherwise would be.
    224  *
    225  * Most of our callers will make things easy on us, but for some callers it's best
    226  * if we just do the work here, with only one place to get it right (hopefully).
    227  *
    228  * If everything is aligned properly, prwb will try to do big transfers in the main
    229  * body of the loop: up to MaxIo bytes at a time.  If everything isn't aligned properly,
    230  * we work one block at a time.
    231  */
    232 int
    233 prwb(char *name, int fd, int isread, u64int offset, void *vbuf, u32int count, u32int blocksize)
    234 {
    235 	char *op;
    236 	u8int *buf, *freetmp, *dst;
    237 	u32int icount, opsize;
    238 	int r, count1;
    239 
    240 
    241 #ifndef PLAN9PORT
    242 	USED(blocksize);
    243 	icount = count;
    244 	buf = vbuf;
    245 	op = isread ? "read" : "write";
    246 	dst = buf;
    247 	freetmp = nil;
    248 	while(count > 0){
    249 		opsize = min(count, 131072 /* blocksize */);
    250 		if(isread)
    251 			r = pread(fd, dst, opsize, offset);
    252 		else
    253 			r = pwrite(fd, dst, opsize, offset);
    254 		if(r <= 0)
    255 			goto Error;
    256 		offset += r;
    257 		count -= r;
    258 		dst += r;
    259 		if(r != opsize)
    260 			goto Error;
    261 	}
    262 	return icount;
    263 #else
    264 	u32int c, delta;
    265 	u8int *tmp;
    266 
    267 	icount = count;
    268 	buf = vbuf;
    269 	tmp = nil;
    270 	freetmp = nil;
    271 	opsize = blocksize;
    272 
    273 	if(count == 0){
    274 		logerr(EStrange, "pwrb %s called to %s 0 bytes", name, isread ? "read" : "write");
    275 		return 0;
    276 	}
    277 
    278 	assert(blocksize > 0);
    279 
    280 	/* allocate blocksize-aligned temp buffer if needed */
    281 	if((ulong)offset%blocksize || (ulong)buf%blocksize || count%blocksize){
    282 		if((freetmp = malloc(blocksize*2)) == nil)
    283 			return -1;
    284 		tmp = freetmp;
    285 		tmp += blocksize - (ulong)tmp%blocksize;
    286 	}
    287 
    288 	/* handle beginning fringe */
    289 	if((delta = (ulong)offset%blocksize) != 0){
    290 		assert(tmp != nil);
    291 		if((r=pread(fd, tmp, blocksize, offset-delta)) != blocksize){
    292 			dst = tmp;
    293 			offset = offset-delta;
    294 			op = "read";
    295 			count1 = blocksize;
    296 			goto Error;
    297 		}
    298 		c = min(count, blocksize-delta);
    299 		assert(c > 0 && c < blocksize);
    300 		if(isread)
    301 			memmove(buf, tmp+delta, c);
    302 		else{
    303 			memmove(tmp+delta, buf, c);
    304 			if((r=pwrite(fd, tmp, blocksize, offset-delta)) != blocksize){
    305 				dst = tmp;
    306 				offset = offset-delta;
    307 				op = "read";
    308 				count1 = blocksize;
    309 				goto Error;
    310 			}
    311 		}
    312 		assert(c > 0);
    313 		offset += c;
    314 		buf += c;
    315 		count -= c;
    316 	}
    317 
    318 	/* handle full blocks */
    319 	while(count >= blocksize){
    320 		assert((ulong)offset%blocksize == 0);
    321 		if((ulong)buf%blocksize){
    322 			assert(tmp != nil);
    323 			dst = tmp;
    324 			opsize = blocksize;
    325 		}else{
    326 			dst = buf;
    327 			opsize = count - count%blocksize;
    328 			if(opsize > MaxIo)
    329 				opsize = MaxIo;
    330 		}
    331 		if(isread){
    332 			if((r=pread(fd, dst, opsize, offset))<=0 || r%blocksize){
    333 				op = "read";
    334 				count1 = opsize;
    335 				goto Error;
    336 			}
    337 			if(dst == tmp){
    338 				assert(r == blocksize);
    339 				memmove(buf, tmp, blocksize);
    340 			}
    341 		}else{
    342 			if(dst == tmp){
    343 				assert(opsize == blocksize);
    344 				memmove(dst, buf, blocksize);
    345 			}
    346 			if((r=pwrite(fd, dst, opsize, offset))<=0 || r%blocksize){
    347 				count1 = opsize;
    348 				op = "write";
    349 				goto Error;
    350 			}
    351 			if(dst == tmp)
    352 				assert(r == blocksize);
    353 		}
    354 		assert(r > 0);
    355 		offset += r;
    356 		buf += r;
    357 		count -= r;
    358 	}
    359 
    360 	/* handle ending fringe */
    361 	if(count > 0){
    362 		assert((ulong)offset%blocksize == 0);
    363 		assert(tmp != nil);
    364 		/*
    365 		 * Complicated condition: if we're reading it's okay to get less than
    366 		 * a block as long as it's enough to satisfy the read - maybe this is
    367 		 * a normal file.  (We never write to normal files, or else things would
    368 		 * be even more complicated.)
    369 		 */
    370 		r = pread(fd, tmp, blocksize, offset);
    371 		if((isread && r < count) || (!isread && r != blocksize)){
    372 print("FAILED isread=%d r=%d count=%d blocksize=%d\n", isread, r, count, blocksize);
    373 			dst = tmp;
    374 			op = "read";
    375 			count1 = blocksize;
    376 			goto Error;
    377 		}
    378 		if(isread)
    379 			memmove(buf, tmp, count);
    380 		else{
    381 			memmove(tmp, buf, count);
    382 			if(pwrite(fd, tmp, blocksize, offset) != blocksize){
    383 				dst = tmp;
    384 				count1 = blocksize;
    385 				op = "write";
    386 				goto Error;
    387 			}
    388 		}
    389 	}
    390 	if(freetmp)
    391 		free(freetmp);
    392 	return icount;
    393 #endif
    394 
    395 Error:
    396 	seterr(EAdmin, "%s %s offset 0x%llux count %ud buf %p returned %d: %r",
    397 		op, name, offset, count1, dst, r);
    398 	if(freetmp)
    399 		free(freetmp);
    400 	return -1;
    401 }
    402 
    403 #ifndef PLAN9PORT	/* helpers for the native Plan 9 build only */
    404 static int sdreset(Part*);	/* NOTE(review): neither called nor defined in the visible part of this file */
    405 static int reopen(Part*);	/* called by rwpart before retrying after an i/o timeout/error */
    406 static int threadspawnl(int[3], char*, char*, ...);	/* NOTE(review): neither called nor defined in the visible part of this file */
    407 #endif
    408 
    409 int
    410 rwpart(Part *part, int isread, u64int offset, u8int *buf, u32int count)
    411 {
    412 	int n, try;
    413 	u32int blocksize;
    414 
    415 	trace(TraceDisk, "%s %s %ud at 0x%llx",
    416 		isread ? "read" : "write", part->name, count, offset);
    417 	if(offset >= part->size || offset+count > part->size){
    418 		seterr(EStrange, "out of bounds %s offset 0x%llux count %ud to partition %s size 0x%llux",
    419 			isread ? "read" : "write", offset, count, part->name, part->size);
    420 		return -1;
    421 	}
    422 
    423 	blocksize = part->fsblocksize;
    424 	if(blocksize == 0)
    425 		blocksize = part->blocksize;
    426 	if(blocksize == 0)
    427 		blocksize = 4096;
    428 
    429 	for(try=0;; try++){
    430 		n = prwb(part->filename, part->fd, isread, part->offset+offset, buf, count, blocksize);
    431 		if(n >= 0 || try > 10)
    432 			break;
    433 
    434 #ifndef PLAN9PORT
    435 	    {
    436 		char err[ERRMAX];
    437 		/*
    438 		 * This happens with the sdmv disks frustratingly often.
    439 		 * Try to fix things up and continue.
    440 		 */
    441 		rerrstr(err, sizeof err);
    442 		if(strstr(err, "i/o timeout") || strstr(err, "i/o error") || strstr(err, "partition has changed")){
    443 			reopen(part);
    444 			continue;
    445 		}
    446 	    }
    447 #endif
    448 		break;
    449 	}
    450 #ifdef __linux__	/* sigh */
    451 	posix_fadvise(part->fd, part->offset+offset, n, POSIX_FADV_DONTNEED);
    452 #endif
    453 	return n;
    454 }
    455 int
    456 readpart(Part *part, u64int offset, u8int *buf, u32int count)
    457 {
    458 	return rwpart(part, 1, offset, buf, count);
    459 }
    460 
    461 int
    462 writepart(Part *part, u64int offset, u8int *buf, u32int count)
    463 {
    464 	return rwpart(part, 0, offset, buf, count);
    465 }
    466 
    467 ZBlock*
    468 readfile(char *name)
    469 {
    470 	Part *p;
    471 	ZBlock *b;
    472 
    473 	p = initpart(name, OREAD);
    474 	if(p == nil)
    475 		return nil;
    476 	b = alloczblock(p->size, 0, p->blocksize);
    477 	if(b == nil){
    478 		seterr(EOk, "can't alloc %s: %r", name);
    479 		freepart(p);
    480 		return nil;
    481 	}
    482 	if(readpart(p, 0, b->data, p->size) < 0){
    483 		seterr(EOk, "can't read %s: %r", name);
    484 		freepart(p);
    485 		freezblock(b);
    486 		return nil;
    487 	}
    488 	freepart(p);
    489 	return b;
    490 }
    491 
    492 /*
    493  * Search for the Plan 9 partition with the given name.
    494  * This lets you write things like /dev/ad4:arenas
    495  * if you move a disk from a Plan 9 system to a FreeBSD system.
    496  *
    497  * God I hope I never write this code again.
    498  */
    499 #define MAGIC "plan9 partitions"
    500 static int
    501 tryplan9part(Part *part, char *name)
    502 {
    503 	uchar buf[512];
    504 	char *line[40], *f[4];
    505 	int i, n;
    506 	vlong start, end;
    507 
    508 	/*
    509 	 * Partition table in second sector.
    510 	 * Could also look on 2nd last sector and last sector,
    511 	 * but those disks died out long before venti came along.
    512 	 */
    513 	if(readpart(part, 512, buf, 512) != 512)
    514 		return -1;
    515 
    516 	/* Plan 9 partition table is just text strings */
    517 	if(strncmp((char*)buf, "part ", 5) != 0)
    518 		return -1;
    519 
    520 	buf[511] = 0;
    521 	n = getfields((char*)buf, line, 40, 1, "\n");
    522 	for(i=0; i<n; i++){
    523 		if(getfields(line[i], f, 4, 1, " ") != 4)
    524 			break;
    525 		if(strcmp(f[0], "part") != 0)
    526 			break;
    527 		if(strcmp(f[1], name) == 0){
    528 			start = 512*strtoll(f[2], 0, 0);
    529 			end = 512*strtoll(f[3], 0, 0);
    530 			if(start  < end && end <= part->size){
    531 				part->offset += start;
    532 				part->size = end - start;
    533 				return 0;
    534 			}
    535 			return -1;
    536 		}
    537 	}
    538 	return -1;
    539 }
    540 
    /*
     * Little-endian decoders for a byte pointer p.
     * GSHORT yields the 16-bit value at p[0..1]; GLONG the 32-bit value at p[0..3].
     * GLONG is computed in unsigned long: shifting the high half left by 16
     * in plain int overflows for values >= 0x8000 and would turn negative
     * when widened to 64 bits.  The argument is parenthesized so that
     * expressions such as GLONG(q+4) expand correctly.
     */
    #define	GSHORT(p)	(((p)[1]<<8)|(p)[0])
    #define	GLONG(p)	(((unsigned long)GSHORT((p)+2)<<16)|(unsigned long)GSHORT(p))
    543 
    544 typedef struct Dospart Dospart;
    545 struct Dospart	/* one 16-byte MBR partition-table entry; findsubpart overlays four of them at byte 0x1BE of sector 0 */
    546 {
    547 	uchar flag;		/* active flag */
    548 	uchar shead;		/* starting head */
    549 	uchar scs[2];		/* starting cylinder/sector */
    550 	uchar type;		/* partition type; findsubpart only looks at entries of type '9' */
    551 	uchar ehead;		/* ending head */
    552 	uchar ecs[2];		/* ending cylinder/sector */
    553 	uchar offset[4];		/* starting sector, little-endian (decode with GLONG), in 512-byte sectors */
    554 	uchar size[4];		/* length in sectors, little-endian (decode with GLONG) */
    555 };
    556 
    557 
    558 int
    559 findsubpart(Part *part, char *name)
    560 {
    561 	int i;
    562 	uchar buf[512];
    563 	u64int size;
    564 	Dospart *dp;
    565 
    566 	/* See if this is a Plan 9 partition. */
    567 	if(tryplan9part(part, name) >= 0)
    568 		return 0;
    569 
    570 	/* Otherwise try for an MBR and then narrow to Plan 9 partition. */
    571 	if(readpart(part, 0, buf, 512) != 512)
    572 		return -1;
    573 	if(buf[0x1FE] != 0x55 || buf[0x1FF] != 0xAA)
    574 		return -1;
    575 	dp = (Dospart*)(buf+0x1BE);
    576 	size = part->size;
    577 	for(i=0; i<4; i++){
    578 		if(dp[i].type == '9'){
    579 			part->offset = 512LL*GLONG(dp[i].offset);
    580 			part->size = 512LL*GLONG(dp[i].size);
    581 			if(tryplan9part(part, name) >= 0)
    582 				return 0;
    583 			part->offset = 0;
    584 			part->size = size;
    585 		}
    586 		/* Not implementing extended partitions - enough is enough. */
    587 	}
    588 	return -1;
    589 }