plan9port

fork of plan9port with libvec, libstr and libsdb
Log | Files | Refs | README | LICENSE

html.h (16156B)


      1 #ifndef _HTML_H_
      2 #define _HTML_H_ 1
      3 #ifdef __cplusplus
      4 extern "C" {
      5 #endif
      6 
      7 AUTOLIB(html)
      8 /*
      9  #pragma lib "libhtml.a"
     10  #pragma src "/sys/src/libhtml"
     11 */
     12 
     13 /* UTILS */
     14 extern uchar*	fromStr(Rune* buf, int n, int chset);
     15 extern Rune*	toStr(uchar* buf, int n, int chset);
     16 
     17 /* Common LEX and BUILD enums */
     18 
     19 /* Media types */
     20 enum
     21 {
     22 	ApplMsword,
     23 	ApplOctets,
     24 	ApplPdf,
     25 	ApplPostscript,
     26 	ApplRtf,
     27 	ApplFramemaker,
     28 	ApplMsexcel,
     29 	ApplMspowerpoint,
     30 	UnknownType,
     31 	Audio32kadpcm,
     32 	AudioBasic,
     33 	ImageCgm,
     34 	ImageG3fax,
     35 	ImageGif,
     36 	ImageIef,
     37 	ImageJpeg,
     38 	ImagePng,
     39 	ImageTiff,
     40 	ImageXBit,
     41 	ImageXBit2,
     42 	ImageXBitmulti,
     43 	ImageXXBitmap,
     44 	ModelVrml,
     45 	MultiDigest,
     46 	MultiMixed,
     47 	TextCss,
     48 	TextEnriched,
     49 	TextHtml,
     50 	TextJavascript,
     51 	TextPlain,
     52 	TextRichtext,
     53 	TextSgml,
     54 	TextTabSeparatedValues,
     55 	TextXml,
     56 	VideoMpeg,
     57 	VideoQuicktime,
     58 	NMEDIATYPES
     59 };
     60 
     61 /* HTTP methods */
     62 enum
     63 {
     64 	HGet,
     65 	HPost
     66 };
     67 
     68 /* Charsets */
     69 enum
     70 {
     71 	UnknownCharset,
     72 	US_Ascii,
     73 	ISO_8859_1,
     74 	UTF_8,
     75 	Unicode,
     76 	NCHARSETS
     77 };
     78 
     79 /* Frame Target IDs */
     80 enum {
     81 	FTtop,
     82 	FTself,
     83 	FTparent,
     84 	FTblank
     85 };
     86 
     87 /* LEX */
     88 typedef struct Token Token;
     89 typedef struct Attr Attr;
     90 
     91 /* BUILD */
     92 
     93 typedef struct Item Item;
     94 typedef struct Itext Itext;
     95 typedef struct Irule Irule;
     96 typedef struct Iimage Iimage;
     97 typedef struct Iformfield Iformfield;
     98 typedef struct Itable Itable;
     99 typedef struct Ifloat Ifloat;
    100 typedef struct Ispacer Ispacer;
    101 typedef struct Genattr Genattr;
    102 typedef struct SEvent SEvent;
    103 typedef struct Formfield Formfield;
    104 typedef struct Option Option;
    105 typedef struct Form Form;
    106 typedef struct Table Table;
    107 typedef struct Tablecol Tablecol;
    108 typedef struct Tablerow Tablerow;
    109 typedef struct Tablecell Tablecell;
    110 typedef struct Align Align;
    111 typedef struct Dimen Dimen;
    112 typedef struct Anchor Anchor;
    113 typedef struct DestAnchor DestAnchor;
    114 typedef struct Map Map;
    115 typedef struct Area Area;
    116 typedef struct Background Background;
    117 typedef struct Kidinfo Kidinfo;
    118 typedef struct Docinfo Docinfo;
    119 typedef struct Stack Stack;
    120 typedef struct Pstate Pstate;
    121 typedef struct ItemSource ItemSource;
    122 typedef struct Lay Lay;	/* defined in Layout module */
    123 
    124 /* Alignment types */
    125 enum {
    126 	ALnone = 0, ALleft, ALcenter, ALright, ALjustify,
    127 	ALchar, ALtop, ALmiddle, ALbottom, ALbaseline
    128 };
    129 
    130 struct Align
    131 {
    132 	uchar	halign;	/* one of ALnone, ALleft, etc. */
    133 	uchar	valign;	/* one of ALnone, ALtop, etc. */
    134 };
    135 
    136 /* A Dimen holds a dimension specification, especially for those */
    137 /* cases when a number can be followed by a % or a * to indicate */
    138 /* percentage of total or relative weight. */
    139 /* Dnone means no dimension was specified */
    140 
    141 /* To fit in a word, use top bits to identify kind, rest for value */
    142 enum {
    143 	Dnone =		0,
    144 	Dpixels =		(1<<29),
    145 	Dpercent =	(2<<29),
    146 	Drelative =	(3<<29),
    147 	Dkindmask =	(3<<29),
    148 	Dspecmask =	(~Dkindmask)
    149 };
    150 
    151 struct Dimen
    152 {
    153 	int	kindspec;		/* kind | spec */
    154 };
    155 
    156 /* Background is either an image or a color. */
    157 /* If both are set, the image has precedence. */
    158 struct Background
    159 {
    160 	Rune*	image;	/* url */
    161 	int		color;
    162 };
    163 
    164 
/* There are about a half dozen Item variants. */
/* They all look like this at the start (the original relied on Plan 9 C's */
/* anonymous structure member mechanism; here each variant embeds an Item */
/* as its first member), */
/* and then the tag field dictates what extra fields there are. */
    169 struct Item
    170 {
    171 	Item*	next;		/* successor in list of items */
    172 	int		width;	/* width in pixels (0 for floating items) */
    173 	int		height;	/* height in pixels */
    174 	Rectangle	r;
    175 	int		ascent;	/* ascent (from top to baseline) in pixels */
    176 	int		anchorid;	/* if nonzero, which anchor we're in */
    177 	int		state;	/* flags and values (see below) */
    178 	Genattr*	genattr;	/* generic attributes and events */
    179 	int		tag;		/* variant discriminator: Itexttag, etc. */
    180 };
    181 
    182 /* Item variant tags */
    183 enum {
    184 	Itexttag,
    185 	Iruletag,
    186 	Iimagetag,
    187 	Iformfieldtag,
    188 	Itabletag,
    189 	Ifloattag,
    190 	Ispacertag
    191 };
    192 
    193 struct Itext
    194 {
    195 	Item item;				/* (with tag ==Itexttag) */
    196 	Rune*	s;			/* the characters */
    197 	int		fnt;			/* style*NumSize+size (see font stuff, below) */
    198 	int		fg;			/* Pixel (color) for text */
    199 	uchar	voff;			/* Voffbias+vertical offset from baseline, in pixels (+ve == down) */
    200 	uchar	ul;			/* ULnone, ULunder, or ULmid */
    201 };
    202 
    203 struct Irule
    204 {
    205 	Item item;				/* (with tag ==Iruletag) */
    206 	uchar	align;		/* alignment spec */
    207 	uchar	noshade;		/* if true, don't shade */
    208 	int		size;			/* size attr (rule height) */
    209 	Dimen	wspec;		/* width spec */
    210 };
    211 
    212 
    213 struct Iimage
    214 {
    215 	Item item;				/* (with tag ==Iimagetag) */
    216 	Rune*	imsrc;		/* image src url */
    217 	int		imwidth;		/* spec width (actual, if no spec) */
    218 	int		imheight;		/* spec height (actual, if no spec) */
    219 	Rune*	altrep;		/* alternate representation, in absence of image */
    220 	Map*	map;			/* if non-nil, client side map */
    221 	int		ctlid;			/* if animated */
    222 	uchar	align;		/* vertical alignment */
    223 	uchar	hspace;		/* in pixels; buffer space on each side */
    224 	uchar	vspace;		/* in pixels; buffer space on top and bottom */
    225 	uchar	border;		/* in pixels: border width to draw around image */
    226 	Iimage*	nextimage;	/* next in list of document's images */
    227 	void *aux;
    228 };
    229 
    230 
    231 struct Iformfield
    232 {
    233 	Item item;				/* (with tag ==Iformfieldtag) */
    234 	Formfield*	formfield;
    235 	void *aux;
    236 };
    237 
    238 
    239 struct Itable
    240 {
    241 	Item item;				/* (with tag ==Itabletag) */
    242 	Table*	table;
    243 };
    244 
    245 
    246 struct Ifloat
    247 {
    248 	Item _item;				/* (with tag ==Ifloattag) */
    249 	Item*	item;			/* table or image item that floats */
    250 	int		x;			/* x coord of top (from right, if ALright) */
    251 	int		y;			/* y coord of top */
    252 	uchar	side;			/* margin it floats to: ALleft or ALright */
    253 	uchar	infloats;		/* true if this has been added to a lay.floats */
    254 	Ifloat*	nextfloat;		/* in list of floats */
    255 };
    256 
    257 
    258 struct Ispacer
    259 {
    260 	Item item;				/* (with tag ==Ispacertag) */
    261 	int		spkind;		/* ISPnull, etc. */
    262 };
    263 
    264 /* Item state flags and value fields */
    265 enum {
    266 /*	IFbrk =			0x80000000,	// forced break before this item */
    267 #define	IFbrk		0x80000000 /* too big for sun */
    268 	IFbrksp =			0x40000000,	/* add 1 line space to break (IFbrk set too) */
    269 	IFnobrk =			0x20000000,	/* break not allowed before this item */
    270 	IFcleft =			0x10000000,	/* clear left floats (IFbrk set too) */
    271 	IFcright =			0x08000000,	/* clear right floats (IFbrk set too) */
    272 	IFwrap =			0x04000000,	/* in a wrapping (non-pre) line */
    273 	IFhang =			0x02000000,	/* in a hanging (into left indent) item */
    274 	IFrjust =			0x01000000,	/* right justify current line */
    275 	IFcjust =			0x00800000,	/* center justify current line */
    276 	IFsmap =			0x00400000,	/* image is server-side map */
    277 	IFindentshift =		8,
    278 	IFindentmask =		(255<<IFindentshift),	/* current indent, in tab stops */
    279 	IFhangmask =		255			/* current hang into left indent, in 1/10th tabstops */
    280 };
    281 
    282 /* Bias added to Itext's voff field */
    283 enum { Voffbias = 128 };
    284 
    285 /* Spacer kinds */
    286 enum {
    287 	ISPnull,			/* 0 height and width */
    288 	ISPvline,			/* height and ascent of current font */
    289 	ISPhspace,		/* width of space in current font */
    290 	ISPgeneral		/* other purposes (e.g., between markers and list) */
    291 };
    292 
    293 /* Generic attributes and events (not many elements will have any of these set) */
    294 struct Genattr
    295 {
    296 	Rune*	id;
    297 	Rune*	class;
    298 	Rune*	style;
    299 	Rune*	title;
    300 	SEvent*	events;
    301 };
    302 
    303 struct SEvent
    304 {
    305 	SEvent*	next;		/* in list of events */
    306 	int		type;		/* SEonblur, etc. */
    307 	Rune*	script;
    308 };
    309 
    310 enum {
    311 	SEonblur, SEonchange, SEonclick, SEondblclick,
    312 	SEonfocus, SEonkeypress, SEonkeyup, SEonload,
    313 	SEonmousedown, SEonmousemove, SEonmouseout,
    314 	SEonmouseover, SEonmouseup, SEonreset, SEonselect,
    315 	SEonsubmit, SEonunload,
    316 	Numscriptev
    317 };
    318 
    319 /* Form field types */
    320 enum {
    321 	Ftext,
    322 	Fpassword,
    323 	Fcheckbox,
    324 	Fradio,
    325 	Fsubmit,
    326 	Fhidden,
    327 	Fimage,
    328 	Freset,
    329 	Ffile,
    330 	Fbutton,
    331 	Fselect,
    332 	Ftextarea
    333 };
    334 
    335 /* Information about a field in a form */
    336 struct Formfield
    337 {
    338 	Formfield*	next;		/* in list of fields for a form */
    339 	int			ftype;	/* Ftext, Fpassword, etc. */
    340 	int			fieldid;	/* serial no. of field within its form */
    341 	Form*		form;	/* containing form */
    342 	Rune*		name;	/* name attr */
    343 	Rune*		value;	/* value attr */
    344 	int			size;		/* size attr */
    345 	int			maxlength;	/* maxlength attr */
    346 	int			rows;	/* rows attr */
    347 	int			cols;		/* cols attr */
    348 	uchar		flags;	/* FFchecked, etc. */
    349 	Option*		options;	/* for Fselect fields */
    350 	Item*		image;	/* image item, for Fimage fields */
    351 	int			ctlid;		/* identifies control for this field in layout */
    352 	SEvent*		events;	/* same as genattr->events of containing item */
    353 };
    354 
    355 enum {
    356 	FFchecked =	(1<<7),
    357 	FFmultiple =	(1<<6)
    358 };
    359 
    360 /* Option holds info about an option in a "select" form field */
    361 struct Option
    362 {
    363 	Option*	next;			/* next in list of options for a field */
    364 	int		selected;		/* true if selected initially */
    365 	Rune*	value;		/* value attr */
    366 	Rune*	display;		/* display string */
    367 };
    368 
    369 /* Form holds info about a form */
    370 struct Form
    371 {
    372 	Form*		next;		/* in list of forms for document */
    373 	int			formid;	/* serial no. of form within its doc */
    374 	Rune*		name;	/* name or id attr (netscape uses name, HTML 4.0 uses id) */
    375 	Rune*		action;	/* action attr */
    376 	int			target;	/* target attr as targetid */
    377 	int			method;	/* HGet or HPost */
    378 	int			nfields;	/* number of fields */
    379 	Formfield*	fields;	/* field's forms, in input order */
    380 };
    381 
    382 /* Flags used in various table structures */
    383 enum {
    384 	TFparsing =	(1<<7),
    385 	TFnowrap =	(1<<6),
    386 	TFisth =		(1<<5)
    387 };
    388 
    389 
    390 /* Information about a table */
    391 struct Table
    392 {
    393 	Table*		next;			/* next in list of document's tables */
    394 	int			tableid;		/* serial no. of table within its doc */
    395 	Tablerow*	rows;		/* array of row specs (list during parsing) */
    396 	int			nrow;		/* total number of rows */
    397 	Tablecol*		cols;			/* array of column specs */
    398 	int			ncol;			/* total number of columns */
    399 	Tablecell*		cells;			/* list of unique cells */
    400 	int			ncell;		/* total number of cells */
    401 	Tablecell***	grid;			/* 2-D array of cells */
    402 	Align		align;		/* alignment spec for whole table */
    403 	Dimen		width;		/* width spec for whole table */
    404 	int			border;		/* border attr */
    405 	int			cellspacing;	/* cellspacing attr */
    406 	int			cellpadding;	/* cellpadding attr */
    407 	Background	background;	/* table background */
    408 	Item*		caption;		/* linked list of Items, giving caption */
    409 	uchar		caption_place;	/* ALtop or ALbottom */
    410 	Lay*			caption_lay;	/* layout of caption */
    411 	int			totw;			/* total width */
    412 	int			toth;			/* total height */
    413 	int			caph;		/* caption height */
    414 	int			availw;		/* used for previous 3 sizes */
    415 	Token*		tabletok;		/* token that started the table */
    416 	uchar		flags;		/* Lchanged, perhaps */
    417 };
    418 
    419 
    420 struct Tablecol
    421 {
    422 	int		width;
    423 	Align	align;
    424 	Point		pos;
    425 };
    426 
    427 
    428 struct Tablerow
    429 {
    430 	Tablerow*	next;			/* Next in list of rows, during parsing */
    431 	Tablecell*		cells;			/* Cells in row, linked through nextinrow */
    432 	int			height;
    433 	int			ascent;
    434 	Align		align;
    435 	Background	background;
    436 	Point			pos;
    437 	uchar		flags;		/* 0 or TFparsing */
    438 };
    439 
    440 
    441 /* A Tablecell is one cell of a table. */
    442 /* It may span multiple rows and multiple columns. */
    443 /* Cells are linked on two lists: the list for all the cells of */
    444 /* a document (the next pointers), and the list of all the */
    445 /* cells that start in a given row (the nextinrow pointers) */
    446 struct Tablecell
    447 {
    448 	Tablecell*		next;			/* next in list of table's cells */
    449 	Tablecell*		nextinrow;	/* next in list of row's cells */
    450 	int			cellid;		/* serial no. of cell within table */
    451 	Item*		content;		/* contents before layout */
    452 	Lay*			lay;			/* layout of cell */
    453 	int			rowspan;		/* number of rows spanned by this cell */
    454 	int			colspan;		/* number of cols spanned by this cell */
    455 	Align		align;		/* alignment spec */
    456 	uchar		flags;		/* TFparsing, TFnowrap, TFisth */
    457 	Dimen		wspec;		/* suggested width */
    458 	int			hspec;		/* suggested height */
    459 	Background	background;	/* cell background */
    460 	int			minw;		/* minimum possible width */
    461 	int			maxw;		/* maximum width */
    462 	int			ascent;		/* cell's ascent */
    463 	int			row;			/* row of upper left corner */
    464 	int			col;			/* col of upper left corner */
    465 	Point			pos;			/* nw corner of cell contents, in cell */
    466 };
    467 
    468 /* Anchor is for info about hyperlinks that go somewhere */
    469 struct Anchor
    470 {
    471 	Anchor*		next;		/* next in list of document's anchors */
    472 	int			index;	/* serial no. of anchor within its doc */
    473 	Rune*		name;	/* name attr */
    474 	Rune*		href;		/* href attr */
    475 	int			target;	/* target attr as targetid */
    476 };
    477 
    478 
    479 /* DestAnchor is for info about hyperlinks that are destinations */
    480 struct DestAnchor
    481 {
    482 	DestAnchor*	next;		/* next in list of document's destanchors */
    483 	int			index;	/* serial no. of anchor within its doc */
    484 	Rune*		name;	/* name attr */
    485 	Item*		item;		/* the destination */
    486 };
    487 
    488 
    489 /* Maps (client side) */
    490 struct Map
    491 {
    492 	Map*	next;			/* next in list of document's maps */
    493 	Rune*	name;		/* map name */
    494 	Area*	areas;		/* list of map areas */
    495 };
    496 
    497 
    498 struct Area
    499 {
    500 	Area*		next;		/* next in list of a map's areas */
    501 	int			shape;	/* SHrect, etc. */
    502 	Rune*		href;		/* associated hypertext link */
    503 	int			target;	/* associated target frame */
    504 	Dimen*		coords;	/* array of coords for shape */
    505 	int			ncoords;	/* size of coords array */
    506 };
    507 
    508 /* Area shapes */
    509 enum {
    510 	SHrect, SHcircle, SHpoly
    511 };
    512 
    513 /* Fonts are represented by integers: style*NumSize + size */
    514 
    515 /* Font styles */
    516 enum {
    517 	FntR,			/* roman */
    518 	FntI,			/* italic */
    519 	FntB,			/* bold */
    520 	FntT,			/* typewriter */
    521 	NumStyle
    522 };
    523 
    524 /* Font sizes */
    525 enum {
    526 	Tiny,
    527 	Small,
    528 	Normal,
    529 	Large,
    530 	Verylarge,
    531 	NumSize
    532 };
    533 
    534 enum {
    535 	NumFnt = (NumStyle*NumSize),
    536 	DefFnt = (FntR*NumSize+Normal)
    537 };
    538 
    539 /* Lines are needed through some text items, for underlining or strikethrough */
    540 enum {
    541 	ULnone, ULunder, ULmid
    542 };
    543 
    544 /* Kidinfo flags */
    545 enum {
    546 	FRnoresize =	(1<<0),
    547 	FRnoscroll =	(1<<1),
    548 	FRhscroll = 	(1<<2),
    549 	FRvscroll =	(1<<3),
    550 	FRhscrollauto = (1<<4),
    551 	FRvscrollauto =	(1<<5)
    552 };
    553 
    554 /* Information about child frame or frameset */
    555 struct Kidinfo
    556 {
    557 	Kidinfo*		next;		/* in list of kidinfos for a frameset */
    558 	int			isframeset;
    559 
    560 	/* fields for "frame" */
    561 	Rune*		src;		/* only nil if a "dummy" frame or this is frameset */
    562 	Rune*		name;	/* always non-empty if this isn't frameset */
    563 	int			marginw;
    564 	int			marginh;
    565 	int			framebd;
    566 	int			flags;
    567 
    568 	/* fields for "frameset" */
    569 	Dimen*		rows;	/* array of row dimensions */
    570 	int			nrows;	/* length of rows */
    571 	Dimen*		cols;		/* array of col dimensions */
    572 	int			ncols;	/* length of cols */
    573 	Kidinfo*		kidinfos;
    574 	Kidinfo*		nextframeset;	/* parsing stack */
    575 };
    576 
    577 
    578 /* Document info (global information about HTML page) */
    579 struct Docinfo
    580 {
    581 	/* stuff from HTTP headers, doc head, and body tag */
    582 	Rune*		src;				/* original source of doc */
    583 	Rune*		base;			/* base URL of doc */
    584 	Rune*		doctitle;			/* from <title> element */
    585 	Background	background;		/* background specification */
    586 	Iimage*		backgrounditem;	/* Image Item for doc background image, or nil */
    587 	int			text;				/* doc foreground (text) color */
    588 	int			link;				/* unvisited hyperlink color */
    589 	int			vlink;			/* visited hyperlink color */
    590 	int			alink;			/* highlighting hyperlink color */
    591 	int			target;			/* target frame default */
    592 	int			chset;			/* ISO_8859, etc. */
    593 	int			mediatype;		/* TextHtml, etc. */
    594 	int			scripttype;		/* TextJavascript, etc. */
    595 	int			hasscripts;		/* true if scripts used */
    596 	Rune*		refresh;			/* content of <http-equiv=Refresh ...> */
    597 	Kidinfo*		kidinfo;			/* if a frameset */
    598 	int			frameid;			/* id of document frame */
    599 
    600 	/* info needed to respond to user actions */
    601 	Anchor*		anchors;			/* list of href anchors */
    602 	DestAnchor*	dests;			/* list of destination anchors */
    603 	Form*		forms;			/* list of forms */
    604 	Table*		tables;			/* list of tables */
    605 	Map*		maps;			/* list of maps */
    606 	Iimage*		images;			/* list of image items (through nextimage links) */
    607 };
    608 
    609 extern int			dimenkind(Dimen d);
    610 extern int			dimenspec(Dimen d);
    611 extern void		freedocinfo(Docinfo* d);
    612 extern void		freeitems(Item* ithead);
    613 extern Item*		parsehtml(uchar* data, int datalen, Rune* src, int mtype, int chset, Docinfo** pdi);
    614 extern void		printitems(Item* items, char* msg);
    615 extern int			targetid(Rune* s);
    616 extern Rune*		targetname(int targid);
    617 extern int			validitems(Item* i);
    618 
    619 /* #pragma varargck	type "I"	Item* */
    620 
    621 /* Control print output */
    622 extern int			warn;
    623 extern int			dbglex;
    624 extern int			dbgbuild;
    625 
    626 /* To be provided by caller */
    627 /* emalloc and erealloc should not return if can't get memory. */
    628 /* emalloc should zero its memory. */
    629 extern void*	emalloc(ulong);
    630 extern void*	erealloc(void* p, ulong size);
    631 #ifdef __cpluspplus
    632 }
    633 #endif
    634 #endif