plan9port

fork of plan9port with libvec, libstr and libsdb
Log | Files | Refs | README | LICENSE

UTF.enc (4320B)


      1 %
      2 % Encoding vector, operator and procedure redefinitions for Plan 9 UTF
      3 % encoding. Prologues are expected to take steps to ensure operator
      4 % redefinitions given here are actually used. Current implementation
      5 % assumes UTF byte streams that represent ASCII or Latin1 text.
      6 %
      7 
      8 /UTFLatin1Encoding [	% 256 glyph names, indexed by character code 0-255
      9 	/.notdef	% code 0
     10 	/.notdef
     11 	/.notdef
     12 	/.notdef
     13 	/.notdef
     14 	/.notdef
     15 	/.notdef
     16 	/.notdef
     17 	/.notdef
     18 	/.notdef
     19 	/.notdef
     20 	/.notdef
     21 	/.notdef
     22 	/.notdef
     23 	/.notdef
     24 	/.notdef
     25 	/.notdef
     26 	/.notdef
     27 	/.notdef
     28 	/.notdef
     29 	/.notdef
     30 	/.notdef
     31 	/.notdef
     32 	/.notdef
     33 	/.notdef
     34 	/.notdef
     35 	/.notdef
     36 	/.notdef
     37 	/.notdef
     38 	/.notdef
     39 	/.notdef
     40 	/.notdef
     41 	/space	% code 32 (16#20)
     42 	/exclam
     43 	/quotedbl
     44 	/numbersign
     45 	/dollar
     46 	/percent
     47 	/ampersand
     48 	/quoteright
     49 	/parenleft
     50 	/parenright
     51 	/asterisk
     52 	/plus
     53 	/comma
     54 	/minus
     55 	/period
     56 	/slash
     57 	/zero	% code 48 (16#30)
     58 	/one
     59 	/two
     60 	/three
     61 	/four
     62 	/five
     63 	/six
     64 	/seven
     65 	/eight
     66 	/nine
     67 	/colon
     68 	/semicolon
     69 	/less
     70 	/equal
     71 	/greater
     72 	/question
     73 	/at
     74 	/A	% code 65 (16#41)
     75 	/B
     76 	/C
     77 	/D
     78 	/E
     79 	/F
     80 	/G
     81 	/H
     82 	/I
     83 	/J
     84 	/K
     85 	/L
     86 	/M
     87 	/N
     88 	/O
     89 	/P
     90 	/Q
     91 	/R
     92 	/S
     93 	/T
     94 	/U
     95 	/V
     96 	/W
     97 	/X
     98 	/Y
     99 	/Z
    100 	/bracketleft
    101 	/backslash
    102 	/bracketright
    103 	/asciicircum
    104 	/underscore
    105 	/quoteleft
    106 	/a	% code 97 (16#61)
    107 	/b
    108 	/c
    109 	/d
    110 	/e
    111 	/f
    112 	/g
    113 	/h
    114 	/i
    115 	/j
    116 	/k
    117 	/l
    118 	/m
    119 	/n
    120 	/o
    121 	/p
    122 	/q
    123 	/r
    124 	/s
    125 	/t
    126 	/u
    127 	/v
    128 	/w
    129 	/x
    130 	/y
    131 	/z
    132 	/braceleft
    133 	/bar
    134 	/braceright
    135 	/asciitilde
    136 	/.notdef	% code 127 (16#7F)
    137 	/.notdef	% code 128 (16#80)
    138 	/.notdef
    139 	/.notdef
    140 	/.notdef
    141 	/.notdef
    142 	/.notdef
    143 	/.notdef
    144 	/.notdef
    145 	/.notdef
    146 	/.notdef
    147 	/.notdef
    148 	/.notdef
    149 	/.notdef
    150 	/.notdef
    151 	/.notdef
    152 	/.notdef
    153 	/dotlessi	% code 144 (16#90)
    154 	/grave
    155 	/acute
    156 	/circumflex
    157 	/tilde
    158 	/macron
    159 	/breve
    160 	/dotaccent
    161 	/dieresis
    162 	/.notdef
    163 	/ring
    164 	/cedilla
    165 	/.notdef
    166 	/hungarumlaut
    167 	/ogonek
    168 	/caron
    169 	/.notdef		% code 160 (16#A0); was space
    170 	/exclamdown
    171 	/cent
    172 	/sterling
    173 	/currency
    174 	/yen
    175 	/brokenbar
    176 	/section
    177 	/dieresis
    178 	/copyright
    179 	/ordfeminine
    180 	/guillemotleft
    181 	/logicalnot
    182 	/hyphen
    183 	/registered
    184 	/macron
    185 	/degree
    186 	/plusminus
    187 	/twosuperior
    188 	/threesuperior
    189 	/acute
    190 	/mu
    191 	/paragraph
    192 	/periodcentered
    193 	/cedilla
    194 	/onesuperior
    195 	/ordmasculine
    196 	/guillemotright
    197 	/onequarter
    198 	/onehalf
    199 	/threequarters
    200 	/questiondown
    201 	/Agrave	% code 192 (16#C0)
    202 	/Aacute
    203 	/Acircumflex
    204 	/Atilde
    205 	/Adieresis
    206 	/Aring
    207 	/AE
    208 	/Ccedilla
    209 	/Egrave
    210 	/Eacute
    211 	/Ecircumflex
    212 	/Edieresis
    213 	/Igrave
    214 	/Iacute
    215 	/Icircumflex
    216 	/Idieresis
    217 	/Eth
    218 	/Ntilde
    219 	/Ograve
    220 	/Oacute
    221 	/Ocircumflex
    222 	/Otilde
    223 	/Odieresis
    224 	/multiply
    225 	/Oslash
    226 	/Ugrave
    227 	/Uacute
    228 	/Ucircumflex
    229 	/Udieresis
    230 	/Yacute
    231 	/Thorn
    232 	/germandbls
    233 	/agrave
    234 	/aacute
    235 	/acircumflex
    236 	/atilde
    237 	/adieresis
    238 	/aring
    239 	/ae
    240 	/ccedilla
    241 	/egrave
    242 	/eacute
    243 	/ecircumflex
    244 	/edieresis
    245 	/igrave
    246 	/iacute
    247 	/icircumflex
    248 	/idieresis
    249 	/eth
    250 	/ntilde
    251 	/ograve
    252 	/oacute
    253 	/ocircumflex
    254 	/otilde
    255 	/odieresis
    256 	/divide
    257 	/oslash
    258 	/ugrave
    259 	/uacute
    260 	/ucircumflex
    261 	/udieresis
    262 	/yacute
    263 	/thorn
    264 	/ydieresis	% code 255 (16#FF)
    265 ] def
    266 
    267 /NewFontDirectory FontDirectory maxlength dict def	% font name -> font handed out by the findfont redefinition below; same capacity as FontDirectory
    268 
    269 %
    270 % Apparently there is no guarantee findfont is defined in systemdict, so the obvious
    271 %
    272 %	systemdict /findfont get exec
    273 %
    274 % can generate an error. So far the only exception is a VT600 (version 48.0).
    275 %
    276 
    277 userdict /@RealFindfont known not {	% skip if already saved (presumably so a reload cannot capture the redefined findfont)
    278 	userdict begin
    279 		/@RealFindfont systemdict begin /findfont load end def	% load searches systemdict first, then the rest of the dictionary stack
    280 	end
    281 } if
    282 
    283 /findfont {	% name findfont font -- looks the font up in NewFontDirectory, filling the entry on first use
    284 	dup NewFontDirectory exch known not {	% first request for this name
    285 		dup
    286 		%dup systemdict /findfont get exec	% not always in systemdict
    287 		dup userdict /@RealFindfont get exec	% stack: name name font
    288 		dup /Encoding get StandardEncoding eq {	% re-encode only fonts that use StandardEncoding
    289 			dup length 1 add dict begin	% new font dictionary with room for one extra entry
    290 				{1 index /FID ne {def}{pop pop} ifelse} forall	% copy every entry except FID
    291 				/Encoding UTFLatin1Encoding def
    292 				/Metrics 1 dict def
    293 				Metrics /.notdef 0 put	% give .notdef a width of 0
    294 				currentdict
    295 			end
    296 			/DummyFontName exch definefont	% every re-encoded font is registered under this single name
    297 		} if
    298 		NewFontDirectory 3 1 roll put	% store font under name; one copy of name stays on the stack
    299 	} if
    300 	NewFontDirectory exch get
    301 } bind def
    302 
    303 %
    304 % Assume A0 can be ignored, except for A0A0, which is replaced by 20A0.
    305 % Works with ASCII or Latin1 because A0 has been re-encoded as a zero
    306 % width non-printing character.
    307 %
    308 
    309 /UTFstring {	% string UTFstring string -- rewrites each A0A0 pair to 20A0 inside the string itself
    310 	dup {
    311 		(\240\240) search {	% found: stack is post match pre
    312 			pop	% discard pre
    313 			0 16#20 put	% first byte of match becomes 16#20; post is left for the next pass
    314 		}{pop exit} ifelse	% not found: drop the unsearched remainder and stop
    315 	} loop
    316 } bind def
    317 
    318 /ashow {mark 4 1 roll UTFstring //ashow cvx exec cleartomark} bind def	% mark is rolled under the 3 operands; //ashow is the definition current when this line is scanned
    319 /awidthshow {mark 7 1 roll UTFstring //awidthshow cvx exec cleartomark} bind def	% mark is rolled under the 6 operands
    320 /show {mark exch UTFstring //show cvx exec cleartomark} bind def	% mark goes under the string; cleartomark removes it afterwards
    321 /stringwidth {UTFstring //stringwidth cvx exec} bind def	% no mark/cleartomark here, so whatever stringwidth leaves stays on the stack
    322 /widthshow {mark 5 1 roll UTFstring //widthshow cvx exec cleartomark} bind def	% mark is rolled under the 4 operands
    323 
    324 %
    325 % kshow is harder - stack can't change because of the procedure.
    326 %
    327 
    328 /kshow dup load type /operatortype eq	% pick the wrapper according to how kshow is currently defined
    329 	{{UTFstring kshow} bind}	% operator: bind replaces the name kshow with the operator itself
    330 	{{UTFstring //kshow cvx exec} bind}	% otherwise: //kshow embeds the current definition when this line is scanned
    331 ifelse def
    332