plan9port

fork of plan9port with libvec, libstr and libsdb
Log | Files | Refs | README | LICENSE

rune.3 (3083B)


      1 .TH RUNE 3
      2 .SH NAME
      3 runetochar, chartorune, runelen, runenlen, fullrune, utfecpy, utflen, utfnlen, utfrune, utfrrune, utfutf \- rune/UTF conversion
      4 .SH SYNOPSIS
      5 .ta \w'\fLchar*xx'u
      6 .B #include <u.h>
      7 .br
      8 .B #include <libc.h>
      9 .PP
     10 .B
     11 int	runetochar(char *s, Rune *r)
     12 .PP
     13 .B
     14 int	chartorune(Rune *r, char *s)
     15 .PP
     16 .B
     17 int	runelen(long r)
     18 .PP
     19 .B
     20 int	runenlen(Rune *r, int n)
     21 .PP
     22 .B
     23 int	fullrune(char *s, int n)
     24 .PP
     25 .B
     26 char*	utfecpy(char *s1, char *es1, char *s2)
     27 .PP
     28 .B
     29 int	utflen(char *s)
     30 .PP
     31 .B
     32 int	utfnlen(char *s, long n)
     33 .PP
     34 .B
     35 char*	utfrune(char *s, long c)
     36 .PP
     37 .B
     38 char*	utfrrune(char *s, long c)
     39 .PP
     40 .B
     41 char*	utfutf(char *s1, char *s2)
     42 .SH DESCRIPTION
     43 These routines convert between a
     44 .SM UTF
     45 byte stream and runes.
     46 .PP
     47 .I Runetochar
     48 copies one rune at
     49 .I r
     50 to at most
     51 .B UTFmax
     52 bytes starting at
     53 .I s
     54 and returns the number of bytes copied.
     55 .BR UTFmax ,
     56 defined as
     57 .B 4
     58 in
     59 .BR <libc.h> ,
     60 is the maximum number of bytes required to represent a rune.
     61 .PP
     62 .I Chartorune
     63 copies at most
     64 .B UTFmax
     65 bytes starting at
     66 .I s
     67 to one rune at
     68 .I r
     69 and returns the number of bytes copied.
     70 If the input is not exactly in
     71 .SM UTF
     72 format,
     73 .I chartorune
     74 will convert to
     75 .B Runeerror
     76 (0xFFFD)
     77 and return 1.
     78 .PP
     79 .I Runelen
     80 returns the number of bytes
     81 required to convert
     82 .I r
     83 into
     84 .SM UTF.
     85 .PP
     86 .I Runenlen
     87 returns the number of bytes
     88 required to convert the
     89 .I n
     90 runes pointed to by
     91 .I r
     92 into
     93 .SM UTF.
     94 .PP
     95 .I Fullrune
     96 returns 1 if the string
     97 .I s
     98 of length
     99 .I n
    100 is long enough to be decoded by
    101 .I chartorune
    102 and 0 otherwise.
    103 This does not guarantee that the string
    104 contains a legal
    105 .SM UTF
    106 encoding.
    107 This routine is used by programs that
    108 obtain input a byte at
    109 a time and need to know when a full rune
    110 has arrived.
    111 .PP
    112 The following routines are analogous to the
    113 corresponding string routines with
    114 .B utf
    115 substituted for
    116 .B str
    117 and
    118 .B rune
    119 substituted for
    120 .BR chr .
    121 .PP
    122 .I Utfecpy
    123 copies UTF sequences from \fIs2\fP to \fIs1\fP until a null sequence has been copied, but writes no
    124 sequences beyond
    125 .IR es1 .
    126 If any sequences are copied,
    127 .I s1
    128 is terminated by a null sequence, and a pointer to that sequence is returned.
    129 Otherwise, the original
    130 .I s1
    131 is returned.
    132 .PP
    133 .I Utflen
    134 returns the number of runes that
    135 are represented by the
    136 .SM UTF
    137 string
    138 .IR s .
    139 .PP
    140 .I Utfnlen
    141 returns the number of complete runes that
    142 are represented by the first
    143 .I n
    144 bytes of
    145 .SM UTF
    146 string
    147 .IR s .
    148 If the last few bytes of the string contain an incompletely coded rune,
    149 .I utfnlen
    150 will not count them; in this way, it differs from
    151 .IR utflen ,
    152 which includes every byte of the string.
    153 .PP
    154 .I Utfrune
    155 .RI ( utfrrune )
    156 returns a pointer to the first (last)
    157 occurrence of rune
    158 .I c
    159 in the
    160 .SM UTF
    161 string
    162 .IR s ,
    163 or 0 if
    164 .I c
    165 does not occur in the string.
    166 The NUL byte terminating a string is considered to
    167 be part of the string
    168 .IR s .
    169 .PP
    170 .I Utfutf
    171 returns a pointer to the first occurrence of
    172 the
    173 .SM UTF
    174 string
    175 .I s2
    176 as a
    177 .SM UTF
    178 substring of
    179 .IR s1 ,
    180 or 0 if there is none.
    181 If
    182 .I s2
    183 is the null string,
    184 .I utfutf
    185 returns
    186 .IR s1 .
    187 .SH SOURCE
    188 .B \*9/src/lib9/utf/rune.c
    189 .br
    190 .B \*9/src/lib9/utf/utfrune.c
    191 .SH SEE ALSO
    192 .MR utf (7) ,
    193 .MR tcs (1)