# gen.key (1556B)
awk '
# gen.key: attach a sort key to every index entry.
#
# Input:  each input line has one of the following two forms:
#             string (tab) numlist
#             string " %key " sort.key (tab) numlist
# Output: each output line has the form:
#             sort.key (tab) string (tab) numlist
#
# Fields are tab-separated on input and on output.  When the entry does
# not carry an explicit sort.key, one is derived from string by removing
# troff markup and the index quoting conventions (% quotes the next
# character, ~ stands for a space, [] and {} change fonts).

BEGIN { FS = OFS = "\t" }

# Explicit key: split the string field around the " %key " marker.
# The test is made on $1 rather than on the whole record, so index()
# below can never return 0 and produce a garbled line.
$1 ~ / %key / {
    i = index($1, " %key ")     # the marker is 6 characters long
    print substr($1, i+6), substr($1, 1, i-1), $2
    next
}

# No explicit key: generate one in $2 by modifying a copy of the string.
{
    $3 = $2                     # numlist moves over to make room for the key
    $2 = $1                     # the key starts out as the string itself

    # Remove some troff commands (font changes \fX and \f(XX, size changes \sN)
    gsub(/\\f\(..|\\f.|\\s[+-][0-9]|\\s[0-9][0-9]?/, "", $2)

    # underscore -> 0, so "foo_gorp" sorts before "food"
    gsub(/_/, "0", $2)

    # quote character is %, space character is ~
    quoted = 0
    if ($2 ~ /%/) {             # hide quoted literals in QQnQQ placeholders
        quoted = 1
        gsub(/%%/, "QQ0QQ", $2)
        gsub(/%\[/, "QQ1QQ", $2)
        gsub(/%\]/, "QQ2QQ", $2)
        gsub(/%\{/, "QQ3QQ", $2)
        gsub(/%\}/, "QQ4QQ", $2)
        gsub(/%~/, "QQ5QQ", $2)
    }
    gsub(/%e/, "\\", $2)            # implement troff escape
    gsub(/~/, " ", $2)              # tildes become spaces
    gsub(/[%\[\]\{\}]/, "", $2)     # remove % and font-changing []{}
    if (quoted) {               # restore literals but without escape character
        gsub(/QQ0QQ/, "%", $2)
        gsub(/QQ1QQ/, "[", $2)
        gsub(/QQ2QQ/, "]", $2)
        gsub(/QQ3QQ/, "{", $2)
        gsub(/QQ4QQ/, "}", $2)
        gsub(/QQ5QQ/, "~", $2)
    }

    # Leading blanks force the ordering of the special tiers.  The two
    # prefixes must differ, otherwise both tiers sort together.
    if ($2 ~ /^[^a-zA-Z]+$/)    # purely nonalphabetic lines go first
        $2 = "  " $2            # two blanks
    else if ($2 ~ /^[0-9]/)     # lines with leading digits come next
        $2 = " " $2             # one blank
    # otherwise whatever final.sort does
}

{ print $2, $1, $3 }
' "$@"