doclean (737B)
#!/bin/sh
#
# doclean - clean raw index entries and swap their two fields.
#
# Usage:  doclean [file ...]     (awk reads standard input when no file is given)
#
# Input:  tab-separated lines of the form "number <tab> IX string", e.g.
#             107<tab>IX self-reference #1186 -
#             281<tab>TL APPENDIX A  AMPL Reference Manual  #26 -
# Output: "string <tab> number"
#             - excess spaces are removed from the output string
#             - note the reversal of order; the rest of the programs expect it
#
# Lines tagged TL, H1, H2, H3 or LASTPAGE are expected noise and are dropped
# silently.  Any other line that does not look like an index entry is
# reported on standard error and skipped.
#
# This contains some special pleading for the AMPL book.

# doclean [file ...]
#   Run the cleaning filter over the named files (or standard input).
#   Cleaned entries go to stdout, complaints to stderr; the return status
#   is that of awk.
doclean() {
  # "$@" (not unquoted $*) so that every file name reaches awk as exactly
  # one argument, even when it contains blanks or glob characters.
  awk '
BEGIN { FS = OFS = "\t" }

# Zap expected noise: records whose tag after the tab is not IX.
/\t(TL|H1|H2|H3|LASTPAGE)/ { next }

# Everything else must contain "number <tab> IX "; numbers may be arabic
# or lower-case roman (i, v, x).  The complaint is piped through cat
# so that it lands on stderr without relying on /dev/stderr support.
$0 !~ /[0-9ixv]+\tIX / {
	print "doclean: non index line: " $0 | "cat 1>&2"
	next
}

{
	sub(/IX +/, "", $2)             # zap the leading "IX " tag
	sub(/ +#[0-9]+ .*$/, "", $2)    # zap trailing blanks, slug, file
	gsub(/ +/, " ", $2)             # compress internal runs of blanks
	print $2, $1                    # item (tab) page number
}
' "$@"
}

doclean "$@"