canonind.awk (665B)
# Turn the output of mkindex into the form needed by dict.
#
# Takes exactly one argument, the raw index file.  For every input line,
# each non-empty word in fields 2..NF is written as "word<TAB>number",
# where number is field 1 formatted with %d.  A word containing a
# parenthesised part, e.g. "colo(u)r", is written twice: once without the
# part ("color") and once with it ("colour").
#
# The records are collected in a scratch file called "junk" in the current
# directory, sorted with duplicates removed onto standard output, and the
# scratch file is then removed.  Exit status is 0 on success, 1 on a usage
# error, an input read error, or a failing sort.
#
# NOTE(review): the separator shown in the usage message is reproduced
# exactly as found in this file; confirm whether mkindex output is really
# space-separated or tab-separated.
BEGIN {
	if (ARGC != 2) {
		print "Usage: awk -F' ' -f canonind.awk rawindex > index"
		exit 1
	}
	file = ARGV[1]
	ARGV[1] = ""		# the file is read explicitly with getline below
	tmp = "junk"
	nout = 0		# number of records written to the scratch file

	while ((rc = (getline < file)) > 0) {
		for (i = 2; i <= NF; i++) {
			w = $i
			if (length(w) == 0)
				continue
			b = index(w, "(")
			e = index(w, ")")
			if (b && e && b < e) {
				# Optional part: emit the word both without
				# and with the text between the parentheses.
				w1 = substr(w, 1, b-1)
				w2 = substr(w, b+1, e-b-1)
				w3 = substr(w, e+1)
				printf "%s%s\t%d\n", w1, w3, $1 > tmp
				printf "%s%s%s\t%d\n", w1, w2, w3, $1 > tmp
				nout += 2
			} else {
				printf "%s\t%d\n", w, $1 > tmp
				nout++
			}
		}
	}
	close(file)

	status = 0
	if (rc < 0) {
		# getline returns -1 when the file cannot be opened or read.
		print "canonind.awk: error reading " file > "/dev/stderr"
		status = 1
	}

	# The scratch file only exists if at least one record was written;
	# skipping sort/rm otherwise avoids spurious "no such file" errors.
	if (nout > 0) {
		# Flush and close the scratch file before another process
		# reads it; system() is not guaranteed to flush files other
		# than standard output.
		close(tmp)
		if (status == 0) {
			# The field delimiter is a tab (written as \t so it
			# survives whitespace mangling), matching the "\t"
			# used in the printf formats above.
			# Keys: field 1 case-folded, then field 1 exactly,
			# then field 2 numerically.  The +pos -pos key syntax
			# is kept as found; POSIX sort would spell it
			# -k1,1f -k1,1 -k2,2n.
			if (system("sort -u -t'\t' +0f -1 +0 -1 +1n -2 < " tmp) != 0)
				status = 1
		}
		system("rm " tmp)
	}
	exit status
}