hierarchy (3049B)
#!/bin/sh

# hierarchy: regroup sorted index entries that share a leading word.
#
# usage: hierarchy [file ...]      (reads standard input when no file is given)
#
# input, one entry per line, tab-separated:
#   key (tab) string (tab) page numbers
#     command (tab) command (tab) 123
#     command, data (tab) command, [data] (tab) 11
#     command, display (tab) command, [display] (tab) 11, 54, 63, 75
#     command, model (tab) command, [model] (tab) 11
#     command, quit (tab) command, [quit] (tab) 5, 16
# output, same three-column shape (schematically):
#   key (tab) string (tab) page numbers
#     key (tab) command (tab) 123
#     key (tab)  [data] (tab) 11
#     key (tab)  [display] (tab) ...
#     key (tab)  [model] (tab) ...
#     key (tab)  [quit] (tab) ...
#
# Entries whose keys start with the same first word form a "run".  Within a
# run the shared leading word of each string is replaced by blank space, and
# entries are emitted in this order: "{see" cross references, plain entries,
# entries with [...] after the first word, entries that begin with [...].

awk '
BEGIN { FS = OFS = "\t" }

# Remember every record: y[] is the key, x[] is "string<TAB>page numbers".
{ x[NR] = $2 "\t" $3; y[NR] = $1 }

# Walk the records run by run.  A run of length one is printed unchanged;
# longer runs are regrouped by printrun().
END {
	for (i = 1; i <= NR; i = j + 1) {
		j = findrun(i)          # index of the last record in the run
		if (j > i)
			printrun(i, j)
		else
			print y[i], x[i]
	}
}

# findrun(s): return the index of the last record of the run starting at s,
# i.e. the longest stretch y[s], y[s+1], ... whose keys equal the prefix of
# y[s] or start with it followed by a space or a comma.
# j, p, np, c are locals.
function findrun(s,    j, p, np, c) {
	p = prefix(y[s])
	np = length(p)
	for (j = s + 1; j <= NR; j++) {
		if (y[j] == p)                  # identical to the prefix: include
			continue
		if (index(y[j], p) != 1)        # does not start with the prefix
			break
		c = substr(y[j], np + 1, 1)
		if (c != " " && c != ",")       # must be a whole-word prefix
			break
	}
	return j - 1
}

# prefix(s): return the first word of s after deleting every comma
# (everything before the first space, or all of s if it has no space).
# s is a scalar parameter, so the gsub does not affect the caller.
function prefix(s,    n) {
	gsub(/,/, "", s)
	n = index(s, " ")
	if (n > 0)
		return substr(s, 1, n - 1)
	else
		return s
}

# printrun(s, e): print records s..e (inclusive) of one run.
# Each record goes into exactly one bucket, tested in this order:
#   sx/sy  string contains "{see"            (cross references)
#   px/py  string begins with "["            ([prefix] ... items)
#   ex/ey  string contains [...] elsewhere
#   ix/iy  everything else
# Buckets are printed as: see, plain, [...] elsewhere, [prefix].
# All names after i are locals, so buckets start empty on every call.
function printrun(s, e,    i, s1, e1, p1, i1, sx, sy, px, py, ex, ey, ix, iy) {
	s1 = 0; e1 = 0; p1 = 0; i1 = 0
	for (i = s; i <= e; i++) {
		if (x[i] ~ /\{see/) {                   # see, see also
			sx[s1] = x[i]
			sy[s1] = y[i]
			s1++
		} else if (x[i] ~ /^\[/) {              # string starts with [...]
			px[p1] = x[i]
			py[p1] = y[i]
			p1++
		} else if (x[i] ~ /\[.*\]/) {           # [...] somewhere else
			ex[e1] = x[i]
			ey[e1] = y[i]
			e1++
		} else {                                # none of the above
			ix[i1] = x[i]
			iy[i1] = y[i]
			i1++
		}
	}
	# Sanity check: the four branches above are exhaustive, so the bucket
	# sizes must add up to the length of the run.
	if (e - s + 1 != s1 + p1 + i1 + e1)
		print "hierarchy: internal error: records " s "-" e " not fully classified" > "/dev/stderr"

	for (i = 0; i < s1; i++)                # "see" items, one per line
		print sy[i], sx[i]

	if (i1 > 1)
		printgroup(ix, iy, 0, i1)       # plain items
	else if (i1 == 1)
		print iy[0], ix[0]

	if (e1 > 1)
		printgroup(ex, ey, 0, e1)       # items with [...] after first word
	else if (e1 == 1)
		print ey[0], ex[0]

	if (p1 > 1)
		printgroup(px, py, 0, p1)       # [prefix] ... items
	else if (p1 == 1)
		print py[0], px[0]
}

# printgroup(x, y, s, e): print entries x[s..e-1] as one group, all under the
# key y[s].  The parameters x and y shadow the global arrays of the same name.
# If every entry starts with the same two words as x[s], the first space of
# each entry is temporarily turned into "@" so that the two words are treated
# as a single leading word (entries in x are modified in place).
# A main-entry line (key, leading word) is printed when the first string has
# more than its leading word; then each entry is printed with its leading
# word replaced by blank space.
# i, j, f23, temp, pfx, c, n are locals.
function printgroup(x, y, s, e,    i, j, f23, temp, pfx, c, n) {
	split(x[s], f23)                        # f23[1] = string, f23[2] = pages
	if (split(f23[1], temp, " ") > 1) {
		pfx = temp[1] " " temp[2]       # candidate 2-word prefix
		for (i = s + 1; i < e; i++) {
			if (index(x[i], pfx) != 1)
				break
			c = substr(x[i], length(pfx) + 1, 1)
			if (c != " " && c != ",")       # must be a whole-word prefix
				break
		}
		if (i == e) {                   # every entry shares the 2-word prefix
			sub(/ /, "@", f23[1])
			for (i = s; i < e; i++)
				sub(/ /, "@", x[i])     # the @ is taken out again below
		}
	}
	n = sub(/,?[ ~]+.*/, "", f23[1])        # keep only the leading word

	sub(/,$/, "", f23[1])
	if (n > 0) {                            # something was cut: not a single word
		sub(/@/, " ", f23[1])
		print y[s], f23[1]              # main entry
	}
	for (j = s; j < e; j++) {
		split(x[j], f23)
		# NOTE(review): the replacement is one space as it appears in the
		# source at hand; confirm the intended indent width upstream.
		sub(/^[^, ]+[, ]+/, " ", f23[1])
		sub(/@/, " ", f23[1])
		print y[s], f23[1], f23[2]
	}
}

' "$@"