summaryrefslogtreecommitdiff
path: root/webserver/wwwcvs-orig-files/modules/wwwgnu-nongnu/files/update-gnu-rewritemaps
blob: 96b78ef369e18c806fce2b4c29127b3c2d9cb6a9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
#! /bin/bash
# Create plain text maps for mod_rewrite from .symlinks files.
# Main control is at the end.
# 
# Originally written by Ineiev <ineiev@gmail.com>, feb 2011.
# Ineiev <ineiev@gmail.com>, Mar 2012: fix links to absolute paths.

# External tools, kept in variables so they can be overridden in one place.
SED=sed
FIND=find

# Positional parameters, all optional:
#   $1 - root of the web tree that is scanned for .symlinks files;
#   $2 - directory the generated maps are written to (the home
#        directory when omitted);
#   $3 - verbosity switch, "no" when omitted.
web_root=${1-/srv/data/www-mirror}
www_maps_dir=${2-~}
verbose=${3-no}

# Strip one trailing slash, if any, from both directories: the slash
# is re-inserted wherever the paths are assembled below.
# Maybe not 100% necessary, but cleaner.
web_root=${web_root%/}
www_maps_dir=${www_maps_dir%/}

# Longest chain of links that is followed before giving up.
max_link_depth=17

# These file names are used in the vhost configuration.
html_map_name=html-map.txt
nonhtml_map_name=nonhtml-map.txt
dir_map_name=dir-map.txt

# Print the usable lines of the .symlinks file "$1".
#
# Comments ("#" up to the end of line) are removed, every run of blanks
# is squeezed into a single space and one leading and one trailing
# blank are stripped.  Only lines that then consist of exactly two
# blank-separated fields are printed; empty and malformed lines are
# dropped.
#
# "$2" is handed to the first grep as an option: with "-n" every
# printed line is prefixed with "N:", N being its line number in "$1".
cleanup-symlinks () {
  # $2 is intentionally unquoted: when it is empty it has to vanish
  # instead of reaching grep as an empty argument.
  ${SED} -e 's/#.*//' \
         -e 's/[[:blank:]]\+/ /g' \
         -e 's/[[:blank:]]$//' \
         -e 's/^[[:blank:]]//' "$1" \
    | grep $2 '.[[:blank:]].' \
    | grep -v '[[:blank:]].*[[:blank:]]'
}

# Check whether "$1" exists as a regular file or as a directory and
# assign the global "type" variable accordingly.
#
# Arguments: $1 - path to examine.
# Globals:   type (written) - "file", "directory", or empty when "$1"
#                             is neither;
#            verbose (read) - "yes" makes the function report what it
#                             found on standard output.
# Returns:   0 when "$1" is a file or a directory, 1 otherwise.
get-file-type () {
  local kind

  # Regular files take precedence over directories.
  if [[ -f "$1" ]]; then
    kind=file
  elif [[ -d "$1" ]]; then
    kind=directory
  else
    type=
    return 1
  fi

  type=${kind}
  # The path is printed verbatim: it is neither word-split nor
  # glob-expanded, so unusual file names are reported exactly.
  if [[ "${verbose}" == yes ]]; then
    printf 'found %s %s\n' "$1" "${type}"
  fi
  return 0
}

# sed programs used to normalize paths.
# simplify_path collapses every "/name/../" sequence into "/",
# looping until no such sequence is left.
simplify_path=':cycle;s%/[^/]*/\.\./%/%;t cycle'
# normalize_link is the program applied to the generated map lines;
# for now it is simplify_path followed by a command separator.
normalize_link=${simplify_path}';'
# Open question: should normalize_link also contain something like
#  "s%\(^\| \)/savannah-checkouts/gnu%\1/software%g"
#  ?

# Function used to substitute software <-> savannah-checkouts/gnu in
# paths: replace the leading "/$2" of path "$1" with "/$3" and collapse
# "/name/../" sequences in the result.
#
# Arguments: $1 - path to rewrite;
#            $2 - prefix to replace, without the leading slash;
#            $3 - replacement prefix, without the leading slash.
# Globals:   aliased_path (written) - the rewritten path;
#            SED, simplify_path, verbose (read).
# Outputs:   a progress message on standard output when verbose is
#            "yes".
# Note:      $2 and $3 are interpolated into a sed expression that uses
#            "%" as delimiter, so they must not contain "%" or regular
#            expression metacharacters.
substitute-path () {
  # printf with a quoted argument hands the path to sed unchanged;
  # an unquoted echo would word-split and glob-expand it first.
  aliased_path=$(printf '%s\n' "$1" \
                 | ${SED} "s%^/$2%/$3%;${simplify_path}")
  if [[ "${verbose}" == yes ]]; then
    echo "$1 is absent; I'll look in /$3"
  fi
}

# Map a path under /software to its counterpart under
# /savannah-checkouts/gnu; substitute-path does the actual work.
substitute-software () {
  local from=software
  local to=savannah-checkouts/gnu
  substitute-path "$1" "${from}" "${to}"
}

# Map a path under /savannah-checkouts/gnu to its counterpart under
# /software; substitute-path does the actual work.
substitute-checkouts () {
  local from=savannah-checkouts/gnu
  local to=software
  substitute-path "$1" "${from}" "${to}"
}

# Output the common part of diagnostic messages: the location
# "<.symlinks path>:<line number>: " followed by the current line with
# its first run of blanks shown as " <- ".
#
# Globals: web_root, symlinks_directory, lineno, current_line, SED
#          (all read).
# Outputs: one line on standard output.
show-current-line () {
  printf "%s" "${web_root}${symlinks_directory}.symlinks:${lineno}: "
  # The line is passed on verbatim: an unquoted echo would glob-expand
  # characters such as "*" against the current working directory.
  printf '%s\n' "${current_line}" | ${SED} "s/[[:blank:]]\+/ <- /"
}

# Function to process a single .symlinks file.
#
# Arguments: $1 - path of the .symlinks file; it is expected to lie
#                 under ${web_root}.
# Globals:   web_root, verbose, max_link_depth, SED, normalize_link,
#            html_map_name, nonhtml_map_name, dir_map_name (read);
#            symlinks_directory (written); type and aliased_path are
#            written through the helper functions.
# Outputs:   appends "link target" lines to one of the three map files;
#            prints diagnostics (and progress messages when verbose is
#            "yes") on standard output.
#
# Every usable line of the file has the form "target link".  The target
# is followed, if necessary through further .symlinks files, until an
# existing file or directory or an external URL is reached, or until
# an error is detected.
append-symlinks () {

  if test ${verbose} = yes; then
    echo processing file "$1":
  fi
  # Directory of the .symlinks file relative to web_root, with a leading
  # and a trailing slash ("/" for the file at the top of the tree).
  symlinks_directory=$(echo $1 \
		       | ${SED} "s ^${web_root}/\(.*\)\.symlinks$ /\1 ")
  # NOTE(review): the loop below is the last stage of a pipeline, so
  # unless the lastpipe option is enabled it runs in a subshell:
  # variables assigned inside are not visible after the loop, and the
  # "exit 1" near the end only terminates the processing of this file.
  cleanup-symlinks "$1" -n | while read current_line; do
    # Split the "N:" prefix added by "cleanup-symlinks -n" from the line.
    lineno=$(echo ${current_line} | ${SED} "s/:.*//")
    current_line=$(echo ${current_line} | ${SED} "s/^[^:]*://")

    # The last field is the link name, the first one is its target.
    link=$(echo ${current_line} | ${SED} "s/^.*[[:blank:]]//")
    target=$(echo ${current_line} | ${SED} "s/[[:blank:]].*$//")
    target_directory=${symlinks_directory}

    if test ${verbose} = yes; then
      echo line ${lineno}: ${current_line}
    fi

    link_depth=0
    type=none
    # follow subsequent links until we get the real thing or find an error
    while true; do
      case "x${target}" in
	# external link
	x*://*)
	  full_target="${target}"
	  type=external
	  break
	  ;;
	# absolute link
	x/*)
	  target_directory=
	  ;;
      esac
      # check if the target exists
      if get-file-type "${web_root}${target_directory}${target}"; then 
	full_target="${target_directory}${target}"
	break
      fi
      case "x${target_directory}${target}" in
	# look for /software files in /savannah-checkouts/gnu
	x/software/*)
	  substitute-software "${target_directory}${target}"
	  if get-file-type "${web_root}${aliased_path}"; then
	    full_target="${aliased_path}"
	    break
	  fi
	  ;;
	# look for /savannah-checkouts/gnu files in /software
	x/savannah-checkouts/gnu*)
	  substitute-checkouts "${target_directory}${target}"
	  if get-file-type "${web_root}${aliased_path}"; then
	    full_target="${aliased_path}"
	    break
	  fi
	  ;;
      esac
      # The target does not exist: it may itself be a link listed in the
      # .symlinks file of the directory it would live in.
      target_directory=$(echo ${target_directory}${target} \
			 | ${SED} "s%[^/]*$%%")
      target_symlinks=${target_directory}.symlinks
      if test ! -f "${web_root}${target_symlinks}"; then
	# no .symlinks file found: try alternative places
	# (/software for /savannah-checkouts/gnu and vice versa)
        case "x${target_symlinks}" in
	  x/software/*)
	    substitute-software "${target_symlinks}"
	    target_symlinks="$aliased_path"
	    if test ! -f "${web_root}${target_symlinks}"; then
	      type=no-symlinks
	      break
	    fi
	    # continue in the aliased directory (".symlinks" suffix removed)
	    target_directory=${target_symlinks/%.symlinks/}
	    ;;
	  x/savannah-checkouts/gnu*)
	    substitute-checkouts "${target_symlinks}"
	    target_symlinks="$aliased_path"
	    if test ! -f "${web_root}${target_symlinks}"; then
	      type=no-symlinks
	      break
	    fi
	    # continue in the aliased directory (".symlinks" suffix removed)
	    target_directory=${target_symlinks/%.symlinks/}
	    ;;
	  *)
	    type=no-symlinks
	    break
	    ;;
	esac
      fi

      # Last path component of the target with its dots escaped, so that
      # it can be used inside the regular expressions below.
      escaped_target=$(echo ${target} | ${SED} "s%.*/%%;s/\./\\\\./g")
      if ! cleanup-symlinks "${web_root}${target_symlinks}" \
	   | grep -q "[[:blank:]]${escaped_target}$"; then
	type=no-subsequent-link
	break
      fi
      # The new target is the first field of the line(s) whose link name
      # is the one we were looking for.
      target=$(cleanup-symlinks "${web_root}${target_symlinks}" \
	       | ${SED} -n "s/[[:blank:]]${escaped_target}$//p")
      link_depth=$(expr ${link_depth} + 1)
      if test ${link_depth} -gt ${max_link_depth}; then
	type=deep-link
	break
      fi
      if test ${verbose} = yes; then
	echo "go to the next target (depth ${link_depth}): ${target}"
      fi
    done # while true

    # output the results of our link analysis
    case ${type} in
      file)
	# When both the last target in the chain and the link name end in
	# ".html", the pair goes to the HTML map with that suffix removed
	# from both fields; any other file goes to the non-HTML map.
	if (echo ${target} | grep -q "\.html$") \
	    && (echo ${link} | grep -q "\.html$"); then
	  echo ${symlinks_directory}${link} ${full_target} \
	  | ${SED} "s/\.html[[:blank:]]*/ /;s/\.html$//;\
		    ${normalize_link}"  >> "${html_map_name}"
	  if test ${verbose} = yes; then
	    echo link to a HTML file found
	  fi
	else
	  echo ${symlinks_directory}${link} ${full_target} \
	  | ${SED} "${normalize_link}" >> "${nonhtml_map_name}"
	  if test ${verbose} = yes; then
	    echo link to a non-HTML file found
	  fi
	fi
	;;
      # directories and external URLs share one map
      directory | external)
	echo ${symlinks_directory}${link} ${full_target} \
	| ${SED} "${normalize_link}" >> "${dir_map_name}"
	if test ${verbose} = yes; then
	  echo ${type} link found
	fi
	;;
      no-symlinks)
	show-current-line
	echo "  ${target_symlinks} list"
	echo "  and ${target} file/directory are absent."
	;;
      no-subsequent-link)
	show-current-line
	echo "  ${target} file/directory is absent,"
	echo "  and no subsequent link in ${target_directory}.symlinks found."
	;;
      deep-link)
	show-current-line
	echo "  link depth limit ${max_link_depth} has been reached."
	;;
      # any other value of type is unexpected
      *)
	show-current-line
	echo "  an unclassified error occurred."
	exit 1
	;;
    esac # case ${type} in
  done # cleanup-symlinks "$1" -n | while read current_line; do
}

# Main program.

# Only the literal value "yes" enables verbose output; anything else
# is normalized to "no".
if [[ "${verbose}" != yes ]]; then
  verbose=no
fi

# Turn the bare map file names into paths inside the output directory.
html_map_name="${www_maps_dir}/${html_map_name}"
nonhtml_map_name="${www_maps_dir}/${nonhtml_map_name}"
dir_map_name="${www_maps_dir}/${dir_map_name}"

# Initialize the output files: each one is truncated and starts with a
# "generated file" warning followed by a line describing its contents.
WARNING="# DO NOT EDIT, GENERATED by $0 $(date)"
printf '%s\n' "${WARNING}" "# links to HTML files" \
  > "${html_map_name}"
printf '%s\n' "${WARNING}" "# links to non-HTML files" \
  > "${nonhtml_map_name}"
printf '%s\n' "${WARNING}" "# links to directories and external links" \
  > "${dir_map_name}"

# Scan every .symlinks file except
# in non-gnu projects (they are managed with another script).
# "IFS= read -r" and the quoted argument hand each path over unchanged,
# including blanks and backslashes.
${FIND} "${web_root}" -type f -name '.symlinks' \
        ! -path "${web_root}/savannah-checkouts/non-gnu/*" \
        -print \
| while IFS= read -r next_file; do
    append-symlinks "${next_file}"
  done

# Transform directory and external links into rewriterules: every
# non-comment line "link target" of the directory map becomes
#   RewriteRule ^<link with dots escaped>((/.*)?)$ <target>$1 [R=302,L]
${SED} -i '/^[^#]/ { h; s/ .*//;s/\./\\./g;
		     x; s/.* //; H; x; s/\n/ /;
		     s/ /((\/.*)?)$ /;
		     s/^/RewriteRule ^/;
		     s/ *$/$1 [R=302,L]/
		   }' "${dir_map_name}"