summaryrefslogtreecommitdiff
path: root/gsv-eval-remote.sh
blob: 531c1a0eb5c2de772800fb8f428057153dd6b811 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
#!/bin/sh

# Copyright (C) 2014 Assaf Gordon (assafgordon@gmail.com)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

##
## Given a remote source URL (e.g. a tarball),
## downloads the file, runs the gnu-savannah-eval perl script
## on a given directory, then generates an HTML report for it.
##

# Size limit of downloadable tarballs (in bytes)
TARBALL_SIZE_LIMIT=10000000

# Ugly hack:
# When given a tarball to download, limit the accepted URLs to this
# (very partial) character set.
# Since this script will be used from a website, and users can post
# whichever URLs they want, this regex will hopefully avoid some potential
# problems (such as URLs doing 'GET' requests with CGI parameters).
# The downside is that some legitimate URLs will not work (e.g.
# some SourceForge URLs with extra CGI parameters).
TARBALL_REGEX='^(https?|ftp)://[A-Za-z0-9\_\.\/-]*\.tar\.(gz|bz2|xz)$'

# Path of the HTML report file.  Empty until the output file has been
# successfully created; once set, die() also writes the error into it.
OUTPUT_FILE=

# die MESSAGE...
# Print an error message (prefixed with the script's name) to stderr,
# mirror it into the HTML output file when one is already known (so web
# users see the failure too), then terminate with exit status 1.
die()
{
    BASE=$(basename "$0")
    # Use "$*" (not "$@") inside the quoted string: we want all arguments
    # joined into a single message word, which is well-defined with "$*".
    echo "$BASE: error: $*" >&2
    test -n "$OUTPUT_FILE" && echo "error: $*" >"$OUTPUT_FILE"
    exit 1
}

# usage
# Print the help screen (to stdout) and exit successfully.
usage()
{
    BASE=$(basename "$0")
    echo "GNU-Savannah Evaluation - helper script
Copyright (C) 2014 A. Gordon (assafgordon@gmail.com)
License: GPLv3-or-later

Usage: $BASE [OPTIONS]   OUTPUT-HTML  PROJECT-NAME   SOURCE-URL

Will download SOURCE-URL, run the gnu-savannah evaluation perl script
on the downloaded files, and produce an HTML file named OUTPUT-HTML.

SOURCE-URL can be:
   http://
   https://
   ftp://
   git://
   tar.gz
   tar.bz2
   tar.xz

Options:
 -h              = show this help screen.

Examples:

Download 'hello-2.8.tar.gz', run the GNU-Savannah evaluation on it,
and generate '/tmp/out.html' report:

   $BASE /tmp/out.html 'GNU Hello' http://ftp.gnu.org/gnu/hello/hello-2.8.tar.gz


Download git-clone GNU Coreutils, run the GNU-Savannah evaluation on it,
and generate '/tmp/out.html' report:

   $BASE /tmp/out2.html 'GNU Coreutils' git://git.sv.gnu.org/coreutils.git


"
    exit 0
}

# Show the help screen and exit if the first argument is '-h'.
test "x$1" = "x-h" && usage

# Positional parameters (all three are required).
OUTPUT_HTML=$1
PROJECT_NAME=$2
SOURCE=$3

test -z "$OUTPUT_HTML" \
    && die "missing OUTPUT-HTML parameter. See -h for help."
test -z "$PROJECT_NAME" \
    && die "missing PROJECT-NAME parameter. See -h for help."
test -z "$SOURCE" \
    && die "missing SOURCE-URL parameter. See -h for help."
# Create the output file up-front so later errors can be reported into it.
touch "$OUTPUT_HTML" \
    || die "failed to create output file '$OUTPUT_HTML'"

## From here on, we can at least log the errors into the output HTML file
OUTPUT_FILE="$OUTPUT_HTML"

# Sanitize project name: keep only a safe character set, since the name is
# passed on a command line and embedded in the generated HTML.
PROJECT_NAME=$(echo "$PROJECT_NAME" | tr -dc 'A-Za-z0-9-_. ')

# Verify required files.
# Resolve the directory containing this script (following symlinks) so the
# companion perl and CSS files can be located next to it.  The inner
# command substitution is quoted so paths containing whitespace survive
# word-splitting.
SCRIPTPATH=$(dirname "$(readlink -f "$0")") || die "failed to get script's directory"
EVAL_SCRIPT="$SCRIPTPATH/gnu_savannah_eval.pl"
test -x "$EVAL_SCRIPT" \
    || die "Perl script ($EVAL_SCRIPT) not found/not executable"
CSS_FILE="$SCRIPTPATH/gnu_savannah_eval.css"
test -e "$CSS_FILE" \
    || die "CSS file ($CSS_FILE) not found"

# Ugly Hack:
# Tarball-style URLs (http/https/ftp schemes) must additionally satisfy
# the stricter TARBALL_REGEX; reject the rest with a detailed explanation.
case "$SOURCE" in
    http://*|https://*|ftp://*)
        echo "$SOURCE" | grep -E -q "$TARBALL_REGEX" \
            || die "the given URL ($SOURCE) does not match the stricter URL " \
                   " limitations of this script (which are '$TARBALL_REGEX'). " \
                   "Consider running this script locally."
        ;;
esac

##
## Create temporary directory to process the file
##
DIRECTORY=$(mktemp -d /tmp/gnu_eval.XXXXXX) \
    || die "failed to create temporary directory"
# Single-quote the trap so "$DIRECTORY" is expanded when the trap FIRES
# (not when it is installed), and keep it quoted inside rm so an unusual
# path cannot be word-split or glob-expanded.
trap 'cd /tmp ; rm -rf "$DIRECTORY"' INT TERM EXIT

cd "$DIRECTORY" || die "failed to CD to '$DIRECTORY'"

##
## Fetch the remote source
##
## Two source kinds are supported: a git repository (git:// scheme or a
## URL ending in .git) and a tarball over http/https/ftp.  Either way,
## the code below ends with the CWD inside the unpacked source directory
## and SOURCEDIR holding its name.
if echo "$SOURCE" | grep -E -q '^git://|\.git$' ; then
    ##
    ## a Git repository source
    ##
    git ls-remote "$SOURCE" >/dev/null \
        || die "source ($SOURCE) is not a valid remote git repository"
    git clone --depth 1 "$SOURCE" \
        || die "git clone $SOURCE - failed"
    SOURCEDIR=$(basename "$SOURCE" .git)
    cd "$SOURCEDIR" \
        || die "failed to CD into source directory '$SOURCEDIR' " \
               "(based on 'git clone $SOURCE')"

elif echo "$SOURCE" | grep -E -q "$TARBALL_REGEX" ;
    then
    ##
    ## a Tarball source
    ##

    ## Find size before download (HTTP HEAD), to enforce the size limit
    ## without downloading the whole file.
    TARBALL_HEAD=$(curl -f --silent -L --insecure --head "$SOURCE") \
        || die "Failed to get size of '$SOURCE' (using HTTP HEAD)"
    ## Match the header name case-insensitively: servers (and HTTP/2) may
    ## send 'content-length' in lower case.  'tail -n 1' keeps the value
    ## from the final response when redirects produced several header sets.
    TARBALL_SIZE=$(echo "$TARBALL_HEAD" |
                    tr -d '\r' |
                    grep -i Content-Length |
                    tail -n 1 |
                    awk '{print $2}' ) \
        || die "failed to get size (content-length) of '$SOURCE'"
    test -z "$TARBALL_SIZE" \
        && die "failed to get size (content-length) of '$SOURCE'"
    test "$TARBALL_SIZE" -le "$TARBALL_SIZE_LIMIT" \
        || die "tarball '$SOURCE' size too big ($TARBALL_SIZE)," \
               "current limit is $TARBALL_SIZE_LIMIT bytes."

    ## a remote tarball source
    TMP1=$(basename "$SOURCE") \
        || die "failed to get basename of '$SOURCE'"
    wget -q --no-check-certificate -O "$TMP1" "$SOURCE" \
        || die "failed to download '$SOURCE'"

    ## GNU Tar should automatically detect and uncompress the tarball.
    tar -xf "$TMP1" \
        || die "failed to extract files from '$TMP1' (from '$SOURCE')"

    ##
    ## Some tarballs contain directories that are named differently than
    ## the tarball. Annoying, but common enough.
    ## So search for one sub-directory.
    ##
    COUNT=$(find . -maxdepth 1 -type d | sed 1d | wc -l)
    test "$COUNT" -eq 1 \
        || die "tarball '$SOURCE' contains more than one sub-directory."

    SOURCEDIR=$(find . -maxdepth 1 -type d | sed 1d)
    cd "$SOURCEDIR" \
        || die "failed to CD into '$SOURCEDIR' (extracted from '$SOURCE')"
else
    die "Unknown source type ($SOURCE) - expecting GIT or TARBALL on HTTP/FTP"
fi

##
## Analyze the project
##
# Run the evaluation perl script on the extracted/cloned sources and
# capture its report (markdown) into a temporary file.
"$EVAL_SCRIPT" --project "$PROJECT_NAME" \
    "$DIRECTORY/$SOURCEDIR" > "$DIRECTORY/eval.md" \
    || die "evaluation script failed (on '$SOURCE')"

# Convert the markdown report into a stand-alone HTML page; the CSS file
# content is injected into the HTML header section.
pandoc --from markdown \
       --to html \
       --table-of-contents \
       --include-in-header "$CSS_FILE" < "$DIRECTORY/eval.md" > "$OUTPUT_FILE"\
    || die "pandoc failed (generated from '$SOURCE')"