2019-09-12 22:08:11 +00:00
|
|
|
|
#!/usr/bin/env bash
|
|
|
|
|
|
|
|
|
|
# This script watches a directory for new files and uploads them to
|
|
|
|
|
# docspell. Or it uploads all files currently in the directory.
|
|
|
|
|
#
|
|
|
|
|
# It requires inotifywait, curl and sha256sum if the `-m' option is
|
|
|
|
|
# used.
|
|
|
|
|
|
|
|
|
|
# Strict mode: stop on failing commands (errexit), on failures anywhere in a
# pipeline (pipefail) and on unset variables (nounset); noclobber keeps `>'
# from overwriting existing files.
set -o errexit
set -o pipefail
set -o noclobber
set -o nounset

# External programs used by this script.
CURL_CMD="curl" INOTIFY_CMD="inotifywait"
SHA256_CMD="sha256sum" MKTEMP_CMD="mktemp"

# Additional curl options may be passed in via the environment; default: none.
: "${CURL_OPTS:=}"
|
2019-09-12 22:08:11 +00:00
|
|
|
|
|
|
|
|
|
# The option parsing below relies on `getopt --test' exiting with status 4;
# any other status means this getopt cannot be used, so give up early.
getopt_test_rc=0
getopt --test > /dev/null || getopt_test_rc=$?
if [[ $getopt_test_rc -ne 4 ]]; then
    echo 'I’m sorry, `getopt --test` failed in this environment.'
    exit 1
fi
|
|
|
|
|
|
2020-06-27 22:08:37 +00:00
|
|
|
|
# Short and long option specifications handed to getopt. A trailing colon
# marks an option that takes an argument. Every short option letter is
# listed exactly once.
OPTIONS=omhdp:vri
LONGOPTS=once,distinct,help,delete,path:,verbose,recursive,dry,integration,iuser:,iheader:
|
2019-09-12 22:08:11 +00:00
|
|
|
|
|
|
|
|
|
# Let getopt normalise the command line. If it rejects the arguments it
# exits non-zero (e.g. 1) after complaining itself, so just stop with 2.
if ! PARSED=$(getopt --options="$OPTIONS" --longoptions="$LONGOPTS" --name "$0" -- "$@"); then
    exit 2
fi

# Install getopt's output as the new positional parameters; going through
# eval is what makes the quoting produced by getopt take effect.
eval set -- "$PARSED"
|
|
|
|
|
|
|
|
|
|
# Option state. Flags hold "y" or "n"; watchdir collects every -p/--path
# value in the order given.
declare -a watchdir
help=n
verbose=n
delete=n
once=n
distinct=n
recursive=n
dryrun=n
integration=n
iuser=""
iheader=""

# Consume the normalised options up to the `--' separator; whatever is left
# in the positional parameters afterwards are the non-option arguments.
while :; do
    case "$1" in
        -h|--help)        help=y;      shift ;;
        -v|--verbose)     verbose=y;   shift ;;
        -d|--delete)      delete=y;    shift ;;
        -o|--once)        once=y;      shift ;;
        -p|--path)        watchdir+=("$2"); shift 2 ;;
        -m|--distinct)    distinct=y;  shift ;;
        -r|--recursive)   recursive=y; shift ;;
        --dry)            dryrun=y;    shift ;;
        # integration mode always traverses recursively
        -i|--integration) integration=y; recursive=y; shift ;;
        --iuser)          iuser="$2";   shift 2 ;;
        --iheader)        iheader="$2"; shift 2 ;;
        --)               shift; break ;;
        *)                echo "Programming error"; exit 3 ;;
    esac
done
|
|
|
|
|
|
2019-12-31 22:17:07 +00:00
|
|
|
|
|
|
|
|
|
# Print the help text to stdout, including the current value of every
# option. Reads the option variables (verbose, delete, watchdir, ...) set by
# the option parsing.
showUsage() {
    # ${watchdir[*]-} expands to nothing when no directory was given, so the
    # help can be shown without -p even with `nounset' active.
    cat <<EOF
Upload files in a directory

Usage: $0 [options] url url ...

Options:
 -v | --verbose Print more to stdout. (value: $verbose)
 -d | --delete Delete the file if successfully uploaded. (value: $delete)
 -p | --path <dir> The directories to watch. This is required. (value: ${watchdir[*]-})
 -h | --help Prints this help text. (value: $help)
 -m | --distinct Optional. Upload only if the file doesn't already exist. (value: $distinct)
 -o | --once Instead of watching, upload all files in that dir. (value: $once)
 -r | --recursive Traverse the directory(ies) recursively (value: $recursive)
 -i | --integration Upload to the integration endpoint. It implies -r. This puts the script in
 a different mode, where the first subdirectory of any given starting point
 is read as the collective name. The url(s) are completed with this name in
 order to upload files to the respective collective. So each directory
 given is expected to contain one subdirectory per collective and the urls
 are expected to identify the integration endpoint, which is
 /api/v1/open/integration/item/<collective-name>. (value: $integration)
 --iheader The header name and value to use with the integration endpoint. This must be
 in form 'headername:value'. Only used if '-i' is supplied.
 (value: $iheader)
 --iuser The username and password for basic auth to use with the integration
 endpoint. This must be of form 'user:pass'. Only used if '-i' is supplied.
 (value: $iuser)
 --dry Do a 'dry run', not uploading anything only printing to stdout (value: $dryrun)

Arguments:
 A list of URLs to upload the files to.

Example: Watch directory
$0 --path ~/Downloads -m -dv http://localhost:7880/api/v1/open/upload/item/abcde-12345-abcde-12345

Example: Upload all files in a directory
$0 --path ~/Downloads -m -dv --once http://localhost:7880/api/v1/open/upload/item/abcde-12345-abcde-12345

Example: Integration Endpoint
$0 -i --iheader 'Docspell-Integration:test123' -m -p ~/Downloads/ http://localhost:7880/api/v1/open/integration/item

EOF
}
|
|
|
|
|
|
|
|
|
|
# -h/--help: print the usage text and stop successfully.
case "$help" in
    y)
        showUsage
        exit 0
        ;;
esac
|
|
|
|
|
|
2019-09-12 22:08:11 +00:00
|
|
|
|
# handle non-option arguments: everything left over is an upload URL
if [[ $# -eq 0 ]]; then
    echo "$0: No upload URLs given."
    exit 4
fi

# Kept as one space-separated string, because process() iterates over it
# using word splitting.
urls="$*"

# At least one directory is required and every given path must be one.
# ${watchdir[@]+...} expands to nothing for an empty array, so `nounset'
# does not abort the script before the message below can be printed.
dir_count=0
for dir in ${watchdir[@]+"${watchdir[@]}"}; do
    if [ ! -d "$dir" ]; then
        echo "The path '$dir' is not a directory."
        exit 4
    fi
    dir_count=$((dir_count + 1))
done
if [ "$dir_count" -eq 0 ]; then
    echo "$0: No directory given. Use -p | --path <dir>."
    exit 4
fi
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Print the message ($1) to stderr, but only if verbose mode is on.
trace() {
    if [ "$verbose" != "y" ]; then
        return 0
    fi
    echo "$1" >&2
}
|
|
|
|
|
|
|
|
|
|
# Print the message ($1) to stderr, regardless of verbose mode. The message
# is written verbatim: no word splitting and no glob expansion.
info() {
    printf '%s\n' "$1" >&2
}
|
|
|
|
|
|
2020-06-27 22:08:37 +00:00
|
|
|
|
# Print the collective a file belongs to: the first path component of the
# file below its starting directory.
#   $1 - the file
#   $2 - the starting directory
# Both paths are resolved with realpath before they are compared.
getCollective() {
    local file dir rel
    file="$(realpath "$1")"
    dir="$(realpath "$2")"
    # path of the file relative to the starting directory
    rel=${file#"$dir"}
    # drop the separator left behind by stripping the directory prefix
    rel=${rel#/}
    printf '%s\n' "${rel%%/*}"
}
|
|
|
|
|
|
|
|
|
|
|
2019-09-12 22:08:11 +00:00
|
|
|
|
# Upload one file with curl.
#   $1 - starting directory the file was found below
#   $2 - the file to upload
#   $3 - upload URL; in integration mode the collective name is appended
# Returns 0 on success and for a dry run, curl's exit code if the request
# failed, and 1 if the response contains {"success":false.
upload() {
    local dir file url collective tf1 tf2
    local rc=0
    local -a opts=()
    dir="$(realpath "$1")"
    file="$(realpath "$2")"
    url="$3"
    # CURL_OPTS is a plain string of options; it is split into words on
    # purpose. All other options are added as separate array elements so
    # that values containing blanks stay intact.
    # shellcheck disable=SC2206
    opts=($CURL_OPTS)
    if [ "$integration" = "y" ]; then
        collective=$(getCollective "$file" "$dir")
        trace "- upload: collective = $collective"
        url="$url/$collective"
        if [ -n "$iuser" ]; then
            opts+=(--user "$iuser")
        fi
        if [ -n "$iheader" ]; then
            opts+=(-H "$iheader")
        fi
    fi

    if [ "$dryrun" = "y" ]; then
        info "- Not uploading (dry-run) $file to $url with opts ${opts[*]-}"
        return 0
    fi

    trace "- Uploading $file to $url with options ${opts[*]-}"
    # tf1 receives the response body, tf2 what curl writes to stderr
    tf1=$($MKTEMP_CMD) || return
    tf2=$($MKTEMP_CMD) || { rm -f -- "$tf1"; return 1; }
    # ${opts[@]+...} expands to nothing for an empty array, also with nounset.
    # Remember curl's exit code so that a failed request is reported to the
    # caller and never mistaken for a successful upload.
    $CURL_CMD --fail -# -o "$tf1" --stderr "$tf2" ${opts[@]+"${opts[@]}"} -XPOST -F file=@"$file" "$url" || rc=$?
    if [ "$rc" -ne 0 ]; then
        info "Upload failed. Exit code: $rc"
        cat "$tf1"
        cat "$tf2"
        echo ""
    elif grep -q '{"success":false' "$tf1"; then
        echo "Upload failed. Message from server:"
        cat "$tf1"
        echo ""
        rc=1
    else
        info "- Upload done."
    fi
    rm -f -- "$tf1" "$tf2"
    return "$rc"
}
|
|
|
|
|
|
|
|
|
|
# Print the SHA-256 digest of the file $1.
checksum() {
    local target="$1"
    # The digest is the first blank-separated field of the tool's output;
    # xargs re-emits it as a single trimmed line.
    $SHA256_CMD "$target" \
        | cut -d' ' -f1 \
        | xargs
}
|
|
|
|
|
|
2019-12-31 22:17:07 +00:00
|
|
|
|
# Ask the server whether a file has been uploaded before.
#   $1 - upload URL; it is rewritten to the corresponding checkfile URL
#   $2 - the file to check
#   $3 - starting directory, used to find the collective in integration mode
# Prints "y" if the response contains {"exists":true, "n" otherwise, and
# "failed" (with return code 1) if the request failed. Diagnostics go to
# stderr, so stdout carries nothing but that answer.
checkFile() {
    local url="$1"
    local file="$2"
    local dir="$3"
    local collective tf1 tf2 answer
    local -a opts=()
    # CURL_OPTS is a plain string of options; it is split into words on
    # purpose. All other options are added as separate array elements so
    # that values containing blanks stay intact.
    # shellcheck disable=SC2206
    opts=($CURL_OPTS)
    if [ "$integration" = "y" ]; then
        collective=$(getCollective "$file" "$dir")
        url="$url/$collective"
        url=$(printf '%s\n' "$url" | sed 's,/item/,/checkfile/,g')
        if [ -n "$iuser" ]; then
            opts+=(--user "$iuser")
        fi
        if [ -n "$iheader" ]; then
            opts+=(-H "$iheader")
        fi
    else
        url=$(printf '%s\n' "$url" | sed 's,upload/item,checkfile,g')
    fi
    # the checksum of the file is the last path segment of the check URL
    url="$url/$(checksum "$file")"
    trace "- Check file via ${opts[*]-}: $url"

    # tf1 receives the response body, tf2 what curl writes to stderr
    if ! tf1=$($MKTEMP_CMD); then
        echo "failed"
        return 1
    fi
    if ! tf2=$($MKTEMP_CMD); then
        rm -f -- "$tf1"
        echo "failed"
        return 1
    fi
    # ${opts[@]+...} expands to nothing for an empty array, also with nounset.
    if ! $CURL_CMD --fail -o "$tf1" --stderr "$tf2" ${opts[@]+"${opts[@]}"} -XGET -s "$url"; then
        info "Checking file failed!"
        cat "$tf1" >&2
        cat "$tf2" >&2
        info ""
        rm -f -- "$tf1" "$tf2"
        echo "failed"
        return 1
    fi

    if grep -q '{"exists":true' "$tf1"; then
        answer="y"
    else
        answer="n"
    fi
    rm -f -- "$tf1" "$tf2"
    echo "$answer"
}
|
|
|
|
|
|
2019-09-12 22:08:11 +00:00
|
|
|
|
# Process a single file: upload it to every URL in $urls and, if nothing
# failed and -d was given, delete it afterwards.
#   $1 - the file
#   $2 - the starting directory the file was found below
# With -m, a URL is skipped when the server reports that it already has the
# file, or when that check itself fails (which counts as a failure).
process() {
    local file dir url res rc
    local -i curlrc=0
    file="$(realpath "$1")"
    dir="$2"
    info "---- Processing $file ----------"
    # Failures are summed up in curlrc instead of aborting the script.
    set +e
    # $urls is a space-separated list; word splitting is intended here.
    for url in $urls; do
        if [ "$distinct" = "y" ]; then
            trace "- Checking if $file has been uploaded to $url already"
            res=$(checkFile "$url" "$file" "$dir")
            rc=$?
            curlrc=$((curlrc + rc))
            trace "- Result from checkfile: $res"
            if [ "$res" = "y" ]; then
                info "- Skipping file '$file' because it has been uploaded in the past."
                continue
            elif [ "$res" != "n" ]; then
                info "- Checking file failed, skipping the file."
                continue
            fi
        fi
        trace "- Uploading '$file' to '$url'."
        upload "$dir" "$file" "$url"
        rc=$?
        curlrc=$((curlrc + rc))
        if [ "$rc" -ne 0 ]; then
            trace "Upload to '$url' failed!"
        fi
    done
    set -e

    if [ "$curlrc" -ne 0 ]; then
        info "-> Some uploads failed."
    else
        trace "= File processed for all URLs"
        if [ "$delete" = "y" ]; then
            info "- Deleting file '$file'"
            # a failing rm is only reported; it must not stop the script
            if ! rm -- "$file"; then
                info "- Deleting failed!"
            fi
        fi
    fi
}
|
|
|
|
|
|
2020-06-27 22:08:37 +00:00
|
|
|
|
# Print every entry of the watchdir array that is a prefix of the given
# path ($1), one per line, i.e. the watched directory the path lies below.
findDir() {
    local path="$1"
    local dir
    for dir in "${watchdir[@]}"; do
        # The prefix is quoted so that it is compared literally and not
        # interpreted as a glob pattern.
        if [[ $path == "$dir"* ]]; then
            printf '%s\n' "$dir"
        fi
    done
}
|
|
|
|
|
|
2019-09-12 22:08:11 +00:00
|
|
|
|
# Entry point: with --once upload every file currently found in the given
# directories, otherwise keep watching them and upload files as they appear.
if [ "$once" = "y" ]; then
    info "Uploading all files in '${watchdir[*]}'."
    # Without -r only the files directly inside each directory are used.
    MD="-maxdepth 1"
    if [ "$recursive" = "y" ]; then
        MD=""
    fi
    for dir in "${watchdir[@]}"; do
        # $MD is unquoted on purpose: it holds either no or two words.
        # shellcheck disable=SC2086
        find "$dir" $MD -type f -print0 | while IFS= read -d '' -r file; do
            process "$file" "$dir"
        done
    done
else
    REC=""
    if [ "$recursive" = "y" ]; then
        REC="-r"
    fi
    # Events are printed as "<events> <directory><file>". The event list is
    # comma-separated and contains no blanks, so everything after the first
    # blank is the path; directory and file names with blanks stay intact.
    # $REC is unquoted on purpose: it is either empty or a single option.
    # shellcheck disable=SC2086
    $INOTIFY_CMD $REC -m --format '%e %w%f' "${watchdir[@]}" -e close_write -e moved_to |
        while IFS=' ' read -r action fullpath; do
            path="${fullpath%/*}/"
            file="${fullpath##*/}"
            dir=$(findDir "$path")
            trace "The file '$file' appeared in directory '$path' below '$dir' via '$action'"
            sleep 1
            process "$(realpath "$fullpath")" "$dir"
        done
fi
|