docspell/tools/consumedir.sh
Eike Kettner abecfeafec Add curl response to stdout if it fails
Apparently, it must be called with `-v` otherwise it is silent.
2020-11-23 22:17:14 +01:00

351 lines
10 KiB
Bash
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env bash
# This script watches a directory for new files and uploads them to
# docspell. Or it uploads all files currently in the directory.
#
# It requires inotifywait, curl and sha256sum if the `-m' option is
# used.
# saner programming env: these switches turn some bugs into errors
set -o errexit -o pipefail -o noclobber -o nounset
CURL_CMD="curl"
INOTIFY_CMD="inotifywait"
SHA256_CMD="sha256sum"
MKTEMP_CMD="mktemp"
CURL_OPTS=${CURL_OPTS:-}
! getopt --test > /dev/null
if [[ ${PIPESTATUS[0]} -ne 4 ]]; then
echo 'Im sorry, `getopt --test` failed in this environment.'
exit 1
fi
OPTIONS=omhdp:vrmi
LONGOPTS=once,distinct,help,delete,path:,verbose,recursive,dry,integration,iuser:,iheader:
! PARSED=$(getopt --options=$OPTIONS --longoptions=$LONGOPTS --name "$0" -- "$@")
if [[ ${PIPESTATUS[0]} -ne 0 ]]; then
# e.g. return value is 1
# then getopt has complained about wrong arguments to stdout
exit 2
fi
# read getopts output this way to handle the quoting right:
eval set -- "$PARSED"
declare -a watchdir
help=n verbose=n delete=n once=n distinct=n recursive=n dryrun=n
integration=n iuser="" iheader=""
while true; do
case "$1" in
-h|--help)
help=y
shift
;;
-v|--verbose)
verbose=y
shift
;;
-d|--delete)
delete=y
shift
;;
-o|--once)
once=y
shift
;;
-p|--path)
watchdir+=("$2")
shift 2
;;
-m|--distinct)
distinct=y
shift
;;
-r|--recursive)
recursive=y
shift
;;
--dry)
dryrun=y
shift
;;
-i|--integration)
integration=y
recursive=y
shift
;;
--iuser)
iuser="$2"
shift 2
;;
--iheader)
iheader="$2"
shift 2
;;
--)
shift
break
;;
*)
echo "Programming error"
exit 3
;;
esac
done
showUsage() {
echo "Upload files in a directory"
echo ""
echo "Usage: $0 [options] url url ..."
echo
echo "Options:"
echo " -v | --verbose Print more to stdout. (value: $verbose)"
echo " -d | --delete Delete the file if successfully uploaded. (value: $delete)"
echo " -p | --path <dir> The directories to watch. This is required. (value: ${watchdir[@]})"
echo " -h | --help Prints this help text. (value: $help)"
echo " -m | --distinct Optional. Upload only if the file doesn't already exist. (value: $distinct)"
echo " -o | --once Instead of watching, upload all files in that dir. (value: $once)"
echo " -r | --recursive Traverse the directory(ies) recursively (value: $recursive)"
echo " -i | --integration Upload to the integration endpoint. It implies -r. This puts the script in"
echo " a different mode, where the first subdirectory of any given starting point"
echo " is read as the collective name. The url(s) are completed with this name in"
echo " order to upload files to the respective collective. So each directory"
echo " given is expected to contain one subdirectory per collective and the urls"
echo " are expected to identify the integration endpoint, which is"
echo " /api/v1/open/integration/item/<collective-name>. (value: $integration)"
echo " --iheader The header name and value to use with the integration endpoint. This must be"
echo " in form 'headername:value'. Only used if '-i' is supplied."
echo " (value: $iheader)"
echo " --iuser The username and password for basic auth to use with the integration"
echo " endpoint. This must be of form 'user:pass'. Only used if '-i' is supplied."
echo " (value: $iuser)"
echo " --dry Do a 'dry run', not uploading anything only printing to stdout (value: $dryrun)"
echo ""
echo "Arguments:"
echo " A list of URLs to upload the files to."
echo ""
echo "Example: Watch directory"
echo "$0 --path ~/Downloads -m -dv http://localhost:7880/api/v1/open/upload/item/abcde-12345-abcde-12345"
echo ""
echo "Example: Upload all files in a directory"
echo "$0 --path ~/Downloads -m -dv --once http://localhost:7880/api/v1/open/upload/item/abcde-12345-abcde-12345"
echo ""
echo "Example: Integration Endpoint"
echo "$0 -i -iheader 'Docspell-Integration:test123' -m -p ~/Downloads/ http://localhost:7880/api/v1/open/integration/item"
echo ""
}
if [ "$help" = "y" ]; then
showUsage
exit 0
fi
# handle non-option arguments
if [[ $# -eq 0 ]]; then
echo "$0: No upload URLs given."
exit 4
fi
urls=$@
if [ ! -d "$watchdir" ]; then
echo "The path '$watchdir' is not a directory."
exit 4
fi
trace() {
if [ "$verbose" = "y" ]; then
>&2 echo "$1"
fi
}
info() {
>&2 echo $1
}
getCollective() {
file=$(realpath "$1")
dir=$(realpath "$2")
collective=${file#"$dir"}
coll=$(echo $collective | cut -d'/' -f1)
if [ -z "$coll" ]; then
coll=$(echo $collective | cut -d'/' -f2)
fi
echo $coll
}
upload() {
dir=$(realpath "$1")
file=$(realpath "$2")
url="$3"
OPTS="$CURL_OPTS"
if [ "$integration" = "y" ]; then
collective=$(getCollective "$file" "$dir")
trace "- upload: collective = $collective"
url="$url/$collective"
if [ $iuser ]; then
OPTS="$OPTS --user $iuser"
fi
if [ $iheader ]; then
OPTS="$OPTS -H $iheader"
fi
fi
if [ "$dryrun" = "y" ]; then
info "- Not uploading (dry-run) $file to $url with opts $OPTS"
else
trace "- Uploading $file to $url with options $OPTS"
tf1=$($MKTEMP_CMD) tf2=$($MKTEMP_CMD) rc=0
$CURL_CMD --fail -# -o "$tf1" --stderr "$tf2" $OPTS -XPOST -F file=@"$file" "$url"
if [ $? -ne 0 ]; then
info "Upload failed. Exit code: $rc"
cat "$tf1"
cat "$tf2"
echo ""
rm "$tf1" "$tf2"
return $rc
else
if cat $tf1 | grep -q '{"success":false'; then
echo "Upload failed. Message from server:"
cat "$tf1"
echo ""
rm "$tf1" "$tf2"
return 1
else
info "- Upload done."
rm "$tf1" "$tf2"
return 0
fi
fi
fi
}
checksum() {
$SHA256_CMD "$1" | cut -d' ' -f1 | xargs
}
checkFile() {
local url="$1"
local file="$2"
local dir="$3"
OPTS="$CURL_OPTS"
if [ "$integration" = "y" ]; then
collective=$(getCollective "$file" "$dir")
url="$url/$collective"
url=$(echo "$url" | sed 's,/item/,/checkfile/,g')
if [ $iuser ]; then
OPTS="$OPTS --user $iuser"
fi
if [ $iheader ]; then
OPTS="$OPTS -H $iheader"
fi
else
url=$(echo "$1" | sed 's,upload/item,checkfile,g')
fi
url=$url/$(checksum "$file")
trace "- Check file via $OPTS: $url"
tf1=$($MKTEMP_CMD) tf2=$($MKTEMP_CMD)
$CURL_CMD --fail -v -o "$tf1" --stderr "$tf2" $OPTS -XGET -s "$url"
if [ $? -ne 0 ]; then
info "Checking file failed!"
cat "$tf1" >&2
cat "$tf2" >&2
info ""
rm "$tf1" "$tf2"
echo "failed"
return 1
else
if cat "$tf1" | grep -q '{"exists":true'; then
rm "$tf1" "$tf2"
echo "y"
else
rm "$tf1" "$tf2"
echo "n"
fi
fi
}
process() {
file=$(realpath "$1")
dir="$2"
info "---- Processing $file ----------"
declare -i curlrc=0
set +e
for url in $urls; do
if [ "$distinct" = "y" ]; then
trace "- Checking if $file has been uploaded to $url already"
res=$(checkFile "$url" "$file" "$dir")
rc=$?
curlrc=$(expr $curlrc + $rc)
trace "- Result from checkfile: $res"
if [ "$res" = "y" ]; then
info "- Skipping file '$file' because it has been uploaded in the past."
continue
elif [ "$res" != "n" ]; then
info "- Checking file failed, skipping the file."
continue
fi
fi
trace "- Uploading '$file' to '$url'."
upload "$dir" "$file" "$url"
rc=$?
curlrc=$(expr $curlrc + $rc)
if [ $rc -ne 0 ]; then
trace "Upload to '$url' failed!"
fi
done
set -e
if [ $curlrc -ne 0 ]; then
info "-> Some uploads failed."
else
trace "= File processed for all URLs"
if [ "$delete" = "y" ]; then
info "- Deleting file '$file'"
set +e
rm "$file"
if [ $? -ne 0 ]; then
info "- Deleting failed!"
fi
set -e
fi
fi
}
findDir() {
path="$1"
for dir in "${watchdir[@]}"; do
if [[ $path = ${dir}* ]]
then
echo $dir
fi
done
}
if [ "$once" = "y" ]; then
info "Uploading all files in '$watchdir'."
MD="-maxdepth 1"
if [ "$recursive" = "y" ]; then
MD=""
fi
for dir in "${watchdir[@]}"; do
find "$dir" $MD -type f -print0 | while IFS= read -d '' -r file; do
process "$file" "$dir"
done
done
else
REC=""
if [ "$recursive" = "y" ]; then
REC="-r"
fi
$INOTIFY_CMD $REC -m --format '%w%f' -e close_write -e moved_to "${watchdir[@]}" |
while read pathfile; do
dir=$(findDir "$pathfile")
trace "The file '$pathfile' appeared below '$dir'"
sleep 1
process "$(realpath "$pathfile")" "$dir"
done
fi