#!/usr/bin/env bash

# This script watches one or more directories for new files and uploads
# them to docspell. With `--once' it uploads all files currently in the
# directories instead of watching.
#
# It requires an enhanced getopt, curl, mktemp and realpath. Watching
# requires inotifywait, and sha256sum is required if the `-m' option is
# used.
#
# Environment:
#   CURL_OPTS  extra options passed to every curl call (split on whitespace)
#
# Exit codes:
#   1  enhanced getopt is not available
#   2  invalid command line options
#   3  internal error while reading the parsed options
#   4  no upload URL given, no --path given, or a path is not a directory

# saner programming env: these switches turn some bugs into errors
set -o errexit -o pipefail -o noclobber -o nounset

readonly CURL_CMD="curl"
readonly INOTIFY_CMD="inotifywait"
readonly SHA256_CMD="sha256sum"
readonly MKTEMP_CMD="mktemp"

CURL_OPTS=${CURL_OPTS:-}

#######################################
# Functions
#######################################

# Prints the help text, including the current value of every option.
# Globals: verbose, delete, watchdir, help, distinct, once, recursive,
#          integration, iheader, iuser, dryrun (all read)
# Outputs: the usage text on stdout
showUsage() {
  cat <<EOF
Upload files in a directory

Usage: $0 [options] url url ...

Options:
  -v | --verbose      Print more to stdout. (value: $verbose)
  -d | --delete       Delete the file if successfully uploaded. (value: $delete)
  -p | --path         The directories to watch. This is required. (value: ${watchdir[*]-})
  -h | --help         Prints this help text. (value: $help)
  -m | --distinct     Optional. Upload only if the file doesn't already exist. (value: $distinct)
  -o | --once         Instead of watching, upload all files in that dir. (value: $once)
  -r | --recursive    Traverse the directory(ies) recursively (value: $recursive)
  -i | --integration  Upload to the integration endpoint. It implies -r. This puts the script in
                      a different mode, where the first subdirectory of any given starting point
                      is read as the collective name. The url(s) are completed with this name in
                      order to upload files to the respective collective. So each directory
                      given is expected to contain one subdirectory per collective and the urls
                      are expected to identify the integration endpoint, which is
                      /api/v1/open/integration/item/. (value: $integration)
  --iheader           The header name and value to use with the integration endpoint. This must be
                      in form 'headername:value'. Only used if '-i' is supplied.
                      (value: $iheader)
  --iuser             The username and password for basic auth to use with the integration
                      endpoint. This must be of form 'user:pass'. Only used if '-i' is supplied.
                      (value: $iuser)
  --dry               Do a 'dry run', not uploading anything only printing to stdout (value: $dryrun)

Arguments:
  A list of URLs to upload the files to.

Example: Watch directory
$0 --path ~/Downloads -m -dv http://localhost:7880/api/v1/open/upload/item/abcde-12345-abcde-12345

Example: Upload all files in a directory
$0 --path ~/Downloads -m -dv --once http://localhost:7880/api/v1/open/upload/item/abcde-12345-abcde-12345

Example: Integration Endpoint
$0 -i --iheader 'Docspell-Integration:test123' -m -p ~/Downloads/ http://localhost:7880/api/v1/open/integration/item

EOF
}

# Prints $1 to stderr, but only when --verbose was given.
trace() {
  if [[ "$verbose" == "y" ]]; then
    printf '%s\n' "$1" >&2
  fi
}

# Prints $1 to stderr unconditionally.
info() {
  printf '%s\n' "$1" >&2
}

# Prints the first path component of a file relative to a base directory.
# In integration mode this component is used as the collective name.
# Arguments: $1 - file, $2 - base directory (both are resolved via realpath)
# Outputs:   the component on stdout
getCollective() {
  local file dir rel
  file=$(realpath "$1")
  dir=$(realpath "$2")
  rel=${file#"$dir"}
  rel=${rel#/}
  printf '%s\n' "${rel%%/*}"
}

# Uploads one file to one URL via a multipart POST.
# Globals:   integration, dryrun, CURL_EXTRA_OPTS (read)
# Arguments: $1 - watched base directory, $2 - file, $3 - upload URL
# Returns:   0 on success or dry-run, curl's exit code if curl fails,
#            1 if the response contains '{"success":false'
upload() {
  local dir file url collective tf1 tf2 rc
  dir=$(realpath "$1")
  file=$(realpath "$2")
  url="$3"

  if [[ "$integration" == "y" ]]; then
    collective=$(getCollective "$file" "$dir")
    trace "- upload: collective = $collective"
    url="$url/$collective"
  fi

  if [[ "$dryrun" == "y" ]]; then
    info "- Not uploading (dry-run) $file to $url with opts ${CURL_EXTRA_OPTS[*]-}"
    return 0
  fi

  trace "- Uploading $file to $url with options ${CURL_EXTRA_OPTS[*]-}"
  tf1=$("$MKTEMP_CMD")
  tf2=$("$MKTEMP_CMD")

  # Capture curl's real exit status; returning a stale 0 here would make the
  # caller treat a failed upload as a success (and delete the file with -d).
  rc=0
  "$CURL_CMD" --fail -# -o "$tf1" --stderr "$tf2" \
    ${CURL_EXTRA_OPTS[@]+"${CURL_EXTRA_OPTS[@]}"} \
    -XPOST -F file=@"$file" "$url" || rc=$?

  if (( rc != 0 )); then
    info "Upload failed. Exit code: $rc"
    cat "$tf1"
    cat "$tf2"
    echo ""
    rm -f -- "$tf1" "$tf2"
    return "$rc"
  fi

  if grep -q '{"success":false' "$tf1"; then
    echo "Upload failed. Message from server:"
    cat "$tf1"
    echo ""
    rm -f -- "$tf1" "$tf2"
    return 1
  fi

  info "- Upload done."
  rm -f -- "$tf1" "$tf2"
  return 0
}

# Prints the sha256 checksum of the file $1 on stdout.
checksum() {
  "$SHA256_CMD" "$1" | cut -d' ' -f1 | xargs
}

# Asks the server whether a file with the same checksum already exists.
# The checkfile URL is derived from the upload URL.
# Globals:   integration, CURL_EXTRA_OPTS (read)
# Arguments: $1 - upload URL, $2 - file, $3 - watched base directory
# Outputs:   "y" (exists), "n" (does not exist) or "failed" on stdout;
#            diagnostics go to stderr because stdout is the result channel
# Returns:   1 if the request failed, 0 otherwise
checkFile() {
  local url="$1"
  local file="$2"
  local dir="$3"
  local collective tf1 tf2 rc

  if [[ "$integration" == "y" ]]; then
    collective=$(getCollective "$file" "$dir")
    url="$url/$collective"
    url=$(printf '%s\n' "$url" | sed 's,/item/,/checkfile/,g')
  else
    url=$(printf '%s\n' "$1" | sed 's,upload/item,checkfile,g')
  fi
  url=$url/$(checksum "$file")

  trace "- Check file via ${CURL_EXTRA_OPTS[*]-}: $url"
  tf1=$("$MKTEMP_CMD")
  tf2=$("$MKTEMP_CMD")

  rc=0
  "$CURL_CMD" --fail -o "$tf1" --stderr "$tf2" \
    ${CURL_EXTRA_OPTS[@]+"${CURL_EXTRA_OPTS[@]}"} \
    -XGET -s "$url" || rc=$?

  if (( rc != 0 )); then
    info "Checking file failed!"
    cat "$tf1" >&2
    cat "$tf2" >&2
    info ""
    rm -f -- "$tf1" "$tf2"
    echo "failed"
    return 1
  fi

  if grep -q '{"exists":true' "$tf1"; then
    rm -f -- "$tf1" "$tf2"
    echo "y"
  else
    rm -f -- "$tf1" "$tf2"
    echo "n"
  fi
}

# Uploads one file to every configured URL and, with --delete, removes the
# file once no check or upload reported a failure.
# Globals:   urls, distinct, delete, dryrun (read)
# Arguments: $1 - file, $2 - watched base directory the file belongs to
# Returns:   always 0, so one bad file does not abort the surrounding loop
process() {
  local file dir url res rc
  local -i failures=0
  file=$(realpath "$1")
  dir="$2"

  info "---- Processing $file ----------"

  # Failures are counted per URL instead of aborting the whole script.
  set +e
  for url in "${urls[@]}"; do
    if [[ "$distinct" == "y" ]]; then
      trace "- Checking if $file has been uploaded to $url already"
      res=$(checkFile "$url" "$file" "$dir")
      rc=$?
      failures=$(( failures + rc ))
      trace "- Result from checkfile: $res"
      if [[ "$res" == "y" ]]; then
        info "- Skipping file '$file' because it has been uploaded in the past."
        continue
      elif [[ "$res" != "n" ]]; then
        info "- Checking file failed, skipping the file."
        continue
      fi
    fi

    trace "- Uploading '$file' to '$url'."
    upload "$dir" "$file" "$url"
    rc=$?
    failures=$(( failures + rc ))
    if (( rc != 0 )); then
      trace "Upload to '$url' failed!"
    fi
  done
  set -e

  if (( failures != 0 )); then
    info "-> Some uploads failed."
    return 0
  fi

  trace "= File processed for all URLs"
  if [[ "$delete" == "y" ]]; then
    if [[ "$dryrun" == "y" ]]; then
      # Nothing was uploaded in a dry run, so the file must be kept.
      info "- Not deleting (dry-run) '$file'"
    else
      info "- Deleting file '$file'"
      if ! rm -- "$file"; then
        info "- Deleting failed!"
      fi
    fi
  fi
  return 0
}

# Prints the first watched directory that contains the given path.
# The match is done on a directory boundary, so a watch dir '/a/b' does not
# claim '/a/bc/file'. Prints nothing if no watched directory matches.
# Globals:   watchdir (read)
# Arguments: $1 - path as reported by inotifywait
findDir() {
  local path="$1"
  local dir
  for dir in "${watchdir[@]}"; do
    if [[ "$path" == "${dir%/}"/* ]]; then
      printf '%s\n' "$dir"
      return 0
    fi
  done
}

#######################################
# Option parsing
#######################################

# An enhanced getopt signals its presence with exit status 4 on --test.
getopt_rc=0
getopt --test > /dev/null || getopt_rc=$?
if (( getopt_rc != 4 )); then
  echo 'I’m sorry, `getopt --test` failed in this environment.'
  exit 1
fi

OPTIONS=omhdp:vri
LONGOPTS=once,distinct,help,delete,path:,verbose,recursive,dry,integration,iuser:,iheader:

if ! PARSED=$(getopt --options="$OPTIONS" --longoptions="$LONGOPTS" --name "$0" -- "$@"); then
  # getopt has already reported the offending arguments
  exit 2
fi

# read getopt’s output this way to handle the quoting right:
eval set -- "$PARSED"

declare -a watchdir=()
help=n
verbose=n
delete=n
once=n
distinct=n
recursive=n
dryrun=n
integration=n
iuser=""
iheader=""

while true; do
  case "$1" in
    -h|--help)
      help=y
      shift
      ;;
    -v|--verbose)
      verbose=y
      shift
      ;;
    -d|--delete)
      delete=y
      shift
      ;;
    -o|--once)
      once=y
      shift
      ;;
    -p|--path)
      watchdir+=("$2")
      shift 2
      ;;
    -m|--distinct)
      distinct=y
      shift
      ;;
    -r|--recursive)
      recursive=y
      shift
      ;;
    --dry)
      dryrun=y
      shift
      ;;
    -i|--integration)
      integration=y
      recursive=y
      shift
      ;;
    --iuser)
      iuser="$2"
      shift 2
      ;;
    --iheader)
      iheader="$2"
      shift 2
      ;;
    --)
      shift
      break
      ;;
    *)
      echo "Programming error"
      exit 3
      ;;
  esac
done

if [[ "$help" == "y" ]]; then
  showUsage
  exit 0
fi

# handle non-option arguments
if [[ $# -eq 0 ]]; then
  echo "$0: No upload URLs given."
  exit 4
fi
declare -a urls=("$@")

if [[ ${#watchdir[@]} -eq 0 ]]; then
  echo "$0: No directory given, use --path."
  exit 4
fi
for dir in "${watchdir[@]}"; do
  if [[ ! -d "$dir" ]]; then
    echo "The path '$dir' is not a directory."
    exit 4
  fi
done

# Extra curl options shared by upload and checkFile. They are kept in an
# array so that values containing spaces (e.g. 'Name: value' headers) stay
# a single argument.
declare -a CURL_EXTRA_OPTS=()
# CURL_OPTS is a plain string from the environment and is split on purpose.
# shellcheck disable=SC2206
CURL_EXTRA_OPTS=($CURL_OPTS)
if [[ "$integration" == "y" ]]; then
  if [[ -n "$iuser" ]]; then
    CURL_EXTRA_OPTS+=(--user "$iuser")
  fi
  if [[ -n "$iheader" ]]; then
    CURL_EXTRA_OPTS+=(-H "$iheader")
  fi
fi

#######################################
# Main
#######################################

if [[ "$once" == "y" ]]; then
  info "Uploading all files in '${watchdir[*]}'."
  declare -a find_args=(-type f -print0)
  if [[ "$recursive" != "y" ]]; then
    find_args=(-maxdepth 1 "${find_args[@]}")
  fi
  for dir in "${watchdir[@]}"; do
    find "$dir" "${find_args[@]}" | while IFS= read -d '' -r file; do
      process "$file" "$dir"
    done
  done
else
  declare -a inotify_args=(-m --format '%w%f' -e close_write -e moved_to)
  if [[ "$recursive" == "y" ]]; then
    inotify_args=(-r "${inotify_args[@]}")
  fi
  "$INOTIFY_CMD" "${inotify_args[@]}" "${watchdir[@]}" |
    while IFS= read -r pathfile; do
      dir=$(findDir "$pathfile")
      trace "The file '$pathfile' appeared below '$dir'"
      sleep 1
      process "$(realpath "$pathfile")" "$dir"
    done
fi