diff --git a/modules/microsite/src/main/resources/microsite/data/menu.yml b/modules/microsite/src/main/resources/microsite/data/menu.yml index 78c014a9..e4363d57 100644 --- a/modules/microsite/src/main/resources/microsite/data/menu.yml +++ b/modules/microsite/src/main/resources/microsite/data/menu.yml @@ -30,6 +30,9 @@ options: - title: Joex url: doc/joex.html + - title: Tools + url: doc/tools.html + - title: Development url: dev.html diff --git a/modules/microsite/src/main/tut/doc/tools.md b/modules/microsite/src/main/tut/doc/tools.md new file mode 100644 index 00000000..245f5967 --- /dev/null +++ b/modules/microsite/src/main/tut/doc/tools.md @@ -0,0 +1,45 @@ +--- +layout: docs +title: Tools +--- + +# {{ page.title }} + +The `tools/` folder contains some scripts and other resources intended +for integrating docspell. + +## consumedir + +The `consumedir` is a bash script that works in two modes: + +- Go through all files in given directories (non-recursively) and send + each to docspell. +- Watch one or more directories for new files and upload them to + docspell. + +It can watch or go through one or more directories. Files can be +uploaded to multiple urls. + +Run the script with the `-h` option, to see a short help text. The +help text will also show the values for any given option. + +The script requires `curl` for uploading. It requires the +`inotifywait` command if directories should be watched for new +files. If the `-m` option is used, the script will skip duplicate +files. For this the `sha256sum` command is required. + +Example for watching two directories: + +``` bash +./tools/consumedir.sh --path ~/Downloads --path ~/pdfs/ -m /var/run/consumedir -dv http://localhost:7880/api/v1/open/upload/item/5DxhjkvWf9S-CkWqF3Kr892-WgoCspFWDo7-XBykwCyAUxQ +``` + +The script by default watches the given directories. If the `-o` +option is used, it will instead go through these directories and +upload all pdf files in there. 
+ +Example for uploading all files immediately: + +``` bash +./tools/consumedir.sh -o --path ~/Downloads --path ~/pdfs/ -m /var/run/consumedir -dv http://localhost:7880/api/v1/open/upload/item/5DxhjkvWf9S-CkWqF3Kr892-WgoCspFWDo7-XBykwCyAUxQ +``` diff --git a/tools/consumedir.sh b/tools/consumedir.sh new file mode 100755 index 00000000..dfca1e39 --- /dev/null +++ b/tools/consumedir.sh @@ -0,0 +1,211 @@ +#!/usr/bin/env bash + +# This script watches a directory for new files and uploads them to +# docspell. Or it uploads all files currently in the directory. +# +# It requires inotifywait, curl and sha256sum if the `-m' option is +# used. + +# saner programming env: these switches turn some bugs into errors +set -o errexit -o pipefail -o noclobber -o nounset + +CURL_CMD="curl" +INOTIFY_CMD="inotifywait" +SHA256_CMD="sha256sum" +MKTEMP_CMD="mktemp" + +! getopt --test > /dev/null +if [[ ${PIPESTATUS[0]} -ne 4 ]]; then + echo 'I’m sorry, `getopt --test` failed in this environment.' + exit 1 +fi + +OPTIONS=om:hdp:v +LONGOPTS=once,memorize:,help,delete,path:,verbose + +! PARSED=$(getopt --options=$OPTIONS --longoptions=$LONGOPTS --name "$0" -- "$@") +if [[ ${PIPESTATUS[0]} -ne 0 ]]; then + # e.g. return value is 1 + # then getopt has complained about wrong arguments to stderr + exit 2 +fi + +# read getopt’s output this way to handle the quoting right: +eval set -- "$PARSED" + +declare -a watchdir +help=n verbose=n delete=n once=n memodir= +while true; do + case "$1" in + -h|--help) + help=y + shift + ;; + -v|--verbose) + verbose=y + shift + ;; + -d|--delete) + delete=y + shift + ;; + -o|--once) + once=y + shift + ;; + -p|--path) + watchdir+=("$2") + shift 2 + ;; + -m|--memorize) + memodir="$2" + shift 2 + ;; + --) + shift + break + ;; + *) + echo "Programming error" + exit 3 + ;; + esac +done + +# handle non-option arguments +if [[ $# -eq 0 ]]; then + echo "$0: No upload URLs given." + exit 4 +fi +urls=$@ + +if [ ! 
-d "$watchdir" ]; then
+    echo "The path '$watchdir' is not a directory."
+    exit 4
+fi
+
+
+# Print the message $1 to stdout, but only if --verbose was given.
+trace() {
+    if [ "$verbose" = "y" ]; then
+        printf '%s\n' "$1"
+    fi
+}
+
+# Print the message $1 to stdout unconditionally.
+# The argument is quoted so that whitespace and glob characters (which
+# may come from file names) are printed literally instead of being
+# word-split or expanded by the shell.
+info() {
+    printf '%s\n' "$1"
+}
+
+# Upload a single file to a single docspell endpoint.
+#
+# Arguments: $1 - path of the file to send
+#            $2 - upload URL
+# Globals:   MKTEMP_CMD, CURL_CMD (read)
+# Returns:   0 if the HTTP status contains 200 and the response body
+#            contains '{"success":true'. Otherwise the sum of the exit
+#            codes of the failed checks; the response body is printed
+#            in that case.
+upload() {
+    local tf rc=0
+    if ! tf=$($MKTEMP_CMD); then
+        info "Upload failed. Could not create a temporary file."
+        return 1
+    fi
+    # curl writes the response body to $tf and prints only the HTTP
+    # status code, which is what the first grep inspects. The script
+    # enables pipefail, so a failing curl is reflected here as well.
+    $CURL_CMD -s -o "$tf" -w "%{http_code}" -XPOST -F file=@"$1" "$2" |
+        grep 200 > /dev/null || rc=$((rc + $?))
+    grep '{"success":true' "$tf" > /dev/null || rc=$((rc + $?))
+    if [ "$rc" -ne 0 ]; then
+        info "Upload failed. Exit code: $rc"
+        cat "$tf"
+        echo ""
+    fi
+    rm -f -- "$tf"
+    return "$rc"
+}
+
+# Print the checksum of the file $1: the first space-separated field of
+# the SHA256_CMD output, with surrounding whitespace trimmed by xargs.
+checksum() {
+    $SHA256_CMD "$1" | cut -d' ' -f1 | xargs
+}
+
+# Upload the file $1 to every URL in $urls.
+#
+# If a memorize directory is set, uploads are recorded in
+# "$memodir/.docspell-consume" as lines of the form "<checksum> : <url>",
+# and a file/URL pair that is already recorded is skipped. If --delete
+# was given, the file is removed once no upload has failed.
+#
+# Globals: urls, memodir, delete (read)
+process() {
+    local file="$1"
+    local memofile="$memodir/.docspell-consume"
+    local url rc sum=""
+    local -i curlrc=0
+    info "---- Processing $file ----------"
+    # A failed upload must not abort the whole script.
+    set +e
+    if [ -n "$memodir" ]; then
+        # Hash once per file; the checksum is the same for every URL.
+        sum=$(checksum "$file")
+        if [ -z "$sum" ]; then
+            info "- Could not compute a checksum for '$file'; duplicate detection is skipped."
+        fi
+    fi
+    # $urls is a whitespace-separated list, so it is split on purpose.
+    for url in $urls; do
+        if [ -n "$sum" ] && [ -f "$memofile" ]; then
+            trace "- Checking if $file has been uploaded to $url already"
+            # Match the complete record as a fixed string: the URL must
+            # not be interpreted as a regular expression, and a URL that
+            # is merely a prefix of a recorded one must not match.
+            if grep -qxF -- "$sum : $url" "$memofile"; then
+                info "- Skipping file '$file' because it has been uploaded in the past."
+                continue
+            fi
+        fi
+        trace "- Uploading '$file' to '$url'."
+        upload "$file" "$url"
+        rc=$?
+        curlrc=$((curlrc + rc))
+        if [ "$rc" -ne 0 ]; then
+            trace "Upload to '$url' failed!"
+        elif [ -n "$sum" ]; then
+            trace "- Adding file '$file' to list of uploaded files for '$url'"
+            # Appending with >> is not restricted by noclobber.
+            echo "$sum : $url" >> "$memofile"
+        fi
+    done
+    set -e
+    if [ "$curlrc" -ne 0 ]; then
+        info "-> Some uploads failed."
+    else
+        trace "= File processed for all URLs"
+        if [ "$delete" = "y" ]; then
+            info "- Deleting file '$file'"
+            # Tested inside `if`, so a failing rm does not trip errexit.
+            if ! rm -- "$file"; then
+                info "- Deleting failed!"
+            fi
+        fi
+    fi
+}
+
+showUsage() {
+    echo "Upload files in a directory"
+    echo ""
+    echo "Usage: $0 [options] url url ..."
+    echo
+    echo "Options:"
+    echo " -v | --verbose Print more to stdout. 
(value: $verbose)" + echo " -d | --delete Delete the file if successfully uploaded. (value: $delete)" + echo " -p | --path