mirror of
				https://github.com/TheAnachronism/docspell.git
				synced 2025-11-04 12:30:12 +00:00 
			
		
		
		
	Add an export-files script
This commit is contained in:
		
							
								
								
									
										189
									
								
								tools/export-files.sh
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										189
									
								
								tools/export-files.sh
									
									
									
									
									
										Executable file
									
								
							@@ -0,0 +1,189 @@
 | 
				
			|||||||
 | 
					#!/usr/bin/env bash
 | 
				
			||||||
 | 
					#
 | 
				
			||||||
 | 
					# Simple script for downloading all your files. It goes through all
 | 
				
			||||||
 | 
					# items visible to the logged in user and downloads the attachments
 | 
				
			||||||
 | 
					# (the original files).
 | 
				
			||||||
 | 
					#
 | 
				
			||||||
 | 
					# The item's metadata are stored next to the files to provide more
 | 
				
			||||||
 | 
					# information about the item. It is not meant to be imported back into
 | 
				
			||||||
 | 
					# docspell.
 | 
				
			||||||
 | 
					#
 | 
				
			||||||
 | 
					# Usage:
 | 
				
			||||||
 | 
					#
 | 
				
			||||||
 | 
					# export-files.sh <docspell-base-url> <target-directory>
 | 
				
			||||||
 | 
					#
 | 
				
			||||||
 | 
					# The docspell base url is required as well as a directory to store
 | 
				
			||||||
 | 
					# all the files into.
 | 
				
			||||||
 | 
					#
 | 
				
			||||||
 | 
					# Example:
 | 
				
			||||||
 | 
					#
 | 
				
			||||||
 | 
					#    export-files.sh http://localhost:7880 /tmp/ds-download
 | 
				
			||||||
 | 
					#
 | 
				
			||||||
 | 
					#
 | 
				
			||||||
 | 
					# The script then asks for username and password and starts downloading.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Fail fast: abort on command errors, on failing pipeline stages, on
					# accidental overwrites through '>' and on use of unset variables.
					set -o errexit -o pipefail -o noclobber -o nounset

					# First argument: base url of the docspell server. "${1:-}" keeps the
					# check safe under 'nounset' when no argument was given.
					if [ -z "${1:-}" ]; then
					    echo "The base-url to docspell is required." >&2
					    exit 1
					fi
					# A trailing slash would otherwise produce '//api/...' in the endpoint
					# urls that are derived from BASE_URL below.
					BASE_URL="${1%/}"
					shift

					# Second argument: the directory that receives the downloaded files.
					if [ -z "${1:-}" ]; then
					    echo "A directory is required to store the files into." >&2
					    exit 1
					fi
					TARGET="$1"
					shift

					# REST endpoints used by the functions of this script.
					LOGIN_URL="$BASE_URL/api/v1/open/auth/login"
					SEARCH_URL="$BASE_URL/api/v1/sec/item/search"
					INSIGHT_URL="$BASE_URL/api/v1/sec/collective/insights"
					DETAIL_URL="$BASE_URL/api/v1/sec/item"
					ATTACH_URL="$BASE_URL/api/v1/sec/attachment"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Writes its arguments to stderr instead of stdout. The arguments are
					# handed to `echo` unchanged, so echo options (e.g. -n) keep working.
					errout() {
					    echo "$@" 1>&2
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Remove left-over ds-export.* temporary files when the script exits.
					# The handler is single-quoted so that TMPDIR is evaluated when the trap
					# fires; ${TMPDIR:-/tmp} falls back to /tmp if TMPDIR is unset or empty.
					trap 'rm -f -- "${TMPDIR:-/tmp}"/ds-export.*' EXIT
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Runs an authenticated curl request and prints the response body.
					#
					# Globals:   auth_token (read) - sent as X-Docspell-Auth header
					# Arguments: passed on to curl unchanged (options and url)
					# Outputs:   the response body on stdout; on failure the command line
					#            (without the token), curl's messages and any received
					#            body go to stderr
					# Returns:   0 on success, 2 if curl exits non-zero
					mcurl() {
					    local errfile outfile status=0
					    errfile=$(mktemp -t "ds-export.XXXXX")
					    outfile=$(mktemp -t "ds-export.XXXXX")

					    # '|| status=$?' captures curl's exit code without tripping errexit.
					    curl -# --fail --stderr "$errfile" -o "$outfile" \
					         -H "X-Docspell-Auth: $auth_token" "$@" || status=$?

					    if [ "$status" -ne 0 ]; then
					        errout "curl -H 'X-Docspell-Auth: …' $*"
					        errout "Curl command failed (rc=$status)! Output is below."
					        cat "$errfile" >&2
					        cat "$outfile" >&2
					        rm -f "$errfile" "$outfile"
					        return 2
					    fi

					    # Print the body verbatim. An unquoted 'echo $var' would word-split
					    # the JSON and expand glob characters such as '*' inside it.
					    cat "$outfile"
					    rm -f "$outfile" "$errfile"
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					errout "Login to Docspell."
					errout "Using url: $BASE_URL"
					# DS_USER / DS_PASS may be supplied through the environment. The ':-'
					# form is required because 'nounset' is active: expanding an unset
					# variable directly would abort the script instead of prompting.
					if [ -z "${DS_USER:-}" ]; then
					    errout -n "Account: "
					    read -r DS_USER
					fi
					if [ -z "${DS_PASS:-}" ]; then
					    errout -n "Password: "
					    # -s: do not echo the password; -r: keep backslashes literally.
					    read -r -s DS_PASS
					fi
					echo

					# Session state shared by login, checkLogin and mcurl:
					#   auth       - raw JSON response of the last login
					#   auth_token - token taken from that response
					#   auth_time  - time of the last login (seconds since epoch)
					declare auth
					declare auth_token
					declare auth_time
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Logs into docspell and stores the session.
					#
					# Globals:   DS_USER, DS_PASS, LOGIN_URL (read)
					#            auth, auth_token, auth_time (written)
					# Outputs:   status messages on stderr
					# Exits the script with status 1 if the login does not succeed.
					login() {
					    local payload
					    # Let jq build the JSON so quotes or backslashes in the credentials
					    # are escaped correctly.
					    payload=$(jq -n --arg account "$DS_USER" --arg password "$DS_PASS" \
					                 '{account: $account, password: $password}')

					    # The payload is sent via stdin (@-) to keep the password out of the
					    # process list. '|| auth=""' prevents errexit from terminating the
					    # script before the failure message below is printed.
					    auth=$(curl -s --fail -XPOST --data-binary @- "$LOGIN_URL" <<<"$payload") || auth=""

					    if [ "$(jq '.success' <<<"$auth")" == "true" ]; then
					        errout "Login successful"
					        auth_token=$(jq -r '.token' <<<"$auth")
					        auth_time=$(date +%s)
					    else
					        errout "Login failed."
					        exit 1
					    fi
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Logs in again if the current session is about to run out.
					#
					# Globals: auth (read, '.validMs' of the login response)
					#          auth_time (read, seconds since epoch of the last login)
					# Calls `login` when the elapsed time exceeds the validity.
					checkLogin() {
					    local elapsed maxtime
					    # Milliseconds since the last login.
					    elapsed=$((1000 * ($(date +%s) - auth_time)))
					    maxtime=$(jq '.validMs' <<<"$auth")

					    # One extra second so the token does not expire right after the check.
					    elapsed=$((elapsed + 1000))
					    if [ "$elapsed" -gt "$maxtime" ]; then
					        errout "Need to re-login $elapsed > $maxtime"
					        login
					    fi
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Prints the ids of one page of search results, one id per line.
					#
					# Globals:   SEARCH_URL (read)
					# Arguments: $1 - offset into the result list (default 0)
					#            $2 - maximum number of items to return (default 50)
					# Outputs:   item ids on stdout, a progress message on stderr
					listItems() {
					    local item_offset="${1:-0}"
					    local page_size="${2:-50}"
					    local req
					    errout "Get next items with offset=$item_offset, limit=$page_size"
					    req="{\"offset\":$item_offset, \"limit\":$page_size, \"tagsInclude\":[],\"tagsExclude\":[],\"tagCategoriesInclude\":[], \"tagCategoriesExclude\":[],\"customValues\":[],\"inbox\":false}"

					    # The header name is 'Content-Type'; without it curl's -d declares
					    # the body as application/x-www-form-urlencoded.
					    mcurl -XPOST -H 'Content-Type: application/json' -d "$req" "$SEARCH_URL" \
					        | jq -r '.groups[].items[]|.id'
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Requests the collective insights and prints the sum of the
					# 'incomingCount' and 'outgoingCount' fields of the response.
					fetchItemCount() {
					    mcurl -XGET "$INSIGHT_URL" \
					        | jq '[.incomingCount, .outgoingCount] | add'
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Prints the detail response for a single item.
					# Arguments: $1 - the item id, appended to DETAIL_URL
					fetchItem() {
					    mcurl -XGET "${DETAIL_URL}/${1}"
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Downloads one item: its metadata and all files listed in '.sources'.
					#
					# Files are stored in "$TARGET/<year>-<month>/<item id>/", where year and
					# month come from the item's '.created' value.
					#
					# Globals:   TARGET, ATTACH_URL, auth_token (read)
					# Arguments: $1 - the item id
					# Outputs:   progress messages on stderr
					downloadItem() {
					    local item_data item_id created_secs out attach_id attach_name

					    checkLogin
					    item_data=$(fetchItem "$1")
					    item_id=$(jq -r '.id' <<<"$item_data")
					    errout "Get item $item_id"

					    # '.created' is treated as epoch milliseconds: round to the nearest
					    # second for `date`.
					    created_secs=$((($(jq '.created' <<<"$item_data") + 500) / 1000))
					    out="$TARGET/$(date -d "@$created_secs" +%Y-%m)/$item_id"

					    mkdir -p "$out"
					    # Feed jq from the quoted variable; an unquoted 'echo $var' would
					    # collapse whitespace and expand glob characters in the metadata.
					    jq . <<<"$item_data" > "$out/metadata.json"

					    # Each line is "<id> <name>"; the name may contain spaces and
					    # therefore takes the remainder of the line.
					    while read -r attach_id attach_name; do
					        # Names come from the server: keep them inside $out by replacing
					        # path separators, and fall back to the id when no name is set.
					        attach_name=${attach_name//\//_}
					        if [ -z "$attach_name" ]; then
					            attach_name=$attach_id
					        fi
					        errout " - download $attach_name ($attach_id)"
					        checkLogin
					        curl --fail -# -o "$out/$attach_name" -H "X-Docspell-Auth: $auth_token" "$ATTACH_URL/$attach_id"
					    done < <(jq -r '.sources[] | [.id,.name] | join(" ")' <<<"$item_data")
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					login

					allCount=$(fetchItemCount)
					errout "Downloading $allCount items…"

					allCounter=0 innerCounter=0 limit=100 offset=0 done=n

					# Walk through the search results page by page ($limit items each) and
					# download every item of the page.
					while [ "$done" = "n" ]; do
					    checkLogin

					    innerCounter=0
					    while read -r id; do
					        downloadItem "$id"
					        innerCounter=$((innerCounter + 1))
					    done < <(listItems "$offset" "$limit")

					    allCounter=$((allCounter + innerCounter))
					    offset=$((offset + limit))

					    # A page with fewer items than requested is the last one.
					    if [ "$innerCounter" -lt "$limit" ]; then
					        done=y
					    fi
					done
					errout "Downloaded $allCounter/$allCount items"
					# Use a numeric test: inside [[ ]] the '<' operator compares strings
					# lexicographically, which e.g. treats 9 as not less than 10.
					if [ "$allCounter" -lt "$allCount" ]; then
					    errout
					    errout "  Downloaded less items than were reported as available. This"
					    errout "  may be due to items in folders that you cannot see. Or it"
					    errout "  may be a bug."
					    errout
					fi
 | 
				
			||||||
@@ -82,14 +82,11 @@ documentation, too.
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
In order to move to a different tool, it is necessary to get the data
 | 
					In order to move to a different tool, it is necessary to get the data
 | 
				
			||||||
out of Docspell in a machine readable/automatic way. Currently, there
 | 
					out of Docspell in a machine readable/automatic way. Currently, there
 | 
				
			||||||
is no *easy way* for this. However, it is possible to get to all data
 | 
					is no *easy way* for this. However, everything can be queried using a
 | 
				
			||||||
with some scripting effort. Everything can be queried using a
 | 
					[HTTP/REST api](@/docs/api/_index.md) and so it is possible to get to
 | 
				
			||||||
[HTTP/REST api](@/docs/api/_index.md) and so you can write a
 | 
					all data with some scripting effort. There exists a script in the
 | 
				
			||||||
script/program that, for example, queries all items and downloads the
 | 
					`tools/` folder that can at least download all files that have
 | 
				
			||||||
files (something like this might be provided soon, for now there are
 | 
					been uploaded to docspell.
 | 
				
			||||||
starting points in the `/tools` folder). It is planned to provide a
 | 
					 | 
				
			||||||
more convenient way to export the data into the file system. But there
 | 
					 | 
				
			||||||
is no ETA for this.
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
My recommendation is to run periodic database backups and also store
 | 
					My recommendation is to run periodic database backups and also store
 | 
				
			||||||
the binaries/docker images. This lets you re-create the current state
 | 
					the binaries/docker images. This lets you re-create the current state
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										33
									
								
								website/site/content/docs/tools/export-files.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										33
									
								
								website/site/content/docs/tools/export-files.md
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,33 @@
 | 
				
			|||||||
 | 
					+++
 | 
				
			||||||
 | 
					title = "Export Files"
 | 
				
			||||||
 | 
					description = "Downloads all files from docspell."
 | 
				
			||||||
 | 
					weight = 65
 | 
				
			||||||
 | 
					+++
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# export-files.sh
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					This script can be used to download all files from docspell that have
 | 
				
			||||||
 | 
					been uploaded before.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Requirements
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					It is a bash script that additionally needs
 | 
				
			||||||
 | 
					[curl](https://curl.haxx.se/) and
 | 
				
			||||||
 | 
					[jq](https://stedolan.github.io/jq/).
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Usage
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					./export-files.sh <docspell-base-url> <target-directory>
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					For example, if docspell is at `http://localhost:7880`:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					./export-files.sh http://localhost:7880 /tmp/ds-downloads
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The script asks for your account name and password. It then logs in
 | 
				
			||||||
 | 
					and goes through all items downloading the metadata as json and the
 | 
				
			||||||
 | 
					attachments. It will fetch the original files (not the converted
 | 
				
			||||||
 | 
					ones).
 | 
				
			||||||
		Reference in New Issue
	
	Block a user