mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-07-04 16:48:26 +00:00
Add an export-files script
This commit is contained in:
189
tools/export-files.sh
Executable file
189
tools/export-files.sh
Executable file
@ -0,0 +1,189 @@
|
|||||||
|
#!/usr/bin/env bash
#
# Simple script for downloading all your files. It goes through all
# items visible to the logged in user and downloads the attachments
# (the original files).
#
# The item's metadata are stored next to the files to provide more
# information about the item. It is not meant to be imported back into
# docspell.
#
# Usage:
#
#    export-files.sh <docspell-base-url> <target-directory>
#
# The docspell base url is required as well as a directory to store
# all the files into.
#
# Example:
#
#    export-files.sh http://localhost:7880 /tmp/ds-download
#
# The script then asks for username and password and starts downloading.

# First positional argument: the docspell server to talk to.
BASE_URL="${1:-}"
if [ -z "$BASE_URL" ]; then
  echo "The base-url to docspell is required."
  exit 1
fi
shift

# Second positional argument: where to put the downloaded files.
TARGET="${1:-}"
if [ -z "$TARGET" ]; then
  echo "A directory is required to store the files into."
  exit 1
fi
shift

set -o errexit -o pipefail -o noclobber -o nounset
|
||||||
|
|
||||||
|
# REST endpoints, derived from the base url given on the command line.
LOGIN_URL="$BASE_URL/api/v1/open/auth/login"
SEARCH_URL="$BASE_URL/api/v1/sec/item/search"
INSIGHT_URL="$BASE_URL/api/v1/sec/collective/insights"
DETAIL_URL="$BASE_URL/api/v1/sec/item"
ATTACH_URL="$BASE_URL/api/v1/sec/attachment"

# Print all arguments to stderr, keeping stdout free for data.
errout() {
  >&2 echo "$@"
}

# Clean up temp files created by mcurl on any exit.
# Fix: the original used ${TMPDIR-:/tmp}, which expands to the bogus
# path ":/tmp" when TMPDIR is unset; ${TMPDIR:-/tmp} falls back to /tmp
# when TMPDIR is unset or empty.
trap "{ rm -f ${TMPDIR:-/tmp}/ds-export.*; }" EXIT
|
||||||
|
|
||||||
|
# Authenticated curl wrapper. Performs the request with the current
# $auth_token and prints the response body on stdout. On failure it
# dumps curl's diagnostics and any partial body to stderr and returns 2
# (which aborts the script under `set -e`).
mcurl() {
  local tmp_err tmp_body rc body
  tmp_err=$(mktemp -t "ds-export.XXXXX")
  tmp_body=$(mktemp -t "ds-export.XXXXX")
  # Temporarily disable errexit so a failing curl doesn't kill the
  # script before we can report the error.
  set +e
  curl -# --fail --stderr "$tmp_err" -o "$tmp_body" -H "X-Docspell-Auth: $auth_token" "$@"
  rc=$?
  set -e
  if [ $rc -ne 0 ]; then
    # $* (not $@) inside a string: join all arguments into one word.
    errout "curl -H 'X-Docspell-Auth: …' $*"
    errout "Curl command failed (rc=$rc)! Output is below."
    cat "$tmp_err" >&2
    cat "$tmp_body" >&2
    rm -f "$tmp_err" "$tmp_body"
    return 2
  else
    body=$(cat "$tmp_body")
    rm "$tmp_body" "$tmp_err"
    # Fix: the original `echo $ret` (unquoted) collapsed all whitespace
    # inside the response and was subject to glob expansion; quote the
    # value so callers (jq) see the JSON exactly as received.
    printf '%s\n' "$body"
  fi
}
|
||||||
|
|
||||||
|
|
||||||
|
errout "Login to Docspell."
errout "Using url: $BASE_URL"
# Fix: use ${VAR:-} — `set -o nounset` is active at this point, so the
# original `[ -z "$DS_USER" ]` aborted with "unbound variable" whenever
# DS_USER/DS_PASS were not exported in the environment.
if [ -z "${DS_USER:-}" ]; then
  errout -n "Account: "
  read -r DS_USER
fi
if [ -z "${DS_PASS:-}" ]; then
  errout -n "Password: "
  # -s: don't echo the password back to the terminal
  read -r -s DS_PASS
fi
echo

# Session state maintained by login/checkLogin.
declare auth        # full JSON auth response from the server
declare auth_token  # token sent via the X-Docspell-Auth header
declare auth_time   # epoch seconds when the token was obtained
|
||||||
|
|
||||||
|
|
||||||
|
# Log into docspell. Stores the session JSON in $auth, the token in
# $auth_token and the login time (epoch seconds) in $auth_time. Exits
# the script when login fails.
# NOTE(review): DS_USER/DS_PASS are interpolated into the JSON payload
# verbatim; quotes or backslashes in the password will break the body —
# consider building it with `jq -n --arg`.
login() {
  auth=$(curl -s --fail -XPOST \
    --data-binary "{\"account\":\"$DS_USER\", \"password\":\"$DS_PASS\"}" "$LOGIN_URL")

  # Quote $auth so the JSON reaches jq unmangled (the unquoted original
  # was subject to word-splitting and glob expansion).
  if [ "$(echo "$auth" | jq .success)" == "true" ]; then
    errout "Login successful"
    auth_token=$(echo "$auth" | jq -r .token)
    auth_time=$(date +%s)
  else
    errout "Login failed."
    exit 1
  fi
}
|
||||||
|
|
||||||
|
# Re-login when the session token is close to expiring: compares the
# session age in milliseconds (padded by one extra second) against the
# validity period the server reported at login time.
checkLogin() {
  elapsed=$(( ($(date +%s) - auth_time) * 1000 ))
  maxtime=$(echo $auth | jq .validMs)

  # add a one-second safety margin
  elapsed=$(( elapsed + 1000 ))
  if [ $elapsed -gt $maxtime ]; then
    errout "Need to re-login $elapsed > $maxtime"
    login
  fi
}
|
||||||
|
|
||||||
|
# Print the ids of one page of items, one id per line.
#   $1 - offset into the result list (default 0)
#   $2 - page size (default 50)
listItems() {
  OFFSET="${1:-0}"
  LIMIT="${2:-50}"
  errout "Get next items with offset=$OFFSET, limit=$LIMIT"
  REQ="{\"offset\":$OFFSET, \"limit\":$LIMIT, \"tagsInclude\":[],\"tagsExclude\":[],\"tagCategoriesInclude\":[], \"tagCategoriesExclude\":[],\"customValues\":[],\"inbox\":false}"

  # Fix: the original sent the bogus header name 'ContentType'; the
  # request body is JSON, so declare it with the proper Content-Type.
  mcurl -XPOST -H 'Content-Type: application/json' -d "$REQ" "$SEARCH_URL" | jq -r '.groups[].items[]|.id'
}
|
||||||
|
|
||||||
|
# Total number of items in the collective, computed as the sum of the
# incoming and outgoing counts reported by the insights endpoint.
fetchItemCount() {
  mcurl -XGET "$INSIGHT_URL" \
    | jq '[.incomingCount, .outgoingCount] | add'
}
|
||||||
|
|
||||||
|
# Fetch the full JSON metadata of a single item.
#   $1 - the item id
fetchItem() {
  local item_id="$1"
  mcurl -XGET "$DETAIL_URL/$item_id"
}
|
||||||
|
|
||||||
|
# Download one item: writes its metadata as JSON plus all original
# attachment files into $TARGET/<year-month>/<item-id>/.
#   $1 - the item id
downloadItem() {
  checkLogin
  itemData=$(fetchItem "$1")
  errout "Get item $(echo "$itemData" | jq -r .id)"
  # Convert the created timestamp from milliseconds to seconds,
  # rounding to the nearest second. (The original assigned the raw
  # millisecond value first and immediately overwrote it — dead code.)
  created=$((($(echo "$itemData" | jq '.created') + 500) / 1000))
  itemId=$(echo "$itemData" | jq -r '.id')
  # NOTE(review): `date -d @…` is GNU-specific; on BSD/macOS this would
  # need `date -r` instead.
  out="$TARGET/$(date -d @$created +%Y-%m)/$itemId"

  mkdir -p "$out"
  # Fix: the unquoted `echo $itemData` collapsed all whitespace inside
  # the JSON (and risked glob expansion) before it hit metadata.json.
  printf '%s\n' "$itemData" | jq > "$out/metadata.json"

  # Download every original attachment. jq emits "id name" per line;
  # `read attachId attachName` keeps the remainder (names may contain
  # spaces) in attachName.
  while read -r attachId attachName; do
    errout " - download $attachName ($attachId)"
    attachOut="$out/$attachName"
    checkLogin
    curl --fail -# -o "$attachOut" -H "X-Docspell-Auth: $auth_token" "$ATTACH_URL/$attachId"
  done < <(printf '%s\n' "$itemData" | jq -r '.sources[] | [.id,.name] | join(" ")')
}
|
||||||
|
|
||||||
|
login

allCount=$(fetchItemCount)
errout "Downloading $allCount items…"

allCounter=0 innerCounter=0 limit=100 offset=0 done=n

# Page through the search results until a short page signals the end.
while [ "$done" = "n" ]; do
  checkLogin

  innerCounter=0
  while read -r id; do
    downloadItem "$id"
    innerCounter=$(($innerCounter + 1))
  done < <(listItems $offset $limit)

  allCounter=$(($allCounter + $innerCounter))
  offset=$(($offset + $limit))

  # A page smaller than the limit means we have seen the last one.
  if [ $innerCounter -lt $limit ]; then
    done=y
  fi

done
errout "Downloaded $allCounter/$allCount items"
# Fix: use a numeric comparison. The original `[[ $allCounter < $allCount ]]`
# compared the counts lexicographically (e.g. "9" < "10" is false), so
# the warning below could be wrongly skipped or wrongly shown.
if [ $allCounter -lt $allCount ]; then
  errout
  errout " Downloaded less items than were reported as available. This"
  errout " may be due to items in folders that you cannot see. Or it"
  errout " may be a bug."
  errout
fi
|
@ -82,14 +82,11 @@ documentation, too.
|
|||||||
|
|
||||||
In order to move to a different tool, it is necessary to get the data
|
In order to move to a different tool, it is necessary to get the data
|
||||||
out of Docspell in a machine readable/automatic way. Currently, there
|
out of Docspell in a machine readable/automatic way. Currently, there
|
||||||
is no *easy way* for this. However, it is possible to get to all data
|
is no *easy way* for this. However, everything can be queried using a
|
||||||
with some scripting effort. Everything can be queried using a
|
[HTTP/REST api](@/docs/api/_index.md) and so it is possible to get to
|
||||||
[HTTP/REST api](@/docs/api/_index.md) and so you can write a
|
all data with some scripting effort. There exists a script in the
|
||||||
script/program that, for example, queries all items and downloads the
|
`tools/` folder that at least can go and download all files that have
|
||||||
files (something like this might be provided soon, for now there are
|
been uploaded to docspell.
|
||||||
starting points in the `/tools` folder). It is planned to provide a
|
|
||||||
more convenient way to export the data into the file system. But there
|
|
||||||
is no ETA for this.
|
|
||||||
|
|
||||||
My recommendation is to run periodic database backups and also store
|
My recommendation is to run periodic database backups and also store
|
||||||
the binaries/docker images. This lets you re-create the current state
|
the binaries/docker images. This lets you re-create the current state
|
||||||
|
33
website/site/content/docs/tools/export-files.md
Normal file
33
website/site/content/docs/tools/export-files.md
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
+++
|
||||||
|
title = "Export Files"
|
||||||
|
description = "Downloads all files from docspell."
|
||||||
|
weight = 65
|
||||||
|
+++
|
||||||
|
|
||||||
|
# export-files.sh
|
||||||
|
|
||||||
|
This script can be used to download all files from docspell that have
|
||||||
|
been uploaded before.
|
||||||
|
|
||||||
|
# Requirements
|
||||||
|
|
||||||
|
It is a bash script that additionally needs
|
||||||
|
[curl](https://curl.haxx.se/) and
|
||||||
|
[jq](https://stedolan.github.io/jq/).
|
||||||
|
|
||||||
|
# Usage
|
||||||
|
|
||||||
|
```
|
||||||
|
./export-files.sh <docspell-base-url> <target-directory>
|
||||||
|
```
|
||||||
|
|
||||||
|
For example, if docspell is at `http://localhost:7880`:
|
||||||
|
|
||||||
|
```
|
||||||
|
./export-files.sh http://localhost:7880 /tmp/ds-downloads
|
||||||
|
```
|
||||||
|
|
||||||
|
The script asks for your account name and password. It then logs in
|
||||||
|
and goes through all items downloading the metadata as json and the
|
||||||
|
attachments. It will fetch the original files (not the converted
|
||||||
|
ones).
|
Reference in New Issue
Block a user