mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-03-25 16:45:05 +00:00
Add route to check for files by their checksum
Adapt the scripts in `tools/` to check for existing files using these routes.
This commit is contained in:
parent
d05e919eb4
commit
eb6c483ef0
@ -1,6 +1,7 @@
|
||||
package docspell.backend.ops
|
||||
|
||||
import fs2.Stream
|
||||
import cats.data.OptionT
|
||||
import cats.implicits._
|
||||
import cats.effect.{Effect, Resource}
|
||||
import doobie._
|
||||
@ -11,6 +12,7 @@ import OItem.{AttachmentData, ItemData, ListItem, Query}
|
||||
import bitpeace.{FileMeta, RangeDef}
|
||||
import docspell.common.{Direction, Ident, ItemState, MetaProposalList, Timestamp}
|
||||
import docspell.store.records.{RAttachment, RAttachmentMeta, RItem, RTagItem}
|
||||
import docspell.store.records.RSource
|
||||
|
||||
trait OItem[F[_]] {
|
||||
|
||||
@ -47,6 +49,11 @@ trait OItem[F[_]] {
|
||||
def delete(itemId: Ident, collective: Ident): F[Int]
|
||||
|
||||
def findAttachmentMeta(id: Ident, collective: Ident): F[Option[RAttachmentMeta]]
|
||||
|
||||
/** Finds the items of the given collective that have an attachment
  * whose file has the given checksum. Yields an empty vector when no
  * such item exists.
  */
def findByFileCollective(checksum: String, collective: Ident): F[Vector[RItem]]
|
||||
|
||||
/** Like `findByFileCollective`, but the collective is looked up from
  * the given source id. Yields an empty vector when the source id
  * resolves to no collective.
  */
def findByFileSource(checksum: String, sourceId: Ident): F[Vector[RItem]]
|
||||
|
||||
}
|
||||
|
||||
object OItem {
|
||||
@ -163,5 +170,15 @@ object OItem {
|
||||
|
||||
def findAttachmentMeta(id: Ident, collective: Ident): F[Option[RAttachmentMeta]] =
|
||||
store.transact(QAttachment.getAttachmentMeta(id, collective))
|
||||
|
||||
/** Runs the checksum lookup for `collective` in a single transaction. */
def findByFileCollective(checksum: String, collective: Ident): F[Vector[RItem]] = {
  val lookup = QItem.findByChecksum(checksum, collective)
  store.transact(lookup)
}
|
||||
|
||||
/** Resolves the collective owning `sourceId` and runs the checksum
  * lookup for it, all within one transaction. If the source id
  * resolves to no collective, the result is an empty vector.
  */
def findByFileSource(checksum: String, sourceId: Ident): F[Vector[RItem]] = {
  val lookup: ConnectionIO[Vector[RItem]] =
    OptionT(RSource.findCollective(sourceId))
      .semiflatMap(coll => QItem.findByChecksum(checksum, coll))
      .getOrElse(Vector.empty)

  store.transact(lookup)
}
|
||||
|
||||
})
|
||||
}
|
||||
|
@ -47,6 +47,26 @@ paths:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/AuthResult"
|
||||
/open/checkfile/{id}/{checksum}:
|
||||
get:
|
||||
tags: [ Upload ]
|
||||
summary: Check if a file is in docspell.
|
||||
description: |
|
||||
Checks if a file with the given SHA-256 checksum is in
|
||||
docspell. The id is a *source id* configured by a collective.
|
||||
|
||||
The result shows all items that contain a file with the given
|
||||
checksum.
|
||||
parameters:
|
||||
- $ref: "#/components/parameters/id"
|
||||
- $ref: "#/components/parameters/checksum"
|
||||
responses:
|
||||
200:
|
||||
description: Ok
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/CheckFileResult"
|
||||
/open/upload/item/{id}:
|
||||
post:
|
||||
tags: [ Upload ]
|
||||
@ -95,6 +115,25 @@ paths:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/BasicResult"
|
||||
/sec/checkfile/{checksum}:
|
||||
get:
|
||||
tags: [ Upload ]
|
||||
summary: Check if a file is in docspell.
|
||||
description: |
|
||||
Checks if a file with the given SHA-256 checksum is in
|
||||
docspell.
|
||||
|
||||
The result shows all items that contain a file with the given
|
||||
checksum.
|
||||
parameters:
|
||||
- $ref: "#/components/parameters/checksum"
|
||||
responses:
|
||||
200:
|
||||
description: Ok
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/CheckFileResult"
|
||||
/sec/upload:
|
||||
post:
|
||||
tags: [ Upload ]
|
||||
@ -1169,6 +1208,46 @@ paths:
|
||||
$ref: "#/components/schemas/BasicResult"
|
||||
components:
|
||||
schemas:
|
||||
CheckFileResult:
|
||||
description: |
|
||||
Results when searching for file checksums.
|
||||
required:
|
||||
- exists
|
||||
- items
|
||||
properties:
|
||||
exists:
|
||||
type: boolean
|
||||
items:
|
||||
type: array
|
||||
items:
|
||||
$ref: "#/components/schemas/BasicItem"
|
||||
BasicItem:
|
||||
description: |
|
||||
Basic properties about an item.
|
||||
required:
|
||||
- id
|
||||
- name
|
||||
- direction
|
||||
- state
|
||||
- created
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
format: ident
|
||||
name:
|
||||
type: string
|
||||
direction:
|
||||
type: string
|
||||
format: direction
|
||||
state:
|
||||
type: string
|
||||
format: itemstate
|
||||
created:
|
||||
type: integer
|
||||
format: date-time
|
||||
itemDate:
|
||||
type: integer
|
||||
format: date-time
|
||||
GenInvite:
|
||||
description: |
|
||||
A request to generate a new invitation key.
|
||||
@ -2083,3 +2162,10 @@ components:
|
||||
required: false
|
||||
schema:
|
||||
type: boolean
|
||||
checksum:
|
||||
name: checksum
|
||||
in: path
|
||||
description: A SHA-256 checksum
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
|
@ -31,7 +31,7 @@ object RestServer {
|
||||
"/api/v1/sec/" -> Authenticate(restApp.backend.login, cfg.auth) { token =>
|
||||
securedRoutes(cfg, restApp, token)
|
||||
},
|
||||
"/api/doc" -> templates.doc,
|
||||
"/api/doc" -> templates.doc,
|
||||
"/app/assets" -> WebjarRoutes.appRoutes[F](blocker),
|
||||
"/app" -> templates.app
|
||||
).orNotFound
|
||||
@ -68,13 +68,15 @@ object RestServer {
|
||||
"queue" -> JobQueueRoutes(restApp.backend, token),
|
||||
"item" -> ItemRoutes(restApp.backend, token),
|
||||
"attachment" -> AttachmentRoutes(restApp.backend, token),
|
||||
"upload" -> UploadRoutes.secured(restApp.backend, cfg, token)
|
||||
"upload" -> UploadRoutes.secured(restApp.backend, cfg, token),
|
||||
"checkfile" -> CheckFileRoutes.secured(restApp.backend, token)
|
||||
)
|
||||
|
||||
def openRoutes[F[_]: Effect](cfg: Config, restApp: RestApp[F]): HttpRoutes[F] =
|
||||
Router(
|
||||
"auth" -> LoginRoutes.login(restApp.backend.login, cfg),
|
||||
"signup" -> RegisterRoutes(restApp.backend, cfg),
|
||||
"upload" -> UploadRoutes.open(restApp.backend, cfg)
|
||||
"auth" -> LoginRoutes.login(restApp.backend.login, cfg),
|
||||
"signup" -> RegisterRoutes(restApp.backend, cfg),
|
||||
"upload" -> UploadRoutes.open(restApp.backend, cfg),
|
||||
"checkfile" -> CheckFileRoutes.open(restApp.backend)
|
||||
)
|
||||
}
|
||||
|
@ -0,0 +1,50 @@
|
||||
package docspell.restserver.routes
|
||||
|
||||
import cats.effect._
|
||||
import cats.implicits._
|
||||
import docspell.backend.BackendApp
|
||||
import docspell.backend.auth.AuthToken
|
||||
import docspell.common.Ident
|
||||
import docspell.restapi.model.{BasicItem, CheckFileResult}
|
||||
import docspell.restserver.http4s.ResponseGenerator
|
||||
import org.http4s.HttpRoutes
|
||||
import org.http4s.circe.CirceEntityEncoder._
|
||||
import org.http4s.dsl.Http4sDsl
|
||||
import docspell.store.records.RItem
|
||||
|
||||
/** Routes that tell a client whether a file, identified by its
  * checksum, is already contained in some item.
  */
object CheckFileRoutes {

  /** Route for authenticated users: `GET /{checksum}` searches the
    * collective of the account carried by the auth token.
    */
  def secured[F[_]: Effect](backend: BackendApp[F], user: AuthToken): HttpRoutes[F] = {
    val dsl = new Http4sDsl[F] with ResponseGenerator[F] {}
    import dsl._

    HttpRoutes.of {
      case GET -> Root / checksum =>
        backend.item
          .findByFileCollective(checksum, user.account.collective)
          .flatMap(items => Ok(convert(items)))
    }
  }

  /** Unauthenticated route: `GET /{id}/{checksum}` where the first
    * path segment must parse as an `Ident` and is passed on as the
    * source id.
    */
  def open[F[_]: Effect](backend: BackendApp[F]): HttpRoutes[F] = {
    val dsl = new Http4sDsl[F] with ResponseGenerator[F] {}
    import dsl._

    HttpRoutes.of {
      case GET -> Root / Ident(id) / checksum =>
        backend.item
          .findByFileSource(checksum, id)
          .flatMap(items => Ok(convert(items)))
    }
  }

  /** Builds the response payload; `exists` is true iff at least one item was found. */
  private def convert(v: Vector[RItem]): CheckFileResult = {
    val found = v.toList.map(basicItem)
    CheckFileResult(found.nonEmpty, found)
  }

  /** Copies the basic properties of a stored item into its REST model. */
  private def basicItem(r: RItem): BasicItem =
    BasicItem(r.id, r.name, r.direction, r.state, r.created, r.itemDate)

}
|
@ -5,6 +5,7 @@ import cats.implicits._
|
||||
import docspell.backend.BackendApp
|
||||
import docspell.backend.auth.AuthToken
|
||||
import docspell.common.{Ident, Priority}
|
||||
import docspell.restapi.model.BasicResult
|
||||
import docspell.restserver.Config
|
||||
import docspell.restserver.conv.Conversions._
|
||||
import docspell.restserver.http4s.ResponseGenerator
|
||||
@ -36,6 +37,9 @@ object UploadRoutes {
|
||||
res <- Ok(basicResult(result))
|
||||
} yield res
|
||||
|
||||
case GET -> Root / "checkfile" / checksum =>
|
||||
Ok(BasicResult(false, s"not implemented $checksum"))
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@ -51,6 +55,9 @@ object UploadRoutes {
|
||||
result <- backend.upload.submit(updata, id)
|
||||
res <- Ok(basicResult(result))
|
||||
} yield res
|
||||
|
||||
case GET -> Root / "checkfile" / Ident(id) / checksum =>
|
||||
Ok(BasicResult(false, s"not implemented $id $checksum"))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -239,6 +239,19 @@ object QItem {
|
||||
q.query[RItem].to[Vector]
|
||||
}
|
||||
|
||||
/** Selects all items of `collective` that are joined, via an
  * attachment, to a `filemeta` row whose checksum equals `checksum`.
  */
def findByChecksum(checksum: String, collective: Ident): ConnectionIO[Vector[RItem]] = {
  // item columns are addressed through alias "i", the attachment through "a"
  val itemCols   = RItem.Columns.all.map(_.prefix("i"))
  val itemId     = RItem.Columns.id.prefix("i")
  val itemColl   = RItem.Columns.cid.prefix("i")
  val attachItem = RAttachment.Columns.itemId.prefix("a")

  val joinAttach =
    RItem.table ++ fr"i INNER JOIN" ++ RAttachment.table ++ fr"a ON" ++ attachItem.is(itemId)
  val joinMeta = fr"INNER JOIN filemeta m ON m.id = a.filemetaid"
  val tables   = joinAttach ++ joinMeta
  val cond     = and(fr"m.checksum = $checksum", itemColl.is(collective))

  selectSimple(itemCols, tables, cond).query[RItem].to[Vector]
}
|
||||
|
||||
private def queryWildcard(value: String): String = {
|
||||
def prefix(n: String) =
|
||||
if (n.startsWith("*")) s"%${n.substring(1)}"
|
||||
|
@ -20,8 +20,8 @@ if [[ ${PIPESTATUS[0]} -ne 4 ]]; then
|
||||
exit 1
|
||||
fi
|
||||
|
||||
OPTIONS=om:hdp:v
|
||||
LONGOPTS=once,memorize:,help,delete,path:,verbose
|
||||
OPTIONS=omhdp:v
|
||||
LONGOPTS=once,distinct,help,delete,path:,verbose
|
||||
|
||||
! PARSED=$(getopt --options=$OPTIONS --longoptions=$LONGOPTS --name "$0" -- "$@")
|
||||
if [[ ${PIPESTATUS[0]} -ne 0 ]]; then
|
||||
@ -34,7 +34,7 @@ fi
|
||||
eval set -- "$PARSED"
|
||||
|
||||
declare -a watchdir
|
||||
help=n verbose=n delete=n once=n memodir=
|
||||
help=n verbose=n delete=n once=n distinct=n
|
||||
while true; do
|
||||
case "$1" in
|
||||
-h|--help)
|
||||
@ -57,9 +57,9 @@ while true; do
|
||||
watchdir+=("$2")
|
||||
shift 2
|
||||
;;
|
||||
-m|--memorize)
|
||||
memodir="$2"
|
||||
shift 2
|
||||
-m|--distinct)
|
||||
distinct=y
|
||||
shift
|
||||
;;
|
||||
--)
|
||||
shift
|
||||
@ -72,6 +72,36 @@ while true; do
|
||||
esac
|
||||
done
|
||||
|
||||
|
||||
# Print the help text to stdout. The current value of every option is
# shown in parentheses next to its description.
showUsage() {
    cat <<EOF
Upload files in a directory

Usage: $0 [options] url url ...

Options:
 -v | --verbose Print more to stdout. (value: $verbose)
 -d | --delete Delete the file if successfully uploaded. (value: $delete)
 -p | --path <dir> The directories to watch. This is required. (value: ${watchdir[@]})
 -h | --help Prints this help text. (value: $help)
 -m | --distinct Optional. Upload only if the file doesn't already exist. (value: $distinct)
 -o | --once Instead of watching, upload all (pdf) files in that dir. (value: $once)

Arguments:
 A list of URLs to upload the files to.

Example: Watch directory
$0 --path ~/Downloads -m -dv http://localhost:7880/api/v1/open/upload/item/abcde-12345-abcde-12345

Example: Upload all files in a directory
$0 --path ~/Downloads -m -dv --once http://localhost:7880/api/v1/open/upload/item/abcde-12345-abcde-12345

EOF
}
|
||||
|
||||
if [ "$help" = "y" ]; then
|
||||
showUsage
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# handle non-option arguments
|
||||
if [[ $# -eq 0 ]]; then
|
||||
echo "$0: No upload URLs given."
|
||||
@ -117,15 +147,22 @@ checksum() {
|
||||
$SHA256_CMD "$1" | cut -d' ' -f1 | xargs
|
||||
}
|
||||
|
||||
# Ask the server whether a file is already known.
#
#   $1  upload URL; every "upload/item" in it is rewritten to "checkfile"
#   $2  path of the local file
#
# Succeeds (exit status of grep) when the response contains
# '"exists":true'.
checkFile() {
    local url=$(echo "$1" | sed 's,upload/item,checkfile,g')
    local file="$2"
    # Hash the file only once and reuse the digest for both the trace
    # line and the request, so they always refer to the same URL.
    local sum=$(checksum "$file")
    trace "Check file: $url/$sum"
    $CURL_CMD -XGET -s "$url/$sum" | (2>&1 1>/dev/null grep '"exists":true')
}
|
||||
|
||||
process() {
|
||||
file="$1"
|
||||
info "---- Processing $file ----------"
|
||||
declare -i curlrc=0
|
||||
set +e
|
||||
for url in $urls; do
|
||||
if [ -n "$memodir" ] && [ -f "$memodir/.docspell-consume" ]; then
|
||||
if [ "$distinct" = "y" ]; then
|
||||
trace "- Checking if $file has been uploaded to $url already"
|
||||
cat "$memodir/.docspell-consume" | grep "$url" | (2>&1 1>/dev/null grep "$(checksum "$file")")
|
||||
checkFile "$url" "$file"
|
||||
if [ $? -eq 0 ]; then
|
||||
info "- Skipping file '$file' because it has been uploaded in the past."
|
||||
continue
|
||||
@ -137,13 +174,6 @@ process() {
|
||||
curlrc=$(expr $curlrc + $rc)
|
||||
if [ $rc -ne 0 ]; then
|
||||
trace "Upload to '$url' failed!"
|
||||
else
|
||||
if [ -n "$memodir" ]; then
|
||||
trace "- Adding file '$file' to list of uploaded files for '$url'"
|
||||
set +C
|
||||
echo "$(checksum "$file") : $url" >> "$memodir/.docspell-consume"
|
||||
set -C
|
||||
fi
|
||||
fi
|
||||
done
|
||||
set -e
|
||||
@ -163,37 +193,6 @@ process() {
|
||||
fi
|
||||
}
|
||||
|
||||
showUsage() {
|
||||
echo "Upload files in a directory"
|
||||
echo ""
|
||||
echo "Usage: $0 [options] url url ..."
|
||||
echo
|
||||
echo "Options:"
|
||||
echo " -v | --verbose Print more to stdout. (value: $verbose)"
|
||||
echo " -d | --delete Delete the file if successfully uploaded. (value: $delete)"
|
||||
echo " -p | --path <dir> The directories to watch. This is required. (value: ${watchdir[@]})"
|
||||
echo " -h | --help Prints this help text. (value: $help)"
|
||||
echo " -m | --memorize <dir> Optional directory (writable) to store checksums of"
|
||||
echo " uploaded files. This is used to skip duplicates. (value: $memodir)"
|
||||
echo " -o | --once Instead of watching, upload all (pdf) files in that dir. (value: $once)"
|
||||
echo ""
|
||||
echo "Arguments:"
|
||||
echo " A list of URLs to upload the files to."
|
||||
echo ""
|
||||
echo "Example: Watch directory"
|
||||
echo "$0 --path ~/Downloads -m ~/ -dv http://localhost:7880/api/v1/open/upload/item/abcde-12345-abcde-12345"
|
||||
echo ""
|
||||
echo "Example: Upload all files in a directory"
|
||||
echo "$0 --path ~/Downloads -m ~/ -dv --once http://localhost:7880/api/v1/open/upload/item/abcde-12345-abcde-12345"
|
||||
echo ""
|
||||
}
|
||||
|
||||
|
||||
if [ "$help" = "y" ]; then
|
||||
showUsage
|
||||
exit 0
|
||||
fi
|
||||
|
||||
if [ "$once" = "y" ]; then
|
||||
info "Uploading all files in '$watchdir'."
|
||||
for dir in "${watchdir[@]}"; do
|
||||
|
25
tools/ds.sh
25
tools/ds.sh
@ -23,6 +23,7 @@ CURL_CMD="curl"
|
||||
FILE_CMD="file"
|
||||
GREP_CMD="grep"
|
||||
MKTEMP_CMD="mktemp"
|
||||
SHA256_CMD="sha256sum"
|
||||
|
||||
! getopt --test > /dev/null
|
||||
if [[ ${PIPESTATUS[0]} -ne 4 ]]; then
|
||||
@ -78,7 +79,17 @@ info() {
|
||||
echo "$1"
|
||||
}
|
||||
|
||||
upload() {
|
||||
# Print the checksum of the file given as $1: the first space-separated
# field of the $SHA256_CMD output, passed through xargs.
checksum() {
    local target="$1"
    $SHA256_CMD "$target" |
        cut -d' ' -f1 |
        xargs
}
|
||||
|
||||
# Ask the server whether a file is already known.
#
#   $1  upload URL; every "upload/item" in it is rewritten to "checkfile"
#   $2  path of the local file
#
# Succeeds (exit status of grep) when the response contains
# '"exists":true'.
checkFile() {
    local file="$2"
    local url=$(echo "$1" | sed 's,upload/item,checkfile,g')
    local probe="$url/$(checksum "$file")"
    $CURL_CMD -XGET -s "$probe" | grep '"exists":true' 2>&1 1>/dev/null
}
|
||||
|
||||
upload_file() {
|
||||
tf=$($MKTEMP_CMD) rc=0
|
||||
$CURL_CMD -# -o "$tf" --stderr "$tf" -w "%{http_code}" -XPOST -F file=@"$1" "$2" | (2>&1 1>/dev/null grep 200)
|
||||
rc=$(expr $rc + $?)
|
||||
@ -96,6 +107,16 @@ upload() {
|
||||
fi
|
||||
}
|
||||
|
||||
# Upload a file unless the server already has it.
#
#   $1  path of the local file
#   $2  upload URL
#
# Returns 0 when the file already exists at the URL, otherwise the
# exit status of upload_file.
upload() {
    # checkFile is used directly as the condition: a non-zero status only
    # means "not present" and must not abort the script under `set -e`.
    if checkFile "$2" "$1"; then
        info "File already exists at url $2"
        return 0
    fi
    upload_file "$1" "$2"
}
|
||||
|
||||
showUsage() {
|
||||
info "Upload files to docspell"
|
||||
info ""
|
||||
@ -162,7 +183,7 @@ for file in $*; do
|
||||
set +e
upload "$file" "$url"
# Capture the status of upload immediately. Inside the condition below,
# `$?` would be the status of the preceding `[ "$delete" = "y" ]` test
# (always 0 when reached), so the file would be deleted even after a
# failed upload.
rc=$?
set -e
if [ "$delete" = "y" ] && [ $rc -eq 0 ]; then
    info "Deleting file: $file"
    rm -f "$file"
fi
|
||||
|
Loading…
x
Reference in New Issue
Block a user