diff --git a/modules/backend/src/main/scala/docspell/backend/ops/OItem.scala b/modules/backend/src/main/scala/docspell/backend/ops/OItem.scala
index 72dd7e16..91194ed7 100644
--- a/modules/backend/src/main/scala/docspell/backend/ops/OItem.scala
+++ b/modules/backend/src/main/scala/docspell/backend/ops/OItem.scala
@@ -1,6 +1,7 @@
 package docspell.backend.ops
 
 import fs2.Stream
+import cats.data.OptionT
 import cats.implicits._
 import cats.effect.{Effect, Resource}
 import doobie._
@@ -11,6 +12,7 @@ import OItem.{AttachmentData, ItemData, ListItem, Query}
 import bitpeace.{FileMeta, RangeDef}
 import docspell.common.{Direction, Ident, ItemState, MetaProposalList, Timestamp}
 import docspell.store.records.{RAttachment, RAttachmentMeta, RItem, RTagItem}
+import docspell.store.records.RSource
 
 trait OItem[F[_]] {
 
@@ -47,6 +49,11 @@ trait OItem[F[_]] {
   def delete(itemId: Ident, collective: Ident): F[Int]
 
   def findAttachmentMeta(id: Ident, collective: Ident): F[Option[RAttachmentMeta]]
+
+  def findByFileCollective(checksum: String, collective: Ident): F[Vector[RItem]]
+
+  def findByFileSource(checksum: String, sourceId: Ident): F[Vector[RItem]]
+
 }
 
 object OItem {
@@ -163,5 +170,15 @@ object OItem {
 
       def findAttachmentMeta(id: Ident, collective: Ident): F[Option[RAttachmentMeta]] =
         store.transact(QAttachment.getAttachmentMeta(id, collective))
+
+      def findByFileCollective(checksum: String, collective: Ident): F[Vector[RItem]] =
+        store.transact(QItem.findByChecksum(checksum, collective))
+
+      def findByFileSource(checksum: String, sourceId: Ident): F[Vector[RItem]] =
+        store.transact((for {
+          coll  <- OptionT(RSource.findCollective(sourceId))
+          items <- OptionT.liftF(QItem.findByChecksum(checksum, coll))
+        } yield items).getOrElse(Vector.empty))
+
     })
 }
diff --git a/modules/restapi/src/main/resources/docspell-openapi.yml b/modules/restapi/src/main/resources/docspell-openapi.yml
index cc1a7efa..563e5687 100644
--- a/modules/restapi/src/main/resources/docspell-openapi.yml
+++ b/modules/restapi/src/main/resources/docspell-openapi.yml
@@ -47,6 +47,26 @@ paths:
             application/json:
               schema:
                 $ref: "#/components/schemas/AuthResult"
+  /open/checkfile/{id}/{checksum}:
+    get:
+      tags: [ Upload ]
+      summary: Check if a file is in docspell.
+      description: |
+        Checks if a file with the given SHA-256 checksum is in
+        docspell. The id is a *source id* configured by a collective.
+
+        The result shows all items that contain a file with the given
+        checksum.
+      parameters:
+        - $ref: "#/components/parameters/id"
+        - $ref: "#/components/parameters/checksum"
+      responses:
+        200:
+          description: Ok
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/CheckFileResult"
   /open/upload/item/{id}:
     post:
       tags: [ Upload ]
@@ -95,6 +115,25 @@ paths:
             application/json:
               schema:
                 $ref: "#/components/schemas/BasicResult"
+  /sec/checkfile/{checksum}:
+    get:
+      tags: [ Upload ]
+      summary: Check if a file is in docspell.
+      description: |
+        Checks if a file with the given SHA-256 checksum is in
+        docspell.
+
+        The result shows all items that contain a file with the given
+        checksum.
+      parameters:
+        - $ref: "#/components/parameters/checksum"
+      responses:
+        200:
+          description: Ok
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/CheckFileResult"
   /sec/upload:
     post:
       tags: [ Upload ]
@@ -1169,6 +1208,46 @@ paths:
                 $ref: "#/components/schemas/BasicResult"
 components:
   schemas:
+    CheckFileResult:
+      description: |
+        Results when searching for file checksums.
+      required:
+        - exists
+        - items
+      properties:
+        exists:
+          type: boolean
+        items:
+          type: array
+          items:
+            $ref: "#/components/schemas/BasicItem"
+    BasicItem:
+      description: |
+        Basic properties about an item.
+      required:
+        - id
+        - name
+        - direction
+        - state
+        - created
+      properties:
+        id:
+          type: string
+          format: ident
+        name:
+          type: string
+        direction:
+          type: string
+          format: direction
+        state:
+          type: string
+          format: itemstate
+        created:
+          type: integer
+          format: date-time
+        itemDate:
+          type: integer
+          format: date-time
     GenInvite:
       description: |
         A request to generate a new invitation key.
@@ -2083,3 +2162,10 @@ components:
       required: false
       schema:
         type: boolean
+    checksum:
+      name: checksum
+      in: path
+      description: A SHA-256 checksum
+      required: true
+      schema:
+        type: string
diff --git a/modules/restserver/src/main/scala/docspell/restserver/RestServer.scala b/modules/restserver/src/main/scala/docspell/restserver/RestServer.scala
index ae5b254d..c88dfa9b 100644
--- a/modules/restserver/src/main/scala/docspell/restserver/RestServer.scala
+++ b/modules/restserver/src/main/scala/docspell/restserver/RestServer.scala
@@ -31,7 +31,7 @@ object RestServer {
       "/api/v1/sec/" -> Authenticate(restApp.backend.login, cfg.auth) { token =>
         securedRoutes(cfg, restApp, token)
       },
-      "/api/doc" -> templates.doc,
+      "/api/doc"    -> templates.doc,
       "/app/assets" -> WebjarRoutes.appRoutes[F](blocker),
       "/app"        -> templates.app
     ).orNotFound
@@ -68,13 +68,15 @@ object RestServer {
       "queue"      -> JobQueueRoutes(restApp.backend, token),
       "item"       -> ItemRoutes(restApp.backend, token),
       "attachment" -> AttachmentRoutes(restApp.backend, token),
-      "upload"     -> UploadRoutes.secured(restApp.backend, cfg, token)
+      "upload"     -> UploadRoutes.secured(restApp.backend, cfg, token),
+      "checkfile"  -> CheckFileRoutes.secured(restApp.backend, token)
     )
 
   def openRoutes[F[_]: Effect](cfg: Config, restApp: RestApp[F]): HttpRoutes[F] =
     Router(
-      "auth"   -> LoginRoutes.login(restApp.backend.login, cfg),
-      "signup" -> RegisterRoutes(restApp.backend, cfg),
-      "upload" -> UploadRoutes.open(restApp.backend, cfg)
+      "auth"      -> LoginRoutes.login(restApp.backend.login, cfg),
+      "signup"    -> RegisterRoutes(restApp.backend, cfg),
+      "upload"    -> UploadRoutes.open(restApp.backend, cfg),
+      "checkfile" -> CheckFileRoutes.open(restApp.backend)
     )
 }
diff --git a/modules/restserver/src/main/scala/docspell/restserver/routes/CheckFileRoutes.scala b/modules/restserver/src/main/scala/docspell/restserver/routes/CheckFileRoutes.scala
new file mode 100644
index 00000000..fb6cf7b2
--- /dev/null
+++ b/modules/restserver/src/main/scala/docspell/restserver/routes/CheckFileRoutes.scala
@@ -0,0 +1,54 @@
+package docspell.restserver.routes
+
+import cats.effect._
+import cats.implicits._
+import docspell.backend.BackendApp
+import docspell.backend.auth.AuthToken
+import docspell.common.Ident
+import docspell.restapi.model.{BasicItem, CheckFileResult}
+import docspell.restserver.http4s.ResponseGenerator
+import org.http4s.HttpRoutes
+import org.http4s.circe.CirceEntityEncoder._
+import org.http4s.dsl.Http4sDsl
+import docspell.store.records.RItem
+
+/** Routes reporting which items contain a file with a given checksum. */
+object CheckFileRoutes {
+
+  /** `GET /{checksum}`: searches the collective of the authenticated user. */
+  def secured[F[_]: Effect](backend: BackendApp[F], user: AuthToken): HttpRoutes[F] = {
+    val dsl = new Http4sDsl[F] with ResponseGenerator[F] {}
+    import dsl._
+
+    HttpRoutes.of {
+      case GET -> Root / checksum =>
+        for {
+          items <- backend.item.findByFileCollective(checksum, user.account.collective)
+          resp  <- Ok(convert(items))
+        } yield resp
+
+    }
+  }
+
+  /** `GET /{id}/{checksum}`: searches via the source id given in the path. */
+  def open[F[_]: Effect](backend: BackendApp[F]): HttpRoutes[F] = {
+    val dsl = new Http4sDsl[F] with ResponseGenerator[F] {}
+    import dsl._
+
+    HttpRoutes.of {
+      case GET -> Root / Ident(id) / checksum =>
+        for {
+          items <- backend.item.findByFileSource(checksum, id)
+          resp  <- Ok(convert(items))
+        } yield resp
+    }
+  }
+
+  /** Maps found items to the REST model; `exists` is true iff at least one item matched. */
+  private def convert(v: Vector[RItem]): CheckFileResult =
+    CheckFileResult(
+      v.nonEmpty,
+      v.map(r => BasicItem(r.id, r.name, r.direction, r.state, r.created, r.itemDate)).toList
+    )
+
+}
diff --git a/modules/store/src/main/scala/docspell/store/queries/QItem.scala b/modules/store/src/main/scala/docspell/store/queries/QItem.scala
index 35d4dc5e..d5a80ab1 100644
--- a/modules/store/src/main/scala/docspell/store/queries/QItem.scala
+++ b/modules/store/src/main/scala/docspell/store/queries/QItem.scala
@@ -239,6 +239,19 @@ object QItem {
     q.query[RItem].to[Vector]
   }
 
+  def findByChecksum(checksum: String, collective: Ident): ConnectionIO[Vector[RItem]] = {
+    val IC = RItem.Columns.all.map(_.prefix("i"))
+    val aItem = RAttachment.Columns.itemId.prefix("a")
+    val iId = RItem.Columns.id.prefix("i")
+    val iColl = RItem.Columns.cid.prefix("i")
+
+    val from = RItem.table ++ fr"i INNER JOIN" ++ RAttachment.table ++ fr"a ON" ++ aItem.is(iId) ++
+      fr"INNER JOIN filemeta m ON m.id = a.filemetaid"
+    selectSimple(IC, from, and(fr"m.checksum = $checksum", iColl.is(collective)))
+      .query[RItem]
+      .to[Vector]
+  }
+
   private def queryWildcard(value: String): String = {
     def prefix(n: String) =
       if (n.startsWith("*")) s"%${n.substring(1)}"
diff --git a/tools/consumedir.sh b/tools/consumedir.sh
index c207da9c..c3cd0d5b 100755
--- a/tools/consumedir.sh
+++ b/tools/consumedir.sh
@@ -20,8 +20,8 @@ if [[ ${PIPESTATUS[0]} -ne 4 ]]; then
     exit 1
 fi
 
-OPTIONS=om:hdp:v
-LONGOPTS=once,memorize:,help,delete,path:,verbose
+OPTIONS=omhdp:v
+LONGOPTS=once,distinct,help,delete,path:,verbose
 
 ! PARSED=$(getopt --options=$OPTIONS --longoptions=$LONGOPTS --name "$0" -- "$@")
 if [[ ${PIPESTATUS[0]} -ne 0 ]]; then
@@ -34,7 +34,7 @@ fi
 eval set -- "$PARSED"
 
 declare -a watchdir
-help=n verbose=n delete=n once=n memodir=
+help=n verbose=n delete=n once=n distinct=n
 while true; do
     case "$1" in
         -h|--help)
@@ -57,9 +57,9 @@ while true; do
             watchdir+=("$2")
             shift 2
             ;;
-        -m|--memorize)
-            memodir="$2"
-            shift 2
+        -m|--distinct)
+            distinct=y
+            shift
             ;;
         --)
             shift
@@ -72,6 +72,36 @@ while true; do
     esac
 done
 
+
+showUsage() {
+    echo "Upload files in a directory"
+    echo ""
+    echo "Usage: $0 [options] url url ..."
+    echo
+    echo "Options:"
+    echo " -v | --verbose Print more to stdout. (value: $verbose)"
+    echo " -d | --delete Delete the file if successfully uploaded. (value: $delete)"
+    echo " -p | --path The directories to watch. This is required. (value: ${watchdir[@]})"
+    echo " -h | --help Prints this help text. (value: $help)"
+    echo " -m | --distinct Optional. Upload only if the file doesn't already exist. (value: $distinct)"
+    echo " -o | --once Instead of watching, upload all (pdf) files in that dir. (value: $once)"
+    echo ""
+    echo "Arguments:"
+    echo " A list of URLs to upload the files to."
+    echo ""
+    echo "Example: Watch directory"
+    echo "$0 --path ~/Downloads -m -dv http://localhost:7880/api/v1/open/upload/item/abcde-12345-abcde-12345"
+    echo ""
+    echo "Example: Upload all files in a directory"
+    echo "$0 --path ~/Downloads -m -dv --once http://localhost:7880/api/v1/open/upload/item/abcde-12345-abcde-12345"
+    echo ""
+}
+
+if [ "$help" = "y" ]; then
+    showUsage
+    exit 0
+fi
+
 # handle non-option arguments
 if [[ $# -eq 0 ]]; then
     echo "$0: No upload URLs given."
@@ -117,15 +147,22 @@ checksum() {
     $SHA256_CMD "$1" | cut -d' ' -f1 | xargs
 }
 
+checkFile() {
+    local url=$(echo "$1" | sed 's,upload/item,checkfile,g')
+    local file="$2"
+    trace "Check file: $url/$(checksum "$file")"
+    $CURL_CMD -XGET -s "$url/$(checksum "$file")" | (2>&1 1>/dev/null grep '"exists":true')
+}
+
 process() {
     file="$1"
     info "---- Processing $file ----------"
     declare -i curlrc=0
     set +e
     for url in $urls; do
-        if [ -n "$memodir" ] && [ -f "$memodir/.docspell-consume" ]; then
+        if [ "$distinct" = "y" ]; then
             trace "- Checking if $file has been uploaded to $url already"
-            cat "$memodir/.docspell-consume" | grep "$url" | (2>&1 1>/dev/null grep "$(checksum "$file")")
+            checkFile "$url" "$file"
             if [ $? -eq 0 ]; then
                 info "- Skipping file '$file' because it has been uploaded in the past."
                 continue
@@ -137,13 +174,6 @@ process() {
         curlrc=$(expr $curlrc + $rc)
         if [ $rc -ne 0 ]; then
             trace "Upload to '$url' failed!"
-        else
-            if [ -n "$memodir" ]; then
-                trace "- Adding file '$file' to list of uploaded files for '$url'"
-                set +C
-                echo "$(checksum "$file") : $url" >> "$memodir/.docspell-consume"
-                set -C
-            fi
         fi
     done
     set -e
@@ -163,37 +193,6 @@ process() {
     fi
 }
 
-showUsage() {
-    echo "Upload files in a directory"
-    echo ""
-    echo "Usage: $0 [options] url url ..."
-    echo
-    echo "Options:"
-    echo " -v | --verbose Print more to stdout. (value: $verbose)"
-    echo " -d | --delete Delete the file if successfully uploaded. (value: $delete)"
-    echo " -p | --path The directories to watch. This is required. (value: ${watchdir[@]})"
-    echo " -h | --help Prints this help text. (value: $help)"
-    echo " -m | --memorize Optional directory (writable) to store checksums of"
-    echo " uploaded files. This is used to skip duplicates. (value: $memodir)"
-    echo " -o | --once Instead of watching, upload all (pdf) files in that dir. (value: $once)"
-    echo ""
-    echo "Arguments:"
-    echo " A list of URLs to upload the files to."
-    echo ""
-    echo "Example: Watch directory"
-    echo "$0 --path ~/Downloads -m ~/ -dv http://localhost:7880/api/v1/open/upload/item/abcde-12345-abcde-12345"
-    echo ""
-    echo "Example: Upload all files in a directory"
-    echo "$0 --path ~/Downloads -m ~/ -dv --once http://localhost:7880/api/v1/open/upload/item/abcde-12345-abcde-12345"
-    echo ""
-}
-
-
-if [ "$help" = "y" ]; then
-    showUsage
-    exit 0
-fi
-
 if [ "$once" = "y" ]; then
     info "Uploading all files in '$watchdir'."
     for dir in "${watchdir[@]}"; do
diff --git a/tools/ds.sh b/tools/ds.sh
index f6e7b77b..de97158f 100755
--- a/tools/ds.sh
+++ b/tools/ds.sh
@@ -23,6 +23,7 @@ CURL_CMD="curl"
 FILE_CMD="file"
 GREP_CMD="grep"
 MKTEMP_CMD="mktemp"
+SHA256_CMD="sha256sum"
 
 ! getopt --test > /dev/null
 if [[ ${PIPESTATUS[0]} -ne 4 ]]; then
@@ -78,7 +79,17 @@ info() {
     echo "$1"
 }
 
-upload() {
+checksum() {
+    $SHA256_CMD "$1" | cut -d' ' -f1 | xargs
+}
+
+checkFile() {
+    local url=$(echo "$1" | sed 's,upload/item,checkfile,g')
+    local file="$2"
+    $CURL_CMD -XGET -s "$url/$(checksum "$file")" | (2>&1 1>/dev/null grep '"exists":true')
+}
+
+upload_file() {
     tf=$($MKTEMP_CMD) rc=0
     $CURL_CMD -# -o "$tf" --stderr "$tf" -w "%{http_code}" -XPOST -F file=@"$1" "$2" | (2>&1 1>/dev/null grep 200)
     rc=$(expr $rc + $?)
@@ -96,6 +107,16 @@ upload() {
     fi
 }
 
+upload() {
+    checkFile "$2" "$1"
+    if [ $? -eq 0 ]; then
+        info "File already exists at url $2"
+        return 0
+    else
+        upload_file "$1" "$2"
+    fi
+}
+
 showUsage() {
     info "Upload files to docspell"
     info ""
@@ -162,7 +183,9 @@ for file in $*; do
     set +e
     upload "$file" "$url"
+    # Capture upload's exit status right away: "set -e" and the "$delete" test
+    # below each overwrite $?, so testing $? later never reflects the upload.
+    rc=$?
     set -e
     if [ "$delete" = "y" ] && [ $rc -eq 0 ]; then
         info "Deleting file: $file"
         rm -f "$file"
     fi