From dd89e05cc2c79f3eb5c0e3e0934ed71e6e764cce Mon Sep 17 00:00:00 2001 From: Eike Kettner Date: Mon, 26 Oct 2020 19:48:29 +0100 Subject: [PATCH 1/3] Convert exceptions when converting to pdf into an error result The file processing tries pdf conversion once and keeps going if it fails. Some errors (e.g. timeouts) are raised via an exception. Issue: #387 --- .../main/scala/docspell/convert/extern/ExternConv.scala | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/modules/convert/src/main/scala/docspell/convert/extern/ExternConv.scala b/modules/convert/src/main/scala/docspell/convert/extern/ExternConv.scala index dcb02206..c0212f09 100644 --- a/modules/convert/src/main/scala/docspell/convert/extern/ExternConv.scala +++ b/modules/convert/src/main/scala/docspell/convert/extern/ExternConv.scala @@ -58,6 +58,13 @@ private[extern] object ExternConv { } .compile .lastOrError + .attempt + .flatMap { + case Right(v) => + v.pure[F] + case Left(ex) => + handler.run(ConversionResult.failure(ex)) + } def readResult[F[_]: Sync: ContextShift]( blocker: Blocker, From b59696a9d3fdfaa82e2a5ea5401006774f105f87 Mon Sep 17 00:00:00 2001 From: Eike Kettner Date: Mon, 26 Oct 2020 22:02:31 +0100 Subject: [PATCH 2/3] Make sure to only remove/retry items in premature states --- .../docspell/joex/process/CreateItem.scala | 3 ++- .../docspell/joex/process/ItemHandler.scala | 7 +++-- .../scala/docspell/store/queries/QItem.scala | 27 ++++++++++++------- 3 files changed, 25 insertions(+), 12 deletions(-) diff --git a/modules/joex/src/main/scala/docspell/joex/process/CreateItem.scala b/modules/joex/src/main/scala/docspell/joex/process/CreateItem.scala index f7dc0ada..92d275fa 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/CreateItem.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/CreateItem.scala @@ -121,9 +121,10 @@ object CreateItem { private def findExisting[F[_]: Sync]: Task[F, ProcessItemArgs, Option[ItemData]] = Task { ctx => + val states = ItemState.invalidStates.toList.toSet val fileMetaIds = ctx.args.files.map(_.fileMetaId).toSet for { - cand <- ctx.store.transact(QItem.findByFileIds(fileMetaIds.toSeq)) + cand <- ctx.store.transact(QItem.findByFileIds(fileMetaIds.toSeq, states)) _ <- if (cand.nonEmpty) ctx.logger.warn(s"Found ${cand.size} existing item with these files.") diff --git a/modules/joex/src/main/scala/docspell/joex/process/ItemHandler.scala b/modules/joex/src/main/scala/docspell/joex/process/ItemHandler.scala index a5ef178b..757493d6 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/ItemHandler.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/ItemHandler.scala @@ -103,10 +103,13 @@ object ItemHandler { ) } - def deleteByFileIds[F[_]: Sync: ContextShift]: Task[F, Args, Unit] = + private def deleteByFileIds[F[_]: Sync: ContextShift]: Task[F, Args, Unit] = Task { ctx => + val states = ItemState.invalidStates.toList.toSet for { - items <- ctx.store.transact(QItem.findByFileIds(ctx.args.files.map(_.fileMetaId))) + items <- ctx.store.transact( + QItem.findByFileIds(ctx.args.files.map(_.fileMetaId), states) + ) _ <- if (items.nonEmpty) ctx.logger.info(s"Deleting items ${items.map(_.id.id)}") else diff --git a/modules/store/src/main/scala/docspell/store/queries/QItem.scala b/modules/store/src/main/scala/docspell/store/queries/QItem.scala index 1a66d42e..a5311500 100644 --- a/modules/store/src/main/scala/docspell/store/queries/QItem.scala +++ b/modules/store/src/main/scala/docspell/store/queries/QItem.scala @@ -493,13 +493,15 @@ object QItem { private def findByFileIdsQuery( fileMetaIds: NonEmptyList[Ident], - limit: Option[Int] + limit: Option[Int], + states: Set[ItemState] ): Fragment = { val IC = RItem.Columns.all.map(_.prefix("i")) val aItem = RAttachment.Columns.itemId.prefix("a") val aId = RAttachment.Columns.id.prefix("a") val aFileId = RAttachment.Columns.fileId.prefix("a") val iId = RItem.Columns.id.prefix("i") + val iState = RItem.Columns.state.prefix("i") val sId = RAttachmentSource.Columns.id.prefix("s") val sFileId = RAttachmentSource.Columns.fileId.prefix("s") val rId = RAttachmentArchive.Columns.id.prefix("r") @@ -516,11 +518,15 @@ object QItem { fr"LEFT OUTER JOIN" ++ RAttachmentArchive.table ++ fr"r ON" ++ aId.is(rId) ++ fr"LEFT OUTER JOIN" ++ RFileMeta.table ++ fr"m3 ON" ++ m3Id.is(rFileId) - val q = selectSimple( - IC, - from, - and(or(m1Id.isIn(fileMetaIds), m2Id.isIn(fileMetaIds), m3Id.isIn(fileMetaIds))) - ) + val fileCond = + or(m1Id.isIn(fileMetaIds), m2Id.isIn(fileMetaIds), m3Id.isIn(fileMetaIds)) + val cond = NonEmptyList.fromList(states.toList) match { + case Some(nel) => + and(fileCond, iState.isIn(nel)) + case None => + fileCond + } + val q = selectSimple(IC, from, cond) limit match { case Some(n) => q ++ fr"LIMIT $n" @@ -531,15 +537,18 @@ object QItem { def findOneByFileIds(fileMetaIds: Seq[Ident]): ConnectionIO[Option[RItem]] = NonEmptyList.fromList(fileMetaIds.toList) match { case Some(nel) => - findByFileIdsQuery(nel, Some(1)).query[RItem].option + findByFileIdsQuery(nel, Some(1), Set.empty).query[RItem].option case None => (None: Option[RItem]).pure[ConnectionIO] } - def findByFileIds(fileMetaIds: Seq[Ident]): ConnectionIO[Vector[RItem]] = + def findByFileIds( + fileMetaIds: Seq[Ident], + states: Set[ItemState] + ): ConnectionIO[Vector[RItem]] = NonEmptyList.fromList(fileMetaIds.toList) match { case Some(nel) => - findByFileIdsQuery(nel, None).query[RItem].to[Vector] + findByFileIdsQuery(nel, None, states).query[RItem].to[Vector] case None => Vector.empty[RItem].pure[ConnectionIO] } From ab1139523aa929f8de1d041edcc415b91025986f Mon Sep 17 00:00:00 2001 From: Eike Kettner Date: Mon, 26 Oct 2020 22:08:20 +0100 Subject: [PATCH 3/3] Let the convert-all task retry when pdf conversion fails --- .../joex/src/main/scala/docspell/joex/pdfconv/PdfConvTask.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/joex/src/main/scala/docspell/joex/pdfconv/PdfConvTask.scala b/modules/joex/src/main/scala/docspell/joex/pdfconv/PdfConvTask.scala index 07cc7c36..5069f0ec 100644 --- a/modules/joex/src/main/scala/docspell/joex/pdfconv/PdfConvTask.scala +++ b/modules/joex/src/main/scala/docspell/joex/pdfconv/PdfConvTask.scala @@ -110,7 +110,7 @@ object PdfConvTask { ctx.logger.warn(s"Unable to convert '${mime}' file ${ctx.args}: $reason") case ConversionResult.Failure(ex) => - ctx.logger.error(ex)(s"Failure converting file ${ctx.args}: ${ex.getMessage}") + Sync[F].raiseError(ex) }) def ocrMyPdf(lang: Language): F[Unit] =