Merge pull request #1443 from eikek/file-backends

File backends
This commit is contained in:
mergify[bot] 2022-03-13 14:44:32 +00:00 committed by GitHub
commit 7c85605687
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
165 changed files with 4044 additions and 1944 deletions

View File

@ -519,6 +519,29 @@ val pubsubNaive = project
)
.dependsOn(common, pubsubApi, store % "compile->compile;test->test")
// Scheduler API module, located in modules/scheduler/api and named
// "docspell-scheduler-api". It pulls in fs2-core and circe-core and builds on
// the loggingApi, common and pubsubApi projects.
val schedulerApi = project
.in(file("modules/scheduler/api"))
.disablePlugins(RevolverPlugin)
.settings(sharedSettings)
.withTestSettingsDependsOn(loggingScribe)
.settings(
name := "docspell-scheduler-api",
libraryDependencies ++=
Dependencies.fs2Core ++
Dependencies.circeCore
)
.dependsOn(loggingApi, common, pubsubApi)
// Scheduler implementation module, located in modules/scheduler/impl and named
// "docspell-scheduler-impl". It declares no library dependencies of its own and
// builds on the store, schedulerApi, notificationApi and pubsubApi projects.
val schedulerImpl = project
.in(file("modules/scheduler/impl"))
.disablePlugins(RevolverPlugin)
.settings(sharedSettings)
.withTestSettingsDependsOn(loggingScribe)
.settings(
name := "docspell-scheduler-impl"
)
.dependsOn(store, schedulerApi, notificationApi, pubsubApi)
val extract = project
.in(file("modules/extract"))
.disablePlugins(RevolverPlugin)
@ -641,7 +664,16 @@ val backend = project
Dependencies.http4sClient ++
Dependencies.emil
)
.dependsOn(store, notificationApi, joexapi, ftsclient, totp, pubsubApi, loggingApi)
.dependsOn(
store,
notificationApi,
joexapi,
ftsclient,
totp,
pubsubApi,
loggingApi,
schedulerApi
)
val oidc = project
.in(file("modules/oidc"))
@ -732,7 +764,8 @@ val joex = project
restapi,
ftssolr,
pubsubNaive,
notificationImpl
notificationImpl,
schedulerImpl
)
val restserver = project
@ -804,7 +837,8 @@ val restserver = project
ftssolr,
oidc,
pubsubNaive,
notificationImpl
notificationImpl,
schedulerImpl
)
// --- Website Documentation
@ -902,7 +936,9 @@ val root = project
pubsubApi,
pubsubNaive,
notificationApi,
notificationImpl
notificationImpl,
schedulerApi,
schedulerImpl
)
// --- Helpers

View File

@ -10,14 +10,13 @@ import cats.effect._
import docspell.backend.auth.Login
import docspell.backend.fulltext.CreateIndex
import docspell.backend.msg.JobQueuePublish
import docspell.backend.ops._
import docspell.backend.signup.OSignup
import docspell.ftsclient.FtsClient
import docspell.notification.api.{EventExchange, NotificationModule}
import docspell.pubsub.api.PubSubT
import docspell.scheduler.JobStoreModule
import docspell.store.Store
import docspell.store.usertask.UserTaskStore
import docspell.totp.Totp
import emil.Emil
@ -50,6 +49,7 @@ trait BackendApp[F[_]] {
def events: EventExchange[F]
def notification: ONotification[F]
def bookmarks: OQueryBookmarks[F]
def fileRepository: OFileRepository[F]
}
object BackendApp {
@ -59,29 +59,43 @@ object BackendApp {
javaEmil: Emil[F],
ftsClient: FtsClient[F],
pubSubT: PubSubT[F],
schedulerModule: JobStoreModule[F],
notificationMod: NotificationModule[F]
): Resource[F, BackendApp[F]] =
for {
utStore <- UserTaskStore(store)
queue <- JobQueuePublish(store, pubSubT, notificationMod)
totpImpl <- OTotp(store, Totp.default)
loginImpl <- Login[F](store, Totp.default)
signupImpl <- OSignup[F](store)
joexImpl <- OJoex(pubSubT)
collImpl <- OCollective[F](store, utStore, queue, joexImpl)
collImpl <- OCollective[F](
store,
schedulerModule.userTasks,
schedulerModule.jobs,
joexImpl
)
sourceImpl <- OSource[F](store)
tagImpl <- OTag[F](store)
equipImpl <- OEquipment[F](store)
orgImpl <- OOrganization(store)
uploadImpl <- OUpload(store, queue, joexImpl)
uploadImpl <- OUpload(store, schedulerModule.jobs, joexImpl)
nodeImpl <- ONode(store)
jobImpl <- OJob(store, joexImpl, pubSubT)
createIndex <- CreateIndex.resource(ftsClient, store)
itemImpl <- OItem(store, ftsClient, createIndex, queue, joexImpl)
itemImpl <- OItem(store, ftsClient, createIndex, schedulerModule.jobs, joexImpl)
itemSearchImpl <- OItemSearch(store)
fulltextImpl <- OFulltext(itemSearchImpl, ftsClient, store, queue, joexImpl)
fulltextImpl <- OFulltext(
itemSearchImpl,
ftsClient,
store,
schedulerModule.jobs,
joexImpl
)
mailImpl <- OMail(store, javaEmil)
userTaskImpl <- OUserTask(utStore, store, queue, joexImpl)
userTaskImpl <- OUserTask(
schedulerModule.userTasks,
store,
joexImpl
)
folderImpl <- OFolder(store)
customFieldsImpl <- OCustomFields(store)
simpleSearchImpl = OSimpleSearch(fulltextImpl, itemSearchImpl)
@ -91,6 +105,7 @@ object BackendApp {
)
notifyImpl <- ONotification(store, notificationMod)
bookmarksImpl <- OQueryBookmarks(store)
fileRepoImpl <- OFileRepository(store, schedulerModule.jobs, joexImpl)
} yield new BackendApp[F] {
val pubSub = pubSubT
val login = loginImpl
@ -118,5 +133,6 @@ object BackendApp {
val events = notificationMod
val notification = notifyImpl
val bookmarks = bookmarksImpl
val fileRepository = fileRepoImpl
}
}

View File

@ -6,9 +6,13 @@
package docspell.backend
import cats.data.{Validated, ValidatedNec}
import cats.implicits._
import docspell.backend.signup.{Config => SignupConfig}
import docspell.common._
import docspell.store.JdbcConfig
import docspell.store.file.FileRepositoryConfig
import emil.javamail.Settings
@ -21,10 +25,45 @@ case class Config(
def mailSettings: Settings =
Settings.defaultSettings.copy(debug = mailDebug)
}
object Config {
case class Files(chunkSize: Int, validMimeTypes: Seq[MimeType])
/** File related settings: the chunk size, the accepted mime types and the
  * configured file stores together with the id of the default store.
  */
case class Files(
    chunkSize: Int,
    validMimeTypes: Seq[MimeType],
    defaultStore: Ident,
    stores: Map[Ident, FileStoreConfig]
) {

  /** The subset of `stores` whose `enabled` flag is set. */
  val enabledStores: Map[Ident, FileStoreConfig] =
    stores.view.filter { case (_, storeCfg) => storeCfg.enabled }.toMap

  /** The configuration of the default store. This is a plain map lookup in
    * `enabledStores`, so it throws if the default store is missing or
    * disabled; `validate` reports that situation as an error instead.
    */
  def defaultStoreConfig: FileStoreConfig =
    enabledStores(defaultStore)

  /** The file repository config derived from the default store. */
  def defaultFileRepositoryConfig: FileRepositoryConfig =
    FileRepositoryConfig.fromFileStoreConfig(chunkSize, defaultStoreConfig)

  /** Looks up the store with the given id among all configured `stores` and
    * converts it into a file repository config.
    */
  def getFileRepositoryConfig(id: Ident): Option[FileRepositoryConfig] =
    for {
      storeCfg <- stores.get(id)
    } yield FileRepositoryConfig.fromFileStoreConfig(chunkSize, storeCfg)

  /** Checks that at least one store is enabled and that the default store is
    * among the enabled ones. Both problems are accumulated if both apply.
    */
  def validate: ValidatedNec[String, Files] = {
    val hasEnabledStore: ValidatedNec[String, Unit] =
      Validated.condNec(
        enabledStores.nonEmpty,
        (),
        "No file stores defined! Make sure at least one enabled store is present."
      )

    val hasDefaultStore: ValidatedNec[String, Unit] =
      Validated.condNec(
        enabledStores.contains(defaultStore),
        (),
        s"Default file store not present: ${defaultStore.id}"
      )

    (hasEnabledStore |+| hasDefaultStore).map(_ => this)
  }
}
}

View File

@ -12,80 +12,88 @@ import cats.implicits._
import docspell.backend.MailAddressCodec
import docspell.common._
import docspell.notification.api.PeriodicQueryArgs
import docspell.store.records.RJob
import docspell.scheduler.Job
object JobFactory extends MailAddressCodec {
def periodicQuery[F[_]: Sync](args: PeriodicQueryArgs, submitter: AccountId): F[RJob] =
for {
id <- Ident.randomId[F]
now <- Timestamp.current[F]
job = RJob.newJob(
id,
def integrityCheck[F[_]: Sync](
args: FileIntegrityCheckArgs,
submitter: AccountId = DocspellSystem.account
): F[Job[FileIntegrityCheckArgs]] =
Job.createNew(
FileIntegrityCheckArgs.taskName,
submitter.collective,
args,
s"Check integrity of files",
submitter.user,
Priority.High,
Some(FileIntegrityCheckArgs.taskName)
)
def fileCopy[F[_]: Sync](
args: FileCopyTaskArgs,
submitter: AccountId = DocspellSystem.account
): F[Job[FileCopyTaskArgs]] =
Job.createNew(
FileCopyTaskArgs.taskName,
submitter.collective,
args,
"Copying all files",
submitter.user,
Priority.High,
Some(FileCopyTaskArgs.taskName)
)
def periodicQuery[F[_]: Sync](
args: PeriodicQueryArgs,
submitter: AccountId
): F[Job[PeriodicQueryArgs]] =
Job.createNew(
PeriodicQueryArgs.taskName,
submitter.collective,
args,
s"Running periodic query, notify via ${args.channels.map(_.channelType)}",
now,
submitter.user,
Priority.Low,
None
)
} yield job
def makePageCount[F[_]: Sync](
args: MakePageCountArgs,
account: Option[AccountId]
): F[RJob] =
for {
id <- Ident.randomId[F]
now <- Timestamp.current[F]
job = RJob.newJob(
id,
): F[Job[MakePageCountArgs]] =
Job.createNew(
MakePageCountArgs.taskName,
account.map(_.collective).getOrElse(DocspellSystem.taskGroup),
args,
s"Find page-count metadata for ${args.attachment.id}",
now,
account.map(_.user).getOrElse(DocspellSystem.user),
Priority.Low,
Some(MakePageCountArgs.taskName / args.attachment)
)
} yield job
def makePreview[F[_]: Sync](
args: MakePreviewArgs,
account: Option[AccountId]
): F[RJob] =
for {
id <- Ident.randomId[F]
now <- Timestamp.current[F]
job = RJob.newJob(
id,
): F[Job[MakePreviewArgs]] =
Job.createNew(
MakePreviewArgs.taskName,
account.map(_.collective).getOrElse(DocspellSystem.taskGroup),
args,
s"Generate preview image",
now,
account.map(_.user).getOrElse(DocspellSystem.user),
Priority.Low,
Some(MakePreviewArgs.taskName / args.attachment)
)
} yield job
def allPreviews[F[_]: Sync](
args: AllPreviewsArgs,
submitter: Option[Ident]
): F[RJob] =
for {
id <- Ident.randomId[F]
now <- Timestamp.current[F]
} yield RJob.newJob(
id,
): F[Job[AllPreviewsArgs]] =
Job.createNew(
AllPreviewsArgs.taskName,
args.collective.getOrElse(DocspellSystem.taskGroup),
args,
"Create preview images",
now,
submitter.getOrElse(DocspellSystem.user),
Priority.Low,
Some(DocspellSystem.allPreviewTaskTracker)
@ -95,127 +103,91 @@ object JobFactory extends MailAddressCodec {
collective: Option[Ident],
submitter: Option[Ident],
prio: Priority
): F[RJob] =
for {
id <- Ident.randomId[F]
now <- Timestamp.current[F]
job = RJob.newJob(
id,
): F[Job[ConvertAllPdfArgs]] =
Job.createNew(
ConvertAllPdfArgs.taskName,
collective.getOrElse(DocspellSystem.taskGroup),
ConvertAllPdfArgs(collective),
s"Convert all pdfs not yet converted",
now,
submitter.getOrElse(DocspellSystem.user),
prio,
collective
.map(c => c / ConvertAllPdfArgs.taskName)
.orElse(ConvertAllPdfArgs.taskName.some)
)
} yield job
def reprocessItem[F[_]: Sync](
args: ReProcessItemArgs,
account: AccountId,
prio: Priority
): F[RJob] =
for {
id <- Ident.randomId[F]
now <- Timestamp.current[F]
job = RJob.newJob(
id,
): F[Job[ReProcessItemArgs]] =
Job.createNew(
ReProcessItemArgs.taskName,
account.collective,
args,
s"Re-process files of item ${args.itemId.id}",
now,
account.user,
prio,
Some(ReProcessItemArgs.taskName / args.itemId)
)
} yield job
def processItem[F[_]: Sync](
args: ProcessItemArgs,
account: AccountId,
prio: Priority,
tracker: Option[Ident]
): F[RJob] =
for {
id <- Ident.randomId[F]
now <- Timestamp.current[F]
job = RJob.newJob(
id,
): F[Job[ProcessItemArgs]] =
Job.createNew(
ProcessItemArgs.taskName,
account.collective,
args,
args.makeSubject,
now,
account.user,
prio,
tracker
)
} yield job
def processItems[F[_]: Sync](
args: Vector[ProcessItemArgs],
account: AccountId,
prio: Priority,
tracker: Option[Ident]
): F[Vector[RJob]] = {
def create(now: Timestamp, arg: ProcessItemArgs): F[RJob] =
Ident
.randomId[F]
.map(id =>
RJob.newJob(
id,
): F[Vector[Job[ProcessItemArgs]]] = {
def create(arg: ProcessItemArgs): F[Job[ProcessItemArgs]] =
Job.createNew(
ProcessItemArgs.taskName,
account.collective,
arg,
arg.makeSubject,
now,
account.user,
prio,
tracker
)
)
for {
now <- Timestamp.current[F]
jobs <- args.traverse(a => create(now, a))
} yield jobs
args.traverse(create)
}
def reIndexAll[F[_]: Sync]: F[RJob] =
for {
id <- Ident.randomId[F]
now <- Timestamp.current[F]
} yield RJob.newJob(
id,
def reIndexAll[F[_]: Sync]: F[Job[ReIndexTaskArgs]] =
Job.createNew(
ReIndexTaskArgs.taskName,
DocspellSystem.taskGroup,
ReIndexTaskArgs(None),
s"Recreate full-text index",
now,
"Recreate full-text index",
DocspellSystem.taskGroup,
Priority.Low,
Some(DocspellSystem.migrationTaskTracker)
)
def reIndex[F[_]: Sync](account: AccountId): F[RJob] =
for {
id <- Ident.randomId[F]
now <- Timestamp.current[F]
args = ReIndexTaskArgs(Some(account.collective))
} yield RJob.newJob(
id,
def reIndex[F[_]: Sync](account: AccountId): F[Job[ReIndexTaskArgs]] = {
val args = ReIndexTaskArgs(Some(account.collective))
Job.createNew(
ReIndexTaskArgs.taskName,
account.collective,
args,
s"Recreate full-text index",
now,
"Recreate full-text index",
account.user,
Priority.Low,
Some(ReIndexTaskArgs.tracker(args))
)
}
}

View File

@ -8,16 +8,19 @@ package docspell.backend.msg
import cats.data.NonEmptyList
import docspell.pubsub.api.{Topic, TypedTopic}
import docspell.pubsub.api.TypedTopic
import docspell.scheduler.msg._
/** All topics used in Docspell. */
object Topics {
/** A generic notification to the job executors to look for new work. */
val jobsNotify: TypedTopic[Unit] =
TypedTopic[Unit](Topic("jobs-notify"))
/** A list of all topics. It is required to list every topic in use here! */
val all: NonEmptyList[TypedTopic[_]] =
NonEmptyList.of(JobDone.topic, CancelJob.topic, jobsNotify, JobSubmitted.topic)
NonEmptyList.of(
JobDone.topic,
CancelJob.topic,
JobsNotify(),
JobSubmitted.topic,
PeriodicTaskNotify()
)
}

View File

@ -14,11 +14,11 @@ import docspell.backend.JobFactory
import docspell.backend.PasswordCrypt
import docspell.backend.ops.OCollective._
import docspell.common._
import docspell.scheduler.JobStore
import docspell.scheduler.usertask.{UserTask, UserTaskScope, UserTaskStore}
import docspell.store.UpdateResult
import docspell.store.queries.{QCollective, QUser}
import docspell.store.queue.JobQueue
import docspell.store.records._
import docspell.store.usertask.{UserTask, UserTaskScope, UserTaskStore}
import docspell.store.{AddResult, Store}
import com.github.eikek.calev._
@ -133,7 +133,7 @@ object OCollective {
def apply[F[_]: Async](
store: Store[F],
uts: UserTaskStore[F],
queue: JobQueue[F],
jobStore: JobStore[F],
joex: OJoex[F]
): Resource[F, OCollective[F]] =
Resource.pure[F, OCollective[F]](new OCollective[F] {
@ -196,32 +196,32 @@ object OCollective {
for {
id <- Ident.randomId[F]
args = LearnClassifierArgs(collective)
ut <- UserTask(
ut = UserTask(
id,
LearnClassifierArgs.taskName,
true,
CalEvent(WeekdayComponent.All, DateEvent.All, TimeEvent.All),
None,
args
).encode.toPeriodicTask(UserTaskScope(collective), args.makeSubject.some)
job <- ut.toJob
_ <- queue.insert(job)
)
_ <- uts
.updateOneTask(UserTaskScope(collective), args.makeSubject.some, ut)
_ <- joex.notifyAllNodes
} yield ()
def startEmptyTrash(args: EmptyTrashArgs): F[Unit] =
for {
id <- Ident.randomId[F]
ut <- UserTask(
ut = UserTask(
id,
EmptyTrashArgs.taskName,
true,
CalEvent(WeekdayComponent.All, DateEvent.All, TimeEvent.All),
None,
args
).encode.toPeriodicTask(UserTaskScope(args.collective), args.makeSubject.some)
job <- ut.toJob
_ <- queue.insert(job)
)
_ <- uts
.updateOneTask(UserTaskScope(args.collective), args.makeSubject.some, ut)
_ <- joex.notifyAllNodes
} yield ()
@ -321,7 +321,7 @@ object OCollective {
AllPreviewsArgs(Some(account.collective), storeMode),
Some(account.user)
)
_ <- queue.insertIfNew(job)
_ <- jobStore.insertIfNew(job.encode)
_ <- if (notifyJoex) joex.notifyAllNodes else ().pure[F]
} yield UpdateResult.success

View File

@ -0,0 +1,100 @@
/*
* Copyright 2020 Eike K. & Contributors
*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
package docspell.backend.ops
import cats.data.OptionT
import cats.effect._
import cats.implicits._
import docspell.backend.JobFactory
import docspell.backend.ops.OFileRepository.IntegrityResult
import docspell.common._
import docspell.scheduler.{Job, JobStore}
import docspell.store.Store
import scodec.bits.ByteVector
/** Operations on the file repository: copying files between repositories and
  * verifying file checksums.
  */
trait OFileRepository[F[_]] {
/** Submits a job that copies files as described by `args`. Returns the job if
  * it was inserted, or `None` if the job store did not insert it (presumably
  * because such a job already exists). When `notifyJoex` is true, the job
  * executors are notified.
  */
def cloneFileRepository(
args: FileCopyTaskArgs,
notifyJoex: Boolean
): F[Option[Job[FileCopyTaskArgs]]]
/** Submits a job that checks the integrity of the files selected by `part`.
  * Returns the job if it was inserted, or `None` if the job store did not
  * insert it. When `notifyJoex` is true, the job executors are notified.
  */
def checkIntegrityAll(
part: FileKeyPart,
notifyJoex: Boolean
): F[Option[Job[FileIntegrityCheckArgs]]]
/** Checks a single file by comparing a freshly computed hash of its content
  * against `hash`. If no `hash` is given, the checksum stored in the file
  * metadata is used; the result is `None` if that metadata cannot be found.
  */
def checkIntegrity(key: FileKey, hash: Option[ByteVector]): F[Option[IntegrityResult]]
}
object OFileRepository {

  /** The outcome of a single file check: `ok` is true when the expected and
    * the newly computed hash of the file at `key` are equal.
    */
  case class IntegrityResult(ok: Boolean, key: FileKey)

  /** Creates the implementation on top of the given store and job store. The
    * `joex` handle is used to notify the job executors about submitted jobs.
    */
  def apply[F[_]: Async](
      store: Store[F],
      jobStore: JobStore[F],
      joex: OJoex[F]
  ): Resource[F, OFileRepository[F]] =
    Resource.pure(new OFileRepository[F] {
      private[this] val logger = docspell.logging.getLogger[F]

      def cloneFileRepository(
          args: FileCopyTaskArgs,
          notifyJoex: Boolean
      ): F[Option[Job[FileCopyTaskArgs]]] =
        for {
          copyJob <- JobFactory.fileCopy(args)
          inserted <- jobStore.insertIfNew(copyJob.encode)
          // the notification is sent regardless of whether the job was inserted
          _ <- joex.notifyAllNodes.whenA(notifyJoex)
        } yield Option.when(inserted)(copyJob)

      def checkIntegrityAll(
          part: FileKeyPart,
          notifyJoex: Boolean
      ): F[Option[Job[FileIntegrityCheckArgs]]] =
        for {
          checkJob <- JobFactory.integrityCheck(FileIntegrityCheckArgs(part))
          inserted <- jobStore.insertIfNew(checkJob.encode)
          // the notification is sent regardless of whether the job was inserted
          _ <- joex.notifyAllNodes.whenA(notifyJoex)
        } yield Option.when(inserted)(checkJob)

      def checkIntegrity(
          key: FileKey,
          hash: Option[ByteVector]
      ): F[Option[IntegrityResult]] = {
        // Use the given hash, or fall back to the checksum in the file metadata.
        def expected: OptionT[F, ByteVector] =
          hash match {
            case Some(known) => OptionT.pure[F](known)
            case None        => OptionT(store.fileRepo.findMeta(key)).map(_.checksum)
          }

        // Stream the file content through sha256 and collect the digest.
        def computed: F[ByteVector] =
          logger.debugWith(s"Calculating new hash for $key")(
            _.data("fileKey", key)
          ) *>
            store.fileRepo
              .getBytes(key)
              .through(fs2.hash.sha256)
              .compile
              .foldChunks(ByteVector.empty)(_ ++ _.toByteVector)

        val check =
          for {
            _ <- OptionT.liftF(
              logger.debugWith(s"Checking file $key")(_.data("fileKey", key))
            )
            expectedHash <- expected
            actualHash <- OptionT.liftF(computed)
            result = IntegrityResult(expectedHash == actualHash, key)
            _ <- OptionT.liftF {
              if (result.ok) logger.debug(s"File hashes match for $key")
              else
                logger.warnWith(s"File hashes differ for: $key")(_.data("fileKey", key))
            }
          } yield result

        check.value
      }
    })
}

View File

@ -17,8 +17,8 @@ import docspell.common._
import docspell.ftsclient._
import docspell.query.ItemQuery._
import docspell.query.ItemQueryDsl._
import docspell.scheduler.JobStore
import docspell.store.queries.{QFolder, QItem, SelectedItem}
import docspell.store.queue.JobQueue
import docspell.store.records.RJob
import docspell.store.{Store, qb}
@ -81,7 +81,7 @@ object OFulltext {
itemSearch: OItemSearch[F],
fts: FtsClient[F],
store: Store[F],
queue: JobQueue[F],
jobStore: JobStore[F],
joex: OJoex[F]
): Resource[F, OFulltext[F]] =
Resource.pure[F, OFulltext[F]](new OFulltext[F] {
@ -90,7 +90,7 @@ object OFulltext {
for {
_ <- logger.info(s"Re-index all.")
job <- JobFactory.reIndexAll[F]
_ <- queue.insertIfNew(job) *> joex.notifyAllNodes
_ <- jobStore.insertIfNew(job.encode) *> joex.notifyAllNodes
} yield ()
def reindexCollective(account: AccountId): F[Unit] =
@ -102,7 +102,7 @@ object OFulltext {
job <- JobFactory.reIndex(account)
_ <-
if (exist.isDefined) ().pure[F]
else queue.insertIfNew(job) *> joex.notifyAllNodes
else jobStore.insertIfNew(job.encode) *> joex.notifyAllNodes
} yield ()
def findIndexOnly(maxNoteLen: Int)(
@ -324,9 +324,7 @@ object OFulltext {
def apply[A](implicit ev: ItemId[A]): ItemId[A] = ev
def from[A](f: A => Ident): ItemId[A] =
new ItemId[A] {
def itemId(a: A) = f(a)
}
(a: A) => f(a)
implicit val listItemId: ItemId[ListItem] =
ItemId.from(_.id)

View File

@ -18,8 +18,8 @@ import docspell.common._
import docspell.ftsclient.FtsClient
import docspell.logging.Logger
import docspell.notification.api.Event
import docspell.scheduler.JobStore
import docspell.store.queries.{QAttachment, QItem, QMoveAttachment}
import docspell.store.queue.JobQueue
import docspell.store.records._
import docspell.store.{AddResult, Store, UpdateResult}
@ -228,7 +228,7 @@ object OItem {
store: Store[F],
fts: FtsClient[F],
createIndex: CreateIndex[F],
queue: JobQueue[F],
jobStore: JobStore[F],
joex: OJoex[F]
): Resource[F, OItem[F]] =
for {
@ -288,7 +288,7 @@ object OItem {
)
ev = Event.TagsChanged.partial(
itemIds,
added.toList.flatten.map(_.id).toList,
added.toList.flatten.map(_.id),
Nil
)
} yield AttachedEvent(UpdateResult.success)(ev))
@ -763,7 +763,7 @@ object OItem {
job <- OptionT.liftF(
JobFactory.reprocessItem[F](args, account, Priority.Low)
)
_ <- OptionT.liftF(queue.insertIfNew(job))
_ <- OptionT.liftF(jobStore.insertIfNew(job.encode))
_ <- OptionT.liftF(if (notifyJoex) joex.notifyAllNodes else ().pure[F])
} yield UpdateResult.success).getOrElse(UpdateResult.notFound)
@ -777,7 +777,8 @@ object OItem {
jobs <- items
.map(item => ReProcessItemArgs(item, Nil))
.traverse(arg => JobFactory.reprocessItem[F](arg, account, Priority.Low))
_ <- queue.insertAllIfNew(jobs)
.map(_.map(_.encode))
_ <- jobStore.insertAllIfNew(jobs)
_ <- if (notifyJoex) joex.notifyAllNodes else ().pure[F]
} yield items.size)
@ -788,7 +789,7 @@ object OItem {
): F[UpdateResult] =
for {
job <- JobFactory.convertAllPdfs[F](collective, submitter, Priority.Low)
_ <- queue.insertIfNew(job)
_ <- jobStore.insertIfNew(job.encode)
_ <- if (notifyJoex) joex.notifyAllNodes else ().pure[F]
} yield UpdateResult.success
@ -799,7 +800,7 @@ object OItem {
): F[UpdateResult] =
for {
job <- JobFactory.makePreview[F](args, account.some)
_ <- queue.insertIfNew(job)
_ <- jobStore.insertIfNew(job.encode)
_ <- if (notifyJoex) joex.notifyAllNodes else ().pure[F]
} yield UpdateResult.success
@ -809,7 +810,7 @@ object OItem {
): F[UpdateResult] =
for {
job <- JobFactory.allPreviews[F](AllPreviewsArgs(None, storeMode), None)
_ <- queue.insertIfNew(job)
_ <- jobStore.insertIfNew(job.encode)
_ <- if (notifyJoex) joex.notifyAllNodes else ().pure[F]
} yield UpdateResult.success

View File

@ -10,10 +10,10 @@ import cats.data.OptionT
import cats.effect._
import cats.implicits._
import docspell.backend.msg.JobDone
import docspell.backend.ops.OJob.{CollectiveQueueState, JobCancelResult}
import docspell.common._
import docspell.pubsub.api.PubSubT
import docspell.scheduler.msg.JobDone
import docspell.store.Store
import docspell.store.UpdateResult
import docspell.store.queries.QJob

View File

@ -10,14 +10,16 @@ import cats.Applicative
import cats.effect._
import cats.implicits._
import docspell.backend.msg.{CancelJob, Topics}
import docspell.common.Ident
import docspell.pubsub.api.PubSubT
import docspell.scheduler.msg.{CancelJob, JobsNotify, PeriodicTaskNotify}
trait OJoex[F[_]] {
def notifyAllNodes: F[Unit]
def notifyPeriodicTasks: F[Unit]
def cancelJob(job: Ident, worker: Ident): F[Unit]
}
@ -26,7 +28,10 @@ object OJoex {
Resource.pure[F, OJoex[F]](new OJoex[F] {
def notifyAllNodes: F[Unit] =
pubSub.publish1IgnoreErrors(Topics.jobsNotify, ()).as(())
pubSub.publish1IgnoreErrors(JobsNotify(), ()).void
def notifyPeriodicTasks: F[Unit] =
pubSub.publish1IgnoreErrors(PeriodicTaskNotify(), ()).void
def cancelJob(job: Ident, worker: Ident): F[Unit] =
pubSub.publish1IgnoreErrors(CancelJob.topic, CancelJob(job, worker)).as(())

View File

@ -14,8 +14,8 @@ import fs2.Stream
import docspell.backend.JobFactory
import docspell.common._
import docspell.scheduler.{Job, JobStore}
import docspell.store.Store
import docspell.store.queue.JobQueue
import docspell.store.records._
trait OUpload[F[_]] {
@ -108,7 +108,7 @@ object OUpload {
def apply[F[_]: Sync](
store: Store[F],
queue: JobQueue[F],
jobStore: JobStore[F],
joex: OJoex[F]
): Resource[F, OUpload[F]] =
Resource.pure[F, OUpload[F]](new OUpload[F] {
@ -187,10 +187,10 @@ object OUpload {
private def submitJobs(
notifyJoex: Boolean
)(jobs: Vector[RJob]): F[OUpload.UploadResult] =
)(jobs: Vector[Job[String]]): F[OUpload.UploadResult] =
for {
_ <- logger.debug(s"Storing jobs: $jobs")
_ <- queue.insertAll(jobs)
_ <- jobStore.insertAll(jobs)
_ <- if (notifyJoex) joex.notifyAllNodes else ().pure[F]
} yield UploadResult.Success
@ -244,7 +244,9 @@ object OUpload {
account: AccountId,
prio: Priority,
tracker: Option[Ident]
): F[Vector[RJob]] =
JobFactory.processItems[F](args, account, prio, tracker)
): F[Vector[Job[String]]] =
JobFactory
.processItems[F](args, account, prio, tracker)
.map(_.map(_.encode))
})
}

View File

@ -13,10 +13,9 @@ import fs2.Stream
import docspell.common._
import docspell.notification.api.{ChannelRef, PeriodicDueItemsArgs, PeriodicQueryArgs}
import docspell.scheduler.usertask.{UserTask, UserTaskScope, UserTaskStore}
import docspell.store.Store
import docspell.store.queue.JobQueue
import docspell.store.records.RNotificationChannel
import docspell.store.usertask._
import io.circe.Encoder
@ -86,7 +85,6 @@ object OUserTask {
def apply[F[_]: Async](
taskStore: UserTaskStore[F],
store: Store[F],
queue: JobQueue[F],
joex: OJoex[F]
): Resource[F, OUserTask[F]] =
Resource.pure[F, OUserTask[F]](new OUserTask[F] {
@ -95,9 +93,7 @@ object OUserTask {
implicit E: Encoder[A]
): F[Unit] =
for {
ptask <- task.encode.toPeriodicTask(scope, subject)
job <- ptask.toJob
_ <- queue.insert(job)
_ <- taskStore.executeNow(scope, subject, task)
_ <- joex.notifyAllNodes
} yield ()
@ -124,7 +120,7 @@ object OUserTask {
): F[Unit] =
for {
_ <- taskStore.updateTask[ScanMailboxArgs](scope, subject, task)
_ <- joex.notifyAllNodes
_ <- joex.notifyPeriodicTasks
} yield ()
def getNotifyDueItems(
@ -153,7 +149,7 @@ object OUserTask {
): F[Unit] =
for {
_ <- taskStore.updateTask[PeriodicDueItemsArgs](scope, subject, task)
_ <- joex.notifyAllNodes
_ <- joex.notifyPeriodicTasks
} yield ()
def getPeriodicQuery(scope: UserTaskScope): Stream[F, UserTask[PeriodicQueryArgs]] =
@ -180,7 +176,7 @@ object OUserTask {
): F[Unit] =
for {
_ <- taskStore.updateTask[PeriodicQueryArgs](scope, subject, task)
_ <- joex.notifyAllNodes
_ <- joex.notifyPeriodicTasks
} yield ()
// When retrieving arguments containing channel references, we must update

View File

@ -14,7 +14,8 @@ case class Banner(
configFile: Option[String],
appId: Ident,
baseUrl: LenientUri,
ftsUrl: Option[LenientUri]
ftsUrl: Option[LenientUri],
fileStoreConfig: FileStoreConfig
) {
private val banner =
@ -36,6 +37,7 @@ case class Banner(
s"Database: ${jdbcUrl.asString}",
s"Fts: ${ftsUrl.map(_.asString).getOrElse("-")}",
s"Config: ${configFile.getOrElse("")}",
s"FileRepo: ${fileStoreConfig}",
""
)

View File

@ -10,6 +10,8 @@ object DocspellSystem {
val user = Ident.unsafe("docspell-system")
val taskGroup = user
val account: AccountId = AccountId(taskGroup, user)
val migrationTaskTracker = Ident.unsafe("full-text-index-tracker")
val allPreviewTaskTracker = Ident.unsafe("generate-all-previews")
val allPageCountTaskTracker = Ident.unsafe("all-page-count-tracker")

View File

@ -0,0 +1,56 @@
/*
* Copyright 2020 Eike K. & Contributors
*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
package docspell.common
import cats.data.NonEmptyList
import docspell.common.FileCopyTaskArgs.Selection
import io.circe.generic.semiauto.{deriveDecoder, deriveEncoder}
import io.circe.syntax._
import io.circe.{Decoder, Encoder}
/** The input to the `FileCopyTask`. The task copies all files from one
  * FileRepository to one or more target repositories.
  *
  * If no `from` is given, the default file repository is used. For targets, a list of ids
  * can be specified that must match a configured file store in the config file. When
  * selecting "all", it means all enabled stores.
  */
final case class FileCopyTaskArgs(from: Option[Ident], to: Selection)

object FileCopyTaskArgs {
  val taskName = Ident.unsafe("copy-file-repositories")

  /** The target selection: either all stores or an explicit, non-empty list of ids. */
  sealed trait Selection

  object Selection {
    case object All extends Selection
    case class Stores(ids: NonEmptyList[Ident]) extends Selection

    /** `All` is written as the string "!all", `Stores` as a JSON array of ids. */
    implicit val jsonEncoder: Encoder[Selection] =
      Encoder.instance { selection =>
        selection match {
          case All         => "!all".asJson
          case Stores(ids) => ids.toList.asJson
        }
      }

    /** Accepts the string "!all" (case-insensitive); anything else is decoded
      * as a non-empty list of ids.
      */
    implicit val jsonDecoder: Decoder[Selection] =
      Decoder.instance { cursor =>
        val json = cursor.value
        val selectsAll = json.asString.exists(_.equalsIgnoreCase("!all"))
        if (selectsAll) Right(All)
        else json.as[NonEmptyList[Ident]].map(Stores.apply)
      }
  }

  implicit val jsonDecoder: Decoder[FileCopyTaskArgs] =
    deriveDecoder

  implicit val jsonEncoder: Encoder[FileCopyTaskArgs] =
    deriveEncoder
}

View File

@ -0,0 +1,22 @@
/*
* Copyright 2020 Eike K. & Contributors
*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
package docspell.common
import io.circe.generic.semiauto.{deriveDecoder, deriveEncoder}
import io.circe.{Decoder, Encoder}
/** Arguments of the file integrity check job; `pattern` is the file key part
  * the job is created with.
  */
final case class FileIntegrityCheckArgs(pattern: FileKeyPart)

object FileIntegrityCheckArgs {

  /** The task name used when creating the integrity check job. */
  val taskName: Ident = Ident.unsafe("all-file-integrity-check")

  implicit val jsonDecoder: Decoder[FileIntegrityCheckArgs] =
    deriveDecoder[FileIntegrityCheckArgs]

  implicit val jsonEncoder: Encoder[FileIntegrityCheckArgs] =
    deriveEncoder[FileIntegrityCheckArgs]
}

View File

@ -9,7 +9,10 @@ package docspell.common
import io.circe.generic.semiauto.{deriveDecoder, deriveEncoder}
import io.circe.{Decoder, Encoder}
case class FileKey(collective: Ident, category: FileCategory, id: Ident)
final case class FileKey(collective: Ident, category: FileCategory, id: Ident) {

  /** Renders the key as its three components joined by slashes:
    * collective, category and id.
    */
  override def toString =
    List(collective.id, category.id.id, id.id).mkString("/")
}
object FileKey {

View File

@ -0,0 +1,53 @@
/*
* Copyright 2020 Eike K. & Contributors
*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
package docspell.common
import cats.implicits._
import io.circe.syntax._
import io.circe.{Decoder, DecodingFailure, Encoder}
/** A complete or partial file key. From least to most specific: nothing at all
  * (`Empty`), a collective, a collective with a file category, or a complete
  * [[FileKey]].
  */
sealed trait FileKeyPart

object FileKeyPart {

  case object Empty extends FileKeyPart

  final case class Collective(collective: Ident) extends FileKeyPart

  final case class Category(collective: Ident, category: FileCategory) extends FileKeyPart

  final case class Key(key: FileKey) extends FileKeyPart

  /** Encodes `Empty` via the unit value (circe renders it as an empty JSON
    * object), the partial variants as objects with the fields `collective` and
    * `category`, and `Key` using the `FileKey` encoder.
    */
  implicit val jsonEncoder: Encoder[FileKeyPart] =
    Encoder.instance {
      case Empty => ().asJson
      case Collective(cid) =>
        Map("collective" -> cid.asJson).asJson
      case Category(cid, cat) =>
        Map("collective" -> cid.asJson, "category" -> cat.asJson).asJson
      case Key(key) =>
        key.asJson
    }

  /** Decodes the most specific variant that matches: a complete `FileKey`
    * first, then collective with category, then collective only, and finally
    * `Empty` for an object without any keys. Any other input is rejected with
    * a message describing the accepted shapes.
    */
  implicit val jsonDecoder: Decoder[FileKeyPart] =
    Decoder.instance { cursor =>
      for {
        cid <- cursor.getOrElse[Option[Ident]]("collective")(None)
        cat <- cursor.getOrElse[Option[FileCategory]]("category")(None)
        // true only if the current value is an object without any keys
        emptyObj = cursor.keys.exists(_.isEmpty)

        c3 = cursor.as[FileKey].map(Key).toOption
        c2 = (cid, cat).mapN(Category)
        c1 = cid.map(Collective)
        c0 = Option.when(emptyObj)(Empty)

        c = c3.orElse(c2).orElse(c1).orElse(c0)
        res <- c.toRight(
          DecodingFailure(
            "Cannot decode FileKeyPart: expected an empty object, an object with " +
              "'collective', an object with 'collective' and 'category', or a complete file key",
            cursor.history
          )
        )
      } yield res
    }
}

View File

@ -0,0 +1,39 @@
/*
* Copyright 2020 Eike K. & Contributors
*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
package docspell.common
import fs2.io.file.Path
/** Configuration of a single file store. Every variant carries an `enabled`
  * flag and reports its [[FileStoreType]].
  */
sealed trait FileStoreConfig {
def enabled: Boolean
def storeType: FileStoreType
}
object FileStoreConfig {
/** A store of type `FileStoreType.DefaultDatabase`; it needs no settings
  * besides the `enabled` flag.
  */
case class DefaultDatabase(enabled: Boolean) extends FileStoreConfig {
val storeType = FileStoreType.DefaultDatabase
}
/** A store of type `FileStoreType.FileSystem`, configured with a `directory`. */
case class FileSystem(
enabled: Boolean,
directory: Path
) extends FileStoreConfig {
val storeType = FileStoreType.FileSystem
}
/** A store of type `FileStoreType.S3`, configured with an endpoint, the
  * access and secret key and a bucket name.
  */
case class S3(
enabled: Boolean,
endpoint: String,
accessKey: String,
secretKey: String,
bucket: String
) extends FileStoreConfig {
val storeType = FileStoreType.S3
// Custom rendering that replaces the secret key with `***`, so the value
// can be printed without revealing it.
override def toString =
s"S3(enabled=$enabled, endpoint=$endpoint, bucket=$bucket, accessKey=$accessKey, secretKey=***)"
}
}

View File

@ -0,0 +1,32 @@
/*
* Copyright 2020 Eike K. & Contributors
*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
package docspell.common
import cats.data.NonEmptyList
/** The kind of a file store. The `name` of each type is the lower-cased name
  * of its case object.
  */
sealed trait FileStoreType { self: Product =>
  def name: String =
    self.productPrefix.toLowerCase
}

object FileStoreType {
  case object DefaultDatabase extends FileStoreType
  case object S3 extends FileStoreType
  case object FileSystem extends FileStoreType

  /** All known file store types. */
  val all: NonEmptyList[FileStoreType] =
    NonEmptyList.of(DefaultDatabase, S3, FileSystem)

  /** Finds the type whose name equals `str`, ignoring case. */
  def fromString(str: String): Either[String, FileStoreType] =
    all.find(tpe => tpe.name.equalsIgnoreCase(str)) match {
      case Some(tpe) => Right(tpe)
      case None      => Left(s"Invalid file store type: $str")
    }

  /** Like `fromString`, but fails with a runtime error for unknown names. */
  def unsafeFromString(str: String): FileStoreType =
    fromString(str) match {
      case Right(tpe) => tpe
      case Left(err)  => sys.error(err)
    }
}

View File

@ -6,10 +6,8 @@
package docspell.common.syntax
import cats.implicits._
import io.circe.Decoder
import io.circe.parser._
import io.circe.parser
trait StringSyntax {
implicit class EvenMoreStringOps(s: String) {
@ -18,9 +16,8 @@ trait StringSyntax {
def parseJsonAs[A](implicit d: Decoder[A]): Either[Throwable, A] =
for {
json <- parse(s).leftMap(_.underlying)
value <- json.as[A]
} yield value
json <- parser.decode[A](s)
} yield json
}
}

View File

@ -18,9 +18,18 @@ import docspell.logging.{Level, LogConfig}
import com.github.eikek.calev.CalEvent
import pureconfig.ConfigReader
import pureconfig.error.{CannotConvert, FailureReason}
import pureconfig.generic.{CoproductHint, FieldCoproductHint}
import scodec.bits.ByteVector
object Implicits {
// The default field value for the S3 class ("s-3") looks strange; this override allows writing "s3" in the config.
implicit val fileStoreCoproductHint: CoproductHint[FileStoreConfig] =
new FieldCoproductHint[FileStoreConfig]("type") {
override def fieldValue(name: String) =
if (name.equalsIgnoreCase("S3")) "s3"
else super.fieldValue(name)
}
implicit val accountIdReader: ConfigReader[AccountId] =
ConfigReader[String].emap(reason(AccountId.parse))
@ -42,6 +51,9 @@ object Implicits {
implicit val identReader: ConfigReader[Ident] =
ConfigReader[String].emap(reason(Ident.fromString))
implicit def identMapReader[B: ConfigReader]: ConfigReader[Map[Ident, B]] =
pureconfig.configurable.genericMapReader[Ident, B](reason(Ident.fromString))
implicit val byteVectorReader: ConfigReader[ByteVector] =
ConfigReader[String].emap(reason { str =>
if (str.startsWith("hex:"))
@ -70,6 +82,9 @@ object Implicits {
implicit val logLevelReader: ConfigReader[Level] =
ConfigReader[String].emap(reason(Level.fromString))
implicit val fileStoreTypeReader: ConfigReader[FileStoreType] =
ConfigReader[String].emap(reason(FileStoreType.fromString))
def reason[A: ClassTag](
f: String => Either[String, A]
): String => Either[FailureReason, A] =

View File

@ -194,6 +194,11 @@ docspell.joex {
# How often the node must be unreachable, before it is removed.
min-not-found = 2
}
# Checks all files against their checksum
integrity-check {
enabled = true
}
}
# A periodic task to check for new releases of docspell. It can
@ -646,6 +651,41 @@ Docpell Update Check
# restrict file types that should be handed over to processing.
# By default all files are allowed.
valid-mime-types = [ ]
# The id of an enabled store from the `stores` array that should
# be used.
#
# IMPORTANT NOTE: All nodes must have the exact same file store
# configuration!
default-store = "database"
# A list of possible file stores. Each entry must have a unique
# id. The `type` is one of: default-database, file-system, s3.
#
# The enabled property serves currently to define target stores
# for the "copy files" task. All stores with enabled=false are
# removed from the list. The `default-store` must be enabled.
stores = {
database =
{ enabled = true
type = "default-database"
}
filesystem =
{ enabled = false
type = "file-system"
directory = "/some/directory"
}
minio =
{ enabled = false
type = "s3"
endpoint = "http://localhost:9000"
access-key = "username"
secret-key = "password"
bucket = "docspell"
}
}
}
# Configuration of the full-text search engine.

View File

@ -19,10 +19,10 @@ import docspell.ftssolr.SolrConfig
import docspell.joex.analysis.RegexNerFile
import docspell.joex.hk.HouseKeepingConfig
import docspell.joex.routes.InternalHeader
import docspell.joex.scheduler.{PeriodicSchedulerConfig, SchedulerConfig}
import docspell.joex.updatecheck.UpdateCheckConfig
import docspell.logging.LogConfig
import docspell.pubsub.naive.PubSubConfig
import docspell.scheduler.{PeriodicSchedulerConfig, SchedulerConfig}
import docspell.store.JdbcConfig
case class Config(

View File

@ -10,7 +10,7 @@ import cats.effect.Async
import docspell.config.Implicits._
import docspell.config.{ConfigFactory, Validation}
import docspell.joex.scheduler.CountingScheme
import docspell.scheduler.CountingScheme
import emil.MailAddress
import emil.javamail.syntax._
@ -19,6 +19,7 @@ import pureconfig.generic.auto._
import yamusca.imports._
object ConfigFile {
// IntelliJ is wrong, this is required
import Implicits._
def loadConfig[F[_]: Async](args: List[String]): F[Config] = {
@ -51,6 +52,7 @@ object ConfigFile {
Validation.failWhen(
cfg => cfg.updateCheck.enabled && cfg.updateCheck.subject.els.isEmpty,
"No subject given for enabled update check!"
)
),
Validation(cfg => cfg.files.validate.map(_ => cfg))
)
}

View File

@ -7,7 +7,7 @@
package docspell.joex
import docspell.common.Ident
import docspell.joex.scheduler.{PeriodicScheduler, Scheduler}
import docspell.scheduler.{PeriodicScheduler, Scheduler}
import docspell.store.records.RJobLog
trait JoexApp[F[_]] {

View File

@ -10,37 +10,23 @@ import cats.effect._
import cats.implicits._
import fs2.concurrent.SignallingRef
import docspell.analysis.TextAnalyser
import docspell.backend.MailAddressCodec
import docspell.backend.fulltext.CreateIndex
import docspell.backend.msg.{CancelJob, JobQueuePublish, Topics}
import docspell.backend.ops._
import docspell.common._
import docspell.ftsclient.FtsClient
import docspell.ftssolr.SolrFtsClient
import docspell.joex.analysis.RegexNerFile
import docspell.joex.emptytrash._
import docspell.joex.fts.{MigrationTask, ReIndexTask}
import docspell.joex.fts.MigrationTask
import docspell.joex.hk._
import docspell.joex.learn.LearnClassifierTask
import docspell.joex.notify._
import docspell.joex.pagecount._
import docspell.joex.pdfconv.ConvertAllPdfTask
import docspell.joex.pdfconv.PdfConvTask
import docspell.joex.preview._
import docspell.joex.process.ItemHandler
import docspell.joex.process.ReProcessItem
import docspell.joex.scanmailbox._
import docspell.joex.scheduler._
import docspell.joex.updatecheck._
import docspell.notification.api.NotificationModule
import docspell.notification.impl.NotificationModuleImpl
import docspell.pubsub.api.{PubSub, PubSubT}
import docspell.scheduler._
import docspell.scheduler.impl.{JobStoreModuleBuilder, SchedulerModuleBuilder}
import docspell.scheduler.usertask.{UserTaskScope, UserTaskStore}
import docspell.store.Store
import docspell.store.queue._
import docspell.store.records.{REmptyTrashSetting, RJobLog}
import docspell.store.usertask.UserTaskScope
import docspell.store.usertask.UserTaskStore
import emil.javamail._
import org.http4s.client.Client
@ -48,9 +34,8 @@ import org.http4s.client.Client
final class JoexAppImpl[F[_]: Async](
cfg: Config,
store: Store[F],
queue: JobQueue[F],
pubSubT: PubSubT[F],
pstore: PeriodicTaskStore[F],
uts: UserTaskStore[F],
jobStore: JobStore[F],
termSignal: SignallingRef[F, Boolean],
notificationMod: NotificationModule[F],
val scheduler: Scheduler[F],
@ -67,20 +52,11 @@ final class JoexAppImpl[F[_]: Async](
_ <- Async[F].start(eventConsume)
_ <- scheduler.periodicAwake
_ <- periodicScheduler.periodicAwake
_ <- subscriptions
_ <- scheduler.startSubscriptions
_ <- periodicScheduler.startSubscriptions
} yield ()
}
def subscriptions =
for {
_ <- Async[F].start(pubSubT.subscribeSink(Topics.jobsNotify) { _ =>
scheduler.notifyChange
})
_ <- Async[F].start(pubSubT.subscribeSink(CancelJob.topic) { msg =>
scheduler.requestCancel(msg.body.jobId).as(())
})
} yield ()
def findLogs(jobId: Ident): F[Vector[RJobLog]] =
store.transact(RJobLog.findLogs(jobId))
@ -90,24 +66,23 @@ final class JoexAppImpl[F[_]: Async](
private def scheduleBackgroundTasks: F[Unit] =
HouseKeepingTask
.periodicTask[F](cfg.houseKeeping.schedule)
.flatMap(pstore.insert) *>
.flatMap(t => uts.updateTask(UserTaskScope.system, t.summary, t)) *>
scheduleEmptyTrashTasks *>
UpdateCheckTask
.periodicTask(cfg.updateCheck)
.flatMap(pstore.insert) *>
MigrationTask.job.flatMap(queue.insertIfNew) *>
.flatMap(t => uts.updateTask(UserTaskScope.system, t.summary, t)) *>
MigrationTask.job.flatMap(jobStore.insertIfNew) *>
AllPreviewsTask
.job(MakePreviewArgs.StoreMode.WhenMissing, None)
.flatMap(queue.insertIfNew) *>
AllPageCountTask.job.flatMap(queue.insertIfNew).as(())
.flatMap(jobStore.insertIfNew) *>
AllPageCountTask.job.flatMap(jobStore.insertIfNew).void
private def scheduleEmptyTrashTasks: F[Unit] =
store
.transact(
REmptyTrashSetting.findForAllCollectives(OCollective.EmptyTrash.default, 50)
)
.evalMap(es =>
UserTaskStore(store).use { uts =>
.evalMap { es =>
val args = EmptyTrashArgs(es.cid, es.minAge)
uts.updateOneTask(
UserTaskScope(args.collective),
@ -115,7 +90,6 @@ final class JoexAppImpl[F[_]: Async](
EmptyTrashTask.userTask(args, es.schedule)
)
}
)
.compile
.drain
@ -131,179 +105,45 @@ object JoexAppImpl extends MailAddressCodec {
pubSub: PubSub[F]
): Resource[F, JoexApp[F]] =
for {
pstore <- PeriodicTaskStore.create(store)
joexLogger = docspell.logging.getLogger[F](s"joex-${cfg.appId.id}")
joexLogger <- Resource.pure(docspell.logging.getLogger[F](s"joex-${cfg.appId.id}"))
pubSubT = PubSubT(pubSub, joexLogger)
javaEmil =
JavaMailEmil(Settings.defaultSettings.copy(debug = cfg.mailDebug))
notificationMod <- Resource.eval(
NotificationModuleImpl[F](store, javaEmil, httpClient, 200)
)
queue <- JobQueuePublish(store, pubSubT, notificationMod)
joex <- OJoex(pubSubT)
upload <- OUpload(store, queue, joex)
fts <- createFtsClient(cfg)(httpClient)
createIndex <- CreateIndex.resource(fts, store)
itemOps <- OItem(store, fts, createIndex, queue, joex)
itemSearchOps <- OItemSearch(store)
analyser <- TextAnalyser.create[F](cfg.textAnalysis.textAnalysisConfig)
regexNer <- RegexNerFile(cfg.textAnalysis.regexNerFileConfig, store)
updateCheck <- UpdateCheck.resource(httpClient)
notification <- ONotification(store, notificationMod)
sch <- SchedulerBuilder(cfg.scheduler, store)
.withQueue(queue)
.withPubSub(pubSubT)
jobStoreModule = JobStoreModuleBuilder(store)
.withPubsub(pubSubT)
.withEventSink(notificationMod)
.withTask(
JobTask.json(
ProcessItemArgs.taskName,
ItemHandler.newItem[F](cfg, itemOps, fts, analyser, regexNer),
ItemHandler.onCancel[F]
)
)
.withTask(
JobTask.json(
ReProcessItemArgs.taskName,
ReProcessItem[F](cfg, fts, itemOps, analyser, regexNer),
ReProcessItem.onCancel[F]
)
)
.withTask(
JobTask.json(
ScanMailboxArgs.taskName,
ScanMailboxTask[F](cfg.userTasks.scanMailbox, javaEmil, upload, joex),
ScanMailboxTask.onCancel[F]
)
)
.withTask(
JobTask.json(
MigrationTask.taskName,
MigrationTask[F](cfg.fullTextSearch, fts, createIndex),
MigrationTask.onCancel[F]
)
)
.withTask(
JobTask.json(
ReIndexTask.taskName,
ReIndexTask[F](cfg.fullTextSearch, fts, createIndex),
ReIndexTask.onCancel[F]
)
)
.withTask(
JobTask.json(
HouseKeepingTask.taskName,
HouseKeepingTask[F](cfg),
HouseKeepingTask.onCancel[F]
)
)
.withTask(
JobTask.json(
PdfConvTask.taskName,
PdfConvTask[F](cfg),
PdfConvTask.onCancel[F]
)
)
.withTask(
JobTask.json(
ConvertAllPdfArgs.taskName,
ConvertAllPdfTask[F](queue, joex),
ConvertAllPdfTask.onCancel[F]
)
)
.withTask(
JobTask.json(
LearnClassifierArgs.taskName,
LearnClassifierTask[F](cfg.textAnalysis, analyser),
LearnClassifierTask.onCancel[F]
)
)
.withTask(
JobTask.json(
MakePreviewArgs.taskName,
MakePreviewTask[F](cfg.extraction.preview),
MakePreviewTask.onCancel[F]
)
)
.withTask(
JobTask.json(
AllPreviewsArgs.taskName,
AllPreviewsTask[F](queue, joex),
AllPreviewsTask.onCancel[F]
)
)
.withTask(
JobTask.json(
MakePageCountArgs.taskName,
MakePageCountTask[F](),
MakePageCountTask.onCancel[F]
)
)
.withTask(
JobTask.json(
AllPageCountTask.taskName,
AllPageCountTask[F](queue, joex),
AllPageCountTask.onCancel[F]
)
)
.withTask(
JobTask.json(
EmptyTrashArgs.taskName,
EmptyTrashTask[F](itemOps, itemSearchOps),
EmptyTrashTask.onCancel[F]
)
)
.withTask(
JobTask.json(
UpdateCheckTask.taskName,
UpdateCheckTask[F](
cfg.updateCheck,
cfg.sendMail,
javaEmil,
updateCheck,
ThisVersion.default
),
UpdateCheckTask.onCancel[F]
)
)
.withTask(
JobTask.json(
PeriodicQueryTask.taskName,
PeriodicQueryTask[F](notification),
PeriodicQueryTask.onCancel[F]
)
)
.withTask(
JobTask.json(
PeriodicDueItemsTask.taskName,
PeriodicDueItemsTask[F](notification),
PeriodicDueItemsTask.onCancel[F]
)
.build
tasks <- JoexTasks.resource(
cfg,
jobStoreModule,
httpClient,
pubSubT,
notificationMod,
javaEmil
)
schedulerModule <- SchedulerModuleBuilder(jobStoreModule)
.withSchedulerConfig(cfg.scheduler)
.withPeriodicSchedulerConfig(cfg.periodicScheduler)
.withTaskRegistry(tasks.get)
.resource
psch <- PeriodicScheduler.create(
cfg.periodicScheduler,
sch,
queue,
pstore,
joex
)
app = new JoexAppImpl(
cfg,
store,
queue,
pubSubT,
pstore,
jobStoreModule.userTasks,
jobStoreModule.jobs,
termSignal,
notificationMod,
sch,
psch
schedulerModule.scheduler,
schedulerModule.periodicScheduler
)
appR <- Resource.make(app.init.map(_ => app))(_.initShutdown)
} yield appR
private def createFtsClient[F[_]: Async](
cfg: Config
)(client: Client[F]): Resource[F, FtsClient[F]] =
if (cfg.fullTextSearch.enabled) SolrFtsClient(cfg.fullTextSearch.solr, client)
else Resource.pure[F, FtsClient[F]](FtsClient.none[F])
}

View File

@ -41,7 +41,7 @@ object JoexServer {
store <- Store.create[F](
cfg.jdbc,
cfg.files.chunkSize,
cfg.files.defaultFileRepositoryConfig,
pools.connectEC
)
settings <- Resource.eval(store.transact(RInternalSetting.create))

View File

@ -0,0 +1,248 @@
/*
* Copyright 2020 Eike K. & Contributors
*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
package docspell.joex
import cats.effect.{Async, Resource}
import docspell.analysis.TextAnalyser
import docspell.backend.fulltext.CreateIndex
import docspell.backend.ops._
import docspell.common._
import docspell.ftsclient.FtsClient
import docspell.ftssolr.SolrFtsClient
import docspell.joex.analysis.RegexNerFile
import docspell.joex.emptytrash.EmptyTrashTask
import docspell.joex.filecopy.{FileCopyTask, FileIntegrityCheckTask}
import docspell.joex.fts.{MigrationTask, ReIndexTask}
import docspell.joex.hk.HouseKeepingTask
import docspell.joex.learn.LearnClassifierTask
import docspell.joex.notify.{PeriodicDueItemsTask, PeriodicQueryTask}
import docspell.joex.pagecount.{AllPageCountTask, MakePageCountTask}
import docspell.joex.pdfconv.{ConvertAllPdfTask, PdfConvTask}
import docspell.joex.preview.{AllPreviewsTask, MakePreviewTask}
import docspell.joex.process.{ItemHandler, ReProcessItem}
import docspell.joex.scanmailbox.ScanMailboxTask
import docspell.joex.updatecheck.{ThisVersion, UpdateCheck, UpdateCheckTask}
import docspell.notification.api.NotificationModule
import docspell.pubsub.api.PubSubT
import docspell.scheduler.impl.JobStoreModuleBuilder
import docspell.scheduler.{JobStoreModule, JobTask, JobTaskRegistry}
import docspell.store.Store
import emil.Emil
import org.http4s.client.Client
/** Holds the collaborators that the joex job tasks need and assembles the
* [[JobTaskRegistry]] from them.
*
* Instances are created via `JoexTasks.resource` in the companion object.
*/
final class JoexTasks[F[_]: Async](
cfg: Config,
store: Store[F],
itemOps: OItem[F],
fts: FtsClient[F],
analyser: TextAnalyser[F],
regexNer: RegexNerFile[F],
updateCheck: UpdateCheck[F],
notification: ONotification[F],
fileRepo: OFileRepository[F],
javaEmil: Emil[F],
jobStoreModule: JobStoreModule[F],
upload: OUpload[F],
createIndex: CreateIndex[F],
joex: OJoex[F],
itemSearch: OItemSearch[F]
) {
/** Builds a registry, starting from an empty one, that contains one entry
* per task below. Each entry is created with `JobTask.json` from a task
* name, the task to run and the task to run on cancellation.
*
* NOTE(review): presumably `JobTask.json` decodes the job arguments from
* JSON - confirm against the scheduler api.
*/
def get: JobTaskRegistry[F] =
JobTaskRegistry
.empty[F]
// item processing and re-processing
.withTask(
JobTask.json(
ProcessItemArgs.taskName,
ItemHandler.newItem[F](cfg, store, itemOps, fts, analyser, regexNer),
ItemHandler.onCancel[F](store)
)
)
.withTask(
JobTask.json(
ReProcessItemArgs.taskName,
ReProcessItem[F](cfg, fts, itemOps, analyser, regexNer, store),
ReProcessItem.onCancel[F]
)
)
.withTask(
JobTask.json(
ScanMailboxArgs.taskName,
ScanMailboxTask[F](cfg.userTasks.scanMailbox, store, javaEmil, upload, joex),
ScanMailboxTask.onCancel[F]
)
)
// full-text index migration and re-index
.withTask(
JobTask.json(
MigrationTask.taskName,
MigrationTask[F](cfg.fullTextSearch, store, fts, createIndex),
MigrationTask.onCancel[F]
)
)
.withTask(
JobTask.json(
ReIndexTask.taskName,
ReIndexTask[F](cfg.fullTextSearch, store, fts, createIndex),
ReIndexTask.onCancel[F]
)
)
.withTask(
JobTask.json(
HouseKeepingTask.taskName,
HouseKeepingTask[F](cfg, store, fileRepo),
HouseKeepingTask.onCancel[F]
)
)
.withTask(
JobTask.json(
PdfConvTask.taskName,
PdfConvTask[F](cfg, store),
PdfConvTask.onCancel[F]
)
)
.withTask(
JobTask.json(
ConvertAllPdfArgs.taskName,
ConvertAllPdfTask[F](jobStoreModule.jobs, joex, store),
ConvertAllPdfTask.onCancel[F]
)
)
.withTask(
JobTask.json(
LearnClassifierArgs.taskName,
LearnClassifierTask[F](cfg.textAnalysis, store, analyser),
LearnClassifierTask.onCancel[F]
)
)
.withTask(
JobTask.json(
MakePreviewArgs.taskName,
MakePreviewTask[F](cfg.extraction.preview, store),
MakePreviewTask.onCancel[F]
)
)
.withTask(
JobTask.json(
AllPreviewsArgs.taskName,
AllPreviewsTask[F](jobStoreModule.jobs, joex, store),
AllPreviewsTask.onCancel[F]
)
)
.withTask(
JobTask.json(
MakePageCountArgs.taskName,
MakePageCountTask[F](store),
MakePageCountTask.onCancel[F]
)
)
.withTask(
JobTask.json(
AllPageCountTask.taskName,
AllPageCountTask[F](store, jobStoreModule.jobs, joex),
AllPageCountTask.onCancel[F]
)
)
.withTask(
JobTask.json(
EmptyTrashArgs.taskName,
EmptyTrashTask[F](itemOps, itemSearch),
EmptyTrashTask.onCancel[F]
)
)
.withTask(
JobTask.json(
UpdateCheckTask.taskName,
UpdateCheckTask[F](
cfg.updateCheck,
cfg.sendMail,
store,
javaEmil,
updateCheck,
ThisVersion.default
),
UpdateCheckTask.onCancel[F]
)
)
// periodic notification tasks
.withTask(
JobTask.json(
PeriodicQueryTask.taskName,
PeriodicQueryTask[F](store, notification),
PeriodicQueryTask.onCancel[F]
)
)
.withTask(
JobTask.json(
PeriodicDueItemsTask.taskName,
PeriodicDueItemsTask[F](store, notification),
PeriodicDueItemsTask.onCancel[F]
)
)
// file store tasks: copying files and checking their integrity
.withTask(
JobTask.json(
FileCopyTaskArgs.taskName,
FileCopyTask[F](cfg, store),
FileCopyTask.onCancel[F]
)
)
.withTask(
JobTask.json(
FileIntegrityCheckArgs.taskName,
FileIntegrityCheckTask[F](fileRepo, store),
FileIntegrityCheckTask.onCancel[F]
)
)
}
object JoexTasks {

  /** Creates all services the job tasks depend on and wires them into a
    * [[JoexTasks]] instance. The store and the job store are taken from the
    * given `jobStoreModule`.
    */
  def resource[F[_]: Async](
      cfg: Config,
      jobStoreModule: JobStoreModuleBuilder.Module[F],
      httpClient: Client[F],
      pubSub: PubSubT[F],
      notificationModule: NotificationModule[F],
      emailService: Emil[F]
  ): Resource[F, JoexTasks[F]] = {
    val store = jobStoreModule.store
    val jobs = jobStoreModule.jobs

    for {
      joex <- OJoex(pubSub)
      upload <- OUpload(store, jobs, joex)
      fts <- createFtsClient(cfg)(httpClient)
      createIndex <- CreateIndex.resource(fts, store)
      itemOps <- OItem(store, fts, createIndex, jobs, joex)
      itemSearchOps <- OItemSearch(store)
      analyser <- TextAnalyser.create[F](cfg.textAnalysis.textAnalysisConfig)
      regexNer <- RegexNerFile(cfg.textAnalysis.regexNerFileConfig, store)
      updateCheck <- UpdateCheck.resource(httpClient)
      notification <- ONotification(store, notificationModule)
      fileRepo <- OFileRepository(store, jobs, joex)
    } yield new JoexTasks[F](
      cfg = cfg,
      store = store,
      itemOps = itemOps,
      fts = fts,
      analyser = analyser,
      regexNer = regexNer,
      updateCheck = updateCheck,
      notification = notification,
      fileRepo = fileRepo,
      javaEmil = emailService,
      jobStoreModule = jobStoreModule,
      upload = upload,
      createIndex = createIndex,
      joex = joex,
      itemSearch = itemSearchOps
    )
  }

  /** Selects the full-text search client: `FtsClient.none` when full-text
    * search is not enabled in the config, the Solr client otherwise.
    */
  private def createFtsClient[F[_]: Async](
      cfg: Config
  )(client: Client[F]): Resource[F, FtsClient[F]] = {
    val ftsCfg = cfg.fullTextSearch
    if (!ftsCfg.enabled) Resource.pure[F, FtsClient[F]](FtsClient.none[F])
    else SolrFtsClient(ftsCfg.solr, client)
  }
}

View File

@ -31,7 +31,8 @@ object Main extends IOApp {
Option(System.getProperty("config.file")),
cfg.appId,
cfg.baseUrl,
Some(cfg.fullTextSearch.solr.url).filter(_ => cfg.fullTextSearch.enabled)
Some(cfg.fullTextSearch.solr.url).filter(_ => cfg.fullTextSearch.enabled),
cfg.files.defaultStoreConfig
)
_ <- logger.info(s"\n${banner.render("***>")}")
_ <-

View File

@ -12,9 +12,9 @@ import fs2.Stream
import docspell.backend.ops.{OItem, OItemSearch}
import docspell.common._
import docspell.joex.scheduler._
import docspell.scheduler._
import docspell.scheduler.usertask.UserTask
import docspell.store.records.RItem
import docspell.store.usertask.UserTask
import com.github.eikek.calev.CalEvent

View File

@ -0,0 +1,144 @@
/*
* Copyright 2020 Eike K. & Contributors
*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
package docspell.joex.filecopy
import cats.data.NonEmptyList
import cats.effect._
import cats.implicits._
import docspell.common.FileCopyTaskArgs.Selection
import docspell.common.{FileCopyTaskArgs, Ident}
import docspell.joex.Config
import docspell.logging.Logger
import docspell.scheduler.{JobTaskResultEncoder, Task}
import docspell.store.Store
import docspell.store.file.{BinnyUtils, FileRepository, FileRepositoryConfig}
import binny.CopyTool.Counter
import binny.{BinaryId, BinaryStore, CopyTool}
import io.circe.generic.semiauto.deriveCodec
import io.circe.{Codec, Decoder, Encoder}
/** Job task that copies all files of one file repository into one or more
  * other file repositories taken from the joex configuration.
  */
object FileCopyTask {
  type Args = FileCopyTaskArgs

  /** Outcome of a copy run.
    *
    * @param success
    *   `false` when the copy could not be started at all (see the `no…`
    *   constructors); [[CopyResult.success]] always sets it to `true`, even if
    *   single files failed to copy
    * @param message
    *   short description of the outcome or of the reason for the failure
    * @param counter
    *   the counters returned by the copy tool, one per target store; empty for
    *   the failure results created in this file
    */
  case class CopyResult(success: Boolean, message: String, counter: List[Counter])

  object CopyResult {
    def noSourceImpl: CopyResult =
      CopyResult(false, "No source BinaryStore implementation found!", Nil)

    def noTargetImpl: CopyResult =
      CopyResult(false, "No target BinaryStore implementation found!", Nil)

    def noSourceStore(id: Ident): CopyResult =
      CopyResult(
        false,
        s"No source file repo found with id: ${id.id}. Make sure it is present in the config.",
        Nil
      )

    def noTargetStore: CopyResult =
      CopyResult(false, "No target file repositories defined", Nil)

    def success(counter: NonEmptyList[Counter]): CopyResult =
      CopyResult(true, "Done", counter.toList)

    implicit val binaryIdCodec: Codec[BinaryId] =
      Codec.from(
        Decoder.decodeString.map(BinaryId.apply),
        Encoder.encodeString.contramap(_.id)
      )

    implicit val counterEncoder: Codec[Counter] =
      deriveCodec

    implicit val jsonCodec: Codec[CopyResult] =
      deriveCodec

    /** Renders the human readable job result message.
      *
      * Failure results created in this file never carry counters, so their
      * message must come from `message`; deriving it from the (empty) counters
      * would always report "0 files". A successful run additionally mentions
      * files that could not be copied, because `success` does not reflect them.
      */
    implicit val jobTaskResultEncoder: JobTaskResultEncoder[CopyResult] =
      JobTaskResultEncoder.fromJson[CopyResult].withMessage { result =>
        val allGood = result.counter.map(_.success).sum
        val failed = result.counter.map(_.failed.size).sum
        if (result.success) {
          val summary =
            s"Successfully copied $allGood files to ${result.counter.size} stores."
          if (failed > 0) s"$summary $failed files could not be copied!"
          else summary
        } else if (result.counter.isEmpty)
          s"Copying files failed: ${result.message}"
        else
          s"Copying files failed for ${failed} files! ${allGood} were copied successfully."
      }
  }

  def onCancel[F[_]]: Task[F, Args, Unit] =
    Task.log(_.warn(s"Cancelling ${FileCopyTaskArgs.taskName.id} task"))

  /** Resolves the source and target repositories from the task arguments and
    * the config and then copies all files.
    *
    * The source is the repository given by `args.from`, or the default one when
    * absent. Configuration problems are logged and returned as an unsuccessful
    * [[CopyResult]].
    */
  def apply[F[_]: Async](cfg: Config, store: Store[F]): Task[F, Args, CopyResult] =
    Task { ctx =>
      val src = ctx.args.from
        .map(id =>
          cfg.files.getFileRepositoryConfig(id).toRight(CopyResult.noSourceStore(id))
        )
        .getOrElse(Right(cfg.files.defaultFileRepositoryConfig))

      val targets = ctx.args.to match {
        case Selection.All =>
          cfg.files.enabledStores.values.toList
            .map(FileRepositoryConfig.fromFileStoreConfig(cfg.files.chunkSize, _))
        case Selection.Stores(ids) =>
          // if any id cannot be resolved, the whole selection collapses to Nil
          ids.traverse(cfg.files.getFileRepositoryConfig).map(_.toList).getOrElse(Nil)
      }

      // remove source from targets if present there
      val data =
        for {
          srcConfig <- src
          trgConfig <- NonEmptyList
            .fromList(targets.filter(_ != srcConfig))
            .toRight(CopyResult.noTargetStore)

          srcRepo = store.createFileRepository(srcConfig, true)
          targetRepos = trgConfig.map(store.createFileRepository(_, false))
        } yield (srcRepo, targetRepos)

      data match {
        case Right((from, tos)) =>
          ctx.logger.info(s"Start copying all files from $from") *>
            copy(ctx.logger, from, tos).flatTap(r =>
              if (r.success) ctx.logger.info(s"Copying finished: ${r.counter}")
              else ctx.logger.error(s"Copying failed: $r")
            )

        case Left(res) =>
          ctx.logger.error(s"Copying failed: $res") *> res.pure[F]
      }
    }

  /** Copies all files from `from` into every repository in `to`, one target
    * after the other.
    *
    * Returns an unsuccessful result without copying anything when
    * `FileRepository.getDelegate` yields nothing for the source or for any of
    * the targets.
    */
  def copy[F[_]: Async](
      logger: Logger[F],
      from: FileRepository[F],
      to: NonEmptyList[FileRepository[F]]
  ): F[CopyResult] =
    FileRepository.getDelegate(from) match {
      case None =>
        CopyResult.noSourceImpl.pure[F]

      case Some((src, srcMeta)) =>
        to.traverse(FileRepository.getDelegate).map(_.map(_._1)) match {
          case None =>
            CopyResult.noTargetImpl.pure[F]

          case Some(targets) =>
            val log = BinnyUtils.LoggerAdapter(logger)
            // use half of the available cores, but at least one
            val maxConcurrent = {
              val nCores = Runtime.getRuntime.availableProcessors()
              if (nCores > 2) nCores / 2 else 1
            }

            def copyTo(target: BinaryStore[F]) =
              CopyTool.copyAll[F](log, src, srcMeta, target, 50, maxConcurrent)

            logger.info(s"Start copying ${from.config} -> ${to.map(_.config)}") *>
              targets.traverse(copyTo).map(CopyResult.success)
        }
    }
}

View File

@ -0,0 +1,91 @@
/*
* Copyright 2020 Eike K. & Contributors
*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
package docspell.joex.filecopy
import cats.Monoid
import cats.effect._
import cats.implicits._
import docspell.backend.ops.OFileRepository
import docspell.backend.ops.OFileRepository.IntegrityResult
import docspell.common.{FileIntegrityCheckArgs, FileKey}
import docspell.scheduler.{JobTaskResultEncoder, Task}
import docspell.store.Store
import docspell.store.records.RFileMeta
import io.circe.Encoder
import io.circe.generic.semiauto.deriveEncoder
/** Job task that checks the files selected by the task arguments against their
  * stored checksum and sums up the outcome.
  */
object FileIntegrityCheckTask {
  type Args = FileIntegrityCheckArgs

  /** Accumulated outcome: number of files that passed, keys of files that
    * failed the check and keys of files missing in the file repository.
    */
  case class Result(ok: Int, failedKeys: Set[FileKey], notFoundKeys: Set[FileKey]) {
    override def toString: String =
      s"Result(ok=$ok, failed=${failedKeys.size}, notFound=${notFoundKeys.size}, " +
        s"keysFailed=$failedKeys, notFoundKeys=$notFoundKeys)"
  }

  object Result {
    val empty: Result = Result(0, Set.empty, Set.empty)

    /** A result for a single file that was not found. */
    def notFound(key: FileKey): Result =
      empty.copy(notFoundKeys = Set(key))

    /** A result for a single checked file: counted as ok or recorded as failed. */
    def from(r: IntegrityResult): Result =
      if (!r.ok) empty.copy(failedKeys = Set(r.key))
      else empty.copy(ok = 1)

    /** Combines results by adding the counts and merging the key sets. */
    implicit val monoid: Monoid[Result] =
      new Monoid[Result] {
        def empty: Result = Result.empty

        def combine(x: Result, y: Result): Result =
          Result(
            ok = x.ok + y.ok,
            failedKeys = x.failedKeys ++ y.failedKeys,
            notFoundKeys = x.notFoundKeys ++ y.notFoundKeys
          )
      }

    implicit val jsonEncoder: Encoder[Result] =
      deriveEncoder

    implicit val jobTaskResultEncoder: JobTaskResultEncoder[Result] =
      JobTaskResultEncoder.fromJson[Result].withMessage { res =>
        s"Integrity check finished. Ok: ${res.ok}, " +
          s"Failed: ${res.failedKeys.size}, Not found: ${res.notFoundKeys.size}"
      }
  }

  /** Streams the file metadata matching `args.pattern`, checks each file via
    * `ops.checkIntegrity` and folds the single results into one [[Result]],
    * which is logged at the end.
    */
  def apply[F[_]: Sync](ops: OFileRepository[F], store: Store[F]): Task[F, Args, Result] =
    Task { ctx =>
      val log = ctx.logger
      val metas = store.transact(RFileMeta.findAll(ctx.args.pattern, 50))

      val summary =
        metas.chunks
          .evalTap(chunk => log.info(s"Checking next ${chunk.size} files…"))
          .unchunks
          .evalMap { meta =>
            ops.checkIntegrity(meta.id, meta.checksum.some).flatMap {
              case None =>
                log
                  .error(s"File '${meta.id.toString}' not found in file repository")
                  .as(Result.notFound(meta.id))
              case Some(checked) =>
                Result.from(checked).pure[F]
            }
          }
          .foldMonoid
          .compile
          .lastOrError

      summary.flatTap { result =>
        log.infoWith(s"File check result: $result")(_.data("integrityCheck", result))
      }
    }

  def onCancel[F[_]]: Task[F, Args, Unit] =
    Task.log(_.warn(s"Cancelling ${FileIntegrityCheckArgs.taskName.id} task"))
}

View File

@ -9,25 +9,13 @@ package docspell.joex.fts
import docspell.backend.fulltext.CreateIndex
import docspell.ftsclient.FtsClient
import docspell.joex.Config
import docspell.joex.scheduler.Context
import docspell.logging.Logger
import docspell.store.Store
case class FtsContext[F[_]](
final case class FtsContext[F[_]](
cfg: Config.FullTextSearch,
store: Store[F],
fulltext: CreateIndex[F],
fts: FtsClient[F],
logger: Logger[F]
)
object FtsContext {
def apply[F[_]](
cfg: Config.FullTextSearch,
fts: FtsClient[F],
fulltext: CreateIndex[F],
ctx: Context[F, _]
): FtsContext[F] =
FtsContext(cfg, ctx.store, fulltext, fts, ctx.logger)
}

View File

@ -14,8 +14,9 @@ import docspell.backend.fulltext.CreateIndex
import docspell.common._
import docspell.ftsclient._
import docspell.joex.Config
import docspell.joex.scheduler.Context
import docspell.logging.Logger
import docspell.scheduler.Context
import docspell.store.Store
object FtsWork {
import syntax._
@ -106,10 +107,11 @@ object FtsWork {
def forContext(
cfg: Config.FullTextSearch,
store: Store[F],
fts: FtsClient[F],
fulltext: CreateIndex[F]
): Kleisli[F, Context[F, _], Unit] =
mt.local(ctx => FtsContext(cfg, fts, fulltext, ctx))
mt.local(ctx => FtsContext(cfg, store, fulltext, fts, ctx.logger))
}
}
}

View File

@ -13,14 +13,15 @@ import docspell.backend.fulltext.CreateIndex
import docspell.common._
import docspell.ftsclient._
import docspell.joex.Config
import docspell.joex.scheduler.Task
import docspell.store.records.RJob
import docspell.scheduler.{Job, Task}
import docspell.store.Store
object MigrationTask {
val taskName = Ident.unsafe("full-text-index")
def apply[F[_]: Async](
cfg: Config.FullTextSearch,
store: Store[F],
fts: FtsClient[F],
createIndex: CreateIndex[F]
): Task[F, Unit, Unit] =
@ -30,7 +31,7 @@ object MigrationTask {
Task(ctx =>
for {
migs <- migrationTasks[F](fts)
res <- Migration[F](cfg, fts, ctx.store, createIndex, ctx.logger).run(migs)
res <- Migration[F](cfg, fts, store, createIndex, ctx.logger).run(migs)
} yield res
)
)
@ -38,21 +39,18 @@ object MigrationTask {
def onCancel[F[_]]: Task[F, Unit, Unit] =
Task.log[F, Unit](_.warn("Cancelling full-text-index task"))
def job[F[_]: Sync]: F[RJob] =
for {
id <- Ident.randomId[F]
now <- Timestamp.current[F]
} yield RJob.newJob(
id,
def job[F[_]: Sync]: F[Job[String]] =
Job
.createNew(
taskName,
DocspellSystem.taskGroup,
(),
"Create full-text index",
now,
DocspellSystem.taskGroup,
Priority.Low,
Some(DocspellSystem.migrationTaskTracker)
)
.map(_.encode)
def migrationTasks[F[_]: Async](fts: FtsClient[F]): F[List[Migration[F]]] =
fts.initialize.map(_.map(fm => Migration.from(fm)))

View File

@ -7,13 +7,15 @@
package docspell.joex.fts
import cats.effect._
import cats.implicits._
import docspell.backend.fulltext.CreateIndex
import docspell.common._
import docspell.ftsclient._
import docspell.joex.Config
import docspell.joex.fts.FtsWork.syntax._
import docspell.joex.scheduler.Task
import docspell.scheduler.Task
import docspell.store.Store
object ReIndexTask {
type Args = ReIndexTaskArgs
@ -23,6 +25,7 @@ object ReIndexTask {
def apply[F[_]: Async](
cfg: Config.FullTextSearch,
store: Store[F],
fts: FtsClient[F],
fulltext: CreateIndex[F]
): Task[F, Args, Unit] =
@ -30,7 +33,7 @@ object ReIndexTask {
.log[F, Args](_.info(s"Running full-text re-index now"))
.flatMap(_ =>
Task(ctx =>
clearData[F](ctx.args.collective).forContext(cfg, fts, fulltext).run(ctx)
clearData[F](ctx.args.collective).forContext(cfg, store, fts, fulltext).run(ctx)
)
)
@ -42,7 +45,7 @@ object ReIndexTask {
(collective match {
case Some(_) =>
FtsWork
.clearIndex(collective)
.clearIndex[F](collective)
.recoverWith(
FtsWork.log[F](_.info("Clearing data failed. Continue re-indexing."))
) ++

View File

@ -10,44 +10,51 @@ import cats.effect._
import cats.implicits._
import docspell.common._
import docspell.joex.scheduler.{Context, Task}
import docspell.logging.Logger
import docspell.scheduler.Task
import docspell.store.Store
import docspell.store.records._
import org.http4s.blaze.client.BlazeClientBuilder
import org.http4s.client.Client
object CheckNodesTask {
def apply[F[_]: Async](
cfg: HouseKeepingConfig.CheckNodes
): Task[F, Unit, Unit] =
cfg: HouseKeepingConfig.CheckNodes,
store: Store[F]
): Task[F, Unit, CleanupResult] =
Task { ctx =>
if (cfg.enabled)
for {
_ <- ctx.logger.info("Check nodes reachability")
ec = scala.concurrent.ExecutionContext.global
_ <- BlazeClientBuilder[F].withExecutionContext(ec).resource.use { client =>
checkNodes(ctx, client)
checkNodes(ctx.logger, store, client)
}
_ <- ctx.logger.info(
s"Remove nodes not found more than ${cfg.minNotFound} times"
)
n <- removeNodes(ctx, cfg)
n <- removeNodes(store, cfg)
_ <- ctx.logger.info(s"Removed $n nodes")
} yield ()
} yield CleanupResult.of(n)
else
ctx.logger.info("CheckNodes task is disabled in the configuration")
ctx.logger.info("CheckNodes task is disabled in the configuration") *>
CleanupResult.disabled.pure[F]
}
def checkNodes[F[_]: Async](ctx: Context[F, _], client: Client[F]): F[Unit] =
ctx.store
def checkNodes[F[_]: Async](
logger: Logger[F],
store: Store[F],
client: Client[F]
): F[Unit] =
store
.transact(RNode.streamAll)
.evalMap(node =>
checkNode(ctx.logger, client)(node.url)
checkNode(logger, client)(node.url)
.flatMap(seen =>
if (seen) ctx.store.transact(RNode.resetNotFound(node.id))
else ctx.store.transact(RNode.incrementNotFound(node.id))
if (seen) store.transact(RNode.resetNotFound(node.id))
else store.transact(RNode.incrementNotFound(node.id))
)
)
.compile
@ -67,9 +74,9 @@ object CheckNodesTask {
}
def removeNodes[F[_]](
ctx: Context[F, _],
store: Store[F],
cfg: HouseKeepingConfig.CheckNodes
): F[Int] =
ctx.store.transact(RNode.deleteNotFound(cfg.minNotFound))
store.transact(RNode.deleteNotFound(cfg.minNotFound))
}

View File

@ -10,22 +10,27 @@ import cats.effect._
import cats.implicits._
import docspell.common._
import docspell.joex.scheduler.Task
import docspell.scheduler.Task
import docspell.store.Store
import docspell.store.records._
object CleanupInvitesTask {
def apply[F[_]: Sync](cfg: HouseKeepingConfig.CleanupInvites): Task[F, Unit, Unit] =
def apply[F[_]: Sync](
cfg: HouseKeepingConfig.CleanupInvites,
store: Store[F]
): Task[F, Unit, CleanupResult] =
Task { ctx =>
if (cfg.enabled)
for {
now <- Timestamp.current[F]
ts = now - cfg.olderThan
_ <- ctx.logger.info(s"Cleanup invitations older than $ts")
n <- ctx.store.transact(RInvitation.deleteOlderThan(ts))
n <- store.transact(RInvitation.deleteOlderThan(ts))
_ <- ctx.logger.info(s"Removed $n invitations")
} yield ()
} yield CleanupResult.of(n)
else
ctx.logger.info("CleanupInvites task is disabled in the configuration")
ctx.logger.info("CleanupInvites task is disabled in the configuration") *>
CleanupResult.disabled.pure[F]
}
}

View File

@ -11,24 +11,28 @@ import cats.implicits._
import fs2.Stream
import docspell.common._
import docspell.joex.scheduler.Task
import docspell.scheduler.Task
import docspell.store.Store
import docspell.store.records._
object CleanupJobsTask {
def apply[F[_]: Sync](cfg: HouseKeepingConfig.CleanupJobs): Task[F, Unit, Unit] =
def apply[F[_]: Sync](
cfg: HouseKeepingConfig.CleanupJobs,
store: Store[F]
): Task[F, Unit, CleanupResult] =
Task { ctx =>
if (cfg.enabled)
for {
now <- Timestamp.current[F]
ts = now - cfg.olderThan
_ <- ctx.logger.info(s"Cleanup jobs older than $ts")
n <- deleteDoneJobs(ctx.store, ts, cfg.deleteBatch)
n <- deleteDoneJobs(store, ts, cfg.deleteBatch)
_ <- ctx.logger.info(s"Removed $n jobs")
} yield ()
} yield CleanupResult.of(n)
else
ctx.logger.info("CleanupJobs task is disabled in the configuration")
ctx.logger.info("CleanupJobs task is disabled in the configuration") *>
CleanupResult.disabled.pure[F]
}
def deleteDoneJobs[F[_]: Sync](store: Store[F], ts: Timestamp, batch: Int): F[Int] =

View File

@ -10,22 +10,26 @@ import cats.effect._
import cats.implicits._
import docspell.common._
import docspell.joex.scheduler.Task
import docspell.scheduler.Task
import docspell.store.Store
import docspell.store.records._
object CleanupRememberMeTask {
def apply[F[_]: Sync](cfg: HouseKeepingConfig.CleanupRememberMe): Task[F, Unit, Unit] =
def apply[F[_]: Sync](
cfg: HouseKeepingConfig.CleanupRememberMe,
store: Store[F]
): Task[F, Unit, CleanupResult] =
Task { ctx =>
if (cfg.enabled)
for {
now <- Timestamp.current[F]
ts = now - cfg.olderThan
_ <- ctx.logger.info(s"Cleanup remember-me tokens older than $ts")
n <- ctx.store.transact(RRememberMe.deleteOlderThan(ts))
n <- store.transact(RRememberMe.deleteOlderThan(ts))
_ <- ctx.logger.info(s"Removed $n tokens")
} yield ()
} yield CleanupResult.of(n)
else
ctx.logger.info("CleanupRememberMe task is disabled in the configuration")
ctx.logger.info("CleanupRememberMe task is disabled in the configuration") *>
CleanupResult.disabled.pure[F]
}
}

View File

@ -0,0 +1,21 @@
/*
* Copyright 2020 Eike K. & Contributors
*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
package docspell.joex.hk
import io.circe.Encoder
import io.circe.generic.semiauto.deriveEncoder
/** Outcome of a single house-keeping cleanup step.
  *
  * @param removed
  *   the count reported by the step; `CleanupResult.of(n)` stores the number returned
  *   by the delete operation here
  * @param disabled
  *   `true` when the step did not run (see `CleanupResult.disabled`)
  */
case class CleanupResult(removed: Int, disabled: Boolean) {

  /** Short text used in task summaries: the literal `"disabled"` when the step did not
    * run, otherwise the removed count rendered as a decimal number.
    */
  def asString: String =
    if (disabled) "disabled" else removed.toString
}
object CleanupResult {

  /** Result for a step that ran and reported `n` removed entries. */
  def of(n: Int): CleanupResult =
    CleanupResult(removed = n, disabled = false)

  /** Result for a step that did not run; the removed count is zero. */
  def disabled: CleanupResult =
    CleanupResult(removed = 0, disabled = true)

  /** JSON encoder derived from the case class fields. */
  implicit val jsonEncoder: Encoder[CleanupResult] =
    deriveEncoder[CleanupResult]
}

View File

@ -16,7 +16,8 @@ case class HouseKeepingConfig(
cleanupInvites: CleanupInvites,
cleanupJobs: CleanupJobs,
cleanupRememberMe: CleanupRememberMe,
checkNodes: CheckNodes
checkNodes: CheckNodes,
integrityCheck: IntegrityCheck
)
object HouseKeepingConfig {
@ -29,4 +30,5 @@ object HouseKeepingConfig {
/** Settings for the node check performed during house-keeping.
  *
  * `minNotFound` is the value handed to `RNode.deleteNotFound` when nodes are removed.
  * NOTE(review): `enabled` presumably switches the step on/off like the other
  * house-keeping settings; its use is not visible here, so confirm.
  */
case class CheckNodes(enabled: Boolean, minNotFound: Int)
/** Settings for the file integrity check run as part of house-keeping.
  *
  * `IntegrityCheckTask` runs the check only when `enabled` is true; otherwise it logs
  * that the task is disabled and yields an empty result.
  */
case class IntegrityCheck(enabled: Boolean)
}

View File

@ -9,41 +9,75 @@ package docspell.joex.hk
import cats.effect._
import cats.implicits._
import docspell.backend.ops.OFileRepository
import docspell.common._
import docspell.joex.Config
import docspell.joex.scheduler.Task
import docspell.store.records._
import docspell.store.usertask.UserTaskScope
import docspell.joex.filecopy.FileIntegrityCheckTask
import docspell.scheduler.usertask.UserTask
import docspell.scheduler.{JobTaskResultEncoder, Task}
import docspell.store.Store
import com.github.eikek.calev._
import io.circe.Encoder
import io.circe.generic.semiauto.deriveEncoder
object HouseKeepingTask {
private val periodicId = Ident.unsafe("docspell-houskeeping")
val taskName: Ident = Ident.unsafe("housekeeping")
def apply[F[_]: Async](cfg: Config): Task[F, Unit, Unit] =
def apply[F[_]: Async](
cfg: Config,
store: Store[F],
fileRepo: OFileRepository[F]
): Task[F, Unit, Result] = {
val combined =
(
CheckNodesTask(cfg.houseKeeping.checkNodes, store),
CleanupInvitesTask(cfg.houseKeeping.cleanupInvites, store),
CleanupJobsTask(cfg.houseKeeping.cleanupJobs, store),
CleanupRememberMeTask(cfg.houseKeeping.cleanupRememberMe, store),
IntegrityCheckTask(cfg.houseKeeping.integrityCheck, store, fileRepo)
).mapN(Result.apply)
Task
.log[F, Unit](_.info(s"Running house-keeping task now"))
.flatMap(_ => CleanupInvitesTask(cfg.houseKeeping.cleanupInvites))
.flatMap(_ => CleanupRememberMeTask(cfg.houseKeeping.cleanupRememberMe))
.flatMap(_ => CleanupJobsTask(cfg.houseKeeping.cleanupJobs))
.flatMap(_ => CheckNodesTask(cfg.houseKeeping.checkNodes))
.flatMap(_ => combined)
}
/** Logs a warning that the house-keeping task is being cancelled; does nothing else. */
def onCancel[F[_]]: Task[F, Unit, Unit] =
  Task.log[F, Unit] { logger =>
    logger.warn("Cancelling house-keeping task")
  }
def periodicTask[F[_]: Sync](ce: CalEvent): F[RPeriodicTask] =
RPeriodicTask
.createJson(
true,
UserTaskScope(DocspellSystem.taskGroup),
def periodicTask[F[_]: Sync](ce: CalEvent): F[UserTask[Unit]] =
UserTask(
periodicId,
taskName,
(),
"Docspell house-keeping",
Priority.Low,
true,
ce,
None
"Docspell house-keeping".some,
()
).pure[F]
/** Combined outcome of one house-keeping run.
  *
  * `HouseKeepingTask.apply` builds it by combining the five sub-tasks with
  * `mapN(Result.apply)`, so the field order must match the order of the tasks in that
  * tuple: check-nodes, cleanup-invites, cleanup-jobs, cleanup-remember-me and the
  * integrity check.
  */
case class Result(
checkNodes: CleanupResult,
cleanupInvites: CleanupResult,
cleanupJobs: CleanupResult,
cleanupRememberMe: CleanupResult,
integrityCheck: FileIntegrityCheckTask.Result
)
.map(_.copy(id = periodicId))
object Result {

  /** JSON encoder derived from the fields of `Result`. */
  implicit val jsonEncoder: Encoder[Result] =
    deriveEncoder[Result]

  /** Encodes a `Result` as JSON and attaches a plain-text summary with one line per
    * house-keeping step.
    */
  implicit val jobTaskResultEncoder: JobTaskResultEncoder[Result] =
    JobTaskResultEncoder.fromJson[Result].withMessage { r =>
      // Each entry carries its own trailing newline except the last one, so plain
      // concatenation yields the final message.
      val summaryLines = List(
        s"- Nodes removed: ${r.checkNodes.asString}\n",
        s"- Invites removed: ${r.cleanupInvites.asString}\n",
        s"- Jobs removed: ${r.cleanupJobs.asString}\n",
        s"- RememberMe removed: ${r.cleanupRememberMe.asString}\n",
        s"- Integrity check: ok=${r.integrityCheck.ok}, failed=${r.integrityCheck.failedKeys.size}, notFound=${r.integrityCheck.notFoundKeys.size}"
      )
      summaryLines.mkString
    }
}
}

View File

@ -0,0 +1,34 @@
/*
* Copyright 2020 Eike K. & Contributors
*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
package docspell.joex.hk
import cats.effect._
import cats.implicits._
import docspell.backend.ops.OFileRepository
import docspell.common._
import docspell.joex.filecopy.FileIntegrityCheckTask
import docspell.scheduler.Task
import docspell.store.Store
object IntegrityCheckTask {

  /** House-keeping wrapper around `FileIntegrityCheckTask`.
    *
    * When the check is enabled in `cfg`, the file integrity task is run with arguments
    * built from `FileKeyPart.Empty`. When it is disabled, an info message is logged
    * and `FileIntegrityCheckTask.Result.empty` is returned.
    */
  def apply[F[_]: Sync](
      cfg: HouseKeepingConfig.IntegrityCheck,
      store: Store[F],
      fileRepo: OFileRepository[F]
  ): Task[F, Unit, FileIntegrityCheckTask.Result] =
    Task { ctx =>
      if (!cfg.enabled)
        ctx.logger
          .info("Integrity check task is disabled in the configuration")
          .as(FileIntegrityCheckTask.Result.empty)
      else {
        // Swap the unit arguments of the house-keeping context for the arguments the
        // integrity check task expects.
        val checkCtx = ctx.map(_ => FileIntegrityCheckArgs(FileKeyPart.Empty))
        FileIntegrityCheckTask(fileRepo, store).run(checkCtx)
      }
    }
}

View File

@ -14,8 +14,9 @@ import docspell.analysis.TextAnalyser
import docspell.backend.ops.OCollective
import docspell.common._
import docspell.joex.Config
import docspell.joex.scheduler._
import docspell.logging.Logger
import docspell.scheduler._
import docspell.store.Store
import docspell.store.records.{RClassifierModel, RClassifierSetting}
object LearnClassifierTask {
@ -29,14 +30,16 @@ object LearnClassifierTask {
def apply[F[_]: Async](
cfg: Config.TextAnalysis,
store: Store[F],
analyser: TextAnalyser[F]
): Task[F, Args, Unit] =
learnTags(cfg, analyser)
.flatMap(_ => learnItemEntities(cfg, analyser))
learnTags(cfg, store, analyser)
.flatMap(_ => learnItemEntities(cfg, store, analyser))
.flatMap(_ => Task(_ => Sync[F].delay(System.gc())))
private def learnItemEntities[F[_]: Async](
cfg: Config.TextAnalysis,
store: Store[F],
analyser: TextAnalyser[F]
): Task[F, Args, Unit] =
Task { ctx =>
@ -44,6 +47,7 @@ object LearnClassifierTask {
LearnItemEntities
.learnAll(
analyser,
store,
ctx.args.collective,
cfg.classification.itemCount,
cfg.maxLength
@ -54,16 +58,17 @@ object LearnClassifierTask {
private def learnTags[F[_]: Async](
cfg: Config.TextAnalysis,
store: Store[F],
analyser: TextAnalyser[F]
): Task[F, Args, Unit] =
Task { ctx =>
val learnTags =
for {
sett <- findActiveSettings[F](ctx, cfg)
sett <- findActiveSettings[F](ctx, store, cfg)
maxItems = cfg.classification.itemCountOrWhenLower(sett.itemCount)
_ <- OptionT.liftF(
LearnTags
.learnAllTagCategories(analyser)(
.learnAllTagCategories(analyser, store)(
ctx.args.collective,
maxItems,
cfg.maxLength
@ -74,34 +79,38 @@ object LearnClassifierTask {
// learn classifier models from active tag categories
learnTags.getOrElseF(logInactiveWarning(ctx.logger)) *>
// delete classifier model files for categories that have been removed
clearObsoleteTagModels(ctx) *>
clearObsoleteTagModels(ctx, store) *>
// when tags are deleted, categories may get removed. fix the json array
ctx.store
store
.transact(RClassifierSetting.fixCategoryList(ctx.args.collective))
.map(_ => ())
}
private def clearObsoleteTagModels[F[_]: Sync](ctx: Context[F, Args]): F[Unit] =
private def clearObsoleteTagModels[F[_]: Sync](
ctx: Context[F, Args],
store: Store[F]
): F[Unit] =
for {
list <- ctx.store.transact(
list <- store.transact(
ClassifierName.findOrphanTagModels(ctx.args.collective)
)
_ <- ctx.logger.info(
s"Found ${list.size} obsolete model files that are deleted now."
)
n <- ctx.store.transact(RClassifierModel.deleteAll(list.map(_.id)))
n <- store.transact(RClassifierModel.deleteAll(list.map(_.id)))
_ <- list
.map(_.fileId)
.traverse(id => ctx.store.fileRepo.delete(id))
.traverse(id => store.fileRepo.delete(id))
_ <- ctx.logger.debug(s"Deleted $n model files.")
} yield ()
private def findActiveSettings[F[_]: Sync](
ctx: Context[F, Args],
store: Store[F],
cfg: Config.TextAnalysis
): OptionT[F, OCollective.Classifier] =
if (cfg.classification.enabled)
OptionT(ctx.store.transact(RClassifierSetting.findById(ctx.args.collective)))
OptionT(store.transact(RClassifierSetting.findById(ctx.args.collective)))
.filter(_.autoTagEnabled)
.map(OCollective.Classifier.fromRecord)
else

View File

@ -14,72 +14,81 @@ import fs2.Stream
import docspell.analysis.TextAnalyser
import docspell.analysis.classifier.TextClassifier.Data
import docspell.common._
import docspell.joex.scheduler._
import docspell.scheduler._
import docspell.store.Store
object LearnItemEntities {
def learnAll[F[_]: Async, A](
analyser: TextAnalyser[F],
store: Store[F],
collective: Ident,
maxItems: Int,
maxTextLen: Int
): Task[F, A, Unit] =
learnCorrOrg(analyser, collective, maxItems, maxTextLen)
.flatMap(_ => learnCorrPerson[F, A](analyser, collective, maxItems, maxTextLen))
.flatMap(_ => learnConcPerson(analyser, collective, maxItems, maxTextLen))
.flatMap(_ => learnConcEquip(analyser, collective, maxItems, maxTextLen))
learnCorrOrg[F, A](analyser, store, collective, maxItems, maxTextLen)
.flatMap(_ =>
learnCorrPerson[F, A](analyser, store, collective, maxItems, maxTextLen)
)
.flatMap(_ => learnConcPerson(analyser, store, collective, maxItems, maxTextLen))
.flatMap(_ => learnConcEquip(analyser, store, collective, maxItems, maxTextLen))
def learnCorrOrg[F[_]: Async, A](
analyser: TextAnalyser[F],
store: Store[F],
collective: Ident,
maxItems: Int,
maxTextLen: Int
): Task[F, A, Unit] =
learn(analyser, collective)(
learn(store, analyser, collective)(
ClassifierName.correspondentOrg,
ctx => SelectItems.forCorrOrg(ctx.store, collective, maxItems, maxTextLen)
_ => SelectItems.forCorrOrg(store, collective, maxItems, maxTextLen)
)
def learnCorrPerson[F[_]: Async, A](
analyser: TextAnalyser[F],
store: Store[F],
collective: Ident,
maxItems: Int,
maxTextLen: Int
): Task[F, A, Unit] =
learn(analyser, collective)(
learn(store, analyser, collective)(
ClassifierName.correspondentPerson,
ctx => SelectItems.forCorrPerson(ctx.store, collective, maxItems, maxTextLen)
_ => SelectItems.forCorrPerson(store, collective, maxItems, maxTextLen)
)
def learnConcPerson[F[_]: Async, A](
analyser: TextAnalyser[F],
store: Store[F],
collective: Ident,
maxItems: Int,
maxTextLen: Int
): Task[F, A, Unit] =
learn(analyser, collective)(
learn(store, analyser, collective)(
ClassifierName.concernedPerson,
ctx => SelectItems.forConcPerson(ctx.store, collective, maxItems, maxTextLen)
_ => SelectItems.forConcPerson(store, collective, maxItems, maxTextLen)
)
def learnConcEquip[F[_]: Async, A](
analyser: TextAnalyser[F],
store: Store[F],
collective: Ident,
maxItems: Int,
maxTextLen: Int
): Task[F, A, Unit] =
learn(analyser, collective)(
learn(store, analyser, collective)(
ClassifierName.concernedEquip,
ctx => SelectItems.forConcEquip(ctx.store, collective, maxItems, maxTextLen)
_ => SelectItems.forConcEquip(store, collective, maxItems, maxTextLen)
)
private def learn[F[_]: Async, A](
store: Store[F],
analyser: TextAnalyser[F],
collective: Ident
)(cname: ClassifierName, data: Context[F, _] => Stream[F, Data]): Task[F, A, Unit] =
Task { ctx =>
ctx.logger.info(s"Learn classifier ${cname.name}") *>
analyser.classifier.trainClassifier(ctx.logger, data(ctx))(
Kleisli(StoreClassifierModel.handleModel(ctx, collective, cname))
Kleisli(StoreClassifierModel.handleModel(store, ctx.logger, collective, cname))
)
}
}

View File

@ -12,13 +12,15 @@ import cats.implicits._
import docspell.analysis.TextAnalyser
import docspell.common._
import docspell.joex.scheduler._
import docspell.scheduler._
import docspell.store.Store
import docspell.store.records.RClassifierSetting
object LearnTags {
def learnTagCategory[F[_]: Async, A](
analyser: TextAnalyser[F],
store: Store[F],
collective: Ident,
maxItems: Int,
maxTextLen: Int
@ -26,12 +28,14 @@ object LearnTags {
category: String
): Task[F, A, Unit] =
Task { ctx =>
val data = SelectItems.forCategory(ctx, collective)(maxItems, category, maxTextLen)
val data =
SelectItems.forCategory(store, collective)(maxItems, category, maxTextLen)
ctx.logger.info(s"Learn classifier for tag category: $category") *>
analyser.classifier.trainClassifier(ctx.logger, data)(
Kleisli(
StoreClassifierModel.handleModel(
ctx,
store,
ctx.logger,
collective,
ClassifierName.tagCategory(category)
)
@ -39,15 +43,15 @@ object LearnTags {
)
}
def learnAllTagCategories[F[_]: Async, A](analyser: TextAnalyser[F])(
def learnAllTagCategories[F[_]: Async, A](analyser: TextAnalyser[F], store: Store[F])(
collective: Ident,
maxItems: Int,
maxTextLen: Int
): Task[F, A, Unit] =
Task { ctx =>
for {
cats <- ctx.store.transact(RClassifierSetting.getActiveCategories(collective))
task = learnTagCategory[F, A](analyser, collective, maxItems, maxTextLen) _
cats <- store.transact(RClassifierSetting.getActiveCategories(collective))
task = learnTagCategory[F, A](analyser, store, collective, maxItems, maxTextLen) _
_ <- cats.map(task).traverse(_.run(ctx))
} yield ()
}

View File

@ -10,7 +10,6 @@ import fs2.{Pipe, Stream}
import docspell.analysis.classifier.TextClassifier.Data
import docspell.common._
import docspell.joex.scheduler.Context
import docspell.store.Store
import docspell.store.qb.Batch
import docspell.store.queries.{QItem, TextAndTag}
@ -21,16 +20,7 @@ object SelectItems {
val pageSep = LearnClassifierTask.pageSep
val noClass = LearnClassifierTask.noClass
def forCategory[F[_]](ctx: Context[F, _], collective: Ident)(
maxItems: Int,
category: String,
maxTextLen: Int
): Stream[F, Data] =
forCategory(ctx.store, collective, maxItems, category, maxTextLen)
def forCategory[F[_]](
store: Store[F],
collective: Ident,
def forCategory[F[_]](store: Store[F], collective: Ident)(
maxItems: Int,
category: String,
maxTextLen: Int

View File

@ -12,7 +12,6 @@ import fs2.io.file.Files
import docspell.analysis.classifier.ClassifierModel
import docspell.common._
import docspell.joex.scheduler._
import docspell.logging.Logger
import docspell.store.Store
import docspell.store.records.RClassifierModel
@ -20,21 +19,12 @@ import docspell.store.records.RClassifierModel
object StoreClassifierModel {
def handleModel[F[_]: Async](
ctx: Context[F, _],
store: Store[F],
logger: Logger[F],
collective: Ident,
modelName: ClassifierName
)(
trainedModel: ClassifierModel
): F[Unit] =
handleModel(ctx.store, ctx.logger)(collective, modelName, trainedModel)
def handleModel[F[_]: Async](
store: Store[F],
logger: Logger[F]
)(
collective: Ident,
modelName: ClassifierName,
trainedModel: ClassifierModel
): F[Unit] =
for {
oldFile <- store.transact(

View File

@ -70,7 +70,7 @@ object ReadMail {
HtmlBodyViewConfig.default.copy(
textToHtml = MarkdownBody.makeHtml(markdownCfg)
)
).map(makeHtmlBinary[F] _).map(b => Some(b))
).map(makeHtmlBinary[F]).map(b => Some(b))
}
for {

View File

@ -12,14 +12,15 @@ import cats.implicits._
import docspell.backend.ops.ONotification
import docspell.common._
import docspell.joex.scheduler.Context
import docspell.joex.scheduler.Task
import docspell.notification.api.EventContext
import docspell.notification.api.NotificationChannel
import docspell.notification.api.PeriodicDueItemsArgs
import docspell.query.Date
import docspell.query.ItemQuery._
import docspell.query.ItemQueryDsl._
import docspell.scheduler.Context
import docspell.scheduler.Task
import docspell.store.Store
import docspell.store.qb.Batch
import docspell.store.queries.ListItem
import docspell.store.queries.{QItem, Query}
@ -32,11 +33,14 @@ object PeriodicDueItemsTask {
/** Logs a warning that names this task's id; performs no other work. */
def onCancel[F[_]]: Task[F, Args, Unit] =
  Task.log[F, Args] { logger =>
    logger.warn(s"Cancelling ${taskName.id} task")
  }
def apply[F[_]: Sync](notificationOps: ONotification[F]): Task[F, Args, Unit] =
def apply[F[_]: Sync](
store: Store[F],
notificationOps: ONotification[F]
): Task[F, Args, Unit] =
Task { ctx =>
val limit = 7
Timestamp.current[F].flatMap { now =>
withItems(ctx, limit, now) { items =>
withItems(ctx, store, limit, now) { items =>
withEventContext(ctx, items, limit, now) { eventCtx =>
withChannel(ctx, notificationOps) { channels =>
notificationOps.sendMessage(ctx.logger, eventCtx, channels)
@ -51,7 +55,12 @@ object PeriodicDueItemsTask {
): F[Unit] =
TaskOperations.withChannel(ctx.logger, ctx.args.channels, ctx.args.account, ops)(cont)
def withItems[F[_]: Sync](ctx: Context[F, Args], limit: Int, now: Timestamp)(
def withItems[F[_]: Sync](
ctx: Context[F, Args],
store: Store[F],
limit: Int,
now: Timestamp
)(
cont: Vector[ListItem] => F[Unit]
): F[Unit] = {
val rightDate = Date((now + Duration.days(ctx.args.remindDays.toLong)).toMillis)
@ -77,7 +86,7 @@ object PeriodicDueItemsTask {
for {
res <-
ctx.store
store
.transact(
QItem
.findItems(q, now.toUtcDate, 0, Batch.limit(limit))

View File

@ -13,8 +13,6 @@ import cats.implicits._
import docspell.backend.ops.ONotification
import docspell.common._
import docspell.joex.scheduler.Context
import docspell.joex.scheduler.Task
import docspell.notification.api.EventContext
import docspell.notification.api.NotificationChannel
import docspell.notification.api.PeriodicQueryArgs
@ -22,6 +20,9 @@ import docspell.query.ItemQuery
import docspell.query.ItemQuery.Expr
import docspell.query.ItemQuery.Expr.AndExpr
import docspell.query.ItemQueryParser
import docspell.scheduler.Context
import docspell.scheduler.Task
import docspell.store.Store
import docspell.store.qb.Batch
import docspell.store.queries.ListItem
import docspell.store.queries.{QItem, Query}
@ -36,11 +37,14 @@ object PeriodicQueryTask {
/** Logs a warning that names this task's id; performs no other work. */
def onCancel[F[_]]: Task[F, Args, Unit] =
  Task.log[F, Args] { logger =>
    logger.warn(s"Cancelling ${taskName.id} task")
  }
def apply[F[_]: Sync](notificationOps: ONotification[F]): Task[F, Args, Unit] =
def apply[F[_]: Sync](
store: Store[F],
notificationOps: ONotification[F]
): Task[F, Args, Unit] =
Task { ctx =>
val limit = 7
Timestamp.current[F].flatMap { now =>
withItems(ctx, limit, now) { items =>
withItems(ctx, store, limit, now) { items =>
withEventContext(ctx, items, limit, now) { eventCtx =>
withChannel(ctx, notificationOps) { channels =>
notificationOps.sendMessage(ctx.logger, eventCtx, channels)
@ -58,9 +62,11 @@ object PeriodicQueryTask {
// Renders the given query expression as text by delegating to
// `ItemQueryParser.asString`.
private def queryString(q: ItemQuery.Expr) =
ItemQueryParser.asString(q)
def withQuery[F[_]: Sync](ctx: Context[F, Args])(cont: Query => F[Unit]): F[Unit] = {
def withQuery[F[_]: Sync](ctx: Context[F, Args], store: Store[F])(
cont: Query => F[Unit]
): F[Unit] = {
def fromBookmark(id: String) =
ctx.store
store
.transact(RQueryBookmark.findByNameOrId(ctx.args.account, id))
.map(_.map(_.query))
.flatTap(q =>
@ -68,7 +74,7 @@ object PeriodicQueryTask {
)
def fromShare(id: String) =
ctx.store
store
.transact(RShare.findOneByCollective(ctx.args.account.collective, Some(true), id))
.map(_.map(_.query))
.flatTap(q =>
@ -120,11 +126,16 @@ object PeriodicQueryTask {
}
}
def withItems[F[_]: Sync](ctx: Context[F, Args], limit: Int, now: Timestamp)(
def withItems[F[_]: Sync](
ctx: Context[F, Args],
store: Store[F],
limit: Int,
now: Timestamp
)(
cont: Vector[ListItem] => F[Unit]
): F[Unit] =
withQuery(ctx) { query =>
val items = ctx.store
withQuery(ctx, store) { query =>
val items = store
.transact(QItem.findItems(query, now.toUtcDate, 0, Batch.limit(limit)))
.compile
.to(Vector)

View File

@ -13,22 +13,24 @@ import fs2.{Chunk, Stream}
import docspell.backend.JobFactory
import docspell.backend.ops.OJoex
import docspell.common._
import docspell.joex.scheduler.Context
import docspell.joex.scheduler.Task
import docspell.store.queue.JobQueue
import docspell.scheduler._
import docspell.store.Store
import docspell.store.records.RAttachment
import docspell.store.records.RJob
object AllPageCountTask {
val taskName = Ident.unsafe("all-page-count")
type Args = Unit
def apply[F[_]: Sync](queue: JobQueue[F], joex: OJoex[F]): Task[F, Args, Unit] =
def apply[F[_]: Sync](
store: Store[F],
jobStore: JobStore[F],
joex: OJoex[F]
): Task[F, Args, Unit] =
Task { ctx =>
for {
_ <- ctx.logger.info("Generating previews for attachments")
n <- submitConversionJobs(ctx, queue)
n <- submitConversionJobs(ctx, store, jobStore)
_ <- ctx.logger.info(s"Submitted $n jobs")
_ <- joex.notifyAllNodes
} yield ()
@ -39,14 +41,15 @@ object AllPageCountTask {
def submitConversionJobs[F[_]: Sync](
ctx: Context[F, Args],
queue: JobQueue[F]
store: Store[F],
jobStore: JobStore[F]
): F[Int] =
ctx.store
store
.transact(findAttachments)
.chunks
.flatMap(createJobs[F])
.chunks
.evalMap(jobs => queue.insertAllIfNew(jobs.toVector).map(_ => jobs.size))
.evalMap(jobs => jobStore.insertAllIfNew(jobs.toVector).map(_ => jobs.size))
.evalTap(n => ctx.logger.debug(s"Submitted $n jobs …"))
.compile
.foldMonoid
@ -54,28 +57,25 @@ object AllPageCountTask {
// Query for the attachments to process, obtained from
// `RAttachment.findAllWithoutPageCount`.
// NOTE(review): the literal 50 is presumably a chunk/batch size; confirm against
// RAttachment. The type parameter `F` is not referenced in the body.
private def findAttachments[F[_]] =
RAttachment.findAllWithoutPageCount(50)
private def createJobs[F[_]: Sync](ras: Chunk[RAttachment]): Stream[F, RJob] = {
def mkJob(ra: RAttachment): F[RJob] =
private def createJobs[F[_]: Sync](ras: Chunk[RAttachment]): Stream[F, Job[String]] = {
def mkJob(ra: RAttachment): F[Job[MakePageCountArgs]] =
JobFactory.makePageCount(MakePageCountArgs(ra.id), None)
val jobs = ras.traverse(mkJob)
Stream.evalUnChunk(jobs)
Stream.evalUnChunk(jobs).map(_.encode)
}
def job[F[_]: Sync]: F[RJob] =
for {
id <- Ident.randomId[F]
now <- Timestamp.current[F]
} yield RJob.newJob(
id,
def job[F[_]: Sync]: F[Job[String]] =
Job
.createNew(
AllPageCountTask.taskName,
DocspellSystem.taskGroup,
(),
"Create all page-counts",
now,
DocspellSystem.taskGroup,
Priority.Low,
Some(DocspellSystem.allPageCountTaskTracker)
)
.map(_.encode)
}

View File

@ -11,8 +11,9 @@ import cats.implicits._
import docspell.common._
import docspell.joex.process.AttachmentPageCount
import docspell.joex.scheduler.Context
import docspell.joex.scheduler.Task
import docspell.scheduler.Context
import docspell.scheduler.Task
import docspell.store.Store
import docspell.store.records.RAttachment
import docspell.store.records.RAttachmentMeta
@ -20,10 +21,10 @@ object MakePageCountTask {
type Args = MakePageCountArgs
def apply[F[_]: Sync](): Task[F, Args, Unit] =
def apply[F[_]: Sync](store: Store[F]): Task[F, Args, Unit] =
Task { ctx =>
for {
exists <- pageCountExists(ctx)
exists <- pageCountExists(ctx, store)
_ <-
if (exists)
ctx.logger.info(
@ -32,7 +33,7 @@ object MakePageCountTask {
else
ctx.logger.info(
s"Reading page-count for attachment ${ctx.args.attachment}"
) *> generatePageCount(ctx)
) *> generatePageCount(ctx, store)
} yield ()
}
@ -40,19 +41,20 @@ object MakePageCountTask {
Task.log(_.warn("Cancelling make-page-count task"))
private def generatePageCount[F[_]: Sync](
ctx: Context[F, Args]
ctx: Context[F, Args],
store: Store[F]
): F[Unit] =
for {
ra <- ctx.store.transact(RAttachment.findById(ctx.args.attachment))
ra <- store.transact(RAttachment.findById(ctx.args.attachment))
_ <- ra
.map(AttachmentPageCount.createPageCount(ctx))
.map(AttachmentPageCount.createPageCount(ctx, store))
.getOrElse(
ctx.logger.warn(s"No attachment found with id: ${ctx.args.attachment}")
)
} yield ()
private def pageCountExists[F[_]](ctx: Context[F, Args]): F[Boolean] =
ctx.store.transact(
private def pageCountExists[F[_]](ctx: Context[F, Args], store: Store[F]): F[Boolean] =
store.transact(
RAttachmentMeta
.findPageCountById(ctx.args.attachment)
.map(_.exists(_ > 0))

View File

@ -12,10 +12,9 @@ import fs2.{Chunk, Stream}
import docspell.backend.ops.OJoex
import docspell.common._
import docspell.joex.scheduler.{Context, Task}
import docspell.store.queue.JobQueue
import docspell.scheduler._
import docspell.store.Store
import docspell.store.records.RAttachment
import docspell.store.records._
/* A task to find all non-converted pdf files (of a collective, or
* all) and converting them using ocrmypdf by submitting a job for
@ -24,11 +23,15 @@ import docspell.store.records._
object ConvertAllPdfTask {
type Args = ConvertAllPdfArgs
def apply[F[_]: Sync](queue: JobQueue[F], joex: OJoex[F]): Task[F, Args, Unit] =
def apply[F[_]: Sync](
jobStore: JobStore[F],
joex: OJoex[F],
store: Store[F]
): Task[F, Args, Unit] =
Task { ctx =>
for {
_ <- ctx.logger.info("Converting pdfs using ocrmypdf")
n <- submitConversionJobs(ctx, queue)
n <- submitConversionJobs(ctx, store, jobStore)
_ <- ctx.logger.info(s"Submitted $n file conversion jobs")
_ <- joex.notifyAllNodes
} yield ()
@ -39,40 +42,36 @@ object ConvertAllPdfTask {
def submitConversionJobs[F[_]: Sync](
ctx: Context[F, Args],
queue: JobQueue[F]
store: Store[F],
jobStore: JobStore[F]
): F[Int] =
ctx.store
store
.transact(RAttachment.findNonConvertedPdf(ctx.args.collective, 50))
.chunks
.flatMap(createJobs[F](ctx))
.chunks
.evalMap(jobs => queue.insertAllIfNew(jobs.toVector).map(_ => jobs.size))
.evalMap(jobs => jobStore.insertAllIfNew(jobs.toVector).map(_ => jobs.size))
.evalTap(n => ctx.logger.debug(s"Submitted $n jobs …"))
.compile
.foldMonoid
private def createJobs[F[_]: Sync](
ctx: Context[F, Args]
)(ras: Chunk[RAttachment]): Stream[F, RJob] = {
)(ras: Chunk[RAttachment]): Stream[F, Job[String]] = {
val collectiveOrSystem = ctx.args.collective.getOrElse(DocspellSystem.taskGroup)
def mkJob(ra: RAttachment): F[RJob] =
for {
id <- Ident.randomId[F]
now <- Timestamp.current[F]
} yield RJob.newJob(
id,
def mkJob(ra: RAttachment): F[Job[PdfConvTask.Args]] =
Job.createNew(
PdfConvTask.taskName,
collectiveOrSystem,
PdfConvTask.Args(ra.id),
s"Convert pdf ${ra.id.id}/${ra.name.getOrElse("-")}",
now,
collectiveOrSystem,
Priority.Low,
Some(PdfConvTask.taskName / ra.id)
)
val jobs = ras.traverse(mkJob)
Stream.evalUnChunk(jobs)
Stream.evalUnChunk(jobs).map(_.encode)
}
}

View File

@ -16,7 +16,8 @@ import docspell.common._
import docspell.convert.ConversionResult
import docspell.convert.extern.OcrMyPdf
import docspell.joex.Config
import docspell.joex.scheduler.{Context, Task}
import docspell.scheduler.{Context, Task}
import docspell.store.Store
import docspell.store.records._
import io.circe.generic.semiauto._
@ -36,12 +37,12 @@ object PdfConvTask {
val taskName = Ident.unsafe("pdf-files-migration")
def apply[F[_]: Async](cfg: Config): Task[F, Args, Unit] =
def apply[F[_]: Async](cfg: Config, store: Store[F]): Task[F, Args, Unit] =
Task { ctx =>
for {
_ <- ctx.logger.info(s"Converting pdf file ${ctx.args} using ocrmypdf")
meta <- checkInputs(cfg, ctx)
_ <- meta.traverse(fm => convert(cfg, ctx, fm))
meta <- checkInputs(cfg, ctx, store)
_ <- meta.traverse(fm => convert(cfg, ctx, store, fm))
} yield ()
}
@ -53,19 +54,20 @@ object PdfConvTask {
// check if file exists and if it is pdf and if source id is the same and if ocrmypdf is enabled
def checkInputs[F[_]: Sync](
cfg: Config,
ctx: Context[F, Args]
ctx: Context[F, Args],
store: Store[F]
): F[Option[RFileMeta]] = {
val none: Option[RFileMeta] = None
val checkSameFiles =
(for {
ra <- OptionT(ctx.store.transact(RAttachment.findById(ctx.args.attachId)))
ra <- OptionT(store.transact(RAttachment.findById(ctx.args.attachId)))
isSame <- OptionT.liftF(
ctx.store.transact(RAttachmentSource.isSameFile(ra.id, ra.fileId))
store.transact(RAttachmentSource.isSameFile(ra.id, ra.fileId))
)
} yield isSame).getOrElse(false)
val existsPdf =
for {
meta <- ctx.store.transact(RAttachment.findMeta(ctx.args.attachId))
meta <- store.transact(RAttachment.findMeta(ctx.args.attachId))
res = meta.filter(_.mimetype.matches(MimeType.pdf))
_ <-
if (res.isEmpty)
@ -90,18 +92,19 @@ object PdfConvTask {
def convert[F[_]: Async](
cfg: Config,
ctx: Context[F, Args],
store: Store[F],
in: RFileMeta
): F[Unit] = {
val fs = ctx.store.fileRepo
val fs = store.fileRepo
val data = fs.getBytes(in.id)
val storeResult: ConversionResult.Handler[F, Unit] =
Kleisli {
case ConversionResult.SuccessPdf(file) =>
storeToAttachment(ctx, in, file)
storeToAttachment(ctx, store, in, file)
case ConversionResult.SuccessPdfTxt(file, _) =>
storeToAttachment(ctx, in, file)
storeToAttachment(ctx, store, in, file)
case ConversionResult.UnsupportedFormat(mime) =>
ctx.logger.warn(
@ -124,19 +127,20 @@ object PdfConvTask {
)(data, storeResult)
for {
lang <- getLanguage(ctx)
lang <- getLanguage(ctx, store)
_ <- ocrMyPdf(lang)
} yield ()
}
def getLanguage[F[_]: Sync](ctx: Context[F, Args]): F[Language] =
def getLanguage[F[_]: Sync](ctx: Context[F, Args], store: Store[F]): F[Language] =
(for {
coll <- OptionT(ctx.store.transact(RCollective.findByAttachment(ctx.args.attachId)))
coll <- OptionT(store.transact(RCollective.findByAttachment(ctx.args.attachId)))
lang = coll.language
} yield lang).getOrElse(Language.German)
def storeToAttachment[F[_]: Sync](
ctx: Context[F, Args],
store: Store[F],
meta: RFileMeta,
newFile: Stream[F, Byte]
): F[Unit] = {
@ -146,10 +150,10 @@ object PdfConvTask {
for {
fid <-
newFile
.through(ctx.store.fileRepo.save(collective, cat, mimeHint))
.through(store.fileRepo.save(collective, cat, mimeHint))
.compile
.lastOrError
_ <- ctx.store.transact(RAttachment.updateFileId(ctx.args.attachId, fid))
_ <- store.transact(RAttachment.updateFileId(ctx.args.attachId, fid))
} yield ()
}
}

View File

@ -14,21 +14,23 @@ import docspell.backend.JobFactory
import docspell.backend.ops.OJoex
import docspell.common.MakePreviewArgs.StoreMode
import docspell.common._
import docspell.joex.scheduler.Context
import docspell.joex.scheduler.Task
import docspell.store.queue.JobQueue
import docspell.scheduler._
import docspell.store.Store
import docspell.store.records.RAttachment
import docspell.store.records.RJob
object AllPreviewsTask {
type Args = AllPreviewsArgs
def apply[F[_]: Sync](queue: JobQueue[F], joex: OJoex[F]): Task[F, Args, Unit] =
def apply[F[_]: Sync](
jobStore: JobStore[F],
joex: OJoex[F],
store: Store[F]
): Task[F, Args, Unit] =
Task { ctx =>
for {
_ <- ctx.logger.info("Generating previews for attachments")
n <- submitConversionJobs(ctx, queue)
n <- submitConversionJobs(ctx, store, jobStore)
_ <- ctx.logger.info(s"Submitted $n jobs")
_ <- joex.notifyAllNodes
} yield ()
@ -39,14 +41,17 @@ object AllPreviewsTask {
def submitConversionJobs[F[_]: Sync](
ctx: Context[F, Args],
queue: JobQueue[F]
store: Store[F],
jobStore: JobStore[F]
): F[Int] =
ctx.store
store
.transact(findAttachments(ctx))
.chunks
.flatMap(createJobs[F](ctx))
.chunks
.evalMap(jobs => queue.insertAllIfNew(jobs.toVector).map(_ => jobs.size))
.evalMap(jobs =>
jobStore.insertAllIfNew(jobs.map(_.encode).toVector).map(_ => jobs.size)
)
.evalTap(n => ctx.logger.debug(s"Submitted $n jobs …"))
.compile
.foldMonoid
@ -61,13 +66,13 @@ object AllPreviewsTask {
private def createJobs[F[_]: Sync](
ctx: Context[F, Args]
)(ras: Chunk[RAttachment]): Stream[F, RJob] = {
)(ras: Chunk[RAttachment]): Stream[F, Job[MakePreviewArgs]] = {
val collectiveOrSystem = {
val cid = ctx.args.collective.getOrElse(DocspellSystem.taskGroup)
AccountId(cid, DocspellSystem.user)
}
def mkJob(ra: RAttachment): F[RJob] =
def mkJob(ra: RAttachment): F[Job[MakePreviewArgs]] =
JobFactory.makePreview(
MakePreviewArgs(ra.id, ctx.args.storeMode),
collectiveOrSystem.some
@ -77,7 +82,10 @@ object AllPreviewsTask {
Stream.evalUnChunk(jobs)
}
def job[F[_]: Sync](storeMode: MakePreviewArgs.StoreMode, cid: Option[Ident]): F[RJob] =
JobFactory.allPreviews(AllPreviewsArgs(cid, storeMode), None)
def job[F[_]: Sync](
storeMode: MakePreviewArgs.StoreMode,
cid: Option[Ident]
): F[Job[String]] =
JobFactory.allPreviews(AllPreviewsArgs(cid, storeMode), None).map(_.encode)
}

View File

@ -13,8 +13,9 @@ import docspell.common._
import docspell.extract.pdfbox.PdfboxPreview
import docspell.extract.pdfbox.PreviewConfig
import docspell.joex.process.AttachmentPreview
import docspell.joex.scheduler.Context
import docspell.joex.scheduler.Task
import docspell.scheduler.Context
import docspell.scheduler.Task
import docspell.store.Store
import docspell.store.records.RAttachment
import docspell.store.records.RAttachmentPreview
@ -22,10 +23,10 @@ object MakePreviewTask {
type Args = MakePreviewArgs
def apply[F[_]: Sync](pcfg: PreviewConfig): Task[F, Args, Unit] =
def apply[F[_]: Sync](pcfg: PreviewConfig, store: Store[F]): Task[F, Args, Unit] =
Task { ctx =>
for {
exists <- previewExists(ctx)
exists <- previewExists(ctx, store)
preview <- PdfboxPreview(pcfg)
_ <-
if (exists)
@ -35,7 +36,7 @@ object MakePreviewTask {
else
ctx.logger.info(
s"Generating preview image for attachment ${ctx.args.attachment}"
) *> generatePreview(ctx, preview)
) *> generatePreview(ctx, store, preview)
} yield ()
}
@ -44,20 +45,24 @@ object MakePreviewTask {
private def generatePreview[F[_]: Sync](
ctx: Context[F, Args],
store: Store[F],
preview: PdfboxPreview[F]
): F[Unit] =
for {
ra <- ctx.store.transact(RAttachment.findById(ctx.args.attachment))
ra <- store.transact(RAttachment.findById(ctx.args.attachment))
_ <- ra
.map(AttachmentPreview.createPreview(ctx, preview))
.map(AttachmentPreview.createPreview(ctx, store, preview))
.getOrElse(
ctx.logger.error(s"No attachment found with id: ${ctx.args.attachment}")
)
} yield ()
private def previewExists[F[_]: Sync](ctx: Context[F, Args]): F[Boolean] =
private def previewExists[F[_]: Sync](
ctx: Context[F, Args],
store: Store[F]
): F[Boolean] =
if (ctx.args.store == MakePreviewArgs.StoreMode.WhenMissing)
ctx.store.transact(
store.transact(
RAttachmentPreview.findById(ctx.args.attachment).map(_.isDefined)
)
else

View File

@ -15,7 +15,8 @@ import fs2.Stream
import docspell.common._
import docspell.extract.pdfbox.PdfMetaData
import docspell.extract.pdfbox.PdfboxExtract
import docspell.joex.scheduler._
import docspell.scheduler._
import docspell.store.Store
import docspell.store.records.RAttachment
import docspell.store.records._
@ -24,7 +25,7 @@ import docspell.store.records._
*/
object AttachmentPageCount {
def apply[F[_]: Sync]()(
def apply[F[_]: Sync](store: Store[F])(
item: ItemData
): Task[F, ProcessItemArgs, ItemData] =
Task { ctx =>
@ -33,7 +34,7 @@ object AttachmentPageCount {
s"Retrieving page count for ${item.attachments.size} files…"
)
_ <- item.attachments
.traverse(createPageCount(ctx))
.traverse(createPageCount(ctx, store))
.attempt
.flatMap {
case Right(_) => ().pure[F]
@ -46,14 +47,15 @@ object AttachmentPageCount {
}
def createPageCount[F[_]: Sync](
ctx: Context[F, _]
ctx: Context[F, _],
store: Store[F]
)(ra: RAttachment): F[Option[PdfMetaData]] =
findMime[F](ctx)(ra).flatMap {
findMime[F](store)(ra).flatMap {
case MimeType.PdfMatch(_) =>
PdfboxExtract.getMetaData(loadFile(ctx)(ra)).flatMap {
PdfboxExtract.getMetaData(loadFile(store)(ra)).flatMap {
case Right(md) =>
ctx.logger.debug(s"Found number of pages: ${md.pageCount}") *>
updatePageCount(ctx, md, ra).map(_.some)
updatePageCount(ctx, store, md, ra).map(_.some)
case Left(ex) =>
ctx.logger.warn(s"Error obtaining pages count: ${ex.getMessage}") *>
(None: Option[PdfMetaData]).pure[F]
@ -66,6 +68,7 @@ object AttachmentPageCount {
private def updatePageCount[F[_]: Sync](
ctx: Context[F, _],
store: Store[F],
md: PdfMetaData,
ra: RAttachment
): F[PdfMetaData] =
@ -73,12 +76,12 @@ object AttachmentPageCount {
_ <- ctx.logger.debug(
s"Update attachment ${ra.id.id} with page count ${md.pageCount.some}"
)
n <- ctx.store.transact(RAttachmentMeta.updatePageCount(ra.id, md.pageCount.some))
n <- store.transact(RAttachmentMeta.updatePageCount(ra.id, md.pageCount.some))
m <-
if (n == 0)
ctx.logger.warn(
s"No attachmentmeta record exists for ${ra.id.id}. Creating new."
) *> ctx.store.transact(
) *> store.transact(
RAttachmentMeta.insert(
RAttachmentMeta(
ra.id,
@ -94,11 +97,11 @@ object AttachmentPageCount {
_ <- ctx.logger.debug(s"Stored page count (${n + m}).")
} yield md
def findMime[F[_]: Functor](ctx: Context[F, _])(ra: RAttachment): F[MimeType] =
OptionT(ctx.store.transact(RFileMeta.findById(ra.fileId)))
def findMime[F[_]: Functor](store: Store[F])(ra: RAttachment): F[MimeType] =
OptionT(store.transact(RFileMeta.findById(ra.fileId)))
.map(_.mimetype)
.getOrElse(MimeType.octetStream)
def loadFile[F[_]](ctx: Context[F, _])(ra: RAttachment): Stream[F, Byte] =
ctx.store.fileRepo.getBytes(ra.fileId)
def loadFile[F[_]](store: Store[F])(ra: RAttachment): Stream[F, Byte] =
store.fileRepo.getBytes(ra.fileId)
}

View File

@ -15,7 +15,8 @@ import fs2.Stream
import docspell.common._
import docspell.extract.pdfbox.PdfboxPreview
import docspell.extract.pdfbox.PreviewConfig
import docspell.joex.scheduler._
import docspell.scheduler._
import docspell.store.Store
import docspell.store.queries.QAttachment
import docspell.store.records.RAttachment
import docspell.store.records._
@ -26,7 +27,7 @@ import docspell.store.records._
*/
object AttachmentPreview {
def apply[F[_]: Sync](pcfg: PreviewConfig)(
def apply[F[_]: Sync](pcfg: PreviewConfig, store: Store[F])(
item: ItemData
): Task[F, ProcessItemArgs, ItemData] =
Task { ctx =>
@ -36,7 +37,7 @@ object AttachmentPreview {
)
preview <- PdfboxPreview(pcfg)
_ <- item.attachments
.traverse(createPreview(ctx, preview))
.traverse(createPreview(ctx, store, preview))
.attempt
.flatMap {
case Right(_) => ().pure[F]
@ -50,16 +51,17 @@ object AttachmentPreview {
def createPreview[F[_]: Sync](
ctx: Context[F, _],
store: Store[F],
preview: PdfboxPreview[F]
)(
ra: RAttachment
): F[Option[RAttachmentPreview]] =
findMime[F](ctx)(ra).flatMap {
findMime[F](store)(ra).flatMap {
case MimeType.PdfMatch(_) =>
preview.previewPNG(loadFile(ctx)(ra)).flatMap {
preview.previewPNG(loadFile(store)(ra)).flatMap {
case Some(out) =>
ctx.logger.debug("Preview generated, saving to database…") *>
createRecord(ctx, ra.fileId.collective, out, ra).map(_.some)
createRecord(store, ra.fileId.collective, out, ra).map(_.some)
case None =>
ctx.logger
.info(s"Preview could not be generated. Maybe the pdf has no pages?") *>
@ -72,7 +74,7 @@ object AttachmentPreview {
}
private def createRecord[F[_]: Sync](
ctx: Context[F, _],
store: Store[F],
collective: Ident,
png: Stream[F, Byte],
ra: RAttachment
@ -83,7 +85,7 @@ object AttachmentPreview {
for {
fileId <- png
.through(
ctx.store.fileRepo.save(
store.fileRepo.save(
collective,
FileCategory.PreviewImage,
MimeTypeHint(name.map(_.fullName), Some("image/png"))
@ -93,16 +95,16 @@ object AttachmentPreview {
.lastOrError
now <- Timestamp.current[F]
rp = RAttachmentPreview(ra.id, fileId, name.map(_.fullName), now)
_ <- QAttachment.deletePreview(ctx.store)(ra.id)
_ <- ctx.store.transact(RAttachmentPreview.insert(rp))
_ <- QAttachment.deletePreview(store)(ra.id)
_ <- store.transact(RAttachmentPreview.insert(rp))
} yield rp
}
def findMime[F[_]: Functor](ctx: Context[F, _])(ra: RAttachment): F[MimeType] =
OptionT(ctx.store.transact(RFileMeta.findById(ra.fileId)))
def findMime[F[_]: Functor](store: Store[F])(ra: RAttachment): F[MimeType] =
OptionT(store.transact(RFileMeta.findById(ra.fileId)))
.map(_.mimetype)
.getOrElse(MimeType.octetStream)
def loadFile[F[_]](ctx: Context[F, _])(ra: RAttachment): Stream[F, Byte] =
ctx.store.fileRepo.getBytes(ra.fileId)
def loadFile[F[_]](store: Store[F])(ra: RAttachment): Stream[F, Byte] =
store.fileRepo.getBytes(ra.fileId)
}

View File

@ -17,7 +17,8 @@ import docspell.convert.ConversionResult.Handler
import docspell.convert.SanitizeHtml
import docspell.convert._
import docspell.joex.extract.JsoupSanitizer
import docspell.joex.scheduler._
import docspell.scheduler._
import docspell.store.Store
import docspell.store.records._
/** Goes through all attachments and creates a PDF version of it where supported.
@ -36,21 +37,22 @@ object ConvertPdf {
def apply[F[_]: Async](
cfg: ConvertConfig,
store: Store[F],
item: ItemData
): Task[F, Args, ItemData] =
Task { ctx =>
def convert(ra: RAttachment): F[(RAttachment, Option[RAttachmentMeta])] =
isConverted(ctx)(ra).flatMap {
isConverted(store)(ra).flatMap {
case true if ctx.args.isNormalProcessing =>
ctx.logger.info(
s"Conversion to pdf already done for attachment ${ra.name}."
) *>
ctx.store
store
.transact(RAttachmentMeta.findById(ra.id))
.map(rmOpt => (ra, rmOpt))
case _ =>
findMime(ctx)(ra).flatMap(m =>
convertSafe(cfg, JsoupSanitizer.clean, ctx, item)(ra, m)
findMime(store)(ra).flatMap(m =>
convertSafe(cfg, JsoupSanitizer.clean, ctx, store, item)(ra, m)
)
}
@ -62,13 +64,15 @@ object ConvertPdf {
}
def isConverted[F[_]](ctx: Context[F, Args])(
def isConverted[F[_]](store: Store[F])(
ra: RAttachment
): F[Boolean] =
ctx.store.transact(RAttachmentSource.isConverted(ra.id))
store.transact(RAttachmentSource.isConverted(ra.id))
def findMime[F[_]: Functor](ctx: Context[F, _])(ra: RAttachment): F[MimeType] =
OptionT(ctx.store.transact(RFileMeta.findById(ra.fileId)))
def findMime[F[_]: Functor](store: Store[F])(
ra: RAttachment
): F[MimeType] =
OptionT(store.transact(RFileMeta.findById(ra.fileId)))
.map(_.mimetype)
.getOrElse(MimeType.octetStream)
@ -76,14 +80,15 @@ object ConvertPdf {
cfg: ConvertConfig,
sanitizeHtml: SanitizeHtml,
ctx: Context[F, Args],
store: Store[F],
item: ItemData
)(ra: RAttachment, mime: MimeType): F[(RAttachment, Option[RAttachmentMeta])] =
loadCollectivePasswords(ctx).flatMap(collPass =>
loadCollectivePasswords(ctx, store).flatMap(collPass =>
Conversion.create[F](cfg, sanitizeHtml, collPass, ctx.logger).use { conv =>
mime match {
case mt =>
val data = ctx.store.fileRepo.getBytes(ra.fileId)
val handler = conversionHandler[F](ctx, cfg, ra, item)
val data = store.fileRepo.getBytes(ra.fileId)
val handler = conversionHandler[F](ctx, store, cfg, ra, item)
ctx.logger
.info(s"Converting file ${ra.name} (${mime.asString}) into a PDF") *>
conv.toPDF(DataType(mt), ctx.args.meta.language, handler)(
@ -94,14 +99,16 @@ object ConvertPdf {
)
private def loadCollectivePasswords[F[_]: Async](
ctx: Context[F, Args]
ctx: Context[F, Args],
store: Store[F]
): F[List[Password]] =
ctx.store
store
.transact(RCollectivePassword.findAll(ctx.args.meta.collective))
.map(_.map(_.password).distinct)
private def conversionHandler[F[_]: Sync](
ctx: Context[F, Args],
store: Store[F],
cfg: ConvertConfig,
ra: RAttachment,
item: ItemData
@ -109,12 +116,12 @@ object ConvertPdf {
Kleisli {
case ConversionResult.SuccessPdf(pdf) =>
ctx.logger.info(s"Conversion to pdf successful. Saving file.") *>
storePDF(ctx, cfg, ra, pdf)
storePDF(ctx, store, cfg, ra, pdf)
.map(r => (r, None))
case ConversionResult.SuccessPdfTxt(pdf, txt) =>
ctx.logger.info(s"Conversion to pdf+txt successful. Saving file.") *>
storePDF(ctx, cfg, ra, pdf)
storePDF(ctx, store, cfg, ra, pdf)
.flatMap(r =>
txt.map(t =>
(
@ -148,6 +155,7 @@ object ConvertPdf {
private def storePDF[F[_]: Sync](
ctx: Context[F, Args],
store: Store[F],
cfg: ConvertConfig,
ra: RAttachment,
pdf: Stream[F, Byte]
@ -162,7 +170,7 @@ object ConvertPdf {
pdf
.through(
ctx.store.fileRepo.save(
store.fileRepo.save(
ctx.args.meta.collective,
FileCategory.AttachmentConvert,
MimeTypeHint(hint.filename, hint.advertised)
@ -170,32 +178,33 @@ object ConvertPdf {
)
.compile
.lastOrError
.flatMap(fmId => updateAttachment[F](ctx, ra, fmId, newName).map(_ => fmId))
.flatMap(fmId => updateAttachment[F](ctx, store, ra, fmId, newName).map(_ => fmId))
.map(fmId => ra.copy(fileId = fmId, name = newName))
}
private def updateAttachment[F[_]: Sync](
ctx: Context[F, _],
store: Store[F],
ra: RAttachment,
fmId: FileKey,
newName: Option[String]
): F[Unit] =
for {
oldFile <- ctx.store.transact(RAttachment.findById(ra.id))
oldFile <- store.transact(RAttachment.findById(ra.id))
_ <-
ctx.store
store
.transact(RAttachment.updateFileIdAndName(ra.id, fmId, newName))
_ <- oldFile match {
case Some(raPrev) =>
for {
sameFile <-
ctx.store
store
.transact(RAttachmentSource.isSameFile(ra.id, raPrev.fileId))
_ <-
if (sameFile) ().pure[F]
else
ctx.logger.info("Deleting previous attachment file") *>
ctx.store.fileRepo
store.fileRepo
.delete(raPrev.fileId)
.attempt
.flatMap {

View File

@ -13,7 +13,8 @@ import cats.implicits._
import fs2.Stream
import docspell.common._
import docspell.joex.scheduler.{Context, Task}
import docspell.scheduler.{Context, Task}
import docspell.store.Store
import docspell.store.file.FileMetadata
import docspell.store.queries.QItem
import docspell.store.records._
@ -21,13 +22,13 @@ import docspell.store.records._
/** Task that creates the item. */
object CreateItem {
def apply[F[_]: Sync]: Task[F, ProcessItemArgs, ItemData] =
findExisting[F].flatMap {
def apply[F[_]: Sync](store: Store[F]): Task[F, ProcessItemArgs, ItemData] =
findExisting[F](store).flatMap {
case Some(ri) => Task.pure(ri)
case None => createNew[F]
case None => createNew[F](store)
}
def createNew[F[_]: Sync]: Task[F, ProcessItemArgs, ItemData] =
def createNew[F[_]: Sync](store: Store[F]): Task[F, ProcessItemArgs, ItemData] =
Task { ctx =>
def isValidFile(fm: FileMetadata) =
ctx.args.meta.validFileTypes.isEmpty ||
@ -36,11 +37,11 @@ object CreateItem {
def fileMetas(itemId: Ident, now: Timestamp) =
Stream
.eval(ctx.store.transact(RAttachment.nextPosition(itemId)))
.eval(store.transact(RAttachment.nextPosition(itemId)))
.flatMap { offset =>
Stream
.emits(ctx.args.files)
.evalMap(f => ctx.store.fileRepo.findMeta(f.fileMetaId).map(fm => (f, fm)))
.evalMap(f => store.fileRepo.findMeta(f.fileMetaId).map(fm => (f, fm)))
.collect { case (f, Some(fm)) if isValidFile(fm) => f }
.zipWithIndex
.evalMap { case (f, index) =>
@ -67,11 +68,11 @@ object CreateItem {
(for {
_ <- OptionT.liftF(
ctx.logger.info(
s"Loading item with id ${id.id} to ammend"
s"Loading item with id ${id.id} to amend"
)
)
item <- OptionT(
ctx.store
store
.transact(RItem.findByIdAndCollective(id, ctx.args.meta.collective))
)
} yield (1, item))
@ -88,7 +89,7 @@ object CreateItem {
ctx.args.meta.direction.getOrElse(Direction.Incoming),
ItemState.Premature
)
n <- ctx.store.transact(RItem.insert(item))
n <- store.transact(RItem.insert(item))
} yield (n, item)
}
@ -98,7 +99,7 @@ object CreateItem {
_ <- if (it._1 != 1) storeItemError[F](ctx) else ().pure[F]
now <- Timestamp.current[F]
fm <- fileMetas(it._2.id, now)
k <- fm.traverse(insertAttachment(ctx))
k <- fm.traverse(insertAttachment(store))
_ <- logDifferences(ctx, fm, k.sum)
dur <- time
_ <- ctx.logger.info(s"Creating item finished in ${dur.formatExact}")
@ -115,25 +116,27 @@ object CreateItem {
)
}
def insertAttachment[F[_]](ctx: Context[F, _])(ra: RAttachment): F[Int] = {
def insertAttachment[F[_]](store: Store[F])(ra: RAttachment): F[Int] = {
val rs = RAttachmentSource.of(ra)
ctx.store.transact(for {
store.transact(for {
n <- RAttachment.insert(ra)
_ <- RAttachmentSource.insert(rs)
} yield n)
}
private def findExisting[F[_]: Sync]: Task[F, ProcessItemArgs, Option[ItemData]] =
private def findExisting[F[_]: Sync](
store: Store[F]
): Task[F, ProcessItemArgs, Option[ItemData]] =
Task { ctx =>
val states = ItemState.invalidStates
val fileMetaIds = ctx.args.files.map(_.fileMetaId).toSet
for {
cand <- ctx.store.transact(QItem.findByFileIds(fileMetaIds.toSeq, states))
cand <- store.transact(QItem.findByFileIds(fileMetaIds.toSeq, states))
_ <-
if (cand.nonEmpty)
ctx.logger.warn(s"Found ${cand.size} existing item with these files.")
else ().pure[F]
ht <- cand.drop(1).traverse(ri => QItem.delete(ctx.store)(ri.id, ri.cid))
ht <- cand.drop(1).traverse(ri => QItem.delete(store)(ri.id, ri.cid))
_ <-
if (ht.sum > 0)
ctx.logger.warn(s"Removed ${ht.sum} items with same attachments")
@ -144,7 +147,7 @@ object CreateItem {
OptionT(
// load attachments but only those mentioned in the task's arguments
cand.headOption.traverse(ri =>
ctx.store
store
.transact(RAttachment.findByItemCollectiveSource(ri.id, ri.cid, fids))
.flatTap(ats =>
ctx.logger.debug(
@ -156,7 +159,7 @@ object CreateItem {
)
.getOrElse(Vector.empty)
orig <- rms.traverse(a =>
ctx.store.transact(RAttachmentSource.findById(a.id)).map(s => (a, s))
store.transact(RAttachmentSource.findById(a.id)).map(s => (a, s))
)
origMap =
orig

View File

@ -12,8 +12,9 @@ import cats.effect.Sync
import cats.implicits._
import docspell.common._
import docspell.joex.scheduler.Task
import docspell.logging.Logger
import docspell.scheduler.Task
import docspell.store.Store
/** After candidates have been determined, the set is reduced by doing some cross checks.
* For example: if a organization is suggested as correspondent, the correspondent person
@ -22,13 +23,15 @@ import docspell.logging.Logger
*/
object CrossCheckProposals {
def apply[F[_]: Sync](data: ItemData): Task[F, ProcessItemArgs, ItemData] =
def apply[F[_]: Sync](
store: Store[F]
)(data: ItemData): Task[F, ProcessItemArgs, ItemData] =
Task { ctx =>
val proposals = data.finalProposals
val corrOrg = proposals.find(MetaProposalType.CorrOrg)
(for {
orgRef <- OptionT.fromOption[F](corrOrg)
persRefs <- OptionT.liftF(EvalProposals.findOrganizationRelation(data, ctx))
persRefs <- OptionT.liftF(EvalProposals.findOrganizationRelation(data, store))
clProps <- OptionT.liftF(
personOrgCheck[F](ctx.logger, data.classifyProposals, persRefs)(orgRef)
)
@ -53,7 +56,7 @@ object CrossCheckProposals {
mpl.find(MetaProposalType.CorrPerson) match {
case Some(ppl) =>
val list = ppl.values.filter(c =>
persRefs.get(c.ref.id).exists(_.organization == Some(orgId))
persRefs.get(c.ref.id).exists(_.organization.contains(orgId))
)
if (ppl.values.toList == list) mpl.pure[F]

View File

@ -10,7 +10,8 @@ import cats.effect._
import cats.implicits._
import docspell.common._
import docspell.joex.scheduler.{Context, Task}
import docspell.scheduler.{Context, Task}
import docspell.store.Store
import docspell.store.queries.QItem
import docspell.store.records.RFileMeta
import docspell.store.records.RJob
@ -20,46 +21,52 @@ import doobie._
object DuplicateCheck {
type Args = ProcessItemArgs
def apply[F[_]: Sync]: Task[F, Args, Args] =
def apply[F[_]: Sync](store: Store[F]): Task[F, Args, Args] =
Task { ctx =>
if (ctx.args.meta.skipDuplicate)
for {
retries <- getRetryCount(ctx)
retries <- getRetryCount(ctx, store)
res <-
if (retries == 0)
ctx.logger.debug("Checking for duplicate files") *> removeDuplicates(ctx)
ctx.logger
.debug("Checking for duplicate files") *> removeDuplicates(ctx, store)
else ctx.args.pure[F]
} yield res
else ctx.logger.debug("Not checking for duplicates") *> ctx.args.pure[F]
}
def removeDuplicates[F[_]: Sync](ctx: Context[F, Args]): F[ProcessItemArgs] =
def removeDuplicates[F[_]: Sync](
ctx: Context[F, Args],
store: Store[F]
): F[ProcessItemArgs] =
for {
fileMetas <- findDuplicates(ctx)
_ <- fileMetas.traverse(deleteDuplicate(ctx))
fileMetas <- findDuplicates(ctx, store)
_ <- fileMetas.traverse(deleteDuplicate(ctx, store))
ids = fileMetas.filter(_.exists).map(_.fm.id).toSet
} yield ctx.args.copy(files =
ctx.args.files.filterNot(f => ids.contains(f.fileMetaId))
)
private def getRetryCount[F[_]: Sync](ctx: Context[F, Args]): F[Int] =
ctx.store.transact(RJob.getRetries(ctx.jobId)).map(_.getOrElse(0))
private def getRetryCount[F[_]: Sync](ctx: Context[F, _], store: Store[F]): F[Int] =
store.transact(RJob.getRetries(ctx.jobId)).map(_.getOrElse(0))
private def deleteDuplicate[F[_]: Sync](
ctx: Context[F, Args]
ctx: Context[F, Args],
store: Store[F]
)(fd: FileMetaDupes): F[Unit] = {
val fname = ctx.args.files.find(_.fileMetaId == fd.fm.id).flatMap(_.name)
if (fd.exists)
ctx.logger
.info(s"Deleting duplicate file $fname!") *> ctx.store.fileRepo
.info(s"Deleting duplicate file $fname!") *> store.fileRepo
.delete(fd.fm.id)
else ().pure[F]
}
private def findDuplicates[F[_]](
ctx: Context[F, Args]
ctx: Context[F, Args],
store: Store[F]
): F[Vector[FileMetaDupes]] =
ctx.store.transact(for {
store.transact(for {
fileMetas <- RFileMeta.findByIds(ctx.args.files.map(_.fileMetaId))
dupes <- fileMetas.traverse(checkDuplicate(ctx))
} yield dupes)

View File

@ -12,25 +12,28 @@ import cats.effect.Sync
import cats.implicits._
import docspell.common._
import docspell.joex.scheduler.{Context, Task}
import docspell.scheduler.Task
import docspell.store.Store
import docspell.store.records.{RAttachmentMeta, RPerson}
/** Calculate weights for candidates that adds the most likely candidate a lower number.
*/
object EvalProposals {
def apply[F[_]: Sync](data: ItemData): Task[F, ProcessItemArgs, ItemData] =
Task { ctx =>
def apply[F[_]: Sync](
store: Store[F]
)(data: ItemData): Task[F, ProcessItemArgs, ItemData] =
Task { _ =>
for {
now <- Timestamp.current[F]
personRefs <- findOrganizationRelation[F](data, ctx)
personRefs <- findOrganizationRelation[F](data, store)
metas = data.metas.map(calcCandidateWeight(now.toUtcDate, personRefs))
} yield data.copy(metas = metas)
}
def findOrganizationRelation[F[_]: Sync](
data: ItemData,
ctx: Context[F, _]
store: Store[F]
): F[Map[Ident, PersonRef]] = {
val corrPersIds = data.metas
.map(_.proposals)
@ -38,7 +41,7 @@ object EvalProposals {
.flatMap(_.find(MetaProposalType.CorrPerson))
.flatMap(_.values.toList.map(_.ref.id))
.toSet
ctx.store
store
.transact(RPerson.findOrganization(corrPersIds))
.map(_.map(p => (p.id, p)).toMap)
}

View File

@ -18,7 +18,8 @@ import fs2.Stream
import docspell.common._
import docspell.files.Zip
import docspell.joex.mail._
import docspell.joex.scheduler._
import docspell.scheduler._
import docspell.store.Store
import docspell.store.records._
import emil.Mail
@ -34,39 +35,41 @@ import emil.Mail
object ExtractArchive {
type Args = ProcessItemArgs
def apply[F[_]: Async](
def apply[F[_]: Async](store: Store[F])(
item: ItemData
): Task[F, Args, ItemData] =
multiPass(item, None).map(_._2)
multiPass(store, item, None).map(_._2)
def multiPass[F[_]: Async](
store: Store[F],
item: ItemData,
archive: Option[RAttachmentArchive]
): Task[F, Args, (Option[RAttachmentArchive], ItemData)] =
singlePass(item, archive).flatMap { t =>
singlePass(store, item, archive).flatMap { t =>
if (t._1.isEmpty) Task.pure(t)
else multiPass(t._2, t._1)
else multiPass(store, t._2, t._1)
}
def singlePass[F[_]: Async](
store: Store[F],
item: ItemData,
archive: Option[RAttachmentArchive]
): Task[F, Args, (Option[RAttachmentArchive], ItemData)] =
Task { ctx =>
def extract(ra: RAttachment, pos: Int): F[Extracted] =
findMime(ctx)(ra).flatMap(m => extractSafe(ctx, archive)(ra, pos, m))
findMime(store)(ra).flatMap(m => extractSafe(ctx, store, archive)(ra, pos, m))
for {
lastPos <- ctx.store.transact(RAttachment.nextPosition(item.item.id))
lastPos <- store.transact(RAttachment.nextPosition(item.item.id))
extracts <-
item.attachments.zipWithIndex
.traverse(t => extract(t._1, lastPos + t._2))
.map(Monoid[Extracted].combineAll)
.map(fixPositions)
nra = extracts.files
_ <- extracts.files.traverse(storeAttachment(ctx))
_ <- extracts.files.traverse(storeAttachment(store))
naa = extracts.archives
_ <- naa.traverse(storeArchive(ctx))
_ <- naa.traverse(storeArchive(store))
} yield naa.headOption -> item.copy(
attachments = nra,
originFile = item.originFile ++ nra.map(a => a.id -> a.fileId).toMap,
@ -83,25 +86,26 @@ object ExtractArchive {
if (extract.archives.isEmpty) extract
else extract.updatePositions
def findMime[F[_]: Functor](ctx: Context[F, _])(ra: RAttachment): F[MimeType] =
OptionT(ctx.store.transact(RFileMeta.findById(ra.fileId)))
def findMime[F[_]: Functor](store: Store[F])(ra: RAttachment): F[MimeType] =
OptionT(store.transact(RFileMeta.findById(ra.fileId)))
.map(_.mimetype)
.getOrElse(MimeType.octetStream)
def extractSafe[F[_]: Async](
ctx: Context[F, Args],
store: Store[F],
archive: Option[RAttachmentArchive]
)(ra: RAttachment, pos: Int, mime: MimeType): F[Extracted] =
mime match {
case MimeType.ZipMatch(_) if ra.name.exists(_.toLowerCase.endsWith(".zip")) =>
ctx.logger.info(s"Extracting zip archive ${ra.name.getOrElse("<noname>")}.") *>
extractZip(ctx, archive)(ra, pos)
.flatMap(cleanupParents(ctx, ra, archive))
extractZip(ctx, store, archive)(ra, pos)
.flatMap(cleanupParents(ctx, store, ra, archive))
case MimeType.EmailMatch(_) =>
ctx.logger.info(s"Reading e-mail ${ra.name.getOrElse("<noname>")}") *>
extractMail(ctx, archive)(ra, pos)
.flatMap(cleanupParents(ctx, ra, archive))
extractMail(ctx, store, archive)(ra, pos)
.flatMap(cleanupParents(ctx, store, ra, archive))
case _ =>
ctx.logger.debug(s"Not an archive: ${mime.asString}") *>
@ -110,6 +114,7 @@ object ExtractArchive {
def cleanupParents[F[_]: Sync](
ctx: Context[F, _],
store: Store[F],
ra: RAttachment,
archive: Option[RAttachmentArchive]
)(extracted: Extracted): F[Extracted] =
@ -119,30 +124,31 @@ object ExtractArchive {
_ <- ctx.logger.debug(
s"Extracted inner attachment ${ra.name}. Remove it completely."
)
_ <- ctx.store.transact(RAttachmentArchive.delete(ra.id))
_ <- ctx.store.transact(RAttachment.delete(ra.id))
_ <- ctx.store.fileRepo.delete(ra.fileId)
_ <- store.transact(RAttachmentArchive.delete(ra.id))
_ <- store.transact(RAttachment.delete(ra.id))
_ <- store.fileRepo.delete(ra.fileId)
} yield extracted
case None =>
for {
_ <- ctx.logger.debug(
s"Extracted attachment ${ra.name}. Remove it from the item."
)
_ <- ctx.store.transact(RAttachment.delete(ra.id))
_ <- store.transact(RAttachment.delete(ra.id))
} yield extracted.copy(files = extracted.files.filter(_.id != ra.id))
}
def extractZip[F[_]: Async](
ctx: Context[F, Args],
store: Store[F],
archive: Option[RAttachmentArchive]
)(ra: RAttachment, pos: Int): F[Extracted] = {
val zipData = ctx.store.fileRepo.getBytes(ra.fileId)
val zipData = store.fileRepo.getBytes(ra.fileId)
val glob = ctx.args.meta.fileFilter.getOrElse(Glob.all)
ctx.logger.debug(s"Filtering zip entries with '${glob.asString}'") *>
zipData
.through(Zip.unzipP[F](8192, glob))
.zipWithIndex
.flatMap(handleEntry(ctx, ra, pos, archive, None))
.flatMap(handleEntry(ctx, store, ra, pos, archive, None))
.foldMonoid
.compile
.lastOrError
@ -150,9 +156,10 @@ object ExtractArchive {
def extractMail[F[_]: Async](
ctx: Context[F, Args],
store: Store[F],
archive: Option[RAttachmentArchive]
)(ra: RAttachment, pos: Int): F[Extracted] = {
val email: Stream[F, Byte] = ctx.store.fileRepo.getBytes(ra.fileId)
val email: Stream[F, Byte] = store.fileRepo.getBytes(ra.fileId)
val glob = ctx.args.meta.fileFilter.getOrElse(Glob.all)
val attachOnly = ctx.args.meta.attachmentsOnly.getOrElse(false)
@ -170,7 +177,9 @@ object ExtractArchive {
ReadMail
.mailToEntries(ctx.logger, glob, attachOnly)(mail)
.zipWithIndex
.flatMap(handleEntry(ctx, ra, pos, archive, mId)) ++ Stream.eval(givenMeta)
.flatMap(handleEntry(ctx, store, ra, pos, archive, mId)) ++ Stream.eval(
givenMeta
)
}
.foldMonoid
.compile
@ -185,6 +194,7 @@ object ExtractArchive {
def handleEntry[F[_]: Sync](
ctx: Context[F, Args],
store: Store[F],
ra: RAttachment,
pos: Int,
archive: Option[RAttachmentArchive],
@ -195,7 +205,7 @@ object ExtractArchive {
val (entry, subPos) = tentry
val mimeHint = MimeTypeHint.filename(entry.name).withAdvertised(entry.mime.asString)
val fileId = entry.data.through(
ctx.store.fileRepo
store.fileRepo
.save(ctx.args.meta.collective, FileCategory.AttachmentSource, mimeHint)
)
@ -217,16 +227,16 @@ object ExtractArchive {
}
def storeAttachment[F[_]: Sync](ctx: Context[F, _])(ra: RAttachment): F[Int] = {
val insert = CreateItem.insertAttachment(ctx)(ra)
def storeAttachment[F[_]: Sync](store: Store[F])(ra: RAttachment): F[Int] = {
val insert = CreateItem.insertAttachment(store)(ra)
for {
n1 <- ctx.store.transact(RAttachment.updatePosition(ra.id, ra.position))
n1 <- store.transact(RAttachment.updatePosition(ra.id, ra.position))
n2 <- if (n1 > 0) 0.pure[F] else insert
} yield n1 + n2
}
def storeArchive[F[_]](ctx: Context[F, _])(aa: RAttachmentArchive): F[Int] =
ctx.store.transact(RAttachmentArchive.insert(aa))
def storeArchive[F[_]](store: Store[F])(aa: RAttachmentArchive): F[Int] =
store.transact(RAttachmentArchive.insert(aa))
case class Extracted(
files: Vector[RAttachment],

View File

@ -16,7 +16,8 @@ import docspell.analysis.contact._
import docspell.common.MetaProposal.Candidate
import docspell.common._
import docspell.joex.Config
import docspell.joex.scheduler.{Context, Task}
import docspell.scheduler.{Context, Task}
import docspell.store.Store
import docspell.store.records._
/** Super simple approach to find corresponding meta data to an item by looking up values
@ -26,7 +27,8 @@ object FindProposal {
type Args = ProcessItemArgs
def apply[F[_]: Sync](
cfg: Config.TextAnalysis
cfg: Config.TextAnalysis,
store: Store[F]
)(data: ItemData): Task[F, Args, ItemData] =
Task { ctx =>
val rmas = data.metas.map(rm => rm.copy(nerlabels = removeDuplicates(rm.nerlabels)))
@ -34,15 +36,16 @@ object FindProposal {
_ <- ctx.logger.info("Starting find-proposal")
rmv <- rmas
.traverse(rm =>
processAttachment(cfg, rm, data.findDates(rm), ctx)
processAttachment(cfg, rm, data.findDates(rm), ctx, store)
.map(ml => rm.copy(proposals = ml))
)
clp <- lookupClassifierProposals(ctx, data.classifyProposals)
clp <- lookupClassifierProposals(ctx, store, data.classifyProposals)
} yield data.copy(metas = rmv, classifyProposals = clp)
}
def lookupClassifierProposals[F[_]: Sync](
ctx: Context[F, Args],
store: Store[F],
mpList: MetaProposalList
): F[MetaProposalList] = {
val coll = ctx.args.meta.collective
@ -50,7 +53,7 @@ object FindProposal {
def lookup(mp: MetaProposal): F[Option[IdRef]] =
mp.proposalType match {
case MetaProposalType.CorrOrg =>
ctx.store
store
.transact(
ROrganization
.findLike(coll, mp.values.head.ref.name.toLowerCase, OrgUse.notDisabled)
@ -60,7 +63,7 @@ object FindProposal {
ctx.logger.debug(s"Found classifier organization for $mp: $oref")
)
case MetaProposalType.CorrPerson =>
ctx.store
store
.transact(
RPerson
.findLike(
@ -74,7 +77,7 @@ object FindProposal {
ctx.logger.debug(s"Found classifier corr-person for $mp: $oref")
)
case MetaProposalType.ConcPerson =>
ctx.store
store
.transact(
RPerson
.findLike(
@ -88,7 +91,7 @@ object FindProposal {
ctx.logger.debug(s"Found classifier conc-person for $mp: $oref")
)
case MetaProposalType.ConcEquip =>
ctx.store
store
.transact(
REquipment
.findLike(
@ -123,9 +126,10 @@ object FindProposal {
cfg: Config.TextAnalysis,
rm: RAttachmentMeta,
rd: Vector[NerDateLabel],
ctx: Context[F, ProcessItemArgs]
ctx: Context[F, Args],
store: Store[F]
): F[MetaProposalList] = {
val finder = Finder.searchExact(ctx).next(Finder.searchFuzzy(ctx))
val finder = Finder.searchExact(ctx, store).next(Finder.searchFuzzy(ctx, store))
List(finder.find(rm.nerlabels), makeDateProposal(cfg, rd))
.traverse(identity)
.map(MetaProposalList.flatten)
@ -215,19 +219,24 @@ object FindProposal {
def unit[F[_]: Applicative](value: MetaProposalList): Finder[F] =
_ => value.pure[F]
def searchExact[F[_]: Sync](ctx: Context[F, ProcessItemArgs]): Finder[F] =
def searchExact[F[_]: Sync](ctx: Context[F, Args], store: Store[F]): Finder[F] =
labels =>
labels.toList.traverse(nl => search(nl, true, ctx)).map(MetaProposalList.flatten)
labels.toList
.traverse(nl => search(nl, true, ctx, store))
.map(MetaProposalList.flatten)
def searchFuzzy[F[_]: Sync](ctx: Context[F, ProcessItemArgs]): Finder[F] =
def searchFuzzy[F[_]: Sync](ctx: Context[F, Args], store: Store[F]): Finder[F] =
labels =>
labels.toList.traverse(nl => search(nl, false, ctx)).map(MetaProposalList.flatten)
labels.toList
.traverse(nl => search(nl, false, ctx, store))
.map(MetaProposalList.flatten)
}
private def search[F[_]: Sync](
nt: NerLabel,
exact: Boolean,
ctx: Context[F, ProcessItemArgs]
ctx: Context[F, ProcessItemArgs],
store: Store[F]
): F[MetaProposalList] = {
val value =
if (exact) normalizeSearchValue(nt.label)
@ -243,7 +252,7 @@ object FindProposal {
nt.tag match {
case NerTag.Organization =>
ctx.logger.debug(s"Looking for organizations: $value") *>
ctx.store
store
.transact(
ROrganization
.findLike(ctx.args.meta.collective, value, OrgUse.notDisabled)
@ -251,20 +260,20 @@ object FindProposal {
.map(MetaProposalList.from(MetaProposalType.CorrOrg, nt))
case NerTag.Person =>
val s1 = ctx.store
val s1 = store
.transact(
RPerson
.findLike(ctx.args.meta.collective, value, PersonUse.concerningAndBoth)
)
.map(MetaProposalList.from(MetaProposalType.ConcPerson, nt))
val s2 = ctx.store
val s2 = store
.transact(
RPerson
.findLike(ctx.args.meta.collective, value, PersonUse.correspondentAndBoth)
)
.map(MetaProposalList.from(MetaProposalType.CorrPerson, nt))
val s3 =
ctx.store
store
.transact(
ROrganization
.findLike(ctx.args.meta.collective, value, OrgUse.notDisabled)
@ -283,7 +292,7 @@ object FindProposal {
case NerTag.Misc =>
ctx.logger.debug(s"Looking for equipments: $value") *>
ctx.store
store
.transact(
REquipment
.findLike(ctx.args.meta.collective, value, EquipmentUse.notDisabled)
@ -291,7 +300,7 @@ object FindProposal {
.map(MetaProposalList.from(MetaProposalType.ConcEquip, nt))
case NerTag.Email =>
searchContact(nt, ContactKind.Email, value, ctx)
searchContact(nt, ContactKind.Email, value, ctx, store)
case NerTag.Website =>
if (!exact) {
@ -301,9 +310,9 @@ object FindProposal {
.map(_.toPrimaryDomain.asString)
.map(s => s"%$s%")
.getOrElse(value)
searchContact(nt, ContactKind.Website, searchString, ctx)
searchContact(nt, ContactKind.Website, searchString, ctx, store)
} else
searchContact(nt, ContactKind.Website, value, ctx)
searchContact(nt, ContactKind.Website, value, ctx, store)
case NerTag.Date =>
// There is no database search required for this tag
@ -315,18 +324,19 @@ object FindProposal {
nt: NerLabel,
kind: ContactKind,
value: String,
ctx: Context[F, ProcessItemArgs]
ctx: Context[F, ProcessItemArgs],
store: Store[F]
): F[MetaProposalList] = {
val orgs = ctx.store
val orgs = store
.transact(ROrganization.findLike(ctx.args.meta.collective, kind, value))
.map(MetaProposalList.from(MetaProposalType.CorrOrg, nt))
val corrP = ctx.store
val corrP = store
.transact(
RPerson
.findLike(ctx.args.meta.collective, kind, value, PersonUse.correspondentAndBoth)
)
.map(MetaProposalList.from(MetaProposalType.CorrPerson, nt))
val concP = ctx.store
val concP = store
.transact(
RPerson
.findLike(ctx.args.meta.collective, kind, value, PersonUse.concerningAndBoth)

View File

@ -8,6 +8,7 @@ package docspell.joex.process
import docspell.common._
import docspell.joex.process.ItemData.AttachmentDates
import docspell.scheduler.JobTaskResultEncoder
import docspell.store.records.{RAttachment, RAttachmentMeta, RItem}
import io.circe.syntax.EncoderOps
@ -118,7 +119,28 @@ object ItemData {
)
.asJson,
"tags" -> data.tags.asJson,
"assumedTags" -> data.classifyTags.asJson
"assumedTags" -> data.classifyTags.asJson,
"assumedCorrOrg" -> data.finalProposals
.find(MetaProposalType.CorrOrg)
.map(_.values.head.ref)
.asJson
)
}
/** Encodes an [[ItemData]] job result as JSON together with a short, human-readable
  * message.
  *
  * The message has the form `Processed '<item name>' [tag, ...] by <org>/<person>`.
  * The tag list and the correspondent part are optional; each one brings its own
  * leading blank, so the message contains neither a doubled blank (no tags, but a
  * correspondent) nor a trailing blank (neither tags nor correspondent).
  */
implicit val jobTaskResultEncoder: JobTaskResultEncoder[ItemData] =
  JobTaskResultEncoder.fromJson[ItemData].withMessage { data =>
    // Given tags followed by the classifier-assumed tags; empty string when there are none.
    val tags =
      if (data.tags.isEmpty && data.classifyTags.isEmpty) ""
      else (data.tags ++ data.classifyTags).mkString(" [", ", ", "]")

    // Name of the first candidate of the final proposal for the given type, if any.
    // NOTE(review): relies on `values` being non-empty, as the original code did — confirm.
    def firstCandidateName(mpt: MetaProposalType): Option[String] =
      data.finalProposals.find(mpt).map(_.values.head.ref.name)

    val corg = firstCandidateName(MetaProposalType.CorrOrg)
    val cpers = firstCandidateName(MetaProposalType.CorrPerson)

    // " by <org>/<person>", " by <org>", " by <person>" or nothing at all.
    val org = corg match {
      case Some(o) => s" by $o" + cpers.map(p => s"/$p").getOrElse("")
      case None    => cpers.map(p => s" by $p").getOrElse("")
    }

    s"Processed '${data.item.name}'$tags$org"
  }
}

View File

@ -17,59 +17,62 @@ import docspell.common.{ItemState, ProcessItemArgs}
import docspell.ftsclient.FtsClient
import docspell.joex.Config
import docspell.joex.analysis.RegexNerFile
import docspell.joex.scheduler.Task
import docspell.scheduler.Task
import docspell.store.Store
import docspell.store.queries.QItem
import docspell.store.records.RItem
object ItemHandler {
type Args = ProcessItemArgs
def onCancel[F[_]: Sync]: Task[F, Args, Unit] =
logWarn("Now cancelling.").flatMap(_ =>
markItemCreated.flatMap {
def onCancel[F[_]: Sync](store: Store[F]): Task[F, Args, Unit] =
logWarn[F]("Now cancelling.").flatMap(_ =>
markItemCreated(store).flatMap {
case true =>
Task.pure(())
case false =>
deleteByFileIds[F].flatMap(_ => deleteFiles)
deleteByFileIds[F](store).flatMap(_ => deleteFiles(store))
}
)
def newItem[F[_]: Async](
cfg: Config,
store: Store[F],
itemOps: OItem[F],
fts: FtsClient[F],
analyser: TextAnalyser[F],
regexNer: RegexNerFile[F]
): Task[F, Args, Option[ItemData]] =
logBeginning.flatMap(_ =>
DuplicateCheck[F]
logBeginning[F].flatMap(_ =>
DuplicateCheck[F](store)
.flatMap(args =>
if (args.files.isEmpty) logNoFiles.map(_ => None)
if (args.files.isEmpty) logNoFiles[F].map(_ => None)
else {
val create: Task[F, Args, ItemData] =
CreateItem[F].contramap(_ => args.pure[F])
CreateItem[F](store).contramap(_ => args.pure[F])
create
.flatMap(itemStateTask(ItemState.Processing))
.flatMap(safeProcess[F](cfg, itemOps, fts, analyser, regexNer))
.flatMap(itemStateTask(store, ItemState.Processing))
.flatMap(safeProcess[F](cfg, store, itemOps, fts, analyser, regexNer))
.map(_.some)
}
)
)
def itemStateTask[F[_]: Sync, A](
state: ItemState
)(data: ItemData): Task[F, A, ItemData] =
Task(ctx =>
ctx.store
def itemStateTask[F[_]: Sync, A](store: Store[F], state: ItemState)(
data: ItemData
): Task[F, A, ItemData] =
Task(_ =>
store
.transact(RItem.updateState(data.item.id, state, ItemState.invalidStates))
.map(_ => data)
)
def isLastRetry[F[_]: Sync]: Task[F, Args, Boolean] =
def isLastRetry[F[_]]: Task[F, Args, Boolean] =
Task(_.isLastRetry)
def safeProcess[F[_]: Async](
cfg: Config,
store: Store[F],
itemOps: OItem[F],
fts: FtsClient[F],
analyser: TextAnalyser[F],
@ -77,30 +80,31 @@ object ItemHandler {
)(data: ItemData): Task[F, Args, ItemData] =
isLastRetry[F].flatMap {
case true =>
ProcessItem[F](cfg, itemOps, fts, analyser, regexNer)(data).attempt.flatMap {
ProcessItem[F](cfg, itemOps, fts, analyser, regexNer, store)(data).attempt
.flatMap {
case Right(d) =>
Task.pure(d)
case Left(ex) =>
logWarn[F](
"Processing failed on last retry. Creating item but without proposals."
).flatMap(_ => itemStateTask(ItemState.Created)(data))
).flatMap(_ => itemStateTask(store, ItemState.Created)(data))
.andThen(_ => Sync[F].raiseError(ex))
}
case false =>
ProcessItem[F](cfg, itemOps, fts, analyser, regexNer)(data)
.flatMap(itemStateTask(ItemState.Created))
ProcessItem[F](cfg, itemOps, fts, analyser, regexNer, store)(data)
.flatMap(itemStateTask(store, ItemState.Created))
}
private def markItemCreated[F[_]: Sync]: Task[F, Args, Boolean] =
private def markItemCreated[F[_]: Sync](store: Store[F]): Task[F, Args, Boolean] =
Task { ctx =>
val fileMetaIds = ctx.args.files.map(_.fileMetaId).toSet
(for {
item <- OptionT(ctx.store.transact(QItem.findOneByFileIds(fileMetaIds.toSeq)))
item <- OptionT(store.transact(QItem.findOneByFileIds(fileMetaIds.toSeq)))
_ <- OptionT.liftF(
ctx.logger.info("Processing cancelled. Marking item as created anyways.")
)
_ <- OptionT.liftF(
ctx.store
store
.transact(
RItem.updateState(item.id, ItemState.Created, ItemState.invalidStates)
)
@ -111,11 +115,11 @@ object ItemHandler {
)
}
private def deleteByFileIds[F[_]: Sync]: Task[F, Args, Unit] =
private def deleteByFileIds[F[_]: Sync](store: Store[F]): Task[F, Args, Unit] =
Task { ctx =>
val states = ItemState.invalidStates
for {
items <- ctx.store.transact(
items <- store.transact(
QItem.findByFileIds(ctx.args.files.map(_.fileMetaId), states)
)
_ <-
@ -124,16 +128,16 @@ object ItemHandler {
ctx.logger.info(
s"No items found for file ids ${ctx.args.files.map(_.fileMetaId)}"
)
_ <- items.traverse(i => QItem.delete(ctx.store)(i.id, ctx.args.meta.collective))
_ <- items.traverse(i => QItem.delete(store)(i.id, ctx.args.meta.collective))
} yield ()
}
private def deleteFiles[F[_]: Sync]: Task[F, Args, Unit] =
private def deleteFiles[F[_]: Sync](store: Store[F]): Task[F, Args, Unit] =
Task(ctx =>
ctx.logger.info("Deleting input files …") *>
Stream
.emits(ctx.args.files.map(_.fileMetaId))
.evalMap(id => ctx.store.fileRepo.delete(id).attempt)
.evalMap(id => store.fileRepo.delete(id).attempt)
.compile
.drain
)

View File

@ -11,20 +11,25 @@ import cats.effect.Sync
import cats.implicits._
import docspell.common._
import docspell.joex.scheduler.{Context, Task}
import docspell.scheduler.{Context, Task}
import docspell.store.Store
import docspell.store.records.RItem
object LinkProposal {
def onlyNew[F[_]: Sync](data: ItemData): Task[F, ProcessItemArgs, ItemData] =
def onlyNew[F[_]: Sync](
store: Store[F]
)(data: ItemData): Task[F, ProcessItemArgs, ItemData] =
if (data.item.state.isValid)
Task
.log[F, ProcessItemArgs](_.debug(s"Not linking proposals on existing item"))
.map(_ => data)
else
LinkProposal[F](data)
LinkProposal[F](store)(data)
def apply[F[_]: Sync](data: ItemData): Task[F, ProcessItemArgs, ItemData] =
def apply[F[_]: Sync](
store: Store[F]
)(data: ItemData): Task[F, ProcessItemArgs, ItemData] =
if (data.item.state == ItemState.Confirmed)
Task
.log[F, ProcessItemArgs](_.debug(s"Not linking proposals on confirmed item"))
@ -35,7 +40,7 @@ object LinkProposal {
ctx.logger.info(s"Starting linking proposals") *>
MetaProposalType.all
.traverse(applyValue(data, proposals, ctx))
.traverse(applyValue(data, proposals, ctx, store))
.map(result => ctx.logger.info(s"Results from proposal processing: $result"))
.map(_ => data)
}
@ -43,7 +48,8 @@ object LinkProposal {
def applyValue[F[_]: Sync](
data: ItemData,
proposalList: MetaProposalList,
ctx: Context[F, ProcessItemArgs]
ctx: Context[F, ProcessItemArgs],
store: Store[F]
)(mpt: MetaProposalType): F[Result] =
data.givenMeta.find(mpt).orElse(proposalList.find(mpt)) match {
case None =>
@ -51,29 +57,30 @@ object LinkProposal {
Result.noneFound(mpt).pure[F]
case Some(a) if a.isSingleValue =>
ctx.logger.info(s"Found one candidate for ${a.proposalType}") *>
setItemMeta(data.item.id, ctx, a.proposalType, a.values.head.ref.id).map(_ =>
Result.single(mpt)
setItemMeta(data.item.id, ctx, store, a.proposalType, a.values.head.ref.id).map(
_ => Result.single(mpt)
)
case Some(a) =>
val ids = a.values.map(_.ref.id.id)
ctx.logger.info(
s"Found many (${a.size}, $ids) candidates for ${a.proposalType}. Setting first."
) *>
setItemMeta(data.item.id, ctx, a.proposalType, a.values.head.ref.id).map(_ =>
Result.multiple(mpt)
setItemMeta(data.item.id, ctx, store, a.proposalType, a.values.head.ref.id).map(
_ => Result.multiple(mpt)
)
}
def setItemMeta[F[_]: Sync](
itemId: Ident,
ctx: Context[F, ProcessItemArgs],
store: Store[F],
mpt: MetaProposalType,
value: Ident
): F[Int] =
mpt match {
case MetaProposalType.CorrOrg =>
ctx.logger.debug(s"Updating item organization with: ${value.id}") *>
ctx.store.transact(
store.transact(
RItem.updateCorrOrg(
NonEmptyList.of(itemId),
ctx.args.meta.collective,
@ -82,7 +89,7 @@ object LinkProposal {
)
case MetaProposalType.ConcPerson =>
ctx.logger.debug(s"Updating item concerning person with: $value") *>
ctx.store.transact(
store.transact(
RItem.updateConcPerson(
NonEmptyList.of(itemId),
ctx.args.meta.collective,
@ -91,7 +98,7 @@ object LinkProposal {
)
case MetaProposalType.CorrPerson =>
ctx.logger.debug(s"Updating item correspondent person with: $value") *>
ctx.store.transact(
store.transact(
RItem.updateCorrPerson(
NonEmptyList.of(itemId),
ctx.args.meta.collective,
@ -100,7 +107,7 @@ object LinkProposal {
)
case MetaProposalType.ConcEquip =>
ctx.logger.debug(s"Updating item concerning equipment with: $value") *>
ctx.store.transact(
store.transact(
RItem.updateConcEquip(
NonEmptyList.of(itemId),
ctx.args.meta.collective,
@ -112,7 +119,7 @@ object LinkProposal {
case Some(ld) =>
val ts = Timestamp.from(ld.atStartOfDay(Timestamp.UTC))
ctx.logger.debug(s"Updating item date ${value.id}") *>
ctx.store.transact(
store.transact(
RItem.updateDate(
NonEmptyList.of(itemId),
ctx.args.meta.collective,
@ -128,7 +135,7 @@ object LinkProposal {
case Some(ld) =>
val ts = Timestamp.from(ld.atStartOfDay(Timestamp.UTC))
ctx.logger.debug(s"Updating item due-date suggestion ${value.id}") *>
ctx.store.transact(
store.transact(
RItem.updateDueDate(
NonEmptyList.of(itemId),
ctx.args.meta.collective,

View File

@ -7,6 +7,7 @@
package docspell.joex.process
import cats.effect._
import cats.implicits._
import docspell.analysis.TextAnalyser
import docspell.backend.ops.OItem
@ -14,7 +15,8 @@ import docspell.common.ProcessItemArgs
import docspell.ftsclient.FtsClient
import docspell.joex.Config
import docspell.joex.analysis.RegexNerFile
import docspell.joex.scheduler.Task
import docspell.scheduler.Task
import docspell.store.Store
object ProcessItem {
@ -23,12 +25,13 @@ object ProcessItem {
itemOps: OItem[F],
fts: FtsClient[F],
analyser: TextAnalyser[F],
regexNer: RegexNerFile[F]
regexNer: RegexNerFile[F],
store: Store[F]
)(item: ItemData): Task[F, ProcessItemArgs, ItemData] =
ExtractArchive(item)
ExtractArchive(store)(item)
.flatMap(Task.setProgress(20))
.flatMap(processAttachments0(cfg, fts, analyser, regexNer, (40, 60, 80)))
.flatMap(LinkProposal.onlyNew[F])
.flatMap(processAttachments0(cfg, fts, analyser, regexNer, store, (40, 60, 80)))
.flatMap(LinkProposal.onlyNew[F](store))
.flatMap(SetGivenData.onlyNew[F](itemOps))
.flatMap(Task.setProgress(99))
.flatMap(RemoveEmptyItem(itemOps))
@ -37,34 +40,37 @@ object ProcessItem {
cfg: Config,
fts: FtsClient[F],
analyser: TextAnalyser[F],
regexNer: RegexNerFile[F]
regexNer: RegexNerFile[F],
store: Store[F]
)(item: ItemData): Task[F, ProcessItemArgs, ItemData] =
processAttachments0[F](cfg, fts, analyser, regexNer, (30, 60, 90))(item)
processAttachments0[F](cfg, fts, analyser, regexNer, store, (30, 60, 90))(item)
def analysisOnly[F[_]: Async](
cfg: Config,
analyser: TextAnalyser[F],
regexNer: RegexNerFile[F]
regexNer: RegexNerFile[F],
store: Store[F]
)(item: ItemData): Task[F, ProcessItemArgs, ItemData] =
TextAnalysis[F](cfg.textAnalysis, analyser, regexNer)(item)
.flatMap(FindProposal[F](cfg.textAnalysis))
.flatMap(EvalProposals[F])
.flatMap(CrossCheckProposals[F])
.flatMap(SaveProposals[F])
TextAnalysis[F](cfg.textAnalysis, analyser, regexNer, store)(item)
.flatMap(FindProposal[F](cfg.textAnalysis, store))
.flatMap(EvalProposals[F](store))
.flatMap(CrossCheckProposals[F](store))
.flatMap(SaveProposals[F](store))
private def processAttachments0[F[_]: Async](
cfg: Config,
fts: FtsClient[F],
analyser: TextAnalyser[F],
regexNer: RegexNerFile[F],
store: Store[F],
progress: (Int, Int, Int)
)(item: ItemData): Task[F, ProcessItemArgs, ItemData] =
ConvertPdf(cfg.convert, item)
ConvertPdf(cfg.convert, store, item)
.flatMap(Task.setProgress(progress._1))
.flatMap(TextExtraction(cfg.extraction, fts))
.flatMap(AttachmentPreview(cfg.extraction.preview))
.flatMap(AttachmentPageCount())
.flatMap(TextExtraction(cfg.extraction, fts, store))
.flatMap(AttachmentPreview(cfg.extraction.preview, store))
.flatMap(AttachmentPageCount(store))
.flatMap(Task.setProgress(progress._2))
.flatMap(analysisOnly[F](cfg, analyser, regexNer))
.flatMap(analysisOnly[F](cfg, analyser, regexNer, store))
.flatMap(Task.setProgress(progress._3))
}

View File

@ -16,8 +16,9 @@ import docspell.common._
import docspell.ftsclient.FtsClient
import docspell.joex.Config
import docspell.joex.analysis.RegexNerFile
import docspell.joex.scheduler.Context
import docspell.joex.scheduler.Task
import docspell.scheduler.Context
import docspell.scheduler.Task
import docspell.store.Store
import docspell.store.queries.QItem
import docspell.store.records.RAttachment
import docspell.store.records.RAttachmentSource
@ -32,13 +33,14 @@ object ReProcessItem {
fts: FtsClient[F],
itemOps: OItem[F],
analyser: TextAnalyser[F],
regexNer: RegexNerFile[F]
regexNer: RegexNerFile[F],
store: Store[F]
): Task[F, Args, Unit] =
Task
.log[F, Args](_.info("===== Start reprocessing ======"))
.flatMap(_ =>
loadItem[F]
.flatMap(safeProcess[F](cfg, fts, itemOps, analyser, regexNer))
loadItem[F](store)
.flatMap(safeProcess[F](cfg, fts, itemOps, analyser, regexNer, store))
.map(_ => ())
)
@ -53,13 +55,13 @@ object ReProcessItem {
else ra => selection.contains(ra.id)
}
def loadItem[F[_]: Sync]: Task[F, Args, ItemData] =
def loadItem[F[_]: Sync](store: Store[F]): Task[F, Args, ItemData] =
Task { ctx =>
(for {
item <- OptionT(ctx.store.transact(RItem.findById(ctx.args.itemId)))
attach <- OptionT.liftF(ctx.store.transact(RAttachment.findByItem(item.id)))
item <- OptionT(store.transact(RItem.findById(ctx.args.itemId)))
attach <- OptionT.liftF(store.transact(RAttachment.findByItem(item.id)))
asrc <-
OptionT.liftF(ctx.store.transact(RAttachmentSource.findByItem(ctx.args.itemId)))
OptionT.liftF(store.transact(RAttachmentSource.findByItem(ctx.args.itemId)))
asrcMap = asrc.map(s => s.id -> s).toMap
// copy the original files over to attachments to run the default processing task
// the processing doesn't touch the original files, only RAttachments
@ -97,6 +99,7 @@ object ReProcessItem {
itemOps: OItem[F],
analyser: TextAnalyser[F],
regexNer: RegexNerFile[F],
store: Store[F],
data: ItemData
): Task[F, Args, ItemData] = {
@ -121,27 +124,27 @@ object ReProcessItem {
Nil
).pure[F]
getLanguage[F].flatMap { lang =>
getLanguage[F](store).flatMap { lang =>
ProcessItem
.processAttachments[F](cfg, fts, analyser, regexNer)(data)
.flatMap(LinkProposal[F])
.processAttachments[F](cfg, fts, analyser, regexNer, store)(data)
.flatMap(LinkProposal[F](store))
.flatMap(SetGivenData[F](itemOps))
.contramap[Args](convertArgs(lang))
}
}
def getLanguage[F[_]: Sync]: Task[F, Args, Language] =
def getLanguage[F[_]: Sync](store: Store[F]): Task[F, Args, Language] =
Task { ctx =>
val lang1 = OptionT(
ctx.store.transact(QItem.getItemLanguage(ctx.args.itemId)).map(_.headOption)
store.transact(QItem.getItemLanguage(ctx.args.itemId)).map(_.headOption)
)
val lang2 = OptionT(ctx.store.transact(RCollective.findByItem(ctx.args.itemId)))
val lang2 = OptionT(store.transact(RCollective.findByItem(ctx.args.itemId)))
.map(_.language)
lang1.orElse(lang2).getOrElse(Language.German)
}
def isLastRetry[F[_]: Sync]: Task[F, Args, Boolean] =
def isLastRetry[F[_]]: Task[F, Args, Boolean] =
Task(_.isLastRetry)
def safeProcess[F[_]: Async](
@ -149,11 +152,12 @@ object ReProcessItem {
fts: FtsClient[F],
itemOps: OItem[F],
analyser: TextAnalyser[F],
regexNer: RegexNerFile[F]
regexNer: RegexNerFile[F],
store: Store[F]
)(data: ItemData): Task[F, Args, ItemData] =
isLastRetry[F].flatMap {
case true =>
processFiles[F](cfg, fts, itemOps, analyser, regexNer, data).attempt
processFiles[F](cfg, fts, itemOps, analyser, regexNer, store, data).attempt
.flatMap {
case Right(d) =>
Task.pure(d)
@ -163,7 +167,7 @@ object ReProcessItem {
).andThen(_ => Sync[F].raiseError(ex))
}
case false =>
processFiles[F](cfg, fts, itemOps, analyser, regexNer, data)
processFiles[F](cfg, fts, itemOps, analyser, regexNer, store, data)
}
private def logWarn[F[_]](msg: => String): Task[F, Args, Unit] =

View File

@ -11,7 +11,7 @@ import cats.implicits._
import docspell.backend.ops.OItem
import docspell.common._
import docspell.joex.scheduler.Task
import docspell.scheduler.Task
object RemoveEmptyItem {

View File

@ -10,15 +10,15 @@ import cats.effect.Sync
import cats.implicits._
import docspell.common._
import docspell.joex.scheduler.{Context, Task}
import docspell.store.AddResult
import docspell.scheduler.{Context, Task}
import docspell.store.records._
import docspell.store.{AddResult, Store}
/** Saves the proposals in the database */
object SaveProposals {
type Args = ProcessItemArgs
def apply[F[_]: Sync](data: ItemData): Task[F, Args, ItemData] =
def apply[F[_]: Sync](store: Store[F])(data: ItemData): Task[F, Args, ItemData] =
Task { ctx =>
for {
_ <- ctx.logger.info("Storing proposals")
@ -26,20 +26,24 @@ object SaveProposals {
.traverse(rm =>
ctx.logger.debug(
s"Storing attachment proposals: ${rm.proposals}"
) *> ctx.store.transact(RAttachmentMeta.updateProposals(rm.id, rm.proposals))
) *> store.transact(RAttachmentMeta.updateProposals(rm.id, rm.proposals))
)
_ <-
if (data.classifyProposals.isEmpty && data.classifyTags.isEmpty) 0.pure[F]
else saveItemProposal(ctx, data)
else saveItemProposal(ctx, store, data)
} yield data
}
def saveItemProposal[F[_]: Sync](ctx: Context[F, Args], data: ItemData): F[Unit] = {
def saveItemProposal[F[_]: Sync](
ctx: Context[F, Args],
store: Store[F],
data: ItemData
): F[Unit] = {
def upsert(v: RItemProposal): F[Int] =
ctx.store.add(RItemProposal.insert(v), RItemProposal.exists(v.itemId)).flatMap {
store.add(RItemProposal.insert(v), RItemProposal.exists(v.itemId)).flatMap {
case AddResult.Success => 1.pure[F]
case AddResult.EntityExists(_) =>
ctx.store.transact(RItemProposal.update(v))
store.transact(RItemProposal.update(v))
case AddResult.Failure(ex) =>
ctx.logger.warn(s"Could not store item proposals: ${ex.getMessage}") *> 0
.pure[F]
@ -47,7 +51,7 @@ object SaveProposals {
for {
_ <- ctx.logger.debug(s"Storing classifier proposals: ${data.classifyProposals}")
tags <- ctx.store.transact(
tags <- store.transact(
RTag.findAllByNameOrId(data.classifyTags, ctx.args.meta.collective)
)
tagRefs = tags.map(t => IdRef(t.tagId, t.name))

View File

@ -11,7 +11,7 @@ import cats.implicits._
import docspell.backend.ops.OItem
import docspell.common._
import docspell.joex.scheduler.Task
import docspell.scheduler.Task
import docspell.store.UpdateResult
object SetGivenData {

View File

@ -18,8 +18,9 @@ import docspell.joex.Config
import docspell.joex.analysis.RegexNerFile
import docspell.joex.learn.{ClassifierName, Classify, LearnClassifierTask}
import docspell.joex.process.ItemData.AttachmentDates
import docspell.joex.scheduler.Context
import docspell.joex.scheduler.Task
import docspell.scheduler.Context
import docspell.scheduler.Task
import docspell.store.Store
import docspell.store.records.{RAttachmentMeta, RClassifierSetting}
object TextAnalysis {
@ -28,7 +29,8 @@ object TextAnalysis {
def apply[F[_]: Async](
cfg: Config.TextAnalysis,
analyser: TextAnalyser[F],
nerFile: RegexNerFile[F]
nerFile: RegexNerFile[F],
store: Store[F]
)(item: ItemData): Task[F, Args, ItemData] =
Task { ctx =>
for {
@ -41,18 +43,19 @@ object TextAnalysis {
)
_ <- ctx.logger.debug(s"Storing tags: ${t.map(_._1.copy(content = None))}")
_ <- t.traverse(m =>
ctx.store.transact(RAttachmentMeta.updateLabels(m._1.id, m._1.nerlabels))
store.transact(RAttachmentMeta.updateLabels(m._1.id, m._1.nerlabels))
)
v = t.toVector
autoTagEnabled <- getActiveAutoTag(ctx, cfg)
autoTagEnabled <- getActiveAutoTag(ctx, store, cfg)
tag <-
if (autoTagEnabled) predictTags(ctx, cfg, item.metas, analyser.classifier)
if (autoTagEnabled)
predictTags(ctx, store, cfg, item.metas, analyser.classifier)
else List.empty[String].pure[F]
classProposals <-
if (cfg.classification.enabled)
predictItemEntities(ctx, cfg, item.metas, analyser.classifier)
predictItemEntities(ctx, store, cfg, item.metas, analyser.classifier)
else MetaProposalList.empty.pure[F]
e <- s
@ -86,16 +89,17 @@ object TextAnalysis {
def predictTags[F[_]: Async](
ctx: Context[F, Args],
store: Store[F],
cfg: Config.TextAnalysis,
metas: Vector[RAttachmentMeta],
classifier: TextClassifier[F]
): F[List[String]] = {
val text = metas.flatMap(_.content).mkString(LearnClassifierTask.pageSep)
val classifyWith: ClassifierName => F[Option[String]] =
makeClassify(ctx, cfg, classifier)(text)
makeClassify(ctx, store, cfg, classifier)(text)
for {
names <- ctx.store.transact(
names <- store.transact(
ClassifierName.findTagClassifiers(ctx.args.meta.collective)
)
_ <- ctx.logger.debug(s"Guessing tags for ${names.size} categories")
@ -105,6 +109,7 @@ object TextAnalysis {
def predictItemEntities[F[_]: Async](
ctx: Context[F, Args],
store: Store[F],
cfg: Config.TextAnalysis,
metas: Vector[RAttachmentMeta],
classifier: TextClassifier[F]
@ -116,7 +121,7 @@ object TextAnalysis {
mtype: MetaProposalType
): F[Option[MetaProposal]] =
for {
label <- makeClassify(ctx, cfg, classifier)(text).apply(cname)
label <- makeClassify(ctx, store, cfg, classifier)(text).apply(cname)
} yield label.map(str =>
MetaProposal(mtype, Candidate(IdRef(Ident.unsafe(""), str), Set.empty))
)
@ -136,13 +141,14 @@ object TextAnalysis {
private def makeClassify[F[_]: Async](
ctx: Context[F, Args],
store: Store[F],
cfg: Config.TextAnalysis,
classifier: TextClassifier[F]
)(text: String): ClassifierName => F[Option[String]] =
Classify[F](
ctx.logger,
cfg.workingDir,
ctx.store,
store,
classifier,
ctx.args.meta.collective,
text
@ -150,10 +156,11 @@ object TextAnalysis {
private def getActiveAutoTag[F[_]: Sync](
ctx: Context[F, Args],
store: Store[F],
cfg: Config.TextAnalysis
): F[Boolean] =
if (cfg.classification.enabled)
ctx.store
store
.transact(RClassifierSetting.findById(ctx.args.meta.collective))
.map(_.exists(_.autoTagEnabled))
.flatTap(enabled =>

View File

@ -13,12 +13,13 @@ import cats.implicits._
import docspell.common._
import docspell.extract.{ExtractConfig, ExtractResult, Extraction}
import docspell.ftsclient.{FtsClient, TextData}
import docspell.joex.scheduler.{Context, Task}
import docspell.scheduler.{Context, Task}
import docspell.store.Store
import docspell.store.records.{RAttachment, RAttachmentMeta, RFileMeta}
object TextExtraction {
def apply[F[_]: Async](cfg: ExtractConfig, fts: FtsClient[F])(
def apply[F[_]: Async](cfg: ExtractConfig, fts: FtsClient[F], store: Store[F])(
item: ItemData
): Task[F, ProcessItemArgs, ItemData] =
Task { ctx =>
@ -30,6 +31,7 @@ object TextExtraction {
txt <- item.attachments.traverse(
extractTextIfEmpty(
ctx,
store,
cfg,
ctx.args.meta.language,
ctx.args.meta.collective,
@ -38,7 +40,7 @@ object TextExtraction {
)
_ <- ctx.logger.debug("Storing extracted texts …")
_ <-
txt.toList.traverse(res => ctx.store.transact(RAttachmentMeta.upsert(res.am)))
txt.toList.traverse(res => store.transact(RAttachmentMeta.upsert(res.am)))
_ <- ctx.logger.debug(s"Extracted text stored.")
idxItem = TextData.item(
item.item.id,
@ -65,6 +67,7 @@ object TextExtraction {
def extractTextIfEmpty[F[_]: Async](
ctx: Context[F, ProcessItemArgs],
store: Store[F],
cfg: ExtractConfig,
lang: Language,
collective: Ident,
@ -91,13 +94,14 @@ object TextExtraction {
ctx.logger.info("TextExtraction skipped, since text is already available.") *>
makeTextData((rm, Nil)).pure[F]
case _ =>
extractTextToMeta[F](ctx, cfg, lang, item)(ra)
extractTextToMeta[F](ctx, store, cfg, lang, item)(ra)
.map(makeTextData)
}
}
def extractTextToMeta[F[_]: Async](
ctx: Context[F, _],
store: Store[F],
cfg: ExtractConfig,
lang: Language,
item: ItemData
@ -105,8 +109,8 @@ object TextExtraction {
for {
_ <- ctx.logger.debug(s"Extracting text for attachment ${stripAttachmentName(ra)}")
dst <- Duration.stopTime[F]
fids <- filesToExtract(ctx)(item, ra)
res <- extractTextFallback(ctx, cfg, ra, lang)(fids)
fids <- filesToExtract(store)(item, ra)
res <- extractTextFallback(ctx, store, cfg, ra, lang)(fids)
meta = item.changeMeta(
ra.id,
lang,
@ -123,14 +127,14 @@ object TextExtraction {
} yield (meta, tags)
def extractText[F[_]: Sync](
ctx: Context[F, _],
store: Store[F],
extr: Extraction[F],
lang: Language
)(fileId: FileKey): F[ExtractResult] = {
val data = ctx.store.fileRepo.getBytes(fileId)
val data = store.fileRepo.getBytes(fileId)
def findMime: F[MimeType] =
OptionT(ctx.store.fileRepo.findMeta(fileId))
OptionT(store.fileRepo.findMeta(fileId))
.map(_.mimetype)
.getOrElse(MimeType.octetStream)
@ -140,6 +144,7 @@ object TextExtraction {
private def extractTextFallback[F[_]: Async](
ctx: Context[F, _],
store: Store[F],
cfg: ExtractConfig,
ra: RAttachment,
lang: Language
@ -151,7 +156,7 @@ object TextExtraction {
case id :: rest =>
val extr = Extraction.create[F](ctx.logger, cfg)
extractText[F](ctx, extr, lang)(id)
extractText[F](store, extr, lang)(id)
.flatMap {
case res @ ExtractResult.Success(_, _) =>
res.some.pure[F]
@ -161,12 +166,12 @@ object TextExtraction {
.warn(
s"Cannot extract text from file ${stripAttachmentName(ra)}: unsupported format ${mt.asString}. Try with converted file."
)
.flatMap(_ => extractTextFallback[F](ctx, cfg, ra, lang)(rest))
.flatMap(_ => extractTextFallback[F](ctx, store, cfg, ra, lang)(rest))
case ExtractResult.Failure(ex) =>
ctx.logger
.warn(s"Cannot extract text: ${ex.getMessage}. Try with converted file")
.flatMap(_ => extractTextFallback[F](ctx, cfg, ra, lang)(rest))
.flatMap(_ => extractTextFallback[F](ctx, store, cfg, ra, lang)(rest))
}
}
@ -176,13 +181,13 @@ object TextExtraction {
* If the source file is a PDF, then use the converted file. This may then already
* contain the text if ocrmypdf is enabled. If it is disabled, both files are the same.
*/
private def filesToExtract[F[_]: Sync](ctx: Context[F, _])(
private def filesToExtract[F[_]: Sync](store: Store[F])(
item: ItemData,
ra: RAttachment
): F[List[FileKey]] =
item.originFile.get(ra.id) match {
case Some(sid) =>
ctx.store.transact(RFileMeta.findMime(sid)).map {
store.transact(RFileMeta.findMime(sid)).map {
case Some(MimeType.PdfMatch(_)) =>
List(ra.fileId)
case _ =>

View File

@ -12,7 +12,7 @@ import cats.implicits._
import docspell.common.{Duration, Ident, Timestamp}
import docspell.joex.JoexApp
import docspell.joexapi.model._
import docspell.store.records.{RJob, RJobLog}
import docspell.store.records.RJobLog
import org.http4s.HttpRoutes
import org.http4s.circe.CirceEntityEncoder._
@ -67,17 +67,19 @@ object JoexRoutes {
}
}
def mkJob(j: RJob): Job =
// TODO !!
def mkJob(j: docspell.scheduler.Job[String]): Job =
Job(
j.id,
j.subject,
j.submitted,
Timestamp.Epoch,
j.priority,
j.retries,
j.progress,
j.started.getOrElse(Timestamp.Epoch)
-1,
-1,
Timestamp.Epoch
)
def mkJobLog(j: RJob, jl: Vector[RJobLog]): JobAndLog =
def mkJobLog(j: docspell.scheduler.Job[String], jl: Vector[RJobLog]): JobAndLog =
JobAndLog(mkJob(j), jl.map(r => JobLogEvent(r.created, r.level, r.message)).toList)
}

View File

@ -16,8 +16,9 @@ import fs2._
import docspell.backend.ops.{OJoex, OUpload}
import docspell.common._
import docspell.joex.Config
import docspell.joex.scheduler.{Context, Task}
import docspell.logging.Logger
import docspell.scheduler.{Context, Task}
import docspell.store.Store
import docspell.store.queries.QOrganization
import docspell.store.records._
@ -32,6 +33,7 @@ object ScanMailboxTask {
def apply[F[_]: Sync](
cfg: Config.ScanMailbox,
store: Store[F],
emil: Emil[F],
upload: OUpload[F],
joex: OJoex[F]
@ -42,22 +44,22 @@ object ScanMailboxTask {
s"=== Start importing mails for user ${ctx.args.account.user.id}"
)
_ <- ctx.logger.debug(s"Settings: ${ctx.args.asJson.noSpaces}")
mailCfg <- getMailSettings(ctx)
mailCfg <- getMailSettings(ctx, store)
folders = ctx.args.folders.mkString(", ")
userId = ctx.args.account.user
imapConn = ctx.args.imapConnection
_ <- ctx.logger.info(
s"Reading mails for user ${userId.id} from ${imapConn.id}/$folders"
)
_ <- importMails(cfg, mailCfg, emil, upload, joex, ctx)
_ <- importMails(cfg, mailCfg, emil, upload, joex, ctx, store)
} yield ()
}
/** Produces a task that does nothing but write a warning to the task's logger,
  * stating that the scan-mailbox task is being cancelled.
  */
def onCancel[F[_]]: Task[F, ScanMailboxArgs, Unit] =
  Task.log(logger => logger.warn("Cancelling scan-mailbox task"))
def getMailSettings[F[_]: Sync](ctx: Context[F, Args]): F[RUserImap] =
ctx.store
def getMailSettings[F[_]: Sync](ctx: Context[F, Args], store: Store[F]): F[RUserImap] =
store
.transact(RUserImap.getByName(ctx.args.account, ctx.args.imapConnection))
.flatMap {
case Some(c) => c.pure[F]
@ -75,10 +77,11 @@ object ScanMailboxTask {
theEmil: Emil[F],
upload: OUpload[F],
joex: OJoex[F],
ctx: Context[F, Args]
ctx: Context[F, Args],
store: Store[F]
): F[Unit] = {
val mailer = theEmil(mailCfg.toMailConfig)
val impl = new Impl[F](cfg, ctx)
val impl = new Impl[F](cfg, ctx, store)
val inFolders = ctx.args.folders.take(cfg.maxFolders)
val getInitialInput =
@ -142,7 +145,11 @@ object ScanMailboxTask {
ScanResult(List(folder -> left), processed)
}
final private class Impl[F[_]: Sync](cfg: Config.ScanMailbox, ctx: Context[F, Args]) {
final private class Impl[F[_]: Sync](
cfg: Config.ScanMailbox,
ctx: Context[F, Args],
store: Store[F]
) {
private def logOp[C](f: Logger[F] => F[Unit]): MailOp[F, C, Unit] =
MailOp(_ => f(ctx.logger))
@ -213,7 +220,7 @@ object ScanMailboxTask {
NonEmptyList.fromFoldable(headers.flatMap(_.mh.messageId)) match {
case Some(nl) =>
for {
archives <- ctx.store.transact(
archives <- store.transact(
RAttachmentArchive
.findByMessageIdAndCollective(nl, ctx.args.account.collective)
)
@ -237,7 +244,7 @@ object ScanMailboxTask {
for {
from <- OptionT.fromOption[F](mh.from)
_ <- OptionT(
ctx.store.transact(
store.transact(
QOrganization
.findPersonByContact(
ctx.args.account.collective,

View File

@ -1,60 +0,0 @@
/*
* Copyright 2020 Eike K. & Contributors
*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
package docspell.joex.scheduler
import cats.effect._
import fs2._
import fs2.concurrent.SignallingRef
import docspell.backend.ops.OJoex
import docspell.store.queue._
/** A periodic scheduler takes care to submit periodic tasks to the job queue.
*
* It is run in the background to regularly find a periodic task to execute. If the task
* is due, it will be submitted into the job queue where it will be picked up by the
* scheduler from some joex instance. If it is due in the future, a notification is
* scheduled to be received at that time so the task can be looked up again.
*/
trait PeriodicScheduler[F[_]] {
def config: PeriodicSchedulerConfig
def start: Stream[F, Nothing]
def shutdown: F[Unit]
def periodicAwake: F[Fiber[F, Throwable, Unit]]
def notifyChange: F[Unit]
}
object PeriodicScheduler {
def create[F[_]: Async](
cfg: PeriodicSchedulerConfig,
sch: Scheduler[F],
queue: JobQueue[F],
store: PeriodicTaskStore[F],
joex: OJoex[F]
): Resource[F, PeriodicScheduler[F]] =
for {
waiter <- Resource.eval(SignallingRef(true))
state <- Resource.eval(SignallingRef(PeriodicSchedulerImpl.emptyState[F]))
psch = new PeriodicSchedulerImpl[F](
cfg,
sch,
queue,
store,
joex,
waiter,
state
)
_ <- Resource.eval(psch.init)
} yield psch
}

View File

@ -11,12 +11,10 @@ import cats.effect._
import cats.implicits._
import docspell.common._
import docspell.joex.scheduler.Context
import docspell.joex.scheduler.Task
import docspell.store.records.RPeriodicTask
import docspell.scheduler.Task
import docspell.scheduler.usertask.UserTask
import docspell.store.Store
import docspell.store.records.RUserEmail
import docspell.store.usertask.UserTask
import docspell.store.usertask.UserTaskScope
import emil._
@ -28,22 +26,20 @@ object UpdateCheckTask {
def onCancel[F[_]]: Task[F, Args, Unit] =
Task.log(_.warn("Cancelling update-check task"))
def periodicTask[F[_]: Sync](cfg: UpdateCheckConfig): F[RPeriodicTask] =
def periodicTask[F[_]: Sync](cfg: UpdateCheckConfig): F[UserTask[Unit]] =
UserTask(
Ident.unsafe("docspell-update-check"),
taskName,
cfg.enabled,
cfg.schedule,
None,
"Docspell Update Check".some,
()
).encode.toPeriodicTask(
UserTaskScope(cfg.senderAccount.collective),
"Docspell Update Check".some
)
).pure[F]
def apply[F[_]: Async](
cfg: UpdateCheckConfig,
sendCfg: MailSendConfig,
store: Store[F],
emil: Emil[F],
updateCheck: UpdateCheck[F],
thisVersion: ThisVersion
@ -57,7 +53,7 @@ object UpdateCheckTask {
_ <- ctx.logger.debug(
s"Get SMTP connection for ${cfg.senderAccount.asString} and ${cfg.smtpId}"
)
smtpCfg <- findConnection(ctx, cfg)
smtpCfg <- findConnection(store, cfg)
_ <- ctx.logger.debug("Checking for latest release at GitHub")
latest <- updateCheck.latestRelease
_ <- ctx.logger.debug(s"Got latest release: $latest.")
@ -84,10 +80,10 @@ object UpdateCheckTask {
Task.pure(())
def findConnection[F[_]: Sync](
ctx: Context[F, _],
store: Store[F],
cfg: UpdateCheckConfig
): F[RUserEmail] =
OptionT(ctx.store.transact(RUserEmail.getByName(cfg.senderAccount, cfg.smtpId)))
OptionT(store.transact(RUserEmail.getByName(cfg.senderAccount, cfg.smtpId)))
.getOrElseF(
Sync[F].raiseError(
new Exception(

View File

@ -22,10 +22,12 @@ object ScribeConfigure {
Sync[F].delay {
replaceJUL()
val docspellLogger = scribe.Logger("docspell")
val flywayLogger = scribe.Logger("org.flywaydb")
unsafeConfigure(scribe.Logger.root, cfg.copy(minimumLevel = getRootMinimumLevel))
unsafeConfigure(docspellLogger, cfg)
unsafeConfigure(flywayLogger, cfg)
unsafeConfigure(scribe.Logger("org.flywaydb"), cfg)
unsafeConfigure(scribe.Logger("binny"), cfg)
unsafeConfigure(scribe.Logger("org.http4s"), cfg)
}
private[this] def getRootMinimumLevel: Level =

View File

@ -204,7 +204,8 @@ object Event {
state: JobState,
subject: String,
submitter: Ident,
result: Json
resultData: Json,
resultMsg: Option[String]
) extends Event {
val eventType = JobDone
val baseUrl = None
@ -222,7 +223,8 @@ object Event {
JobState.running,
"Process 3 files",
account.user,
Json.Null
Json.Null,
None
)
} yield ev
}

View File

@ -31,30 +31,25 @@ trait EventContext {
"content" -> content
)
def defaultTitle: Either[String, String]
def defaultTitleHtml: Either[String, String]
def defaultBody: Either[String, String]
def defaultBodyHtml: Either[String, String]
def defaultMessage: Either[String, EventMessage]
def defaultMessageHtml: Either[String, EventMessage]
def defaultBoth: Either[String, String]
def defaultBothHtml: Either[String, String]
lazy val asJsonWithMessage: Either[String, Json] =
for {
tt1 <- defaultTitle
tb1 <- defaultBody
tt2 <- defaultTitleHtml
tb2 <- defaultBodyHtml
dm1 <- defaultMessage
dm2 <- defaultMessageHtml
data = asJson
msg = Json.obj(
"message" -> Json.obj(
"title" -> tt1.asJson,
"body" -> tb1.asJson
"title" -> dm1.title.asJson,
"body" -> dm1.body.asJson
),
"messageHtml" -> Json.obj(
"title" -> tt2.asJson,
"body" -> tb2.asJson
"title" -> dm2.title.asJson,
"body" -> dm2.body.asJson
)
)
} yield data.withObject(o1 => msg.withObject(o2 => o1.deepMerge(o2).asJson))
@ -65,10 +60,8 @@ object EventContext {
new EventContext {
val event = ev
def content = Json.obj()
def defaultTitle = Right("")
def defaultTitleHtml = Right("")
def defaultBody = Right("")
def defaultBodyHtml = Right("")
def defaultMessage = Right(EventMessage.empty)
def defaultMessageHtml = Right(EventMessage.empty)
def defaultBoth = Right("")
def defaultBothHtml = Right("")
}

View File

@ -0,0 +1,13 @@
/*
* Copyright 2020 Eike K. & Contributors
*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
package docspell.notification.api
/** The title and the body of an event message. */
final case class EventMessage(title: String, body: String)

object EventMessage {

  /** A message whose title and body are both the empty string. */
  val empty: EventMessage = EventMessage(title = "", body = "")
}

View File

@ -6,7 +6,7 @@
package docspell.notification.impl
import docspell.notification.api.EventContext
import docspell.notification.api.{EventContext, EventMessage}
import yamusca.circe._
import yamusca.implicits._
@ -24,17 +24,17 @@ abstract class AbstractEventContext extends EventContext {
def renderHtml(template: Template): String =
Markdown.toHtml(render(template))
lazy val defaultTitle: Either[String, String] =
titleTemplate.map(render)
lazy val defaultMessage: Either[String, EventMessage] =
for {
title <- titleTemplate.map(render)
body <- bodyTemplate.map(render)
} yield EventMessage(title, body)
lazy val defaultTitleHtml: Either[String, String] =
titleTemplate.map(renderHtml)
lazy val defaultBody: Either[String, String] =
bodyTemplate.map(render)
lazy val defaultBodyHtml: Either[String, String] =
bodyTemplate.map(renderHtml)
lazy val defaultMessageHtml: Either[String, EventMessage] =
for {
title <- titleTemplate.map(renderHtml)
body <- bodyTemplate.map(renderHtml)
} yield EventMessage(title, body)
lazy val defaultBoth: Either[String, String] =
for {

View File

@ -18,8 +18,9 @@ trait EventContextSyntax {
implicit final class EventContextOps(self: EventContext) {
def withDefault[F[_]](logger: Logger[F])(f: (String, String) => F[Unit]): F[Unit] =
(for {
tt <- self.defaultTitle
tb <- self.defaultBody
dm <- self.defaultMessage
tt = dm.title
tb = dm.body
} yield f(tt, tb)).fold(logError(logger), identity)
def withJsonMessage[F[_]](logger: Logger[F])(f: Json => F[Unit]): F[Unit] =

View File

@ -23,9 +23,14 @@ final case class JobDoneCtx(event: Event.JobDone, data: JobDoneCtx.Data)
val content = data.asJson
val titleTemplate = Right(mustache"{{eventType}} (by *{{account.user}}*)")
val bodyTemplate = Right(
mustache"""{{#content}}_'{{subject}}'_ finished {{/content}}"""
)
val bodyTemplate =
data.resultMsg match {
case None =>
Right(mustache"""{{#content}}_'{{subject}}'_ finished {{/content}}""")
case Some(msg) =>
val tpl = s"""{{#content}}$msg{{/content}}"""
yamusca.imports.mustache.parse(tpl).left.map(_._2)
}
}
object JobDoneCtx {
@ -46,7 +51,8 @@ object JobDoneCtx {
state: JobState,
subject: String,
submitter: Ident,
result: Json
resultData: Json,
resultMsg: Option[String]
)
object Data {
implicit val jsonEncoder: Encoder[Data] =
@ -61,7 +67,8 @@ object JobDoneCtx {
ev.state,
ev.subject,
ev.submitter,
ev.result
ev.resultData,
ev.resultMsg
)
}
}

View File

@ -46,9 +46,10 @@ class TagsChangedCtxTest extends FunSuite {
TagsChangedCtx.Data(account, List(item), List(tag), Nil, url.some.map(_.asString))
)
assertEquals(ctx.defaultTitle.toOption.get, "TagsChanged (by *user2*)")
val dm = ctx.defaultMessage.toOption.get
assertEquals(dm.title, "TagsChanged (by *user2*)")
assertEquals(
ctx.defaultBody.toOption.get,
dm.body,
"Adding *tag-red* on [`Report 2`](http://test/item-1)."
)
}
@ -65,9 +66,10 @@ class TagsChangedCtxTest extends FunSuite {
)
)
assertEquals(ctx.defaultTitle.toOption.get, "TagsChanged (by *user2*)")
val dm = ctx.defaultMessage.toOption.get
assertEquals(dm.title, "TagsChanged (by *user2*)")
assertEquals(
ctx.defaultBody.toOption.get,
dm.body,
"Adding *tag-red*; Removing *tag-blue* on [`Report 2`](http://test/item-1)."
)
}

View File

@ -2487,6 +2487,59 @@ paths:
schema:
$ref: "#/components/schemas/BasicResult"
/admin/files/cloneFileRepository:
post:
operationId: "admin-files-cloneFileRepository"
tags: [Admin]
summary: Copy all files into a new repository
description: |
Submits a task that will copy all files of the application
(from the default file repository) into another file
repository as specified in the request. The request may define
ids of file repository configurations that must be present in
the config file. An empty list means to copy to all enabled
file repositories from the default file repository.
security:
- adminHeader: []
requestBody:
content:
application/json:
schema:
$ref: "#/components/schemas/FileRepositoryCloneRequest"
responses:
422:
description: BadRequest
200:
description: Ok
content:
application/json:
schema:
$ref: "#/components/schemas/BasicResult"
/admin/files/integrityCheck:
post:
operationId: "admin-files-integrityCheck"
tags: [ Admin ]
summary: Verifies the stored checksum
description: |
Submits a task that goes through the files and compares the
stored checksum (at the time of inserting) against a newly
calculated one.
security:
- adminHeader: []
requestBody:
content:
application/json:
schema:
$ref: "#/components/schemas/FileIntegrityCheckRequest"
responses:
200:
description: Ok
content:
application/json:
schema:
$ref: "#/components/schemas/BasicResult"
/sec/source:
get:
operationId: "sec-source-get-all"
@ -5433,6 +5486,26 @@ paths:
components:
schemas:
FileIntegrityCheckRequest:
description: |
Data for running a file integrity check
properties:
collective:
type: string
format: ident
FileRepositoryCloneRequest:
description: |
Clone the file repository to a new location.
required:
- targetRepositories
properties:
targetRepositories:
type: array
items:
type: string
format: ident
BookmarkedQuery:
description: |
A query bookmark.

View File

@ -358,6 +358,41 @@ docspell.server {
# restrict file types that should be handed over to processing.
# By default all files are allowed.
valid-mime-types = [ ]
# The id of an enabled store from the `stores` array that should
# be used.
#
# IMPORTANT NOTE: All nodes must have the exact same file store
# configuration!
default-store = "database"
# A list of possible file stores. Each entry must have a unique
# id. The `type` is one of: default-database, file-system, s3.
#
# The enabled property serves currently to define target stores
# for the "copy files" task. All stores with enabled=false are
# removed from the list. The `default-store` must be enabled.
stores = {
database =
{ enabled = true
type = "default-database"
}
filesystem =
{ enabled = false
type = "file-system"
directory = "/some/directory"
}
minio =
{ enabled = false
type = "s3"
endpoint = "http://localhost:9000"
access-key = "username"
secret-key = "password"
bucket = "docspell"
}
}
}
}
}

View File

@ -24,12 +24,18 @@ import scodec.bits.ByteVector
object ConfigFile {
private[this] val unsafeLogger = docspell.logging.unsafeLogger
// IntelliJ is wrong, this is required
import Implicits._
def loadConfig[F[_]: Async](args: List[String]): F[Config] = {
val logger = docspell.logging.getLogger[F]
val validate =
Validation.of(generateSecretIfEmpty, duplicateOpenIdProvider, signKeyVsUserUrl)
Validation.of(
generateSecretIfEmpty,
duplicateOpenIdProvider,
signKeyVsUserUrl,
filesValidate
)
ConfigFactory
.default[F, Config](logger, "docspell.server")(args, validate)
}
@ -97,4 +103,7 @@ object ConfigFile {
.map(checkProvider)
)
}
/** Validation step for the file settings: runs `cfg.backend.files.validate` and, when
  * that succeeds, yields the unchanged config.
  */
def filesValidate: Validation[Config] =
Validation(cfg => cfg.backend.files.validate.map(_ => cfg))
}

View File

@ -28,7 +28,8 @@ object Main extends IOApp {
Option(System.getProperty("config.file")),
cfg.appId,
cfg.baseUrl,
Some(cfg.fullTextSearch.solr.url).filter(_ => cfg.fullTextSearch.enabled)
Some(cfg.fullTextSearch.solr.url).filter(_ => cfg.fullTextSearch.enabled),
cfg.backend.files.defaultStoreConfig
)
_ <- logger.info(s"\n${banner.render("***>")}")
_ <-

View File

@ -10,6 +10,9 @@ import fs2.Stream
import docspell.backend.BackendApp
import org.http4s.HttpRoutes
import org.http4s.server.websocket.WebSocketBuilder2
trait RestApp[F[_]] {
/** Access to the configuration used to build backend services. */
@ -25,4 +28,7 @@ trait RestApp[F[_]] {
* via websocket.
*/
def subscriptions: Stream[F, Nothing]
/** Http4s endpoint definitions. */
def routes(wsb: WebSocketBuilder2[F]): HttpRoutes[F]
}

View File

@ -11,20 +11,31 @@ import fs2.Stream
import fs2.concurrent.Topic
import docspell.backend.BackendApp
import docspell.backend.auth.{AuthToken, ShareToken}
import docspell.ftsclient.FtsClient
import docspell.ftssolr.SolrFtsClient
import docspell.notification.api.NotificationModule
import docspell.notification.impl.NotificationModuleImpl
import docspell.oidc.CodeFlowRoutes
import docspell.pubsub.api.{PubSub, PubSubT}
import docspell.restserver.ws.OutputEvent
import docspell.restserver.auth.OpenId
import docspell.restserver.http4s.EnvMiddleware
import docspell.restserver.routes._
import docspell.restserver.webapp.{TemplateRoutes, Templates, WebjarRoutes}
import docspell.restserver.ws.{OutputEvent, WebSocketRoutes}
import docspell.scheduler.impl.JobStoreModuleBuilder
import docspell.store.Store
import emil.javamail.JavaMailEmil
import org.http4s.HttpRoutes
import org.http4s.client.Client
import org.http4s.server.Router
import org.http4s.server.websocket.WebSocketBuilder2
final class RestAppImpl[F[_]: Async](
val config: Config,
val backend: BackendApp[F],
httpClient: Client[F],
notificationMod: NotificationModule[F],
wsTopic: Topic[F, OutputEvent],
pubSub: PubSubT[F]
@ -35,6 +46,108 @@ final class RestAppImpl[F[_]: Async](
def subscriptions: Stream[F, Nothing] =
Subscriptions[F](wsTopic, pubSub)
/** Http4s endpoint definitions of this application; delegates to `createHttpApp`. */
def routes(wsb: WebSocketBuilder2[F]): HttpRoutes[F] =
createHttpApp(wsb)
// Template routes; `doc`, `app` and `serviceWorker` of it are mounted in `createHttpApp`.
val templates = TemplateRoutes[F](config, Templates[F])
/** Assembles the route table of the REST application.
  *
  * Mounts the info and open endpoints, the endpoints wrapped in `Authenticate`,
  * `AdminAuth` and `ShareAuth`, the API documentation and the web application assets.
  *
  * @param wsB
  *   websocket builder that is handed on to the secured routes
  * @return
  *   the combined routes
  */
def createHttpApp(
    wsB: WebSocketBuilder2[F]
): HttpRoutes[F] =
  Router(
    "/api/info" -> InfoRoutes(),
    "/api/v1/open/" -> openRoutes(httpClient),
    "/api/v1/sec/" -> Authenticate(backend.login, config.auth) { token =>
      securedRoutes(wsB, token)
    },
    "/api/v1/admin" -> AdminAuth(config.adminEndpoint) {
      adminRoutes
    },
    "/api/v1/share" -> ShareAuth(backend.share, config.auth) { token =>
      shareRoutes(token)
    },
    "/api/doc" -> templates.doc,
    "/app/assets" -> EnvMiddleware(WebjarRoutes.appRoutes[F]),
    "/app" -> EnvMiddleware(templates.app),
    "/sw.js" -> EnvMiddleware(templates.serviceWorker)
  )
/** Routes of the admin API. `createHttpApp` mounts them under `/api/v1/admin`, wrapped
  * in `AdminAuth`.
  */
def adminRoutes: HttpRoutes[F] =
Router(
"fts" -> FullTextIndexRoutes.admin(config, backend),
"user/otp" -> TotpRoutes.admin(backend),
"user" -> UserRoutes.admin(backend),
"info" -> InfoRoutes.admin(config),
"attachments" -> AttachmentRoutes.admin(backend),
"files" -> FileRepositoryRoutes.admin(backend)
)
/** Routes of the share API. `createHttpApp` mounts them under `/api/v1/share`; `token`
  * is the share token passed in by `ShareAuth` there.
  */
def shareRoutes(
token: ShareToken
): HttpRoutes[F] =
Router(
"search" -> ShareSearchRoutes(backend, config, token),
"attachment" -> ShareAttachmentRoutes(backend, token),
"item" -> ShareItemRoutes(backend, token),
"clientSettings" -> ClientSettingsRoutes.share(backend, token)
)
/** Routes that `createHttpApp` mounts under `/api/v1/open/`, outside of the
  * `Authenticate` wrapper. The given http `client` is passed to the OpenID code-flow
  * routes.
  */
def openRoutes(
client: Client[F]
): HttpRoutes[F] =
Router(
"auth/openid" -> CodeFlowRoutes(
config.openIdEnabled,
OpenId.handle[F](backend, config),
OpenId.codeFlowConfig(config),
client
),
"auth" -> LoginRoutes.login(backend.login, config),
"signup" -> RegisterRoutes(backend, config),
"upload" -> UploadRoutes.open(backend, config),
"checkfile" -> CheckFileRoutes.open(backend),
"integration" -> IntegrationEndpointRoutes.open(backend, config),
"share" -> ShareRoutes.verify(backend, config)
)
/** Routes that `createHttpApp` mounts under `/api/v1/sec/`; `token` is the token passed
  * in by `Authenticate` there. The websocket builder is only used for the `ws` route.
  */
def securedRoutes(
wsB: WebSocketBuilder2[F],
token: AuthToken
): HttpRoutes[F] =
Router(
"ws" -> WebSocketRoutes(token, backend, wsTopic, wsB),
"auth" -> LoginRoutes.session(backend.login, config, token),
"tag" -> TagRoutes(backend, token),
"equipment" -> EquipmentRoutes(backend, token),
"organization" -> OrganizationRoutes(backend, token),
"person" -> PersonRoutes(backend, token),
"source" -> SourceRoutes(backend, token),
"user/otp" -> TotpRoutes(backend, config, token),
"user" -> UserRoutes(backend, token),
"collective" -> CollectiveRoutes(backend, token),
"queue" -> JobQueueRoutes(backend, token),
"item" -> ItemRoutes(config, backend, token),
"items" -> ItemMultiRoutes(config, backend, token),
"attachment" -> AttachmentRoutes(backend, token),
"attachments" -> AttachmentMultiRoutes(backend, token),
"upload" -> UploadRoutes.secured(backend, config, token),
"checkfile" -> CheckFileRoutes.secured(backend, token),
"email/send" -> MailSendRoutes(backend, token),
"email/settings" -> MailSettingsRoutes(backend, token),
"email/sent" -> SentMailRoutes(backend, token),
"share" -> ShareRoutes.manage(backend, token),
"usertask/notifydueitems" -> NotifyDueItemsRoutes(config, backend, token),
"usertask/scanmailbox" -> ScanMailboxRoutes(backend, token),
"usertask/periodicquery" -> PeriodicQueryRoutes(config, backend, token),
"calevent/check" -> CalEventCheckRoutes(),
"fts" -> FullTextIndexRoutes.secured(config, backend, token),
"folder" -> FolderRoutes(backend, token),
"customfield" -> CustomFieldRoutes(backend, token),
"clientSettings" -> ClientSettingsRoutes(backend, token),
"notification" -> NotificationRoutes(config, backend, token),
"querybookmark" -> BookmarkRoutes(backend, token)
)
}
object RestAppImpl {
@ -55,10 +168,21 @@ object RestAppImpl {
notificationMod <- Resource.eval(
NotificationModuleImpl[F](store, javaEmil, httpClient, 200)
)
schedulerMod = JobStoreModuleBuilder(store)
.withPubsub(pubSubT)
.withEventSink(notificationMod)
.build
backend <- BackendApp
.create[F](store, javaEmil, ftsClient, pubSubT, notificationMod)
.create[F](store, javaEmil, ftsClient, pubSubT, schedulerMod, notificationMod)
app = new RestAppImpl[F](cfg, backend, notificationMod, wsTopic, pubSubT)
app = new RestAppImpl[F](
cfg,
backend,
httpClient,
notificationMod,
wsTopic,
pubSubT
)
} yield app
}

View File

@ -13,24 +13,18 @@ import cats.implicits._
import fs2.Stream
import fs2.concurrent.Topic
import docspell.backend.auth.{AuthToken, ShareToken}
import docspell.backend.msg.Topics
import docspell.common._
import docspell.oidc.CodeFlowRoutes
import docspell.pubsub.naive.NaivePubSub
import docspell.restserver.auth.OpenId
import docspell.restserver.http4s.{EnvMiddleware, InternalHeader}
import docspell.restserver.routes._
import docspell.restserver.webapp._
import docspell.restserver.http4s.InternalHeader
import docspell.restserver.ws.OutputEvent
import docspell.restserver.ws.OutputEvent.KeepAlive
import docspell.restserver.ws.{OutputEvent, WebSocketRoutes}
import docspell.store.Store
import docspell.store.records.RInternalSetting
import org.http4s._
import org.http4s.blaze.client.BlazeClientBuilder
import org.http4s.blaze.server.BlazeServerBuilder
import org.http4s.client.Client
import org.http4s.dsl.Http4sDsl
import org.http4s.headers.Location
import org.http4s.implicits._
@ -51,7 +45,7 @@ object RestServer {
server =
Stream
.resource(createApp(cfg, pools, wsTopic))
.flatMap { case (restApp, pubSub, httpClient, setting) =>
.flatMap { case (restApp, pubSub, setting) =>
Stream(
restApp.subscriptions,
restApp.eventConsume(2),
@ -59,7 +53,7 @@ object RestServer {
.bindHttp(cfg.bind.port, cfg.bind.address)
.withoutBanner
.withHttpWebSocketApp(
createHttpApp(cfg, setting, httpClient, pubSub, restApp, wsTopic)
createHttpApp(setting, pubSub, restApp)
)
.serve
.drain
@ -76,13 +70,13 @@ object RestServer {
wsTopic: Topic[F, OutputEvent]
): Resource[
F,
(RestApp[F], NaivePubSub[F], Client[F], RInternalSetting)
(RestApp[F], NaivePubSub[F], RInternalSetting)
] =
for {
httpClient <- BlazeClientBuilder[F].resource
store <- Store.create[F](
cfg.backend.jdbc,
cfg.backend.files.chunkSize,
cfg.backend.files.defaultFileRepositoryConfig,
pools.connectEC
)
setting <- Resource.eval(store.transact(RInternalSetting.create))
@ -92,41 +86,22 @@ object RestServer {
httpClient
)(Topics.all.map(_.topic))
restApp <- RestAppImpl.create[F](cfg, store, httpClient, pubSub, wsTopic)
} yield (restApp, pubSub, httpClient, setting)
} yield (restApp, pubSub, setting)
def createHttpApp[F[_]: Async](
cfg: Config,
internSettings: RInternalSetting,
httpClient: Client[F],
pubSub: NaivePubSub[F],
restApp: RestApp[F],
topic: Topic[F, OutputEvent]
restApp: RestApp[F]
)(
wsB: WebSocketBuilder2[F]
) = {
val templates = TemplateRoutes[F](cfg, Templates[F])
val httpApp = Router(
val internal = Router(
"/" -> redirectTo("/app"),
"/internal" -> InternalHeader(internSettings.internalRouteKey) {
internalRoutes(pubSub)
},
"/api/info" -> routes.InfoRoutes(),
"/api/v1/open/" -> openRoutes(cfg, httpClient, restApp),
"/api/v1/sec/" -> Authenticate(restApp.backend.login, cfg.auth) { token =>
securedRoutes(cfg, restApp, wsB, topic, token)
},
"/api/v1/admin" -> AdminAuth(cfg.adminEndpoint) {
adminRoutes(cfg, restApp)
},
"/api/v1/share" -> ShareAuth(restApp.backend.share, cfg.auth) { token =>
shareRoutes(cfg, restApp, token)
},
"/api/doc" -> templates.doc,
"/app/assets" -> EnvMiddleware(WebjarRoutes.appRoutes[F]),
"/app" -> EnvMiddleware(templates.app),
"/sw.js" -> EnvMiddleware(templates.serviceWorker),
"/" -> redirectTo("/app")
).orNotFound
}
)
val httpApp = (internal <+> restApp.routes(wsB)).orNotFound
Logger.httpApp(logHeaders = false, logBody = false)(httpApp)
}
@ -135,88 +110,6 @@ object RestServer {
"pubsub" -> pubSub.receiveRoute
)
def securedRoutes[F[_]: Async](
cfg: Config,
restApp: RestApp[F],
wsB: WebSocketBuilder2[F],
topic: Topic[F, OutputEvent],
token: AuthToken
): HttpRoutes[F] =
Router(
"ws" -> WebSocketRoutes(token, restApp.backend, topic, wsB),
"auth" -> LoginRoutes.session(restApp.backend.login, cfg, token),
"tag" -> TagRoutes(restApp.backend, token),
"equipment" -> EquipmentRoutes(restApp.backend, token),
"organization" -> OrganizationRoutes(restApp.backend, token),
"person" -> PersonRoutes(restApp.backend, token),
"source" -> SourceRoutes(restApp.backend, token),
"user/otp" -> TotpRoutes(restApp.backend, cfg, token),
"user" -> UserRoutes(restApp.backend, token),
"collective" -> CollectiveRoutes(restApp.backend, token),
"queue" -> JobQueueRoutes(restApp.backend, token),
"item" -> ItemRoutes(cfg, restApp.backend, token),
"items" -> ItemMultiRoutes(cfg, restApp.backend, token),
"attachment" -> AttachmentRoutes(restApp.backend, token),
"attachments" -> AttachmentMultiRoutes(restApp.backend, token),
"upload" -> UploadRoutes.secured(restApp.backend, cfg, token),
"checkfile" -> CheckFileRoutes.secured(restApp.backend, token),
"email/send" -> MailSendRoutes(restApp.backend, token),
"email/settings" -> MailSettingsRoutes(restApp.backend, token),
"email/sent" -> SentMailRoutes(restApp.backend, token),
"share" -> ShareRoutes.manage(restApp.backend, token),
"usertask/notifydueitems" -> NotifyDueItemsRoutes(cfg, restApp.backend, token),
"usertask/scanmailbox" -> ScanMailboxRoutes(restApp.backend, token),
"usertask/periodicquery" -> PeriodicQueryRoutes(cfg, restApp.backend, token),
"calevent/check" -> CalEventCheckRoutes(),
"fts" -> FullTextIndexRoutes.secured(cfg, restApp.backend, token),
"folder" -> FolderRoutes(restApp.backend, token),
"customfield" -> CustomFieldRoutes(restApp.backend, token),
"clientSettings" -> ClientSettingsRoutes(restApp.backend, token),
"notification" -> NotificationRoutes(cfg, restApp.backend, token),
"querybookmark" -> BookmarkRoutes(restApp.backend, token)
)
def openRoutes[F[_]: Async](
cfg: Config,
client: Client[F],
restApp: RestApp[F]
): HttpRoutes[F] =
Router(
"auth/openid" -> CodeFlowRoutes(
cfg.openIdEnabled,
OpenId.handle[F](restApp.backend, cfg),
OpenId.codeFlowConfig(cfg),
client
),
"auth" -> LoginRoutes.login(restApp.backend.login, cfg),
"signup" -> RegisterRoutes(restApp.backend, cfg),
"upload" -> UploadRoutes.open(restApp.backend, cfg),
"checkfile" -> CheckFileRoutes.open(restApp.backend),
"integration" -> IntegrationEndpointRoutes.open(restApp.backend, cfg),
"share" -> ShareRoutes.verify(restApp.backend, cfg)
)
def adminRoutes[F[_]: Async](cfg: Config, restApp: RestApp[F]): HttpRoutes[F] =
Router(
"fts" -> FullTextIndexRoutes.admin(cfg, restApp.backend),
"user/otp" -> TotpRoutes.admin(restApp.backend),
"user" -> UserRoutes.admin(restApp.backend),
"info" -> InfoRoutes.admin(cfg),
"attachments" -> AttachmentRoutes.admin(restApp.backend)
)
def shareRoutes[F[_]: Async](
cfg: Config,
restApp: RestApp[F],
token: ShareToken
): HttpRoutes[F] =
Router(
"search" -> ShareSearchRoutes(restApp.backend, cfg, token),
"attachment" -> ShareAttachmentRoutes(restApp.backend, token),
"item" -> ShareItemRoutes(restApp.backend, token),
"clientSettings" -> ClientSettingsRoutes.share(restApp.backend, token)
)
def redirectTo[F[_]: Async](path: String): HttpRoutes[F] = {
val dsl = new Http4sDsl[F] {}
import dsl._

View File

@ -10,9 +10,9 @@ import cats.effect.Async
import fs2.Stream
import fs2.concurrent.Topic
import docspell.backend.msg.{JobDone, JobSubmitted}
import docspell.pubsub.api.PubSubT
import docspell.restserver.ws.OutputEvent
import docspell.scheduler.msg.{JobDone, JobSubmitted}
/** Subscribes to those events from docspell that are forwarded to the websocket endpoints
*/

View File

@ -0,0 +1,69 @@
/*
* Copyright 2020 Eike K. & Contributors
*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
package docspell.restserver.routes
import cats.data.NonEmptyList
import cats.effect._
import cats.implicits._
import docspell.backend.BackendApp
import docspell.common.FileCopyTaskArgs.Selection
import docspell.common.{FileCopyTaskArgs, FileIntegrityCheckArgs, FileKeyPart}
import docspell.restapi.model._
import org.http4s._
import org.http4s.circe.CirceEntityDecoder._
import org.http4s.circe.CirceEntityEncoder._
import org.http4s.dsl.Http4sDsl
/** Admin endpoints that submit maintenance jobs for the file repositories. */
object FileRepositoryRoutes {

  /** Routes for submitting the file-copy and the file-integrity-check job.
    *
    * Both endpoints decode the JSON request body, ask the backend to submit the job and
    * answer `Ok` with a `BasicResult`. When the backend returns no job, the result
    * carries `false` and a message saying that the job is already running. The
    * resulting message is also written to the log.
    */
  def admin[F[_]: Async](backend: BackendApp[F]): HttpRoutes[F] = {
    val dsl = Http4sDsl[F]
    import dsl._
    val logger = docspell.logging.getLogger[F]

    HttpRoutes.of {
      case req @ POST -> Root / "cloneFileRepository" =>
        val taskId = FileCopyTaskArgs.taskName.id
        for {
          input <- req.as[FileRepositoryCloneRequest]
          args = makeTaskArgs(input)
          job <- backend.fileRepository.cloneFileRepository(args, true)
          result = BasicResult(
            job.isDefined,
            job.fold(s"Job for '$taskId' already running")(j =>
              s"Job for '$taskId' submitted: ${j.id.id}"
            )
          )
          _ <- logger.info(result.message)
          resp <- Ok(result)
        } yield resp

      case req @ POST -> Root / "integrityCheck" =>
        // Both messages must name the integrity-check task; the "already running"
        // message used to report the file-copy task name instead.
        val taskId = FileIntegrityCheckArgs.taskName.id
        for {
          // NOTE(review): the OpenAPI spec names `FileIntegrityCheckRequest` as the
          // request schema, while the body is decoded as `FileKeyPart` - confirm that
          // both JSON formats agree.
          input <- req.as[FileKeyPart]
          job <- backend.fileRepository.checkIntegrityAll(input, true)
          result = BasicResult(
            job.isDefined,
            job.fold(s"Job for '$taskId' already running")(j =>
              s"Job for '$taskId' submitted: ${j.id.id}"
            )
          )
          _ <- logger.info(result.message)
          resp <- Ok(result)
        } yield resp
    }
  }

  /** Converts the clone request into the arguments of the file-copy task.
    *
    * An empty `targetRepositories` list selects all stores (`Selection.All`); otherwise
    * exactly the listed stores are selected.
    */
  def makeTaskArgs(input: FileRepositoryCloneRequest): FileCopyTaskArgs =
    NonEmptyList.fromList(input.targetRepositories) match {
      case Some(nel) =>
        FileCopyTaskArgs(None, Selection.Stores(nel))
      case None =>
        FileCopyTaskArgs(None, Selection.All)
    }
}

Some files were not shown because too many files have changed in this diff Show More