Integrate periodic tasks

The first use case for periodic task is the cleanup of expired
invitation keys. This is part of a house-keeping periodic task.
This commit is contained in:
Eike Kettner
2020-03-08 15:26:56 +01:00
parent 616c333fa5
commit 854a596da3
25 changed files with 388 additions and 108 deletions

View File

@ -77,6 +77,21 @@ docspell.joex {
wakeup-period = "10 minutes"
}
# Docspell uses periodic house keeping tasks, like cleaning expired
# invites, that can be configured here.
house-keeping {
# When the house keeping task executes. Default is every 4 hours.
schedule = "*-*-* 0/4:00:00"
# This task removes invitation keys that have been created but not
# used. The timespan here must be greater than the `invite-time'
# setting in the rest server config file.
cleanup-invites = {
older-than = "30 days"
}
}
# Configuration of text extraction
extraction {
# For PDF files it is first tried to read the text parts of the

View File

@ -5,6 +5,7 @@ import docspell.joex.scheduler.{PeriodicSchedulerConfig, SchedulerConfig}
import docspell.store.JdbcConfig
import docspell.convert.ConvertConfig
import docspell.extract.ExtractConfig
import docspell.joex.hk.HouseKeepingConfig
case class Config(
appId: Ident,
@ -13,6 +14,7 @@ case class Config(
jdbc: JdbcConfig,
scheduler: SchedulerConfig,
periodicScheduler: PeriodicSchedulerConfig,
houseKeeping: HouseKeepingConfig,
extraction: ExtractConfig,
convert: ConvertConfig
)

View File

@ -3,9 +3,10 @@ package docspell.joex
import cats.implicits._
import cats.effect._
import docspell.common.{Ident, NodeType, ProcessItemArgs}
import docspell.joex.background._
import docspell.joex.hk._
import docspell.joex.process.ItemHandler
import docspell.joex.scheduler._
import docspell.joexapi.client.JoexClient
import docspell.store.Store
import docspell.store.queue._
import docspell.store.ops.ONode
@ -18,6 +19,7 @@ final class JoexAppImpl[F[_]: ConcurrentEffect: ContextShift: Timer](
cfg: Config,
nodeOps: ONode[F],
store: Store[F],
pstore: PeriodicTaskStore[F],
termSignal: SignallingRef[F, Boolean],
val scheduler: Scheduler[F],
val periodicScheduler: PeriodicScheduler[F]
@ -27,6 +29,7 @@ final class JoexAppImpl[F[_]: ConcurrentEffect: ContextShift: Timer](
val run = scheduler.start.compile.drain
val prun = periodicScheduler.start.compile.drain
for {
_ <- HouseKeepingTask.submit(pstore, cfg.houseKeeping.schedule)
_ <- ConcurrentEffect[F].start(run)
_ <- ConcurrentEffect[F].start(prun)
_ <- scheduler.periodicAwake
@ -52,14 +55,15 @@ object JoexAppImpl {
cfg: Config,
termSignal: SignallingRef[F, Boolean],
connectEC: ExecutionContext,
clientEC: ExecutionContext,
blocker: Blocker
): Resource[F, JoexApp[F]] =
for {
client <- JoexClient.resource(clientEC)
store <- Store.create(cfg.jdbc, connectEC, blocker)
queue <- JobQueue(store)
pstore <- PeriodicTaskStore.create(store)
nodeOps <- ONode(store)
psch <- PeriodicScheduler.create(cfg.periodicScheduler, queue, pstore, Timer[F])
sch <- SchedulerBuilder(cfg.scheduler, blocker, store)
.withQueue(queue)
.withTask(
@ -71,13 +75,14 @@ object JoexAppImpl {
)
.withTask(
JobTask.json(
PeriodicTask.taskName,
PeriodicTask[F](cfg),
PeriodicTask.onCancel[F]
HouseKeepingTask.taskName,
HouseKeepingTask[F](cfg),
HouseKeepingTask.onCancel[F]
)
)
.resource
app = new JoexAppImpl(cfg, nodeOps, store, termSignal, sch, psch)
psch <- PeriodicScheduler.create(cfg.periodicScheduler, sch, queue, pstore, client, Timer[F])
app = new JoexAppImpl(cfg, nodeOps, store, pstore, termSignal, sch, psch)
appR <- Resource.make(app.init.map(_ => app))(_.shutdown)
} yield appR
}

View File

@ -24,13 +24,14 @@ object JoexServer {
def stream[F[_]: ConcurrentEffect: ContextShift](
cfg: Config,
connectEC: ExecutionContext,
clientEC: ExecutionContext,
blocker: Blocker
)(implicit T: Timer[F]): Stream[F, Nothing] = {
val app = for {
signal <- Resource.liftF(SignallingRef[F, Boolean](false))
exitCode <- Resource.liftF(Ref[F].of(ExitCode.Success))
joexApp <- JoexAppImpl.create[F](cfg, signal, connectEC, blocker)
joexApp <- JoexAppImpl.create[F](cfg, signal, connectEC, clientEC, blocker)
httpApp = Router(
"/api/info" -> InfoRoutes(),

View File

@ -13,10 +13,10 @@ import org.log4s._
object Main extends IOApp {
private[this] val logger = getLogger
val blockingEc: ExecutionContext = ExecutionContext.fromExecutor(
val blockingEC: ExecutionContext = ExecutionContext.fromExecutor(
Executors.newCachedThreadPool(ThreadFactories.ofName("docspell-joex-blocking"))
)
val blocker = Blocker.liftExecutionContext(blockingEc)
val blocker = Blocker.liftExecutionContext(blockingEC)
val connectEC: ExecutionContext = ExecutionContext.fromExecutorService(
Executors.newFixedThreadPool(5, ThreadFactories.ofName("docspell-joex-dbconnect"))
)
@ -52,6 +52,6 @@ object Main extends IOApp {
cfg.baseUrl
)
logger.info(s"\n${banner.render("***>")}")
JoexServer.stream[IO](cfg, connectEC, blocker).compile.drain.as(ExitCode.Success)
JoexServer.stream[IO](cfg, connectEC, blockingEC, blocker).compile.drain.as(ExitCode.Success)
}
}

View File

@ -0,0 +1,24 @@
package docspell.joex.hk
import cats.implicits._
import cats.effect._
import docspell.common._
import docspell.joex.Config
import docspell.joex.scheduler.Task
import docspell.store.records._
object CleanupInvitesTask {
def apply[F[_]: Sync: ContextShift](cfg: Config): Task[F, Unit, Unit] =
Task { ctx =>
val threshold = cfg.houseKeeping.cleanupInvites.olderThan
for {
now <- Timestamp.current[F]
ts = now - threshold
_ <- ctx.logger.info(s"Cleanup invitations older than $ts")
n <- ctx.store.transact(RInvitation.deleteOlderThan(ts))
_ <- ctx.logger.info(s"Removed $n invitations")
} yield ()
}
}

View File

@ -0,0 +1,17 @@
package docspell.joex.hk
import com.github.eikek.calev.CalEvent
import docspell.common._
import HouseKeepingConfig._
case class HouseKeepingConfig(
schedule: CalEvent,
cleanupInvites: CleanupInvites
)
object HouseKeepingConfig {
case class CleanupInvites(olderThan: Duration)
}

View File

@ -0,0 +1,50 @@
package docspell.joex.hk
import cats.implicits._
import cats.effect._
import com.github.eikek.calev._
import docspell.common._
import docspell.joex.Config
import docspell.joex.scheduler.Task
import docspell.store.queue._
import docspell.store.records._
object HouseKeepingTask {
private val periodicId = Ident.unsafe("docspell-houskeeping")
val systemGroup: Ident = Ident.unsafe("docspell-system")
val taskName: Ident = Ident.unsafe("housekeeping")
def apply[F[_]: Sync: ContextShift](cfg: Config): Task[F, Unit, Unit] =
log[F](_.info(s"Running house-keeping task now"))
.flatMap(_ => CleanupInvitesTask(cfg))
def onCancel[F[_]: Sync: ContextShift]: Task[F, Unit, Unit] =
Task(_.logger.warn("Cancelling background task"))
def submit[F[_]: Sync](
pstore: PeriodicTaskStore[F],
ce: CalEvent
): F[Unit] = {
val makeJob =
RPeriodicTask.createJson(
true,
taskName,
systemGroup,
(),
"Docspell house-keeping",
systemGroup,
Priority.Low,
ce
)
for {
job <- makeJob
_ <- pstore.insert(job.copy(id = periodicId)).attempt
} yield ()
}
private def log[F[_]](f: Logger[F] => F[Unit]): Task[F, Unit, Unit] =
Task(ctx => f(ctx.logger))
}

View File

@ -4,6 +4,7 @@ import fs2._
import fs2.concurrent.SignallingRef
import cats.effect._
import docspell.joexapi.client.JoexClient
import docspell.store.queue._
/** A periodic scheduler takes care to submit periodic tasks to the
@ -32,14 +33,16 @@ object PeriodicScheduler {
def create[F[_]: ConcurrentEffect: ContextShift](
cfg: PeriodicSchedulerConfig,
sch: Scheduler[F],
queue: JobQueue[F],
store: PeriodicTaskStore[F],
client: JoexClient[F],
timer: Timer[F]
): Resource[F, PeriodicScheduler[F]] =
for {
waiter <- Resource.liftF(SignallingRef(true))
state <- Resource.liftF(SignallingRef(PeriodicSchedulerImpl.emptyState[F]))
psch = new PeriodicSchedulerImpl[F](cfg, queue, store, waiter, state, timer)
psch = new PeriodicSchedulerImpl[F](cfg, sch, queue, store, client, waiter, state, timer)
_ <- Resource.liftF(psch.init)
} yield psch

View File

@ -9,45 +9,18 @@ import com.github.eikek.fs2calev._
import docspell.common._
import docspell.common.syntax.all._
import docspell.joexapi.client.JoexClient
import docspell.store.queue._
import docspell.store.records.RPeriodicTask
import PeriodicSchedulerImpl.State
/*
onStartUp:
- remove worker value from all of the current
Loop:
- get earliest pjob
- if none: stop
- if triggered:
- mark worker, restart loop on fail
- submit new job
- check for non-final jobs of that name
- if exist: log info
- if not exist: submit
- update next trigger (in both cases)
- remove worker
- restart loop
- if future
- schedule notify
- stop loop
onNotify:
- cancel current scheduled notify
- start Loop
onShutdown:
- nothing to do
*/
final class PeriodicSchedulerImpl[F[_]: ConcurrentEffect: ContextShift](
val config: PeriodicSchedulerConfig,
sch: Scheduler[F],
queue: JobQueue[F],
store: PeriodicTaskStore[F],
client: JoexClient[F],
waiter: SignallingRef[F, Boolean],
state: SignallingRef[F, State[F]],
timer: Timer[F]
@ -91,15 +64,18 @@ final class PeriodicSchedulerImpl[F[_]: ConcurrentEffect: ContextShift](
_ <- logger.fdebug(s"Looking for next periodic task")
go <- logThrow("Error getting next task")(
store
.takeNext(config.name)
.takeNext(config.name, None)
.use({
case Some(pj) =>
case Marked.Found(pj) =>
logger
.fdebug(s"Found periodic task '${pj.subject}/${pj.timer.asString}'") *>
(if (isTriggered(pj, now)) submitJob(pj)
else scheduleNotify(pj)).map(_ => true)
case None =>
else scheduleNotify(pj).map(_ => false))
case Marked.NotFound =>
logger.fdebug("No periodic task found") *> false.pure[F]
case Marked.NotMarkable =>
logger.fdebug("Periodic job cannot be marked. Trying again.") *> true
.pure[F]
})
)
} yield go
@ -123,32 +99,44 @@ final class PeriodicSchedulerImpl[F[_]: ConcurrentEffect: ContextShift](
}
def isTriggered(pj: RPeriodicTask, now: Timestamp): Boolean =
pj.timer.contains(now.value)
pj.nextrun < now
def submitJob(pj: RPeriodicTask): F[Unit] =
def submitJob(pj: RPeriodicTask): F[Boolean] =
store
.findNonFinalJob(pj.id)
.flatMap({
case Some(job) =>
logger.finfo[F](
s"There is already a job with non-final state '${job.state}' in the queue"
)
) *> scheduleNotify(pj) *> false.pure[F]
case None =>
logger.finfo[F](s"Submitting job for periodic task '${pj.task.id}'") *>
pj.toJob.flatMap(queue.insert)
pj.toJob.flatMap(queue.insert) *> notifyJoex *> true.pure[F]
})
def notifyJoex: F[Unit] =
sch.notifyChange *> store.findJoexNodes.flatMap(
_.traverse(n => client.notifyJoexIgnoreErrors(n.url)).map(_ => ())
)
def scheduleNotify(pj: RPeriodicTask): F[Unit] =
ConcurrentEffect[F]
.start(
CalevFs2
.sleep[F](pj.timer)
.evalMap(_ => notifyChange)
.compile
.drain
)
.flatMap(fb => state.modify(_.setNotify(fb)))
Timestamp
.current[F]
.flatMap(now =>
logger.fdebug(
s"Scheduling next notify for timer ${pj.timer.asString} -> ${pj.timer.nextElapse(now.toUtcDateTime)}"
)
) *>
ConcurrentEffect[F]
.start(
CalevFs2
.sleep[F](pj.timer)
.evalMap(_ => notifyChange)
.compile
.drain
)
.flatMap(fb => state.modify(_.setNotify(fb)))
def cancelNotify: F[Unit] =
state