Move scheduler code into separate module

This commit is contained in:
eikek
2022-03-12 14:10:49 +01:00
parent 0739957fd7
commit 0ce3abb3ff
20 changed files with 1376 additions and 3 deletions

View File

@ -0,0 +1,82 @@
/*
* Copyright 2020 Eike K. & Contributors
*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
package docspell.scheduler
import cats.effect._
import cats.implicits._
import cats.{Applicative, Functor}
import docspell.common._
import docspell.logging.Logger
import docspell.store.Store
import docspell.store.records.RJob
/** The environment a task runs in: the task arguments plus the services (logger, store,
  * scheduler configuration) that are available while the job executes.
  *
  * @tparam F
  *   the effect type
  * @tparam A
  *   the type of the task arguments
  */
trait Context[F[_], A] { self =>

  /** The id of the job this context was created for. */
  def jobId: Ident

  /** The arguments of the task. */
  def args: A

  /** The configuration of the scheduler. */
  def config: SchedulerConfig

  /** The logger to use from within the task. */
  def logger: Logger[F]

  /** Reports the progress of the job, given as a percentage. */
  def setProgress(percent: Int): F[Unit]

  /** Access to the database. */
  def store: Store[F]

  /** Checks whether the retry count stored for this job equals the configured number
    * of retries. A job without a stored retry count is treated as having 0 retries.
    */
  final def isLastRetry(implicit ev: Applicative[F]): F[Boolean] =
    store
      .transact(RJob.getRetries(jobId))
      .map(retries => config.retries == retries.getOrElse(0))

  /** Creates a new context that carries the result of applying `f` to the arguments
    * and shares logger, store, config and job id with this one.
    */
  def map[C](f: A => C)(implicit F: Functor[F]): Context[F, C] = {
    val mappedArgs = f(args)
    new Context.ContextImpl[F, C](mappedArgs, logger, store, config, jobId)
  }
}
object Context {

  /** Assembles a context from parts that are already available. */
  def create[F[_]: Async, A](
      jobId: Ident,
      arg: A,
      config: SchedulerConfig,
      log: Logger[F],
      store: Store[F]
  ): Context[F, A] =
    new ContextImpl[F, A](arg, log, store, config, jobId)

  /** Creates a context for the given job.
    *
    * The logger of the resulting context is a [[QueueLogger]] that is built from the
    * job's id and info, buffers `config.logBufferSize` events and sends them to
    * `logSink`.
    */
  def apply[F[_]: Async, A](
      job: RJob,
      arg: A,
      config: SchedulerConfig,
      logSink: LogSink[F],
      store: Store[F]
  ): F[Context[F, A]] = {
    val internalLog = docspell.logging.getLogger[F]

    internalLog.trace("Creating logger for task run") >>
      QueueLogger(job.id, job.info, config.logBufferSize, logSink).flatMap { jobLogger =>
        internalLog
          .trace("Logger created, instantiating context")
          .map(_ => create[F, A](job.id, arg, config, jobLogger, store))
      }
  }

  /** Default implementation that stores the progress in the job record. */
  final private class ContextImpl[F[_]: Functor, A](
      val args: A,
      val logger: Logger[F],
      val store: Store[F],
      val config: SchedulerConfig,
      val jobId: Ident
  ) extends Context[F, A] {

    /** Stores the progress; values outside of 0..100 are clamped to that range. */
    def setProgress(percent: Int): F[Unit] = {
      val clamped = percent.max(0).min(100)
      store.transact(RJob.setProgress(jobId, clamped)).void
    }
  }
}

View File

@ -0,0 +1,44 @@
/*
* Copyright 2020 Eike K. & Contributors
*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
package docspell.scheduler
import cats.implicits._
import docspell.common.Priority
/** A counting scheme to indicate a ratio between scheduling high and low priority jobs.
  *
  * For example high=4, low=1 means: "schedule 4 high priority jobs and then 1 low
  * priority job".
  *
  * @param high
  *   length of the high priority run at the start of each cycle
  * @param low
  *   length of the low priority run that completes a cycle
  * @param counter
  *   how many priorities have been handed out so far; advanced by [[increment]]
  */
case class CountingScheme(high: Int, low: Int, counter: Int = 0) {

  /** Returns the advanced scheme together with the priority for the next job.
    *
    * The position inside the current cycle is `counter % (high + low)`: the first
    * `high` positions yield [[Priority.High]], the remaining ones [[Priority.Low]]. A
    * counter that is not positive always yields `High`.
    *
    * A scheme whose cycle length `high + low` is zero has no ratio to apply; it yields
    * `High` instead of failing with an `ArithmeticException` in the modulo operation.
    */
  def nextPriority: (CountingScheme, Priority) = {
    val cycle = high + low
    val priority: Priority =
      // `cycle == 0` must be checked before the modulo below to avoid a division by zero
      if (counter <= 0 || cycle == 0) Priority.High
      else if (counter % cycle < high) Priority.High
      else Priority.Low

    (increment, priority)
  }

  /** Returns a copy of this scheme with the counter advanced by one. */
  def increment: CountingScheme =
    copy(counter = counter + 1)
}
object CountingScheme {

  /** Renders the scheme as `high,low`. The counter is not part of the output. */
  def writeString(cs: CountingScheme): String =
    s"${cs.high},${cs.low}"

  /** Parses a scheme from a string of the form `high,low`.
    *
    * Whitespace around the two numbers is ignored, so `"4, 1"` is read the same as
    * `"4,1"`. The counter of the resulting scheme starts at its default value.
    *
    * @return
    *   the scheme, or an error message if the string does not consist of exactly two
    *   comma separated parts or if a part is not a valid integer
    */
  def readString(str: String): Either[String, CountingScheme] =
    // trim each part: `toInt` does not accept surrounding whitespace
    str.split(',').map(_.trim) match {
      case Array(h, l) =>
        Either.catchNonFatal(CountingScheme(h.toInt, l.toInt)).left.map(_.getMessage)
      case _ =>
        Left(s"Invalid counting scheme: $str")
    }
}

View File

@ -0,0 +1,51 @@
/*
* Copyright 2020 Eike K. & Contributors
*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
package docspell.scheduler
import cats.effect.Sync
import cats.implicits._
import docspell.common.Ident
import docspell.common.syntax.all._
import io.circe.Decoder
/** Associates a task with a name. The name is needed to find the code to run from the
  * taskName stored in the RJob data; the task is then executed with arguments that
  * have to be read from a string.
  *
  * The scheduler only has a string for the task argument, so this works only for Task
  * implementations that accept a string. The companion offers a convenience
  * constructor that uses circe to decode json into some type A.
  *
  * @param name
  *   the name the task is registered under
  * @param task
  *   the task to run; takes its arguments as a string
  * @param onCancel
  *   a task taking the same string arguments, meant for the case of a cancellation
  */
case class JobTask[F[_]](
    name: Ident,
    task: Task[F, String, JobTaskResult],
    onCancel: Task[F, String, Unit]
)
object JobTask {

  /** Creates a [[JobTask]] from tasks that work on typed arguments.
    *
    * The string argument is decoded from json into an `A` before either task runs. If
    * decoding fails, the resulting task fails with an exception whose message contains
    * the raw argument string. The result of `task` is converted into a
    * [[JobTaskResult]] using the given encoder.
    */
  def json[F[_]: Sync, A, B](
      name: Ident,
      task: Task[F, A, B],
      onCancel: Task[F, A, Unit]
  )(implicit
      D: Decoder[A],
      E: JobTaskResultEncoder[B]
  ): JobTask[F] = {
    val convert: String => F[A] = { str =>
      str
        .parseJsonAs[A]
        .fold(
          ex =>
            Sync[F].raiseError[A](
              new Exception(s"Cannot parse task arguments: $str", ex)
            ),
          a => a.pure[F]
        )
    }

    val runTask: Task[F, String, JobTaskResult] =
      task.contramap(convert).map(E.encode)
    val cancelTask: Task[F, String, Unit] =
      onCancel.contramap(convert)

    JobTask(name, runTask, cancelTask)
  }
}

View File

@ -0,0 +1,30 @@
/*
* Copyright 2020 Eike K. & Contributors
*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
package docspell.scheduler
import docspell.common.Ident
/** Maps an identifier to a task. The scheduler uses it to find an implementation by
  * the taskName field of the RJob database record.
  */
final class JobTaskRegistry[F[_]](tasks: Map[Ident, JobTask[F]]) {

  /** Returns a registry that additionally contains the given task. A task already
    * registered under the same name is replaced.
    */
  def withTask(task: JobTask[F]): JobTaskRegistry[F] = {
    val extended = tasks + (task.name -> task)
    new JobTaskRegistry[F](extended)
  }

  /** Looks up the task registered under the given name. */
  def find(taskName: Ident): Option[JobTask[F]] =
    tasks.get(taskName)
}
object JobTaskRegistry {

  /** Creates a registry from an existing mapping. */
  def apply[F[_]](map: Map[Ident, JobTask[F]]): JobTaskRegistry[F] =
    new JobTaskRegistry[F](map)

  /** A registry without any tasks. */
  def empty[F[_]]: JobTaskRegistry[F] =
    new JobTaskRegistry[F](Map.empty[Ident, JobTask[F]])
}

View File

@ -0,0 +1,27 @@
/*
* Copyright 2020 Eike K. & Contributors
*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
package docspell.scheduler
import io.circe.Json
/** The result of a job task: an optional message and an optional json value. */
final case class JobTaskResult(message: Option[String], json: Option[Json]) {

  /** Returns a copy of this result carrying the given message. */
  def withMessage(m: String): JobTaskResult =
    JobTaskResult(Some(m), this.json)

  /** Returns a copy of this result carrying the given json value. */
  def withJson(json: Json): JobTaskResult =
    JobTaskResult(this.message, Some(json))
}
object JobTaskResult {

  /** A result with neither a message nor a json value. */
  val empty: JobTaskResult = JobTaskResult(None, None)

  /** A result that consists of a message only. */
  def message(msg: String): JobTaskResult =
    empty.withMessage(msg)

  /** A result that consists of a json value only. */
  def json(json: Json): JobTaskResult =
    empty.withJson(json)
}

View File

@ -0,0 +1,49 @@
/*
* Copyright 2020 Eike K. & Contributors
*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
package docspell.scheduler
import docspell.scheduler.JobTaskResultEncoder.instance
import io.circe.Encoder
/** Type class for converting a value of type `A` into a [[JobTaskResult]]. */
trait JobTaskResultEncoder[A] { self =>

  /** Converts the given value into a result. */
  def encode(a: A): JobTaskResult

  /** Creates an encoder for `B` that first converts its input to an `A`. */
  final def contramap[B](f: B => A): JobTaskResultEncoder[B] =
    JobTaskResultEncoder.instance[B](b => self.encode(f(b)))

  /** Creates an encoder that applies `f` to every result produced by this one. */
  final def map(f: JobTaskResult => JobTaskResult): JobTaskResultEncoder[A] =
    modify((_, result) => f(result))

  /** Like `map`, but `f` additionally receives the value that was encoded. */
  final def modify(f: (A, JobTaskResult) => JobTaskResult): JobTaskResultEncoder[A] =
    JobTaskResultEncoder.instance[A] { a =>
      val encoded = self.encode(a)
      f(a, encoded)
    }

  /** Creates an encoder that sets the message of each result to `f` of the input. */
  final def withMessage(f: A => String): JobTaskResultEncoder[A] =
    JobTaskResultEncoder.instance[A] { a =>
      val encoded = self.encode(a)
      encoded.withMessage(f(a))
    }
}
object JobTaskResultEncoder {

  /** Summons the encoder that is implicitly available for `A`. */
  def apply[A](implicit v: JobTaskResultEncoder[A]): JobTaskResultEncoder[A] = v

  /** Creates an encoder from a function. */
  def instance[A](f: A => JobTaskResult): JobTaskResultEncoder[A] =
    new JobTaskResultEncoder[A] {
      def encode(a: A): JobTaskResult = f(a)
    }

  /** Creates an encoder whose results contain only the json encoding of the value. */
  def fromJson[A: Encoder]: JobTaskResultEncoder[A] = {
    val jsonEncoder = Encoder[A]
    instance[A](a => JobTaskResult.json(jsonEncoder(a)))
  }

  /** `Unit` is encoded as the empty result. */
  implicit val unitJobTaskResultEncoder: JobTaskResultEncoder[Unit] =
    instance[Unit](_ => JobTaskResult.empty)

  /** A present value is encoded with the encoder for `A`, an absent one as the empty
    * result.
    */
  implicit def optionJobTaskResultEncoder[A](implicit
      ea: JobTaskResultEncoder[A]
  ): JobTaskResultEncoder[Option[A]] =
    instance[Option[A]](opt => opt.fold(JobTaskResult.empty)(a => ea.encode(a)))
}

View File

@ -0,0 +1,38 @@
/*
* Copyright 2020 Eike K. & Contributors
*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
package docspell.scheduler
import cats.effect.Sync
import cats.implicits._
import docspell.common._
/** A single log message that was emitted for a job.
  *
  * @param jobId
  *   the id of the job the message belongs to
  * @param jobInfo
  *   a text describing the job; it is part of [[logLine]]
  * @param time
  *   the timestamp of the event
  * @param level
  *   the log level of the message
  * @param msg
  *   the message text
  * @param ex
  *   an optional error belonging to the message
  */
case class LogEvent(
    jobId: Ident,
    jobInfo: String,
    time: Timestamp,
    level: LogLevel,
    msg: String,
    ex: Option[Throwable] = None
) {

  /** Renders time, level, job info and message into a single line. */
  def logLine: String = {
    val timestamp = time.asString
    s">>> $timestamp $level $jobInfo: $msg"
  }
}
object LogEvent {

  /** Creates an event without an error, using the current time as its timestamp. */
  def create[F[_]: Sync](
      jobId: Ident,
      jobInfo: String,
      level: LogLevel,
      msg: String
  ): F[LogEvent] =
    for {
      now <- Timestamp.current[F]
    } yield LogEvent(jobId, jobInfo, now, level, msg)
}

View File

@ -0,0 +1,76 @@
/*
* Copyright 2020 Eike K. & Contributors
*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
package docspell.scheduler
import cats.effect._
import cats.implicits._
import fs2.Pipe
import docspell.common._
import docspell.logging
import docspell.store.Store
import docspell.store.records.RJobLog
/** A destination for the log events of a job. */
trait LogSink[F[_]] {

  /** The pipe that consumes the log events. */
  def receive: Pipe[F, LogEvent, Unit]
}
object LogSink {

  /** Creates a sink from the given pipe. */
  def apply[F[_]](sink: Pipe[F, LogEvent, Unit]): LogSink[F] =
    new LogSink[F] {
      val receive = sink
    }

  /** Writes the event to the application logger using the level of the event.
    *
    * The job id and job info are attached as data to the log entry. The error of the
    * event, if there is one, is attached only for events of level `Error`.
    */
  def logInternal[F[_]: Sync](e: LogEvent): F[Unit] = {
    val logger = docspell.logging.getLogger[F]
    val addData: logging.LogEvent => logging.LogEvent =
      _.data("jobId", e.jobId).data("jobInfo", e.jobInfo)

    e.level match {
      case LogLevel.Info =>
        logger.infoWith(e.logLine)(addData)
      case LogLevel.Debug =>
        logger.debugWith(e.logLine)(addData)
      case LogLevel.Warn =>
        logger.warnWith(e.logLine)(addData)
      case LogLevel.Error =>
        e.ex match {
          case Some(exc) =>
            logger.errorWith(e.logLine)(addData.andThen(_.addError(exc)))
          case None =>
            logger.errorWith(e.logLine)(addData)
        }
    }
  }

  /** A sink that only writes the events to the application logger. */
  def printer[F[_]: Sync]: LogSink[F] =
    LogSink(_.evalMap(e => logInternal(e)))

  /** A sink that writes each event to the application logger and then stores it in the
    * database.
    */
  def db[F[_]: Async](store: Store[F]): LogSink[F] =
    LogSink(_.evalMap(ev => persist(store, ev, alsoLog = true)))

  /** A sink that sends each event to both the application logger and the database.
    *
    * The database branch only stores the event. The [[printer]] branch already writes
    * it to the application logger; combining it with [[db]] would log each event twice.
    */
  def dbAndLog[F[_]: Async](store: Store[F]): LogSink[F] = {
    val storeOnly: Pipe[F, LogEvent, Unit] =
      _.evalMap(ev => persist(store, ev, alsoLog = false))

    LogSink(_.broadcastThrough(printer[F].receive, storeOnly))
  }

  /** Stores the event as a job log record, optionally writing it to the application
    * logger before the insert.
    */
  private def persist[F[_]: Async](
      store: Store[F],
      ev: LogEvent,
      alsoLog: Boolean
  ): F[Unit] =
    for {
      id <- Ident.randomId[F]
      joblog = RJobLog(
        id,
        ev.jobId,
        ev.level,
        ev.time,
        ev.msg + errorSuffix(ev)
      )
      _ <- if (alsoLog) logInternal(ev) else Async[F].unit
      _ <- store.transact(RJobLog.insert(joblog))
    } yield ()

  /** The text appended to the stored message when the event carries an error. */
  private def errorSuffix(ev: LogEvent): String =
    ev.ex
      .map { th =>
        // `getMessage` may be null; fall back to the class name instead of storing "null"
        ": " + Option(th.getMessage).getOrElse(th.getClass.getName)
      }
      .getOrElse("")
}

View File

@ -0,0 +1,32 @@
/*
* Copyright 2020 Eike K. & Contributors
*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
package docspell.scheduler
import cats.effect._
import fs2._
/** A periodic scheduler is responsible for submitting periodic tasks to the job queue.
  *
  * It runs in the background and regularly looks for a periodic task to execute. A
  * task that is due is submitted into the job queue, where the scheduler of some joex
  * instance picks it up. For a task that is due in the future, a notification is
  * scheduled for that time so the task can be looked up again.
  */
trait PeriodicScheduler[F[_]] {

  /** The configuration of this periodic scheduler. */
  def config: PeriodicSchedulerConfig

  /** The stream that runs the periodic scheduler; it emits no elements. */
  def start: Stream[F, Nothing]

  /** Requests this periodic scheduler to shut down. */
  def shutdown: F[Unit]

  /** Starts a fiber and returns it.
    *
    * NOTE(review): presumably the fiber wakes the scheduler up periodically (see
    * `PeriodicSchedulerConfig.wakeupPeriod`) - confirm against the implementation.
    */
  def periodicAwake: F[Fiber[F, Throwable, Unit]]

  /** Notifies this periodic scheduler that something has changed. */
  def notifyChange: F[Unit]
}

object PeriodicScheduler {}

View File

@ -0,0 +1,14 @@
/*
* Copyright 2020 Eike K. & Contributors
*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
package docspell.scheduler
import docspell.common._
/** Configuration of a [[PeriodicScheduler]].
  *
  * @param name
  *   the name of the periodic scheduler
  * @param wakeupPeriod
  *   a duration; NOTE(review): presumably the interval between periodic wake-ups -
  *   confirm against the implementation
  */
case class PeriodicSchedulerConfig(
    name: Ident,
    wakeupPeriod: Duration
)

View File

@ -0,0 +1,58 @@
/*
* Copyright 2020 Eike K. & Contributors
*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
package docspell.scheduler
import cats.effect._
import cats.effect.std.Queue
import cats.implicits._
import fs2.Stream
import docspell.common._
import docspell.logging
import docspell.logging.{Level, Logger}
/** A logger for jobs that puts its events into a queue. */
object QueueLogger {

  /** Creates a logger that converts every log event into a job [[LogEvent]] for the
    * given job and offers it to the queue. If the incoming event carries errors, the
    * first one is attached to the job event. The unsafe variant of the logger is
    * `Logger.off`.
    */
  def create[F[_]: Sync](
      jobId: Ident,
      jobInfo: String,
      q: Queue[F, LogEvent]
  ): Logger[F] =
    new Logger[F] {
      def log(logEvent: logging.LogEvent) = {
        val level = LogLevel.fromLevel(logEvent.level)
        LogEvent
          .create[F](jobId, jobInfo, level, logEvent.msg())
          .map { ev =>
            logEvent.findErrors.headOption.fold(ev)(err => ev.copy(ex = Some(err)))
          }
          .flatMap(ev => q.offer(ev))
      }

      def asUnsafe = Logger.off
    }

  /** Creates a logger backed by a circular buffer queue of the given size and starts a
    * fiber that sends everything from the queue to the sink.
    *
    * NOTE(review): the fiber returned by `start` is discarded, so nothing here cancels
    * the draining stream - confirm that this is intended.
    */
  def apply[F[_]: Async](
      jobId: Ident,
      jobInfo: String,
      bufferSize: Int,
      sink: LogSink[F]
  ): F[Logger[F]] =
    Queue.circularBuffer[F, LogEvent](bufferSize).flatMap { queue =>
      val jobLogger = create[F](jobId, jobInfo, queue)
      val drainQueue =
        Stream.fromQueueUnterminated(queue).through(sink.receive).compile.drain

      Async[F].start(drainQueue).as(jobLogger)
    }
}

View File

@ -0,0 +1,38 @@
/*
* Copyright 2020 Eike K. & Contributors
*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
package docspell.scheduler
import cats.effect._
import fs2.Stream
import docspell.common.Ident
import docspell.store.records.RJob
/** The scheduler runs the jobs from the job queue. */
trait Scheduler[F[_]] {

  /** The configuration of this scheduler. */
  def config: SchedulerConfig

  /** Returns the jobs that are running. */
  def getRunning: F[Vector[RJob]]

  /** Requests to cancel the job with the given id.
    *
    * NOTE(review): the meaning of the returned flag is not visible here; presumably it
    * tells whether a matching job was found - confirm against the implementation.
    */
  def requestCancel(jobId: Ident): F[Boolean]

  /** Notifies this scheduler that something has changed. */
  def notifyChange: F[Unit]

  /** The stream that runs the scheduler; it emits no elements. */
  def start: Stream[F, Nothing]

  /** Requests to shutdown the scheduler.
    *
    * The scheduler will not take any new jobs from the queue. If there are still
    * running jobs, it waits for them to complete. When the cancelAll flag is set to
    * true, it cancels all running jobs.
    *
    * The returned F[Unit] can be evaluated to wait for all that to complete.
    */
  def shutdown(cancelAll: Boolean): F[Unit]

  /** Starts a fiber and returns it.
    *
    * NOTE(review): presumably the fiber wakes the scheduler up periodically (see
    * `SchedulerConfig.wakeupPeriod`) - confirm against the implementation.
    */
  def periodicAwake: F[Fiber[F, Throwable, Unit]]
}

View File

@ -0,0 +1,33 @@
/*
* Copyright 2020 Eike K. & Contributors
*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
package docspell.scheduler
import docspell.common._
/** Configuration of a [[Scheduler]].
  *
  * @param name
  *   the name of the scheduler
  * @param poolSize
  *   NOTE(review): presumably the number of jobs that may run in parallel - confirm
  * @param countingScheme
  *   the ratio between scheduling high and low priority jobs
  * @param retries
  *   the retry count that `Context.isLastRetry` compares a job's retries against
  * @param retryDelay
  *   NOTE(review): presumably the delay before a failed job is tried again - confirm
  * @param logBufferSize
  *   the size of the buffer used for the log events of a job
  * @param wakeupPeriod
  *   NOTE(review): presumably the interval between periodic wake-ups - confirm
  */
case class SchedulerConfig(
    name: Ident,
    poolSize: Int,
    countingScheme: CountingScheme,
    retries: Int,
    retryDelay: Duration,
    logBufferSize: Int,
    wakeupPeriod: Duration
)
object SchedulerConfig {

  /** The default scheduler configuration. */
  val default: SchedulerConfig =
    SchedulerConfig(
      name = Ident.unsafe("default-scheduler"),
      // alternative: math.max(2, Runtime.getRuntime.availableProcessors / 2)
      poolSize = 2,
      countingScheme = CountingScheme(2, 1),
      retries = 5,
      retryDelay = Duration.seconds(30),
      logBufferSize = 500,
      wakeupPeriod = Duration.minutes(10)
    )
}

View File

@ -0,0 +1,72 @@
/*
* Copyright 2020 Eike K. & Contributors
*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
package docspell.scheduler
import cats._
import cats.data.Kleisli
import cats.effect.Sync
import cats.implicits._
import docspell.logging.Logger
/** The code that the scheduler executes.
  *
  * @tparam F
  *   the effect type
  * @tparam A
  *   the type of the arguments, provided through the [[Context]]
  * @tparam B
  *   the type of the result
  */
trait Task[F[_], A, B] {

  /** Runs this task in the given context. */
  def run(ctx: Context[F, A]): F[B]

  /** Creates a task that passes the result of this task on to `f`. */
  def andThen[C](f: B => F[C])(implicit F: FlatMap[F]): Task[F, A, C] = {
    val composed = Task.toKleisli(this).andThen(f)
    Task(composed)
  }

  /** Creates a task that transforms the effect produced by this task. */
  def mapF[C](f: F[B] => F[C]): Task[F, A, C] = {
    val transformed = Task.toKleisli(this).mapF(f)
    Task(transformed)
  }

  /** Creates a task that returns an error of this task as a value. */
  def attempt(implicit
      F: ApplicativeError[F, Throwable]
  ): Task[F, A, Either[Throwable, B]] =
    mapF[Either[Throwable, B]](fb => F.attempt(fb))

  /** Creates a task that accepts arguments of type `C`. They are converted with `f`
    * and this task is run in a context that carries the converted arguments.
    */
  def contramap[C](f: C => F[A])(implicit F: FlatMap[F]): Task[F, C, B] =
    (outer: Context[F, C]) =>
      F.flatMap(f(outer.args))(converted => run(outer.map(_ => converted)))
}
object Task {

  /** A task that ignores its context and returns the given value. */
  def pure[F[_]: Applicative, A, B](b: B): Task[F, A, B] =
    Task(_ => Applicative[F].pure(b))

  /** A task that ignores its context and returns the given effect. */
  def of[F[_], A, B](b: F[B]): Task[F, A, B] =
    (_: Context[F, A]) => b

  /** Creates a task from a function. */
  def apply[F[_], A, B](f: Context[F, A] => F[B]): Task[F, A, B] =
    new Task[F, A, B] {
      def run(ctx: Context[F, A]): F[B] = f(ctx)
    }

  /** Creates a task from a `Kleisli`. */
  def apply[F[_], A, B](k: Kleisli[F, Context[F, A], B]): Task[F, A, B] =
    new Task[F, A, B] {
      def run(ctx: Context[F, A]): F[B] = k.run(ctx)
    }

  /** Views the task as a `Kleisli`. */
  def toKleisli[F[_], A, B](t: Task[F, A, B]): Kleisli[F, Context[F, A], B] =
    Kleisli[F, Context[F, A], B](ctx => t.run(ctx))

  /** A task that sets the progress to `n` and then returns `data`. */
  def setProgress[F[_]: Sync, A, B](n: Int)(data: B): Task[F, A, B] =
    Task[F, A, B]((ctx: Context[F, A]) => ctx.setProgress(n).as(data))

  /** A task that applies `f` to the logger of the context. */
  def log[F[_], A](f: Logger[F] => F[Unit]): Task[F, A, Unit] =
    (ctx: Context[F, A]) => f(ctx.logger)

  /** The monad instance for tasks; all operations are delegated to `Kleisli`. */
  implicit def taskMonad[F[_]: Monad, T]: Monad[Task[F, T, *]] =
    new Monad[Task[F, T, *]] {
      def pure[A](x: A): Task[F, T, A] =
        Task.pure[F, T, A](x)

      def flatMap[A, B](fa: Task[F, T, A])(f: A => Task[F, T, B]): Task[F, T, B] = {
        val chained = Task.toKleisli(fa).flatMap(a => Task.toKleisli(f(a)))
        Task(chained)
      }

      def tailRecM[A, B](a: A)(f: A => Task[F, T, Either[A, B]]): Task[F, T, B] = {
        val kleisliMonad = Monad[Kleisli[F, Context[F, T], *]]
        val looped = kleisliMonad.tailRecM(a)(step => Task.toKleisli(f(step)))
        Task(looped)
      }
    }
}

View File

@ -0,0 +1,32 @@
/*
* Copyright 2020 Eike K. & Contributors
*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
package docspell.scheduler.msg
import docspell.common._
import docspell.pubsub.api.{Topic, TypedTopic}
import io.circe.generic.semiauto.{deriveDecoder, deriveEncoder}
import io.circe.{Decoder, Encoder}
/** A message that notifies about a finished job. Such a job has a final state.
  *
  * @param jobId
  *   the id of the job
  * @param group
  *   the group of the job
  * @param task
  *   the name of the task
  * @param args
  *   the arguments of the task as a string
  * @param state
  *   the state of the job
  */
final case class JobDone(
    jobId: Ident,
    group: Ident,
    task: Ident,
    args: String,
    state: JobState
)
object JobDone {

  /** Json decoder derived from the case class structure. */
  implicit val jsonDecoder: Decoder[JobDone] =
    deriveDecoder[JobDone]

  /** Json encoder derived from the case class structure. */
  implicit val jsonEncoder: Encoder[JobDone] =
    deriveEncoder[JobDone]

  /** The topic used for [[JobDone]] messages. */
  val topic: TypedTopic[JobDone] =
    TypedTopic(Topic("job-finished"))
}