Refactor scheduler into api / impl

This commit is contained in:
eikek
2022-03-13 14:27:06 +01:00
parent 69765f05ff
commit 3a05dc56cb
50 changed files with 1076 additions and 867 deletions

View File

@ -6,14 +6,9 @@
package docspell.scheduler
import cats.effect._
import cats.implicits._
import cats.{Applicative, Functor}
import docspell.common._
import docspell.logging.Logger
import docspell.store.Store
import docspell.store.records.RJob
trait Context[F[_], A] { self =>
@ -29,54 +24,8 @@ trait Context[F[_], A] { self =>
def store: Store[F]
final def isLastRetry(implicit ev: Applicative[F]): F[Boolean] =
for {
current <- store.transact(RJob.getRetries(jobId))
last = config.retries == current.getOrElse(0)
} yield last
def isLastRetry: F[Boolean]
def map[C](f: A => C): Context[F, C]
def map[C](f: A => C)(implicit F: Functor[F]): Context[F, C] =
new Context.ContextImpl[F, C](f(args), logger, store, config, jobId)
}
object Context {

  /** Assembles a context directly from its parts, using `log` as the task logger. */
  def create[F[_]: Async, A](
      jobId: Ident,
      arg: A,
      config: SchedulerConfig,
      log: Logger[F],
      store: Store[F]
  ): Context[F, A] =
    new ContextImpl[F, A](
      args = arg,
      logger = log,
      store = store,
      config = config,
      jobId = jobId
    )

  /** Creates a context for running `job`.
    *
    * A `QueueLogger` is created from the job's id and info, sized by
    * `config.logBufferSize` and connected to `logSink`; it is used as the logger of the
    * resulting context. Both steps are traced on the internal logger.
    */
  def apply[F[_]: Async, A](
      job: RJob,
      arg: A,
      config: SchedulerConfig,
      logSink: LogSink[F],
      store: Store[F]
  ): F[Context[F, A]] = {
    val internal = docspell.logging.getLogger[F]

    internal
      .trace("Creating logger for task run")
      .flatMap(_ => QueueLogger(job.id, job.info, config.logBufferSize, logSink))
      .flatMap { taskLogger =>
        internal
          .trace("Logger created, instantiating context")
          .map(_ => create[F, A](job.id, arg, config, taskLogger, store))
      }
  }

  /** Plain value holder implementing [[Context]]. */
  final private class ContextImpl[F[_]: Functor, A](
      val args: A,
      val logger: Logger[F],
      val store: Store[F],
      val config: SchedulerConfig,
      val jobId: Ident
  ) extends Context[F, A] {

    /** Stores the progress of this job; `percent` is clamped into the range 0..100. */
    def setProgress(percent: Int): F[Unit] = {
      val clamped = percent.max(0).min(100)
      store.transact(RJob.setProgress(jobId, clamped)).void
    }
  }
}

View File

@ -0,0 +1,36 @@
package docspell.scheduler
import cats.effect.Sync
import cats.syntax.functor._
import docspell.common._
import io.circe.Encoder
/** Description of a job whose arguments are of type `A`. */
final case class Job[A](
    id: Ident,
    task: Ident,
    group: Ident,
    args: A,
    subject: String,
    submitter: Ident,
    priority: Priority,
    tracker: Option[Ident]
) {

  /** Returns this job with its arguments replaced by their compact (no whitespace) JSON
    * text. All other fields are carried over unchanged.
    */
  def encode(implicit E: Encoder[A]): Job[String] =
    copy(args = E(args).noSpaces)
}
object Job {

  /** Creates a job from the given values, using a freshly generated random id. */
  def createNew[F[_]: Sync, A](
      task: Ident,
      group: Ident,
      args: A,
      subject: String,
      submitter: Ident,
      priority: Priority,
      tracker: Option[Ident]
  ): F[Job[A]] =
    for {
      newId <- Ident.randomId[F]
    } yield Job(
      id = newId,
      task = task,
      group = group,
      args = args,
      subject = subject,
      submitter = submitter,
      priority = priority,
      tracker = tracker
    )
}

View File

@ -1,97 +0,0 @@
/*
* Copyright 2020 Eike K. & Contributors
*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
package docspell.scheduler
import cats.effect._
import cats.implicits._
import docspell.common._
import docspell.store.Store
import docspell.store.queries.QJob
import docspell.store.records.RJob
/** A queue of jobs (`RJob`): operations to insert jobs and to take the next one. */
trait JobQueue[F[_]] {
/** Inserts the job into the queue to get picked up as soon as possible. The job must
* have a new unique id.
*/
def insert(job: RJob): F[Unit]
/** Inserts the job into the queue only if there is no job with the same tracker id
* running at the moment. The job id must be a new unique id.
*
* If the job has no tracker defined, it is simply inserted.
*
* The result tells whether the job has been inserted.
*/
def insertIfNew(job: RJob): F[Boolean]
/** Inserts all given jobs and returns a list of flags.
*
* In the implementation created by `JobQueue.create`, each flag is `true` if the
* corresponding insert succeeded; a failed insert is logged and yields `false` instead
* of failing the whole operation.
*/
def insertAll(jobs: Seq[RJob]): F[List[Boolean]]
/** Like `insertAll`, but every job is inserted via `insertIfNew`.
*
* In the implementation created by `JobQueue.create`, a flag is `false` both when the
* job was skipped and when its insert failed (the failure is logged).
*/
def insertAllIfNew(jobs: Seq[RJob]): F[List[Boolean]]
/** Selects the next job, if any.
*
* The implementation created by `JobQueue.create` passes all three arguments on to
* `QJob.takeNextJob`; their exact meaning is defined there.
*/
def nextJob(
prio: Ident => F[Priority],
worker: Ident,
retryPause: Duration
): F[Option[RJob]]
}
object JobQueue {

  /** Creates a [[JobQueue]] that works on the given store. */
  private[scheduler] def create[F[_]: Async](store: Store[F]): Resource[F, JobQueue[F]] =
    Resource.pure[F, JobQueue[F]](new JobQueue[F] {
      private[this] val logger = docspell.logging.getLogger[F]

      /** Second phase of the batch inserts: converts each attempted insert into a flag.
        * Failed attempts are logged and reported as `false`.
        */
      private def reportFailures(
          attempts: List[Either[Throwable, Boolean]]
      ): F[List[Boolean]] =
        attempts.traverse {
          case Right(inserted) => inserted.pure[F]
          case Left(ex) =>
            logger.error(ex)("Could not insert job. Skipping it.").as(false)
        }

      def nextJob(
          prio: Ident => F[Priority],
          worker: Ident,
          retryPause: Duration
      ): F[Option[RJob]] = {
        val announce = logger.trace("Select next job")
        announce *> QJob.takeNextJob(store)(prio, worker, retryPause)
      }

      /** Inserts the job and fails unless exactly one row was reported as changed. */
      def insert(job: RJob): F[Unit] =
        store.transact(RJob.insert(job)).flatMap { n =>
          if (n == 1) Async[F].unit
          else
            Async[F].raiseError[Unit](
              new Exception(s"Inserting job failed. Update count: $n")
            )
        }

      def insertIfNew(job: RJob): F[Boolean] =
        job.tracker
          // without a tracker there is nothing to look up
          .traverse(tid => store.transact(RJob.findNonFinalByTracker(tid)))
          .map(_.flatten)
          .flatMap { running =>
            if (running.isEmpty) insert(job).as(true)
            else false.pure[F]
          }

      def insertAll(jobs: Seq[RJob]): F[List[Boolean]] =
        jobs.toList
          .traverse(job => insert(job).as(true).attempt)
          .flatMap(reportFailures)

      def insertAllIfNew(jobs: Seq[RJob]): F[List[Boolean]] =
        jobs.toList
          .traverse(job => insertIfNew(job).attempt)
          .flatMap(reportFailures)
    })
}

View File

@ -0,0 +1,21 @@
package docspell.scheduler
/** Operations for submitting jobs whose arguments are already encoded as a string. */
trait JobStore[F[_]] {
/** Inserts the job into the queue to get picked up as soon as possible. The job must
* have a new unique id.
*/
def insert(job: Job[String]): F[Unit]
/** Inserts the job into the queue only if there is no job with the same tracker id
* running at the moment. The job id must be a new unique id.
*
* If the job has no tracker defined, it is simply inserted.
*/
def insertIfNew(job: Job[String]): F[Boolean]
/** Inserts all given jobs.
*
* NOTE(review): presumably returns one flag per job telling whether it was inserted;
* confirm against the implementation.
*/
def insertAll(jobs: Seq[Job[String]]): F[List[Boolean]]
/** Inserts all given jobs, presumably applying the `insertIfNew` rule to each one.
*
* NOTE(review): confirm the meaning of the returned flags against the implementation.
*/
def insertAllIfNew(jobs: Seq[Job[String]]): F[List[Boolean]]
}

View File

@ -0,0 +1,9 @@
package docspell.scheduler
import docspell.scheduler.usertask.UserTaskStore
/** Bundles the two store-side components: the user task store and the job store. */
trait JobStoreModule[F[_]] {
/** Access to user tasks. */
def userTasks: UserTaskStore[F]
/** Access to submitting jobs. */
def jobs: JobStore[F]
}

View File

@ -43,7 +43,7 @@ object JobTask {
str.parseJsonAs[A] match {
case Right(a) => a.pure[F]
case Left(ex) =>
Sync[F].raiseError(new Exception(s"Cannot parse task arguments: $str", ex))
Sync[F].raiseError(new Exception(s"Cannot parse task arguments: '$str'", ex))
}
JobTask(name, task.contramap(convert).map(E.encode), onCancel.contramap(convert))

View File

@ -1,76 +0,0 @@
/*
* Copyright 2020 Eike K. & Contributors
*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
package docspell.scheduler
import cats.effect._
import cats.implicits._
import fs2.Pipe
import docspell.common._
import docspell.logging
import docspell.store.Store
import docspell.store.records.RJobLog
/** A consumer of job log events, expressed as an fs2 pipe. */
trait LogSink[F[_]] {
/** The pipe that consumes the log events. */
def receive: Pipe[F, LogEvent, Unit]
}
object LogSink {

  /** Creates a [[LogSink]] whose `receive` is the given pipe. */
  def apply[F[_]](sink: Pipe[F, LogEvent, Unit]): LogSink[F] =
    new LogSink[F] {
      val receive = sink
    }

  /** Forwards a job log event to the application logger at the level of the event.
    *
    * The values `jobId` and `jobInfo` are attached as data. For error events carrying an
    * exception, the exception is attached as well.
    */
  def logInternal[F[_]: Sync](e: LogEvent): F[Unit] = {
    val logger = docspell.logging.getLogger[F]
    val addData: logging.LogEvent => logging.LogEvent =
      _.data("jobId", e.jobId).data("jobInfo", e.jobInfo)

    e.level match {
      case LogLevel.Info =>
        logger.infoWith(e.logLine)(addData)
      case LogLevel.Debug =>
        logger.debugWith(e.logLine)(addData)
      case LogLevel.Warn =>
        logger.warnWith(e.logLine)(addData)
      case LogLevel.Error =>
        e.ex match {
          case Some(exc) =>
            logger.errorWith(e.logLine)(addData.andThen(_.addError(exc)))
          case None =>
            logger.errorWith(e.logLine)(addData)
        }
    }
  }

  /** A sink that only forwards events to the application logger. */
  def printer[F[_]: Sync]: LogSink[F] =
    LogSink(_.evalMap(e => logInternal(e)))

  /** A sink that forwards each event to the application logger and then stores it as a
    * `RJobLog` record.
    *
    * If the event carries an exception, its message is appended to the stored text.
    * `Throwable.getMessage` may return `null`; in that case the exception's class name is
    * appended instead, so that the stored text never ends in ": null".
    */
  def db[F[_]: Async](store: Store[F]): LogSink[F] =
    LogSink(
      _.evalMap(ev =>
        for {
          id <- Ident.randomId[F]
          joblog = RJobLog(
            id,
            ev.jobId,
            ev.level,
            ev.time,
            ev.msg + ev.ex
              .map(th => ": " + Option(th.getMessage).getOrElse(th.getClass.getName))
              .getOrElse("")
          )
          _ <- logInternal(ev)
          _ <- store.transact(RJobLog.insert(joblog))
        } yield ()
      )
    )

  /** A sink that broadcasts every event to both `printer` and `db`.
    *
    * NOTE(review): `db` calls `logInternal` itself, so with this sink each event appears
    * to reach the application logger twice. Confirm whether that is intended.
    */
  def dbAndLog[F[_]: Async](store: Store[F]): LogSink[F] =
    LogSink(_.broadcastThrough(printer[F].receive, db[F](store).receive))
}

View File

@ -12,3 +12,8 @@ case class PeriodicSchedulerConfig(
name: Ident,
wakeupPeriod: Duration
)
object PeriodicSchedulerConfig {

  /** Default configuration named `id`, using a wakeup period of 10 minutes. */
  def default(id: Ident): PeriodicSchedulerConfig =
    PeriodicSchedulerConfig(name = id, wakeupPeriod = Duration.minutes(10))
}

View File

@ -1,58 +0,0 @@
/*
* Copyright 2020 Eike K. & Contributors
*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
package docspell.scheduler
import cats.effect._
import cats.effect.std.Queue
import cats.implicits._
import fs2.Stream
import docspell.common._
import docspell.logging
import docspell.logging.{Level, Logger}
object QueueLogger {

  /** Creates a logger that converts every incoming log event into a job `LogEvent` and
    * offers it to `q`.
    */
  def create[F[_]: Sync](
      jobId: Ident,
      jobInfo: String,
      q: Queue[F, LogEvent]
  ): Logger[F] =
    new Logger[F] {
      def log(logEvent: logging.LogEvent) =
        LogEvent
          .create[F](jobId, jobInfo, level2Level(logEvent.level), logEvent.msg())
          .flatMap { base =>
            // carry over the first error attached to the incoming event, if there is one
            val firstError = logEvent.findErrors.headOption
            q.offer(firstError.fold(base)(err => base.copy(ex = Some(err))))
          }

      def asUnsafe = Logger.off
    }

  /** Creates a logger backed by a circular buffer of `bufferSize` events and starts a
    * fiber that drains the buffer into `sink`.
    *
    * The fiber handle returned by `start` is discarded here.
    */
  def apply[F[_]: Async](
      jobId: Ident,
      jobInfo: String,
      bufferSize: Int,
      sink: LogSink[F]
  ): F[Logger[F]] =
    Queue.circularBuffer[F, LogEvent](bufferSize).flatMap { queue =>
      val queueLogger = create(jobId, jobInfo, queue)
      val drainQueue =
        Stream.fromQueueUnterminated(queue).through(sink.receive).compile.drain

      Async[F].start(drainQueue).map(_ => queueLogger)
    }

  private def level2Level(level: Level): LogLevel =
    LogLevel.fromLevel(level)
}

View File

@ -20,11 +20,10 @@ case class SchedulerConfig(
object SchedulerConfig {
val default = SchedulerConfig(
name = Ident.unsafe("default-scheduler"),
poolSize = 2 // math.max(2, Runtime.getRuntime.availableProcessors / 2)
,
countingScheme = CountingScheme(2, 1),
def default(id: Ident) = SchedulerConfig(
name = id,
poolSize = 1,
countingScheme = CountingScheme(3, 1),
retries = 5,
retryDelay = Duration.seconds(30),
logBufferSize = 500,

View File

@ -0,0 +1,6 @@
package docspell.scheduler
/** Bundles the two scheduler components: the scheduler and the periodic scheduler. */
trait SchedulerModule[F[_]] {
/** The scheduler instance. */
def scheduler: Scheduler[F]
/** The periodic scheduler instance. */
def periodicScheduler: PeriodicScheduler[F]
}

View File

@ -1,78 +0,0 @@
/*
* Copyright 2020 Eike K. & Contributors
*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
package docspell.scheduler.msg
import cats.effect._
import cats.implicits._
import docspell.common.{Duration, Ident, Priority}
import docspell.notification.api.{Event, EventSink}
import docspell.pubsub.api.PubSubT
import docspell.scheduler.JobQueue
import docspell.store.Store
import docspell.store.records.RJob
/** A [[JobQueue]] that delegates to another queue and, for every job that was inserted,
  * publishes a `JobSubmitted` message via pubsub and offers a `JobSubmitted` event to the
  * event sink.
  */
final class JobQueuePublish[F[_]: Sync](
    delegate: JobQueue[F],
    pubsub: PubSubT[F],
    eventSink: EventSink[F]
) extends JobQueue[F] {

  /** Announces a submitted job: first the pubsub message, then the event. */
  private def publish(job: RJob): F[Unit] = {
    val message = JobSubmitted(job.id, job.group, job.task, job.args)
    val submitted = Event.JobSubmitted(
      job.id,
      job.group,
      job.task,
      job.args,
      job.state,
      job.subject,
      job.submitter
    )

    pubsub.publish1(JobSubmitted.topic, message).void *> eventSink.offer(submitted)
  }

  /** Publishes the job only if it was inserted. */
  private def publishWhen(inserted: Boolean, job: RJob): F[Unit] =
    if (inserted) publish(job) else Sync[F].unit

  /** Publishes each job whose corresponding result flag is `true`. */
  private def publishInserted(results: List[Boolean], jobs: Seq[RJob]): F[Unit] =
    results.zip(jobs).traverse_ { case (inserted, job) => publishWhen(inserted, job) }

  def insert(job: RJob): F[Unit] =
    delegate.insert(job) >> publish(job)

  def insertIfNew(job: RJob): F[Boolean] =
    delegate.insertIfNew(job).flatTap(inserted => publishWhen(inserted, job))

  def insertAll(jobs: Seq[RJob]): F[List[Boolean]] =
    delegate.insertAll(jobs).flatTap(results => publishInserted(results, jobs))

  def insertAllIfNew(jobs: Seq[RJob]): F[List[Boolean]] =
    delegate.insertAllIfNew(jobs).flatTap(results => publishInserted(results, jobs))

  def nextJob(
      prio: Ident => F[Priority],
      worker: Ident,
      retryPause: Duration
  ): F[Option[RJob]] =
    delegate.nextJob(prio, worker, retryPause)
}
object JobQueuePublish {

  /** Creates the store-backed queue and wraps it so that inserted jobs are published. */
  def apply[F[_]: Async](
      store: Store[F],
      pubSub: PubSubT[F],
      eventSink: EventSink[F]
  ): Resource[F, JobQueue[F]] =
    for {
      queue <- JobQueue.create(store)
    } yield new JobQueuePublish[F](queue, pubSub, eventSink)
}

View File

@ -1,159 +0,0 @@
package docspell.scheduler.usertask
import cats.implicits._
import cats.effect.Sync
import com.github.eikek.calev.CalEvent
import docspell.common._
import docspell.store.qb.DML
import docspell.store.qb.DSL._
import docspell.store.records.RPeriodicTask
import doobie.ConnectionIO
import fs2.Stream
import io.circe.Encoder
/** Queries that read and write user tasks, which are stored as `RPeriodicTask` records. */
object QUserTask {
// Table definition used by all queries below.
private val RT = RPeriodicTask.T
/** Streams all tasks whose group equals the account's collective and whose submitter
* equals the account's user.
*/
def findAll(account: AccountId): Stream[ConnectionIO, UserTask[String]] =
run(
select(RT.all),
from(RT),
RT.group === account.collective && RT.submitter === account.user
).query[RPeriodicTask].stream.map(makeUserTask)
/** Streams the tasks of the given account whose task name equals `name`. */
def findByName(
account: AccountId,
name: Ident
): Stream[ConnectionIO, UserTask[String]] =
run(
select(RT.all),
from(RT),
where(
RT.group === account.collective,
RT.submitter === account.user,
RT.task === name
)
).query[RPeriodicTask].stream.map(makeUserTask)
/** Finds the task of the given account with the given id, if there is one. */
def findById(
account: AccountId,
id: Ident
): ConnectionIO[Option[UserTask[String]]] =
run(
select(RT.all),
from(RT),
where(
RT.group === account.collective,
RT.submitter === account.user,
RT.id === id
)
).query[RPeriodicTask].option.map(_.map(makeUserTask))
/** Converts the task into a periodic task record (via `toPeriodicTask`) and inserts it.
* Returns the result of `RPeriodicTask.insert`.
*/
def insert(
scope: UserTaskScope,
subject: Option[String],
task: UserTask[String]
): ConnectionIO[Int] =
for {
r <- task.toPeriodicTask[ConnectionIO](scope, subject)
n <- RPeriodicTask.insert(r)
} yield n
/** Converts the task into a periodic task record (via `toPeriodicTask`) and updates the
* stored record with it. Returns the result of `RPeriodicTask.update`.
*/
def update(
scope: UserTaskScope,
subject: Option[String],
task: UserTask[String]
): ConnectionIO[Int] =
for {
r <- task.toPeriodicTask[ConnectionIO](scope, subject)
n <- RPeriodicTask.update(r)
} yield n
/** Delegates to `RPeriodicTask.exists`. Note that only the id is passed; no account is
* involved in this check.
*/
def exists(id: Ident): ConnectionIO[Boolean] =
RPeriodicTask.exists(id)
/** Deletes the task of the given account that has the given id. */
def delete(account: AccountId, id: Ident): ConnectionIO[Int] =
DML
.delete(
RT,
where(
RT.group === account.collective,
RT.submitter === account.user,
RT.id === id
)
)
/** Deletes all tasks of the given account whose task name equals `name`. */
def deleteAll(account: AccountId, name: Ident): ConnectionIO[Int] =
DML.delete(
RT,
where(
RT.group === account.collective,
RT.submitter === account.user,
RT.task === name
)
)
/** Converts a record into a user task; the record's `task` value is passed in the
* second position of `UserTask`.
*/
def makeUserTask(r: RPeriodicTask): UserTask[String] =
UserTask(r.id, r.task, r.enabled, r.timer, r.summary, r.args)
/** Creates a new periodic task record with a random id and the current time.
*
* The group is the scope's collective and the submitter is obtained from
* `scope.fold(_.user, identity)`.
*/
def create[F[_]: Sync](
enabled: Boolean,
scope: UserTaskScope,
task: Ident,
args: String,
subject: String,
priority: Priority,
timer: CalEvent,
summary: Option[String]
): F[RPeriodicTask] =
Ident
.randomId[F]
.flatMap(id =>
Timestamp
.current[F]
.map { now =>
RPeriodicTask(
id,
enabled,
task,
scope.collective,
args,
subject,
scope.fold(_.user, identity),
priority,
// NOTE(review): meaning of these two positional fields is not visible here
None,
None,
timer,
// next elapse of the timer after `now` (in UTC); falls back to the epoch if the
// timer has no next elapse
timer
.nextElapse(now.atZone(Timestamp.UTC))
.map(_.toInstant)
.map(Timestamp.apply)
.getOrElse(Timestamp.Epoch),
now,
summary
)
}
)
/** Like `create`, but takes arguments of type `A` and encodes them as compact (no
* whitespace) JSON.
*/
def createJson[F[_]: Sync, A](
enabled: Boolean,
scope: UserTaskScope,
task: Ident,
args: A,
subject: String,
priority: Priority,
timer: CalEvent,
summary: Option[String]
)(implicit E: Encoder[A]): F[RPeriodicTask] =
create[F](
enabled,
scope,
task,
E(args).noSpaces,
subject,
priority,
timer,
summary
)
}

View File

@ -6,15 +6,11 @@
package docspell.scheduler.usertask
import cats.effect._
import cats.implicits._
import com.github.eikek.calev.CalEvent
import docspell.common._
import docspell.common.syntax.all._
import docspell.store.records.RPeriodicTask
import io.circe.{Decoder, Encoder}
import io.circe.Encoder
case class UserTask[A](
final case class UserTask[A](
id: Ident,
name: Ident,
enabled: Boolean,
@ -32,33 +28,3 @@ case class UserTask[A](
def mapArgs[B](f: A => B): UserTask[B] =
withArgs(f(args))
}
object UserTask {

  /** Extension methods for user tasks whose arguments are a JSON string. */
  implicit final class UserTaskCodec(ut: UserTask[String]) {

    /** Parses the JSON arguments into an `A`. A parse failure is returned as its message
      * on the left side.
      */
    def decode[A](implicit D: Decoder[A]): Either[String, UserTask[A]] =
      ut.args.parseJsonAs[A] match {
        case Right(parsed) => Right(ut.copy(args = parsed))
        case Left(error)   => Left(error.getMessage)
      }

    /** Creates a periodic task record for this user task. The record gets the id of this
      * user task and low priority. If no subject is given, one is derived from the scope
      * and the task name.
      */
    def toPeriodicTask[F[_]: Sync](
        scope: UserTaskScope,
        subject: Option[String]
    ): F[RPeriodicTask] = {
      def defaultSubject: String =
        s"${scope.fold(_.user.id, _.id)}: ${ut.name.id}"

      val created = QUserTask.create[F](
        enabled = ut.enabled,
        scope = scope,
        task = ut.name,
        args = ut.args,
        subject = subject.getOrElse(defaultSubject),
        priority = Priority.Low,
        timer = ut.timer,
        summary = ut.summary
      )

      created.map(_.copy(id = ut.id))
    }
  }
}

View File

@ -20,7 +20,7 @@ sealed trait UserTaskScope { self: Product =>
/** Maps to the account or uses the collective for both parts if the scope is collective
* wide.
*/
private[usertask] def toAccountId: AccountId =
private[scheduler] def toAccountId: AccountId =
AccountId(collective, fold(_.user, identity))
}
@ -49,4 +49,7 @@ object UserTaskScope {
/** Shortcut for `UserTaskScope.collective(collective)`. */
def apply(collective: Ident): UserTaskScope =
UserTaskScope.collective(collective)
/** The collective scope built from `DocspellSystem.taskGroup`. */
def system: UserTaskScope =
collective(DocspellSystem.taskGroup)
}

View File

@ -7,10 +7,7 @@
package docspell.scheduler.usertask
import cats.data.OptionT
import cats.effect._
import cats.implicits._
import docspell.common._
import docspell.store.{AddResult, Store}
import fs2.Stream
import io.circe._
@ -88,96 +85,11 @@ trait UserTaskStore[F[_]] {
/** Delete all tasks of the given user that have name `name`. */
def deleteAll(scope: UserTaskScope, name: Ident): F[Int]
}
object UserTaskStore {
/** Creates a `UserTaskStore` that runs the `QUserTask` queries against the given store.
* The scope is converted with `toAccountId` wherever a query expects an account.
*/
def apply[F[_]: Async](store: Store[F]): Resource[F, UserTaskStore[F]] =
Resource.pure[F, UserTaskStore[F]](new UserTaskStore[F] {
def getAll(scope: UserTaskScope): Stream[F, UserTask[String]] =
store.transact(QUserTask.findAll(scope.toAccountId))
def getByNameRaw(scope: UserTaskScope, name: Ident): Stream[F, UserTask[String]] =
store.transact(QUserTask.findByName(scope.toAccountId, name))
def getByIdRaw(scope: UserTaskScope, id: Ident): OptionT[F, UserTask[String]] =
OptionT(store.transact(QUserTask.findById(scope.toAccountId, id)))
// Decodes the arguments of each task found; the first decoding error fails the stream.
def getByName[A](scope: UserTaskScope, name: Ident)(implicit
D: Decoder[A]
): Stream[F, UserTask[A]] =
getByNameRaw(scope, name).flatMap(_.decode match {
case Right(ua) => Stream.emit(ua)
case Left(err) => Stream.raiseError[F](new Exception(err))
})
// Tries to insert the task, using `QUserTask.exists` as the guard passed to `store.add`.
// If the entity is reported as existing, it is updated instead.
def updateTask[A](scope: UserTaskScope, subject: Option[String], ut: UserTask[A])(
implicit E: Encoder[A]
): F[Int] = {
val exists = QUserTask.exists(ut.id)
val insert = QUserTask.insert(scope, subject, ut.encode)
store.add(insert, exists).flatMap {
case AddResult.Success =>
1.pure[F]
case AddResult.EntityExists(_) =>
store.transact(QUserTask.update(scope, subject, ut.encode))
case AddResult.Failure(ex) =>
Async[F].raiseError(ex)
}
}
def deleteTask(scope: UserTaskScope, id: Ident): F[Int] =
store.transact(QUserTask.delete(scope.toAccountId, id))
// Reads at most two results: none gives an empty result, exactly one is returned, and
// two results are an error.
def getOneByNameRaw(
scope: UserTaskScope,
name: Ident
): OptionT[F, UserTask[String]] =
OptionT(
getByNameRaw(scope, name)
.take(2)
.compile
.toList
.flatMap {
case Nil => (None: Option[UserTask[String]]).pure[F]
case ut :: Nil => ut.some.pure[F]
case _ => Async[F].raiseError(new Exception("More than one result found"))
}
)
// Same as `getOneByNameRaw`, with the arguments decoded; a decoding error fails the effect.
def getOneByName[A](scope: UserTaskScope, name: Ident)(implicit
D: Decoder[A]
): OptionT[F, UserTask[A]] =
getOneByNameRaw(scope, name)
.semiflatMap(_.decode match {
case Right(ua) => ua.pure[F]
case Left(err) => Async[F].raiseError(new Exception(err))
})
// Makes sure only one task with the name of `ut` remains: the first existing task is
// updated (keeping its id) and all further ones are deleted. If none exists, the task
// is inserted.
// NOTE(review): the update and the deletes run in two separate `store.transact` calls.
def updateOneTask[A](
scope: UserTaskScope,
subject: Option[String],
ut: UserTask[A]
)(implicit
E: Encoder[A]
): F[UserTask[String]] =
getByNameRaw(scope, ut.name).compile.toList.flatMap {
case a :: rest =>
val task = ut.copy(id = a.id).encode
for {
_ <- store.transact(QUserTask.update(scope, subject, task))
_ <- store.transact(
rest.traverse(t => QUserTask.delete(scope.toAccountId, t.id))
)
} yield task
case Nil =>
val task = ut.encode
store.transact(QUserTask.insert(scope, subject, task)).map(_ => task)
}
def deleteAll(scope: UserTaskScope, name: Ident): F[Int] =
store.transact(QUserTask.deleteAll(scope.toAccountId, name))
})
/** Discards the schedule and immediately submits the task to the job executor's queue.
* It will not update the corresponding periodic task.
*/
def executeNow[A](scope: UserTaskScope, subject: Option[String], task: UserTask[A])(
implicit E: Encoder[A]
): F[Unit]
}

View File

@ -0,0 +1,21 @@
/*
* Copyright 2020 Eike K. & Contributors
*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
package docspell.scheduler
import docspell.common.Priority
import munit._
class CountingSchemeSpec extends FunSuite {

  // A scheme of (2, 1) is expected to yield two high priorities followed by one low
  // priority, repeating.
  test("counting") {
    val scheme = CountingScheme(2, 1)
    val priorities =
      List
        .iterate(scheme.nextPriority, 6) { case (next, _) => next.nextPriority }
        .map { case (_, priority) => priority }
    val cycle = List(Priority.High, Priority.High, Priority.Low)

    assertEquals(priorities, cycle ++ cycle)
  }
}