Mirror of https://github.com/TheAnachronism/docspell.git (synced 2025-06-21 18:08:25 +00:00)
Add api docs and cleanup
@@ -87,7 +87,7 @@ object JoexAppImpl {
       joex <- OJoex(client, store)
       upload <- OUpload(store, queue, cfg.files, joex)
       fts <- createFtsClient(cfg)(httpClient)
-      itemOps <- OItem(store, fts)
+      itemOps <- OItem(store, fts, queue, joex)
       javaEmil =
         JavaMailEmil(blocker, Settings.defaultSettings.copy(debug = cfg.mailDebug))
       sch <- SchedulerBuilder(cfg.scheduler, blocker, store)
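Note on the hunk above: OItem is now wired with the job queue and the OJoex handle, which the hunks below use to submit background conversion jobs and to wake the job-executor nodes. A minimal sketch of what such an operation inside OItem could look like; the method name convertAllPdf, the helper mkConvertAllJob and queue.insertIfNew are illustrative assumptions, not taken from this diff:

// Hedged sketch (not the actual OItem code): with queue and joex in scope,
// an item operation can enqueue a background job and then poke the executors.
def convertAllPdf(collective: Option[Ident]): F[Unit] =
  for {
    job <- mkConvertAllJob(collective) // hypothetical helper building an RJob for ConvertAllPdfArgs
    _   <- queue.insertIfNew(job)      // assumed single-job variant of insertAllIfNew (see below)
    _   <- joex.notifyAllNodes         // wake all joex nodes so the job starts promptly
  } yield ()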
@@ -11,15 +11,19 @@ import docspell.store.queue.JobQueue
-import docspell.store.records.RAttachment
+import docspell.store.records._

 /* A task to find all non-converted pdf files (of a collective, or
  * all) and converting them using ocrmypdf by submitting a job for
  * each found file.
  */
 object ConvertAllPdfTask {
   type Args = ConvertAllPdfArgs

+  def apply[F[_]: Sync](queue: JobQueue[F], joex: OJoex[F]): Task[F, Args, Unit] =
     Task { ctx =>
       for {
-        _ <- ctx.logger.info("Converting older pdfs using ocrmypdf")
+        _ <- ctx.logger.info("Converting pdfs using ocrmypdf")
         n <- submitConversionJobs(ctx, queue)
-        _ <- ctx.logger.info(s"Submitted $n jobs for file conversion")
+        _ <- ctx.logger.info(s"Submitted $n file conversion jobs")
+        _ <- joex.notifyAllNodes
       } yield ()
     }
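The added `_ <- joex.notifyAllNodes` means the per-file jobs submitted by this task are picked up right away instead of waiting for the next scheduler wake-up. A rough sketch of such a broadcast; the node lookup and client call shown here are assumptions about OJoex, not part of this diff:

// Hedged sketch of a notify-all broadcast: look up registered joex nodes and
// ping each one, ignoring individual failures. Exact queries/clients assumed.
def notifyAllNodes: F[Unit] =
  for {
    nodes <- store.transact(RNode.findAll(NodeType.Joex)) // assumed node-table query
    _     <- nodes.toList.traverse_(n => client.notifyJoexIgnoreErrors(n.url))
  } yield ()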
@@ -36,7 +40,7 @@ object ConvertAllPdfTask {
       .chunks
       .flatMap(createJobs[F](ctx))
       .chunks
-      .evalMap(jobs => queue.insertAll(jobs.toVector).map(_ => jobs.size))
+      .evalMap(jobs => queue.insertAllIfNew(jobs.toVector).map(_ => jobs.size))
       .evalTap(n => ctx.logger.debug(s"Submitted $n jobs …"))
       .compile
       .foldMonoid
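Switching from `insertAll` to `insertAllIfNew` makes re-running the convert-all task idempotent: a job whose tracker id already has a queued or running counterpart is skipped instead of being enqueued again. A simplified sketch of that semantics; the lookup helpers are assumptions, since the JobQueue implementation is not part of this diff:

// Hedged sketch of insert-if-new: skip jobs whose tracker id already exists in
// a non-final state; jobs without a tracker are always inserted.
def insertAllIfNew(jobs: Vector[RJob]): F[Unit] =
  jobs.traverse_ { job =>
    job.tracker match {
      case Some(tid) =>
        findNonFinalByTracker(tid).flatMap { // assumed lookup by tracker id
          case Some(_) => ().pure[F]         // duplicate, skip
          case None    => insert(job)
        }
      case None => insert(job)
    }
  }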
@@ -59,7 +63,7 @@ object ConvertAllPdfTask {
         now,
         collectiveOrSystem,
         Priority.Low,
-        Some(ra.id)
+        Some(PdfConvTask.taskName / ra.id)
       )

     val jobs = ras.traverse(mkJob)
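The tracker id now combines the task name with the attachment id, so the duplicate check above only matches earlier conversion jobs for the same attachment rather than any job that merely references that attachment. Illustration only; the `/` combinator on the identifiers comes from the diff, its exact separator is an assumption:

// Hedged illustration: task name and attachment id joined into one tracker id,
// e.g. something like "<task-name>/<attachment-id>". Re-submitting the
// convert-all task yields the same tracker, so insertAllIfNew filters it out.
val tracker: Ident = PdfConvTask.taskName / ra.id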
@@ -17,12 +17,7 @@ object ProcessItem {
   )(item: ItemData): Task[F, ProcessItemArgs, ItemData] =
     ExtractArchive(item)
       .flatMap(Task.setProgress(20))
-      .flatMap(ConvertPdf(cfg.convert, _))
-      .flatMap(Task.setProgress(40))
-      .flatMap(TextExtraction(cfg.extraction, fts))
-      .flatMap(Task.setProgress(60))
-      .flatMap(analysisOnly[F](cfg))
-      .flatMap(Task.setProgress(80))
+      .flatMap(processAttachments0(cfg, fts, (40, 60, 80)))
       .flatMap(LinkProposal[F])
       .flatMap(SetGivenData[F](itemOps))
       .flatMap(Task.setProgress(99))
@@ -31,12 +26,7 @@ object ProcessItem {
       cfg: Config,
       fts: FtsClient[F]
   )(item: ItemData): Task[F, ProcessItemArgs, ItemData] =
-    ConvertPdf(cfg.convert, item)
-      .flatMap(Task.setProgress(30))
-      .flatMap(TextExtraction(cfg.extraction, fts))
-      .flatMap(Task.setProgress(60))
-      .flatMap(analysisOnly[F](cfg))
-      .flatMap(Task.setProgress(90))
+    processAttachments0[F](cfg, fts, (30, 60, 90))(item)

   def analysisOnly[F[_]: Sync](
       cfg: Config
@@ -45,4 +35,16 @@ object ProcessItem {
       .flatMap(FindProposal[F](cfg.processing))
       .flatMap(EvalProposals[F])
       .flatMap(SaveProposals[F])
+
+  private def processAttachments0[F[_]: ConcurrentEffect: ContextShift](
+      cfg: Config,
+      fts: FtsClient[F],
+      progress: (Int, Int, Int)
+  )(item: ItemData): Task[F, ProcessItemArgs, ItemData] =
+    ConvertPdf(cfg.convert, item)
+      .flatMap(Task.setProgress(progress._1))
+      .flatMap(TextExtraction(cfg.extraction, fts))
+      .flatMap(Task.setProgress(progress._2))
+      .flatMap(analysisOnly[F](cfg))
+      .flatMap(Task.setProgress(progress._3))
 }
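The last three hunks are a pure refactor: the duplicated ConvertPdf → TextExtraction → analysis chain from both processing entry points is pulled into the new `processAttachments0` helper, with the progress checkpoints passed as a tuple. Usage after the change, as shown in the hunks above:

// Full pipeline: extract archives first, then the shared attachment chain,
// reporting 40/60/80 percent progress.
ExtractArchive(item)
  .flatMap(Task.setProgress(20))
  .flatMap(processAttachments0(cfg, fts, (40, 60, 80)))

// Re-processing an existing item: only the shared chain, reporting 30/60/90.
processAttachments0[F](cfg, fts, (30, 60, 90))(item)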