mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-02-15 20:33:26 +00:00
Restrict proposals for due date
Avoid dates too far in the future.
This commit is contained in:
parent
91da3b149e
commit
d79ae6233a
@ -53,6 +53,9 @@ object Duration {
|
||||
/** Creates a duration of `n` days by wrapping `JDur.ofDays(n)`. */
def days(n: Long): Duration =
|
||||
apply(JDur.ofDays(n))
|
||||
|
||||
/** Creates a duration of `n` years, where every year is counted as
  * exactly 365 days; leap days are not taken into account, so the
  * result is an approximation of `n` calendar years.
  */
def years(n: Long): Duration =
|
||||
days(n * 365)
|
||||
|
||||
/** Creates a duration by passing `n` directly to the `Duration`
  * constructor. NOTE(review): the name suggests `n` is a number of
  * nanoseconds -- confirm against the class definition.
  */
def nanos(n: Long): Duration =
|
||||
Duration(n)
|
||||
|
||||
|
@ -341,6 +341,13 @@ docspell.joex {
|
||||
}
|
||||
}
|
||||
|
||||
# General config for processing documents
|
||||
processing {
|
||||
# Restricts proposals for due dates. Only dates less than this
|
||||
# number of years (365 days each) in the future are considered.
|
||||
max-due-date-years = 10
|
||||
}
|
||||
|
||||
# The same section is also present in the rest-server config. It is
|
||||
# used when submitting files into the job queue for processing.
|
||||
#
|
||||
|
@ -25,7 +25,8 @@ case class Config(
|
||||
sendMail: MailSendConfig,
|
||||
files: Files,
|
||||
mailDebug: Boolean,
|
||||
fullTextSearch: Config.FullTextSearch
|
||||
fullTextSearch: Config.FullTextSearch,
|
||||
processing: Config.Processing
|
||||
)
|
||||
|
||||
object Config {
|
||||
@ -47,4 +48,6 @@ object Config {
|
||||
|
||||
final case class Migration(indexAllChunk: Int)
|
||||
}
|
||||
|
||||
/** General settings for processing documents.
  *
  * @param maxDueDateYears restricts due date proposals: only dates
  *   earlier than this number of years in the future are considered
  */
case class Processing(maxDueDateYears: Int)
|
||||
}
|
||||
|
@ -8,6 +8,7 @@ import cats.effect.Sync
|
||||
import docspell.analysis.contact._
|
||||
import docspell.common.MetaProposal.Candidate
|
||||
import docspell.common._
|
||||
import docspell.joex.Config
|
||||
import docspell.joex.scheduler.{Context, Task}
|
||||
import docspell.store.records._
|
||||
|
||||
@ -16,33 +17,42 @@ import docspell.store.records._
|
||||
*/
|
||||
object FindProposal {
|
||||
|
||||
def apply[F[_]: Sync](data: ItemData): Task[F, ProcessItemArgs, ItemData] =
|
||||
def apply[F[_]: Sync](
|
||||
cfg: Config.Processing
|
||||
)(data: ItemData): Task[F, ProcessItemArgs, ItemData] =
|
||||
Task { ctx =>
|
||||
val rmas = data.metas.map(rm => rm.copy(nerlabels = removeDuplicates(rm.nerlabels)))
|
||||
|
||||
ctx.logger.info("Starting find-proposal") *>
|
||||
rmas
|
||||
.traverse(rm =>
|
||||
processAttachment(rm, data.findDates(rm), ctx)
|
||||
processAttachment(cfg, rm, data.findDates(rm), ctx)
|
||||
.map(ml => rm.copy(proposals = ml))
|
||||
)
|
||||
.map(rmv => data.copy(metas = rmv))
|
||||
}
|
||||
|
||||
def processAttachment[F[_]: Sync](
|
||||
cfg: Config.Processing,
|
||||
rm: RAttachmentMeta,
|
||||
rd: Vector[NerDateLabel],
|
||||
ctx: Context[F, ProcessItemArgs]
|
||||
): F[MetaProposalList] = {
|
||||
val finder = Finder.searchExact(ctx).next(Finder.searchFuzzy(ctx))
|
||||
List(finder.find(rm.nerlabels), makeDateProposal(rd))
|
||||
List(finder.find(rm.nerlabels), makeDateProposal(cfg, rd))
|
||||
.traverse(identity)
|
||||
.map(MetaProposalList.flatten)
|
||||
}
|
||||
|
||||
def makeDateProposal[F[_]: Sync](dates: Vector[NerDateLabel]): F[MetaProposalList] =
|
||||
def makeDateProposal[F[_]: Sync](
|
||||
cfg: Config.Processing,
|
||||
dates: Vector[NerDateLabel]
|
||||
): F[MetaProposalList] =
|
||||
Timestamp.current[F].map { now =>
|
||||
val latestFirst = dates.sortWith((l1, l2) => l1.date.isAfter(l2.date))
|
||||
val maxFuture = now.plus(Duration.years(cfg.maxDueDateYears.toLong))
|
||||
val latestFirst = dates
|
||||
.filter(_.date.isBefore(maxFuture.toUtcDate))
|
||||
.sortWith((l1, l2) => l1.date.isAfter(l2.date))
|
||||
val nowDate = now.value.atZone(ZoneId.of("GMT")).toLocalDate
|
||||
val (after, before) = latestFirst.span(ndl => ndl.date.isAfter(nowDate))
|
||||
|
||||
|
@ -2,7 +2,6 @@ package docspell.joex.process
|
||||
|
||||
import cats.effect._
|
||||
import docspell.common.ProcessItemArgs
|
||||
import docspell.analysis.TextAnalysisConfig
|
||||
import docspell.joex.scheduler.Task
|
||||
import docspell.joex.Config
|
||||
import docspell.ftsclient.FtsClient
|
||||
@ -19,16 +18,16 @@ object ProcessItem {
|
||||
.flatMap(Task.setProgress(40))
|
||||
.flatMap(TextExtraction(cfg.extraction, fts))
|
||||
.flatMap(Task.setProgress(60))
|
||||
.flatMap(analysisOnly[F](cfg.textAnalysis))
|
||||
.flatMap(analysisOnly[F](cfg))
|
||||
.flatMap(Task.setProgress(80))
|
||||
.flatMap(LinkProposal[F])
|
||||
.flatMap(Task.setProgress(99))
|
||||
|
||||
def analysisOnly[F[_]: Sync](
|
||||
cfg: TextAnalysisConfig
|
||||
cfg: Config
|
||||
)(item: ItemData): Task[F, ProcessItemArgs, ItemData] =
|
||||
TextAnalysis[F](cfg)(item)
|
||||
.flatMap(FindProposal[F])
|
||||
TextAnalysis[F](cfg.textAnalysis)(item)
|
||||
.flatMap(FindProposal[F](cfg.processing))
|
||||
.flatMap(EvalProposals[F])
|
||||
.flatMap(SaveProposals[F])
|
||||
|
||||
|
@ -92,6 +92,9 @@ let
|
||||
text-analysis = {
|
||||
max-length = 10000;
|
||||
};
|
||||
processing = {
|
||||
max-due-date-years = 10;
|
||||
};
|
||||
convert = {
|
||||
chunk-size = 524288;
|
||||
max-image-size = 14000000;
|
||||
@ -666,6 +669,23 @@ in {
|
||||
description = "Settings for text analysis";
|
||||
};
|
||||
|
||||
processing = mkOption {
|
||||
type = types.submodule({
|
||||
options = {
|
||||
max-due-date-years = mkOption {
|
||||
type = types.int;
|
||||
default = defaults.processing.max-due-date-years;
|
||||
description = ''
|
||||
Restricts proposals for due dates. Only dates earlier than this
|
||||
number of years in the future are considered.
|
||||
'';
|
||||
};
|
||||
};
|
||||
});
|
||||
default = defaults.processing;
|
||||
description = "General config for processing documents";
|
||||
};
|
||||
|
||||
convert = mkOption {
|
||||
type = types.submodule({
|
||||
options = {
|
||||
|
Loading…
Reference in New Issue
Block a user