Restrict proposals for due date

Avoid dates too far in the future.
This commit is contained in:
Eike Kettner 2020-06-26 01:27:11 +02:00
parent 91da3b149e
commit d79ae6233a
6 changed files with 53 additions and 11 deletions

View File

@ -53,6 +53,9 @@ object Duration {
/** Creates a duration spanning `n` days by wrapping `JDur.ofDays(n)`. */
def days(n: Long): Duration =
apply(JDur.ofDays(n))
/** Creates a duration spanning `n` years, where every year is counted
  * as exactly 365 days. Leap days are not taken into account, so the
  * result only approximates calendar years.
  */
def years(n: Long): Duration =
days(n * 365)
/** Creates a duration by handing `n` directly to the `Duration`
  * constructor. Presumably the wrapped value is a count of nanoseconds;
  * confirm against the class definition.
  */
def nanos(n: Long): Duration =
Duration(n)

View File

@ -341,6 +341,13 @@ docspell.joex {
}
}
# General config for processing documents
processing {
# Restricts proposals for due dates. Dates that lie this number of
# years or more in the future are not considered.
max-due-date-years = 10
}
# The same section is also present in the rest-server config. It is
# used when submitting files into the job queue for processing.
#

View File

@ -25,7 +25,8 @@ case class Config(
sendMail: MailSendConfig,
files: Files,
mailDebug: Boolean,
fullTextSearch: Config.FullTextSearch
fullTextSearch: Config.FullTextSearch,
processing: Config.Processing
)
object Config {
@ -47,4 +48,6 @@ object Config {
final case class Migration(indexAllChunk: Int)
}
/** General settings for processing documents.
  *
  * @param maxDueDateYears limit, in years counted from the current time;
  *   dates at or beyond this limit are not used for due date proposals
  */
final case class Processing(maxDueDateYears: Int)
}

View File

@ -8,6 +8,7 @@ import cats.effect.Sync
import docspell.analysis.contact._
import docspell.common.MetaProposal.Candidate
import docspell.common._
import docspell.joex.Config
import docspell.joex.scheduler.{Context, Task}
import docspell.store.records._
@ -16,33 +17,42 @@ import docspell.store.records._
*/
object FindProposal {
def apply[F[_]: Sync](data: ItemData): Task[F, ProcessItemArgs, ItemData] =
def apply[F[_]: Sync](
cfg: Config.Processing
)(data: ItemData): Task[F, ProcessItemArgs, ItemData] =
Task { ctx =>
val rmas = data.metas.map(rm => rm.copy(nerlabels = removeDuplicates(rm.nerlabels)))
ctx.logger.info("Starting find-proposal") *>
rmas
.traverse(rm =>
processAttachment(rm, data.findDates(rm), ctx)
processAttachment(cfg, rm, data.findDates(rm), ctx)
.map(ml => rm.copy(proposals = ml))
)
.map(rmv => data.copy(metas = rmv))
}
def processAttachment[F[_]: Sync](
cfg: Config.Processing,
rm: RAttachmentMeta,
rd: Vector[NerDateLabel],
ctx: Context[F, ProcessItemArgs]
): F[MetaProposalList] = {
val finder = Finder.searchExact(ctx).next(Finder.searchFuzzy(ctx))
List(finder.find(rm.nerlabels), makeDateProposal(rd))
List(finder.find(rm.nerlabels), makeDateProposal(cfg, rd))
.traverse(identity)
.map(MetaProposalList.flatten)
}
def makeDateProposal[F[_]: Sync](dates: Vector[NerDateLabel]): F[MetaProposalList] =
def makeDateProposal[F[_]: Sync](
cfg: Config.Processing,
dates: Vector[NerDateLabel]
): F[MetaProposalList] =
Timestamp.current[F].map { now =>
val latestFirst = dates.sortWith((l1, l2) => l1.date.isAfter(l2.date))
val maxFuture = now.plus(Duration.years(cfg.maxDueDateYears.toLong))
val latestFirst = dates
.filter(_.date.isBefore(maxFuture.toUtcDate))
.sortWith((l1, l2) => l1.date.isAfter(l2.date))
val nowDate = now.value.atZone(ZoneId.of("GMT")).toLocalDate
val (after, before) = latestFirst.span(ndl => ndl.date.isAfter(nowDate))

View File

@ -2,7 +2,6 @@ package docspell.joex.process
import cats.effect._
import docspell.common.ProcessItemArgs
import docspell.analysis.TextAnalysisConfig
import docspell.joex.scheduler.Task
import docspell.joex.Config
import docspell.ftsclient.FtsClient
@ -19,16 +18,16 @@ object ProcessItem {
.flatMap(Task.setProgress(40))
.flatMap(TextExtraction(cfg.extraction, fts))
.flatMap(Task.setProgress(60))
.flatMap(analysisOnly[F](cfg.textAnalysis))
.flatMap(analysisOnly[F](cfg))
.flatMap(Task.setProgress(80))
.flatMap(LinkProposal[F])
.flatMap(Task.setProgress(99))
def analysisOnly[F[_]: Sync](
cfg: TextAnalysisConfig
cfg: Config
)(item: ItemData): Task[F, ProcessItemArgs, ItemData] =
TextAnalysis[F](cfg)(item)
.flatMap(FindProposal[F])
TextAnalysis[F](cfg.textAnalysis)(item)
.flatMap(FindProposal[F](cfg.processing))
.flatMap(EvalProposals[F])
.flatMap(SaveProposals[F])

View File

@ -92,6 +92,9 @@ let
text-analysis = {
max-length = 10000;
};
processing = {
max-due-date-years = 10;
};
convert = {
chunk-size = 524288;
max-image-size = 14000000;
@ -666,6 +669,23 @@ in {
description = "Settings for text analysis";
};
processing = mkOption {
type = types.submodule({
options = {
max-due-date-years = mkOption {
type = types.int;
default = defaults.processing.max-due-date-years;
description = ''
Restricts proposals for due dates. Only dates earlier than this
number of years in the future are considered.
'';
};
};
});
default = defaults.processing;
description = "General config for processing documents";
};
convert = mkOption {
type = types.submodule({
options = {