Move date proposal setting to nlp config

This commit is contained in:
Eike Kettner 2021-01-20 19:17:29 +01:00
parent 5d366c3bd6
commit 85ddc61d9d
4 changed files with 16 additions and 17 deletions

View File

@ -319,6 +319,10 @@ docspell.joex {
# This only has an effect if mode != disabled. # This only has an effect if mode != disabled.
clear-interval = "15 minutes" clear-interval = "15 minutes"
# Restricts proposals for due dates. Only dates that lie less than
# this number of years in the future are considered.
max-due-date-years = 10
regex-ner { regex-ner {
# Whether to enable custom NER annotation. This uses the # Whether to enable custom NER annotation. This uses the
# address book of a collective as input for NER tagging (to # address book of a collective as input for NER tagging (to
@ -517,13 +521,6 @@ docspell.joex {
} }
} }
# General config for processing documents
processing {
# Restricts proposals for due dates. Only dates earlier than this
# number of years in the future are considered.
max-due-date-years = 10
}
# The same section is also present in the rest-server config. It is # The same section is also present in the rest-server config. It is
# used when submitting files into the job queue for processing. # used when submitting files into the job queue for processing.
# #

View File

@ -31,8 +31,7 @@ case class Config(
sendMail: MailSendConfig, sendMail: MailSendConfig,
files: Files, files: Files,
mailDebug: Boolean, mailDebug: Boolean,
fullTextSearch: Config.FullTextSearch, fullTextSearch: Config.FullTextSearch
processing: Config.Processing
) )
object Config { object Config {
@ -55,8 +54,6 @@ object Config {
final case class Migration(indexAllChunk: Int) final case class Migration(indexAllChunk: Int)
} }
case class Processing(maxDueDateYears: Int)
case class TextAnalysis( case class TextAnalysis(
maxLength: Int, maxLength: Int,
workingDir: Path, workingDir: Path,
@ -84,7 +81,12 @@ object Config {
) )
} }
case class NlpConfig(mode: NlpMode, clearInterval: Duration, regexNer: RegexNer) case class NlpConfig(
mode: NlpMode,
clearInterval: Duration,
maxDueDateYears: Int,
regexNer: RegexNer
)
case class RegexNer(maxEntries: Int, fileCacheTime: Duration) case class RegexNer(maxEntries: Int, fileCacheTime: Duration)

View File

@ -20,7 +20,7 @@ object FindProposal {
type Args = ProcessItemArgs type Args = ProcessItemArgs
def apply[F[_]: Sync]( def apply[F[_]: Sync](
cfg: Config.Processing cfg: Config.TextAnalysis
)(data: ItemData): Task[F, Args, ItemData] = )(data: ItemData): Task[F, Args, ItemData] =
Task { ctx => Task { ctx =>
val rmas = data.metas.map(rm => rm.copy(nerlabels = removeDuplicates(rm.nerlabels))) val rmas = data.metas.map(rm => rm.copy(nerlabels = removeDuplicates(rm.nerlabels)))
@ -102,7 +102,7 @@ object FindProposal {
} }
def processAttachment[F[_]: Sync]( def processAttachment[F[_]: Sync](
cfg: Config.Processing, cfg: Config.TextAnalysis,
rm: RAttachmentMeta, rm: RAttachmentMeta,
rd: Vector[NerDateLabel], rd: Vector[NerDateLabel],
ctx: Context[F, ProcessItemArgs] ctx: Context[F, ProcessItemArgs]
@ -114,11 +114,11 @@ object FindProposal {
} }
def makeDateProposal[F[_]: Sync]( def makeDateProposal[F[_]: Sync](
cfg: Config.Processing, cfg: Config.TextAnalysis,
dates: Vector[NerDateLabel] dates: Vector[NerDateLabel]
): F[MetaProposalList] = ): F[MetaProposalList] =
Timestamp.current[F].map { now => Timestamp.current[F].map { now =>
val maxFuture = now.plus(Duration.years(cfg.maxDueDateYears.toLong)) val maxFuture = now.plus(Duration.years(cfg.nlp.maxDueDateYears.toLong))
val latestFirst = dates val latestFirst = dates
.filter(_.date.isBefore(maxFuture.toUtcDate)) .filter(_.date.isBefore(maxFuture.toUtcDate))
.sortWith((l1, l2) => l1.date.isAfter(l2.date)) .sortWith((l1, l2) => l1.date.isAfter(l2.date))

View File

@ -41,7 +41,7 @@ object ProcessItem {
regexNer: RegexNerFile[F] regexNer: RegexNerFile[F]
)(item: ItemData): Task[F, ProcessItemArgs, ItemData] = )(item: ItemData): Task[F, ProcessItemArgs, ItemData] =
TextAnalysis[F](cfg.textAnalysis, analyser, regexNer)(item) TextAnalysis[F](cfg.textAnalysis, analyser, regexNer)(item)
.flatMap(FindProposal[F](cfg.processing)) .flatMap(FindProposal[F](cfg.textAnalysis))
.flatMap(EvalProposals[F]) .flatMap(EvalProposals[F])
.flatMap(SaveProposals[F]) .flatMap(SaveProposals[F])