Add language to schema, extend fts-client

This commit is contained in:
Eike Kettner
2020-06-20 22:27:26 +02:00
parent 3576c45d1a
commit 1f4ff0d4c4
13 changed files with 145 additions and 54 deletions

View File

@ -89,7 +89,14 @@ object Migration {
)
.map(caa =>
TextData
.attachment(caa.item, caa.id, caa.collective, caa.name, caa.content)
.attachment(
caa.item,
caa.id,
caa.collective,
caa.lang,
caa.name,
caa.content
)
)
)
)

View File

@ -1,6 +1,5 @@
package docspell.joex.process
import fs2.Stream
import bitpeace.{Mimetype, RangeDef}
import cats.data.OptionT
import cats.implicits._
@ -30,9 +29,11 @@ object TextExtraction {
item
)
)
_ <- ctx.logger.debug("Storing extracted texts")
_ <- txt.toList.traverse(rm => ctx.store.transact(RAttachmentMeta.upsert(rm._1)))
_ <- fts.indexData(ctx.logger, Stream.emits(txt.map(_._2)))
_ <- ctx.logger.debug("Storing extracted texts")
_ <- txt.toList.traverse(rm => ctx.store.transact(RAttachmentMeta.upsert(rm._1)))
idxItem =
TextData.item(item.item.id, ctx.args.meta.collective, item.item.name.some, None)
_ <- fts.indexData(ctx.logger, (idxItem +: txt.map(_._2)).toSeq: _*)
dur <- start
_ <- ctx.logger.info(s"Text extraction finished in ${dur.formatExact}")
} yield item.copy(metas = txt.map(_._1))
@ -52,6 +53,7 @@ object TextExtraction {
item.item.id,
ra.id,
collective,
lang,
ra.name,
rm.content
)