mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-04-05 02:49:32 +00:00
parent
6cf3f9be5a
commit
a70e9ab614
@ -78,7 +78,14 @@ object AttachmentPageCount {
|
||||
s"No attachmentmeta record exists for ${ra.id.id}. Creating new."
|
||||
) *> ctx.store.transact(
|
||||
RAttachmentMeta.insert(
|
||||
RAttachmentMeta(ra.id, None, Nil, MetaProposalList.empty, md.pageCount.some)
|
||||
RAttachmentMeta(
|
||||
ra.id,
|
||||
None,
|
||||
Nil,
|
||||
MetaProposalList.empty,
|
||||
md.pageCount.some,
|
||||
None
|
||||
)
|
||||
)
|
||||
)
|
||||
else 0.pure[F]
|
||||
|
@ -108,7 +108,18 @@ object ConvertPdf {
|
||||
ctx.logger.info(s"Conversion to pdf+txt successful. Saving file.") *>
|
||||
storePDF(ctx, cfg, ra, pdf)
|
||||
.flatMap(r =>
|
||||
txt.map(t => (r, item.changeMeta(ra.id, _.setContentIfEmpty(t.some)).some))
|
||||
txt.map(t =>
|
||||
(
|
||||
r,
|
||||
item
|
||||
.changeMeta(
|
||||
ra.id,
|
||||
ctx.args.meta.language,
|
||||
_.setContentIfEmpty(t.some)
|
||||
)
|
||||
.some
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
case ConversionResult.UnsupportedFormat(mt) =>
|
||||
|
@ -32,8 +32,12 @@ case class ItemData(
|
||||
def findDates(rm: RAttachmentMeta): Vector[NerDateLabel] =
|
||||
dateLabels.find(m => m.rm.id == rm.id).map(_.dates).getOrElse(Vector.empty)
|
||||
|
||||
def mapMeta(attachId: Ident, f: RAttachmentMeta => RAttachmentMeta): ItemData = {
|
||||
val item = changeMeta(attachId, f)
|
||||
def mapMeta(
|
||||
attachId: Ident,
|
||||
lang: Language,
|
||||
f: RAttachmentMeta => RAttachmentMeta
|
||||
): ItemData = {
|
||||
val item = changeMeta(attachId, lang, f)
|
||||
val next = metas.map(a => if (a.id == attachId) item else a)
|
||||
copy(metas = next)
|
||||
}
|
||||
@ -43,13 +47,14 @@ case class ItemData(
|
||||
|
||||
def changeMeta(
|
||||
attachId: Ident,
|
||||
lang: Language,
|
||||
f: RAttachmentMeta => RAttachmentMeta
|
||||
): RAttachmentMeta =
|
||||
f(findOrCreate(attachId))
|
||||
f(findOrCreate(attachId, lang))
|
||||
|
||||
def findOrCreate(attachId: Ident): RAttachmentMeta =
|
||||
def findOrCreate(attachId: Ident, lang: Language): RAttachmentMeta =
|
||||
metas.find(_.id == attachId).getOrElse {
|
||||
RAttachmentMeta.empty(attachId)
|
||||
RAttachmentMeta.empty(attachId, lang)
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -78,7 +78,7 @@ object TextExtraction {
|
||||
pair._2
|
||||
)
|
||||
|
||||
val rm = item.findOrCreate(ra.id)
|
||||
val rm = item.findOrCreate(ra.id, lang)
|
||||
rm.content match {
|
||||
case Some(_) =>
|
||||
ctx.logger.info("TextExtraction skipped, since text is already available.") *>
|
||||
@ -102,6 +102,7 @@ object TextExtraction {
|
||||
res <- extractTextFallback(ctx, cfg, ra, lang)(fids)
|
||||
meta = item.changeMeta(
|
||||
ra.id,
|
||||
lang,
|
||||
rm =>
|
||||
rm.setContentIfEmpty(
|
||||
res.map(_.appendPdfMetaToText.text.trim).filter(_.nonEmpty)
|
||||
|
@ -0,0 +1,35 @@
|
||||
ALTER TABLE "attachmentmeta"
|
||||
ADD COLUMN "language" varchar(254);
|
||||
|
||||
update "attachmentmeta"
|
||||
set "language" = 'deu'
|
||||
where "attachid" in (
|
||||
select "m"."attachid"
|
||||
from "attachmentmeta" m
|
||||
inner join "attachment" a on "a"."attachid" = "m"."attachid"
|
||||
inner join "item" i on "a"."itemid" = "i"."itemid"
|
||||
inner join "collective" c on "c"."cid" = "i"."cid"
|
||||
where "c"."doclang" = 'deu'
|
||||
);
|
||||
|
||||
update "attachmentmeta"
|
||||
set "language" = 'eng'
|
||||
where "attachid" in (
|
||||
select "m"."attachid"
|
||||
from "attachmentmeta" m
|
||||
inner join "attachment" a on "a"."attachid" = "m"."attachid"
|
||||
inner join "item" i on "a"."itemid" = "i"."itemid"
|
||||
inner join "collective" c on "c"."cid" = "i"."cid"
|
||||
where "c"."doclang" = 'eng'
|
||||
);
|
||||
|
||||
update "attachmentmeta"
|
||||
set "language" = 'fra'
|
||||
where "attachid" in (
|
||||
select "m"."attachid"
|
||||
from "attachmentmeta" m
|
||||
inner join "attachment" a on "a"."attachid" = "m"."attachid"
|
||||
inner join "item" i on "a"."itemid" = "i"."itemid"
|
||||
inner join "collective" c on "c"."cid" = "i"."cid"
|
||||
where "c"."doclang" = 'fra'
|
||||
);
|
@ -0,0 +1,14 @@
|
||||
ALTER TABLE `attachmentmeta`
|
||||
ADD COLUMN (`language` varchar(254));
|
||||
|
||||
update `attachmentmeta` `m`
|
||||
inner join (
|
||||
select `m`.`attachid`, `c`.`doclang`
|
||||
from `attachmentmeta` m
|
||||
inner join `attachment` a on `a`.`attachid` = `m`.`attachid`
|
||||
inner join `item` i on `a`.`itemid` = `i`.`itemid`
|
||||
inner join `collective` c on `c`.`cid` = `i`.`cid`
|
||||
) as `c`
|
||||
set `m`.`language` = `c`.`doclang`
|
||||
where `m`.`attachid` = `c`.`attachid` and `m`.`language` is null;
|
||||
|
@ -0,0 +1,15 @@
|
||||
ALTER TABLE "attachmentmeta"
|
||||
ADD COLUMN "language" varchar(254);
|
||||
|
||||
with
|
||||
"attachlang" as (
|
||||
select "m"."attachid", "m"."language", "c"."doclang"
|
||||
from "attachmentmeta" m
|
||||
inner join "attachment" a on "a"."attachid" = "m"."attachid"
|
||||
inner join "item" i on "a"."itemid" = "i"."itemid"
|
||||
inner join "collective" c on "c"."cid" = "i"."cid"
|
||||
)
|
||||
update "attachmentmeta" as "m"
|
||||
set "language" = "c"."doclang"
|
||||
from "attachlang" c
|
||||
where "m"."attachid" = "c"."attachid" and "m"."language" is null;
|
@ -160,7 +160,15 @@ object QAttachment {
|
||||
chunkSize: Int
|
||||
): Stream[ConnectionIO, ContentAndName] =
|
||||
Select(
|
||||
select(a.id, a.itemId, item.cid, item.folder, c.language, a.name, am.content),
|
||||
select(
|
||||
a.id.s,
|
||||
a.itemId.s,
|
||||
item.cid.s,
|
||||
item.folder.s,
|
||||
coalesce(am.language.s, c.language.s).s,
|
||||
a.name.s,
|
||||
am.content.s
|
||||
),
|
||||
from(a)
|
||||
.innerJoin(am, am.id === a.id)
|
||||
.innerJoin(item, item.id === a.itemId)
|
||||
|
@ -15,7 +15,8 @@ case class RAttachmentMeta(
|
||||
content: Option[String],
|
||||
nerlabels: List[NerLabel],
|
||||
proposals: MetaProposalList,
|
||||
pages: Option[Int]
|
||||
pages: Option[Int],
|
||||
language: Option[Language]
|
||||
) {
|
||||
|
||||
def setContentIfEmpty(txt: Option[String]): RAttachmentMeta =
|
||||
@ -27,8 +28,8 @@ case class RAttachmentMeta(
|
||||
}
|
||||
|
||||
object RAttachmentMeta {
|
||||
def empty(attachId: Ident) =
|
||||
RAttachmentMeta(attachId, None, Nil, MetaProposalList.empty, None)
|
||||
def empty(attachId: Ident, lang: Language) =
|
||||
RAttachmentMeta(attachId, None, Nil, MetaProposalList.empty, None, Some(lang))
|
||||
|
||||
final case class Table(alias: Option[String]) extends TableDef {
|
||||
val tableName = "attachmentmeta"
|
||||
@ -38,7 +39,9 @@ object RAttachmentMeta {
|
||||
val nerlabels = Column[List[NerLabel]]("nerlabels", this)
|
||||
val proposals = Column[MetaProposalList]("itemproposals", this)
|
||||
val pages = Column[Int]("page_count", this)
|
||||
val all = NonEmptyList.of[Column[_]](id, content, nerlabels, proposals, pages)
|
||||
val language = Column[Language]("language", this)
|
||||
val all =
|
||||
NonEmptyList.of[Column[_]](id, content, nerlabels, proposals, pages, language)
|
||||
}
|
||||
|
||||
val T = Table(None)
|
||||
@ -49,7 +52,7 @@ object RAttachmentMeta {
|
||||
DML.insert(
|
||||
T,
|
||||
T.all,
|
||||
fr"${v.id},${v.content},${v.nerlabels},${v.proposals},${v.pages}"
|
||||
fr"${v.id},${v.content},${v.nerlabels},${v.proposals},${v.pages},${v.language}"
|
||||
)
|
||||
|
||||
def exists(attachId: Ident): ConnectionIO[Boolean] =
|
||||
|
Loading…
x
Reference in New Issue
Block a user