mirror of
				https://github.com/TheAnachronism/docspell.git
				synced 2025-11-03 18:00:11 +00:00 
			
		
		
		
	@@ -78,7 +78,14 @@ object AttachmentPageCount {
 | 
			
		||||
            s"No attachmentmeta record exists for ${ra.id.id}. Creating new."
 | 
			
		||||
          ) *> ctx.store.transact(
 | 
			
		||||
            RAttachmentMeta.insert(
 | 
			
		||||
              RAttachmentMeta(ra.id, None, Nil, MetaProposalList.empty, md.pageCount.some)
 | 
			
		||||
              RAttachmentMeta(
 | 
			
		||||
                ra.id,
 | 
			
		||||
                None,
 | 
			
		||||
                Nil,
 | 
			
		||||
                MetaProposalList.empty,
 | 
			
		||||
                md.pageCount.some,
 | 
			
		||||
                None
 | 
			
		||||
              )
 | 
			
		||||
            )
 | 
			
		||||
          )
 | 
			
		||||
        else 0.pure[F]
 | 
			
		||||
 
 | 
			
		||||
@@ -108,7 +108,18 @@ object ConvertPdf {
 | 
			
		||||
        ctx.logger.info(s"Conversion to pdf+txt successful. Saving file.") *>
 | 
			
		||||
          storePDF(ctx, cfg, ra, pdf)
 | 
			
		||||
            .flatMap(r =>
 | 
			
		||||
              txt.map(t => (r, item.changeMeta(ra.id, _.setContentIfEmpty(t.some)).some))
 | 
			
		||||
              txt.map(t =>
 | 
			
		||||
                (
 | 
			
		||||
                  r,
 | 
			
		||||
                  item
 | 
			
		||||
                    .changeMeta(
 | 
			
		||||
                      ra.id,
 | 
			
		||||
                      ctx.args.meta.language,
 | 
			
		||||
                      _.setContentIfEmpty(t.some)
 | 
			
		||||
                    )
 | 
			
		||||
                    .some
 | 
			
		||||
                )
 | 
			
		||||
              )
 | 
			
		||||
            )
 | 
			
		||||
 | 
			
		||||
      case ConversionResult.UnsupportedFormat(mt) =>
 | 
			
		||||
 
 | 
			
		||||
@@ -32,8 +32,12 @@ case class ItemData(
 | 
			
		||||
  def findDates(rm: RAttachmentMeta): Vector[NerDateLabel] =
 | 
			
		||||
    dateLabels.find(m => m.rm.id == rm.id).map(_.dates).getOrElse(Vector.empty)
 | 
			
		||||
 | 
			
		||||
  def mapMeta(attachId: Ident, f: RAttachmentMeta => RAttachmentMeta): ItemData = {
 | 
			
		||||
    val item = changeMeta(attachId, f)
 | 
			
		||||
  def mapMeta(
 | 
			
		||||
      attachId: Ident,
 | 
			
		||||
      lang: Language,
 | 
			
		||||
      f: RAttachmentMeta => RAttachmentMeta
 | 
			
		||||
  ): ItemData = {
 | 
			
		||||
    val item = changeMeta(attachId, lang, f)
 | 
			
		||||
    val next = metas.map(a => if (a.id == attachId) item else a)
 | 
			
		||||
    copy(metas = next)
 | 
			
		||||
  }
 | 
			
		||||
@@ -43,13 +47,14 @@ case class ItemData(
 | 
			
		||||
 | 
			
		||||
  def changeMeta(
 | 
			
		||||
      attachId: Ident,
 | 
			
		||||
      lang: Language,
 | 
			
		||||
      f: RAttachmentMeta => RAttachmentMeta
 | 
			
		||||
  ): RAttachmentMeta =
 | 
			
		||||
    f(findOrCreate(attachId))
 | 
			
		||||
    f(findOrCreate(attachId, lang))
 | 
			
		||||
 | 
			
		||||
  def findOrCreate(attachId: Ident): RAttachmentMeta =
 | 
			
		||||
  def findOrCreate(attachId: Ident, lang: Language): RAttachmentMeta =
 | 
			
		||||
    metas.find(_.id == attachId).getOrElse {
 | 
			
		||||
      RAttachmentMeta.empty(attachId)
 | 
			
		||||
      RAttachmentMeta.empty(attachId, lang)
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
@@ -78,7 +78,7 @@ object TextExtraction {
 | 
			
		||||
        pair._2
 | 
			
		||||
      )
 | 
			
		||||
 | 
			
		||||
    val rm = item.findOrCreate(ra.id)
 | 
			
		||||
    val rm = item.findOrCreate(ra.id, lang)
 | 
			
		||||
    rm.content match {
 | 
			
		||||
      case Some(_) =>
 | 
			
		||||
        ctx.logger.info("TextExtraction skipped, since text is already available.") *>
 | 
			
		||||
@@ -102,6 +102,7 @@ object TextExtraction {
 | 
			
		||||
      res  <- extractTextFallback(ctx, cfg, ra, lang)(fids)
 | 
			
		||||
      meta = item.changeMeta(
 | 
			
		||||
        ra.id,
 | 
			
		||||
        lang,
 | 
			
		||||
        rm =>
 | 
			
		||||
          rm.setContentIfEmpty(
 | 
			
		||||
            res.map(_.appendPdfMetaToText.text.trim).filter(_.nonEmpty)
 | 
			
		||||
 
 | 
			
		||||
@@ -0,0 +1,35 @@
 | 
			
		||||
ALTER TABLE "attachmentmeta"
 | 
			
		||||
ADD COLUMN "language" varchar(254);
 | 
			
		||||
 | 
			
		||||
update "attachmentmeta"
 | 
			
		||||
set "language" = 'deu'
 | 
			
		||||
where "attachid" in (
 | 
			
		||||
  select "m"."attachid"
 | 
			
		||||
  from "attachmentmeta" m
 | 
			
		||||
  inner join "attachment" a on "a"."attachid" = "m"."attachid"
 | 
			
		||||
  inner join "item" i on "a"."itemid" = "i"."itemid"
 | 
			
		||||
  inner join "collective" c on "c"."cid" = "i"."cid"
 | 
			
		||||
  where "c"."doclang" = 'deu'
 | 
			
		||||
);
 | 
			
		||||
 | 
			
		||||
update "attachmentmeta"
 | 
			
		||||
set "language" = 'eng'
 | 
			
		||||
where "attachid" in (
 | 
			
		||||
  select "m"."attachid"
 | 
			
		||||
  from "attachmentmeta" m
 | 
			
		||||
  inner join "attachment" a on "a"."attachid" = "m"."attachid"
 | 
			
		||||
  inner join "item" i on "a"."itemid" = "i"."itemid"
 | 
			
		||||
  inner join "collective" c on "c"."cid" = "i"."cid"
 | 
			
		||||
  where "c"."doclang" = 'eng'
 | 
			
		||||
);
 | 
			
		||||
 | 
			
		||||
update "attachmentmeta"
 | 
			
		||||
set "language" = 'fra'
 | 
			
		||||
where "attachid" in (
 | 
			
		||||
  select "m"."attachid"
 | 
			
		||||
  from "attachmentmeta" m
 | 
			
		||||
  inner join "attachment" a on "a"."attachid" = "m"."attachid"
 | 
			
		||||
  inner join "item" i on "a"."itemid" = "i"."itemid"
 | 
			
		||||
  inner join "collective" c on "c"."cid" = "i"."cid"
 | 
			
		||||
  where "c"."doclang" = 'fra'
 | 
			
		||||
);
 | 
			
		||||
@@ -0,0 +1,14 @@
 | 
			
		||||
ALTER TABLE `attachmentmeta`
 | 
			
		||||
ADD COLUMN (`language` varchar(254));
 | 
			
		||||
 | 
			
		||||
update `attachmentmeta` `m`
 | 
			
		||||
inner join (
 | 
			
		||||
    select `m`.`attachid`, `c`.`doclang`
 | 
			
		||||
    from `attachmentmeta` m
 | 
			
		||||
    inner join `attachment` a on `a`.`attachid` = `m`.`attachid`
 | 
			
		||||
    inner join `item` i on `a`.`itemid` = `i`.`itemid`
 | 
			
		||||
    inner join `collective` c on `c`.`cid` = `i`.`cid`
 | 
			
		||||
  ) as `c`
 | 
			
		||||
set `m`.`language` = `c`.`doclang`
 | 
			
		||||
where `m`.`attachid` = `c`.`attachid` and `m`.`language` is null;
 | 
			
		||||
 | 
			
		||||
@@ -0,0 +1,15 @@
 | 
			
		||||
ALTER TABLE "attachmentmeta"
 | 
			
		||||
ADD COLUMN "language" varchar(254);
 | 
			
		||||
 | 
			
		||||
with
 | 
			
		||||
  "attachlang" as (
 | 
			
		||||
    select "m"."attachid", "m"."language", "c"."doclang"
 | 
			
		||||
    from "attachmentmeta" m
 | 
			
		||||
    inner join "attachment" a on "a"."attachid" = "m"."attachid"
 | 
			
		||||
    inner join "item" i on "a"."itemid" = "i"."itemid"
 | 
			
		||||
    inner join "collective" c on "c"."cid" = "i"."cid"
 | 
			
		||||
  )
 | 
			
		||||
update "attachmentmeta" as "m"
 | 
			
		||||
set "language" = "c"."doclang"
 | 
			
		||||
from "attachlang" c
 | 
			
		||||
where "m"."attachid" = "c"."attachid" and "m"."language" is null;
 | 
			
		||||
@@ -160,7 +160,15 @@ object QAttachment {
 | 
			
		||||
      chunkSize: Int
 | 
			
		||||
  ): Stream[ConnectionIO, ContentAndName] =
 | 
			
		||||
    Select(
 | 
			
		||||
      select(a.id, a.itemId, item.cid, item.folder, c.language, a.name, am.content),
 | 
			
		||||
      select(
 | 
			
		||||
        a.id.s,
 | 
			
		||||
        a.itemId.s,
 | 
			
		||||
        item.cid.s,
 | 
			
		||||
        item.folder.s,
 | 
			
		||||
        coalesce(am.language.s, c.language.s).s,
 | 
			
		||||
        a.name.s,
 | 
			
		||||
        am.content.s
 | 
			
		||||
      ),
 | 
			
		||||
      from(a)
 | 
			
		||||
        .innerJoin(am, am.id === a.id)
 | 
			
		||||
        .innerJoin(item, item.id === a.itemId)
 | 
			
		||||
 
 | 
			
		||||
@@ -15,7 +15,8 @@ case class RAttachmentMeta(
 | 
			
		||||
    content: Option[String],
 | 
			
		||||
    nerlabels: List[NerLabel],
 | 
			
		||||
    proposals: MetaProposalList,
 | 
			
		||||
    pages: Option[Int]
 | 
			
		||||
    pages: Option[Int],
 | 
			
		||||
    language: Option[Language]
 | 
			
		||||
) {
 | 
			
		||||
 | 
			
		||||
  def setContentIfEmpty(txt: Option[String]): RAttachmentMeta =
 | 
			
		||||
@@ -27,8 +28,8 @@ case class RAttachmentMeta(
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
object RAttachmentMeta {
 | 
			
		||||
  def empty(attachId: Ident) =
 | 
			
		||||
    RAttachmentMeta(attachId, None, Nil, MetaProposalList.empty, None)
 | 
			
		||||
  def empty(attachId: Ident, lang: Language) =
 | 
			
		||||
    RAttachmentMeta(attachId, None, Nil, MetaProposalList.empty, None, Some(lang))
 | 
			
		||||
 | 
			
		||||
  final case class Table(alias: Option[String]) extends TableDef {
 | 
			
		||||
    val tableName = "attachmentmeta"
 | 
			
		||||
@@ -38,7 +39,9 @@ object RAttachmentMeta {
 | 
			
		||||
    val nerlabels = Column[List[NerLabel]]("nerlabels", this)
 | 
			
		||||
    val proposals = Column[MetaProposalList]("itemproposals", this)
 | 
			
		||||
    val pages     = Column[Int]("page_count", this)
 | 
			
		||||
    val all       = NonEmptyList.of[Column[_]](id, content, nerlabels, proposals, pages)
 | 
			
		||||
    val language  = Column[Language]("language", this)
 | 
			
		||||
    val all =
 | 
			
		||||
      NonEmptyList.of[Column[_]](id, content, nerlabels, proposals, pages, language)
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  val T = Table(None)
 | 
			
		||||
@@ -49,7 +52,7 @@ object RAttachmentMeta {
 | 
			
		||||
    DML.insert(
 | 
			
		||||
      T,
 | 
			
		||||
      T.all,
 | 
			
		||||
      fr"${v.id},${v.content},${v.nerlabels},${v.proposals},${v.pages}"
 | 
			
		||||
      fr"${v.id},${v.content},${v.nerlabels},${v.proposals},${v.pages},${v.language}"
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
  def exists(attachId: Ident): ConnectionIO[Boolean] =
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user