mirror of
				https://github.com/TheAnachronism/docspell.git
				synced 2025-11-03 18:00:11 +00:00 
			
		
		
		
	@@ -78,7 +78,14 @@ object AttachmentPageCount {
 | 
				
			|||||||
            s"No attachmentmeta record exists for ${ra.id.id}. Creating new."
 | 
					            s"No attachmentmeta record exists for ${ra.id.id}. Creating new."
 | 
				
			||||||
          ) *> ctx.store.transact(
 | 
					          ) *> ctx.store.transact(
 | 
				
			||||||
            RAttachmentMeta.insert(
 | 
					            RAttachmentMeta.insert(
 | 
				
			||||||
              RAttachmentMeta(ra.id, None, Nil, MetaProposalList.empty, md.pageCount.some)
 | 
					              RAttachmentMeta(
 | 
				
			||||||
 | 
					                ra.id,
 | 
				
			||||||
 | 
					                None,
 | 
				
			||||||
 | 
					                Nil,
 | 
				
			||||||
 | 
					                MetaProposalList.empty,
 | 
				
			||||||
 | 
					                md.pageCount.some,
 | 
				
			||||||
 | 
					                None
 | 
				
			||||||
 | 
					              )
 | 
				
			||||||
            )
 | 
					            )
 | 
				
			||||||
          )
 | 
					          )
 | 
				
			||||||
        else 0.pure[F]
 | 
					        else 0.pure[F]
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -108,7 +108,18 @@ object ConvertPdf {
 | 
				
			|||||||
        ctx.logger.info(s"Conversion to pdf+txt successful. Saving file.") *>
 | 
					        ctx.logger.info(s"Conversion to pdf+txt successful. Saving file.") *>
 | 
				
			||||||
          storePDF(ctx, cfg, ra, pdf)
 | 
					          storePDF(ctx, cfg, ra, pdf)
 | 
				
			||||||
            .flatMap(r =>
 | 
					            .flatMap(r =>
 | 
				
			||||||
              txt.map(t => (r, item.changeMeta(ra.id, _.setContentIfEmpty(t.some)).some))
 | 
					              txt.map(t =>
 | 
				
			||||||
 | 
					                (
 | 
				
			||||||
 | 
					                  r,
 | 
				
			||||||
 | 
					                  item
 | 
				
			||||||
 | 
					                    .changeMeta(
 | 
				
			||||||
 | 
					                      ra.id,
 | 
				
			||||||
 | 
					                      ctx.args.meta.language,
 | 
				
			||||||
 | 
					                      _.setContentIfEmpty(t.some)
 | 
				
			||||||
 | 
					                    )
 | 
				
			||||||
 | 
					                    .some
 | 
				
			||||||
 | 
					                )
 | 
				
			||||||
 | 
					              )
 | 
				
			||||||
            )
 | 
					            )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      case ConversionResult.UnsupportedFormat(mt) =>
 | 
					      case ConversionResult.UnsupportedFormat(mt) =>
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -32,8 +32,12 @@ case class ItemData(
 | 
				
			|||||||
  def findDates(rm: RAttachmentMeta): Vector[NerDateLabel] =
 | 
					  def findDates(rm: RAttachmentMeta): Vector[NerDateLabel] =
 | 
				
			||||||
    dateLabels.find(m => m.rm.id == rm.id).map(_.dates).getOrElse(Vector.empty)
 | 
					    dateLabels.find(m => m.rm.id == rm.id).map(_.dates).getOrElse(Vector.empty)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  def mapMeta(attachId: Ident, f: RAttachmentMeta => RAttachmentMeta): ItemData = {
 | 
					  def mapMeta(
 | 
				
			||||||
    val item = changeMeta(attachId, f)
 | 
					      attachId: Ident,
 | 
				
			||||||
 | 
					      lang: Language,
 | 
				
			||||||
 | 
					      f: RAttachmentMeta => RAttachmentMeta
 | 
				
			||||||
 | 
					  ): ItemData = {
 | 
				
			||||||
 | 
					    val item = changeMeta(attachId, lang, f)
 | 
				
			||||||
    val next = metas.map(a => if (a.id == attachId) item else a)
 | 
					    val next = metas.map(a => if (a.id == attachId) item else a)
 | 
				
			||||||
    copy(metas = next)
 | 
					    copy(metas = next)
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
@@ -43,13 +47,14 @@ case class ItemData(
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
  def changeMeta(
 | 
					  def changeMeta(
 | 
				
			||||||
      attachId: Ident,
 | 
					      attachId: Ident,
 | 
				
			||||||
 | 
					      lang: Language,
 | 
				
			||||||
      f: RAttachmentMeta => RAttachmentMeta
 | 
					      f: RAttachmentMeta => RAttachmentMeta
 | 
				
			||||||
  ): RAttachmentMeta =
 | 
					  ): RAttachmentMeta =
 | 
				
			||||||
    f(findOrCreate(attachId))
 | 
					    f(findOrCreate(attachId, lang))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  def findOrCreate(attachId: Ident): RAttachmentMeta =
 | 
					  def findOrCreate(attachId: Ident, lang: Language): RAttachmentMeta =
 | 
				
			||||||
    metas.find(_.id == attachId).getOrElse {
 | 
					    metas.find(_.id == attachId).getOrElse {
 | 
				
			||||||
      RAttachmentMeta.empty(attachId)
 | 
					      RAttachmentMeta.empty(attachId, lang)
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -78,7 +78,7 @@ object TextExtraction {
 | 
				
			|||||||
        pair._2
 | 
					        pair._2
 | 
				
			||||||
      )
 | 
					      )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    val rm = item.findOrCreate(ra.id)
 | 
					    val rm = item.findOrCreate(ra.id, lang)
 | 
				
			||||||
    rm.content match {
 | 
					    rm.content match {
 | 
				
			||||||
      case Some(_) =>
 | 
					      case Some(_) =>
 | 
				
			||||||
        ctx.logger.info("TextExtraction skipped, since text is already available.") *>
 | 
					        ctx.logger.info("TextExtraction skipped, since text is already available.") *>
 | 
				
			||||||
@@ -102,6 +102,7 @@ object TextExtraction {
 | 
				
			|||||||
      res  <- extractTextFallback(ctx, cfg, ra, lang)(fids)
 | 
					      res  <- extractTextFallback(ctx, cfg, ra, lang)(fids)
 | 
				
			||||||
      meta = item.changeMeta(
 | 
					      meta = item.changeMeta(
 | 
				
			||||||
        ra.id,
 | 
					        ra.id,
 | 
				
			||||||
 | 
					        lang,
 | 
				
			||||||
        rm =>
 | 
					        rm =>
 | 
				
			||||||
          rm.setContentIfEmpty(
 | 
					          rm.setContentIfEmpty(
 | 
				
			||||||
            res.map(_.appendPdfMetaToText.text.trim).filter(_.nonEmpty)
 | 
					            res.map(_.appendPdfMetaToText.text.trim).filter(_.nonEmpty)
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -0,0 +1,35 @@
 | 
				
			|||||||
 | 
					ALTER TABLE "attachmentmeta"
 | 
				
			||||||
 | 
					ADD COLUMN "language" varchar(254);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					update "attachmentmeta"
 | 
				
			||||||
 | 
					set "language" = 'deu'
 | 
				
			||||||
 | 
					where "attachid" in (
 | 
				
			||||||
 | 
					  select "m"."attachid"
 | 
				
			||||||
 | 
					  from "attachmentmeta" m
 | 
				
			||||||
 | 
					  inner join "attachment" a on "a"."attachid" = "m"."attachid"
 | 
				
			||||||
 | 
					  inner join "item" i on "a"."itemid" = "i"."itemid"
 | 
				
			||||||
 | 
					  inner join "collective" c on "c"."cid" = "i"."cid"
 | 
				
			||||||
 | 
					  where "c"."doclang" = 'deu'
 | 
				
			||||||
 | 
					);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					update "attachmentmeta"
 | 
				
			||||||
 | 
					set "language" = 'eng'
 | 
				
			||||||
 | 
					where "attachid" in (
 | 
				
			||||||
 | 
					  select "m"."attachid"
 | 
				
			||||||
 | 
					  from "attachmentmeta" m
 | 
				
			||||||
 | 
					  inner join "attachment" a on "a"."attachid" = "m"."attachid"
 | 
				
			||||||
 | 
					  inner join "item" i on "a"."itemid" = "i"."itemid"
 | 
				
			||||||
 | 
					  inner join "collective" c on "c"."cid" = "i"."cid"
 | 
				
			||||||
 | 
					  where "c"."doclang" = 'eng'
 | 
				
			||||||
 | 
					);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					update "attachmentmeta"
 | 
				
			||||||
 | 
					set "language" = 'fra'
 | 
				
			||||||
 | 
					where "attachid" in (
 | 
				
			||||||
 | 
					  select "m"."attachid"
 | 
				
			||||||
 | 
					  from "attachmentmeta" m
 | 
				
			||||||
 | 
					  inner join "attachment" a on "a"."attachid" = "m"."attachid"
 | 
				
			||||||
 | 
					  inner join "item" i on "a"."itemid" = "i"."itemid"
 | 
				
			||||||
 | 
					  inner join "collective" c on "c"."cid" = "i"."cid"
 | 
				
			||||||
 | 
					  where "c"."doclang" = 'fra'
 | 
				
			||||||
 | 
					);
 | 
				
			||||||
@@ -0,0 +1,14 @@
 | 
				
			|||||||
 | 
					ALTER TABLE `attachmentmeta`
 | 
				
			||||||
 | 
					ADD COLUMN (`language` varchar(254));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					update `attachmentmeta` `m`
 | 
				
			||||||
 | 
					inner join (
 | 
				
			||||||
 | 
					    select `m`.`attachid`, `c`.`doclang`
 | 
				
			||||||
 | 
					    from `attachmentmeta` m
 | 
				
			||||||
 | 
					    inner join `attachment` a on `a`.`attachid` = `m`.`attachid`
 | 
				
			||||||
 | 
					    inner join `item` i on `a`.`itemid` = `i`.`itemid`
 | 
				
			||||||
 | 
					    inner join `collective` c on `c`.`cid` = `i`.`cid`
 | 
				
			||||||
 | 
					  ) as `c`
 | 
				
			||||||
 | 
					set `m`.`language` = `c`.`doclang`
 | 
				
			||||||
 | 
					where `m`.`attachid` = `c`.`attachid` and `m`.`language` is null;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -0,0 +1,15 @@
 | 
				
			|||||||
 | 
					ALTER TABLE "attachmentmeta"
 | 
				
			||||||
 | 
					ADD COLUMN "language" varchar(254);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					with
 | 
				
			||||||
 | 
					  "attachlang" as (
 | 
				
			||||||
 | 
					    select "m"."attachid", "m"."language", "c"."doclang"
 | 
				
			||||||
 | 
					    from "attachmentmeta" m
 | 
				
			||||||
 | 
					    inner join "attachment" a on "a"."attachid" = "m"."attachid"
 | 
				
			||||||
 | 
					    inner join "item" i on "a"."itemid" = "i"."itemid"
 | 
				
			||||||
 | 
					    inner join "collective" c on "c"."cid" = "i"."cid"
 | 
				
			||||||
 | 
					  )
 | 
				
			||||||
 | 
					update "attachmentmeta" as "m"
 | 
				
			||||||
 | 
					set "language" = "c"."doclang"
 | 
				
			||||||
 | 
					from "attachlang" c
 | 
				
			||||||
 | 
					where "m"."attachid" = "c"."attachid" and "m"."language" is null;
 | 
				
			||||||
@@ -160,7 +160,15 @@ object QAttachment {
 | 
				
			|||||||
      chunkSize: Int
 | 
					      chunkSize: Int
 | 
				
			||||||
  ): Stream[ConnectionIO, ContentAndName] =
 | 
					  ): Stream[ConnectionIO, ContentAndName] =
 | 
				
			||||||
    Select(
 | 
					    Select(
 | 
				
			||||||
      select(a.id, a.itemId, item.cid, item.folder, c.language, a.name, am.content),
 | 
					      select(
 | 
				
			||||||
 | 
					        a.id.s,
 | 
				
			||||||
 | 
					        a.itemId.s,
 | 
				
			||||||
 | 
					        item.cid.s,
 | 
				
			||||||
 | 
					        item.folder.s,
 | 
				
			||||||
 | 
					        coalesce(am.language.s, c.language.s).s,
 | 
				
			||||||
 | 
					        a.name.s,
 | 
				
			||||||
 | 
					        am.content.s
 | 
				
			||||||
 | 
					      ),
 | 
				
			||||||
      from(a)
 | 
					      from(a)
 | 
				
			||||||
        .innerJoin(am, am.id === a.id)
 | 
					        .innerJoin(am, am.id === a.id)
 | 
				
			||||||
        .innerJoin(item, item.id === a.itemId)
 | 
					        .innerJoin(item, item.id === a.itemId)
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -15,7 +15,8 @@ case class RAttachmentMeta(
 | 
				
			|||||||
    content: Option[String],
 | 
					    content: Option[String],
 | 
				
			||||||
    nerlabels: List[NerLabel],
 | 
					    nerlabels: List[NerLabel],
 | 
				
			||||||
    proposals: MetaProposalList,
 | 
					    proposals: MetaProposalList,
 | 
				
			||||||
    pages: Option[Int]
 | 
					    pages: Option[Int],
 | 
				
			||||||
 | 
					    language: Option[Language]
 | 
				
			||||||
) {
 | 
					) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  def setContentIfEmpty(txt: Option[String]): RAttachmentMeta =
 | 
					  def setContentIfEmpty(txt: Option[String]): RAttachmentMeta =
 | 
				
			||||||
@@ -27,8 +28,8 @@ case class RAttachmentMeta(
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
object RAttachmentMeta {
 | 
					object RAttachmentMeta {
 | 
				
			||||||
  def empty(attachId: Ident) =
 | 
					  def empty(attachId: Ident, lang: Language) =
 | 
				
			||||||
    RAttachmentMeta(attachId, None, Nil, MetaProposalList.empty, None)
 | 
					    RAttachmentMeta(attachId, None, Nil, MetaProposalList.empty, None, Some(lang))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  final case class Table(alias: Option[String]) extends TableDef {
 | 
					  final case class Table(alias: Option[String]) extends TableDef {
 | 
				
			||||||
    val tableName = "attachmentmeta"
 | 
					    val tableName = "attachmentmeta"
 | 
				
			||||||
@@ -38,7 +39,9 @@ object RAttachmentMeta {
 | 
				
			|||||||
    val nerlabels = Column[List[NerLabel]]("nerlabels", this)
 | 
					    val nerlabels = Column[List[NerLabel]]("nerlabels", this)
 | 
				
			||||||
    val proposals = Column[MetaProposalList]("itemproposals", this)
 | 
					    val proposals = Column[MetaProposalList]("itemproposals", this)
 | 
				
			||||||
    val pages     = Column[Int]("page_count", this)
 | 
					    val pages     = Column[Int]("page_count", this)
 | 
				
			||||||
    val all       = NonEmptyList.of[Column[_]](id, content, nerlabels, proposals, pages)
 | 
					    val language  = Column[Language]("language", this)
 | 
				
			||||||
 | 
					    val all =
 | 
				
			||||||
 | 
					      NonEmptyList.of[Column[_]](id, content, nerlabels, proposals, pages, language)
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  val T = Table(None)
 | 
					  val T = Table(None)
 | 
				
			||||||
@@ -49,7 +52,7 @@ object RAttachmentMeta {
 | 
				
			|||||||
    DML.insert(
 | 
					    DML.insert(
 | 
				
			||||||
      T,
 | 
					      T,
 | 
				
			||||||
      T.all,
 | 
					      T.all,
 | 
				
			||||||
      fr"${v.id},${v.content},${v.nerlabels},${v.proposals},${v.pages}"
 | 
					      fr"${v.id},${v.content},${v.nerlabels},${v.proposals},${v.pages},${v.language}"
 | 
				
			||||||
    )
 | 
					    )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  def exists(attachId: Ident): ConnectionIO[Boolean] =
 | 
					  def exists(attachId: Ident): ConnectionIO[Boolean] =
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user