mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-06-22 02:18:26 +00:00
Starting to support more file types
First, files are converted to PDF for archiving. This also makes it easier to create a preview. The conversion is done via the `ConvertPdf` processing task (which is not yet implemented). Text extraction then first tries the original file. If that fails, OCR is done on the (potentially) converted PDF file. To not lose information from the original file, it is saved using the table `attachment_source`. If the original file is already a PDF, or the conversion did not succeed, the `attachment` and `attachment_source` records point to the same file.
This commit is contained in:
@ -0,0 +1,11 @@
|
||||
-- Holds the original (source) file of an attachment. The `id` column is
-- both the primary key and a foreign key to `attachment`.`attachid`, so an
-- attachment can have at most one source row.
CREATE TABLE `attachment_source` (
  `id` varchar(254) not null primary key,
  `file_id` varchar(254) not null,
  `filename` varchar(254),
  `created` timestamp not null,
  foreign key (`file_id`) references `filemeta`(`id`),
  foreign key (`id`) references `attachment`(`attachid`)
);

-- Seed one source row for every existing attachment, pointing at the same
-- file. The target columns are listed explicitly so the copy does not
-- depend on the physical column order of `attachment_source`.
INSERT INTO `attachment_source` (`id`, `file_id`, `filename`, `created`)
  SELECT `attachid`, `filemetaid`, `name`, `created` FROM `attachment`;
|
@ -0,0 +1,11 @@
|
||||
-- Holds the original (source) file of an attachment. The "id" column is
-- both the primary key and a foreign key to "attachment"."attachid", so an
-- attachment can have at most one source row.
CREATE TABLE "attachment_source" (
  "id" varchar(254) not null primary key,
  "file_id" varchar(254) not null,
  "filename" varchar(254),
  "created" timestamp not null,
  foreign key ("file_id") references "filemeta"("id"),
  foreign key ("id") references "attachment"("attachid")
);

-- Seed one source row for every existing attachment, pointing at the same
-- file. The target columns are listed explicitly so the copy does not
-- depend on the physical column order of "attachment_source".
INSERT INTO "attachment_source" ("id", "file_id", "filename", "created")
  SELECT "attachid", "filemetaid", "name", "created" FROM "attachment";
|
@ -41,6 +41,20 @@ object RAttachment {
|
||||
/** Selects the attachment whose id equals `attachId`; yields `None`
  * when no row matches.
  */
def findById(attachId: Ident): ConnectionIO[Option[RAttachment]] = {
  val byId = id.is(attachId)
  selectSimple(all, table, byId).query[RAttachment].option
}
|
||||
|
||||
/** Looks up the file metadata of the file referenced by an attachment.
  *
  * This record's table (alias `a`) is inner-joined with the
  * [[RFileMeta]] table (alias `m`) on the attachment's file id, and the
  * result is restricted to the attachment with the given id.
  *
  * @param attachId the id of the attachment
  * @return the matching file metadata, or `None` if the query yields no row
  */
def findMeta(attachId: Ident): ConnectionIO[Option[FileMeta]] = {
  import bitpeace.sql._

  // a.<fileId> = m.<id> links the attachment to its file metadata
  val joined =
    table ++ fr"a INNER JOIN" ++ RFileMeta.table ++ fr"m ON" ++
      fileId.prefix("a").is(RFileMeta.Columns.id.prefix("m"))
  val byAttachment = id.prefix("a").is(attachId)

  selectSimple(RFileMeta.Columns.all.map(_.prefix("m")), joined, byAttachment)
    .query[FileMeta]
    .option
}
|
||||
|
||||
def findByIdAndCollective(attachId: Ident, collective: Ident): ConnectionIO[Option[RAttachment]] =
|
||||
selectSimple(
|
||||
all.map(_.prefix("a")),
|
||||
|
@ -0,0 +1,44 @@
|
||||
package docspell.store.records
|
||||
|
||||
import doobie._
|
||||
import doobie.implicits._
|
||||
import docspell.common._
|
||||
import docspell.store.impl._
|
||||
import docspell.store.impl.Implicits._
|
||||
|
||||
/** The original (source) file an attachment originates from.
  *
  * The `id` is shared with the corresponding attachment, which creates
  * a 1-1 (or 0..1-1) relationship between the two records.
  */
case class RAttachmentSource(
    id: Ident, // identical to RAttachment.id
    fileId: Ident,
    name: Option[String],
    created: Timestamp
)
|
||||
|
||||
/** Table definition and queries for [[RAttachmentSource]] records. */
object RAttachmentSource {

  /** Name of the backing table. */
  val table: Fragment = fr"attachment_source"

  /** Column definitions of the `attachment_source` table. */
  object Columns {
    val id      = Column("id")
    val fileId  = Column("file_id")
    val name    = Column("filename")
    val created = Column("created")

    // Order matters: `insert` supplies its values in exactly this order.
    val all: List[Column] = List(id, fileId, name, created)
  }

  import Columns._

  /** Builds a source record that refers to the same file as the given
    * attachment, copying its id, file id, name and creation time.
    */
  def of(ra: RAttachment): RAttachmentSource =
    RAttachmentSource(ra.id, ra.fileId, ra.name, ra.created)

  /** Inserts the given record; the result is the update count of the
    * executed statement.
    */
  def insert(v: RAttachmentSource): ConnectionIO[Int] = {
    val values = fr"${v.id},${v.fileId},${v.name},${v.created}"
    insertRow(table, all, values).update.run
  }

  /** Selects the source record whose id equals `attachId`, if any. */
  def findById(attachId: Ident): ConnectionIO[Option[RAttachmentSource]] = {
    val byId = id.is(attachId)
    selectSimple(all, table, byId).query[RAttachmentSource].option
  }
}
|
@ -1,7 +1,12 @@
|
||||
package docspell.store.records
|
||||
|
||||
import bitpeace.FileMeta
|
||||
import doobie._
|
||||
import doobie.implicits._
|
||||
|
||||
import docspell.common._
|
||||
import docspell.store.impl._
|
||||
import docspell.store.impl.Implicits._
|
||||
|
||||
object RFileMeta {
|
||||
|
||||
@ -19,4 +24,10 @@ object RFileMeta {
|
||||
val all = List(id, timestamp, mimetype, length, checksum, chunks, chunksize)
|
||||
|
||||
}
|
||||
|
||||
/** Selects the file metadata whose id equals `fid`; yields `None` when
  * no row matches.
  */
def findById(fid: Ident): ConnectionIO[Option[FileMeta]] = {
  import bitpeace.sql._

  val byId = Columns.id.is(fid)
  selectSimple(Columns.all, table, byId).query[FileMeta].option
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user