mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-06-24 03:18:26 +00:00
Starting to support more file types
First, files are converted to PDF for archiving. This also makes it easier to create a preview. This is done via the `ConvertPdf` processing task (which is not yet implemented). Text extraction then tries first with the original file. If that fails, OCR is done on the (potentially) converted pdf file. To not lose information of the original file, it is saved using the table `attachment_source`. If the original file is already a pdf, or the conversion did not succeed, the `attachment` and `attachment_source` records point to the same file.
This commit is contained in:
@ -0,0 +1,11 @@
|
||||
-- Stores the original file of an attachment so that no information is lost
-- when the attachment itself is replaced by a converted PDF.
-- `id` doubles as primary key and as reference to `attachment`.`attachid`,
-- so there is at most one source row per attachment.
-- NOTE(review): on MariaDB/MySQL a TIMESTAMP column declared without an
-- explicit DEFAULT may receive automatic DEFAULT / ON UPDATE CURRENT_TIMESTAMP
-- behaviour depending on `explicit_defaults_for_timestamp` -- confirm that
-- `created` is not rewritten on UPDATE in the target configuration.
CREATE TABLE `attachment_source` (
  `id`       VARCHAR(254) NOT NULL,
  `file_id`  VARCHAR(254) NOT NULL,
  `filename` VARCHAR(254),
  `created`  TIMESTAMP    NOT NULL,
  PRIMARY KEY (`id`),
  FOREIGN KEY (`file_id`) REFERENCES `filemeta` (`id`),
  FOREIGN KEY (`id`) REFERENCES `attachment` (`attachid`)
);
|
||||
|
||||
-- Backfill: every existing attachment starts out with a source row that
-- points to the very same file as the attachment itself.
-- The target columns are listed explicitly so the statement does not depend
-- on the physical column order of `attachment_source`.
INSERT INTO `attachment_source` (`id`, `file_id`, `filename`, `created`)
SELECT
  `attachid`,
  `filemetaid`,
  `name`,
  `created`
FROM `attachment`;
|
@ -0,0 +1,11 @@
|
||||
-- Stores the original file of an attachment so that no information is lost
-- when the attachment itself is replaced by a converted PDF.
-- "id" doubles as primary key and as reference to "attachment"."attachid",
-- so there is at most one source row per attachment.
CREATE TABLE "attachment_source" (
  "id"       VARCHAR(254) NOT NULL,
  "file_id"  VARCHAR(254) NOT NULL,
  "filename" VARCHAR(254),
  "created"  TIMESTAMP    NOT NULL,
  PRIMARY KEY ("id"),
  FOREIGN KEY ("file_id") REFERENCES "filemeta" ("id"),
  FOREIGN KEY ("id") REFERENCES "attachment" ("attachid")
);
|
||||
|
||||
-- Backfill: every existing attachment starts out with a source row that
-- points to the very same file as the attachment itself.
-- The target columns are listed explicitly so the statement does not depend
-- on the physical column order of "attachment_source".
INSERT INTO "attachment_source" ("id", "file_id", "filename", "created")
SELECT
  "attachid",
  "filemetaid",
  "name",
  "created"
FROM "attachment";
|
Reference in New Issue
Block a user