Refactoring for migrating to binny library

This commit is contained in:
eikek 2021-09-22 00:28:47 +02:00
parent 1f98d948b0
commit 20a829cf7a
45 changed files with 485 additions and 344 deletions

View File

@ -368,7 +368,7 @@ val store = project
name := "docspell-store",
libraryDependencies ++=
Dependencies.doobie ++
Dependencies.bitpeace ++
Dependencies.binny ++
Dependencies.tika ++
Dependencies.fs2 ++
Dependencies.databases ++

View File

@ -70,7 +70,7 @@ object BackendApp {
tagImpl <- OTag[F](store)
equipImpl <- OEquipment[F](store)
orgImpl <- OOrganization(store)
uploadImpl <- OUpload(store, queue, cfg.files, joexImpl)
uploadImpl <- OUpload(store, queue, joexImpl)
nodeImpl <- ONode(store)
jobImpl <- OJob(store, joexImpl)
createIndex <- CreateIndex.resource(ftsClient, store)
@ -115,7 +115,7 @@ object BackendApp {
httpClientEc: ExecutionContext
)(ftsFactory: Client[F] => Resource[F, FtsClient[F]]): Resource[F, BackendApp[F]] =
for {
store <- Store.create(cfg.jdbc, connectEC)
store <- Store.create(cfg.jdbc, cfg.files.chunkSize, connectEC)
httpClient <- BlazeClientBuilder[F](httpClientEc).resource
ftsClient <- ftsFactory(httpClient)
backend <- create(cfg, store, httpClient, ftsClient)

View File

@ -17,7 +17,6 @@ import docspell.store._
import docspell.store.queries.{QAttachment, QItem}
import docspell.store.records._
import bitpeace.{FileMeta, RangeDef}
import doobie.implicits._
trait OItemSearch[F[_]] {
@ -90,10 +89,10 @@ object OItemSearch {
trait BinaryData[F[_]] {
def data: Stream[F, Byte]
def name: Option[String]
def meta: FileMeta
def meta: RFileMeta
def fileId: Ident
}
case class AttachmentData[F[_]](ra: RAttachment, meta: FileMeta, data: Stream[F, Byte])
case class AttachmentData[F[_]](ra: RAttachment, meta: RFileMeta, data: Stream[F, Byte])
extends BinaryData[F] {
val name = ra.name
val fileId = ra.fileId
@ -101,7 +100,7 @@ object OItemSearch {
case class AttachmentSourceData[F[_]](
rs: RAttachmentSource,
meta: FileMeta,
meta: RFileMeta,
data: Stream[F, Byte]
) extends BinaryData[F] {
val name = rs.name
@ -110,7 +109,7 @@ object OItemSearch {
case class AttachmentPreviewData[F[_]](
rs: RAttachmentPreview,
meta: FileMeta,
meta: RFileMeta,
data: Stream[F, Byte]
) extends BinaryData[F] {
val name = rs.name
@ -119,7 +118,7 @@ object OItemSearch {
case class AttachmentArchiveData[F[_]](
rs: RAttachmentArchive,
meta: FileMeta,
meta: RFileMeta,
data: Stream[F, Byte]
) extends BinaryData[F] {
val name = rs.name
@ -189,7 +188,7 @@ object OItemSearch {
AttachmentData[F](
ra,
m,
store.bitpeace.fetchData2(RangeDef.all)(Stream.emit(m))
store.fileStore.getBytes(m.id)
)
}
@ -209,7 +208,7 @@ object OItemSearch {
AttachmentSourceData[F](
ra,
m,
store.bitpeace.fetchData2(RangeDef.all)(Stream.emit(m))
store.fileStore.getBytes(m.id)
)
}
@ -229,7 +228,7 @@ object OItemSearch {
AttachmentPreviewData[F](
ra,
m,
store.bitpeace.fetchData2(RangeDef.all)(Stream.emit(m))
store.fileStore.getBytes(m.id)
)
}
@ -249,7 +248,7 @@ object OItemSearch {
AttachmentPreviewData[F](
ra,
m,
store.bitpeace.fetchData2(RangeDef.all)(Stream.emit(m))
store.fileStore.getBytes(m.id)
)
}
@ -269,7 +268,7 @@ object OItemSearch {
AttachmentArchiveData[F](
ra,
m,
store.bitpeace.fetchData2(RangeDef.all)(Stream.emit(m))
store.fileStore.getBytes(m.id)
)
}
@ -277,15 +276,11 @@ object OItemSearch {
(None: Option[AttachmentArchiveData[F]]).pure[F]
}
private def makeBinaryData[A](fileId: Ident)(f: FileMeta => A): F[Option[A]] =
store.bitpeace
.get(fileId.id)
.unNoneTerminate
.compile
.last
.map(
_.map(m => f(m))
)
private def makeBinaryData[A](fileId: Ident)(f: RFileMeta => A): F[Option[A]] =
store.fileStore
.findMeta(fileId)
.map(fm => f(fm))
.value
def findAttachmentMeta(id: Ident, collective: Ident): F[Option[RAttachmentMeta]] =
store.transact(QAttachment.getAttachmentMeta(id, collective))

View File

@ -9,7 +9,6 @@ package docspell.backend.ops
import cats.data.OptionT
import cats.effect._
import cats.implicits._
import fs2.Stream
import docspell.backend.ops.OMail._
import docspell.common._
@ -18,7 +17,6 @@ import docspell.store.queries.QMails
import docspell.store.records._
import docspell.store.syntax.MimeTypes._
import bitpeace.{FileMeta, RangeDef}
import emil._
trait OMail[F[_]] {
@ -81,14 +79,17 @@ object OMail {
)
sealed trait AttachSelection {
def filter(v: Vector[(RAttachment, FileMeta)]): Vector[(RAttachment, FileMeta)]
def filter(v: Vector[(RAttachment, RFileMeta)]): Vector[(RAttachment, RFileMeta)]
}
object AttachSelection {
case object All extends AttachSelection {
def filter(v: Vector[(RAttachment, FileMeta)]): Vector[(RAttachment, FileMeta)] = v
def filter(v: Vector[(RAttachment, RFileMeta)]): Vector[(RAttachment, RFileMeta)] =
v
}
case class Selected(ids: List[Ident]) extends AttachSelection {
def filter(v: Vector[(RAttachment, FileMeta)]): Vector[(RAttachment, FileMeta)] = {
def filter(
v: Vector[(RAttachment, RFileMeta)]
): Vector[(RAttachment, RFileMeta)] = {
val set = ids.toSet
v.filter(set contains _._1.id)
}
@ -232,10 +233,10 @@ object OMail {
} yield {
val addAttach = m.attach.filter(ras).map { a =>
Attach[F](
Stream.emit(a._2).through(store.bitpeace.fetchData2(RangeDef.all))
store.fileStore.getBytes(a._2.id)
).withFilename(a._1.name)
.withLength(a._2.length)
.withMimeType(a._2.mimetype.toLocal.toEmil)
.withLength(a._2.length.bytes)
.withMimeType(a._2.mimetype.toEmil)
}
val fields: Seq[Trans[F]] = Seq(
From(sett.mailFrom),

View File

@ -12,14 +12,13 @@ import cats.effect._
import cats.implicits._
import fs2.Stream
import docspell.backend.{Config, JobFactory}
import docspell.backend.JobFactory
import docspell.common._
import docspell.common.syntax.all._
import docspell.store.Store
import docspell.store.queue.JobQueue
import docspell.store.records._
import bitpeace.MimetypeHint
import org.log4s._
trait OUpload[F[_]] {
@ -116,7 +115,6 @@ object OUpload {
def apply[F[_]: Sync](
store: Store[F],
queue: JobQueue[F],
cfg: Config.Files,
joex: OJoex[F]
): Resource[F, OUpload[F]] =
Resource.pure[F, OUpload[F]](new OUpload[F] {
@ -205,11 +203,10 @@ object OUpload {
/** Saves the file into the database. */
private def saveFile(file: File[F]): F[Option[ProcessItemArgs.File]] =
logger.finfo(s"Receiving file $file") *>
store.bitpeace
.saveNew(file.data, cfg.chunkSize, MimetypeHint(file.name, None), None)
file.data
.through(store.fileStore.save(MimeTypeHint(file.name, None)))
.compile
.lastOrError
.map(fm => Ident.unsafe(fm.id))
.attempt
.map(
_.fold(

View File

@ -123,7 +123,10 @@ object MimeType {
object HtmlMatch {
def unapply(mt: MimeType): Option[MimeType] =
Some(mt).filter(_.matches(html))
if (
(mt.primary == "text" || mt.primary == "application") && mt.sub.contains("html")
) Some(mt)
else None
}
object NonHtmlText {

View File

@ -10,6 +10,9 @@ case class MimeTypeHint(filename: Option[String], advertised: Option[String]) {
def withName(name: String): MimeTypeHint =
copy(filename = Some(name))
def withAdvertised(advertised: String): MimeTypeHint =
copy(advertised = Some(advertised))
}
object MimeTypeHint {

View File

@ -468,7 +468,7 @@ Docpell Update Check
# The chunk size used when storing files. This should be the same
# as used with the rest server.
chunk-size = 524288
chunk-size = ${docspell.joex.files.chunk-size}
# A string used to change the filename of the converted pdf file.
# If empty, the original file name is used for the pdf file ( the

View File

@ -122,12 +122,12 @@ object JoexAppImpl {
for {
httpClient <- BlazeClientBuilder[F](clientEC).resource
client = JoexClient(httpClient)
store <- Store.create(cfg.jdbc, connectEC)
store <- Store.create(cfg.jdbc, cfg.files.chunkSize, connectEC)
queue <- JobQueue(store)
pstore <- PeriodicTaskStore.create(store)
nodeOps <- ONode(store)
joex <- OJoex(client, store)
upload <- OUpload(store, queue, cfg.files, joex)
upload <- OUpload(store, queue, joex)
fts <- createFtsClient(cfg)(httpClient)
createIndex <- CreateIndex.resource(fts, store)
itemOps <- OItem(store, fts, createIndex, queue, joex)
@ -212,7 +212,7 @@ object JoexAppImpl {
.withTask(
JobTask.json(
MakePreviewArgs.taskName,
MakePreviewTask[F](cfg.convert, cfg.extraction.preview),
MakePreviewTask[F](cfg.extraction.preview),
MakePreviewTask.onCancel[F]
)
)

View File

@ -17,8 +17,6 @@ import docspell.common._
import docspell.store.Store
import docspell.store.records.RClassifierModel
import bitpeace.RangeDef
object Classify {
def apply[F[_]: Async](
@ -33,11 +31,7 @@ object Classify {
_ <- OptionT.liftF(logger.info(s"Guessing label for ${cname.name}"))
model <- OptionT(store.transact(RClassifierModel.findByName(coll, cname.name)))
.flatTapNone(logger.debug("No classifier model found."))
modelData =
store.bitpeace
.get(model.fileId.id)
.unNoneTerminate
.through(store.bitpeace.fetchData2(RangeDef.all))
modelData = store.fileStore.getBytes(model.fileId)
cls <- OptionT(File.withTempDir(workingDir, "classify").use { dir =>
val modelFile = dir.resolve("model.ser.gz")
modelData

View File

@ -90,8 +90,8 @@ object LearnClassifierTask {
)
n <- ctx.store.transact(RClassifierModel.deleteAll(list.map(_.id)))
_ <- list
.map(_.fileId.id)
.traverse(id => ctx.store.bitpeace.delete(id).compile.drain)
.map(_.fileId)
.traverse(id => ctx.store.fileStore.delete(id))
_ <- ctx.logger.debug(s"Deleted $n model files.")
} yield ()

View File

@ -16,8 +16,6 @@ import docspell.joex.scheduler._
import docspell.store.Store
import docspell.store.records.RClassifierModel
import bitpeace.MimetypeHint
object StoreClassifierModel {
def handleModel[F[_]: Async](
@ -43,16 +41,16 @@ object StoreClassifierModel {
)
_ <- logger.debug(s"Storing new trained model for: ${modelName.name}")
fileData = Files[F].readAll(trainedModel.model)
newFile <-
store.bitpeace.saveNew(fileData, 4096, MimetypeHint.none).compile.lastOrError
newFileId <-
fileData.through(store.fileStore.save(MimeTypeHint.none)).compile.lastOrError
_ <- store.transact(
RClassifierModel.updateFile(collective, modelName.name, Ident.unsafe(newFile.id))
RClassifierModel.updateFile(collective, modelName.name, newFileId)
)
_ <- logger.debug(s"New model stored at file ${newFile.id}")
_ <- logger.debug(s"New model stored at file ${newFileId.id}")
_ <- oldFile match {
case Some(fid) =>
logger.debug(s"Deleting old model file ${fid.id}") *>
store.bitpeace.delete(fid.id).compile.drain
store.fileStore.delete(fid)
case None => ().pure[F]
}
} yield ()

View File

@ -19,10 +19,6 @@ import docspell.joex.Config
import docspell.joex.scheduler.{Context, Task}
import docspell.store.records._
import bitpeace.FileMeta
import bitpeace.Mimetype
import bitpeace.MimetypeHint
import bitpeace.RangeDef
import io.circe.generic.semiauto._
import io.circe.{Decoder, Encoder}
@ -55,8 +51,11 @@ object PdfConvTask {
// --- Helper
// check if file exists and if it is pdf and if source id is the same and if ocrmypdf is enabled
def checkInputs[F[_]: Sync](cfg: Config, ctx: Context[F, Args]): F[Option[FileMeta]] = {
val none: Option[FileMeta] = None
def checkInputs[F[_]: Sync](
cfg: Config,
ctx: Context[F, Args]
): F[Option[RFileMeta]] = {
val none: Option[RFileMeta] = None
val checkSameFiles =
(for {
ra <- OptionT(ctx.store.transact(RAttachment.findById(ctx.args.attachId)))
@ -67,7 +66,7 @@ object PdfConvTask {
val existsPdf =
for {
meta <- ctx.store.transact(RAttachment.findMeta(ctx.args.attachId))
res = meta.filter(_.mimetype.matches(Mimetype.applicationPdf))
res = meta.filter(_.mimetype.matches(MimeType.pdf))
_ <-
if (res.isEmpty)
ctx.logger.info(
@ -91,12 +90,10 @@ object PdfConvTask {
def convert[F[_]: Async](
cfg: Config,
ctx: Context[F, Args],
in: FileMeta
in: RFileMeta
): F[Unit] = {
val bp = ctx.store.bitpeace
val data = Stream
.emit(in)
.through(bp.fetchData2(RangeDef.all))
val fs = ctx.store.fileStore
val data = fs.getBytes(in.id)
val storeResult: ConversionResult.Handler[F, Unit] =
Kleisli {
@ -122,7 +119,7 @@ object PdfConvTask {
OcrMyPdf.toPDF[F, Unit](
cfg.convert.ocrmypdf,
lang,
in.chunksize,
cfg.files.chunkSize,
ctx.logger
)(data, storeResult)
@ -140,18 +137,13 @@ object PdfConvTask {
def storeToAttachment[F[_]: Sync](
ctx: Context[F, Args],
meta: FileMeta,
meta: RFileMeta,
newFile: Stream[F, Byte]
): F[Unit] = {
val mimeHint = MimetypeHint.advertised(meta.mimetype.asString)
val mimeHint = MimeTypeHint.advertised(meta.mimetype)
for {
time <- Timestamp.current[F]
fid <- Ident.randomId[F]
_ <-
ctx.store.bitpeace
.saveNew(newFile, meta.chunksize, mimeHint, Some(fid.id), time.value)
.compile
.lastOrError
fid <-
newFile.through(ctx.store.fileStore.save(mimeHint)).compile.lastOrError
_ <- ctx.store.transact(RAttachment.updateFileId(ctx.args.attachId, fid))
} yield ()
}

View File

@ -10,7 +10,6 @@ import cats.effect._
import cats.implicits._
import docspell.common._
import docspell.convert.ConvertConfig
import docspell.extract.pdfbox.PdfboxPreview
import docspell.extract.pdfbox.PreviewConfig
import docspell.joex.process.AttachmentPreview
@ -23,7 +22,7 @@ object MakePreviewTask {
type Args = MakePreviewArgs
def apply[F[_]: Sync](cfg: ConvertConfig, pcfg: PreviewConfig): Task[F, Args, Unit] =
def apply[F[_]: Sync](pcfg: PreviewConfig): Task[F, Args, Unit] =
Task { ctx =>
for {
exists <- previewExists(ctx)
@ -36,7 +35,7 @@ object MakePreviewTask {
else
ctx.logger.info(
s"Generating preview image for attachment ${ctx.args.attachment}"
) *> generatePreview(ctx, preview, cfg)
) *> generatePreview(ctx, preview)
} yield ()
}
@ -45,13 +44,12 @@ object MakePreviewTask {
private def generatePreview[F[_]: Sync](
ctx: Context[F, Args],
preview: PdfboxPreview[F],
cfg: ConvertConfig
preview: PdfboxPreview[F]
): F[Unit] =
for {
ra <- ctx.store.transact(RAttachment.findById(ctx.args.attachment))
_ <- ra
.map(AttachmentPreview.createPreview(ctx, preview, cfg.chunkSize))
.map(AttachmentPreview.createPreview(ctx, preview))
.getOrElse(
ctx.logger.error(s"No attachment found with id: ${ctx.args.attachment}")
)

View File

@ -18,9 +18,6 @@ import docspell.extract.pdfbox.PdfboxExtract
import docspell.joex.scheduler._
import docspell.store.records.RAttachment
import docspell.store.records._
import docspell.store.syntax.MimeTypes._
import bitpeace.{Mimetype, RangeDef}
/** Goes through all attachments that must be already converted into a pdf. If it is a
* pdf, the number of pages are retrieved and stored in the attachment metadata.
@ -100,13 +97,8 @@ object AttachmentPageCount {
def findMime[F[_]: Functor](ctx: Context[F, _])(ra: RAttachment): F[MimeType] =
OptionT(ctx.store.transact(RFileMeta.findById(ra.fileId)))
.map(_.mimetype)
.getOrElse(Mimetype.applicationOctetStream)
.map(_.toLocal)
.getOrElse(MimeType.octetStream)
def loadFile[F[_]](ctx: Context[F, _])(ra: RAttachment): Stream[F, Byte] =
ctx.store.bitpeace
.get(ra.fileId.id)
.unNoneTerminate
.through(ctx.store.bitpeace.fetchData2(RangeDef.all))
ctx.store.fileStore.getBytes(ra.fileId)
}

View File

@ -13,16 +13,12 @@ import cats.implicits._
import fs2.Stream
import docspell.common._
import docspell.convert._
import docspell.extract.pdfbox.PdfboxPreview
import docspell.extract.pdfbox.PreviewConfig
import docspell.joex.scheduler._
import docspell.store.queries.QAttachment
import docspell.store.records.RAttachment
import docspell.store.records._
import docspell.store.syntax.MimeTypes._
import bitpeace.{Mimetype, MimetypeHint, RangeDef}
/** Goes through all attachments that must be already converted into a pdf. If it is a
* pdf, the first page is converted into a small preview png image and linked to the
@ -30,7 +26,7 @@ import bitpeace.{Mimetype, MimetypeHint, RangeDef}
*/
object AttachmentPreview {
def apply[F[_]: Sync](cfg: ConvertConfig, pcfg: PreviewConfig)(
def apply[F[_]: Sync](pcfg: PreviewConfig)(
item: ItemData
): Task[F, ProcessItemArgs, ItemData] =
Task { ctx =>
@ -40,7 +36,7 @@ object AttachmentPreview {
)
preview <- PdfboxPreview(pcfg)
_ <- item.attachments
.traverse(createPreview(ctx, preview, cfg.chunkSize))
.traverse(createPreview(ctx, preview))
.attempt
.flatMap {
case Right(_) => ().pure[F]
@ -54,8 +50,7 @@ object AttachmentPreview {
def createPreview[F[_]: Sync](
ctx: Context[F, _],
preview: PdfboxPreview[F],
chunkSize: Int
preview: PdfboxPreview[F]
)(
ra: RAttachment
): F[Option[RAttachmentPreview]] =
@ -64,7 +59,7 @@ object AttachmentPreview {
preview.previewPNG(loadFile(ctx)(ra)).flatMap {
case Some(out) =>
ctx.logger.debug("Preview generated, saving to database…") *>
createRecord(ctx, out, ra, chunkSize).map(_.some)
createRecord(ctx, out, ra).map(_.some)
case None =>
ctx.logger
.info(s"Preview could not be generated. Maybe the pdf has no pages?") *>
@ -79,23 +74,20 @@ object AttachmentPreview {
private def createRecord[F[_]: Sync](
ctx: Context[F, _],
png: Stream[F, Byte],
ra: RAttachment,
chunkSize: Int
ra: RAttachment
): F[RAttachmentPreview] = {
val name = ra.name
.map(FileName.apply)
.map(_.withPart("preview", '_').withExtension("png"))
for {
fileMeta <- ctx.store.bitpeace
.saveNew(
png,
chunkSize,
MimetypeHint(name.map(_.fullName), Some("image/png"))
fileId <- png
.through(
ctx.store.fileStore.save(MimeTypeHint(name.map(_.fullName), Some("image/png")))
)
.compile
.lastOrError
now <- Timestamp.current[F]
rp = RAttachmentPreview(ra.id, Ident.unsafe(fileMeta.id), name.map(_.fullName), now)
rp = RAttachmentPreview(ra.id, fileId, name.map(_.fullName), now)
_ <- QAttachment.deletePreview(ctx.store)(ra.id)
_ <- ctx.store.transact(RAttachmentPreview.insert(rp))
} yield rp
@ -104,13 +96,8 @@ object AttachmentPreview {
def findMime[F[_]: Functor](ctx: Context[F, _])(ra: RAttachment): F[MimeType] =
OptionT(ctx.store.transact(RFileMeta.findById(ra.fileId)))
.map(_.mimetype)
.getOrElse(Mimetype.applicationOctetStream)
.map(_.toLocal)
.getOrElse(MimeType.octetStream)
def loadFile[F[_]](ctx: Context[F, _])(ra: RAttachment): Stream[F, Byte] =
ctx.store.bitpeace
.get(ra.fileId.id)
.unNoneTerminate
.through(ctx.store.bitpeace.fetchData2(RangeDef.all))
ctx.store.fileStore.getBytes(ra.fileId)
}

View File

@ -19,9 +19,6 @@ import docspell.convert._
import docspell.joex.extract.JsoupSanitizer
import docspell.joex.scheduler._
import docspell.store.records._
import docspell.store.syntax.MimeTypes._
import bitpeace.{Mimetype, MimetypeHint, RangeDef}
/** Goes through all attachments and creates a PDF version of it where supported.
*
@ -69,24 +66,21 @@ object ConvertPdf {
): F[Boolean] =
ctx.store.transact(RAttachmentSource.isConverted(ra.id))
def findMime[F[_]: Functor](ctx: Context[F, _])(ra: RAttachment): F[Mimetype] =
def findMime[F[_]: Functor](ctx: Context[F, _])(ra: RAttachment): F[MimeType] =
OptionT(ctx.store.transact(RFileMeta.findById(ra.fileId)))
.map(_.mimetype)
.getOrElse(Mimetype.applicationOctetStream)
.getOrElse(MimeType.octetStream)
def convertSafe[F[_]: Async](
cfg: ConvertConfig,
sanitizeHtml: SanitizeHtml,
ctx: Context[F, ProcessItemArgs],
item: ItemData
)(ra: RAttachment, mime: Mimetype): F[(RAttachment, Option[RAttachmentMeta])] =
)(ra: RAttachment, mime: MimeType): F[(RAttachment, Option[RAttachmentMeta])] =
Conversion.create[F](cfg, sanitizeHtml, ctx.logger).use { conv =>
mime.toLocal match {
mime match {
case mt =>
val data = ctx.store.bitpeace
.get(ra.fileId.id)
.unNoneTerminate
.through(ctx.store.bitpeace.fetchData2(RangeDef.all))
val data = ctx.store.fileStore.getBytes(ra.fileId)
val handler = conversionHandler[F](ctx, cfg, ra, item)
ctx.logger.info(s"Converting file ${ra.name} (${mime.asString}) into a PDF") *>
conv.toPDF(DataType(mt), ctx.args.meta.language, handler)(
@ -154,11 +148,11 @@ object ConvertPdf {
.map(FileName.apply)
.map(_.withExtension("pdf").withPart(cfg.convertedFilenamePart, '.'))
.map(_.fullName)
ctx.store.bitpeace
.saveNew(pdf, cfg.chunkSize, MimetypeHint(hint.filename, hint.advertised))
pdf
.through(ctx.store.fileStore.save(MimeTypeHint(hint.filename, hint.advertised)))
.compile
.lastOrError
.map(fm => Ident.unsafe(fm.id))
.flatMap(fmId => updateAttachment[F](ctx, ra, fmId, newName).map(_ => fmId))
.map(fmId => ra.copy(fileId = fmId, name = newName))
}
@ -184,10 +178,8 @@ object ConvertPdf {
if (sameFile) ().pure[F]
else
ctx.logger.info("Deleting previous attachment file") *>
ctx.store.bitpeace
.delete(raPrev.fileId.id)
.compile
.drain
ctx.store.fileStore
.delete(raPrev.fileId)
.attempt
.flatMap {
case Right(_) => ().pure[F]

View File

@ -15,9 +15,7 @@ import fs2.Stream
import docspell.common._
import docspell.joex.scheduler.{Context, Task}
import docspell.store.queries.QItem
import docspell.store.records.{RAttachment, RAttachmentSource, RItem}
import bitpeace.FileMeta
import docspell.store.records._
/** Task that creates the item.
*/
@ -31,12 +29,10 @@ object CreateItem {
def createNew[F[_]: Sync]: Task[F, ProcessItemArgs, ItemData] =
Task { ctx =>
def isValidFile(fm: FileMeta) =
def isValidFile(fm: RFileMeta) =
ctx.args.meta.validFileTypes.isEmpty ||
ctx.args.meta.validFileTypes
.map(_.asString)
.toSet
.contains(fm.mimetype.baseType)
ctx.args.meta.validFileTypes.toSet
.contains(fm.mimetype)
def fileMetas(itemId: Ident, now: Timestamp) =
Stream
@ -44,7 +40,9 @@ object CreateItem {
.flatMap { offset =>
Stream
.emits(ctx.args.files)
.flatMap(f => ctx.store.bitpeace.get(f.fileMetaId.id).map(fm => (f, fm)))
.evalMap(f =>
ctx.store.fileStore.findMeta(f.fileMetaId).value.map(fm => (f, fm))
)
.collect { case (f, Some(fm)) if isValidFile(fm) => f }
.zipWithIndex
.evalMap { case (f, index) =>

View File

@ -15,7 +15,6 @@ import docspell.store.queries.QItem
import docspell.store.records.RFileMeta
import docspell.store.records.RJob
import bitpeace.FileMeta
import doobie._
object DuplicateCheck {
@ -40,7 +39,7 @@ object DuplicateCheck {
_ <- fileMetas.traverse(deleteDuplicate(ctx))
ids = fileMetas.filter(_.exists).map(_.fm.id).toSet
} yield ctx.args.copy(files =
ctx.args.files.filterNot(f => ids.contains(f.fileMetaId.id))
ctx.args.files.filterNot(f => ids.contains(f.fileMetaId))
)
private def getRetryCount[F[_]: Sync](ctx: Context[F, Args]): F[Int] =
@ -49,13 +48,11 @@ object DuplicateCheck {
private def deleteDuplicate[F[_]: Sync](
ctx: Context[F, Args]
)(fd: FileMetaDupes): F[Unit] = {
val fname = ctx.args.files.find(_.fileMetaId.id == fd.fm.id).flatMap(_.name)
val fname = ctx.args.files.find(_.fileMetaId == fd.fm.id).flatMap(_.name)
if (fd.exists)
ctx.logger
.info(s"Deleting duplicate file $fname!") *> ctx.store.bitpeace
.info(s"Deleting duplicate file $fname!") *> ctx.store.fileStore
.delete(fd.fm.id)
.compile
.drain
else ().pure[F]
}
@ -69,12 +66,12 @@ object DuplicateCheck {
private def checkDuplicate[F[_]](
ctx: Context[F, Args]
)(fm: FileMeta): ConnectionIO[FileMetaDupes] = {
)(fm: RFileMeta): ConnectionIO[FileMetaDupes] = {
val excludes = ctx.args.files.map(_.fileMetaId).toSet
QItem
.findByChecksum(fm.checksum, ctx.args.meta.collective, excludes)
.findByChecksum(fm.checksum.toHex, ctx.args.meta.collective, excludes)
.map(v => FileMetaDupes(fm, v.nonEmpty))
}
case class FileMetaDupes(fm: FileMeta, exists: Boolean)
case class FileMetaDupes(fm: RFileMeta, exists: Boolean)
}

View File

@ -20,9 +20,7 @@ import docspell.files.Zip
import docspell.joex.mail._
import docspell.joex.scheduler._
import docspell.store.records._
import docspell.store.syntax.MimeTypes._
import bitpeace.{Mimetype, MimetypeHint, RangeDef}
import emil.Mail
/** Goes through all attachments and extracts archive files, like zip files. The process
@ -84,16 +82,16 @@ object ExtractArchive {
if (extract.archives.isEmpty) extract
else extract.updatePositions
def findMime[F[_]: Functor](ctx: Context[F, _])(ra: RAttachment): F[Mimetype] =
def findMime[F[_]: Functor](ctx: Context[F, _])(ra: RAttachment): F[MimeType] =
OptionT(ctx.store.transact(RFileMeta.findById(ra.fileId)))
.map(_.mimetype)
.getOrElse(Mimetype.applicationOctetStream)
.getOrElse(MimeType.octetStream)
def extractSafe[F[_]: Async](
ctx: Context[F, ProcessItemArgs],
archive: Option[RAttachmentArchive]
)(ra: RAttachment, pos: Int, mime: Mimetype): F[Extracted] =
mime.toLocal match {
)(ra: RAttachment, pos: Int, mime: MimeType): F[Extracted] =
mime match {
case MimeType.ZipMatch(_) if ra.name.exists(_.endsWith(".zip")) =>
ctx.logger.info(s"Extracting zip archive ${ra.name.getOrElse("<noname>")}.") *>
extractZip(ctx, archive)(ra, pos)
@ -122,7 +120,7 @@ object ExtractArchive {
)
_ <- ctx.store.transact(RAttachmentArchive.delete(ra.id))
_ <- ctx.store.transact(RAttachment.delete(ra.id))
_ <- ctx.store.bitpeace.delete(ra.fileId.id).compile.drain
_ <- ctx.store.fileStore.delete(ra.fileId)
} yield extracted
case None =>
for {
@ -137,11 +135,8 @@ object ExtractArchive {
ctx: Context[F, ProcessItemArgs],
archive: Option[RAttachmentArchive]
)(ra: RAttachment, pos: Int): F[Extracted] = {
val zipData = ctx.store.bitpeace
.get(ra.fileId.id)
.unNoneTerminate
.through(ctx.store.bitpeace.fetchData2(RangeDef.all))
val glob = ctx.args.meta.fileFilter.getOrElse(Glob.all)
val zipData = ctx.store.fileStore.getBytes(ra.fileId)
val glob = ctx.args.meta.fileFilter.getOrElse(Glob.all)
ctx.logger.debug(s"Filtering zip entries with '${glob.asString}'") *>
zipData
.through(Zip.unzipP[F](8192, glob))
@ -156,10 +151,7 @@ object ExtractArchive {
ctx: Context[F, ProcessItemArgs],
archive: Option[RAttachmentArchive]
)(ra: RAttachment, pos: Int): F[Extracted] = {
val email: Stream[F, Byte] = ctx.store.bitpeace
.get(ra.fileId.id)
.unNoneTerminate
.through(ctx.store.bitpeace.fetchData2(RangeDef.all))
val email: Stream[F, Byte] = ctx.store.fileStore.getBytes(ra.fileId)
val glob = ctx.args.meta.fileFilter.getOrElse(Glob.all)
val attachOnly = ctx.args.meta.attachmentsOnly.getOrElse(false)
@ -200,15 +192,16 @@ object ExtractArchive {
tentry: (Binary[F], Long)
): Stream[F, Extracted] = {
val (entry, subPos) = tentry
val mimeHint = MimetypeHint.filename(entry.name).withAdvertised(entry.mime.asString)
val fileMeta = ctx.store.bitpeace.saveNew(entry.data, 8192, mimeHint)
val mimeHint = MimeTypeHint.filename(entry.name).withAdvertised(entry.mime.asString)
val fileId = entry.data.through(ctx.store.fileStore.save(mimeHint))
Stream.eval(ctx.logger.debug(s"Extracted ${entry.name}. Storing as attachment.")) >>
fileMeta.evalMap { fm =>
fileId.evalMap { fid =>
Ident.randomId.map { id =>
val nra = RAttachment(
id,
ra.itemId,
Ident.unsafe(fm.id),
fid,
pos,
ra.created,
Option(entry.name).map(_.trim).filter(_.nonEmpty)

View File

@ -132,8 +132,8 @@ object ItemHandler {
Task(ctx =>
ctx.logger.info("Deleting input files …") *>
Stream
.emits(ctx.args.files.map(_.fileMetaId.id))
.flatMap(id => ctx.store.bitpeace.delete(id).attempt.drain)
.emits(ctx.args.files.map(_.fileMetaId))
.evalMap(id => ctx.store.fileStore.delete(id).attempt)
.compile
.drain
)

View File

@ -62,7 +62,7 @@ object ProcessItem {
ConvertPdf(cfg.convert, item)
.flatMap(Task.setProgress(progress._1))
.flatMap(TextExtraction(cfg.extraction, fts))
.flatMap(AttachmentPreview(cfg.convert, cfg.extraction.preview))
.flatMap(AttachmentPreview(cfg.extraction.preview))
.flatMap(AttachmentPageCount())
.flatMap(Task.setProgress(progress._2))
.flatMap(analysisOnly[F](cfg, analyser, regexNer))

View File

@ -15,9 +15,6 @@ import docspell.extract.{ExtractConfig, ExtractResult, Extraction}
import docspell.ftsclient.{FtsClient, TextData}
import docspell.joex.scheduler.{Context, Task}
import docspell.store.records.{RAttachment, RAttachmentMeta, RFileMeta}
import docspell.store.syntax.MimeTypes._
import bitpeace.{Mimetype, RangeDef}
object TextExtraction {
@ -130,18 +127,15 @@ object TextExtraction {
extr: Extraction[F],
lang: Language
)(fileId: Ident): F[ExtractResult] = {
val data = ctx.store.bitpeace
.get(fileId.id)
.unNoneTerminate
.through(ctx.store.bitpeace.fetchData2(RangeDef.all))
val data = ctx.store.fileStore.getBytes(fileId)
def findMime: F[Mimetype] =
def findMime: F[MimeType] =
OptionT(ctx.store.transact(RFileMeta.findById(fileId)))
.map(_.mimetype)
.getOrElse(Mimetype.applicationOctetStream)
.getOrElse(MimeType.octetStream)
findMime
.flatMap(mt => extr.extractText(data, DataType(mt.toLocal), lang))
.flatMap(mt => extr.extractText(data, DataType(mt), lang))
}
private def extractTextFallback[F[_]: Async](

View File

@ -26,7 +26,6 @@ import docspell.store.queries.{AttachmentLight => QAttachmentLight}
import docspell.store.records._
import docspell.store.{AddResult, UpdateResult}
import bitpeace.FileMeta
import org.http4s.headers.`Content-Type`
import org.http4s.multipart.Multipart
import org.log4s.Logger
@ -140,17 +139,23 @@ trait Conversions {
def mkAttachment(
item: OItemSearch.ItemData
)(ra: RAttachment, m: FileMeta): Attachment = {
)(ra: RAttachment, m: RFileMeta): Attachment = {
val converted =
item.sources.find(_._1.id == ra.id).exists(_._2.checksum != m.checksum)
Attachment(ra.id, ra.name, m.length, MimeType.unsafe(m.mimetype.asString), converted)
Attachment(
ra.id,
ra.name,
m.length.bytes,
MimeType.unsafe(m.mimetype.asString),
converted
)
}
def mkAttachmentSource(ra: RAttachmentSource, m: FileMeta): AttachmentSource =
AttachmentSource(ra.id, ra.name, m.length, MimeType.unsafe(m.mimetype.asString))
def mkAttachmentSource(ra: RAttachmentSource, m: RFileMeta): AttachmentSource =
AttachmentSource(ra.id, ra.name, m.length.bytes, MimeType.unsafe(m.mimetype.asString))
def mkAttachmentArchive(ra: RAttachmentArchive, m: FileMeta): AttachmentSource =
AttachmentSource(ra.id, ra.name, m.length, MimeType.unsafe(m.mimetype.asString))
def mkAttachmentArchive(ra: RAttachmentArchive, m: RFileMeta): AttachmentSource =
AttachmentSource(ra.id, ra.name, m.length.bytes, MimeType.unsafe(m.mimetype.asString))
// item list

View File

@ -12,8 +12,8 @@ import cats.effect._
import cats.implicits._
import docspell.backend.ops._
import docspell.store.records.RFileMeta
import bitpeace.FileMeta
import org.http4s._
import org.http4s.circe.CirceEntityEncoder._
import org.http4s.dsl.Http4sDsl
@ -30,8 +30,8 @@ object BinaryUtil {
val mt = MediaType.unsafeParse(data.meta.mimetype.asString)
val ctype = `Content-Type`(mt)
val cntLen = `Content-Length`.unsafeFromLong(data.meta.length)
val eTag = ETag(data.meta.checksum)
val cntLen = `Content-Length`.unsafeFromLong(data.meta.length.bytes)
val eTag = ETag(data.meta.checksum.toHex)
val disp =
`Content-Disposition`(
"inline",
@ -48,16 +48,16 @@ object BinaryUtil {
dsl: Http4sDsl[F]
)(data: OItemSearch.BinaryData[F]): F[Response[F]] = {
import dsl._
withResponseHeaders(dsl, Ok(data.data.take(data.meta.length)))(data)
withResponseHeaders(dsl, Ok(data.data.take(data.meta.length.bytes)))(data)
}
def matchETag[F[_]](
fileData: Option[FileMeta],
fileData: Option[RFileMeta],
noneMatch: Option[NonEmptyList[EntityTag]]
): Boolean =
(fileData, noneMatch) match {
case (Some(meta), Some(nm)) =>
meta.checksum == nm.head.tag
meta.checksum.toHex == nm.head.tag
case _ =>
false
}

View File

@ -0,0 +1,29 @@
ALTER TABLE "filemeta" DROP COLUMN IF EXISTS "chunksize";
ALTER TABLE "filemeta" DROP COLUMN IF EXISTS "chunks";
ALTER TABLE "filemeta"
RENAME COLUMN "id" TO "file_id";
ALTER TABLE "filechunk"
RENAME COLUMN "fileid" TO "file_id";
ALTER TABLE "filechunk"
RENAME COLUMN "chunknr" TO "chunk_nr";
ALTER TABLE "filechunk"
RENAME COLUMN "chunklength" TO "chunk_len";
ALTER TABLE "filechunk"
RENAME COLUMN "chunkdata" TO "chunk_data";
-- change timestamp format, bitpeace used a string
ALTER TABLE "filemeta"
ADD COLUMN "created" timestamp;
UPDATE "filemeta" SET "created" = TO_TIMESTAMP("timestamp", 'YYYY-MM-DD"T"HH24:MI:SS.MS');
ALTER TABLE "filemeta"
ALTER COLUMN "created" SET NOT NULL;
ALTER TABLE "filemeta"
DROP COLUMN "timestamp";

View File

@ -0,0 +1,29 @@
ALTER TABLE `filemeta` DROP COLUMN IF EXISTS `chunksize`;
ALTER TABLE `filemeta` DROP COLUMN IF EXISTS `chunks`;
ALTER TABLE `filemeta`
RENAME COLUMN `id` TO `file_id`;
ALTER TABLE `filechunk`
RENAME COLUMN `fileid` TO `file_id`;
ALTER TABLE `filechunk`
RENAME COLUMN `chunknr` TO `chunk_nr`;
ALTER TABLE `filechunk`
RENAME COLUMN `chunklength` TO `chunk_len`;
ALTER TABLE `filechunk`
RENAME COLUMN `chunkdata` TO `chunk_data`;
-- change timestamp format, bitpeace used a string
ALTER TABLE `filemeta`
ADD COLUMN `created` timestamp;
UPDATE `filemeta` SET `created` = STR_TO_DATE(`timestamp`, '%Y-%m-%dT%H:%i:%s.%fZ');
ALTER TABLE `filemeta`
MODIFY `created` timestamp NOT NULL;
ALTER TABLE `filemeta`
DROP COLUMN `timestamp`;

View File

@ -0,0 +1,29 @@
ALTER TABLE "filemeta" DROP COLUMN IF EXISTS "chunksize";
ALTER TABLE "filemeta" DROP COLUMN IF EXISTS "chunks";
ALTER TABLE "filemeta"
RENAME COLUMN "id" TO "file_id";
ALTER TABLE "filechunk"
RENAME COLUMN "fileid" TO "file_id";
ALTER TABLE "filechunk"
RENAME COLUMN "chunknr" TO "chunk_nr";
ALTER TABLE "filechunk"
RENAME COLUMN "chunklength" TO "chunk_len";
ALTER TABLE "filechunk"
RENAME COLUMN "chunkdata" TO "chunk_data";
-- change timestamp format, bitpeace used a string
ALTER TABLE "filemeta"
ADD COLUMN "created" timestamp;
UPDATE "filemeta" SET "created" = CAST("timestamp" as timestamp);
ALTER TABLE "filemeta"
ALTER COLUMN "created" SET NOT NULL;
ALTER TABLE "filemeta"
DROP COLUMN "timestamp";

View File

@ -11,9 +11,10 @@ import scala.concurrent.ExecutionContext
import cats.effect._
import fs2._
import docspell.store.file.FileStore
import docspell.store.impl.StoreImpl
import bitpeace.Bitpeace
import com.zaxxer.hikari.HikariDataSource
import doobie._
import doobie.hikari.HikariTransactor
@ -23,7 +24,7 @@ trait Store[F[_]] {
def transact[A](prg: Stream[ConnectionIO, A]): Stream[F, A]
def bitpeace: Bitpeace[F]
def fileStore: FileStore[F]
def add(insert: ConnectionIO[Int], exists: ConnectionIO[Boolean]): F[AddResult]
}
@ -32,20 +33,23 @@ object Store {
def create[F[_]: Async](
jdbc: JdbcConfig,
chunkSize: Int,
connectEC: ExecutionContext
): Resource[F, Store[F]] = {
val hxa = HikariTransactor.newHikariTransactor[F](
jdbc.driverClass,
jdbc.url.asString,
jdbc.user,
jdbc.password,
connectEC
)
val acquire = Sync[F].delay(new HikariDataSource())
val free: HikariDataSource => F[Unit] = ds => Sync[F].delay(ds.close())
for {
xa <- hxa
st = new StoreImpl[F](jdbc, xa)
ds <- Resource.make(acquire)(free)
_ = Resource.pure {
ds.setJdbcUrl(jdbc.url.asString)
ds.setUsername(jdbc.user)
ds.setPassword(jdbc.password)
ds.setDriverClassName(jdbc.driverClass)
}
xa = HikariTransactor(ds, connectEC)
fs = FileStore[F](xa, ds, chunkSize)
st = new StoreImpl[F](fs, jdbc, xa)
_ <- Resource.eval(st.migrate)
} yield st
}

View File

@ -0,0 +1,50 @@
/*
* Copyright 2020 Eike K. & Contributors
*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
package docspell.store.file
import cats.data.OptionT
import cats.effect._
import cats.implicits._
import docspell.common._
import docspell.store.records.RFileMeta
import binny._
import doobie._
import doobie.implicits._
final private[file] class AttributeStore[F[_]: Sync](xa: Transactor[F])
extends BinaryAttributeStore[F] {
def saveAttr(id: BinaryId, attrs: F[BinaryAttributes]): F[Unit] =
for {
now <- Timestamp.current[F]
a <- attrs
fm = RFileMeta(
Ident.unsafe(id.id),
now,
MimeType.parse(a.contentType.contentType).getOrElse(MimeType.octetStream),
ByteSize(a.length),
a.sha256
)
_ <- RFileMeta.insert(fm).transact(xa)
} yield ()
def deleteAttr(id: BinaryId): F[Boolean] =
RFileMeta.delete(Ident.unsafe(id.id)).transact(xa).map(_ > 0)
def findAttr(id: BinaryId): OptionT[F, BinaryAttributes] =
findMeta(id).map(fm =>
BinaryAttributes(
fm.checksum,
SimpleContentType(fm.mimetype.asString),
fm.length.bytes
)
)
def findMeta(id: BinaryId): OptionT[F, RFileMeta] =
OptionT(RFileMeta.findById(Ident.unsafe(id.id)).transact(xa))
}

View File

@ -0,0 +1,92 @@
/*
* Copyright 2020 Eike K. & Contributors
*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
package docspell.store.file
import javax.sql.DataSource
import cats.data.OptionT
import cats.effect._
import fs2.{Pipe, Stream}
import docspell.common._
import docspell.store.records.RFileMeta
import binny._
import binny.jdbc.{GenericJdbcStore, JdbcStoreConfig}
import binny.tika.TikaContentTypeDetect
import doobie._
trait FileStore[F[_]] {
def find(id: Ident): OptionT[F, Stream[F, Byte]]
def getBytes(id: Ident): Stream[F, Byte]
def findMeta(id: Ident): OptionT[F, RFileMeta]
def delete(id: Ident): F[Unit]
def save(hint: MimeTypeHint): Pipe[F, Byte, Ident]
}
object FileStore {
private[this] val logger = org.log4s.getLogger
def apply[F[_]: Sync](
xa: Transactor[F],
ds: DataSource,
chunkSize: Int
): FileStore[F] = {
val attrStore = new AttributeStore[F](xa)
val cfg = JdbcStoreConfig("filechunk", chunkSize, TikaContentTypeDetect.default)
val binStore = GenericJdbcStore[F](ds, Log4sLogger[F](logger), cfg, attrStore)
new Impl[F](binStore, attrStore)
}
final private class Impl[F[_]](bs: BinaryStore[F], attrStore: AttributeStore[F])
extends FileStore[F] {
def find(id: Ident): OptionT[F, Stream[F, Byte]] =
bs.findBinary(BinaryId(id.id), ByteRange.All)
def getBytes(id: Ident): Stream[F, Byte] =
Stream.eval(find(id).value).unNoneTerminate.flatMap(identity)
def findMeta(id: Ident): OptionT[F, RFileMeta] =
attrStore.findMeta(BinaryId(id.id))
def delete(id: Ident): F[Unit] =
bs.delete(BinaryId(id.id))
def save(hint: MimeTypeHint): Pipe[F, Byte, Ident] =
bs.insert(Hint(hint.filename, hint.advertised))
.andThen(_.map(bid => Ident.unsafe(bid.id)))
}
private object Log4sLogger {
def apply[F[_]: Sync](log: org.log4s.Logger): binny.util.Logger[F] =
new binny.util.Logger[F] {
override def trace(msg: => String): F[Unit] =
Sync[F].delay(log.trace(msg))
override def debug(msg: => String): F[Unit] =
Sync[F].delay(log.debug(msg))
override def info(msg: => String): F[Unit] =
Sync[F].delay(log.info(msg))
override def warn(msg: => String): F[Unit] =
Sync[F].delay(log.warn(msg))
override def error(msg: => String): F[Unit] =
Sync[F].delay(log.error(msg))
override def error(ex: Throwable)(msg: => String): F[Unit] =
Sync[F].delay(log.error(ex)(msg))
}
}
}

View File

@ -20,6 +20,7 @@ import doobie.util.log.Success
import emil.doobie.EmilDoobieMeta
import io.circe.Json
import io.circe.{Decoder, Encoder}
import scodec.bits.ByteVector
trait DoobieMeta extends EmilDoobieMeta {
@ -132,6 +133,15 @@ trait DoobieMeta extends EmilDoobieMeta {
implicit val metaKey: Meta[Key] =
Meta[String].timap(Key.unsafeFromString)(_.asString)
implicit val metaMimeType: Meta[MimeType] =
Meta[String].timap(MimeType.unsafe)(_.asString)
implicit val metaByteVectorHex: Meta[ByteVector] =
Meta[String].timap(s => ByteVector.fromValidHex(s))(_.toHex)
implicit val metaByteSize: Meta[ByteSize] =
Meta[Long].timap(ByteSize.apply)(_.bytes)
}
object DoobieMeta extends DoobieMeta {

View File

@ -9,22 +9,18 @@ package docspell.store.impl
import cats.effect.Async
import cats.implicits._
import docspell.common.Ident
import docspell.store.file.FileStore
import docspell.store.migrate.FlywayMigrate
import docspell.store.{AddResult, JdbcConfig, Store}
import bitpeace.{Bitpeace, BitpeaceConfig, TikaMimetypeDetect}
import doobie._
import doobie.implicits._
final class StoreImpl[F[_]: Async](jdbc: JdbcConfig, xa: Transactor[F]) extends Store[F] {
val bitpeaceCfg =
BitpeaceConfig(
"filemeta",
"filechunk",
TikaMimetypeDetect,
Ident.randomId[F].map(_.id)
)
final class StoreImpl[F[_]: Async](
val fileStore: FileStore[F],
jdbc: JdbcConfig,
xa: Transactor[F]
) extends Store[F] {
def migrate: F[Int] =
FlywayMigrate.run[F](jdbc).map(_.migrationsExecuted)
@ -35,9 +31,6 @@ final class StoreImpl[F[_]: Async](jdbc: JdbcConfig, xa: Transactor[F]) extends
def transact[A](prg: fs2.Stream[doobie.ConnectionIO, A]): fs2.Stream[F, A] =
prg.transact(xa)
def bitpeace: Bitpeace[F] =
Bitpeace(bitpeaceCfg, xa)
def add(insert: ConnectionIO[Int], exists: ConnectionIO[Boolean]): F[AddResult] =
for {
save <- transact(insert).attempt

View File

@ -9,8 +9,6 @@ package docspell.store.queries
import docspell.common._
import docspell.store.records._
import bitpeace.FileMeta
case class ItemData(
item: RItem,
corrOrg: Option[ROrganization],
@ -20,9 +18,9 @@ case class ItemData(
inReplyTo: Option[IdRef],
folder: Option[IdRef],
tags: Vector[RTag],
attachments: Vector[(RAttachment, FileMeta)],
sources: Vector[(RAttachmentSource, FileMeta)],
archives: Vector[(RAttachmentArchive, FileMeta)],
attachments: Vector[(RAttachment, RFileMeta)],
sources: Vector[(RAttachmentSource, RFileMeta)],
archives: Vector[(RAttachmentArchive, RFileMeta)],
customFields: Vector[ItemFieldValue]
) {

View File

@ -38,10 +38,10 @@ object QAttachment {
Stream
.evalSeq(store.transact(findPreview))
.map(_.fileId.id)
.map(_.fileId)
.evalTap(_ => store.transact(RAttachmentPreview.delete(attachId)))
.flatMap(store.bitpeace.delete)
.map(flag => if (flag) 1 else 0)
.evalMap(store.fileStore.delete)
.map(_ => 1)
.compile
.foldMonoid
}
@ -68,9 +68,8 @@ object QAttachment {
f <-
Stream
.emits(files._1)
.map(_.id)
.flatMap(store.bitpeace.delete)
.map(flag => if (flag) 1 else 0)
.evalMap(store.fileStore.delete)
.map(_ => 1)
.compile
.foldMonoid
} yield n + k + f
@ -91,9 +90,9 @@ object QAttachment {
)
f <-
Stream
.emits(ra.fileId.id +: (s.map(_.fileId.id).toSeq ++ p.map(_.fileId.id).toSeq))
.flatMap(store.bitpeace.delete)
.map(flag => if (flag) 1 else 0)
.emits(ra.fileId +: (s.map(_.fileId).toSeq ++ p.map(_.fileId).toSeq))
.evalMap(store.fileStore.delete)
.map(_ => 1)
.compile
.foldMonoid
} yield n + f
@ -104,8 +103,8 @@ object QAttachment {
n <- OptionT.liftF(store.transact(RAttachmentArchive.deleteAll(aa.fileId)))
_ <- OptionT.liftF(
Stream
.emit(aa.fileId.id)
.flatMap(store.bitpeace.delete)
.emit(aa.fileId)
.evalMap(store.fileStore.delete)
.compile
.drain
)

View File

@ -99,16 +99,16 @@ object QCollective {
inner join item i on a.itemid = i.itemid
where i.cid = $coll)
select a.fid,m.length from attachs a
inner join filemeta m on m.id = a.fid
inner join filemeta m on m.file_id = a.fid
union distinct
select a.file_id,m.length from attachment_source a
inner join filemeta m on m.id = a.file_id where a.id in (select aid from attachs)
inner join filemeta m on m.file_id = a.file_id where a.id in (select aid from attachs)
union distinct
select p.file_id,m.length from attachment_preview p
inner join filemeta m on m.id = p.file_id where p.id in (select aid from attachs)
inner join filemeta m on m.file_id = p.file_id where p.id in (select aid from attachs)
union distinct
select a.file_id,m.length from attachment_archive a
inner join filemeta m on m.id = a.file_id where a.id in (select aid from attachs)
inner join filemeta m on m.file_id = a.file_id where a.id in (select aid from attachs)
) as t""".query[Option[Long]].unique
for {

View File

@ -496,7 +496,7 @@ object QItem {
where(
i.cid === collective &&
i.state.in(ItemState.validStates) &&
Condition.Or(fms.map(m => m.checksum === checksum)) &&?
Condition.Or(fms.map(m => m.checksum ==== checksum)) &&?
Nel
.fromList(excludeFileMeta.toList)
.map(excl => Condition.And(fms.map(m => m.id.isNull || m.id.notIn(excl))))

View File

@ -14,7 +14,6 @@ import docspell.common._
import docspell.store.qb.DSL._
import docspell.store.qb._
import bitpeace.FileMeta
import doobie._
import doobie.implicits._
@ -113,9 +112,7 @@ object RAttachment {
def findById(attachId: Ident): ConnectionIO[Option[RAttachment]] =
run(select(T.all), from(T), T.id === attachId).query[RAttachment].option
def findMeta(attachId: Ident): ConnectionIO[Option[FileMeta]] = {
import bitpeace.sql._
def findMeta(attachId: Ident): ConnectionIO[Option[RFileMeta]] = {
val m = RFileMeta.as("m")
val a = RAttachment.as("a")
Select(
@ -123,7 +120,7 @@ object RAttachment {
from(a)
.innerJoin(m, a.fileId === m.id),
a.id === attachId
).build.query[FileMeta].option
).build.query[RFileMeta].option
}
def updateName(
@ -206,9 +203,7 @@ object RAttachment {
def findByItemAndCollectiveWithMeta(
id: Ident,
coll: Ident
): ConnectionIO[Vector[(RAttachment, FileMeta)]] = {
import bitpeace.sql._
): ConnectionIO[Vector[(RAttachment, RFileMeta)]] = {
val a = RAttachment.as("a")
val m = RFileMeta.as("m")
val i = RItem.as("i")
@ -218,12 +213,10 @@ object RAttachment {
.innerJoin(m, a.fileId === m.id)
.innerJoin(i, a.itemId === i.id),
a.itemId === id && i.cid === coll
).build.query[(RAttachment, FileMeta)].to[Vector]
).build.query[(RAttachment, RFileMeta)].to[Vector]
}
def findByItemWithMeta(id: Ident): ConnectionIO[Vector[(RAttachment, FileMeta)]] = {
import bitpeace.sql._
def findByItemWithMeta(id: Ident): ConnectionIO[Vector[(RAttachment, RFileMeta)]] = {
val a = RAttachment.as("a")
val m = RFileMeta.as("m")
Select(
@ -231,7 +224,7 @@ object RAttachment {
from(a)
.innerJoin(m, a.fileId === m.id),
a.itemId === id
).orderBy(a.position.asc).build.query[(RAttachment, FileMeta)].to[Vector]
).orderBy(a.position.asc).build.query[(RAttachment, RFileMeta)].to[Vector]
}
/** Deletes the attachment and its related source and meta records.

View File

@ -13,7 +13,6 @@ import docspell.store.qb.DSL._
import docspell.store.qb.TableDef
import docspell.store.qb._
import bitpeace.FileMeta
import doobie._
import doobie.implicits._
@ -98,9 +97,7 @@ object RAttachmentArchive {
def findByItemWithMeta(
id: Ident
): ConnectionIO[Vector[(RAttachmentArchive, FileMeta)]] = {
import bitpeace.sql._
): ConnectionIO[Vector[(RAttachmentArchive, RFileMeta)]] = {
val a = RAttachmentArchive.as("a")
val b = RAttachment.as("b")
val m = RFileMeta.as("m")
@ -110,7 +107,7 @@ object RAttachmentArchive {
.innerJoin(m, a.fileId === m.id)
.innerJoin(b, a.id === b.id),
b.itemId === id
).orderBy(b.position.asc).build.query[(RAttachmentArchive, FileMeta)].to[Vector]
).orderBy(b.position.asc).build.query[(RAttachmentArchive, RFileMeta)].to[Vector]
}
/** If the given attachment id has an associated archive, this returns the number of all

View File

@ -12,7 +12,6 @@ import docspell.common._
import docspell.store.qb.DSL._
import docspell.store.qb._
import bitpeace.FileMeta
import doobie._
import doobie.implicits._
@ -101,9 +100,7 @@ object RAttachmentPreview {
def findByItemWithMeta(
id: Ident
): ConnectionIO[Vector[(RAttachmentPreview, FileMeta)]] = {
import bitpeace.sql._
): ConnectionIO[Vector[(RAttachmentPreview, RFileMeta)]] = {
val a = RAttachmentPreview.as("a")
val b = RAttachment.as("b")
val m = RFileMeta.as("m")
@ -114,6 +111,6 @@ object RAttachmentPreview {
.innerJoin(m, a.fileId === m.id)
.innerJoin(b, b.id === a.id),
b.itemId === id
).orderBy(b.position.asc).build.query[(RAttachmentPreview, FileMeta)].to[Vector]
).orderBy(b.position.asc).build.query[(RAttachmentPreview, RFileMeta)].to[Vector]
}
}

View File

@ -12,7 +12,6 @@ import docspell.common._
import docspell.store.qb.DSL._
import docspell.store.qb._
import bitpeace.FileMeta
import doobie._
import doobie.implicits._
@ -97,9 +96,7 @@ object RAttachmentSource {
def findByItemWithMeta(
id: Ident
): ConnectionIO[Vector[(RAttachmentSource, FileMeta)]] = {
import bitpeace.sql._
): ConnectionIO[Vector[(RAttachmentSource, RFileMeta)]] = {
val a = RAttachmentSource.as("a")
val b = RAttachment.as("b")
val m = RFileMeta.as("m")
@ -110,7 +107,7 @@ object RAttachmentSource {
.innerJoin(m, a.fileId === m.id)
.innerJoin(b, b.id === a.id),
b.itemId === id
).orderBy(b.position.asc).build.query[(RAttachmentSource, FileMeta)].to[Vector]
).orderBy(b.position.asc).build.query[(RAttachmentSource, RFileMeta)].to[Vector]
}
}

View File

@ -6,35 +6,37 @@
package docspell.store.records
import java.time.Instant
import cats.data.NonEmptyList
import cats.implicits._
import docspell.common._
import docspell.store.qb.DSL._
import docspell.store.qb._
import docspell.store.syntax.MimeTypes._
import bitpeace.FileMeta
import bitpeace.Mimetype
import doobie._
import doobie.implicits._
import scodec.bits.ByteVector
final case class RFileMeta(
id: Ident,
created: Timestamp,
mimetype: MimeType,
length: ByteSize,
checksum: ByteVector
)
object RFileMeta {
final case class Table(alias: Option[String]) extends TableDef {
val tableName = "filemeta"
val id = Column[Ident]("id", this)
val timestamp = Column[Instant]("timestamp", this)
val mimetype = Column[Mimetype]("mimetype", this)
val length = Column[Long]("length", this)
val checksum = Column[String]("checksum", this)
val chunks = Column[Int]("chunks", this)
val chunksize = Column[Int]("chunksize", this)
val id = Column[Ident]("file_id", this)
val timestamp = Column[Timestamp]("created", this)
val mimetype = Column[MimeType]("mimetype", this)
val length = Column[ByteSize]("length", this)
val checksum = Column[ByteVector]("checksum", this)
val all = NonEmptyList
.of[Column[_]](id, timestamp, mimetype, length, checksum, chunks, chunksize)
.of[Column[_]](id, timestamp, mimetype, length, checksum)
}
@ -42,29 +44,25 @@ object RFileMeta {
def as(alias: String): Table =
Table(Some(alias))
def findById(fid: Ident): ConnectionIO[Option[FileMeta]] = {
import bitpeace.sql._
def insert(r: RFileMeta): ConnectionIO[Int] =
DML.insert(T, T.all, fr"${r.id},${r.created},${r.mimetype},${r.length},${r.checksum}")
run(select(T.all), from(T), T.id === fid).query[FileMeta].option
}
def findByIds(ids: List[Ident]): ConnectionIO[Vector[FileMeta]] = {
import bitpeace.sql._
def findById(fid: Ident): ConnectionIO[Option[RFileMeta]] =
run(select(T.all), from(T), T.id === fid).query[RFileMeta].option
def findByIds(ids: List[Ident]): ConnectionIO[Vector[RFileMeta]] =
NonEmptyList.fromList(ids) match {
case Some(nel) =>
run(select(T.all), from(T), T.id.in(nel)).query[FileMeta].to[Vector]
run(select(T.all), from(T), T.id.in(nel)).query[RFileMeta].to[Vector]
case None =>
Vector.empty[FileMeta].pure[ConnectionIO]
Vector.empty[RFileMeta].pure[ConnectionIO]
}
}
def findMime(fid: Ident): ConnectionIO[Option[MimeType]] = {
import bitpeace.sql._
def findMime(fid: Ident): ConnectionIO[Option[MimeType]] =
run(select(T.mimetype), from(T), T.id === fid)
.query[Mimetype]
.query[MimeType]
.option
.map(_.map(_.toLocal))
}
def delete(id: Ident): ConnectionIO[Int] =
DML.delete(T, T.id === id)
}

View File

@ -8,16 +8,8 @@ package docspell.store.syntax
import docspell.common._
import bitpeace.Mimetype
object MimeTypes {
implicit final class BitpeaceMimeTypeOps(bmt: Mimetype) {
def toLocal: MimeType =
MimeType(bmt.primary, bmt.sub, bmt.params)
}
implicit final class EmilMimeTypeOps(emt: emil.MimeType) {
def toLocal: MimeType =
MimeType(emt.primary, emt.sub, emt.params)
@ -26,8 +18,5 @@ object MimeTypes {
implicit final class DocspellMimeTypeOps(mt: MimeType) {
def toEmil: emil.MimeType =
emil.MimeType(mt.primary, mt.sub, mt.params)
def toBitpeace: Mimetype =
Mimetype(mt.primary, mt.sub, mt.params)
}
}

View File

@ -6,10 +6,14 @@
package docspell.store
import javax.sql.DataSource
import cats.effect._
import docspell.common.LenientUri
import docspell.store.file.FileStore
import docspell.store.impl.StoreImpl
import docspell.store.migrate.FlywayMigrate
import doobie._
import munit._
@ -20,18 +24,17 @@ trait StoreFixture extends CatsEffectFunFixtures { self: CatsEffectSuite =>
val xa = ResourceFixture {
val cfg = StoreFixture.memoryDB("test")
for {
xa <- StoreFixture.makeXA(cfg)
store = new StoreImpl[IO](cfg, xa)
_ <- Resource.eval(store.migrate)
ds <- StoreFixture.dataSource(cfg)
xa <- StoreFixture.makeXA(ds)
_ <- Resource.eval(FlywayMigrate.run[IO](cfg))
} yield xa
}
val store = ResourceFixture {
val cfg = StoreFixture.memoryDB("test")
for {
xa <- StoreFixture.makeXA(cfg)
store = new StoreImpl[IO](cfg, xa)
_ <- Resource.eval(store.migrate)
store <- StoreFixture.store(cfg)
_ <- Resource.eval(store.migrate)
} yield store
}
}
@ -47,31 +50,24 @@ object StoreFixture {
""
)
def globalXA(jdbc: JdbcConfig): Transactor[IO] =
Transactor.fromDriverManager(
"org.h2.Driver",
jdbc.url.asString,
jdbc.user,
jdbc.password
)
def makeXA(jdbc: JdbcConfig): Resource[IO, Transactor[IO]] = {
def dataSource(jdbc: JdbcConfig): Resource[IO, JdbcConnectionPool] = {
def jdbcConnPool =
JdbcConnectionPool.create(jdbc.url.asString, jdbc.user, jdbc.password)
val makePool = Resource.make(IO(jdbcConnPool))(cp => IO(cp.dispose()))
for {
ec <- ExecutionContexts.cachedThreadPool[IO]
pool <- makePool
xa = Transactor.fromDataSource[IO].apply(pool, ec)
} yield xa
Resource.make(IO(jdbcConnPool))(cp => IO(cp.dispose()))
}
def store(jdbc: JdbcConfig): Resource[IO, Store[IO]] =
def makeXA(ds: DataSource): Resource[IO, Transactor[IO]] =
for {
xa <- makeXA(jdbc)
store = new StoreImpl[IO](jdbc, xa)
ec <- ExecutionContexts.cachedThreadPool[IO]
xa = Transactor.fromDataSource[IO](ds, ec)
} yield xa
def store(jdbc: JdbcConfig): Resource[IO, StoreImpl[IO]] =
for {
ds <- dataSource(jdbc)
xa <- makeXA(ds)
store = new StoreImpl[IO](FileStore[IO](xa, ds, 64 * 1024), jdbc, xa)
_ <- Resource.eval(store.migrate)
} yield store
}

View File

@ -7,7 +7,7 @@ object Dependencies {
val BcryptVersion = "0.4"
val BetterMonadicForVersion = "0.3.1"
val BitpeaceVersion = "0.9.0-M3"
val BinnyVersion = "0.1.0"
val CalevVersion = "0.6.0"
val CatsParseVersion = "0.3.4"
val CirceVersion = "0.14.1"
@ -273,8 +273,10 @@ object Dependencies {
"org.tpolecat" %% "doobie-hikari" % DoobieVersion
)
val bitpeace = Seq(
"com.github.eikek" %% "bitpeace-core" % BitpeaceVersion
val binny = Seq(
"com.github.eikek" %% "binny-core" % BinnyVersion,
"com.github.eikek" %% "binny-jdbc" % BinnyVersion,
"com.github.eikek" %% "binny-tika-detect" % BinnyVersion
)
// https://github.com/flyway/flyway