mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-03-25 16:45:05 +00:00
Improve glob and filter archive entries
This commit is contained in:
parent
27eb5d70de
commit
4fd6e02ec0
@ -61,7 +61,7 @@ object OUpload {
|
|||||||
folderId: Option[Ident],
|
folderId: Option[Ident],
|
||||||
validFileTypes: Seq[MimeType],
|
validFileTypes: Seq[MimeType],
|
||||||
skipDuplicates: Boolean,
|
skipDuplicates: Boolean,
|
||||||
fileFilter: Option[Glob],
|
fileFilter: Glob,
|
||||||
tags: List[String]
|
tags: List[String]
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -130,7 +130,7 @@ object OUpload {
|
|||||||
data.meta.folderId,
|
data.meta.folderId,
|
||||||
data.meta.validFileTypes,
|
data.meta.validFileTypes,
|
||||||
data.meta.skipDuplicates,
|
data.meta.skipDuplicates,
|
||||||
data.meta.fileFilter,
|
data.meta.fileFilter.some,
|
||||||
data.meta.tags.some
|
data.meta.tags.some
|
||||||
)
|
)
|
||||||
args =
|
args =
|
||||||
|
@ -1,27 +1,81 @@
|
|||||||
package docspell.common
|
package docspell.common
|
||||||
|
|
||||||
import cats.implicits._
|
|
||||||
import cats.data.NonEmptyList
|
import cats.data.NonEmptyList
|
||||||
|
import cats.implicits._
|
||||||
|
|
||||||
import io.circe.{Decoder, Encoder}
|
import io.circe.{Decoder, Encoder}
|
||||||
|
|
||||||
/** A very simple glob supporting only `*` and `?`. */
|
trait Glob {
|
||||||
final case class Glob(pattern: Glob.Pattern) {
|
|
||||||
def matches(in: String): Boolean =
|
|
||||||
pattern.parts
|
|
||||||
.zipWith(Glob.split(in, Glob.separator))(_.matches(_))
|
|
||||||
.forall(identity)
|
|
||||||
|
|
||||||
def asString: String =
|
/** Matches the input string against this glob. */
|
||||||
pattern.asString
|
def matches(in: String): Boolean
|
||||||
|
|
||||||
|
/** If this glob consists of multiple segments, it is the same as
|
||||||
|
* `matches`. If it is only a single segment, it is matched against
|
||||||
|
* the last segment of the input string that is assumed to be a
|
||||||
|
* pathname separated by slash.
|
||||||
|
*
|
||||||
|
* Example:
|
||||||
|
* test.* <> "/a/b/test.txt" => true
|
||||||
|
* /test.* <> "/a/b/test.txt" => false
|
||||||
|
*/
|
||||||
|
def matchFilenameOrPath(in: String): Boolean
|
||||||
|
|
||||||
|
def asString: String
|
||||||
}
|
}
|
||||||
|
|
||||||
object Glob {
|
object Glob {
|
||||||
private val separator = '/'
|
private val separator = '/'
|
||||||
|
private val anyChar = '|'
|
||||||
|
|
||||||
def apply(str: String): Glob =
|
val all = new Glob {
|
||||||
Glob(Pattern(split(str, separator).map(makeSegment)))
|
def matches(in: String) = true
|
||||||
|
def matchFilenameOrPath(in: String) = true
|
||||||
|
val asString = "*"
|
||||||
|
}
|
||||||
|
|
||||||
case class Pattern(parts: NonEmptyList[Segment]) {
|
def pattern(pattern: Pattern): Glob =
|
||||||
|
PatternGlob(pattern)
|
||||||
|
|
||||||
|
/** A simple glob supporting `*` and `?`. */
|
||||||
|
final private case class PatternGlob(pattern: Pattern) extends Glob {
|
||||||
|
def matches(in: String): Boolean =
|
||||||
|
pattern.parts
|
||||||
|
.zipWith(Glob.split(in, Glob.separator))(_.matches(_))
|
||||||
|
.forall(identity)
|
||||||
|
|
||||||
|
def matchFilenameOrPath(in: String): Boolean =
|
||||||
|
if (pattern.parts.tail.isEmpty) matches(split(in, separator).last)
|
||||||
|
else matches(in)
|
||||||
|
|
||||||
|
def asString: String =
|
||||||
|
pattern.asString
|
||||||
|
}
|
||||||
|
|
||||||
|
final private case class AnyGlob(globs: NonEmptyList[Glob]) extends Glob {
|
||||||
|
def matches(in: String) =
|
||||||
|
globs.exists(_.matches(in))
|
||||||
|
def matchFilenameOrPath(in: String) =
|
||||||
|
globs.exists(_.matchFilenameOrPath(in))
|
||||||
|
def asString =
|
||||||
|
globs.toList.map(_.asString).mkString(anyChar.toString)
|
||||||
|
}
|
||||||
|
|
||||||
|
def apply(in: String): Glob = {
|
||||||
|
def single(str: String) =
|
||||||
|
PatternGlob(Pattern(split(str, separator).map(makeSegment)))
|
||||||
|
|
||||||
|
if (in == "*") all
|
||||||
|
else
|
||||||
|
split(in, anyChar) match {
|
||||||
|
case NonEmptyList(_, Nil) =>
|
||||||
|
single(in)
|
||||||
|
case nel =>
|
||||||
|
AnyGlob(nel.map(_.trim).map(single))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
case class Pattern(parts: NonEmptyList[Segment]) {
|
||||||
def asString =
|
def asString =
|
||||||
parts.map(_.asString).toList.mkString(separator.toString)
|
parts.map(_.asString).toList.mkString(separator.toString)
|
||||||
}
|
}
|
||||||
|
@ -6,13 +6,15 @@ import Glob._
|
|||||||
object GlobTest extends SimpleTestSuite {
|
object GlobTest extends SimpleTestSuite {
|
||||||
|
|
||||||
test("literals") {
|
test("literals") {
|
||||||
assert(Glob(Pattern(Segment(Token.Literal("hello")))).matches("hello"))
|
assert(Glob.pattern(Pattern(Segment(Token.Literal("hello")))).matches("hello"))
|
||||||
assert(!Glob(Pattern(Segment(Token.Literal("hello")))).matches("hello1"))
|
assert(!Glob.pattern(Pattern(Segment(Token.Literal("hello")))).matches("hello1"))
|
||||||
}
|
}
|
||||||
|
|
||||||
test("single wildcards 1") {
|
test("single wildcards 1") {
|
||||||
val glob =
|
val glob =
|
||||||
Glob(Pattern(Segment(Token.Literal("s"), Token.Until("p"), Token.Until("t"))))
|
Glob.pattern(
|
||||||
|
Pattern(Segment(Token.Literal("s"), Token.Until("p"), Token.Until("t")))
|
||||||
|
)
|
||||||
|
|
||||||
assert(glob.matches("snapshot"))
|
assert(glob.matches("snapshot"))
|
||||||
assert(!glob.matches("snapshots"))
|
assert(!glob.matches("snapshots"))
|
||||||
@ -20,7 +22,7 @@ object GlobTest extends SimpleTestSuite {
|
|||||||
|
|
||||||
test("single wildcards 2") {
|
test("single wildcards 2") {
|
||||||
val glob =
|
val glob =
|
||||||
Glob(Pattern(Segment(Token.Literal("test."), Token.Until(""))))
|
Glob.pattern(Pattern(Segment(Token.Literal("test."), Token.Until(""))))
|
||||||
|
|
||||||
assert(glob.matches("test.txt"))
|
assert(glob.matches("test.txt"))
|
||||||
assert(glob.matches("test.pdf"))
|
assert(glob.matches("test.pdf"))
|
||||||
@ -32,28 +34,29 @@ object GlobTest extends SimpleTestSuite {
|
|||||||
test("single parsing") {
|
test("single parsing") {
|
||||||
assertEquals(
|
assertEquals(
|
||||||
Glob("s*p*t"),
|
Glob("s*p*t"),
|
||||||
Glob(Pattern(Segment(Token.Literal("s"), Token.Until("p"), Token.Until("t"))))
|
Glob.pattern(
|
||||||
|
Pattern(Segment(Token.Literal("s"), Token.Until("p"), Token.Until("t")))
|
||||||
|
)
|
||||||
)
|
)
|
||||||
assertEquals(
|
assertEquals(
|
||||||
Glob("s***p*t"),
|
Glob("s***p*t"),
|
||||||
Glob(Pattern(Segment(Token.Literal("s"), Token.Until("p"), Token.Until("t"))))
|
Glob.pattern(
|
||||||
|
Pattern(Segment(Token.Literal("s"), Token.Until("p"), Token.Until("t")))
|
||||||
|
)
|
||||||
)
|
)
|
||||||
assertEquals(
|
assertEquals(
|
||||||
Glob("test.*"),
|
Glob("test.*"),
|
||||||
Glob(Pattern(Segment(Token.Literal("test."), Token.Until(""))))
|
Glob.pattern(Pattern(Segment(Token.Literal("test."), Token.Until(""))))
|
||||||
)
|
)
|
||||||
assertEquals(
|
assertEquals(
|
||||||
Glob("stop"),
|
Glob("stop"),
|
||||||
Glob(Pattern(Segment(Token.Literal("stop"))))
|
Glob.pattern(Pattern(Segment(Token.Literal("stop"))))
|
||||||
)
|
)
|
||||||
assertEquals(
|
assertEquals(
|
||||||
Glob("*stop"),
|
Glob("*stop"),
|
||||||
Glob(Pattern(Segment(Token.Until("stop"))))
|
Glob.pattern(Pattern(Segment(Token.Until("stop"))))
|
||||||
)
|
|
||||||
assertEquals(
|
|
||||||
Glob("*"),
|
|
||||||
Glob(Pattern(Segment(Token.Until(""))))
|
|
||||||
)
|
)
|
||||||
|
assertEquals(Glob("*"), Glob.all)
|
||||||
}
|
}
|
||||||
|
|
||||||
test("with splitting") {
|
test("with splitting") {
|
||||||
@ -71,5 +74,38 @@ object GlobTest extends SimpleTestSuite {
|
|||||||
assertEquals(Glob("stop").asString, "stop")
|
assertEquals(Glob("stop").asString, "stop")
|
||||||
assertEquals(Glob("*stop").asString, "*stop")
|
assertEquals(Glob("*stop").asString, "*stop")
|
||||||
assertEquals(Glob("/a/b/*").asString, "/a/b/*")
|
assertEquals(Glob("/a/b/*").asString, "/a/b/*")
|
||||||
|
assertEquals(Glob("*").asString, "*")
|
||||||
|
assertEquals(Glob.all.asString, "*")
|
||||||
|
}
|
||||||
|
|
||||||
|
test("simple matches") {
|
||||||
|
assert(Glob("/test.*").matches("/test.pdf"))
|
||||||
|
assert(!Glob("/test.*").matches("test.pdf"))
|
||||||
|
assert(!Glob("test.*").matches("/test.pdf"))
|
||||||
|
}
|
||||||
|
|
||||||
|
test("matchFilenameOrPath") {
|
||||||
|
assert(Glob("test.*").matchFilenameOrPath("/a/b/test.pdf"))
|
||||||
|
assert(!Glob("/test.*").matchFilenameOrPath("/a/b/test.pdf"))
|
||||||
|
assert(Glob("s*p*t").matchFilenameOrPath("snapshot"))
|
||||||
|
assert(Glob("s*p*t").matchFilenameOrPath("/tmp/snapshot"))
|
||||||
|
assert(Glob("/tmp/s*p*t").matchFilenameOrPath("/tmp/snapshot"))
|
||||||
|
|
||||||
|
assert(Glob("a/b/*").matchFilenameOrPath("a/b/hello"))
|
||||||
|
assert(!Glob("a/b/*").matchFilenameOrPath("/a/b/hello"))
|
||||||
|
assert(Glob("/a/b/*").matchFilenameOrPath("/a/b/hello"))
|
||||||
|
assert(!Glob("/a/b/*").matchFilenameOrPath("a/b/hello"))
|
||||||
|
assert(!Glob("*/a/b/*").matchFilenameOrPath("a/b/hello"))
|
||||||
|
assert(Glob("*/a/b/*").matchFilenameOrPath("test/a/b/hello"))
|
||||||
|
}
|
||||||
|
|
||||||
|
test("anyglob") {
|
||||||
|
assert(Glob("*.pdf|*.txt").matches("test.pdf"))
|
||||||
|
assert(Glob("*.pdf|*.txt").matches("test.txt"))
|
||||||
|
assert(!Glob("*.pdf|*.txt").matches("test.xls"))
|
||||||
|
assert(Glob("*.pdf | *.txt").matches("test.pdf"))
|
||||||
|
assert(Glob("*.pdf | mail.html").matches("test.pdf"))
|
||||||
|
assert(Glob("*.pdf | mail.html").matches("mail.html"))
|
||||||
|
assert(!Glob("*.pdf | mail.html").matches("test.docx"))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -9,24 +9,33 @@ import cats.implicits._
|
|||||||
import fs2.{Pipe, Stream}
|
import fs2.{Pipe, Stream}
|
||||||
|
|
||||||
import docspell.common.Binary
|
import docspell.common.Binary
|
||||||
|
import docspell.common.Glob
|
||||||
|
|
||||||
object Zip {
|
object Zip {
|
||||||
|
|
||||||
def unzipP[F[_]: ConcurrentEffect: ContextShift](
|
def unzipP[F[_]: ConcurrentEffect: ContextShift](
|
||||||
chunkSize: Int,
|
chunkSize: Int,
|
||||||
blocker: Blocker
|
blocker: Blocker,
|
||||||
|
glob: Glob
|
||||||
): Pipe[F, Byte, Binary[F]] =
|
): Pipe[F, Byte, Binary[F]] =
|
||||||
s => unzip[F](chunkSize, blocker)(s)
|
s => unzip[F](chunkSize, blocker, glob)(s)
|
||||||
|
|
||||||
def unzip[F[_]: ConcurrentEffect: ContextShift](chunkSize: Int, blocker: Blocker)(
|
def unzip[F[_]: ConcurrentEffect: ContextShift](
|
||||||
|
chunkSize: Int,
|
||||||
|
blocker: Blocker,
|
||||||
|
glob: Glob
|
||||||
|
)(
|
||||||
data: Stream[F, Byte]
|
data: Stream[F, Byte]
|
||||||
): Stream[F, Binary[F]] =
|
): Stream[F, Binary[F]] =
|
||||||
data.through(fs2.io.toInputStream[F]).flatMap(in => unzipJava(in, chunkSize, blocker))
|
data
|
||||||
|
.through(fs2.io.toInputStream[F])
|
||||||
|
.flatMap(in => unzipJava(in, chunkSize, blocker, glob))
|
||||||
|
|
||||||
def unzipJava[F[_]: Sync: ContextShift](
|
def unzipJava[F[_]: Sync: ContextShift](
|
||||||
in: InputStream,
|
in: InputStream,
|
||||||
chunkSize: Int,
|
chunkSize: Int,
|
||||||
blocker: Blocker
|
blocker: Blocker,
|
||||||
|
glob: Glob
|
||||||
): Stream[F, Binary[F]] = {
|
): Stream[F, Binary[F]] = {
|
||||||
val zin = new ZipInputStream(in)
|
val zin = new ZipInputStream(in)
|
||||||
|
|
||||||
@ -39,6 +48,7 @@ object Zip {
|
|||||||
.resource(nextEntry)
|
.resource(nextEntry)
|
||||||
.repeat
|
.repeat
|
||||||
.unNoneTerminate
|
.unNoneTerminate
|
||||||
|
.filter(ze => glob.matchFilenameOrPath(ze.getName()))
|
||||||
.map { ze =>
|
.map { ze =>
|
||||||
val name = Paths.get(ze.getName()).getFileName.toString
|
val name = Paths.get(ze.getName()).getFileName.toString
|
||||||
val data =
|
val data =
|
||||||
|
@ -4,6 +4,7 @@ import minitest._
|
|||||||
import cats.effect._
|
import cats.effect._
|
||||||
import cats.implicits._
|
import cats.implicits._
|
||||||
import scala.concurrent.ExecutionContext
|
import scala.concurrent.ExecutionContext
|
||||||
|
import docspell.common.Glob
|
||||||
|
|
||||||
object ZipTest extends SimpleTestSuite {
|
object ZipTest extends SimpleTestSuite {
|
||||||
|
|
||||||
@ -12,7 +13,7 @@ object ZipTest extends SimpleTestSuite {
|
|||||||
|
|
||||||
test("unzip") {
|
test("unzip") {
|
||||||
val zipFile = ExampleFiles.letters_zip.readURL[IO](8192, blocker)
|
val zipFile = ExampleFiles.letters_zip.readURL[IO](8192, blocker)
|
||||||
val uncomp = zipFile.through(Zip.unzip(8192, blocker))
|
val uncomp = zipFile.through(Zip.unzip(8192, blocker, Glob.all))
|
||||||
|
|
||||||
uncomp
|
uncomp
|
||||||
.evalMap { entry =>
|
.evalMap { entry =>
|
||||||
|
@ -16,9 +16,10 @@ import emil.{MimeType => _, _}
|
|||||||
object ReadMail {
|
object ReadMail {
|
||||||
|
|
||||||
def readBytesP[F[_]: ConcurrentEffect: ContextShift](
|
def readBytesP[F[_]: ConcurrentEffect: ContextShift](
|
||||||
logger: Logger[F]
|
logger: Logger[F],
|
||||||
|
glob: Glob
|
||||||
): Pipe[F, Byte, Binary[F]] =
|
): Pipe[F, Byte, Binary[F]] =
|
||||||
_.through(bytesToMail(logger)).flatMap(mailToEntries[F](logger))
|
_.through(bytesToMail(logger)).flatMap(mailToEntries[F](logger, glob))
|
||||||
|
|
||||||
def bytesToMail[F[_]: Sync](logger: Logger[F]): Pipe[F, Byte, Mail[F]] =
|
def bytesToMail[F[_]: Sync](logger: Logger[F]): Pipe[F, Byte, Mail[F]] =
|
||||||
s =>
|
s =>
|
||||||
@ -26,7 +27,8 @@ object ReadMail {
|
|||||||
s.through(Mail.readBytes[F])
|
s.through(Mail.readBytes[F])
|
||||||
|
|
||||||
def mailToEntries[F[_]: ConcurrentEffect: ContextShift](
|
def mailToEntries[F[_]: ConcurrentEffect: ContextShift](
|
||||||
logger: Logger[F]
|
logger: Logger[F],
|
||||||
|
glob: Glob
|
||||||
)(mail: Mail[F]): Stream[F, Binary[F]] = {
|
)(mail: Mail[F]): Stream[F, Binary[F]] = {
|
||||||
val bodyEntry: F[Option[Binary[F]]] =
|
val bodyEntry: F[Option[Binary[F]]] =
|
||||||
if (mail.body.isEmpty) (None: Option[Binary[F]]).pure[F]
|
if (mail.body.isEmpty) (None: Option[Binary[F]]).pure[F]
|
||||||
@ -48,10 +50,12 @@ object ReadMail {
|
|||||||
) >>
|
) >>
|
||||||
(Stream
|
(Stream
|
||||||
.eval(bodyEntry)
|
.eval(bodyEntry)
|
||||||
.flatMap(e => Stream.emits(e.toSeq)) ++
|
.flatMap(e => Stream.emits(e.toSeq))
|
||||||
|
.filter(a => glob.matches(a.name)) ++
|
||||||
Stream
|
Stream
|
||||||
.eval(TnefExtract.replace(mail))
|
.eval(TnefExtract.replace(mail))
|
||||||
.flatMap(m => Stream.emits(m.attachments.all))
|
.flatMap(m => Stream.emits(m.attachments.all))
|
||||||
|
.filter(a => a.filename.exists(glob.matches))
|
||||||
.map(a =>
|
.map(a =>
|
||||||
Binary(a.filename.getOrElse("noname"), a.mimeType.toLocal, a.content)
|
Binary(a.filename.getOrElse("noname"), a.mimeType.toLocal, a.content)
|
||||||
))
|
))
|
||||||
|
@ -95,12 +95,12 @@ object ExtractArchive {
|
|||||||
case MimeType.ZipMatch(_) if ra.name.exists(_.endsWith(".zip")) =>
|
case MimeType.ZipMatch(_) if ra.name.exists(_.endsWith(".zip")) =>
|
||||||
ctx.logger.info(s"Extracting zip archive ${ra.name.getOrElse("<noname>")}.") *>
|
ctx.logger.info(s"Extracting zip archive ${ra.name.getOrElse("<noname>")}.") *>
|
||||||
extractZip(ctx, archive)(ra, pos)
|
extractZip(ctx, archive)(ra, pos)
|
||||||
.flatTap(_ => cleanupParents(ctx, ra, archive))
|
.flatMap(cleanupParents(ctx, ra, archive))
|
||||||
|
|
||||||
case MimeType.EmailMatch(_) =>
|
case MimeType.EmailMatch(_) =>
|
||||||
ctx.logger.info(s"Reading e-mail ${ra.name.getOrElse("<noname>")}") *>
|
ctx.logger.info(s"Reading e-mail ${ra.name.getOrElse("<noname>")}") *>
|
||||||
extractMail(ctx, archive)(ra, pos)
|
extractMail(ctx, archive)(ra, pos)
|
||||||
.flatTap(_ => cleanupParents(ctx, ra, archive))
|
.flatMap(cleanupParents(ctx, ra, archive))
|
||||||
|
|
||||||
case _ =>
|
case _ =>
|
||||||
ctx.logger.debug(s"Not an archive: ${mime.asString}") *>
|
ctx.logger.debug(s"Not an archive: ${mime.asString}") *>
|
||||||
@ -111,7 +111,7 @@ object ExtractArchive {
|
|||||||
ctx: Context[F, _],
|
ctx: Context[F, _],
|
||||||
ra: RAttachment,
|
ra: RAttachment,
|
||||||
archive: Option[RAttachmentArchive]
|
archive: Option[RAttachmentArchive]
|
||||||
): F[Unit] =
|
)(extracted: Extracted): F[Extracted] =
|
||||||
archive match {
|
archive match {
|
||||||
case Some(_) =>
|
case Some(_) =>
|
||||||
for {
|
for {
|
||||||
@ -121,36 +121,37 @@ object ExtractArchive {
|
|||||||
_ <- ctx.store.transact(RAttachmentArchive.delete(ra.id))
|
_ <- ctx.store.transact(RAttachmentArchive.delete(ra.id))
|
||||||
_ <- ctx.store.transact(RAttachment.delete(ra.id))
|
_ <- ctx.store.transact(RAttachment.delete(ra.id))
|
||||||
_ <- ctx.store.bitpeace.delete(ra.fileId.id).compile.drain
|
_ <- ctx.store.bitpeace.delete(ra.fileId.id).compile.drain
|
||||||
} yield ()
|
} yield extracted
|
||||||
case None =>
|
case None =>
|
||||||
for {
|
for {
|
||||||
_ <- ctx.logger.debug(
|
_ <- ctx.logger.debug(
|
||||||
s"Extracted attachment ${ra.name}. Remove it from the item."
|
s"Extracted attachment ${ra.name}. Remove it from the item."
|
||||||
)
|
)
|
||||||
_ <- ctx.store.transact(RAttachment.delete(ra.id))
|
_ <- ctx.store.transact(RAttachment.delete(ra.id))
|
||||||
} yield ()
|
} yield extracted.copy(files = extracted.files.filter(_.id != ra.id))
|
||||||
}
|
}
|
||||||
|
|
||||||
def extractZip[F[_]: ConcurrentEffect: ContextShift](
|
def extractZip[F[_]: ConcurrentEffect: ContextShift](
|
||||||
ctx: Context[F, _],
|
ctx: Context[F, ProcessItemArgs],
|
||||||
archive: Option[RAttachmentArchive]
|
archive: Option[RAttachmentArchive]
|
||||||
)(ra: RAttachment, pos: Int): F[Extracted] = {
|
)(ra: RAttachment, pos: Int): F[Extracted] = {
|
||||||
val zipData = ctx.store.bitpeace
|
val zipData = ctx.store.bitpeace
|
||||||
.get(ra.fileId.id)
|
.get(ra.fileId.id)
|
||||||
.unNoneTerminate
|
.unNoneTerminate
|
||||||
.through(ctx.store.bitpeace.fetchData2(RangeDef.all))
|
.through(ctx.store.bitpeace.fetchData2(RangeDef.all))
|
||||||
|
val glob = ctx.args.meta.fileFilter.getOrElse(Glob.all)
|
||||||
zipData
|
ctx.logger.debug(s"Filtering zip entries with '${glob.asString}'") *>
|
||||||
.through(Zip.unzipP[F](8192, ctx.blocker))
|
zipData
|
||||||
.zipWithIndex
|
.through(Zip.unzipP[F](8192, ctx.blocker, glob))
|
||||||
.flatMap(handleEntry(ctx, ra, pos, archive, None))
|
.zipWithIndex
|
||||||
.foldMonoid
|
.flatMap(handleEntry(ctx, ra, pos, archive, None))
|
||||||
.compile
|
.foldMonoid
|
||||||
.lastOrError
|
.compile
|
||||||
|
.lastOrError
|
||||||
}
|
}
|
||||||
|
|
||||||
def extractMail[F[_]: ConcurrentEffect: ContextShift](
|
def extractMail[F[_]: ConcurrentEffect: ContextShift](
|
||||||
ctx: Context[F, _],
|
ctx: Context[F, ProcessItemArgs],
|
||||||
archive: Option[RAttachmentArchive]
|
archive: Option[RAttachmentArchive]
|
||||||
)(ra: RAttachment, pos: Int): F[Extracted] = {
|
)(ra: RAttachment, pos: Int): F[Extracted] = {
|
||||||
val email: Stream[F, Byte] = ctx.store.bitpeace
|
val email: Stream[F, Byte] = ctx.store.bitpeace
|
||||||
@ -158,24 +159,26 @@ object ExtractArchive {
|
|||||||
.unNoneTerminate
|
.unNoneTerminate
|
||||||
.through(ctx.store.bitpeace.fetchData2(RangeDef.all))
|
.through(ctx.store.bitpeace.fetchData2(RangeDef.all))
|
||||||
|
|
||||||
email
|
val glob = ctx.args.meta.fileFilter.getOrElse(Glob.all)
|
||||||
.through(ReadMail.bytesToMail[F](ctx.logger))
|
ctx.logger.debug(s"Filtering email attachments with '${glob.asString}'") *>
|
||||||
.flatMap { mail =>
|
email
|
||||||
val mId = mail.header.messageId
|
.through(ReadMail.bytesToMail[F](ctx.logger))
|
||||||
val givenMeta =
|
.flatMap { mail =>
|
||||||
for {
|
val mId = mail.header.messageId
|
||||||
_ <- ctx.logger.debug(s"Use mail date for item date: ${mail.header.date}")
|
val givenMeta =
|
||||||
s <- Sync[F].delay(extractMailMeta(mail))
|
for {
|
||||||
} yield s
|
_ <- ctx.logger.debug(s"Use mail date for item date: ${mail.header.date}")
|
||||||
|
s <- Sync[F].delay(extractMailMeta(mail))
|
||||||
|
} yield s
|
||||||
|
|
||||||
ReadMail
|
ReadMail
|
||||||
.mailToEntries(ctx.logger)(mail)
|
.mailToEntries(ctx.logger, glob)(mail)
|
||||||
.zipWithIndex
|
.zipWithIndex
|
||||||
.flatMap(handleEntry(ctx, ra, pos, archive, mId)) ++ Stream.eval(givenMeta)
|
.flatMap(handleEntry(ctx, ra, pos, archive, mId)) ++ Stream.eval(givenMeta)
|
||||||
}
|
}
|
||||||
.foldMonoid
|
.foldMonoid
|
||||||
.compile
|
.compile
|
||||||
.lastOrError
|
.lastOrError
|
||||||
}
|
}
|
||||||
|
|
||||||
def extractMailMeta[F[_]](mail: Mail[F]): Extracted =
|
def extractMailMeta[F[_]](mail: Mail[F]): Extracted =
|
||||||
@ -239,6 +242,9 @@ object ExtractArchive {
|
|||||||
positions ++ e.positions
|
positions ++ e.positions
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def filterNames(filter: Glob): Extracted =
|
||||||
|
copy(files = files.filter(ra => filter.matches(ra.name.getOrElse(""))))
|
||||||
|
|
||||||
def setMeta(m: MetaProposal): Extracted =
|
def setMeta(m: MetaProposal): Extracted =
|
||||||
setMeta(MetaProposalList.of(m))
|
setMeta(MetaProposalList.of(m))
|
||||||
|
|
||||||
|
@ -25,6 +25,7 @@ object ProcessItem {
|
|||||||
.flatMap(LinkProposal[F])
|
.flatMap(LinkProposal[F])
|
||||||
.flatMap(SetGivenData[F](itemOps))
|
.flatMap(SetGivenData[F](itemOps))
|
||||||
.flatMap(Task.setProgress(99))
|
.flatMap(Task.setProgress(99))
|
||||||
|
.flatMap(RemoveEmptyItem(itemOps))
|
||||||
|
|
||||||
def processAttachments[F[_]: ConcurrentEffect: ContextShift](
|
def processAttachments[F[_]: ConcurrentEffect: ContextShift](
|
||||||
cfg: Config,
|
cfg: Config,
|
||||||
|
@ -0,0 +1,26 @@
|
|||||||
|
package docspell.joex.process
|
||||||
|
|
||||||
|
import cats.effect._
|
||||||
|
import cats.implicits._
|
||||||
|
|
||||||
|
import docspell.backend.ops.OItem
|
||||||
|
import docspell.common._
|
||||||
|
import docspell.joex.scheduler.Task
|
||||||
|
|
||||||
|
object RemoveEmptyItem {
|
||||||
|
|
||||||
|
def apply[F[_]: Sync](
|
||||||
|
ops: OItem[F]
|
||||||
|
)(data: ItemData): Task[F, ProcessItemArgs, ItemData] =
|
||||||
|
if (data.item.state.isInvalid && data.attachments.isEmpty)
|
||||||
|
Task { ctx =>
|
||||||
|
for {
|
||||||
|
_ <- ctx.logger.warn(s"Removing item as it doesn't have any attachments!")
|
||||||
|
n <- ops.deleteItem(data.item.id, data.item.cid)
|
||||||
|
_ <- ctx.logger.warn(s"Removed item ($n). No item has been created!")
|
||||||
|
} yield data
|
||||||
|
}
|
||||||
|
else
|
||||||
|
Task.pure(data)
|
||||||
|
|
||||||
|
}
|
@ -256,7 +256,7 @@ object ScanMailboxTask {
|
|||||||
args.itemFolder,
|
args.itemFolder,
|
||||||
Seq.empty,
|
Seq.empty,
|
||||||
true,
|
true,
|
||||||
args.fileFilter,
|
args.fileFilter.getOrElse(Glob.all),
|
||||||
args.tags.getOrElse(Nil)
|
args.tags.getOrElse(Nil)
|
||||||
)
|
)
|
||||||
data = OUpload.UploadData(
|
data = OUpload.UploadData(
|
||||||
|
@ -311,14 +311,15 @@ trait Conversions {
|
|||||||
m.folder,
|
m.folder,
|
||||||
validFileTypes,
|
validFileTypes,
|
||||||
m.skipDuplicates.getOrElse(false),
|
m.skipDuplicates.getOrElse(false),
|
||||||
m.fileFilter,
|
m.fileFilter.getOrElse(Glob.all),
|
||||||
m.tags.map(_.items).getOrElse(Nil)
|
m.tags.map(_.items).getOrElse(Nil)
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
.getOrElse(
|
.getOrElse(
|
||||||
(true, UploadMeta(None, sourceName, None, validFileTypes, false, None, Nil)).pure[F]
|
(true, UploadMeta(None, sourceName, None, validFileTypes, false, Glob.all, Nil))
|
||||||
|
.pure[F]
|
||||||
)
|
)
|
||||||
|
|
||||||
val files = mp.parts
|
val files = mp.parts
|
||||||
|
Loading…
x
Reference in New Issue
Block a user