From 4fd6e02ec0a9ca2f71d568e9166d37174ea63d05 Mon Sep 17 00:00:00 2001
From: Eike Kettner <eike.kettner@posteo.de>
Date: Wed, 11 Nov 2020 08:52:21 +0100
Subject: [PATCH] Improve glob and filter archive entries

---
 .../scala/docspell/backend/ops/OUpload.scala  |  4 +-
 .../src/main/scala/docspell/common/Glob.scala | 78 ++++++++++++++++---
 .../test/scala/docspell/common/GlobTest.scala | 62 +++++++++++----
 .../src/main/scala/docspell/files/Zip.scala   | 20 +++--
 .../test/scala/docspell/files/ZipTest.scala   |  3 +-
 .../scala/docspell/joex/mail/ReadMail.scala   | 12 ++-
 .../joex/process/ExtractArchive.scala         | 70 +++++++++--------
 .../docspell/joex/process/ProcessItem.scala   |  1 +
 .../joex/process/RemoveEmptyItem.scala        | 26 +++++++
 .../joex/scanmailbox/ScanMailboxTask.scala    |  2 +-
 .../restserver/conv/Conversions.scala         |  5 +-
 11 files changed, 211 insertions(+), 72 deletions(-)
 create mode 100644 modules/joex/src/main/scala/docspell/joex/process/RemoveEmptyItem.scala

diff --git a/modules/backend/src/main/scala/docspell/backend/ops/OUpload.scala b/modules/backend/src/main/scala/docspell/backend/ops/OUpload.scala
index 8723ed40..efe84d60 100644
--- a/modules/backend/src/main/scala/docspell/backend/ops/OUpload.scala
+++ b/modules/backend/src/main/scala/docspell/backend/ops/OUpload.scala
@@ -61,7 +61,7 @@ object OUpload {
       folderId: Option[Ident],
       validFileTypes: Seq[MimeType],
       skipDuplicates: Boolean,
-      fileFilter: Option[Glob],
+      fileFilter: Glob,
       tags: List[String]
   )
 
@@ -130,7 +130,7 @@ object OUpload {
             data.meta.folderId,
             data.meta.validFileTypes,
             data.meta.skipDuplicates,
-            data.meta.fileFilter,
+            data.meta.fileFilter.some,
             data.meta.tags.some
           )
           args =
diff --git a/modules/common/src/main/scala/docspell/common/Glob.scala b/modules/common/src/main/scala/docspell/common/Glob.scala
index 5eba3f73..afa04a02 100644
--- a/modules/common/src/main/scala/docspell/common/Glob.scala
+++ b/modules/common/src/main/scala/docspell/common/Glob.scala
@@ -1,27 +1,81 @@
 package docspell.common
 
-import cats.implicits._
 import cats.data.NonEmptyList
+import cats.implicits._
+
 import io.circe.{Decoder, Encoder}
 
-/** A very simple glob supporting only `*` and `?`. */
-final case class Glob(pattern: Glob.Pattern) {
-  def matches(in: String): Boolean =
-    pattern.parts
-      .zipWith(Glob.split(in, Glob.separator))(_.matches(_))
-      .forall(identity)
+trait Glob {
 
-  def asString: String =
-    pattern.asString
+  /** Matches the input string against this glob. */
+  def matches(in: String): Boolean
+
+  /** If this glob consists of multiple segments, it is the same as
+    * `matches`. If it is only a single segment, it is matched against
+    * the last segment of the input string that is assumed to be a
+    * pathname separated by slash.
+    *
+    * Example:
+    *  test.*  <>  "/a/b/test.txt" => true
+    *  /test.* <>  "/a/b/test.txt" => false
+    */
+  def matchFilenameOrPath(in: String): Boolean
+
+  def asString: String
 }
 
 object Glob {
   private val separator = '/'
+  private val anyChar   = '|'
 
-  def apply(str: String): Glob =
-    Glob(Pattern(split(str, separator).map(makeSegment)))
+  val all = new Glob {
+    def matches(in: String)             = true
+    def matchFilenameOrPath(in: String) = true
+    val asString                        = "*"
+  }
 
-  case class Pattern(parts: NonEmptyList[Segment])  {
+  def pattern(pattern: Pattern): Glob =
+    PatternGlob(pattern)
+
+  /** A simple glob supporting `*` and `?`. */
+  final private case class PatternGlob(pattern: Pattern) extends Glob {
+    def matches(in: String): Boolean =
+      pattern.parts
+        .zipWith(Glob.split(in, Glob.separator))(_.matches(_))
+        .forall(identity)
+
+    def matchFilenameOrPath(in: String): Boolean =
+      if (pattern.parts.tail.isEmpty) matches(split(in, separator).last)
+      else matches(in)
+
+    def asString: String =
+      pattern.asString
+  }
+
+  final private case class AnyGlob(globs: NonEmptyList[Glob]) extends Glob {
+    def matches(in: String) =
+      globs.exists(_.matches(in))
+    def matchFilenameOrPath(in: String) =
+      globs.exists(_.matchFilenameOrPath(in))
+    def asString =
+      globs.toList.map(_.asString).mkString(anyChar.toString)
+  }
+
+  def apply(in: String): Glob = {
+    def single(str: String) =
+      PatternGlob(Pattern(split(str, separator).map(makeSegment)))
+
+    if (in == "*") all
+    else
+      split(in, anyChar) match {
+        case NonEmptyList(_, Nil) =>
+          single(in)
+        case nel =>
+          AnyGlob(nel.map(_.trim).map(single))
+      }
+  }
+
+  case class Pattern(parts: NonEmptyList[Segment]) {
     def asString =
       parts.map(_.asString).toList.mkString(separator.toString)
   }
diff --git a/modules/common/src/test/scala/docspell/common/GlobTest.scala b/modules/common/src/test/scala/docspell/common/GlobTest.scala
index 05df0699..8f228851 100644
--- a/modules/common/src/test/scala/docspell/common/GlobTest.scala
+++ b/modules/common/src/test/scala/docspell/common/GlobTest.scala
@@ -6,13 +6,15 @@ import Glob._
 object GlobTest extends SimpleTestSuite {
 
   test("literals") {
-    assert(Glob(Pattern(Segment(Token.Literal("hello")))).matches("hello"))
-    assert(!Glob(Pattern(Segment(Token.Literal("hello")))).matches("hello1"))
+    assert(Glob.pattern(Pattern(Segment(Token.Literal("hello")))).matches("hello"))
+    assert(!Glob.pattern(Pattern(Segment(Token.Literal("hello")))).matches("hello1"))
   }
 
   test("single wildcards 1") {
     val glob =
-      Glob(Pattern(Segment(Token.Literal("s"), Token.Until("p"), Token.Until("t"))))
+      Glob.pattern(
+        Pattern(Segment(Token.Literal("s"), Token.Until("p"), Token.Until("t")))
+      )
 
     assert(glob.matches("snapshot"))
     assert(!glob.matches("snapshots"))
@@ -20,7 +22,7 @@ object GlobTest extends SimpleTestSuite {
 
   test("single wildcards 2") {
     val glob =
-      Glob(Pattern(Segment(Token.Literal("test."), Token.Until(""))))
+      Glob.pattern(Pattern(Segment(Token.Literal("test."), Token.Until(""))))
 
     assert(glob.matches("test.txt"))
     assert(glob.matches("test.pdf"))
@@ -32,28 +34,29 @@ object GlobTest extends SimpleTestSuite {
   test("single parsing") {
     assertEquals(
       Glob("s*p*t"),
-      Glob(Pattern(Segment(Token.Literal("s"), Token.Until("p"), Token.Until("t"))))
+      Glob.pattern(
+        Pattern(Segment(Token.Literal("s"), Token.Until("p"), Token.Until("t")))
+      )
     )
     assertEquals(
       Glob("s***p*t"),
-      Glob(Pattern(Segment(Token.Literal("s"), Token.Until("p"), Token.Until("t"))))
+      Glob.pattern(
+        Pattern(Segment(Token.Literal("s"), Token.Until("p"), Token.Until("t")))
+      )
     )
     assertEquals(
       Glob("test.*"),
-      Glob(Pattern(Segment(Token.Literal("test."), Token.Until(""))))
+      Glob.pattern(Pattern(Segment(Token.Literal("test."), Token.Until(""))))
     )
     assertEquals(
       Glob("stop"),
-      Glob(Pattern(Segment(Token.Literal("stop"))))
+      Glob.pattern(Pattern(Segment(Token.Literal("stop"))))
     )
     assertEquals(
       Glob("*stop"),
-      Glob(Pattern(Segment(Token.Until("stop"))))
-    )
-    assertEquals(
-      Glob("*"),
-      Glob(Pattern(Segment(Token.Until(""))))
+      Glob.pattern(Pattern(Segment(Token.Until("stop"))))
     )
+    assertEquals(Glob("*"), Glob.all)
   }
 
   test("with splitting") {
@@ -71,5 +74,38 @@ object GlobTest extends SimpleTestSuite {
     assertEquals(Glob("stop").asString, "stop")
     assertEquals(Glob("*stop").asString, "*stop")
     assertEquals(Glob("/a/b/*").asString, "/a/b/*")
+    assertEquals(Glob("*").asString, "*")
+    assertEquals(Glob.all.asString, "*")
+  }
+
+  test("simple matches") {
+    assert(Glob("/test.*").matches("/test.pdf"))
+    assert(!Glob("/test.*").matches("test.pdf"))
+    assert(!Glob("test.*").matches("/test.pdf"))
+  }
+
+  test("matchFilenameOrPath") {
+    assert(Glob("test.*").matchFilenameOrPath("/a/b/test.pdf"))
+    assert(!Glob("/test.*").matchFilenameOrPath("/a/b/test.pdf"))
+    assert(Glob("s*p*t").matchFilenameOrPath("snapshot"))
+    assert(Glob("s*p*t").matchFilenameOrPath("/tmp/snapshot"))
+    assert(Glob("/tmp/s*p*t").matchFilenameOrPath("/tmp/snapshot"))
+
+    assert(Glob("a/b/*").matchFilenameOrPath("a/b/hello"))
+    assert(!Glob("a/b/*").matchFilenameOrPath("/a/b/hello"))
+    assert(Glob("/a/b/*").matchFilenameOrPath("/a/b/hello"))
+    assert(!Glob("/a/b/*").matchFilenameOrPath("a/b/hello"))
+    assert(!Glob("*/a/b/*").matchFilenameOrPath("a/b/hello"))
+    assert(Glob("*/a/b/*").matchFilenameOrPath("test/a/b/hello"))
+  }
+
+  test("anyglob") {
+    assert(Glob("*.pdf|*.txt").matches("test.pdf"))
+    assert(Glob("*.pdf|*.txt").matches("test.txt"))
+    assert(!Glob("*.pdf|*.txt").matches("test.xls"))
+    assert(Glob("*.pdf | *.txt").matches("test.pdf"))
+    assert(Glob("*.pdf | mail.html").matches("test.pdf"))
+    assert(Glob("*.pdf | mail.html").matches("mail.html"))
+    assert(!Glob("*.pdf | mail.html").matches("test.docx"))
   }
 }
diff --git a/modules/files/src/main/scala/docspell/files/Zip.scala b/modules/files/src/main/scala/docspell/files/Zip.scala
index f09bd66d..5450cbf7 100644
--- a/modules/files/src/main/scala/docspell/files/Zip.scala
+++ b/modules/files/src/main/scala/docspell/files/Zip.scala
@@ -9,24 +9,33 @@ import cats.implicits._
 import fs2.{Pipe, Stream}
 
 import docspell.common.Binary
+import docspell.common.Glob
 
 object Zip {
 
   def unzipP[F[_]: ConcurrentEffect: ContextShift](
       chunkSize: Int,
-      blocker: Blocker
+      blocker: Blocker,
+      glob: Glob
   ): Pipe[F, Byte, Binary[F]] =
-    s => unzip[F](chunkSize, blocker)(s)
+    s => unzip[F](chunkSize, blocker, glob)(s)
 
-  def unzip[F[_]: ConcurrentEffect: ContextShift](chunkSize: Int, blocker: Blocker)(
+  def unzip[F[_]: ConcurrentEffect: ContextShift](
+      chunkSize: Int,
+      blocker: Blocker,
+      glob: Glob
+  )(
       data: Stream[F, Byte]
   ): Stream[F, Binary[F]] =
-    data.through(fs2.io.toInputStream[F]).flatMap(in => unzipJava(in, chunkSize, blocker))
+    data
+      .through(fs2.io.toInputStream[F])
+      .flatMap(in => unzipJava(in, chunkSize, blocker, glob))
 
   def unzipJava[F[_]: Sync: ContextShift](
       in: InputStream,
       chunkSize: Int,
-      blocker: Blocker
+      blocker: Blocker,
+      glob: Glob
   ): Stream[F, Binary[F]] = {
     val zin = new ZipInputStream(in)
 
@@ -39,6 +48,7 @@ object Zip {
       .resource(nextEntry)
       .repeat
       .unNoneTerminate
+      .filter(ze => glob.matchFilenameOrPath(ze.getName()))
       .map { ze =>
         val name = Paths.get(ze.getName()).getFileName.toString
         val data =
diff --git a/modules/files/src/test/scala/docspell/files/ZipTest.scala b/modules/files/src/test/scala/docspell/files/ZipTest.scala
index 7fce9d50..05cf1866 100644
--- a/modules/files/src/test/scala/docspell/files/ZipTest.scala
+++ b/modules/files/src/test/scala/docspell/files/ZipTest.scala
@@ -4,6 +4,7 @@ import minitest._
 import cats.effect._
 import cats.implicits._
 import scala.concurrent.ExecutionContext
+import docspell.common.Glob
 
 object ZipTest extends SimpleTestSuite {
 
@@ -12,7 +13,7 @@ object ZipTest extends SimpleTestSuite {
 
   test("unzip") {
     val zipFile = ExampleFiles.letters_zip.readURL[IO](8192, blocker)
-    val uncomp  = zipFile.through(Zip.unzip(8192, blocker))
+    val uncomp  = zipFile.through(Zip.unzip(8192, blocker, Glob.all))
 
     uncomp
       .evalMap { entry =>
diff --git a/modules/joex/src/main/scala/docspell/joex/mail/ReadMail.scala b/modules/joex/src/main/scala/docspell/joex/mail/ReadMail.scala
index 0f144ab7..8242df94 100644
--- a/modules/joex/src/main/scala/docspell/joex/mail/ReadMail.scala
+++ b/modules/joex/src/main/scala/docspell/joex/mail/ReadMail.scala
@@ -16,9 +16,10 @@ import emil.{MimeType => _, _}
 object ReadMail {
 
   def readBytesP[F[_]: ConcurrentEffect: ContextShift](
-      logger: Logger[F]
+      logger: Logger[F],
+      glob: Glob
   ): Pipe[F, Byte, Binary[F]] =
-    _.through(bytesToMail(logger)).flatMap(mailToEntries[F](logger))
+    _.through(bytesToMail(logger)).flatMap(mailToEntries[F](logger, glob))
 
   def bytesToMail[F[_]: Sync](logger: Logger[F]): Pipe[F, Byte, Mail[F]] =
     s =>
@@ -26,7 +27,8 @@ object ReadMail {
         s.through(Mail.readBytes[F])
 
   def mailToEntries[F[_]: ConcurrentEffect: ContextShift](
-      logger: Logger[F]
+      logger: Logger[F],
+      glob: Glob
   )(mail: Mail[F]): Stream[F, Binary[F]] = {
     val bodyEntry: F[Option[Binary[F]]] =
       if (mail.body.isEmpty) (None: Option[Binary[F]]).pure[F]
@@ -48,10 +50,12 @@ object ReadMail {
     ) >>
       (Stream
         .eval(bodyEntry)
-        .flatMap(e => Stream.emits(e.toSeq)) ++
+        .flatMap(e => Stream.emits(e.toSeq))
+        .filter(a => glob.matches(a.name)) ++
         Stream
           .eval(TnefExtract.replace(mail))
           .flatMap(m => Stream.emits(m.attachments.all))
+          .filter(a => a.filename.exists(glob.matches))
           .map(a =>
             Binary(a.filename.getOrElse("noname"), a.mimeType.toLocal, a.content)
           ))
diff --git a/modules/joex/src/main/scala/docspell/joex/process/ExtractArchive.scala b/modules/joex/src/main/scala/docspell/joex/process/ExtractArchive.scala
index f489ae3c..b684ded9 100644
--- a/modules/joex/src/main/scala/docspell/joex/process/ExtractArchive.scala
+++ b/modules/joex/src/main/scala/docspell/joex/process/ExtractArchive.scala
@@ -95,12 +95,12 @@ object ExtractArchive {
       case MimeType.ZipMatch(_) if ra.name.exists(_.endsWith(".zip")) =>
         ctx.logger.info(s"Extracting zip archive ${ra.name.getOrElse("<noname>")}.") *>
           extractZip(ctx, archive)(ra, pos)
-            .flatTap(_ => cleanupParents(ctx, ra, archive))
+            .flatMap(cleanupParents(ctx, ra, archive))
 
       case MimeType.EmailMatch(_) =>
         ctx.logger.info(s"Reading e-mail ${ra.name.getOrElse("<noname>")}") *>
           extractMail(ctx, archive)(ra, pos)
-            .flatTap(_ => cleanupParents(ctx, ra, archive))
+            .flatMap(cleanupParents(ctx, ra, archive))
 
       case _ =>
         ctx.logger.debug(s"Not an archive: ${mime.asString}") *>
@@ -111,7 +111,7 @@ object ExtractArchive {
       ctx: Context[F, _],
       ra: RAttachment,
       archive: Option[RAttachmentArchive]
-  ): F[Unit] =
+  )(extracted: Extracted): F[Extracted] =
     archive match {
       case Some(_) =>
         for {
@@ -121,36 +121,37 @@ object ExtractArchive {
           _ <- ctx.store.transact(RAttachmentArchive.delete(ra.id))
           _ <- ctx.store.transact(RAttachment.delete(ra.id))
           _ <- ctx.store.bitpeace.delete(ra.fileId.id).compile.drain
-        } yield ()
+        } yield extracted
       case None =>
         for {
           _ <- ctx.logger.debug(
             s"Extracted attachment ${ra.name}. Remove it from the item."
           )
           _ <- ctx.store.transact(RAttachment.delete(ra.id))
-        } yield ()
+        } yield extracted.copy(files = extracted.files.filter(_.id != ra.id))
     }
 
   def extractZip[F[_]: ConcurrentEffect: ContextShift](
-      ctx: Context[F, _],
+      ctx: Context[F, ProcessItemArgs],
       archive: Option[RAttachmentArchive]
   )(ra: RAttachment, pos: Int): F[Extracted] = {
     val zipData = ctx.store.bitpeace
       .get(ra.fileId.id)
       .unNoneTerminate
       .through(ctx.store.bitpeace.fetchData2(RangeDef.all))
-
-    zipData
-      .through(Zip.unzipP[F](8192, ctx.blocker))
-      .zipWithIndex
-      .flatMap(handleEntry(ctx, ra, pos, archive, None))
-      .foldMonoid
-      .compile
-      .lastOrError
+    val glob = ctx.args.meta.fileFilter.getOrElse(Glob.all)
+    ctx.logger.debug(s"Filtering zip entries with '${glob.asString}'") *>
+      zipData
+        .through(Zip.unzipP[F](8192, ctx.blocker, glob))
+        .zipWithIndex
+        .flatMap(handleEntry(ctx, ra, pos, archive, None))
+        .foldMonoid
+        .compile
+        .lastOrError
   }
 
   def extractMail[F[_]: ConcurrentEffect: ContextShift](
-      ctx: Context[F, _],
+      ctx: Context[F, ProcessItemArgs],
       archive: Option[RAttachmentArchive]
   )(ra: RAttachment, pos: Int): F[Extracted] = {
     val email: Stream[F, Byte] = ctx.store.bitpeace
@@ -158,24 +159,26 @@ object ExtractArchive {
       .unNoneTerminate
       .through(ctx.store.bitpeace.fetchData2(RangeDef.all))
 
-    email
-      .through(ReadMail.bytesToMail[F](ctx.logger))
-      .flatMap { mail =>
-        val mId = mail.header.messageId
-        val givenMeta =
-          for {
-            _ <- ctx.logger.debug(s"Use mail date for item date: ${mail.header.date}")
-            s <- Sync[F].delay(extractMailMeta(mail))
-          } yield s
+    val glob = ctx.args.meta.fileFilter.getOrElse(Glob.all)
+    ctx.logger.debug(s"Filtering email attachments with '${glob.asString}'") *>
+      email
+        .through(ReadMail.bytesToMail[F](ctx.logger))
+        .flatMap { mail =>
+          val mId = mail.header.messageId
+          val givenMeta =
+            for {
+              _ <- ctx.logger.debug(s"Use mail date for item date: ${mail.header.date}")
+              s <- Sync[F].delay(extractMailMeta(mail))
+            } yield s
 
-        ReadMail
-          .mailToEntries(ctx.logger)(mail)
-          .zipWithIndex
-          .flatMap(handleEntry(ctx, ra, pos, archive, mId)) ++ Stream.eval(givenMeta)
-      }
-      .foldMonoid
-      .compile
-      .lastOrError
+          ReadMail
+            .mailToEntries(ctx.logger, glob)(mail)
+            .zipWithIndex
+            .flatMap(handleEntry(ctx, ra, pos, archive, mId)) ++ Stream.eval(givenMeta)
+        }
+        .foldMonoid
+        .compile
+        .lastOrError
   }
 
   def extractMailMeta[F[_]](mail: Mail[F]): Extracted =
@@ -239,6 +242,9 @@ object ExtractArchive {
         positions ++ e.positions
       )
 
+    def filterNames(filter: Glob): Extracted =
+      copy(files = files.filter(ra => filter.matches(ra.name.getOrElse(""))))
+
     def setMeta(m: MetaProposal): Extracted =
       setMeta(MetaProposalList.of(m))
 
diff --git a/modules/joex/src/main/scala/docspell/joex/process/ProcessItem.scala b/modules/joex/src/main/scala/docspell/joex/process/ProcessItem.scala
index 56f3cd33..b6cc493e 100644
--- a/modules/joex/src/main/scala/docspell/joex/process/ProcessItem.scala
+++ b/modules/joex/src/main/scala/docspell/joex/process/ProcessItem.scala
@@ -25,6 +25,7 @@ object ProcessItem {
       .flatMap(LinkProposal[F])
       .flatMap(SetGivenData[F](itemOps))
       .flatMap(Task.setProgress(99))
+      .flatMap(RemoveEmptyItem(itemOps))
 
   def processAttachments[F[_]: ConcurrentEffect: ContextShift](
       cfg: Config,
diff --git a/modules/joex/src/main/scala/docspell/joex/process/RemoveEmptyItem.scala b/modules/joex/src/main/scala/docspell/joex/process/RemoveEmptyItem.scala
new file mode 100644
index 00000000..75751672
--- /dev/null
+++ b/modules/joex/src/main/scala/docspell/joex/process/RemoveEmptyItem.scala
@@ -0,0 +1,26 @@
+package docspell.joex.process
+
+import cats.effect._
+import cats.implicits._
+
+import docspell.backend.ops.OItem
+import docspell.common._
+import docspell.joex.scheduler.Task
+
+object RemoveEmptyItem {
+
+  def apply[F[_]: Sync](
+      ops: OItem[F]
+  )(data: ItemData): Task[F, ProcessItemArgs, ItemData] =
+    if (data.item.state.isInvalid && data.attachments.isEmpty)
+      Task { ctx =>
+        for {
+          _ <- ctx.logger.warn(s"Removing item as it doesn't have any attachments!")
+          n <- ops.deleteItem(data.item.id, data.item.cid)
+          _ <- ctx.logger.warn(s"Removed item ($n). No item has been created!")
+        } yield data
+      }
+    else
+      Task.pure(data)
+
+}
diff --git a/modules/joex/src/main/scala/docspell/joex/scanmailbox/ScanMailboxTask.scala b/modules/joex/src/main/scala/docspell/joex/scanmailbox/ScanMailboxTask.scala
index a01b1676..7a746b9f 100644
--- a/modules/joex/src/main/scala/docspell/joex/scanmailbox/ScanMailboxTask.scala
+++ b/modules/joex/src/main/scala/docspell/joex/scanmailbox/ScanMailboxTask.scala
@@ -256,7 +256,7 @@ object ScanMailboxTask {
           args.itemFolder,
           Seq.empty,
           true,
-          args.fileFilter,
+          args.fileFilter.getOrElse(Glob.all),
           args.tags.getOrElse(Nil)
         )
         data = OUpload.UploadData(
diff --git a/modules/restserver/src/main/scala/docspell/restserver/conv/Conversions.scala b/modules/restserver/src/main/scala/docspell/restserver/conv/Conversions.scala
index e5191e07..2638dc28 100644
--- a/modules/restserver/src/main/scala/docspell/restserver/conv/Conversions.scala
+++ b/modules/restserver/src/main/scala/docspell/restserver/conv/Conversions.scala
@@ -311,14 +311,15 @@ trait Conversions {
               m.folder,
               validFileTypes,
               m.skipDuplicates.getOrElse(false),
-              m.fileFilter,
+              m.fileFilter.getOrElse(Glob.all),
               m.tags.map(_.items).getOrElse(Nil)
             )
           )
         )
       )
       .getOrElse(
-        (true, UploadMeta(None, sourceName, None, validFileTypes, false, None, Nil)).pure[F]
+        (true, UploadMeta(None, sourceName, None, validFileTypes, false, Glob.all, Nil))
+          .pure[F]
       )
 
     val files = mp.parts