diff --git a/modules/common/src/main/scala/docspell/common/Glob.scala b/modules/common/src/main/scala/docspell/common/Glob.scala
index afa04a02..6a41057e 100644
--- a/modules/common/src/main/scala/docspell/common/Glob.scala
+++ b/modules/common/src/main/scala/docspell/common/Glob.scala
@@ -8,7 +8,7 @@ import io.circe.{Decoder, Encoder}
 trait Glob {
 
   /** Matches the input string against this glob. */
-  def matches(in: String): Boolean
+  def matches(caseSensitive: Boolean)(in: String): Boolean
 
   /** If this glob consists of multiple segments, it is the same as
     * `matches`. If it is only a single segment, it is matched against
@@ -25,42 +25,6 @@ trait Glob {
 }
 
 object Glob {
-  private val separator = '/'
-  private val anyChar = '|'
-
-  val all = new Glob {
-    def matches(in: String) = true
-    def matchFilenameOrPath(in: String) = true
-    val asString = "*"
-  }
-
-  def pattern(pattern: Pattern): Glob =
-    PatternGlob(pattern)
-
-  /** A simple glob supporting `*` and `?`. */
-  final private case class PatternGlob(pattern: Pattern) extends Glob {
-    def matches(in: String): Boolean =
-      pattern.parts
-        .zipWith(Glob.split(in, Glob.separator))(_.matches(_))
-        .forall(identity)
-
-    def matchFilenameOrPath(in: String): Boolean =
-      if (pattern.parts.tail.isEmpty) matches(split(in, separator).last)
-      else matches(in)
-
-    def asString: String =
-      pattern.asString
-  }
-
-  final private case class AnyGlob(globs: NonEmptyList[Glob]) extends Glob {
-    def matches(in: String) =
-      globs.exists(_.matches(in))
-    def matchFilenameOrPath(in: String) =
-      globs.exists(_.matchFilenameOrPath(in))
-    def asString =
-      globs.toList.map(_.asString).mkString(anyChar.toString)
-  }
-
   def apply(in: String): Glob = {
     def single(str: String) =
       PatternGlob(Pattern(split(str, separator).map(makeSegment)))
@@ -75,6 +39,42 @@ object Glob {
       }
   }
 
+  private val separator = '/'
+  private val anyChar = '|'
+
+  val all = new Glob {
+    def matches(caseSensitive: Boolean)(in: String) = true
+    def matchFilenameOrPath(in: String) = true
+    val asString = "*"
+  }
+
+  def pattern(pattern: Pattern): Glob =
+    PatternGlob(pattern)
+
+  /** A simple glob supporting `*` and `?`. */
+  final private case class PatternGlob(pattern: Pattern) extends Glob {
+    def matches(caseSensitive: Boolean)(in: String): Boolean =
+      pattern.parts
+        .zipWith(Glob.split(in, Glob.separator))(_.matches(caseSensitive)(_))
+        .forall(identity)
+
+    def matchFilenameOrPath(in: String): Boolean =
+      if (pattern.parts.tail.isEmpty) matches(true)(split(in, separator).last)
+      else matches(true)(in)
+
+    def asString: String =
+      pattern.asString
+  }
+
+  final private case class AnyGlob(globs: NonEmptyList[Glob]) extends Glob {
+    def matches(caseSensitive: Boolean)(in: String) =
+      globs.exists(_.matches(caseSensitive)(in))
+    def matchFilenameOrPath(in: String) =
+      globs.exists(_.matchFilenameOrPath(in))
+    def asString =
+      globs.toList.map(_.asString).mkString(anyChar.toString)
+  }
+
   case class Pattern(parts: NonEmptyList[Segment]) {
     def asString =
       parts.map(_.asString).toList.mkString(separator.toString)
@@ -86,12 +86,12 @@ object Glob {
   }
 
   case class Segment(tokens: NonEmptyList[Token]) {
-    def matches(in: String): Boolean =
-      consume(in).exists(_.isEmpty)
+    def matches(caseSensitive: Boolean)(in: String): Boolean =
+      consume(in, caseSensitive).exists(_.isEmpty)
 
-    def consume(in: String): Option[String] =
+    def consume(in: String, caseSensitive: Boolean): Option[String] =
       tokens.foldLeft(in.some) { (rem, token) =>
-        rem.flatMap(token.consume)
+        rem.flatMap(token.consume(caseSensitive))
       }
 
     def asString: String =
@@ -103,34 +103,47 @@ object Glob {
   }
 
   sealed trait Token {
-    def consume(str: String): Option[String]
+    def consume(caseSensitive: Boolean)(str: String): Option[String]
     def asString: String
   }
 
   object Token {
     case class Literal(asString: String) extends Token {
-      def consume(str: String): Option[String] =
-        if (str.startsWith(asString)) str.drop(asString.length).some
+      def consume(caseSensitive: Boolean)(str: String): Option[String] =
+        if (str.startsWith(asString, caseSensitive)) str.drop(asString.length).some
         else None
     }
 
     case class Until(value: String) extends Token {
-      def consume(str: String): Option[String] =
+      def consume(caseSensitive: Boolean)(str: String): Option[String] =
         if (value.isEmpty) Some("")
         else
-          str.indexOf(value) match {
-            case -1 => None
-            case n => str.substring(n + value.length).some
-          }
+          str
+            .findFirst(value, caseSensitive)
+            .map(n => str.substring(n + value.length))
       val asString = s"*$value"
     }
 
     case object Single extends Token {
-      def consume(str: String): Option[String] =
-        if (str.isEmpty()) None
+      def consume(caseSensitive: Boolean)(str: String): Option[String] =
+        if (str.isEmpty) None
         else Some(str.drop(1))
       val asString = "?"
     }
+
+    implicit final class StringHelper(val str: String) extends AnyVal {
+      // Case-insensitive paths use regionMatches on the original strings: a
+      // toLowerCase copy is locale-dependent and may differ in length from str.
+      def findFirst(sub: String, caseSensitive: Boolean): Option[Int] =
+        if (caseSensitive) Option(str.indexOf(sub)).filter(_ >= 0)
+        else
+          (0 to str.length - sub.length)
+            .find(i => str.regionMatches(true, i, sub, 0, sub.length))
+
+      def startsWith(prefix: String, caseSensitive: Boolean): Boolean =
+        if (caseSensitive) str.startsWith(prefix)
+        else str.regionMatches(true, 0, prefix, 0, prefix.length)
+    }
   }
 
   private def split(str: String, sep: Char): NonEmptyList[String] =
@@ -139,6 +152,7 @@ object Glob {
       .getOrElse(NonEmptyList.of(str))
 
   private def makeSegment(str: String): Segment = {
+    @annotation.tailrec
     def loop(rem: String, res: List[Token]): List[Token] =
       if (rem.isEmpty) res
       else
diff --git a/modules/common/src/test/scala/docspell/common/GlobTest.scala b/modules/common/src/test/scala/docspell/common/GlobTest.scala
index 8f228851..2e650174 100644
--- a/modules/common/src/test/scala/docspell/common/GlobTest.scala
+++ b/modules/common/src/test/scala/docspell/common/GlobTest.scala
@@ -6,8 +6,10 @@ import Glob._
 object GlobTest extends SimpleTestSuite {
 
   test("literals") {
-    assert(Glob.pattern(Pattern(Segment(Token.Literal("hello")))).matches("hello"))
-    assert(!Glob.pattern(Pattern(Segment(Token.Literal("hello")))).matches("hello1"))
+    assert(Glob.pattern(Pattern(Segment(Token.Literal("hello")))).matches(true)("hello"))
+    assert(
+      !Glob.pattern(Pattern(Segment(Token.Literal("hello")))).matches(true)("hello1")
+    )
   }
 
   test("single wildcards 1") {
@@ -16,19 +18,19 @@ object GlobTest extends SimpleTestSuite {
         Pattern(Segment(Token.Literal("s"), Token.Until("p"), Token.Until("t")))
       )
 
-    assert(glob.matches("snapshot"))
-    assert(!glob.matches("snapshots"))
+    assert(glob.matches(true)("snapshot"))
+    assert(!glob.matches(true)("snapshots"))
   }
 
   test("single wildcards 2") {
     val glob =
       Glob.pattern(Pattern(Segment(Token.Literal("test."), Token.Until(""))))
 
-    assert(glob.matches("test.txt"))
-    assert(glob.matches("test.pdf"))
-    assert(glob.matches("test.converted.pdf"))
-    assert(!glob.matches("test1.txt"))
-    assert(!glob.matches("atest.txt"))
+    assert(glob.matches(true)("test.txt"))
+    assert(glob.matches(true)("test.pdf"))
+    assert(glob.matches(true)("test.converted.pdf"))
+    assert(!glob.matches(true)("test1.txt"))
+    assert(!glob.matches(true)("atest.txt"))
   }
 
   test("single parsing") {
@@ -60,12 +62,12 @@ object GlobTest extends SimpleTestSuite {
   }
 
   test("with splitting") {
-    assert(Glob("a/b/*").matches("a/b/hello"))
-    assert(!Glob("a/b/*").matches("/a/b/hello"))
-    assert(Glob("/a/b/*").matches("/a/b/hello"))
-    assert(!Glob("/a/b/*").matches("a/b/hello"))
-    assert(!Glob("*/a/b/*").matches("a/b/hello"))
-    assert(Glob("*/a/b/*").matches("test/a/b/hello"))
+    assert(Glob("a/b/*").matches(true)("a/b/hello"))
+    assert(!Glob("a/b/*").matches(true)("/a/b/hello"))
+    assert(Glob("/a/b/*").matches(true)("/a/b/hello"))
+    assert(!Glob("/a/b/*").matches(true)("a/b/hello"))
+    assert(!Glob("*/a/b/*").matches(true)("a/b/hello"))
+    assert(Glob("*/a/b/*").matches(true)("test/a/b/hello"))
   }
 
   test("asString") {
@@ -79,9 +81,9 @@ object GlobTest extends SimpleTestSuite {
   }
 
   test("simple matches") {
-    assert(Glob("/test.*").matches("/test.pdf"))
-    assert(!Glob("/test.*").matches("test.pdf"))
-    assert(!Glob("test.*").matches("/test.pdf"))
+    assert(Glob("/test.*").matches(true)("/test.pdf"))
+    assert(!Glob("/test.*").matches(true)("test.pdf"))
+    assert(!Glob("test.*").matches(true)("/test.pdf"))
   }
 
   test("matchFilenameOrPath") {
@@ -100,12 +102,24 @@ object GlobTest extends SimpleTestSuite {
   }
 
   test("anyglob") {
-    assert(Glob("*.pdf|*.txt").matches("test.pdf"))
-    assert(Glob("*.pdf|*.txt").matches("test.txt"))
-    assert(!Glob("*.pdf|*.txt").matches("test.xls"))
-    assert(Glob("*.pdf | *.txt").matches("test.pdf"))
-    assert(Glob("*.pdf | mail.html").matches("test.pdf"))
-    assert(Glob("*.pdf | mail.html").matches("mail.html"))
-    assert(!Glob("*.pdf | mail.html").matches("test.docx"))
+    assert(Glob("*.pdf|*.txt").matches(true)("test.pdf"))
+    assert(Glob("*.pdf|*.txt").matches(true)("test.txt"))
+    assert(!Glob("*.pdf|*.txt").matches(true)("test.xls"))
+    assert(Glob("*.pdf | *.txt").matches(true)("test.pdf"))
+    assert(Glob("*.pdf | mail.html").matches(true)("test.pdf"))
+    assert(Glob("*.pdf | mail.html").matches(true)("mail.html"))
+    assert(!Glob("*.pdf | mail.html").matches(true)("test.docx"))
+  }
+
+  test("case insensitive") {
+    assert(Glob("*hello*").matches(false)("hello world"))
+    assert(Glob("*hello*").matches(false)("world hello"))
+    assert(Glob("*hello*").matches(false)("Hello world"))
+    assert(Glob("*hello*").matches(false)("world Hello"))
+    assert(Glob("*hello*").matches(false)("World Hello"))
+    assert(Glob("*hello*").matches(false)("Hello World"))
+    assert(Glob("*Hello*").matches(false)("world hello"))
+    assert(Glob("*heLLo*").matches(false)("Hello world"))
+    assert(Glob("*hellO*").matches(false)("world Hello"))
   }
 }
diff --git a/modules/joex/src/main/scala/docspell/joex/mail/ReadMail.scala b/modules/joex/src/main/scala/docspell/joex/mail/ReadMail.scala
index 8242df94..ce5bd3ca 100644
--- a/modules/joex/src/main/scala/docspell/joex/mail/ReadMail.scala
+++ b/modules/joex/src/main/scala/docspell/joex/mail/ReadMail.scala
@@ -51,11 +51,11 @@ object ReadMail {
       (Stream
         .eval(bodyEntry)
         .flatMap(e => Stream.emits(e.toSeq))
-        .filter(a => glob.matches(a.name)) ++
+        .filter(a => glob.matches(caseSensitive = false)(a.name)) ++
         Stream
           .eval(TnefExtract.replace(mail))
           .flatMap(m => Stream.emits(m.attachments.all))
-          .filter(a => a.filename.exists(glob.matches))
+          .filter(a => a.filename.exists(glob.matches(caseSensitive = false)))
           .map(a =>
             Binary(a.filename.getOrElse("noname"), a.mimeType.toLocal, a.content)
           ))
diff --git a/modules/joex/src/main/scala/docspell/joex/process/ExtractArchive.scala b/modules/joex/src/main/scala/docspell/joex/process/ExtractArchive.scala
index b684ded9..c48952e2 100644
--- a/modules/joex/src/main/scala/docspell/joex/process/ExtractArchive.scala
+++ b/modules/joex/src/main/scala/docspell/joex/process/ExtractArchive.scala
@@ -243,7 +243,9 @@ object ExtractArchive {
       )
 
     def filterNames(filter: Glob): Extracted =
-      copy(files = files.filter(ra => filter.matches(ra.name.getOrElse(""))))
+      copy(files =
+        files.filter(ra => filter.matches(caseSensitive = false)(ra.name.getOrElse("")))
+      )
 
     def setMeta(m: MetaProposal): Extracted =
       setMeta(MetaProposalList.of(m))
diff --git a/modules/joex/src/main/scala/docspell/joex/scanmailbox/ScanMailboxTask.scala b/modules/joex/src/main/scala/docspell/joex/scanmailbox/ScanMailboxTask.scala
index 4390da3a..ed038926 100644
--- a/modules/joex/src/main/scala/docspell/joex/scanmailbox/ScanMailboxTask.scala
+++ b/modules/joex/src/main/scala/docspell/joex/scanmailbox/ScanMailboxTask.scala
@@ -182,7 +182,7 @@ object ScanMailboxTask {
       ctx.args.subjectFilter match {
         case Some(sf) =>
           def check(mh: MailHeader): F[Option[MailHeader]] =
-            if (sf.matches(mh.subject))
+            if (sf.matches(caseSensitive = false)(mh.subject))
               ctx.logger.debug(
                 s"Including mail '${mh.subject}', it matches the filter."
               ) *> Option(mh).pure[F]