Make glob matching case-insensitive by default

This commit is contained in:
Eike Kettner 2021-01-09 13:23:15 +01:00
parent 5fe727e522
commit d712f8303d
5 changed files with 110 additions and 80 deletions

View File

@ -8,7 +8,7 @@ import io.circe.{Decoder, Encoder}
trait Glob { trait Glob {
/** Matches the input string against this glob. */ /** Matches the input string against this glob. */
def matches(in: String): Boolean def matches(caseSensitive: Boolean)(in: String): Boolean
/** If this glob consists of multiple segments, it is the same as /** If this glob consists of multiple segments, it is the same as
* `matches`. If it is only a single segment, it is matched against * `matches`. If it is only a single segment, it is matched against
@ -25,42 +25,6 @@ trait Glob {
} }
object Glob { object Glob {
private val separator = '/'
private val anyChar = '|'
val all = new Glob {
def matches(in: String) = true
def matchFilenameOrPath(in: String) = true
val asString = "*"
}
def pattern(pattern: Pattern): Glob =
PatternGlob(pattern)
/** A simple glob supporting `*` and `?`. */
final private case class PatternGlob(pattern: Pattern) extends Glob {
def matches(in: String): Boolean =
pattern.parts
.zipWith(Glob.split(in, Glob.separator))(_.matches(_))
.forall(identity)
def matchFilenameOrPath(in: String): Boolean =
if (pattern.parts.tail.isEmpty) matches(split(in, separator).last)
else matches(in)
def asString: String =
pattern.asString
}
final private case class AnyGlob(globs: NonEmptyList[Glob]) extends Glob {
def matches(in: String) =
globs.exists(_.matches(in))
def matchFilenameOrPath(in: String) =
globs.exists(_.matchFilenameOrPath(in))
def asString =
globs.toList.map(_.asString).mkString(anyChar.toString)
}
def apply(in: String): Glob = { def apply(in: String): Glob = {
def single(str: String) = def single(str: String) =
PatternGlob(Pattern(split(str, separator).map(makeSegment))) PatternGlob(Pattern(split(str, separator).map(makeSegment)))
@ -75,6 +39,42 @@ object Glob {
} }
} }
// Character that delimits the segments of a glob pattern and of the input it is matched against.
private val separator = '/'
// Character placed between the alternatives when an `AnyGlob` is rendered by `asString`.
private val anyChar = '|'
/** A glob that accepts every input, regardless of case sensitivity; it renders as `*`. */
val all: Glob = new Glob {
  val asString: String = "*"

  def matches(caseSensitive: Boolean)(in: String): Boolean = true

  def matchFilenameOrPath(in: String): Boolean = true
}
/** Creates a [[Glob]] from an already constructed [[Pattern]]. */
def pattern(pattern: Pattern): Glob = {
  val glob: Glob = PatternGlob(pattern)
  glob
}
/** A simple glob supporting `*` and `?`.
  *
  * The input is split at the separator character and each pattern segment is
  * matched against the input segment at the same position.
  */
final private case class PatternGlob(pattern: Pattern) extends Glob {

  def matches(caseSensitive: Boolean)(in: String): Boolean = {
    val inputSegments = Glob.split(in, Glob.separator)
    val outcomes = pattern.parts.zipWith(inputSegments) { (segment, text) =>
      segment.matches(caseSensitive)(text)
    }
    outcomes.forall(identity)
  }

  // Always matches case-sensitively. A pattern with a single segment is
  // compared against the last segment of the input only.
  def matchFilenameOrPath(in: String): Boolean = {
    val singleSegment = pattern.parts.tail.isEmpty
    val candidate = if (singleSegment) split(in, separator).last else in
    matches(true)(candidate)
  }

  def asString: String = pattern.asString
}
/** A glob consisting of several alternatives; it matches as soon as one of
  * the alternatives matches. Rendered by joining the alternatives with
  * `anyChar`.
  */
final private case class AnyGlob(globs: NonEmptyList[Glob]) extends Glob {

  def matches(caseSensitive: Boolean)(in: String): Boolean = {
    val accepts = (glob: Glob) => glob.matches(caseSensitive)(in)
    globs.exists(accepts)
  }

  def matchFilenameOrPath(in: String): Boolean =
    globs.exists(glob => glob.matchFilenameOrPath(in))

  def asString: String =
    globs.map(_.asString).toList.mkString(anyChar.toString)
}
case class Pattern(parts: NonEmptyList[Segment]) { case class Pattern(parts: NonEmptyList[Segment]) {
def asString = def asString =
parts.map(_.asString).toList.mkString(separator.toString) parts.map(_.asString).toList.mkString(separator.toString)
@ -86,12 +86,12 @@ object Glob {
} }
case class Segment(tokens: NonEmptyList[Token]) { case class Segment(tokens: NonEmptyList[Token]) {
def matches(in: String): Boolean = def matches(caseSensitive: Boolean)(in: String): Boolean =
consume(in).exists(_.isEmpty) consume(in, caseSensitive).exists(_.isEmpty)
def consume(in: String): Option[String] = def consume(in: String, caseSensitive: Boolean): Option[String] =
tokens.foldLeft(in.some) { (rem, token) => tokens.foldLeft(in.some) { (rem, token) =>
rem.flatMap(token.consume) rem.flatMap(token.consume(caseSensitive))
} }
def asString: String = def asString: String =
@ -103,34 +103,47 @@ object Glob {
} }
sealed trait Token { sealed trait Token {
def consume(str: String): Option[String] def consume(caseSensitive: Boolean)(str: String): Option[String]
def asString: String def asString: String
} }
object Token { object Token {
case class Literal(asString: String) extends Token { case class Literal(asString: String) extends Token {
def consume(str: String): Option[String] = def consume(caseSensitive: Boolean)(str: String): Option[String] =
if (str.startsWith(asString)) str.drop(asString.length).some if (str.startsWith(asString, caseSensitive)) str.drop(asString.length).some
else None else None
} }
case class Until(value: String) extends Token { case class Until(value: String) extends Token {
def consume(str: String): Option[String] = def consume(caseSensitive: Boolean)(str: String): Option[String] =
if (value.isEmpty) Some("") if (value.isEmpty) Some("")
else else
str.indexOf(value) match { str
case -1 => None .findFirst(value, caseSensitive)
case n => str.substring(n + value.length).some .map(n => str.substring(n + value.length))
}
val asString = val asString =
s"*$value" s"*$value"
} }
case object Single extends Token { case object Single extends Token {
def consume(str: String): Option[String] = def consume(caseSensitive: Boolean)(str: String): Option[String] =
if (str.isEmpty()) None if (str.isEmpty) None
else Some(str.drop(1)) else Some(str.drop(1))
val asString = "?" val asString = "?"
} }
/** String helpers with optional case-insensitive comparison.
  *
  * Case-insensitive comparison is done in place with `String.regionMatches`
  * instead of lower-casing both strings. Lower-casing depends on the default
  * locale and may change the length of a string, which would make returned
  * indexes and prefix lengths invalid for the original string.
  */
implicit final class StringHelper(val str: String) extends AnyVal {

  /** Finds the first occurrence of `sub` in this string.
    *
    * @param sub the substring to look for
    * @param caseSensitive whether letter case must match exactly
    * @return the index into the original string where `sub` starts, or
    *         `None` if it does not occur
    */
  def findFirst(sub: String, caseSensitive: Boolean): Option[Int] =
    if (caseSensitive) Option(str.indexOf(sub)).filter(_ >= 0)
    else {
      // An empty range (sub longer than str) yields None.
      val lastStart = str.length - sub.length
      (0 to lastStart).find(i => str.regionMatches(true, i, sub, 0, sub.length))
    }

  /** Checks whether this string starts with `prefix`.
    *
    * @param prefix the expected prefix
    * @param caseSensitive whether letter case must match exactly
    * @return `true` if the first `prefix.length` characters of this string
    *         equal `prefix` under the requested case handling
    */
  def startsWith(prefix: String, caseSensitive: Boolean): Boolean =
    if (caseSensitive) str.startsWith(prefix)
    else str.regionMatches(true, 0, prefix, 0, prefix.length)
}
} }
private def split(str: String, sep: Char): NonEmptyList[String] = private def split(str: String, sep: Char): NonEmptyList[String] =
@ -139,6 +152,7 @@ object Glob {
.getOrElse(NonEmptyList.of(str)) .getOrElse(NonEmptyList.of(str))
private def makeSegment(str: String): Segment = { private def makeSegment(str: String): Segment = {
@annotation.tailrec
def loop(rem: String, res: List[Token]): List[Token] = def loop(rem: String, res: List[Token]): List[Token] =
if (rem.isEmpty) res if (rem.isEmpty) res
else else

View File

@ -6,8 +6,10 @@ import Glob._
object GlobTest extends SimpleTestSuite { object GlobTest extends SimpleTestSuite {
test("literals") { test("literals") {
assert(Glob.pattern(Pattern(Segment(Token.Literal("hello")))).matches("hello")) assert(Glob.pattern(Pattern(Segment(Token.Literal("hello")))).matches(true)("hello"))
assert(!Glob.pattern(Pattern(Segment(Token.Literal("hello")))).matches("hello1")) assert(
!Glob.pattern(Pattern(Segment(Token.Literal("hello")))).matches(true)("hello1")
)
} }
test("single wildcards 1") { test("single wildcards 1") {
@ -16,19 +18,19 @@ object GlobTest extends SimpleTestSuite {
Pattern(Segment(Token.Literal("s"), Token.Until("p"), Token.Until("t"))) Pattern(Segment(Token.Literal("s"), Token.Until("p"), Token.Until("t")))
) )
assert(glob.matches("snapshot")) assert(glob.matches(true)("snapshot"))
assert(!glob.matches("snapshots")) assert(!glob.matches(true)("snapshots"))
} }
test("single wildcards 2") { test("single wildcards 2") {
val glob = val glob =
Glob.pattern(Pattern(Segment(Token.Literal("test."), Token.Until("")))) Glob.pattern(Pattern(Segment(Token.Literal("test."), Token.Until(""))))
assert(glob.matches("test.txt")) assert(glob.matches(true)("test.txt"))
assert(glob.matches("test.pdf")) assert(glob.matches(true)("test.pdf"))
assert(glob.matches("test.converted.pdf")) assert(glob.matches(true)("test.converted.pdf"))
assert(!glob.matches("test1.txt")) assert(!glob.matches(true)("test1.txt"))
assert(!glob.matches("atest.txt")) assert(!glob.matches(true)("atest.txt"))
} }
test("single parsing") { test("single parsing") {
@ -60,12 +62,12 @@ object GlobTest extends SimpleTestSuite {
} }
test("with splitting") { test("with splitting") {
assert(Glob("a/b/*").matches("a/b/hello")) assert(Glob("a/b/*").matches(true)("a/b/hello"))
assert(!Glob("a/b/*").matches("/a/b/hello")) assert(!Glob("a/b/*").matches(true)("/a/b/hello"))
assert(Glob("/a/b/*").matches("/a/b/hello")) assert(Glob("/a/b/*").matches(true)("/a/b/hello"))
assert(!Glob("/a/b/*").matches("a/b/hello")) assert(!Glob("/a/b/*").matches(true)("a/b/hello"))
assert(!Glob("*/a/b/*").matches("a/b/hello")) assert(!Glob("*/a/b/*").matches(true)("a/b/hello"))
assert(Glob("*/a/b/*").matches("test/a/b/hello")) assert(Glob("*/a/b/*").matches(true)("test/a/b/hello"))
} }
test("asString") { test("asString") {
@ -79,9 +81,9 @@ object GlobTest extends SimpleTestSuite {
} }
test("simple matches") { test("simple matches") {
assert(Glob("/test.*").matches("/test.pdf")) assert(Glob("/test.*").matches(true)("/test.pdf"))
assert(!Glob("/test.*").matches("test.pdf")) assert(!Glob("/test.*").matches(true)("test.pdf"))
assert(!Glob("test.*").matches("/test.pdf")) assert(!Glob("test.*").matches(true)("/test.pdf"))
} }
test("matchFilenameOrPath") { test("matchFilenameOrPath") {
@ -100,12 +102,24 @@ object GlobTest extends SimpleTestSuite {
} }
test("anyglob") { test("anyglob") {
assert(Glob("*.pdf|*.txt").matches("test.pdf")) assert(Glob("*.pdf|*.txt").matches(true)("test.pdf"))
assert(Glob("*.pdf|*.txt").matches("test.txt")) assert(Glob("*.pdf|*.txt").matches(true)("test.txt"))
assert(!Glob("*.pdf|*.txt").matches("test.xls")) assert(!Glob("*.pdf|*.txt").matches(true)("test.xls"))
assert(Glob("*.pdf | *.txt").matches("test.pdf")) assert(Glob("*.pdf | *.txt").matches(true)("test.pdf"))
assert(Glob("*.pdf | mail.html").matches("test.pdf")) assert(Glob("*.pdf | mail.html").matches(true)("test.pdf"))
assert(Glob("*.pdf | mail.html").matches("mail.html")) assert(Glob("*.pdf | mail.html").matches(true)("mail.html"))
assert(!Glob("*.pdf | mail.html").matches("test.docx")) assert(!Glob("*.pdf | mail.html").matches(true)("test.docx"))
}
test("case insensitive") {
assert(Glob("*hello*").matches(false)("hello world"))
assert(Glob("*hello*").matches(false)("world hello"))
assert(Glob("*hello*").matches(false)("Hello world"))
assert(Glob("*hello*").matches(false)("world Hello"))
assert(Glob("*hello*").matches(false)("World Hello"))
assert(Glob("*hello*").matches(false)("Hello World"))
assert(Glob("*Hello*").matches(false)("world hello"))
assert(Glob("*heLLo*").matches(false)("Hello world"))
assert(Glob("*hellO*").matches(false)("world Hello"))
} }
} }

View File

@ -51,11 +51,11 @@ object ReadMail {
(Stream (Stream
.eval(bodyEntry) .eval(bodyEntry)
.flatMap(e => Stream.emits(e.toSeq)) .flatMap(e => Stream.emits(e.toSeq))
.filter(a => glob.matches(a.name)) ++ .filter(a => glob.matches(caseSensitive = false)(a.name)) ++
Stream Stream
.eval(TnefExtract.replace(mail)) .eval(TnefExtract.replace(mail))
.flatMap(m => Stream.emits(m.attachments.all)) .flatMap(m => Stream.emits(m.attachments.all))
.filter(a => a.filename.exists(glob.matches)) .filter(a => a.filename.exists(glob.matches(caseSensitive = false)))
.map(a => .map(a =>
Binary(a.filename.getOrElse("noname"), a.mimeType.toLocal, a.content) Binary(a.filename.getOrElse("noname"), a.mimeType.toLocal, a.content)
)) ))

View File

@ -243,7 +243,9 @@ object ExtractArchive {
) )
def filterNames(filter: Glob): Extracted = def filterNames(filter: Glob): Extracted =
copy(files = files.filter(ra => filter.matches(ra.name.getOrElse("")))) copy(files =
files.filter(ra => filter.matches(caseSensitive = false)(ra.name.getOrElse("")))
)
def setMeta(m: MetaProposal): Extracted = def setMeta(m: MetaProposal): Extracted =
setMeta(MetaProposalList.of(m)) setMeta(MetaProposalList.of(m))

View File

@ -182,7 +182,7 @@ object ScanMailboxTask {
ctx.args.subjectFilter match { ctx.args.subjectFilter match {
case Some(sf) => case Some(sf) =>
def check(mh: MailHeader): F[Option[MailHeader]] = def check(mh: MailHeader): F[Option[MailHeader]] =
if (sf.matches(mh.subject)) if (sf.matches(caseSensitive = false)(mh.subject))
ctx.logger.debug( ctx.logger.debug(
s"Including mail '${mh.subject}', it matches the filter." s"Including mail '${mh.subject}', it matches the filter."
) *> Option(mh).pure[F] ) *> Option(mh).pure[F]