Add a simple glob data type

This commit is contained in:
Eike Kettner 2020-11-10 20:51:49 +01:00
parent ffaded9718
commit a21a97f7d5
2 changed files with 189 additions and 0 deletions

View File

@ -0,0 +1,114 @@
package docspell.common
import cats.implicits._
import cats.data.NonEmptyList
import io.circe.{Decoder, Encoder}
/** A very simple glob supporting only `*` and `?`.
  *
  * Pattern and input are both split at the separator `/`. The input matches
  * only if it has exactly as many parts as the pattern has segments and every
  * segment matches the part at the same position. Wildcards are evaluated
  * per segment, so they never span a separator.
  *
  * @param pattern the parsed pattern, one segment per `/`-delimited part
  */
final case class Glob(pattern: Glob.Pattern) {

  /** Tests the given string against this glob.
    *
    * @param in the string to test, e.g. a file path
    * @return `true` if the number of parts equals the number of pattern
    *         segments and each segment matches its corresponding part
    */
  def matches(in: String): Boolean = {
    val inParts = Glob.split(in, Glob.separator)

    // `zipWith` stops at the end of the shorter list. Without this guard any
    // surplus pattern segments or input parts would be ignored, so that
    // e.g. the pattern `a/b/c` would accept the input `a`.
    pattern.parts.size == inParts.size &&
    pattern.parts
      .zipWith(inParts)(_.matches(_))
      .forall(identity)
  }

  /** Renders this glob back into its textual form. */
  def asString: String =
    pattern.asString
}
/** Companion of the `Glob` class: the pattern model (a pattern consists of
  * segments, a segment of tokens), the parser for the textual form and the
  * JSON codec.
  */
object Glob {
  // Character that delimits the segments of a pattern and the parts of an input.
  private val separator = '/'

  /** Parses a glob from its textual form.
    *
    * The string is split at `/` and every part is parsed into a [[Segment]].
    * Consecutive `*` are collapsed into a single one.
    */
  def apply(str: String): Glob =
    Glob(Pattern(split(str, separator).map(makeSegment)))

  /** A parsed pattern holding one [[Segment]] per `/`-delimited part. */
  final case class Pattern(parts: NonEmptyList[Segment]) {

    /** The textual form: all segments joined by the separator. */
    def asString: String =
      parts.map(_.asString).toList.mkString(separator.toString)
  }

  object Pattern {

    /** Convenience constructor taking the segments as varargs. */
    def apply(s0: Segment, sm: Segment*): Pattern =
      Pattern(NonEmptyList.of(s0, sm: _*))
  }

  /** A sequence of tokens that is matched against a single input part. */
  final case class Segment(tokens: NonEmptyList[Token]) {

    /** Whether the tokens, applied in order, can consume `in` completely.
      *
      * For every token the set of input offsets reachable after it is
      * computed, starting with offset 0. An `Until` token may stop after
      * ''any'' occurrence of its text, not only the first one; committing to
      * the first occurrence would reject valid inputs, e.g. `*a` against
      * `aa`. The input matches if its end is reachable after the last token.
      */
    def matches(in: String): Boolean = {
      val end = in.length
      val reachable = tokens.foldLeft(Set(0)) { (starts, token) =>
        if (starts.isEmpty) starts // already failed, nothing left to advance
        else
          token match {
            case Token.Literal(text) =>
              starts.collect { case p if in.startsWith(text, p) => p + text.length }

            case Token.Single =>
              starts.collect { case p if p < end => p + 1 }

            case Token.Until(value) =>
              // Anything may be skipped before `value`, hence only the
              // smallest start offset matters. An empty `value` is found at
              // every offset, which makes all later offsets reachable.
              (starts.min to (end - value.length)).iterator
                .filter(p => in.startsWith(value, p))
                .map(_ + value.length)
                .toSet
          }
      }
      reachable.contains(end)
    }

    /** Applies `Token.consume` of every token in order and returns the
      * remaining input, or `None` as soon as one token cannot be applied.
      *
      * Each token is applied exactly once without trying alternatives (an
      * `Until` token stops at the first occurrence of its text). Therefore a
      * non-empty remainder does not imply that [[matches]] is `false`.
      */
    def consume(in: String): Option[String] =
      tokens.foldLeft(in.some) { (rem, token) =>
        rem.flatMap(token.consume)
      }

    /** The textual form: the concatenation of all tokens. */
    def asString: String =
      tokens.toList.map(_.asString).mkString
  }

  object Segment {

    /** Convenience constructor taking the tokens as varargs. */
    def apply(t0: Token, ts: Token*): Segment =
      Segment(NonEmptyList.of(t0, ts: _*))
  }

  /** The smallest unit of a pattern. */
  sealed trait Token {

    /** Removes the text covered by this token from the front of `str`.
      *
      * @return the remaining input, or `None` if the token does not apply
      */
    def consume(str: String): Option[String]

    /** The textual form of this token. */
    def asString: String
  }

  object Token {

    /** Text that must appear verbatim at the current position. */
    final case class Literal(asString: String) extends Token {
      def consume(str: String): Option[String] =
        if (str.startsWith(asString)) str.drop(asString.length).some
        else None
    }

    /** A `*` followed by literal text: skips any characters up to and
      * including `value`. An empty `value` stands for a plain `*`.
      */
    final case class Until(value: String) extends Token {

      /** Skips to just behind the first occurrence of `value`. With an empty
        * `value` all of the input is consumed.
        */
      def consume(str: String): Option[String] =
        if (value.isEmpty) Some("")
        else
          str.indexOf(value) match {
            case -1 => None
            case n  => str.substring(n + value.length).some
          }

      val asString: String =
        s"*$value"
    }

    /** A `?`: exactly one arbitrary character. */
    case object Single extends Token {
      def consume(str: String): Option[String] =
        if (str.isEmpty()) None
        else Some(str.drop(1))

      val asString: String = "?"
    }
  }

  /** Splits `str` at `sep`.
    *
    * Trailing empty parts are dropped by `split`, so `a/b/` yields the same
    * parts as `a/b`, while a leading separator yields a leading empty part.
    * If splitting produces no parts at all, the unsplit string is returned
    * as the only part.
    */
  private def split(str: String, sep: Char): NonEmptyList[String] =
    NonEmptyList
      .fromList(str.split(sep).toList)
      .getOrElse(NonEmptyList.of(str))

  /** Parses one `/`-delimited part of a pattern into a [[Segment]]. */
  private def makeSegment(str: String): Segment = {
    // Tokens are accumulated in reverse order and reversed once at the end.
    @scala.annotation.tailrec
    def loop(rem: String, res: List[Token]): List[Token] =
      if (rem.isEmpty) res
      else
        rem.charAt(0) match {
          case '*' =>
            // the literal text following the `*` becomes part of the token
            val stop = rem.drop(1).takeWhile(c => c != '*' && c != '?')
            loop(rem.drop(1 + stop.length), Token.Until(stop) :: res)
          case '?' =>
            loop(rem.drop(1), Token.Single :: res)
          case _ =>
            val lit = rem.takeWhile(c => c != '*' && c != '?')
            loop(rem.drop(lit.length), Token.Literal(lit) :: res)
        }

    // A run of `*` is equivalent to a single `*`.
    val fixed = str.replaceAll("\\*+", "*")
    NonEmptyList
      .fromList(loop(fixed, Nil).reverse)
      .map(Segment.apply)
      // no tokens are produced for an empty part: use an empty literal
      .getOrElse(Segment(Token.Literal(str)))
  }

  /** Encodes a glob as a JSON string containing its textual form. */
  implicit val jsonEncoder: Encoder[Glob] =
    Encoder.encodeString.contramap(_.asString)

  /** Decodes a glob by parsing a JSON string. */
  implicit val jsonDecoder: Decoder[Glob] =
    Decoder.decodeString.map(Glob.apply)
}

View File

@ -0,0 +1,75 @@
package docspell.common
import minitest._
import Glob._
/** Tests for `Glob`: matching of hand-built token sequences, parsing of the
  * textual form, matching across separators and rendering back to a string.
  */
object GlobTest extends SimpleTestSuite {

  /** Builds a glob consisting of one segment with the given tokens. */
  private def single(t0: Token, ts: Token*): Glob =
    Glob(Pattern(Segment(t0, ts: _*)))

  test("literals") {
    val hello = single(Token.Literal("hello"))
    assert(hello.matches("hello"))
    assert(!hello.matches("hello1"))
  }

  test("single wildcards 1") {
    val glob = single(Token.Literal("s"), Token.Until("p"), Token.Until("t"))
    assert(glob.matches("snapshot"))
    assert(!glob.matches("snapshots"))
  }

  test("single wildcards 2") {
    val glob = single(Token.Literal("test."), Token.Until(""))
    // accepted names first, then the rejected ones
    List("test.txt", "test.pdf", "test.converted.pdf").foreach { name =>
      assert(glob.matches(name))
    }
    List("test1.txt", "atest.txt").foreach { name =>
      assert(!glob.matches(name))
    }
  }

  test("single parsing") {
    val sUntilPUntilT =
      single(Token.Literal("s"), Token.Until("p"), Token.Until("t"))

    // textual form -> expected parse result
    val expectations = List(
      "s*p*t"   -> sUntilPUntilT,
      "s***p*t" -> sUntilPUntilT,
      "test.*"  -> single(Token.Literal("test."), Token.Until("")),
      "stop"    -> single(Token.Literal("stop")),
      "*stop"   -> single(Token.Until("stop")),
      "*"       -> single(Token.Until(""))
    )
    expectations.foreach { case (text, expected) =>
      assertEquals(Glob(text), expected)
    }
  }

  test("with splitting") {
    // (pattern, input, whether the input is expected to match)
    val cases = List(
      ("a/b/*", "a/b/hello", true),
      ("a/b/*", "/a/b/hello", false),
      ("/a/b/*", "/a/b/hello", true),
      ("/a/b/*", "a/b/hello", false),
      ("*/a/b/*", "a/b/hello", false),
      ("*/a/b/*", "test/a/b/hello", true)
    )
    cases.foreach { case (pattern, input, expected) =>
      val matched = Glob(pattern).matches(input)
      if (expected) assert(matched)
      else assert(!matched)
    }
  }

  test("asString") {
    // textual form -> expected rendering after parsing
    val renderings = List(
      "test.*"  -> "test.*",
      "s***p*t" -> "s*p*t",
      "stop"    -> "stop",
      "*stop"   -> "*stop",
      "/a/b/*"  -> "/a/b/*"
    )
    renderings.foreach { case (text, rendered) =>
      assertEquals(Glob(text).asString, rendered)
    }
  }
}