diff --git a/modules/common/src/main/scala/docspell/common/Glob.scala b/modules/common/src/main/scala/docspell/common/Glob.scala
new file mode 100644
index 00000000..5eba3f73
--- /dev/null
+++ b/modules/common/src/main/scala/docspell/common/Glob.scala
@@ -0,0 +1,185 @@
+package docspell.common
+
+import cats.implicits._
+import cats.data.NonEmptyList
+import io.circe.{Decoder, Encoder}
+
+/** A very simple glob supporting only `*` and `?`.
+  *
+  * Pattern and input are both split at `/` and compared segment by segment.
+  * Within a segment `*` matches any (possibly empty) run of characters and
+  * `?` matches exactly one character.
+  */
+final case class Glob(pattern: Glob.Pattern) {
+
+  /** Tests whether `in` is matched by this glob.
+    *
+    * Every pattern segment must match the input segment at the same
+    * position, so an input with fewer segments than the pattern never
+    * matches. Input segments beyond the last pattern segment are not
+    * inspected.
+    */
+  def matches(in: String): Boolean = {
+    val inParts = Glob.split(in, Glob.separator)
+    // `zipWith` stops at the shorter list, so a pattern that is longer than
+    // the input has to be rejected explicitly.
+    pattern.parts.size <= inParts.size &&
+    pattern.parts.zipWith(inParts)(_.matches(_)).forall(identity)
+  }
+
+  /** Renders the glob back into its textual form. */
+  def asString: String =
+    pattern.asString
+}
+
+object Glob {
+  private val separator = '/'
+
+  /** Parses `str` into a glob; runs of `*` are collapsed into a single one. */
+  def apply(str: String): Glob =
+    Glob(Pattern(split(str, separator).map(makeSegment)))
+
+  /** The segments of a glob, in path order. */
+  case class Pattern(parts: NonEmptyList[Segment]) {
+    def asString: String =
+      parts.map(_.asString).toList.mkString(separator.toString)
+  }
+
+  object Pattern {
+    def apply(s0: Segment, sm: Segment*): Pattern =
+      Pattern(NonEmptyList.of(s0, sm: _*))
+  }
+
+  /** The tokens making up one `/`-delimited part of a glob. */
+  case class Segment(tokens: NonEmptyList[Token]) {
+
+    /** Tests whether the tokens can consume `in` completely.
+      *
+      * Every position at which a wildcard may end is considered, so `*.pdf`
+      * matches `a.pdf.pdf` and `a*?` matches `abc`.
+      */
+    def matches(in: String): Boolean = {
+      // Offsets into `in` that are reachable after each token. Tracking a set
+      // of offsets instead of a single remainder gives backtracking semantics
+      // while keeping the work polynomial in the input length.
+      val reachable = tokens.foldLeft(Set(0)) { (offsets, token) =>
+        if (offsets.isEmpty) offsets
+        else
+          token match {
+            case Token.Literal(value) =>
+              offsets.collect {
+                case off if in.startsWith(value, off) => off + value.length
+              }
+            case Token.Single =>
+              offsets.collect { case off if off < in.length => off + 1 }
+            case Token.Until(value) =>
+              // The wildcard may skip any number of characters, hence all
+              // that is reachable from a later offset is also reachable
+              // from the smallest one.
+              (offsets.min to in.length - value.length).collect {
+                case off if in.startsWith(value, off) => off + value.length
+              }.toSet
+          }
+      }
+      reachable.contains(in.length)
+    }
+
+    /** Applies each token's `consume` in order and returns what is left of
+      * `in`, or `None` once a token fails. Unlike [[matches]] this commits
+      * to the first choice of every token and does not backtrack.
+      */
+    def consume(in: String): Option[String] =
+      tokens.foldLeft(in.some) { (rem, token) =>
+        rem.flatMap(token.consume)
+      }
+
+    def asString: String =
+      tokens.toList.map(_.asString).mkString
+  }
+  object Segment {
+    def apply(t0: Token, ts: Token*): Segment =
+      Segment(NonEmptyList.of(t0, ts: _*))
+  }
+
+  /** A single element of a [[Segment]]. */
+  sealed trait Token {
+
+    /** Strips the text matched by this token from the start of `str`, or
+      * returns `None` if the token does not match.
+      */
+    def consume(str: String): Option[String]
+
+    def asString: String
+  }
+  object Token {
+
+    /** Matches exactly the given text. */
+    case class Literal(asString: String) extends Token {
+      def consume(str: String): Option[String] =
+        if (str.startsWith(asString)) str.drop(asString.length).some
+        else None
+    }
+
+    /** A `*` followed by the literal text `value`; `value` is empty when the
+      * `*` is not followed by literal text.
+      */
+    case class Until(value: String) extends Token {
+      // Drops everything up to and including the first occurrence of
+      // `value`; an empty `value` swallows the whole input.
+      def consume(str: String): Option[String] =
+        if (value.isEmpty) Some("")
+        else
+          str.indexOf(value) match {
+            case -1 => None
+            case n  => str.substring(n + value.length).some
+          }
+      val asString =
+        s"*$value"
+    }
+
+    /** The `?` wildcard: matches exactly one character. */
+    case object Single extends Token {
+      def consume(str: String): Option[String] =
+        if (str.isEmpty()) None
+        else Some(str.drop(1))
+
+      val asString = "?"
+    }
+  }
+
+  // Falls back to a single part holding `str` when splitting yields nothing.
+  private def split(str: String, sep: Char): NonEmptyList[String] =
+    NonEmptyList
+      .fromList(str.split(sep).toList)
+      .getOrElse(NonEmptyList.of(str))
+
+  // Turns one pattern part into tokens: `*` plus the literal text following
+  // it becomes `Until`, `?` becomes `Single`, other text becomes `Literal`.
+  private def makeSegment(str: String): Segment = {
+    def loop(rem: String, res: List[Token]): List[Token] =
+      if (rem.isEmpty) res
+      else
+        rem.charAt(0) match {
+          case '*' =>
+            val stop = rem.drop(1).takeWhile(c => c != '*' && c != '?')
+            loop(rem.drop(1 + stop.length), Token.Until(stop) :: res)
+          case '?' =>
+            loop(rem.drop(1), Token.Single :: res)
+          case _ =>
+            val lit = rem.takeWhile(c => c != '*' && c != '?')
+            loop(rem.drop(lit.length), Token.Literal(lit) :: res)
+        }
+
+    val fixed = str.replaceAll("\\*+", "*")
+    NonEmptyList
+      .fromList(loop(fixed, Nil).reverse)
+      .map(Segment.apply)
+      .getOrElse(Segment(Token.Literal(str)))
+  }
+
+  implicit val jsonEncoder: Encoder[Glob] =
+    Encoder.encodeString.contramap(_.asString)
+
+  implicit val jsonDecoder: Decoder[Glob] =
+    Decoder.decodeString.map(Glob.apply)
+}
diff --git a/modules/common/src/test/scala/docspell/common/GlobTest.scala b/modules/common/src/test/scala/docspell/common/GlobTest.scala
new file mode 100644
index 00000000..05df0699
--- /dev/null
+++ b/modules/common/src/test/scala/docspell/common/GlobTest.scala
@@ -0,0 +1,87 @@
+package docspell.common
+
+import minitest._
+import Glob._
+
+object GlobTest extends SimpleTestSuite {
+
+  test("literals") {
+    assert(Glob(Pattern(Segment(Token.Literal("hello")))).matches("hello"))
+    assert(!Glob(Pattern(Segment(Token.Literal("hello")))).matches("hello1"))
+  }
+
+  test("single wildcards 1") {
+    val glob =
+      Glob(Pattern(Segment(Token.Literal("s"), Token.Until("p"), Token.Until("t"))))
+
+    assert(glob.matches("snapshot"))
+    assert(!glob.matches("snapshots"))
+  }
+
+  test("single wildcards 2") {
+    val glob =
+      Glob(Pattern(Segment(Token.Literal("test."), Token.Until(""))))
+
+    assert(glob.matches("test.txt"))
+    assert(glob.matches("test.pdf"))
+    assert(glob.matches("test.converted.pdf"))
+    assert(!glob.matches("test1.txt"))
+    assert(!glob.matches("atest.txt"))
+  }
+
+  test("single parsing") {
+    assertEquals(
+      Glob("s*p*t"),
+      Glob(Pattern(Segment(Token.Literal("s"), Token.Until("p"), Token.Until("t"))))
+    )
+    assertEquals(
+      Glob("s***p*t"),
+      Glob(Pattern(Segment(Token.Literal("s"), Token.Until("p"), Token.Until("t"))))
+    )
+    assertEquals(
+      Glob("test.*"),
+      Glob(Pattern(Segment(Token.Literal("test."), Token.Until(""))))
+    )
+    assertEquals(
+      Glob("stop"),
+      Glob(Pattern(Segment(Token.Literal("stop"))))
+    )
+    assertEquals(
+      Glob("*stop"),
+      Glob(Pattern(Segment(Token.Until("stop"))))
+    )
+    assertEquals(
+      Glob("*"),
+      Glob(Pattern(Segment(Token.Until(""))))
+    )
+  }
+
+  test("with splitting") {
+    assert(Glob("a/b/*").matches("a/b/hello"))
+    assert(!Glob("a/b/*").matches("/a/b/hello"))
+    assert(Glob("/a/b/*").matches("/a/b/hello"))
+    assert(!Glob("/a/b/*").matches("a/b/hello"))
+    assert(!Glob("*/a/b/*").matches("a/b/hello"))
+    assert(Glob("*/a/b/*").matches("test/a/b/hello"))
+  }
+
+  test("asString") {
+    assertEquals(Glob("test.*").asString, "test.*")
+    assertEquals(Glob("s***p*t").asString, "s*p*t")
+    assertEquals(Glob("stop").asString, "stop")
+    assertEquals(Glob("*stop").asString, "*stop")
+    assertEquals(Glob("/a/b/*").asString, "/a/b/*")
+  }
+
+  test("pattern longer than input") {
+    assert(!Glob("a/b/c").matches("a"))
+    assert(!Glob("a/b/*").matches("a/b"))
+  }
+
+  test("wildcards consider later occurrences") {
+    assert(Glob("*.pdf").matches("a.pdf.pdf"))
+    assert(Glob("*t").matches("tt"))
+    assert(Glob("a*?").matches("abc"))
+    assert(!Glob("a*?").matches("a"))
+  }
+}