	Add a simple glob data type
modules/common/src/main/scala/docspell/common/Glob.scala (new file, 114 lines)
@@ -0,0 +1,114 @@
package docspell.common

import cats.implicits._
import cats.data.NonEmptyList
import io.circe.{Decoder, Encoder}

/** A very simple glob supporting only `*` and `?`. */
final case class Glob(pattern: Glob.Pattern) {
  def matches(in: String): Boolean =
    pattern.parts
      .zipWith(Glob.split(in, Glob.separator))(_.matches(_))
      .forall(identity)

  def asString: String =
    pattern.asString
}

object Glob {
  private val separator = '/'

  def apply(str: String): Glob =
    Glob(Pattern(split(str, separator).map(makeSegment)))

  case class Pattern(parts: NonEmptyList[Segment]) {
    def asString =
      parts.map(_.asString).toList.mkString(separator.toString)
  }

  object Pattern {
    def apply(s0: Segment, sm: Segment*): Pattern =
      Pattern(NonEmptyList.of(s0, sm: _*))
  }

  case class Segment(tokens: NonEmptyList[Token]) {
    def matches(in: String): Boolean =
      consume(in).exists(_.isEmpty)

    def consume(in: String): Option[String] =
      tokens.foldLeft(in.some) { (rem, token) =>
        rem.flatMap(token.consume)
      }

    def asString: String =
      tokens.toList.map(_.asString).mkString
  }
  object Segment {
    def apply(t0: Token, ts: Token*): Segment =
      Segment(NonEmptyList.of(t0, ts: _*))
  }

  sealed trait Token {
    def consume(str: String): Option[String]

    def asString: String
  }
  object Token {
    case class Literal(asString: String) extends Token {
      def consume(str: String): Option[String] =
        if (str.startsWith(asString)) str.drop(asString.length).some
        else None
    }
    case class Until(value: String) extends Token {
      def consume(str: String): Option[String] =
        if (value.isEmpty) Some("")
        else
          str.indexOf(value) match {
            case -1 => None
            case n  => str.substring(n + value.length).some
          }
      val asString =
        s"*$value"
    }
    case object Single extends Token {
      def consume(str: String): Option[String] =
        if (str.isEmpty()) None
        else Some(str.drop(1))

      val asString = "?"
    }
  }

  private def split(str: String, sep: Char): NonEmptyList[String] =
    NonEmptyList
      .fromList(str.split(sep).toList)
      .getOrElse(NonEmptyList.of(str))

  private def makeSegment(str: String): Segment = {
    def loop(rem: String, res: List[Token]): List[Token] =
      if (rem.isEmpty) res
      else
        rem.charAt(0) match {
          case '*' =>
            val stop = rem.drop(1).takeWhile(c => c != '*' && c != '?')
            loop(rem.drop(1 + stop.length), Token.Until(stop) :: res)
          case '?' =>
            loop(rem.drop(1), Token.Single :: res)
          case _ =>
            val lit = rem.takeWhile(c => c != '*' && c != '?')
            loop(rem.drop(lit.length), Token.Literal(lit) :: res)
        }

    val fixed = str.replaceAll("\\*+", "*")
    NonEmptyList
      .fromList(loop(fixed, Nil).reverse)
      .map(Segment.apply)
      .getOrElse(Segment(Token.Literal(str)))
  }

  implicit val jsonEncoder: Encoder[Glob] =
    Encoder.encodeString.contramap(_.asString)

  implicit val jsonDecoder: Decoder[Glob] =
    Decoder.decodeString.map(Glob.apply)
}
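For orientation, here is a minimal usage sketch of the Glob type added above. It is not part of the commit; the object name GlobUsage is only illustrative, and it assumes the docspell.common code from this diff is on the classpath.

import docspell.common.Glob

object GlobUsage extends App {
  val pdf = Glob("*.pdf")
  println(pdf.matches("invoice.pdf"))     // true: "*" skips ahead to the first ".pdf" and consumes it
  println(pdf.matches("invoice.pdf.txt")) // false: ".txt" is left over after the match

  val scan = Glob("scan-???.png")
  println(scan.matches("scan-001.png"))   // true: each "?" matches exactly one character
  println(scan.matches("scan-01.png"))    // false: only two characters between "-" and "."

  // Pattern and input are split on '/' and matched segment by segment.
  println(Glob("docs/*/final.pdf").matches("docs/2020/final.pdf")) // true
}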
							
								
								
									
modules/common/src/test/scala/docspell/common/GlobTest.scala (new file, 75 lines)
@@ -0,0 +1,75 @@
package docspell.common

import minitest._
import Glob._

object GlobTest extends SimpleTestSuite {

  test("literals") {
    assert(Glob(Pattern(Segment(Token.Literal("hello")))).matches("hello"))
    assert(!Glob(Pattern(Segment(Token.Literal("hello")))).matches("hello1"))
  }

  test("single wildcards 1") {
    val glob =
      Glob(Pattern(Segment(Token.Literal("s"), Token.Until("p"), Token.Until("t"))))

    assert(glob.matches("snapshot"))
    assert(!glob.matches("snapshots"))
  }

  test("single wildcards 2") {
    val glob =
      Glob(Pattern(Segment(Token.Literal("test."), Token.Until(""))))

    assert(glob.matches("test.txt"))
    assert(glob.matches("test.pdf"))
    assert(glob.matches("test.converted.pdf"))
    assert(!glob.matches("test1.txt"))
    assert(!glob.matches("atest.txt"))
  }

  test("single parsing") {
    assertEquals(
      Glob("s*p*t"),
      Glob(Pattern(Segment(Token.Literal("s"), Token.Until("p"), Token.Until("t"))))
    )
    assertEquals(
      Glob("s***p*t"),
      Glob(Pattern(Segment(Token.Literal("s"), Token.Until("p"), Token.Until("t"))))
    )
    assertEquals(
      Glob("test.*"),
      Glob(Pattern(Segment(Token.Literal("test."), Token.Until(""))))
    )
    assertEquals(
      Glob("stop"),
      Glob(Pattern(Segment(Token.Literal("stop"))))
    )
    assertEquals(
      Glob("*stop"),
      Glob(Pattern(Segment(Token.Until("stop"))))
    )
    assertEquals(
      Glob("*"),
      Glob(Pattern(Segment(Token.Until(""))))
    )
  }

  test("with splitting") {
    assert(Glob("a/b/*").matches("a/b/hello"))
    assert(!Glob("a/b/*").matches("/a/b/hello"))
    assert(Glob("/a/b/*").matches("/a/b/hello"))
    assert(!Glob("/a/b/*").matches("a/b/hello"))
    assert(!Glob("*/a/b/*").matches("a/b/hello"))
    assert(Glob("*/a/b/*").matches("test/a/b/hello"))
  }

  test("asString") {
    assertEquals(Glob("test.*").asString, "test.*")
    assertEquals(Glob("s***p*t").asString, "s*p*t")
    assertEquals(Glob("stop").asString, "stop")
    assertEquals(Glob("*stop").asString, "*stop")
    assertEquals(Glob("/a/b/*").asString, "/a/b/*")
  }
}
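To illustrate how matching works internally, a Segment consumes its tokens left to right and matches only when the whole input is consumed. The following explanatory sketch (not part of the commit; the object name ConsumeTrace is hypothetical) traces the pattern from the "single wildcards 1" test above.

import docspell.common.Glob._

object ConsumeTrace extends App {
  // Segment parsed from the pattern "s*p*t"
  val seg = Segment(Token.Literal("s"), Token.Until("p"), Token.Until("t"))

  // "snapshot": Literal("s") -> "napshot", Until("p") -> "shot", Until("t") -> ""
  println(seg.consume("snapshot"))  // Some("") : nothing left, so matches("snapshot") is true

  // "snapshots": the final Until("t") stops after the first "t", leaving "s" unconsumed
  println(seg.consume("snapshots")) // Some("s") : non-empty remainder, so no match
}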