From df6b6aba2cbb7a0106fd13011593f96ec3903fa9 Mon Sep 17 00:00:00 2001
From: Eike Kettner <eike.kettner@posteo.de>
Date: Fri, 12 Mar 2021 22:31:19 +0100
Subject: [PATCH] Fix reading uri from a string

The read routine did not report all errors in the return type but some
raised exceptions.
---
 .../scala/docspell/common/LenientUri.scala    | 66 ++++++++++++-------
 .../docspell/common/LenientUriTest.scala      | 25 +++++++
 2 files changed, 69 insertions(+), 22 deletions(-)
 create mode 100644 modules/common/src/test/scala/docspell/common/LenientUriTest.scala

diff --git a/modules/common/src/main/scala/docspell/common/LenientUri.scala b/modules/common/src/main/scala/docspell/common/LenientUri.scala
index ca1b5069..6b82e001 100644
--- a/modules/common/src/main/scala/docspell/common/LenientUri.scala
+++ b/modules/common/src/main/scala/docspell/common/LenientUri.scala
@@ -148,16 +148,20 @@ object LenientUri {
     unsafe(u.toExternalForm)
 
   def parse(str: String): Either[String, LenientUri] = {
-    def makePath(str: String): Path =
+    def makePath(str: String): Either[String, Path] =
       str.trim match {
-        case "/" => RootPath
-        case ""  => EmptyPath
+        case "/" => Right(RootPath)
+        case ""  => Right(EmptyPath)
         case _ =>
-          NonEmptyList
-            .fromList(stripLeading(str, '/').split('/').toList.map(percentDecode)) match {
-            case Some(nl) => NonEmptyPath(nl)
-            case None     => sys.error(s"Invalid url: $str")
-          }
+          Either.fromOption(
+            stripLeading(str, '/')
+              .split('/')
+              .toList
+              .traverse(percentDecode)
+              .flatMap(NonEmptyList.fromList)
+              .map(NonEmptyPath.apply),
+            s"Invalid path: $str"
+          )
       }
 
     def makeNonEmpty(str: String): Option[String] =
@@ -165,7 +169,7 @@ object LenientUri {
     def makeScheme(s: String): Option[NonEmptyList[String]] =
       NonEmptyList.fromList(s.split(':').toList.filter(_.nonEmpty).map(_.toLowerCase))
 
-    def splitPathQF(pqf: String): (Path, Option[String], Option[String]) =
+    def splitPathQF(pqf: String): (Either[String, Path], Option[String], Option[String]) =
       pqf.indexOf('?') match {
         case -1 =>
           pqf.indexOf('#') match {
@@ -200,7 +204,7 @@ object LenientUri {
           case None =>
             Left(s"No scheme found: $str")
           case Some(nl) =>
-            Right(LenientUri(nl, auth, path, query, frag))
+            path.map(p => LenientUri(nl, auth, p, query, frag))
         }
       case Array(p0) =>
         // scheme:scheme:path
@@ -214,10 +218,11 @@ object LenientUri {
               case None =>
                 Left(s"No scheme found: $str")
               case Some(nl) =>
-                Right(LenientUri(nl, None, path, query, frag))
+                path.map(p => LenientUri(nl, None, p, query, frag))
             }
         }
       case _ =>
+        // str.split(…, 2) returns either array of length 2 or 1, never empty
         sys.error("Unreachable code")
     }
   }
@@ -230,17 +235,34 @@ object LenientUri {
   def percentEncode(s: String): String =
     s.flatMap(c => if (delims.contains(c)) percent(c.toString) else c.toString)
 
-  def percentDecode(s: String): String =
-    if (!s.contains("%")) s
-    else
-      s.foldLeft(("", ByteVector.empty)) { case ((acc, res), c) =>
-        if (acc.length == 2) ("", res ++ ByteVector.fromValidHex(acc.drop(1) + c))
-        else if (acc.startsWith("%")) (acc :+ c, res)
-        else if (c == '%') ("%", res)
-        else (acc, res :+ c.toByte)
-      }._2
-        .decodeUtf8
-        .fold(throw _, identity)
+  def percentDecode(s: String): Option[String] = {
+    @annotation.tailrec
+    def go(pos: Int, acc: Option[String], result: ByteVector): Option[ByteVector] =
+      if (pos >= s.length) Some(result)
+      else {
+        val c = s.charAt(pos)
+        acc match {
+          case Some(enc) if enc.length == 1 =>
+            ByteVector.fromHex(enc + c) match {
+              case Some(next) =>
+                go(pos + 1, None, result ++ next)
+              case None =>
+                None
+            }
+
+          case Some(enc) =>
+            go(pos + 1, Some(enc + c), result)
+
+          case None if c == '%' =>
+            go(pos + 1, Some(""), result)
+
+          case None =>
+            go(pos + 1, acc, result :+ c.toByte)
+        }
+      }
+
+    go(0, None, ByteVector.empty).flatMap(bv => bv.decodeUtf8.toOption)
+  }
 
   private def stripLeading(s: String, c: Char): String =
     if (s.length > 0 && s.charAt(0) == c) s.substring(1)
diff --git a/modules/common/src/test/scala/docspell/common/LenientUriTest.scala b/modules/common/src/test/scala/docspell/common/LenientUriTest.scala
new file mode 100644
index 00000000..1d814791
--- /dev/null
+++ b/modules/common/src/test/scala/docspell/common/LenientUriTest.scala
@@ -0,0 +1,25 @@
+package docspell.common
+
+import cats.implicits._
+import munit._
+
+class LenientUriTest extends FunSuite {
+
+  test("do not throw on invalid hex decoding") {
+    assert(LenientUri.parse("h:%x39005").isLeft)
+  }
+
+  test("percent-decode invalid codes") {
+    assertEquals(LenientUri.percentDecode("h:%x39-2"), None)
+  }
+
+  test("percent-decode valid codes") {
+    assertEquals(LenientUri.percentDecode("a%20b"), "a b".some)
+    assertEquals(LenientUri.percentDecode("a%3Fb"), "a?b".some)
+    assertEquals(
+      LenientUri.percentDecode("0%2F%3A%7B%7D%29%28%3A-%2F%29%7D-%7B%2F%7D"),
+      "0/:{})(:-/)}-{/}".some
+    )
+    assertEquals(LenientUri.percentDecode("a%25b%5Cc%7Cd%23e"), "a%b\\c|d#e".some)
+  }
+}