From df6b6aba2cbb7a0106fd13011593f96ec3903fa9 Mon Sep 17 00:00:00 2001 From: Eike Kettner Date: Fri, 12 Mar 2021 22:31:19 +0100 Subject: [PATCH] Fix reading uri from a string The read routine did not report all errors in the return type but some raised exceptions. --- .../scala/docspell/common/LenientUri.scala | 66 ++++++++++++------- .../docspell/common/LenientUriTest.scala | 25 +++++++ 2 files changed, 69 insertions(+), 22 deletions(-) create mode 100644 modules/common/src/test/scala/docspell/common/LenientUriTest.scala diff --git a/modules/common/src/main/scala/docspell/common/LenientUri.scala b/modules/common/src/main/scala/docspell/common/LenientUri.scala index ca1b5069..6b82e001 100644 --- a/modules/common/src/main/scala/docspell/common/LenientUri.scala +++ b/modules/common/src/main/scala/docspell/common/LenientUri.scala @@ -148,16 +148,20 @@ object LenientUri { unsafe(u.toExternalForm) def parse(str: String): Either[String, LenientUri] = { - def makePath(str: String): Path = + def makePath(str: String): Either[String, Path] = str.trim match { - case "/" => RootPath - case "" => EmptyPath + case "/" => Right(RootPath) + case "" => Right(EmptyPath) case _ => - NonEmptyList - .fromList(stripLeading(str, '/').split('/').toList.map(percentDecode)) match { - case Some(nl) => NonEmptyPath(nl) - case None => sys.error(s"Invalid url: $str") - } + Either.fromOption( + stripLeading(str, '/') + .split('/') + .toList + .traverse(percentDecode) + .flatMap(NonEmptyList.fromList) + .map(NonEmptyPath.apply), + s"Invalid path: $str" + ) } def makeNonEmpty(str: String): Option[String] = @@ -165,7 +169,7 @@ object LenientUri { def makeScheme(s: String): Option[NonEmptyList[String]] = NonEmptyList.fromList(s.split(':').toList.filter(_.nonEmpty).map(_.toLowerCase)) - def splitPathQF(pqf: String): (Path, Option[String], Option[String]) = + def splitPathQF(pqf: String): (Either[String, Path], Option[String], Option[String]) = pqf.indexOf('?') match { case -1 => pqf.indexOf('#') match { @@ -200,7 +204,7 @@ object LenientUri { case None => Left(s"No scheme found: $str") case Some(nl) => - Right(LenientUri(nl, auth, path, query, frag)) + path.map(p => LenientUri(nl, auth, p, query, frag)) } case Array(p0) => // scheme:scheme:path @@ -214,10 +218,11 @@ object LenientUri { case None => Left(s"No scheme found: $str") case Some(nl) => - Right(LenientUri(nl, None, path, query, frag)) + path.map(p => LenientUri(nl, None, p, query, frag)) } } case _ => + // str.split(…, 2) returns either array of length 2 or 1, never empty sys.error("Unreachable code") } } @@ -230,17 +235,34 @@ object LenientUri { def percentEncode(s: String): String = s.flatMap(c => if (delims.contains(c)) percent(c.toString) else c.toString) - def percentDecode(s: String): String = - if (!s.contains("%")) s - else - s.foldLeft(("", ByteVector.empty)) { case ((acc, res), c) => - if (acc.length == 2) ("", res ++ ByteVector.fromValidHex(acc.drop(1) + c)) - else if (acc.startsWith("%")) (acc :+ c, res) - else if (c == '%') ("%", res) - else (acc, res :+ c.toByte) - }._2 - .decodeUtf8 - .fold(throw _, identity) + def percentDecode(s: String): Option[String] = { + @annotation.tailrec + def go(pos: Int, acc: Option[String], result: ByteVector): Option[ByteVector] = + if (pos >= s.length) Some(result) + else { + val c = s.charAt(pos) + acc match { + case Some(enc) if enc.length == 1 => + ByteVector.fromHex(enc + c) match { + case Some(next) => + go(pos + 1, None, result ++ next) + case None => + None + } + + case Some(enc) => + go(pos + 1, Some(enc + c), result) + + case None if c == '%' => + go(pos + 1, Some(""), result) + + case None => + go(pos + 1, acc, result :+ c.toByte) + } + } + + go(0, None, ByteVector.empty).flatMap(bv => bv.decodeUtf8.toOption) + } private def stripLeading(s: String, c: Char): String = if (s.length > 0 && s.charAt(0) == c) s.substring(1) diff --git a/modules/common/src/test/scala/docspell/common/LenientUriTest.scala b/modules/common/src/test/scala/docspell/common/LenientUriTest.scala new file mode 100644 index 00000000..1d814791 --- /dev/null +++ b/modules/common/src/test/scala/docspell/common/LenientUriTest.scala @@ -0,0 +1,25 @@ +package docspell.common + +import cats.implicits._ +import munit._ + +class LenientUriTest extends FunSuite { + + test("do not throw on invalid hex decoding") { + assert(LenientUri.parse("h:%x39005").isLeft) + } + + test("percent-decode invalid codes") { + assertEquals(LenientUri.percentDecode("h:%x39-2"), None) + } + + test("percent-decode valid codes") { + assertEquals(LenientUri.percentDecode("a%20b"), "a b".some) + assertEquals(LenientUri.percentDecode("a%3Fb"), "a?b".some) + assertEquals( + LenientUri.percentDecode("0%2F%3A%7B%7D%29%28%3A-%2F%29%7D-%7B%2F%7D"), + "0/:{})(:-/)}-{/}".some + ) + assertEquals(LenientUri.percentDecode("a%25b%5Cc%7Cd%23e"), "a%b\\c|d#e".some) + } +}