Merge pull request #711 from eikek/fix-1

Fix reading uri from a string
This commit is contained in:
mergify[bot] 2021-03-12 21:45:51 +00:00 committed by GitHub
commit dfa25322b9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 69 additions and 22 deletions

View File

@ -148,16 +148,20 @@ object LenientUri {
unsafe(u.toExternalForm) unsafe(u.toExternalForm)
def parse(str: String): Either[String, LenientUri] = { def parse(str: String): Either[String, LenientUri] = {
def makePath(str: String): Path = def makePath(str: String): Either[String, Path] =
str.trim match { str.trim match {
case "/" => RootPath case "/" => Right(RootPath)
case "" => EmptyPath case "" => Right(EmptyPath)
case _ => case _ =>
NonEmptyList Either.fromOption(
.fromList(stripLeading(str, '/').split('/').toList.map(percentDecode)) match { stripLeading(str, '/')
case Some(nl) => NonEmptyPath(nl) .split('/')
case None => sys.error(s"Invalid url: $str") .toList
} .traverse(percentDecode)
.flatMap(NonEmptyList.fromList)
.map(NonEmptyPath.apply),
s"Invalid path: $str"
)
} }
def makeNonEmpty(str: String): Option[String] = def makeNonEmpty(str: String): Option[String] =
@ -165,7 +169,7 @@ object LenientUri {
def makeScheme(s: String): Option[NonEmptyList[String]] = def makeScheme(s: String): Option[NonEmptyList[String]] =
NonEmptyList.fromList(s.split(':').toList.filter(_.nonEmpty).map(_.toLowerCase)) NonEmptyList.fromList(s.split(':').toList.filter(_.nonEmpty).map(_.toLowerCase))
def splitPathQF(pqf: String): (Path, Option[String], Option[String]) = def splitPathQF(pqf: String): (Either[String, Path], Option[String], Option[String]) =
pqf.indexOf('?') match { pqf.indexOf('?') match {
case -1 => case -1 =>
pqf.indexOf('#') match { pqf.indexOf('#') match {
@ -200,7 +204,7 @@ object LenientUri {
case None => case None =>
Left(s"No scheme found: $str") Left(s"No scheme found: $str")
case Some(nl) => case Some(nl) =>
Right(LenientUri(nl, auth, path, query, frag)) path.map(p => LenientUri(nl, auth, p, query, frag))
} }
case Array(p0) => case Array(p0) =>
// scheme:scheme:path // scheme:scheme:path
@ -214,10 +218,11 @@ object LenientUri {
case None => case None =>
Left(s"No scheme found: $str") Left(s"No scheme found: $str")
case Some(nl) => case Some(nl) =>
Right(LenientUri(nl, None, path, query, frag)) path.map(p => LenientUri(nl, None, p, query, frag))
} }
} }
case _ => case _ =>
// str.split(<sep>, 2) returns an array of length 1 or 2, never an empty one
sys.error("Unreachable code") sys.error("Unreachable code")
} }
} }
@ -230,17 +235,34 @@ object LenientUri {
def percentEncode(s: String): String = def percentEncode(s: String): String =
s.flatMap(c => if (delims.contains(c)) percent(c.toString) else c.toString) s.flatMap(c => if (delims.contains(c)) percent(c.toString) else c.toString)
def percentDecode(s: String): String = def percentDecode(s: String): Option[String] = {
if (!s.contains("%")) s @annotation.tailrec
else def go(pos: Int, acc: Option[String], result: ByteVector): Option[ByteVector] =
s.foldLeft(("", ByteVector.empty)) { case ((acc, res), c) => if (pos >= s.length) Some(result)
if (acc.length == 2) ("", res ++ ByteVector.fromValidHex(acc.drop(1) + c)) else {
else if (acc.startsWith("%")) (acc :+ c, res) val c = s.charAt(pos)
else if (c == '%') ("%", res) acc match {
else (acc, res :+ c.toByte) case Some(enc) if enc.length == 1 =>
}._2 ByteVector.fromHex(enc + c) match {
.decodeUtf8 case Some(next) =>
.fold(throw _, identity) go(pos + 1, None, result ++ next)
case None =>
None
}
case Some(enc) =>
go(pos + 1, Some(enc + c), result)
case None if c == '%' =>
go(pos + 1, Some(""), result)
case None =>
go(pos + 1, acc, result :+ c.toByte)
}
}
go(0, None, ByteVector.empty).flatMap(bv => bv.decodeUtf8.toOption)
}
private def stripLeading(s: String, c: Char): String = private def stripLeading(s: String, c: Char): String =
if (s.length > 0 && s.charAt(0) == c) s.substring(1) if (s.length > 0 && s.charAt(0) == c) s.substring(1)

View File

@ -0,0 +1,25 @@
package docspell.common
import cats.implicits._
import munit._
/** Checks for `LenientUri.parse` and `LenientUri.percentDecode`.
  *
  * The cases below verify that a malformed percent escape is reported as a
  * value (`Left` / `None`) and that well-formed escapes decode to the
  * expected text.
  */
class LenientUriTest extends FunSuite {

  test("do not throw on invalid hex decoding") {
    // "%x3" is not a valid hex escape; parsing is expected to yield a Left.
    val parsed = LenientUri.parse("h:%x39005")
    assert(parsed.isLeft)
  }

  test("percent-decode invalid codes") {
    // Same kind of broken escape, this time handed to the decoder directly.
    val decoded = LenientUri.percentDecode("h:%x39-2")
    assertEquals(decoded, None)
  }

  test("percent-decode valid codes") {
    // Local shorthand; assertions stay one per line so that a failure still
    // points at the exact sample that broke.
    def decode(encoded: String): Option[String] =
      LenientUri.percentDecode(encoded)

    // %20 is a space, %3F is a question mark
    assertEquals(decode("a%20b"), "a b".some)
    assertEquals(decode("a%3Fb"), "a?b".some)

    // many escapes mixed with literal characters
    assertEquals(
      decode("0%2F%3A%7B%7D%29%28%3A-%2F%29%7D-%7B%2F%7D"),
      "0/:{})(:-/)}-{/}".some
    )

    // %25 decodes to a literal percent sign
    assertEquals(decode("a%25b%5Cc%7Cd%23e"), "a%b\\c|d#e".some)
  }
}