Merge pull request #2378 from eikek/2376-processing-bin-files

Fix potential infinite loop
This commit is contained in:
mergify[bot] 2023-11-12 12:18:43 +00:00 committed by GitHub
commit b181c57424
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 14 additions and 60 deletions

View File

@ -12,8 +12,8 @@ import java.nio.charset.StandardCharsets
import cats.data.OptionT import cats.data.OptionT
import cats.effect._ import cats.effect._
import cats.syntax.all._ import cats.syntax.all._
import fs2._
import fs2.io.file.{Files, Path} import fs2.io.file.{Files, Path}
import fs2.{Chunk, Pipe, Stream}
import docspell.logging.Logger import docspell.logging.Logger
@ -64,11 +64,11 @@ object Binary {
def html[F[_]](name: String, content: ByteVector, cs: Charset): Binary[F] = def html[F[_]](name: String, content: ByteVector, cs: Charset): Binary[F] =
Binary(name, MimeType.html.withCharset(cs), Stream.chunk(Chunk.byteVector(content))) Binary(name, MimeType.html.withCharset(cs), Stream.chunk(Chunk.byteVector(content)))
def decode[F[_]](cs: Charset): Pipe[F, Byte, String] = def decode[F[_]: RaiseThrowable](cs: Charset): Pipe[F, Byte, String] =
if (cs == StandardCharsets.UTF_8) if (cs == StandardCharsets.UTF_8)
fs2.text.utf8.decode fs2.text.utf8.decode
else else
util.decode[F](cs) fs2.text.decodeWithCharset(cs)
def loadAllBytes[F[_]: Sync](data: Stream[F, Byte]): F[ByteVector] = def loadAllBytes[F[_]: Sync](data: Stream[F, Byte]): F[ByteVector] =
data.chunks.map(_.toByteVector).compile.fold(ByteVector.empty)((r, e) => r ++ e) data.chunks.map(_.toByteVector).compile.fold(ByteVector.empty)((r, e) => r ++ e)
@ -104,49 +104,4 @@ object Binary {
} }
.drain .drain
.as(targetDir) .as(targetDir)
// This is a copy from org.http4s.util
// Http4s is licensed under the Apache License 2.0
/** Helpers for decoding a byte stream into strings using an arbitrary
  * `Charset`, built on a `java.nio` `CharsetDecoder` driven from an fs2 pull.
  */
private object util {
import fs2._
import java.nio._
// The three bytes EF BB BF that `skipByteOrderMark` strips from the front of a chunk.
private val utf8Bom: Chunk[Byte] = Chunk(0xef.toByte, 0xbb.toByte, 0xbf.toByte)
/** Creates a pipe that decodes bytes into strings with the given charset.
  *
  * Bytes are pulled in batches, decoded, and any bytes the decoder did not
  * consume are pushed back onto the stream to be retried with the next batch.
  *
  * NOTE(review): the decoder is created once per call of this method, not
  * once per stream, so the returned pipe shares decoder state if it is
  * applied more than once.
  *
  * NOTE(review): the result of `decoder.decode` is ignored. If the decoder
  * consumes no bytes (malformed input, or an incomplete sequence at the very
  * end of the input), the same bytes are pushed back and pulled again, so
  * the loop does not appear to make progress in that case.
  *
  * @param charset the charset used to create the decoder
  * @return a pipe emitting one string per decoded batch
  */
def decode[F[_]](charset: Charset): Pipe[F, Byte, String] = {
val decoder = charset.newDecoder
// Upper bound of chars produced per input byte; sizes the output buffer below.
val maxCharsPerByte = math.ceil(decoder.maxCharsPerByte().toDouble).toInt
// Rounded-up number of bytes per char; scales how many bytes are pulled at once.
val avgBytesPerChar = math.ceil(1.0 / decoder.averageCharsPerByte().toDouble).toInt
val charBufferSize = 128
_.repeatPull[String] {
// Pull at most charBufferSize * avgBytesPerChar bytes; fewer are accepted.
_.unconsN(charBufferSize * avgBytesPerChar, allowFewer = true).flatMap {
case None =>
// Input exhausted: signal end-of-input to the decoder with an empty
// buffer, flush it, emit whatever it produced, and stop (`None`).
// NOTE(review): the output buffer holds a single char.
val charBuffer = CharBuffer.allocate(1)
decoder.decode(ByteBuffer.allocate(0), charBuffer, true)
decoder.flush(charBuffer)
val outputString = charBuffer.flip().toString
if (outputString.isEmpty) Pull.done.as(None)
else Pull.output1(outputString).as(None)
case Some((chunk, stream)) =>
if (chunk.nonEmpty) {
// NOTE(review): the BOM check runs on every pulled chunk, not only
// the first one, and regardless of `charset`.
val chunkWithoutBom = skipByteOrderMark(chunk)
val bytes = chunkWithoutBom.toArray
val byteBuffer = ByteBuffer.wrap(bytes)
val charBuffer = CharBuffer.allocate(bytes.length * maxCharsPerByte)
// endOfInput = false: more bytes may follow.
decoder.decode(byteBuffer, charBuffer, false)
// Whatever remains in byteBuffer after decoding is pushed back in
// front of the rest of the stream.
val nextStream = stream.consChunk(Chunk.byteBuffer(byteBuffer.slice()))
Pull.output1(charBuffer.flip().toString).as(Some(nextStream))
} else
// Empty chunk: emit nothing and continue with the rest of the stream.
Pull.output(Chunk.empty[String]).as(Some(stream))
}
}
}
/** Returns `chunk` without its first three bytes when it has at least three
  * bytes and starts with `utf8Bom`; otherwise returns `chunk` unchanged.
  * The type parameter `F` is not used in the body.
  */
private def skipByteOrderMark[F[_]](chunk: Chunk[Byte]): Chunk[Byte] =
if (chunk.size >= 3 && chunk.take(3) == utf8Bom)
chunk.drop(3)
else chunk
}
} }

View File

@ -20,21 +20,20 @@ object ResourceUse {
object Implicits { object Implicits {
implicit final class UseSyntax[F[_]: Concurrent, A](resource: Resource[F, A]) { implicit final class UseSyntax[F[_]: Concurrent, A](resource: Resource[F, A]) {
/** Evaluates `resource` endlessly or until the signal turns `true`. */ def useWhile(
def useUntil(
signal: Signal[F, Boolean], signal: Signal[F, Boolean],
returnValue: Ref[F, ExitCode] returnValue: Ref[F, ExitCode]
): F[ExitCode] = { ): Stream[F, ExitCode] = {
val server = Stream.resource(resource) val server = Stream.resource(resource)
val blockUntilTrue = signal.discrete.takeWhile(_ == false).drain val blockUntilTrue = signal.discrete.takeWhile(_ == false).drain
val exit = fs2.Stream.eval(returnValue.get) val exit = Stream.eval(returnValue.get)
(server *> (blockUntilTrue ++ exit)).compile.lastOrError server *> (blockUntilTrue ++ exit)
} }
def useForever(implicit ev: Async[F]): F[ExitCode] = for { def useForever(implicit ev: Async[F]): Stream[F, ExitCode] = for {
termSignal <- SignallingRef.of[F, Boolean](false) termSignal <- Stream.eval(SignallingRef.of[F, Boolean](false))
exitValue <- Ref.of(ExitCode.Success) exitValue <- Stream.eval(Ref.of(ExitCode.Success))
rc <- useUntil(termSignal, exitValue) rc <- useWhile(termSignal, exitValue)
} yield rc } yield rc
} }
} }

View File

@ -52,7 +52,7 @@ object Markdown {
cfg: MarkdownConfig, cfg: MarkdownConfig,
cs: Charset cs: Charset
): F[String] = ): F[String] =
data.through(Binary.decode(cs)).compile.foldMonoid.map(str => toHtml(str, cfg)) data.through(Binary.decode(cs)).compile.string.map(str => toHtml(str, cfg))
private def wrapHtml(body: String, cfg: MarkdownConfig): String = private def wrapHtml(body: String, cfg: MarkdownConfig): String =
s"""<!DOCTYPE html> s"""<!DOCTYPE html>

View File

@ -75,14 +75,14 @@ object JoexServer {
Stream Stream
.resource(app) .resource(app)
.evalMap { app => .flatMap { app =>
EmberServerBuilder EmberServerBuilder
.default[F] .default[F]
.withHost(cfg.bind.address) .withHost(cfg.bind.address)
.withPort(cfg.bind.port) .withPort(cfg.bind.port)
.withHttpApp(app.httpApp) .withHttpApp(app.httpApp)
.build .build
.useUntil(app.termSig, app.exitRef) .useWhile(app.termSig, app.exitRef)
} }
}.drain }.drain
} }