From dd763e7796857a48dde399e38d39c670d73cc7ed Mon Sep 17 00:00:00 2001 From: eikek Date: Sun, 12 Nov 2023 13:04:03 +0100 Subject: [PATCH] Fix potential infinite loop The code removed here was copied from another project some years back. Now there is an improved version in fs2 that can be used. Fixes: #2376 --- .../main/scala/docspell/common/Binary.scala | 51 ++----------------- .../docspell/common/util/ResourceUse.scala | 17 +++---- .../docspell/convert/flexmark/Markdown.scala | 2 +- .../main/scala/docspell/joex/JoexServer.scala | 4 +- 4 files changed, 14 insertions(+), 60 deletions(-) diff --git a/modules/common/src/main/scala/docspell/common/Binary.scala b/modules/common/src/main/scala/docspell/common/Binary.scala index 5fd83e2f..35a94b31 100644 --- a/modules/common/src/main/scala/docspell/common/Binary.scala +++ b/modules/common/src/main/scala/docspell/common/Binary.scala @@ -12,8 +12,8 @@ import java.nio.charset.StandardCharsets import cats.data.OptionT import cats.effect._ import cats.syntax.all._ +import fs2._ import fs2.io.file.{Files, Path} -import fs2.{Chunk, Pipe, Stream} import docspell.logging.Logger @@ -64,11 +64,11 @@ object Binary { def html[F[_]](name: String, content: ByteVector, cs: Charset): Binary[F] = Binary(name, MimeType.html.withCharset(cs), Stream.chunk(Chunk.byteVector(content))) - def decode[F[_]](cs: Charset): Pipe[F, Byte, String] = + def decode[F[_]: RaiseThrowable](cs: Charset): Pipe[F, Byte, String] = if (cs == StandardCharsets.UTF_8) fs2.text.utf8.decode else - util.decode[F](cs) + fs2.text.decodeWithCharset(cs) def loadAllBytes[F[_]: Sync](data: Stream[F, Byte]): F[ByteVector] = data.chunks.map(_.toByteVector).compile.fold(ByteVector.empty)((r, e) => r ++ e) @@ -104,49 +104,4 @@ object Binary { } .drain .as(targetDir) - - // This is a copy from org.http4s.util - // Http4s is licensed under the Apache License 2.0 - private object util { - import fs2._ - import java.nio._ - - private val utf8Bom: Chunk[Byte] = Chunk(0xef.toByte, 0xbb.toByte, 0xbf.toByte) - - def decode[F[_]](charset: Charset): Pipe[F, Byte, String] = { - val decoder = charset.newDecoder - val maxCharsPerByte = math.ceil(decoder.maxCharsPerByte().toDouble).toInt - val avgBytesPerChar = math.ceil(1.0 / decoder.averageCharsPerByte().toDouble).toInt - val charBufferSize = 128 - - _.repeatPull[String] { - _.unconsN(charBufferSize * avgBytesPerChar, allowFewer = true).flatMap { - case None => - val charBuffer = CharBuffer.allocate(1) - decoder.decode(ByteBuffer.allocate(0), charBuffer, true) - decoder.flush(charBuffer) - val outputString = charBuffer.flip().toString - if (outputString.isEmpty) Pull.done.as(None) - else Pull.output1(outputString).as(None) - case Some((chunk, stream)) => - if (chunk.nonEmpty) { - val chunkWithoutBom = skipByteOrderMark(chunk) - val bytes = chunkWithoutBom.toArray - val byteBuffer = ByteBuffer.wrap(bytes) - val charBuffer = CharBuffer.allocate(bytes.length * maxCharsPerByte) - decoder.decode(byteBuffer, charBuffer, false) - val nextStream = stream.consChunk(Chunk.byteBuffer(byteBuffer.slice())) - Pull.output1(charBuffer.flip().toString).as(Some(nextStream)) - } else - Pull.output(Chunk.empty[String]).as(Some(stream)) - } - } - } - - private def skipByteOrderMark[F[_]](chunk: Chunk[Byte]): Chunk[Byte] = - if (chunk.size >= 3 && chunk.take(3) == utf8Bom) - chunk.drop(3) - else chunk - - } } diff --git a/modules/common/src/main/scala/docspell/common/util/ResourceUse.scala b/modules/common/src/main/scala/docspell/common/util/ResourceUse.scala index 3389e344..e13e789c 100644 --- a/modules/common/src/main/scala/docspell/common/util/ResourceUse.scala +++ b/modules/common/src/main/scala/docspell/common/util/ResourceUse.scala @@ -20,21 +20,20 @@ object ResourceUse { object Implicits { implicit final class UseSyntax[F[_]: Concurrent, A](resource: Resource[F, A]) { - /** Evaluates `resource` endlessly or until the signal turns `true`. */ - def useUntil( + def useWhile( signal: Signal[F, Boolean], returnValue: Ref[F, ExitCode] - ): F[ExitCode] = { + ): Stream[F, ExitCode] = { val server = Stream.resource(resource) val blockUntilTrue = signal.discrete.takeWhile(_ == false).drain - val exit = fs2.Stream.eval(returnValue.get) - (server *> (blockUntilTrue ++ exit)).compile.lastOrError + val exit = Stream.eval(returnValue.get) + server *> (blockUntilTrue ++ exit) } - def useForever(implicit ev: Async[F]): F[ExitCode] = for { - termSignal <- SignallingRef.of[F, Boolean](false) - exitValue <- Ref.of(ExitCode.Success) - rc <- useUntil(termSignal, exitValue) + def useForever(implicit ev: Async[F]): Stream[F, ExitCode] = for { + termSignal <- Stream.eval(SignallingRef.of[F, Boolean](false)) + exitValue <- Stream.eval(Ref.of(ExitCode.Success)) + rc <- useWhile(termSignal, exitValue) } yield rc } } diff --git a/modules/convert/src/main/scala/docspell/convert/flexmark/Markdown.scala b/modules/convert/src/main/scala/docspell/convert/flexmark/Markdown.scala index fcc19927..cd18e4e6 100644 --- a/modules/convert/src/main/scala/docspell/convert/flexmark/Markdown.scala +++ b/modules/convert/src/main/scala/docspell/convert/flexmark/Markdown.scala @@ -52,7 +52,7 @@ object Markdown { cfg: MarkdownConfig, cs: Charset ): F[String] = - data.through(Binary.decode(cs)).compile.foldMonoid.map(str => toHtml(str, cfg)) + data.through(Binary.decode(cs)).compile.string.map(str => toHtml(str, cfg)) private def wrapHtml(body: String, cfg: MarkdownConfig): String = s""" diff --git a/modules/joex/src/main/scala/docspell/joex/JoexServer.scala b/modules/joex/src/main/scala/docspell/joex/JoexServer.scala index 64d4c9a3..dc5362ce 100644 --- a/modules/joex/src/main/scala/docspell/joex/JoexServer.scala +++ b/modules/joex/src/main/scala/docspell/joex/JoexServer.scala @@ -75,14 +75,14 @@ object JoexServer { Stream .resource(app) - .evalMap { app => + .flatMap { app => EmberServerBuilder .default[F] .withHost(cfg.bind.address) .withPort(cfg.bind.port) .withHttpApp(app.httpApp) .build - .useUntil(app.termSig, app.exitRef) + .useWhile(app.termSig, app.exitRef) } }.drain }