Adding extraction primitives

This commit is contained in:
Eike Kettner
2020-02-16 21:37:26 +01:00
parent 851ee7ef0f
commit 8143a4edcc
46 changed files with 2731 additions and 83 deletions

View File

@ -0,0 +1,7 @@
package docspell.files
/** A width/height pair of whole numbers.
  *
  * @param width  the horizontal extent
  * @param height the vertical extent
  */
final case class Dimension(width: Int, height: Int) {

  /** Converts this value into the equivalent `java.awt.Dimension`. */
  def toAwtDimension: java.awt.Dimension =
    new java.awt.Dimension(width, height)
}

View File

@ -0,0 +1,61 @@
package docspell.files
import java.io.{ByteArrayInputStream, InputStream}
import java.nio.file.Path
import cats.implicits._
import cats.effect._
import fs2.Stream
import javax.imageio.stream.{FileImageInputStream, ImageInputStream}
import javax.imageio.{ImageIO, ImageReader}
import scala.jdk.CollectionConverters._
import scala.util.{Try, Using}
/** Determines the dimensions of an image by asking the registered
  * `ImageReader`s for width and height, without decoding the pixel data.
  */
object ImageSize {

  /** Number of leading bytes taken from a byte stream before probing it.
    *
    * NOTE(review): formats that store sizeable metadata in front of the
    * size fields may need a larger prefix than this -- confirm against the
    * kinds of images that are expected.
    */
  private val headerBytes: Long = 768L

  /** Return the image size from its header without reading
    * the whole image into memory.
    *
    * @param file the image file to inspect
    * @return the dimension, or `None` if the file cannot be opened or no
    *         reader is able to determine the size
    */
  def get(file: Path): Option[Dimension] =
    Using(new FileImageInputStream(file.toFile))(iis => getDimension(iis)).toOption.flatten

  /** Return the image size from its header without reading
    * the whole image into memory.
    *
    * The image stream created on top of `in` is closed before returning so
    * that its cache is released; closing it does not close `in` itself,
    * which remains owned by the caller.
    *
    * @param in the stream positioned at the start of the image data
    * @return the dimension, or `None` if no image stream could be created
    *         or no reader is able to determine the size
    */
  def get(in: InputStream): Option[Dimension] =
    Option(ImageIO.createImageInputStream(in))
      .flatMap(iis => Using.resource(iis)(open => getDimension(open)))

  /** Return the image size from its header without reading
    * the whole image into memory.
    *
    * Only the first bytes of `data` are pulled from the stream. The probing
    * itself is suspended in `F`, since it is side-effecting.
    *
    * @param data the bytes of the image
    * @return the dimension, or `None` if it cannot be determined from the
    *         leading bytes
    */
  def get[F[_]: Sync](data: Stream[F, Byte]): F[Option[Dimension]] =
    data
      .take(headerBytes)
      .compile
      .to(Array)
      .flatMap(bytes => Sync[F].delay(get(new ByteArrayInputStream(bytes))))

  /** Tries the readers that claim to decode `in`, one after another, and
    * returns the first dimension that could be read. The iterator is
    * consumed lazily, so readers after the first success are never touched.
    */
  private def getDimension(in: ImageInputStream): Option[Dimension] =
    ImageIO
      .getImageReaders(in)
      .asScala
      .map(reader => readDimension(in, reader))
      .collectFirst { case Some(dim) => dim }

  /** Reads width and height of the first image using the given reader.
    * Non-fatal failures yield `None`; the reader is always disposed.
    */
  private def readDimension(in: ImageInputStream, reader: ImageReader): Option[Dimension] =
    try Try {
      reader.setInput(in)
      val index = reader.getMinIndex
      Dimension(reader.getWidth(index), reader.getHeight(index))
    }.toOption
    finally reader.dispose()
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.5 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 48 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.2 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.2 MiB

View File

@ -2,18 +2,18 @@ Derek Jeter
123 Elm Ave.
Treesville, ON MI1N 2P3
Treesville, ON M1N 2P3
November 7, 2016
Derek Jeter, 123 Elm Ave., Treesville, ON M1N 2P3, November 7, 2016
Mr. M. Leat
Mr. M. Leaf
Chief of Syrup Production
Old Sticky Pancake Company
456 Maple Lane
Forest, ON 7TW8 9Y0
Forest, ON 7W8 9Y0
Hemptown, September 3, 2019
Dear Mr. Leaf,

View File

@ -0,0 +1,14 @@
<configuration>
<!-- Logback setup: a single console appender; "docspell" loggers log at
     debug level, everything else only at error level. -->
<appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
<!-- Ask logback to route console output through Jansi. -->
<withJansi>true</withJansi>
<encoder>
<!-- Coloured level (left-aligned, padded to 5 chars), abbreviated logger
     name in cyan, then the message and a line break. -->
<pattern>%highlight(%-5level) %cyan(%logger{15}) - %msg %n</pattern>
</encoder>
</appender>
<!-- Verbose output for the project's own loggers. -->
<logger name="docspell" level="debug" />
<!-- All other loggers only report errors. -->
<root level="error">
<appender-ref ref="STDOUT" />
</root>
</configuration>

View File

@ -0,0 +1,46 @@
package docspell.files
import cats.implicits._
import cats.effect.{Blocker, IO}
import minitest.SimpleTestSuite
import scala.concurrent.ExecutionContext
import scala.util.Using
/** Verifies that `ImageSize` reports the expected dimensions for the
  * bundled example images, both via a plain `InputStream` and via a byte
  * stream.
  */
object ImageSizeTest extends SimpleTestSuite {
  val blocker: Blocker = Blocker.liftExecutionContext(ExecutionContext.global)

  // Explicitly typed: implicit definitions should always declare their type.
  implicit val CS: cats.effect.ContextShift[IO] = IO.contextShift(ExecutionContext.global)

  //tiff files are not supported on the jdk by default
  //requires an external library
  /** Example images paired with the dimension each one is expected to have. */
  val files = List(
    ExampleFiles.camera_letter_en_jpg -> Dimension(1695, 2378),
    ExampleFiles.camera_letter_en_png -> Dimension(1695, 2378),
//    ExampleFiles.camera_letter_en_tiff -> Dimension(1695, 2378),
    ExampleFiles.scanner_jfif_jpg    -> Dimension(2480, 3514),
    ExampleFiles.bombs_20K_gray_jpeg -> Dimension(20000, 20000),
    ExampleFiles.bombs_20K_gray_png  -> Dimension(20000, 20000),
    ExampleFiles.bombs_20K_rgb_jpeg  -> Dimension(20000, 20000),
    ExampleFiles.bombs_20K_rgb_png   -> Dimension(20000, 20000)
  )

  test("get sizes from input-stream") {
    files.foreach {
      case (uri, expect) =>
        val url = uri.toJavaUrl.fold(sys.error, identity)
        // The URL stream is owned by the test and closed after each check.
        Using.resource(url.openStream()) { in =>
          val dim = ImageSize.get(in)
          assertEquals(dim, expect.some)
        }
    }
  }

  test("get sizes from stream") {
    files.foreach {
      case (uri, expect) =>
        val stream = uri.readURL[IO](8192, blocker)
        val dim    = ImageSize.get(stream).unsafeRunSync()
        assertEquals(dim, expect.some)
    }
  }
}

View File

@ -0,0 +1,29 @@
package docspell.files
import cats.effect.{Blocker, IO}
import fs2.Stream
import scala.concurrent.ExecutionContext
/** Shared fixtures for tests: the example letters as byte streams and
  * their text counterparts.
  */
object TestFiles {
  val blocker: Blocker = Blocker.liftExecutionContext(ExecutionContext.global)

  // Explicitly typed: implicit definitions should always declare their type.
  implicit val CS: cats.effect.ContextShift[IO] = IO.contextShift(ExecutionContext.global)

  /** Bytes of the German example letter (PDF), read in 8 KiB chunks. */
  val letterSourceDE: Stream[IO, Byte] =
    ExampleFiles.letter_de_pdf
      .readURL[IO](8 * 1024, blocker)

  /** Bytes of the English example letter (PDF), read in 8 KiB chunks. */
  val letterSourceEN: Stream[IO, Byte] =
    ExampleFiles.letter_en_pdf
      .readURL[IO](8 * 1024, blocker)

  /** Content of the German letter's text file; loaded on first access. */
  lazy val letterDEText =
    ExampleFiles.letter_de_txt
      .readText[IO](8 * 1024, blocker)
      .unsafeRunSync()

  /** Content of the English letter's text file; loaded on first access. */
  lazy val letterENText =
    ExampleFiles.letter_en_txt
      .readText[IO](8 * 1024, blocker)
      .unsafeRunSync()
}