mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-06-21 18:08:25 +00:00
Adding extraction primitives
This commit is contained in:
@ -0,0 +1,7 @@
|
||||
package docspell.files
|
||||
|
||||
/** Width and height of an image, in the units reported by the image reader.
  *
  * @param width  horizontal extent
  * @param height vertical extent
  */
case class Dimension(width: Int, height: Int) {
  // Alias the AWT class so it cannot be confused with this case class.
  import java.awt.{Dimension => AwtDimension}

  /** Convert this value into the equivalent `java.awt.Dimension`. */
  def toAwtDimension: AwtDimension = {
    val awt = new AwtDimension(width, height)
    awt
  }
}
|
61
modules/files/src/main/scala/docspell/files/ImageSize.scala
Normal file
61
modules/files/src/main/scala/docspell/files/ImageSize.scala
Normal file
@ -0,0 +1,61 @@
|
||||
package docspell.files
|
||||
|
||||
import java.io.{ByteArrayInputStream, InputStream}
|
||||
import java.nio.file.Path
|
||||
|
||||
import cats.implicits._
|
||||
import cats.effect._
|
||||
import fs2.Stream
|
||||
import javax.imageio.stream.{FileImageInputStream, ImageInputStream}
|
||||
import javax.imageio.{ImageIO, ImageReader}
|
||||
|
||||
import scala.jdk.CollectionConverters._
|
||||
import scala.util.{Try, Using}
|
||||
|
||||
object ImageSize {

  /** Return the image size from its header without reading
    * the whole image into memory.
    *
    * @param file path of the image file to inspect
    * @return the size reported by the first image reader that yields one;
    *         `None` if the file cannot be opened or no reader yields a size
    */
  def get(file: Path): Option[Dimension] =
    Using(new FileImageInputStream(file.toFile))(iis => getDimension(iis)).toOption.flatten

  /** Return the image size from its header without reading
    * the whole image into memory.
    *
    * The given stream is not closed by this method; only the
    * `ImageInputStream` wrapped around it is released.
    *
    * @param in stream positioned at the start of the image data
    * @return the size reported by the first image reader that yields one;
    *         `None` if no image input stream could be created for `in`
    *         or no reader yields a size
    */
  def get(in: InputStream): Option[Dimension] =
    Option(ImageIO.createImageInputStream(in)).flatMap { iis =>
      // The wrapper may hold its own cache (memory or temp file), so it
      // must be closed even though `in` itself is owned by the caller.
      Using.resource(iis)(s => getDimension(s))
    }

  /** Return the image size from its header without reading
    * the whole image into memory.
    *
    * Only the first 768 bytes of `data` are consumed, so the size must be
    * determinable from that prefix.
    *
    * @param data the bytes of the image
    * @return an effect yielding the size reported by the first image reader
    *         that yields one, or `None` if no reader does; the effect fails
    *         with a `RuntimeException` if no image input stream can be
    *         created for the bytes
    */
  def get[F[_]: Sync](data: Stream[F, Byte]): F[Option[Dimension]] =
    data.take(768).compile.to(Array).flatMap { ar =>
      // ImageIO access is side-effecting and may throw; suspend it in F so
      // failures are raised inside the effect instead of escaping `map`.
      Sync[F].delay {
        val iis = ImageIO.createImageInputStream(new ByteArrayInputStream(ar))
        if (iis == null) sys.error("no reader given for the array")
        else Using.resource(iis)(s => getDimension(s))
      }
    }

  /** Try the registered image readers that claim to decode `in`, one after
    * another, and return the first size that could be read. Each reader is
    * disposed after it has been tried.
    */
  private def getDimension(in: ImageInputStream): Option[Dimension] =
    ImageIO
      .getImageReaders(in)
      .asScala
      .to(LazyList) // lazy: stop creating/trying readers after the first success
      .collectFirst(Function.unlift { reader =>
        val dim = getDimension(in, reader).toOption
        reader.dispose()
        dim
      })

  /** Read width and height of the first image (at the reader's minimum
    * index) using the given reader; non-fatal failures are returned as `Left`.
    */
  private def getDimension(
      in: ImageInputStream,
      reader: ImageReader
  ): Either[Throwable, Dimension] =
    Try {
      reader.setInput(in)
      val width  = reader.getWidth(reader.getMinIndex)
      val height = reader.getHeight(reader.getMinIndex)
      Dimension(width, height)
    }.toEither
}
|
BIN
modules/files/src/test/resources/bombs/20K-gray.jpeg
Normal file
BIN
modules/files/src/test/resources/bombs/20K-gray.jpeg
Normal file
Binary file not shown.
After Width: | Height: | Size: 1.5 MiB |
BIN
modules/files/src/test/resources/bombs/20K-gray.png
Normal file
BIN
modules/files/src/test/resources/bombs/20K-gray.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 48 KiB |
BIN
modules/files/src/test/resources/bombs/20K-rgb.jpeg
Normal file
BIN
modules/files/src/test/resources/bombs/20K-rgb.jpeg
Normal file
Binary file not shown.
After Width: | Height: | Size: 2.2 MiB |
BIN
modules/files/src/test/resources/bombs/20K-rgb.png
Normal file
BIN
modules/files/src/test/resources/bombs/20K-rgb.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 1.2 MiB |
@ -2,18 +2,18 @@ Derek Jeter
|
||||
|
||||
123 Elm Ave.
|
||||
|
||||
Treesville, ON MI1N 2P3
|
||||
Treesville, ON M1N 2P3
|
||||
November 7, 2016
|
||||
|
||||
Derek Jeter, 123 Elm Ave., Treesville, ON M1N 2P3, November 7, 2016
|
||||
|
||||
Mr. M. Leat
|
||||
Mr. M. Leaf
|
||||
|
||||
Chief of Syrup Production
|
||||
Old Sticky Pancake Company
|
||||
456 Maple Lane
|
||||
|
||||
Forest, ON 7TW8 9Y0
|
||||
Forest, ON 7W8 9Y0
|
||||
|
||||
Hemptown, September 3, 2019
|
||||
Dear Mr. Leaf,
|
||||
|
14
modules/files/src/test/resources/logback-test.xml
Normal file
14
modules/files/src/test/resources/logback-test.xml
Normal file
@ -0,0 +1,14 @@
|
||||
<!--
  Logback configuration used when running the tests of this module.
  Defines a single console appender named STDOUT with a colorized
  "level logger - message" pattern, sets the "docspell" logger to debug
  and the root logger to error, and attaches STDOUT to the root logger.
-->
<configuration>
|
||||
<appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
|
||||
<withJansi>true</withJansi>
|
||||
|
||||
<encoder>
|
||||
<pattern>%highlight(%-5level) %cyan(%logger{15}) - %msg %n</pattern>
|
||||
</encoder>
|
||||
</appender>
|
||||
|
||||
<logger name="docspell" level="debug" />
|
||||
<root level="error">
|
||||
<appender-ref ref="STDOUT" />
|
||||
</root>
|
||||
</configuration>
|
@ -0,0 +1,46 @@
|
||||
package docspell.files
|
||||
|
||||
import cats.implicits._
|
||||
import cats.effect.{Blocker, IO}
|
||||
import minitest.SimpleTestSuite
|
||||
|
||||
import scala.concurrent.ExecutionContext
|
||||
import scala.util.Using
|
||||
|
||||
/** Checks that [[ImageSize]] reports the expected dimensions for a set of
  * example images, both via a plain `InputStream` and via an fs2 byte stream.
  */
object ImageSizeTest extends SimpleTestSuite {
  val blocker: Blocker = Blocker.liftExecutionContext(ExecutionContext.global)

  // Implicit values get an explicit type so implicit resolution does not
  // depend on inference order.
  implicit val CS: cats.effect.ContextShift[IO] =
    IO.contextShift(ExecutionContext.global)

  // TIFF files are not supported by the JDK by default;
  // reading them requires an external library.
  val files = List(
    ExampleFiles.camera_letter_en_jpg -> Dimension(1695, 2378),
    ExampleFiles.camera_letter_en_png -> Dimension(1695, 2378),
//    ExampleFiles.camera_letter_en_tiff -> Dimension(1695, 2378),
    ExampleFiles.scanner_jfif_jpg    -> Dimension(2480, 3514),
    ExampleFiles.bombs_20K_gray_jpeg -> Dimension(20000, 20000),
    ExampleFiles.bombs_20K_gray_png  -> Dimension(20000, 20000),
    ExampleFiles.bombs_20K_rgb_jpeg  -> Dimension(20000, 20000),
    ExampleFiles.bombs_20K_rgb_png   -> Dimension(20000, 20000)
  )

  test("get sizes from input-stream") {
    files.foreach {
      case (uri, expect) =>
        val url = uri.toJavaUrl.fold(sys.error, identity)
        // The test owns the URL stream and closes it after the check.
        Using.resource(url.openStream()) { in =>
          val dim = ImageSize.get(in)
          assertEquals(dim, expect.some)
        }
    }
  }

  test("get sizes from stream") {
    files.foreach {
      case (uri, expect) =>
        val stream = uri.readURL[IO](8192, blocker)
        val dim    = ImageSize.get(stream).unsafeRunSync()
        assertEquals(dim, expect.some)
    }
  }
}
|
29
modules/files/src/test/scala/docspell/files/TestFiles.scala
Normal file
29
modules/files/src/test/scala/docspell/files/TestFiles.scala
Normal file
@ -0,0 +1,29 @@
|
||||
package docspell.files
|
||||
|
||||
import cats.effect.{Blocker, IO}
|
||||
import fs2.Stream
|
||||
|
||||
import scala.concurrent.ExecutionContext
|
||||
|
||||
/** Shared example inputs for tests: the German and English example letters
  * as byte streams (PDF) and as text read from the corresponding txt files.
  */
object TestFiles {
  val blocker: Blocker = Blocker.liftExecutionContext(ExecutionContext.global)

  // Implicit values get an explicit type so implicit resolution does not
  // depend on inference order.
  implicit val CS: cats.effect.ContextShift[IO] =
    IO.contextShift(ExecutionContext.global)

  /** Bytes of the German example letter (PDF), read in 8 KiB chunks. */
  val letterSourceDE: Stream[IO, Byte] =
    ExampleFiles.letter_de_pdf
      .readURL[IO](8 * 1024, blocker)

  /** Bytes of the English example letter (PDF), read in 8 KiB chunks. */
  val letterSourceEN: Stream[IO, Byte] =
    ExampleFiles.letter_en_pdf
      .readURL[IO](8 * 1024, blocker)

  // The text values are lazy so the files are only read when a test
  // actually needs them. `unsafeRunSync()` is side-effecting, hence the
  // explicit parentheses.
  lazy val letterDEText =
    ExampleFiles.letter_de_txt
      .readText[IO](8 * 1024, blocker)
      .unsafeRunSync()

  lazy val letterENText =
    ExampleFiles.letter_en_txt
      .readText[IO](8 * 1024, blocker)
      .unsafeRunSync()
}
|
Reference in New Issue
Block a user