diff --git a/modules/extract/src/main/scala/docspell/extract/pdfbox/PdfMetaData.scala b/modules/extract/src/main/scala/docspell/extract/pdfbox/PdfMetaData.scala
index 4663d1c8..eb450ae9 100644
--- a/modules/extract/src/main/scala/docspell/extract/pdfbox/PdfMetaData.scala
+++ b/modules/extract/src/main/scala/docspell/extract/pdfbox/PdfMetaData.scala
@@ -8,7 +8,8 @@ final case class PdfMetaData(
     subject: Option[String],
     keywords: Option[String],
     creator: Option[String],
-    creationDate: Option[Timestamp]
+    creationDate: Option[Timestamp],
+    pageCount: Int
 ) {
 
   def isEmpty: Boolean =
@@ -17,7 +18,8 @@ final case class PdfMetaData(
       subject.isEmpty &&
       keywords.isEmpty &&
       creator.isEmpty &&
-      creationDate.isEmpty
+      creationDate.isEmpty &&
+      pageCount <= 0
 
   def nonEmpty: Boolean =
     !isEmpty
@@ -36,5 +38,5 @@ final case class PdfMetaData(
 }
 
 object PdfMetaData {
-  val empty = PdfMetaData(None, None, None, None, None, None)
+  val empty: PdfMetaData = PdfMetaData(None, None, None, None, None, None, pageCount = 0)
 }
diff --git a/modules/extract/src/main/scala/docspell/extract/pdfbox/PdfboxExtract.scala b/modules/extract/src/main/scala/docspell/extract/pdfbox/PdfboxExtract.scala
index def9c8ee..d3267503 100644
--- a/modules/extract/src/main/scala/docspell/extract/pdfbox/PdfboxExtract.scala
+++ b/modules/extract/src/main/scala/docspell/extract/pdfbox/PdfboxExtract.scala
@@ -20,21 +20,23 @@ object PdfboxExtract {
   def getTextAndMetaData[F[_]: Sync](
       data: Stream[F, Byte]
   ): F[Either[Throwable, (Text, Option[PdfMetaData])]] =
-    data.compile
-      .to(Array)
-      .map(bytes =>
-        Using(PDDocument.load(bytes)) { doc =>
-          for {
-            txt <- readText(doc)
-            md  <- readMetaData(doc)
-          } yield (txt, Some(md).filter(_.nonEmpty))
-        }.toEither.flatten
-      )
+    PdfLoader
+      .withDocumentStream(data) { doc =>
+        // the metadata is dropped (None) if it carries no information
+        val extracted = for {
+          text <- readText(doc)
+          meta <- readMetaData(doc)
+        } yield (text, Some(meta).filter(_.nonEmpty))
+        extracted.pure[F]
+      }.attempt.map(_.flatten)
 
   def getText[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, Text]] =
-    data.compile
-      .to(Array)
-      .map(bytes => Using(PDDocument.load(bytes))(readText).toEither.flatten)
+    // Errors raised in F are captured by `attempt` and merged with
+    // the Either returned from `readText`.
+    PdfLoader
+      .withDocumentStream(data)(doc => readText(doc).pure[F])
+      .attempt
+      .map(_.flatten)
 
   def getText(is: InputStream): Either[Throwable, Text] =
     Using(PDDocument.load(is))(readText).toEither.flatten
@@ -51,9 +53,10 @@ object PdfboxExtract {
     }.toEither
 
   def getMetaData[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, PdfMetaData]] =
-    data.compile
-      .to(Array)
-      .map(bytes => Using(PDDocument.load(bytes))(readMetaData).toEither.flatten)
+    // errors raised in F and the Left of `readMetaData` end up in the same Either
+    PdfLoader
+      .withDocumentStream(data)(doc => readMetaData(doc).pure[F])
+      .attempt.map(_.flatten)
 
   def getMetaData(is: InputStream): Either[Throwable, PdfMetaData] =
     Using(PDDocument.load(is))(readMetaData).toEither.flatten
@@ -73,7 +76,8 @@ object PdfboxExtract {
         mkValue(info.getSubject),
         mkValue(info.getKeywords),
         mkValue(info.getCreator),
-        Option(info.getCreationDate).map(c => Timestamp(c.toInstant))
+        Option(info.getCreationDate).map(c => Timestamp(c.toInstant)),
+        doc.getNumberOfPages()
       )
     }.toEither
 }
diff --git a/modules/joex/src/main/scala/docspell/joex/process/AttachmentPageCount.scala b/modules/joex/src/main/scala/docspell/joex/process/AttachmentPageCount.scala
new file mode 100644
index 00000000..c1dbe7e4
--- /dev/null
+++ b/modules/joex/src/main/scala/docspell/joex/process/AttachmentPageCount.scala
@@ -0,0 +1,108 @@
+package docspell.joex.process
+
+import cats.Functor
+import cats.data.OptionT
+import cats.effect._
+import cats.implicits._
+import fs2.Stream
+
+import docspell.common._
+import docspell.extract.pdfbox.PdfMetaData
+import docspell.extract.pdfbox.PdfboxExtract
+import docspell.joex.scheduler._
+import docspell.store.records.RAttachment
+import docspell.store.records._
+import docspell.store.syntax.MimeTypes._
+
+import bitpeace.{Mimetype, RangeDef}
+
+/** Goes through all attachments that must be already converted into a
+  * pdf. If it is a pdf, the number of pages is retrieved and stored
+  * in the attachment metadata.
+  */
+object AttachmentPageCount {
+
+  /** Creates the task that retrieves the page count for every attachment
+    * of the given item. If this fails, the error is logged and the item
+    * is returned nevertheless, so that processing can continue.
+    */
+  def apply[F[_]: Sync: ContextShift]()(
+      item: ItemData
+  ): Task[F, ProcessItemArgs, ItemData] =
+    Task { ctx =>
+      for {
+        _ <- ctx.logger.info(
+          s"Retrieving page count for ${item.attachments.size} files…"
+        )
+        _ <- item.attachments
+          .traverse(createPageCount(ctx))
+          .attempt
+          .flatMap {
+            case Right(_) => ().pure[F]
+            case Left(ex) =>
+              ctx.logger.error(ex)(
+                "Retrieving page counts failed, continuing without it."
+              )
+          }
+      } yield item
+    }
+
+  /** Reads the pdf metadata of the given attachment and stores its page
+    * count. Yields `None` if the file is not a pdf or if its metadata
+    * cannot be read; the latter case is logged as a warning.
+    */
+  def createPageCount[F[_]: Sync](
+      ctx: Context[F, _]
+  )(ra: RAttachment): F[Option[PdfMetaData]] =
+    findMime[F](ctx)(ra).flatMap {
+      case MimeType.PdfMatch(_) =>
+        PdfboxExtract.getMetaData(loadFile(ctx)(ra)).flatMap {
+          case Right(md) =>
+            updatePageCount(ctx, md, ra).map(_.some)
+          case Left(ex) =>
+            ctx.logger.warn(s"Error obtaining pages count: ${ex.getMessage}") *>
+              Option.empty[PdfMetaData].pure[F]
+        }
+
+      case _ =>
+        Option.empty[PdfMetaData].pure[F]
+    }
+
+  /** Writes the page count of `md` to the metadata record of the
+    * attachment. The number of updated rows is checked: if there is
+    * no record to update, a warning is logged instead of dropping the
+    * value silently.
+    */
+  private def updatePageCount[F[_]: Sync](
+      ctx: Context[F, _],
+      md: PdfMetaData,
+      ra: RAttachment
+  ): F[PdfMetaData] =
+    for {
+      n <- ctx.store.transact(RAttachmentMeta.updatePageCount(ra.id, md.pageCount.some))
+      _ <-
+        if (n > 0) ().pure[F]
+        else
+          ctx.logger.warn(
+            s"No attachmentmeta record for ${ra.id.id}, page count not stored."
+          )
+    } yield md
+
+  /** Looks up the mime type of the file belonging to the attachment,
+    * falling back to `application/octet-stream` if there is no file
+    * metadata.
+    */
+  def findMime[F[_]: Functor](ctx: Context[F, _])(ra: RAttachment): F[MimeType] =
+    OptionT(ctx.store.transact(RFileMeta.findById(ra.fileId)))
+      .map(_.mimetype)
+      .getOrElse(Mimetype.`application/octet-stream`)
+      .map(_.toLocal)
+
+  /** Streams the complete content of the file belonging to the attachment. */
+  def loadFile[F[_]](ctx: Context[F, _])(ra: RAttachment): Stream[F, Byte] =
+    ctx.store.bitpeace
+      .get(ra.fileId.id)
+      .unNoneTerminate
+      .through(ctx.store.bitpeace.fetchData2(RangeDef.all))
+
+}
diff --git a/modules/joex/src/main/scala/docspell/joex/process/ProcessItem.scala b/modules/joex/src/main/scala/docspell/joex/process/ProcessItem.scala
index 8caf25fb..56f3cd33 100644
--- a/modules/joex/src/main/scala/docspell/joex/process/ProcessItem.scala
+++ b/modules/joex/src/main/scala/docspell/joex/process/ProcessItem.scala
@@ -55,6 +55,7 @@ object ProcessItem {
       .flatMap(Task.setProgress(progress._1))
       .flatMap(TextExtraction(cfg.extraction, fts))
       .flatMap(AttachmentPreview(cfg.convert, cfg.extraction.preview))
+      .flatMap(AttachmentPageCount())
       .flatMap(Task.setProgress(progress._2))
       .flatMap(analysisOnly[F](cfg, analyser, regexNer))
       .flatMap(Task.setProgress(progress._3))
diff --git a/modules/store/src/main/resources/db/migration/h2/V1.11.0__pdf_pages.sql b/modules/store/src/main/resources/db/migration/h2/V1.11.0__pdf_pages.sql
new file mode 100644
index 00000000..ca347ea6
--- /dev/null
+++ b/modules/store/src/main/resources/db/migration/h2/V1.11.0__pdf_pages.sql
@@ -0,0 +1,2 @@
+-- page count of a pdf; integer (not smallint) to match the Int used by the application
+ALTER TABLE "attachmentmeta" ADD COLUMN "page_count" integer;
diff --git a/modules/store/src/main/resources/db/migration/mariadb/V1.11.0__pdf_pages.sql b/modules/store/src/main/resources/db/migration/mariadb/V1.11.0__pdf_pages.sql
new file mode 100644
index 00000000..fd580127
--- /dev/null
+++ b/modules/store/src/main/resources/db/migration/mariadb/V1.11.0__pdf_pages.sql
@@ -0,0 +1,2 @@
+-- page count of a pdf; INT (not SMALLINT) to match the Int used by the application
+ALTER TABLE `attachmentmeta` ADD COLUMN (`page_count` INT);
diff --git a/modules/store/src/main/resources/db/migration/postgresql/V1.11.0__pdf_pages.sql b/modules/store/src/main/resources/db/migration/postgresql/V1.11.0__pdf_pages.sql
new file mode 100644
index 00000000..ca347ea6
--- /dev/null
+++ b/modules/store/src/main/resources/db/migration/postgresql/V1.11.0__pdf_pages.sql
@@ -0,0 +1,2 @@
+-- page count of a pdf; integer (not smallint) to match the Int used by the application
+ALTER TABLE "attachmentmeta" ADD COLUMN "page_count" integer;
diff --git a/modules/store/src/main/scala/docspell/store/records/RAttachmentMeta.scala b/modules/store/src/main/scala/docspell/store/records/RAttachmentMeta.scala
index d1cb79ea..833bfeca 100644
--- a/modules/store/src/main/scala/docspell/store/records/RAttachmentMeta.scala
+++ b/modules/store/src/main/scala/docspell/store/records/RAttachmentMeta.scala
@@ -13,17 +13,21 @@ case class RAttachmentMeta(
     id: Ident, //same as RAttachment.id
     content: Option[String],
     nerlabels: List[NerLabel],
-    proposals: MetaProposalList
+    proposals: MetaProposalList,
+    pages: Option[Int]
 ) {
 
   def setContentIfEmpty(txt: Option[String]): RAttachmentMeta =
     if (content.forall(_.trim.isEmpty)) copy(content = txt)
     else this
+
+  /** A copy of this record with `pages` replaced by the given count. */
+  def withPageCount(count: Option[Int]): RAttachmentMeta = copy(pages = count)
 }
 
 object RAttachmentMeta {
   def empty(attachId: Ident) =
-    RAttachmentMeta(attachId, None, Nil, MetaProposalList.empty)
+    RAttachmentMeta(attachId, None, Nil, MetaProposalList.empty, pages = None)
 
   val table = fr"attachmentmeta"
 
@@ -32,7 +36,8 @@ object RAttachmentMeta {
     val content   = Column("content")
     val nerlabels = Column("nerlabels")
     val proposals = Column("itemproposals")
-    val all       = List(id, content, nerlabels, proposals)
+    val pages     = Column("page_count")
+    val all       = List(id, content, nerlabels, proposals, pages)
   }
   import Columns._
 
@@ -40,7 +45,7 @@ object RAttachmentMeta {
     insertRow(
       table,
       all,
-      fr"${v.id},${v.content},${v.nerlabels},${v.proposals}"
+      fr"${v.id},${v.content},${v.nerlabels},${v.proposals},${v.pages}"
     ).update.run
 
   def exists(attachId: Ident): ConnectionIO[Boolean] =
@@ -84,6 +89,9 @@ object RAttachmentMeta {
       )
     ).update.run
 
+  def updatePageCount(mid: Ident, pageCount: Option[Int]): ConnectionIO[Int] =
+    updateRow(table, id.is(mid), pages.setTo(pageCount)).update.run // number of updated rows
+
   def delete(attachId: Ident): ConnectionIO[Int] =
     deleteFrom(table, id.is(attachId)).update.run
 }