diff --git a/docker/joex.dockerfile b/docker/joex.dockerfile
index 636926bb..2ef463c1 100644
--- a/docker/joex.dockerfile
+++ b/docker/joex.dockerfile
@@ -19,6 +19,17 @@ RUN apk add --no-cache openjdk11-jre \
     ttf-dejavu \
     ttf-freefont \
     ttf-liberation \
+    libxml2-dev \
+    libxslt-dev \
+    pngquant \
+    zlib-dev \
+    g++ \
+    qpdf \
+    python3-dev \
+    libffi-dev\
+    qpdf-dev \
+  && pip3 install --upgrade pip \
+  && pip3 install ocrmypdf \
   && curl -Ls $UNO_URL -o /usr/local/bin/unoconv \
   && chmod +x /usr/local/bin/unoconv \
   && ln -s /usr/bin/python3 /usr/bin/python \
@@ -27,7 +38,7 @@ RUN apk add --no-cache openjdk11-jre \
   && curl -L -o docspell.zip https://github.com/eikek/docspell/releases/download/v0.8.0/docspell-joex-0.8.0.zip \
   && unzip docspell.zip \
   && rm docspell.zip \
-  && apk del curl unzip
+  && apk del curl unzip libxml2-dev libxslt-dev zlib-dev g++ python3-dev libffi-dev qpdf-dev
 
 COPY entrypoint-joex.sh /opt/entrypoint.sh
 
diff --git a/modules/convert/src/main/scala/docspell/convert/Conversion.scala b/modules/convert/src/main/scala/docspell/convert/Conversion.scala
index 518340f3..589e9db7 100644
--- a/modules/convert/src/main/scala/docspell/convert/Conversion.scala
+++ b/modules/convert/src/main/scala/docspell/convert/Conversion.scala
@@ -8,7 +8,7 @@ import fs2._
 
 import docspell.common._
 import docspell.convert.ConversionResult.Handler
-import docspell.convert.extern.{Tesseract, Unoconv, WkHtmlPdf}
+import docspell.convert.extern._
 import docspell.convert.flexmark.Markdown
 import docspell.files.{ImageSize, TikaMimetype}
 
@@ -35,7 +35,8 @@ object Conversion {
       ): F[A] =
         TikaMimetype.resolve(dataType, in).flatMap {
           case MimeType.PdfMatch(_) =>
-            handler.run(ConversionResult.successPdf(in))
+            OcrMyPdf
+              .toPDF(cfg.ocrmypdf, lang, cfg.chunkSize, blocker, logger)(in, handler)
 
           case MimeType.HtmlMatch(mt) =>
             val cs = mt.charsetOrUtf8
diff --git a/modules/convert/src/main/scala/docspell/convert/ConvertConfig.scala b/modules/convert/src/main/scala/docspell/convert/ConvertConfig.scala
index b268190c..f51791c0 100644
--- a/modules/convert/src/main/scala/docspell/convert/ConvertConfig.scala
+++ b/modules/convert/src/main/scala/docspell/convert/ConvertConfig.scala
@@ -1,5 +1,6 @@
 package docspell.convert
 
+import docspell.convert.extern.OcrMyPdfConfig
 import docspell.convert.extern.{TesseractConfig, UnoconvConfig, WkHtmlPdfConfig}
 import docspell.convert.flexmark.MarkdownConfig
 
@@ -9,5 +10,6 @@ case class ConvertConfig(
     markdown: MarkdownConfig,
     wkhtmlpdf: WkHtmlPdfConfig,
     tesseract: TesseractConfig,
-    unoconv: UnoconvConfig
+    unoconv: UnoconvConfig,
+    ocrmypdf: OcrMyPdfConfig
 )
diff --git a/modules/convert/src/main/scala/docspell/convert/extern/ExternConv.scala b/modules/convert/src/main/scala/docspell/convert/extern/ExternConv.scala
index 677affdf..dcb02206 100644
--- a/modules/convert/src/main/scala/docspell/convert/extern/ExternConv.scala
+++ b/modules/convert/src/main/scala/docspell/convert/extern/ExternConv.scala
@@ -41,7 +41,7 @@ private[extern] object ExternConv {
 
         in.through(createInput).flatMap { _ =>
           SystemCommand
-            .execSuccess[F](
+            .exec[F](
               sysCfg,
               blocker,
               logger,
@@ -65,11 +65,20 @@ private[extern] object ExternConv {
       logger: Logger[F]
   )(out: Path, result: SystemCommand.Result): F[ConversionResult[F]] =
     File.existsNonEmpty[F](out).flatMap {
-      case true =>
-        if (result.rc == 0) successPdf(File.readAll(out, blocker, chunkSize)).pure[F]
-        else
-          logger.warn(s"Command not successful (rc=${result.rc}), but file exists.") *>
+      case true if result.rc == 0 =>
+        val outTxt = out.resolveSibling(out.getFileName.toString + ".txt")
+        File.existsNonEmpty[F](outTxt).flatMap {
+          case true =>
+            successPdfTxt(
+              File.readAll(out, blocker, chunkSize),
+              File.readText(outTxt, blocker)
+            ).pure[F]
+          case false =>
             successPdf(File.readAll(out, blocker, chunkSize)).pure[F]
+        }
+      case true if result.rc != 0 =>
+        logger.warn(s"Command not successful (rc=${result.rc}), but file exists.") *>
+          successPdf(File.readAll(out, blocker, chunkSize)).pure[F]
 
       case false =>
         ConversionResult
diff --git a/modules/convert/src/main/scala/docspell/convert/extern/OcrMyPdf.scala b/modules/convert/src/main/scala/docspell/convert/extern/OcrMyPdf.scala
new file mode 100644
index 00000000..c57170d8
--- /dev/null
+++ b/modules/convert/src/main/scala/docspell/convert/extern/OcrMyPdf.scala
@@ -0,0 +1,37 @@
+package docspell.convert.extern
+
+import java.nio.file.Path
+
+import cats.effect._
+import fs2.Stream
+
+import docspell.common._
+import docspell.convert.ConversionResult
+import docspell.convert.ConversionResult.Handler
+
+object OcrMyPdf {
+
+  /** Runs the configured ocrmypdf command through `ExternConv.toPDF`;
+    * the `{{lang}}` placeholder of the command is replaced with
+    * `lang.iso3`. When `cfg.enabled` is false, no command is run and
+    * the handler is applied to
+    * `ConversionResult.unsupportedFormat(MimeType.pdf)` instead.
+    */
+  def toPDF[F[_]: Sync: ContextShift, A](
+      cfg: OcrMyPdfConfig,
+      lang: Language,
+      chunkSize: Int,
+      blocker: Blocker,
+      logger: Logger[F]
+  )(in: Stream[F, Byte], handler: Handler[F, A]): F[A] =
+    if (!cfg.enabled) handler(ConversionResult.unsupportedFormat(MimeType.pdf))
+    else {
+      val command = cfg.command.replace(Map("{{lang}}" -> lang.iso3))
+      val readOutput: (Path, SystemCommand.Result) => F[ConversionResult[F]] =
+        ExternConv.readResult[F](blocker, chunkSize, logger)
+
+      ExternConv.toPDF[F, A](
+        "ocrmypdf", command, cfg.workingDir, false, blocker, logger, readOutput
+      )(in, handler)
+    }
+}
diff --git a/modules/convert/src/main/scala/docspell/convert/extern/OcrMyPdfConfig.scala b/modules/convert/src/main/scala/docspell/convert/extern/OcrMyPdfConfig.scala
new file mode 100644
index 00000000..218e52ad
--- /dev/null
+++ b/modules/convert/src/main/scala/docspell/convert/extern/OcrMyPdfConfig.scala
@@ -0,0 +1,11 @@
+package docspell.convert.extern
+
+import java.nio.file.Path
+
+import docspell.common.SystemCommand
+
+/** Settings for the ocrmypdf conversion: a switch to turn it on or
+  * off, the system command to execute and the working directory
+  * that is passed along to the external conversion.
+  */
+case class OcrMyPdfConfig(enabled: Boolean, command: SystemCommand.Config, workingDir: Path)
diff --git a/modules/convert/src/test/scala/docspell/convert/ConversionTest.scala b/modules/convert/src/test/scala/docspell/convert/ConversionTest.scala
index e018ec8e..ab2bbc1a 100644
--- a/modules/convert/src/test/scala/docspell/convert/ConversionTest.scala
+++ b/modules/convert/src/test/scala/docspell/convert/ConversionTest.scala
@@ -12,6 +12,7 @@ import docspell.convert.extern.{TesseractConfig, UnoconvConfig, WkHtmlPdfConfig}
 import docspell.convert.flexmark.MarkdownConfig
 import docspell.files.{ExampleFiles, TestFiles}
 import minitest.SimpleTestSuite
+import docspell.convert.extern.OcrMyPdfConfig
 
 object ConversionTest extends SimpleTestSuite with FileChecks {
   val blocker     = TestFiles.blocker
@@ -47,6 +48,24 @@ object ConversionTest extends SimpleTestSuite with FileChecks {
         Duration.seconds(20)
       ),
       target
+    ),
+    OcrMyPdfConfig(
+      true,
+      SystemCommand.Config(
+        "ocrmypdf",
+        Seq(
+          "-l",
+          "{{lang}}",
+          "--skip-text",
+          "--deskew",
+          "-j",
+          "1",
+          "{{infile}}",
+          "{{outfile}}"
+        ),
+        Duration.seconds(20)
+      ),
+      target
     )
   )
 
diff --git a/modules/joex/src/main/resources/reference.conf b/modules/joex/src/main/resources/reference.conf
index f9d51cae..059e6d05 100644
--- a/modules/joex/src/main/resources/reference.conf
+++ b/modules/joex/src/main/resources/reference.conf
@@ -339,6 +339,39 @@ docspell.joex {
       }
       working-dir = ${java.io.tmpdir}"/docspell-convert"
     }
+
+    # The tool ocrmypdf can be used to convert pdf files to pdf files
+    # in order to add extracted text as a separate layer. This makes
+    # image-only pdfs searchable and you can select and copy/paste the
+    # text. It also converts pdfs into pdf/a type pdfs, which are best
+    # suited for archiving. So it makes sense to use this even for
+    # text-only pdfs.
+    #
+    # It is recommended to install ocrmypdf, but it also is optional.
+    # If it is enabled but fails, the error is not fatal and the
+    # processing will continue using the original pdf for extracting
+    # text. You can also disable it to remove the errors from the
+    # processing logs.
+    #
+    # The `--skip-text` option is necessary to not fail on "text" pdfs
+    # (where ocr is not necessary). In this case, the pdf will be
+    # converted to PDF/A.
+    ocrmypdf = {
+      enabled = true
+      command = {
+        program = "ocrmypdf"
+        args = [
+          "-l", "{{lang}}",
+          "--skip-text",
+          "--deskew",
+          "-j", "1",
+          "{{infile}}",
+          "{{outfile}}"
+        ]
+        timeout = "5 minutes"
+      }
+      working-dir = ${java.io.tmpdir}"/docspell-convert"
+    }
   }
 
   # General config for processing documents
diff --git a/modules/joex/src/main/scala/docspell/joex/process/ConvertPdf.scala b/modules/joex/src/main/scala/docspell/joex/process/ConvertPdf.scala
index b571a306..ba75ec3a 100644
--- a/modules/joex/src/main/scala/docspell/joex/process/ConvertPdf.scala
+++ b/modules/joex/src/main/scala/docspell/joex/process/ConvertPdf.scala
@@ -64,10 +64,6 @@ object ConvertPdf {
   )(ra: RAttachment, mime: Mimetype): F[(RAttachment, Option[RAttachmentMeta])] =
     Conversion.create[F](cfg, sanitizeHtml, ctx.blocker, ctx.logger).use { conv =>
       mime.toLocal match {
-        case MimeType.PdfMatch(_) =>
-          ctx.logger.debug(s"Not going to convert a PDF file ${ra.name} into a PDF.") *>
-            (ra, None: Option[RAttachmentMeta]).pure[F]
-
         case mt =>
           val data = ctx.store.bitpeace
             .get(ra.fileId.id)
diff --git a/modules/joex/src/main/scala/docspell/joex/process/TextExtraction.scala b/modules/joex/src/main/scala/docspell/joex/process/TextExtraction.scala
index 912507a5..384741e2 100644
--- a/modules/joex/src/main/scala/docspell/joex/process/TextExtraction.scala
+++ b/modules/joex/src/main/scala/docspell/joex/process/TextExtraction.scala
@@ -85,9 +85,10 @@ object TextExtraction {
       item: ItemData
   )(ra: RAttachment): F[RAttachmentMeta] =
     for {
-      _   <- ctx.logger.debug(s"Extracting text for attachment ${stripAttachmentName(ra)}")
-      dst <- Duration.stopTime[F]
-      txt <- extractTextFallback(ctx, cfg, ra, lang)(filesToExtract(item, ra))
+      _    <- ctx.logger.debug(s"Extracting text for attachment ${stripAttachmentName(ra)}")
+      dst  <- Duration.stopTime[F]
+      fids <- filesToExtract(ctx)(item, ra)
+      txt  <- extractTextFallback(ctx, cfg, ra, lang)(fids)
       meta = item.changeMeta(
         ra.id,
         rm => rm.setContentIfEmpty(txt.map(_.trim).filter(_.nonEmpty))
@@ -151,11 +152,24 @@ object TextExtraction {
 
   /** Returns the fileIds to extract text from. First, the source file
     * is tried. If that fails, the converted file is tried.
+    *
+    * If the source file is a PDF, then use the converted file. This
+    * may then already contain the text if ocrmypdf is enabled. If it
+    * is disabled, both files are the same.
     */
-  private def filesToExtract(item: ItemData, ra: RAttachment): List[Ident] =
+  private def filesToExtract[F[_]: Sync](ctx: Context[F, _])(
+      item: ItemData,
+      ra: RAttachment
+  ): F[List[Ident]] =
     item.originFile.get(ra.id) match {
-      case Some(sid) => List(sid, ra.fileId).distinct
-      case None      => List(ra.fileId)
+      case Some(sid) =>
+        ctx.store.transact(RFileMeta.findMime(sid)).map {
+          case Some(MimeType.PdfMatch(_)) =>
+            List(ra.fileId)
+          case _ =>
+            List(sid, ra.fileId).distinct
+        }
+      case None => List(ra.fileId).pure[F]
     }
 
   private def stripAttachmentName(ra: RAttachment): String =
diff --git a/modules/microsite/docs/dev/adr.md b/modules/microsite/docs/dev/adr.md
index 67872229..8410f065 100644
--- a/modules/microsite/docs/dev/adr.md
+++ b/modules/microsite/docs/dev/adr.md
@@ -23,3 +23,4 @@ Some early information about certain details can be found in a few
 - [0012 Periodic Tasks](adr/0012_periodic_tasks)
 - [0013 Archive Files](adr/0013_archive_files)
 - [0014 Full-Text Search](adr/0014_fulltext_search_engine)
+- [0015 Convert PDF files](adr/0015_convert_pdf_files)
diff --git a/modules/microsite/docs/dev/adr/0015_convert_pdf_files.md b/modules/microsite/docs/dev/adr/0015_convert_pdf_files.md
new file mode 100644
index 00000000..b2f3ec02
--- /dev/null
+++ b/modules/microsite/docs/dev/adr/0015_convert_pdf_files.md
@@ -0,0 +1,67 @@
+---
+layout: docs
+title: Convert PDF Files
+permalink: dev/adr/0015_convert_pdf_files
+---
+
+# {{ page.title }}
+
+## Context and Problem Statement
+
+Some PDFs contain only images (when coming from a scanner) and
+therefore one is not able to click into the pdf and select text for
+copy&paste. Also it is not searchable in a PDF viewer. These are
+really shortcomings that can be fixed, especially when there is
+already OCR built in.
+
+For images, this works already as tesseract is used to create the PDF
+files. Tesseract creates the files with an additional text layer
+containing the OCRed text.
+
+## Considered Options
+
+* [ocrmypdf](https://github.com/jbarlow83/OCRmyPDF) OCRmyPDF adds an
+  OCR text layer to scanned PDF files, allowing them to be searched
+
+
+### ocrmypdf
+
+This is a very nice python tool, that uses tesseract to do OCR on each
+page and add the extracted text as a pdf text layer to the page.
+Additionally it creates PDF/A type pdfs, which are great for
+archiving. This fixes exactly the things stated above.
+
+#### Integration
+
+Docspell already has this built in for images. When converting images
+to a PDF (which is done early in processing), the process creates a
+text and a PDF file. Docspell then sets the text in this step and the
+text extraction step skips doing its work, if there is already text
+available.
+
+It would be possible to use the `--sidecar` option with ocrmypdf to
+create a text file of the extracted text with one run, too (exactly
+like it works for tesseract). But for "text" pdfs, ocrmypdf writes
+some info-message into this text file:
+
+```
+[OCR skipped on page 1][OCR skipped on page 2]
+```
+
+Docspell cannot reliably tell whether this is extracted text or not.
+It would be required to load the pdf and check its contents. This is a
+bit of bad luck, because everything would just work already. So it
+requires a (small) change in the text-extraction step. By default,
+text extraction happens on the source file. For PDFs, text extraction
+should now be run on the converted file, to avoid running OCR twice.
+
+The converted pdf file is either a text-pdf in the first place,
+where ocrmypdf would only convert it to a PDF/A file; or it may be a
+converted file containing the OCR-ed text as a pdf layer. If ocrmypdf
+is disabled, the converted file and the source file are the same for
+PDFs.
+
+## Decision Outcome
+
+Add ocrmypdf as an optional conversion from PDF to PDF. Ocrmypdf is
+distributed under the GPL-3 license.
diff --git a/modules/microsite/docs/doc/install.md b/modules/microsite/docs/doc/install.md
index ae84ae5b..90236233 100644
--- a/modules/microsite/docs/doc/install.md
+++ b/modules/microsite/docs/doc/install.md
@@ -77,6 +77,10 @@ component.
   office documents into PDF files. It uses libreoffice/openoffice.
 - [wkhtmltopdf](https://wkhtmltopdf.org/) is used to convert HTML into
   PDF files.
+- [OCRmyPDF](https://github.com/jbarlow83/OCRmyPDF) can be optionally
+  used to convert PDF to PDF files. It adds an OCR layer to scanned
+  PDF files to make them searchable. It also creates PDF/A files from
+  the input pdf.
 
 The performance of `unoconv` can be improved by starting `unoconv -l`
 in a separate process. This runs a libreoffice/openoffice listener
@@ -87,7 +91,7 @@ therefore avoids starting one each time `unoconv` is called.
 On Debian this should install all joex requirements:
 
 ``` bash
-sudo apt-get install ghostscript tesseract-ocr tesseract-ocr-deu tesseract-ocr-eng unpaper unoconv wkhtmltopdf
+sudo apt-get install ghostscript tesseract-ocr tesseract-ocr-deu tesseract-ocr-eng unpaper unoconv wkhtmltopdf ocrmypdf
 ```
 
 
diff --git a/modules/microsite/docs/features.md b/modules/microsite/docs/features.md
index 0adbe905..85af705a 100644
--- a/modules/microsite/docs/features.md
+++ b/modules/microsite/docs/features.md
@@ -13,7 +13,9 @@ permalink: features
 - OCR using [tesseract](https://github.com/tesseract-ocr/tesseract)
 - [Full-Text Search](doc/finding#full-text-search) based on [Apache
   SOLR](https://lucene.apache.org/solr)
-- Conversion to PDF: all files are converted into a PDF file
+- Conversion to PDF: all files are converted into a PDF file. PDFs
+  with only images (as often returned from scanners) are converted
+  into searchable PDF/A pdfs.
 - Non-destructive: all your uploaded files are never modified and can
   always be downloaded untouched
 - Text is analysed to find and attach meta data automatically
diff --git a/modules/store/src/main/scala/docspell/store/records/RFileMeta.scala b/modules/store/src/main/scala/docspell/store/records/RFileMeta.scala
index dc04054d..076bfd68 100644
--- a/modules/store/src/main/scala/docspell/store/records/RFileMeta.scala
+++ b/modules/store/src/main/scala/docspell/store/records/RFileMeta.scala
@@ -3,8 +3,10 @@ package docspell.store.records
 import docspell.common._
 import docspell.store.impl.Implicits._
 import docspell.store.impl._
+import docspell.store.syntax.MimeTypes._
 
 import bitpeace.FileMeta
+import bitpeace.Mimetype
 import doobie._
 import doobie.implicits._
 
@@ -30,4 +32,13 @@ object RFileMeta {
 
     selectSimple(Columns.all, table, Columns.id.is(fid)).query[FileMeta].option
   }
+
+  /** Selects only the mimetype column of the row with the given file id. */
+  def findMime(fid: Ident): ConnectionIO[Option[MimeType]] = {
+    import bitpeace.sql._
+
+    val byId      = Columns.id.is(fid)
+    val mimeQuery = selectSimple(Seq(Columns.mimetype), table, byId).query[Mimetype]
+    mimeQuery.option.map(found => found.map(_.toLocal))
+  }
 }
diff --git a/nix/module-joex.nix b/nix/module-joex.nix
index d92afc02..6e16581f 100644
--- a/nix/module-joex.nix
+++ b/nix/module-joex.nix
@@ -131,6 +131,23 @@ let
         };
         working-dir = "/tmp/docspell-convert";
       };
+
+      ocrmypdf = {
+        enabled = true;
+        command = {
+          program = "${pkgs.ocrmypdf}/bin/ocrmypdf";
+          args = [
+          "-l" "{{lang}}"
+          "--skip-text"
+          "--deskew"
+          "-j" "1"
+          "{{infile}}"
+          "{{outfile}}"
+          ];
+          timeout = "5 minutes";
+        };
+        working-dir = "/tmp/docspell-convert";
+      };
     };
     files = {
       chunk-size = 524288;
@@ -860,6 +877,66 @@ in {
                 process.
               '';
             };
+
+            ocrmypdf = mkOption {
+              type = types.submodule({
+                options = {
+                  enabled = mkOption {
+                    type = types.bool;
+                    default = defaults.convert.ocrmypdf.enabled;
+                    description = "Whether to use ocrmypdf to convert pdf to pdf/a.";
+                  };
+                  working-dir = mkOption {
+                    type = types.str;
+                    default = defaults.convert.ocrmypdf.working-dir;
+                    description = "Directory where the conversion processes can put their temp files";
+                  };
+                  command = mkOption {
+                    type = types.submodule({
+                      options = {
+                        program = mkOption {
+                          type = types.str;
+                          default = defaults.convert.ocrmypdf.command.program;
+                          description = "The path to the executable.";
+                        };
+                        args = mkOption {
+                          type = types.listOf types.str;
+                          default = defaults.convert.ocrmypdf.command.args;
+                          description = "The arguments to the program";
+                        };
+                        timeout = mkOption {
+                          type = types.str;
+                          default = defaults.convert.ocrmypdf.command.timeout;
+                          description = "The timeout when executing the command";
+                        };
+                      };
+                    });
+                    default = defaults.convert.ocrmypdf.command;
+                    description = "The system command";
+                  };
+                };
+              });
+              default = defaults.convert.ocrmypdf;
+              description = ''
+                The tool ocrmypdf can be used to convert pdf files to pdf files
+                in order to add extracted text as a separate layer. This makes
+                image-only pdfs searchable and you can select and copy/paste the
+                text. It also converts pdfs into pdf/a type pdfs, which are best
+                suited for archiving. So it makes sense to use this even for
+                text-only pdfs.
+
+                It is recommended to install ocrmypdf, but it also is optional.
+                If it is enabled but fails, the error is not fatal and the
+                processing will continue using the original pdf for extracting
+                text. You can also disable it to remove the errors from the
+                processing logs.
+
+                The `--skip-text` option is necessary to not fail on "text" pdfs
+                (where ocr is not necessary). In this case, the pdf will be
+                converted to PDF/A.
+              '';
+            };
+
           };
         });
         default = defaults.convert;