From a36c499fb1fe058faaa0f08fab1058ca21176644 Mon Sep 17 00:00:00 2001
From: Eike Kettner <eike.kettner@posteo.de>
Date: Fri, 25 Sep 2020 21:58:02 +0200
Subject: [PATCH] Update docs on rpi regarding requirements for joex

---
 nix/configuration-test.nix               |  2 +-
 website/site/content/docs/install/rpi.md | 35 +++++++++++++++---------
 2 files changed, 23 insertions(+), 14 deletions(-)

diff --git a/nix/configuration-test.nix b/nix/configuration-test.nix
index 7c70d9cf..33d3e9a3 100644
--- a/nix/configuration-test.nix
+++ b/nix/configuration-test.nix
@@ -35,7 +35,7 @@ in
     waitForTarget = "solr-init.target";
     bind.address = "0.0.0.0";
     base-url = "http://localhost:7878";
-    jvmArgs = [ "-J-Xmx2g" ];
+    jvmArgs = [ "-J-Xmx1536M" ];
     inherit full-text-search;
   };
   services.docspell-restserver = {
diff --git a/website/site/content/docs/install/rpi.md b/website/site/content/docs/install/rpi.md
index 3fb19450..ebbf9771 100644
--- a/website/site/content/docs/install/rpi.md
+++ b/website/site/content/docs/install/rpi.md
@@ -19,19 +19,28 @@ work is done by the joex components.
 ## Joex
 
 Running the joex component on the Raspberry Pi is possible, but will
-result in long processing times for OCR. Files that don't require OCR
-are no problem.
+result in long processing times for OCR and text analysis. The board
+should provide 4G of RAM (like the current RPi4), especially if a
+database and solr are also running next to it. I recommend giving joex
+a heap of 1.5G (`-J-Xmx1536M`). You should also set the joex pool size
+to 1.
 
-Tested on a RPi model 3 (4 cores, 1G RAM) processing a PDF (scanned
-with 300dpi) with two pages took 9:52. You can speed it up
-considerably by uninstalling the `unpaper` command, because this step
-takes quite long. This, of course, reduces the quality of OCR. But
-without `unpaper` the same sample pdf was then processed in 1:24, a
-speedup of 8 minutes.
+When joex processes the first file, some models are built and loaded
+into memory, which can take a while. Subsequent files are processed
+faster.
 
-You should limit the joex pool size to 1 and, depending on your model
-and the amount of RAM, set a heap size of at least 500M
-(`-J-Xmx500M`).
+An example: on this [UP
+board](https://up-board.org/up/specifications/) with an Intel Atom
+x5-Z8350 CPU (@1.44GHz) and 4G RAM, a scanned (300dpi) pdf file with 6
+pages took *3:20 min* to process. This board also runs SOLR and a
+PostgreSQL database.
 
-For personal setups, when you don't need the processing results asap,
-this can work well enough.
+The same file was processed in 55s on a qemu virtual machine on my i7
+notebook, using 1 CPU and 4G RAM (and identical config for joex). The
+virtual machine only had to host docspell (joex and restserver, but
+the restserver is very lightweight).
+
+The learning task for text classification can also use a large amount
+of memory, but this depends on the amount of data you have in docspell.
+If you encounter problems here, you can set the maximum number of
+items to consider in the collective settings page.