10
.gitignore
vendored
@ -1,7 +1,15 @@
|
||||
#artwork/*.png
|
||||
target/
|
||||
node_modules/
|
||||
dev.conf
|
||||
elm-stuff
|
||||
result
|
||||
_site/
|
||||
*.qcow2
|
||||
*.qcow2
|
||||
/website/site/content/docs/changelog/
|
||||
/website/site/public/
|
||||
/website/site/static/openapi/
|
||||
/website/site/static/js/bundle.js
|
||||
/website/site/templates/shortcodes/server.conf
|
||||
/website/site/templates/shortcodes/sample-exim.conf
|
||||
/website/site/templates/shortcodes/joex.conf
|
||||
|
200
artwork/poster.svg
Normal file
@ -0,0 +1,200 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<!-- Created with Inkscape (http://www.inkscape.org/) -->
|
||||
|
||||
<svg
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
xmlns:cc="http://creativecommons.org/ns#"
|
||||
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||
xmlns:svg="http://www.w3.org/2000/svg"
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
|
||||
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
|
||||
width="2000mm"
|
||||
height="1000mm"
|
||||
viewBox="0 0 2000 1000"
|
||||
version="1.1"
|
||||
id="svg8"
|
||||
inkscape:version="0.92.4 (5da689c313, 2019-01-14)"
|
||||
sodipodi:docname="poster.svg">
|
||||
<defs
|
||||
id="defs2" />
|
||||
<sodipodi:namedview
|
||||
id="base"
|
||||
pagecolor="#ffffff"
|
||||
bordercolor="#666666"
|
||||
borderopacity="1.0"
|
||||
inkscape:pageopacity="0.0"
|
||||
inkscape:pageshadow="2"
|
||||
inkscape:zoom="0.12374369"
|
||||
inkscape:cx="2551.9645"
|
||||
inkscape:cy="1546.712"
|
||||
inkscape:document-units="mm"
|
||||
inkscape:current-layer="layer1"
|
||||
showgrid="false"
|
||||
showguides="true"
|
||||
inkscape:guide-bbox="true"
|
||||
inkscape:window-width="1896"
|
||||
inkscape:window-height="1041"
|
||||
inkscape:window-x="3844"
|
||||
inkscape:window-y="1099"
|
||||
inkscape:window-maximized="0">
|
||||
<sodipodi:guide
|
||||
position="1000.6571,0"
|
||||
orientation="0,1"
|
||||
id="guide1183"
|
||||
inkscape:locked="false"
|
||||
inkscape:label=""
|
||||
inkscape:color="rgb(0,0,255)" />
|
||||
<sodipodi:guide
|
||||
position="1372.6963,999.99999"
|
||||
orientation="0,1"
|
||||
id="guide1185"
|
||||
inkscape:locked="false"
|
||||
inkscape:label=""
|
||||
inkscape:color="rgb(0,0,255)" />
|
||||
</sodipodi:namedview>
|
||||
<metadata
|
||||
id="metadata5">
|
||||
<rdf:RDF>
|
||||
<cc:Work
|
||||
rdf:about="">
|
||||
<dc:format>image/svg+xml</dc:format>
|
||||
<dc:type
|
||||
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
|
||||
<dc:title></dc:title>
|
||||
</cc:Work>
|
||||
</rdf:RDF>
|
||||
</metadata>
|
||||
<g
|
||||
inkscape:label="Layer 1"
|
||||
inkscape:groupmode="layer"
|
||||
id="layer1"
|
||||
transform="translate(0,703)">
|
||||
<rect
|
||||
ry="4.4855337"
|
||||
y="-703"
|
||||
x="-2.1381574"
|
||||
height="1000"
|
||||
width="2003.4524"
|
||||
id="rect1207"
|
||||
style="opacity:1;fill:#f0f8ff;fill-opacity:1;fill-rule:evenodd;stroke:#2a1fda;stroke-width:0.40000001;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;paint-order:stroke fill markers" />
|
||||
<g
|
||||
transform="matrix(3.0441013,0,0,3.0441013,291.09407,-503.8566)"
|
||||
id="layer1-3"
|
||||
inkscape:label="Ebene 1">
|
||||
<path
|
||||
d="M 168.49635,73.552185 A 67.257568,67.257568 0 0 1 119.72327,155.19352 67.257568,67.257568 0 0 1 38.065555,106.44787 67.257568,67.257568 0 0 1 86.783766,24.773782 67.257568,67.257568 0 0 1 168.47422,73.46455"
|
||||
sodipodi:open="true"
|
||||
sodipodi:end="6.0351217"
|
||||
sodipodi:start="6.0364656"
|
||||
sodipodi:ry="67.257568"
|
||||
sodipodi:rx="67.257568"
|
||||
sodipodi:cy="89.978119"
|
||||
sodipodi:cx="103.27543"
|
||||
sodipodi:type="arc"
|
||||
id="path4746"
|
||||
style="opacity:1;fill:#1c2d61;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.48060441;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;paint-order:stroke fill markers" />
|
||||
<path
|
||||
d="M 150.97869,78.020673 A 48.911774,48.911774 0 0 1 115.50941,137.39277 48.911774,48.911774 0 0 1 56.1254,101.94343 48.911774,48.911774 0 0 1 91.554782,42.547517 48.911774,48.911774 0 0 1 150.9626,77.956942"
|
||||
sodipodi:open="true"
|
||||
sodipodi:end="6.0351217"
|
||||
sodipodi:start="6.0364656"
|
||||
sodipodi:ry="48.911774"
|
||||
sodipodi:rx="48.911774"
|
||||
sodipodi:cy="89.966118"
|
||||
sodipodi:cx="103.54803"
|
||||
sodipodi:type="arc"
|
||||
id="path4746-6"
|
||||
style="opacity:1;fill:#f0f8ff;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.34951028;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;paint-order:stroke fill markers" />
|
||||
<g
|
||||
style="fill:#9c0011;fill-opacity:1;stroke:none"
|
||||
transform="translate(8.5571306,109.20034)"
|
||||
id="g4744">
|
||||
<path
|
||||
style="opacity:1;fill:#9c0011;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.12413885;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;paint-order:stroke fill markers"
|
||||
id="path4677-7-3-9"
|
||||
sodipodi:type="arc"
|
||||
sodipodi:cx="94.276443"
|
||||
sodipodi:cy="-19.170753"
|
||||
sodipodi:rx="34.40884"
|
||||
sodipodi:ry="35.990559"
|
||||
sodipodi:start="6.0364643"
|
||||
sodipodi:end="6.0351217"
|
||||
d="M 127.64333,-27.960568 A 34.40884,35.990559 0 0 1 102.69116,15.727003 34.40884,35.990559 0 0 1 60.915205,-10.357512 34.40884,35.990559 0 0 1 85.83933,-54.062594 34.40884,35.990559 0 0 1 127.63202,-28.007418"
|
||||
sodipodi:open="true" />
|
||||
<rect
|
||||
style="opacity:1;fill:#9c0011;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.15839411;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;paint-order:stroke fill markers"
|
||||
id="rect4736-4"
|
||||
width="15.056865"
|
||||
height="97.792213"
|
||||
x="112.29646"
|
||||
y="-93.584991"
|
||||
ry="6.490634"
|
||||
transform="matrix(0.9932084,0.11634897,-0.12978749,0.99154183,0,0)" />
|
||||
<flowRoot
|
||||
transform="matrix(0.26458333,0,0,0.26458333,-8.5571306,-109.20034)"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:192px;line-height:1.25;font-family:'Liberation Sans';-inkscape-font-specification:'Liberation Sans';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none"
|
||||
id="flowRoot4789"
|
||||
xml:space="preserve"><flowRegion
|
||||
style="font-size:192px"
|
||||
id="flowRegion4791"><rect
|
||||
style="font-size:192px"
|
||||
y="733.94824"
|
||||
x="480"
|
||||
height="308.57144"
|
||||
width="200"
|
||||
id="rect4793" /></flowRegion><flowPara
|
||||
id="flowPara4795" /></flowRoot> </g>
|
||||
</g>
|
||||
<flowRoot
|
||||
xml:space="preserve"
|
||||
id="flowRoot1151"
|
||||
style="fill:black;fill-opacity:1;stroke:none;font-family:'Liberation Serif';font-style:normal;font-weight:normal;font-size:16px;line-height:1.25;letter-spacing:0px;word-spacing:0px;-inkscape-font-specification:'Liberation Serif';font-stretch:normal;font-variant:normal;text-anchor:start;text-align:start;writing-mode:lr;font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;"><flowRegion
|
||||
id="flowRegion1153"
|
||||
style="-inkscape-font-specification:'Liberation Serif';font-family:'Liberation Serif';font-weight:normal;font-style:normal;font-stretch:normal;font-variant:normal;"><rect
|
||||
id="rect1155"
|
||||
width="3777.1428"
|
||||
height="1451.4286"
|
||||
x="2051.4285"
|
||||
y="956.67047"
|
||||
style="-inkscape-font-specification:'Liberation Serif';font-family:'Liberation Serif';font-weight:normal;font-style:normal;font-stretch:normal;font-variant:normal;" /></flowRegion><flowPara
|
||||
id="flowPara1157"></flowPara></flowRoot> <flowRoot
|
||||
xml:space="preserve"
|
||||
id="flowRoot1159"
|
||||
style="fill:black;fill-opacity:1;stroke:none;font-family:'Liberation Serif';font-style:normal;font-weight:normal;font-size:64px;line-height:1.25;letter-spacing:0px;word-spacing:0px;-inkscape-font-specification:'Liberation Serif';font-stretch:normal;font-variant:normal;text-anchor:start;text-align:start;writing-mode:lr;font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;"><flowRegion
|
||||
id="flowRegion1161"
|
||||
style="-inkscape-font-specification:'Liberation Serif';font-family:'Liberation Serif';font-weight:normal;font-style:normal;font-stretch:normal;font-variant:normal;font-size:64px;"><rect
|
||||
id="rect1163"
|
||||
width="2725.7144"
|
||||
height="651.42859"
|
||||
x="1668.5714"
|
||||
y="699.52759"
|
||||
style="-inkscape-font-specification:'Liberation Serif';font-family:'Liberation Serif';font-weight:normal;font-style:normal;font-stretch:normal;font-variant:normal;font-size:64px;" /></flowRegion><flowPara
|
||||
id="flowPara1165"></flowPara></flowRoot> <g
|
||||
transform="translate(-21.48901,-40.195179)"
|
||||
id="g1205">
|
||||
<text
|
||||
id="text1169"
|
||||
y="-194.97508"
|
||||
x="940.54144"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:12.43396187px;line-height:1.25;font-family:'Anonymous Pro';-inkscape-font-specification:'Anonymous Pro';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.77712262"
|
||||
xml:space="preserve"><tspan
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:149.20755005px;font-family:'Liberation Serif';-inkscape-font-specification:'Liberation Serif';stroke-width:0.77712262"
|
||||
y="-194.97508"
|
||||
x="940.54144"
|
||||
id="tspan1167"
|
||||
sodipodi:role="line">Docspell</tspan></text>
|
||||
<text
|
||||
id="text1173"
|
||||
y="-70.511375"
|
||||
x="862.69733"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:4.96039152px;line-height:1.25;font-family:'Anonymous Pro';-inkscape-font-specification:'Anonymous Pro';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.31002447"
|
||||
xml:space="preserve"><tspan
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:59.52470016px;font-family:'Liberation Sans';-inkscape-font-specification:'Liberation Sans';stroke-width:0.31002447"
|
||||
y="-70.511375"
|
||||
x="862.69733"
|
||||
id="tspan1171"
|
||||
sodipodi:role="line">Simple Document Organizer</tspan></text>
|
||||
</g>
|
||||
</g>
|
||||
</svg>
|
After Width: | Height: | Size: 11 KiB |
@ -1,159 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<!-- Created with Inkscape (http://www.inkscape.org/) -->
|
||||
|
||||
<svg
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
xmlns:cc="http://creativecommons.org/ns#"
|
||||
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||
xmlns:svg="http://www.w3.org/2000/svg"
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
xmlns:xlink="http://www.w3.org/1999/xlink"
|
||||
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
|
||||
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
|
||||
width="14.60939mm"
|
||||
height="14.738198mm"
|
||||
viewBox="0 0 14.60939 14.738198"
|
||||
version="1.1"
|
||||
id="svg5908"
|
||||
inkscape:version="0.92.4 (5da689c313, 2019-01-14)"
|
||||
sodipodi:docname="search.svg">
|
||||
<defs
|
||||
id="defs5902">
|
||||
<linearGradient
|
||||
inkscape:collect="always"
|
||||
id="linearGradient6185">
|
||||
<stop
|
||||
style="stop-color:#90b4bc;stop-opacity:1"
|
||||
offset="0"
|
||||
id="stop6181" />
|
||||
<stop
|
||||
style="stop-color:#cbf4f2;stop-opacity:0"
|
||||
offset="1"
|
||||
id="stop6183" />
|
||||
</linearGradient>
|
||||
<linearGradient
|
||||
inkscape:collect="always"
|
||||
id="linearGradient5984">
|
||||
<stop
|
||||
style="stop-color:#172651;stop-opacity:1;"
|
||||
offset="0"
|
||||
id="stop5980" />
|
||||
<stop
|
||||
style="stop-color:#172651;stop-opacity:0;"
|
||||
offset="1"
|
||||
id="stop5982" />
|
||||
</linearGradient>
|
||||
<linearGradient
|
||||
inkscape:collect="always"
|
||||
xlink:href="#linearGradient5984"
|
||||
id="linearGradient5986"
|
||||
x1="83.017578"
|
||||
y1="47.484482"
|
||||
x2="124.26878"
|
||||
y2="46.241692"
|
||||
gradientUnits="userSpaceOnUse" />
|
||||
<linearGradient
|
||||
inkscape:collect="always"
|
||||
xlink:href="#linearGradient5984"
|
||||
id="linearGradient5993"
|
||||
gradientUnits="userSpaceOnUse"
|
||||
x1="83.017578"
|
||||
y1="47.484482"
|
||||
x2="124.26878"
|
||||
y2="46.241692" />
|
||||
<linearGradient
|
||||
inkscape:collect="always"
|
||||
xlink:href="#linearGradient6185"
|
||||
id="linearGradient6195"
|
||||
x1="13.525695"
|
||||
y1="77.657806"
|
||||
x2="52.648178"
|
||||
y2="59.159847"
|
||||
gradientUnits="userSpaceOnUse" />
|
||||
</defs>
|
||||
<sodipodi:namedview
|
||||
id="base"
|
||||
pagecolor="#ffffff"
|
||||
bordercolor="#666666"
|
||||
borderopacity="1.0"
|
||||
inkscape:pageopacity="0.0"
|
||||
inkscape:pageshadow="2"
|
||||
inkscape:zoom="10.24"
|
||||
inkscape:cx="-7.723604"
|
||||
inkscape:cy="30.408526"
|
||||
inkscape:document-units="mm"
|
||||
inkscape:current-layer="g5991"
|
||||
showgrid="false"
|
||||
inkscape:window-width="3838"
|
||||
inkscape:window-height="2141"
|
||||
inkscape:window-x="1"
|
||||
inkscape:window-y="18"
|
||||
inkscape:window-maximized="0" />
|
||||
<metadata
|
||||
id="metadata5905">
|
||||
<rdf:RDF>
|
||||
<cc:Work
|
||||
rdf:about="">
|
||||
<dc:format>image/svg+xml</dc:format>
|
||||
<dc:type
|
||||
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
|
||||
<dc:title></dc:title>
|
||||
</cc:Work>
|
||||
</rdf:RDF>
|
||||
</metadata>
|
||||
<g
|
||||
inkscape:label="Ebene 1"
|
||||
inkscape:groupmode="layer"
|
||||
id="layer1"
|
||||
transform="translate(-13.241106,-79.788687)">
|
||||
<g
|
||||
id="g5991"
|
||||
transform="matrix(0.20833464,0,0,0.2961247,10.471793,65.245691)">
|
||||
<rect
|
||||
transform="rotate(27.148744)"
|
||||
ry="4.4855337"
|
||||
y="40.750698"
|
||||
x="70.522202"
|
||||
height="12.160764"
|
||||
width="45.302185"
|
||||
id="rect5929"
|
||||
style="opacity:1;fill:url(#linearGradient5993);fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;paint-order:stroke fill markers" />
|
||||
<path
|
||||
d="M 35.603664,83.599529 A 17.29241,17.29241 0 0 1 14.40183,71.443624 17.29241,17.29241 0 0 1 26.530387,50.226135 17.29241,17.29241 0 0 1 47.763498,62.327325 17.29241,17.29241 0 0 1 35.689695,83.57602"
|
||||
sodipodi:open="true"
|
||||
sodipodi:end="1.3014728"
|
||||
sodipodi:start="1.3066303"
|
||||
sodipodi:ry="17.29241"
|
||||
sodipodi:rx="17.29241"
|
||||
sodipodi:cy="66.906982"
|
||||
sodipodi:cx="31.088541"
|
||||
sodipodi:type="arc"
|
||||
id="path5912"
|
||||
style="opacity:1;fill:#172651;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;paint-order:stroke fill markers" />
|
||||
<path
|
||||
d="M 35.622879,82.440124 A 15.823282,15.823283 0 0 1 16.22231,71.316959 15.823282,15.823283 0 0 1 27.32045,51.902063 15.823282,15.823283 0 0 1 46.749639,62.975161 15.823282,15.823283 0 0 1 35.701602,82.418612"
|
||||
sodipodi:open="true"
|
||||
sodipodi:end="1.3014728"
|
||||
sodipodi:start="1.3066303"
|
||||
sodipodi:ry="15.823283"
|
||||
sodipodi:rx="15.823282"
|
||||
sodipodi:cy="67.165741"
|
||||
sodipodi:cx="31.491352"
|
||||
sodipodi:type="arc"
|
||||
id="path5912-7-3"
|
||||
style="opacity:1;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0.91504204;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;paint-order:stroke fill markers" />
|
||||
<path
|
||||
d="M 35.048067,82.374388 A 15.823282,15.823282 0 0 1 15.647498,71.251224 15.823282,15.823282 0 0 1 26.745638,51.836329 15.823282,15.823282 0 0 1 46.174827,62.909427 15.823282,15.823282 0 0 1 35.12679,82.352877"
|
||||
sodipodi:open="true"
|
||||
sodipodi:end="1.3014728"
|
||||
sodipodi:start="1.3066303"
|
||||
sodipodi:ry="15.823282"
|
||||
sodipodi:rx="15.823282"
|
||||
sodipodi:cy="67.100006"
|
||||
sodipodi:cx="30.91654"
|
||||
sodipodi:type="arc"
|
||||
id="path5912-7"
|
||||
style="opacity:1;fill:url(#linearGradient6195);fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.91504204;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;paint-order:stroke fill markers" />
|
||||
</g>
|
||||
</g>
|
||||
</svg>
|
Before Width: | Height: | Size: 5.9 KiB |
161
artwork/stow.svg
@ -1,161 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<!-- Created with Inkscape (http://www.inkscape.org/) -->
|
||||
|
||||
<svg
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
xmlns:cc="http://creativecommons.org/ns#"
|
||||
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||
xmlns:svg="http://www.w3.org/2000/svg"
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
xmlns:xlink="http://www.w3.org/1999/xlink"
|
||||
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
|
||||
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
|
||||
width="14.522366mm"
|
||||
height="14.70852mm"
|
||||
viewBox="0 0 14.522366 14.70852"
|
||||
version="1.1"
|
||||
id="svg5293"
|
||||
inkscape:version="0.92.4 (5da689c313, 2019-01-14)"
|
||||
sodipodi:docname="stow.svg">
|
||||
<defs
|
||||
id="defs5287">
|
||||
<linearGradient
|
||||
inkscape:collect="always"
|
||||
id="linearGradient5805">
|
||||
<stop
|
||||
style="stop-color:#000000;stop-opacity:1;"
|
||||
offset="0"
|
||||
id="stop5801" />
|
||||
<stop
|
||||
style="stop-color:#000000;stop-opacity:0;"
|
||||
offset="1"
|
||||
id="stop5803" />
|
||||
</linearGradient>
|
||||
<linearGradient
|
||||
inkscape:collect="always"
|
||||
xlink:href="#linearGradient5805"
|
||||
id="linearGradient5807"
|
||||
x1="97.004578"
|
||||
y1="152.66513"
|
||||
x2="97.004578"
|
||||
y2="138.76712"
|
||||
gradientUnits="userSpaceOnUse" />
|
||||
</defs>
|
||||
<sodipodi:namedview
|
||||
id="base"
|
||||
pagecolor="#ffffff"
|
||||
bordercolor="#666666"
|
||||
borderopacity="1.0"
|
||||
inkscape:pageopacity="0.0"
|
||||
inkscape:pageshadow="2"
|
||||
inkscape:zoom="3.959798"
|
||||
inkscape:cx="167.88961"
|
||||
inkscape:cy="92.281344"
|
||||
inkscape:document-units="mm"
|
||||
inkscape:current-layer="layer1"
|
||||
showgrid="false"
|
||||
inkscape:window-width="3838"
|
||||
inkscape:window-height="2141"
|
||||
inkscape:window-x="1"
|
||||
inkscape:window-y="18"
|
||||
inkscape:window-maximized="0" />
|
||||
<metadata
|
||||
id="metadata5290">
|
||||
<rdf:RDF>
|
||||
<cc:Work
|
||||
rdf:about="">
|
||||
<dc:format>image/svg+xml</dc:format>
|
||||
<dc:type
|
||||
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
|
||||
<dc:title></dc:title>
|
||||
</cc:Work>
|
||||
</rdf:RDF>
|
||||
</metadata>
|
||||
<g
|
||||
inkscape:label="Ebene 1"
|
||||
inkscape:groupmode="layer"
|
||||
id="layer1"
|
||||
transform="translate(-76.696143,-143.14098)">
|
||||
<flowRoot
|
||||
xml:space="preserve"
|
||||
id="flowRoot5295"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:16px;line-height:1.25;font-family:'Anonymous Pro';-inkscape-font-specification:'Anonymous Pro';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none"
|
||||
transform="scale(0.26458333)"><flowRegion
|
||||
id="flowRegion5297"><rect
|
||||
id="rect5299"
|
||||
width="302.85715"
|
||||
height="200"
|
||||
x="151.42857"
|
||||
y="396.80536" /></flowRegion><flowPara
|
||||
id="flowPara5301" /></flowRoot> <g
|
||||
id="g5862"
|
||||
transform="matrix(0.38327373,0,0,0.33829508,47.227117,104.41207)">
|
||||
<g
|
||||
transform="rotate(25.881625,86.124624,321.22969)"
|
||||
id="g5777">
|
||||
<g
|
||||
id="g5763">
|
||||
<g
|
||||
id="g5750">
|
||||
<g
|
||||
id="g5738">
|
||||
<g
|
||||
id="g5727">
|
||||
<g
|
||||
id="g5717">
|
||||
<g
|
||||
id="g5708">
|
||||
<g
|
||||
id="g5700">
|
||||
<rect
|
||||
ry="0.077411793"
|
||||
y="136.3042"
|
||||
x="9.8106909"
|
||||
height="20.178848"
|
||||
width="14.499372"
|
||||
id="rect5317"
|
||||
style="opacity:1;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1.04044545;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;paint-order:stroke fill markers" />
|
||||
<path
|
||||
inkscape:connector-curvature="0"
|
||||
id="path5319"
|
||||
d="m 12.160764,141.11504 c 9.68852,-0.13364 9.755337,0 9.755337,0 v -0.13364 0"
|
||||
style="fill:none;stroke:#000000;stroke-width:0.26458332px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" />
|
||||
<path
|
||||
inkscape:connector-curvature="0"
|
||||
id="path5319-3"
|
||||
d="m 11.961895,151.16736 c 9.688519,-0.13364 9.755338,0 9.755338,0 v -0.13364 0"
|
||||
style="fill:none;stroke:#000000;stroke-width:0.26458332px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" />
|
||||
<path
|
||||
inkscape:connector-curvature="0"
|
||||
id="path5319-6"
|
||||
d="m 11.82826,147.8265 c 9.68852,-0.13364 9.755338,0 9.755338,0 v -0.13364 0"
|
||||
style="fill:none;stroke:#000000;stroke-width:0.26458332px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" />
|
||||
<path
|
||||
inkscape:connector-curvature="0"
|
||||
id="path5319-7"
|
||||
d="m 12.09553,144.21835 c 9.688519,-0.13364 9.755338,0 9.755338,0 v -0.13364 0"
|
||||
style="fill:none;stroke:#000000;stroke-width:0.26458332px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" />
|
||||
</g>
|
||||
</g>
|
||||
</g>
|
||||
</g>
|
||||
</g>
|
||||
</g>
|
||||
</g>
|
||||
</g>
|
||||
<rect
|
||||
ry="0.097748853"
|
||||
y="138.67198"
|
||||
x="77.277542"
|
||||
height="18.899141"
|
||||
width="37.110588"
|
||||
id="rect5636"
|
||||
style="opacity:1;fill:url(#linearGradient5807);fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0.77973491;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;paint-order:stroke fill markers" />
|
||||
<path
|
||||
inkscape:connector-curvature="0"
|
||||
id="path5693"
|
||||
d="m 77.432168,138.91961 23.336802,-24.13695 v 0 0"
|
||||
style="fill:none;stroke:#000000;stroke-width:0.86332273;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1" />
|
||||
</g>
|
||||
</g>
|
||||
</svg>
|
Before Width: | Height: | Size: 6.4 KiB |
@ -1,90 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<!-- Created with Inkscape (http://www.inkscape.org/) -->
|
||||
|
||||
<svg
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
xmlns:cc="http://creativecommons.org/ns#"
|
||||
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||
xmlns:svg="http://www.w3.org/2000/svg"
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
|
||||
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
|
||||
width="14.836508mm"
|
||||
height="14.954134mm"
|
||||
viewBox="0 0 14.836508 14.954134"
|
||||
version="1.1"
|
||||
id="svg5870"
|
||||
inkscape:version="0.92.4 (5da689c313, 2019-01-14)"
|
||||
sodipodi:docname="tag.svg">
|
||||
<defs
|
||||
id="defs5864" />
|
||||
<sodipodi:namedview
|
||||
id="base"
|
||||
pagecolor="#ffffff"
|
||||
bordercolor="#666666"
|
||||
borderopacity="1.0"
|
||||
inkscape:pageopacity="0.0"
|
||||
inkscape:pageshadow="2"
|
||||
inkscape:zoom="1.4"
|
||||
inkscape:cx="98.18385"
|
||||
inkscape:cy="-142.24281"
|
||||
inkscape:document-units="mm"
|
||||
inkscape:current-layer="layer1"
|
||||
showgrid="false"
|
||||
inkscape:window-width="3838"
|
||||
inkscape:window-height="2141"
|
||||
inkscape:window-x="1"
|
||||
inkscape:window-y="18"
|
||||
inkscape:window-maximized="0" />
|
||||
<metadata
|
||||
id="metadata5867">
|
||||
<rdf:RDF>
|
||||
<cc:Work
|
||||
rdf:about="">
|
||||
<dc:format>image/svg+xml</dc:format>
|
||||
<dc:type
|
||||
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
|
||||
<dc:title></dc:title>
|
||||
</cc:Work>
|
||||
</rdf:RDF>
|
||||
</metadata>
|
||||
<g
|
||||
inkscape:label="Ebene 1"
|
||||
inkscape:groupmode="layer"
|
||||
id="layer1"
|
||||
transform="translate(-62.826115,-84.959244)">
|
||||
<g
|
||||
id="g5881"
|
||||
transform="matrix(0.16069429,0.16229614,-0.16069429,0.16229614,65.169697,65.422356)">
|
||||
<rect
|
||||
transform="matrix(0.77499969,-0.63196161,0.77499969,0.63196161,0,0)"
|
||||
ry="4.4855332"
|
||||
y="87.517372"
|
||||
x="-19.262943"
|
||||
height="32.940388"
|
||||
width="33.972054"
|
||||
id="rect5874"
|
||||
style="opacity:1;fill:#172651;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0.86083698;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;paint-order:stroke fill markers" />
|
||||
<rect
|
||||
ry="12.185672"
|
||||
y="47.226063"
|
||||
x="70.695381"
|
||||
height="40.323494"
|
||||
width="60.637527"
|
||||
id="rect5872"
|
||||
style="opacity:1;fill:#172651;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1.08249819;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;paint-order:stroke fill markers" />
|
||||
<path
|
||||
d="m 80.186024,74.676744 a 7.0870538,7.5595236 0 0 1 -8.689277,-5.314057 7.0870538,7.5595236 0 0 1 4.970721,-9.275405 7.0870538,7.5595236 0 0 1 8.702095,5.290137 7.0870538,7.5595236 0 0 1 -4.94828,9.289048"
|
||||
sodipodi:open="true"
|
||||
sodipodi:end="1.3014728"
|
||||
sodipodi:start="1.3066303"
|
||||
sodipodi:ry="7.5595236"
|
||||
sodipodi:rx="7.0870538"
|
||||
sodipodi:cy="67.379456"
|
||||
sodipodi:cx="78.335564"
|
||||
sodipodi:type="arc"
|
||||
id="path5876"
|
||||
style="opacity:1;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1;stroke-linecap:round;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;paint-order:stroke fill markers" />
|
||||
</g>
|
||||
</g>
|
||||
</svg>
|
Before Width: | Height: | Size: 3.4 KiB |
@ -1,22 +0,0 @@
|
||||
---
|
||||
layout: docs
|
||||
title: Demo
|
||||
permalink: demo
|
||||
---
|
||||
|
||||
# {{ page.title }}
|
||||
|
||||
## Finding Items
|
||||
|
||||
<video width="100%" controls>
|
||||
<source src="../static/docspell-search-2020-06-24.webm" type="video/webm">
|
||||
Your browser does not support the video tag.
|
||||
</video>
|
||||
|
||||
|
||||
## Basic Idea (First Version)
|
||||
|
||||
<video width="100%" controls>
|
||||
<source src="../static/docspell-demo.webm" type="video/webm">
|
||||
Your browser does not support the video tag.
|
||||
</video>
|
@ -1,26 +0,0 @@
|
||||
---
|
||||
layout: docs
|
||||
title: ADRs
|
||||
permalink: dev/adr
|
||||
---
|
||||
|
||||
# ADR
|
||||
|
||||
Some early information about certain details can be found in a few
|
||||
[ADRs](https://adr.github.io/) that exist:
|
||||
|
||||
- [0001 Components](adr/0001_components)
|
||||
- [0002 Component Interaction](adr/0002_component_interaction)
|
||||
- [0003 Encryption](adr/0003_encryption)
|
||||
- [0004 ISO8601 vs Unix](adr/0004_iso8601vsEpoch)
|
||||
- [0005 Job Executor](adr/0005_job-executor)
|
||||
- [0006 More File Types](adr/0006_more-file-types)
|
||||
- [0007 Convert HTML](adr/0007_convert_html_files)
|
||||
- [0008 Convert Text](adr/0008_convert_plain_text)
|
||||
- [0009 Convert Office Files](adr/0009_convert_office_docs)
|
||||
- [0010 Convert Image Files](adr/0010_convert_image_files)
|
||||
- [0011 Extract Text](adr/0011_extract_text)
|
||||
- [0012 Periodic Tasks](adr/0012_periodic_tasks)
|
||||
- [0013 Archive Files](adr/0013_archive_files)
|
||||
- [0014 Full-Text Search](adr/0014_fulltext_search_engine)
|
||||
- [0015 Convert PDF files](adr/0015_convert_pdf_files)
|
@ -1,95 +0,0 @@
|
||||
---
|
||||
layout: docs
|
||||
title: Documentation
|
||||
permalink: doc
|
||||
---
|
||||
|
||||
# {{page.title}}
|
||||
|
||||
This is the documentation for Docspell @VERSION@.
|
||||
|
||||
Docspell assists in organizing large amounts of files that are
|
||||
typically scanned paper documents. You can associate tags, set
|
||||
correspondents, what a document is concerned with, a name, a date and
|
||||
some more. If your documents are associated with this meta data, you
|
||||
should be able to quickly find them later using the search feature.
|
||||
But adding this manually to each document is a tedious task. What if
|
||||
most of it could be attached automatically?
|
||||
|
||||
## How it works
|
||||
|
||||
Documents have two main properties: a correspondent (sender or
|
||||
receiver that is not you) and something the document is about. Usually
|
||||
it is about a person or some thing – maybe your car, or contracts
|
||||
concerning some family member, etc.
|
||||
|
||||
1. You maintain a kind of address book. It should list all possible
|
||||
correspondents and the concerning people/things. This grows
|
||||
incrementally with each new unknown document.
|
||||
2. When docspell analyzes a document, it tries to find matches within
|
||||
your address book. It can detect the correspondent and a concerning
|
||||
person or thing. It will then associate this data to your
|
||||
documents.
|
||||
3. You can inspect what docspell has done and correct it. If docspell
|
||||
has found multiple suggestions, they will be shown for you to
|
||||
select one. If it is not correctly associated, very often the
|
||||
correct one is just one click away.
|
||||
|
||||
The set of meta data that docspell uses to draw suggestions from must
|
||||
be maintained manually. But usually, this data doesn't grow as fast as
|
||||
the documents. After a while there is a quite complete address book
|
||||
and only once in a while it has to be revisited.
|
||||
|
||||
Besides extracting the text from documents to analyze, docspell also
|
||||
converts all files into PDF files. This unifies the different formats
|
||||
your documents may be in originally and makes them more accessible
|
||||
from other systems and the future.
|
||||
|
||||
## Terms
|
||||
|
||||
In order to better understand these pages, some terms should be
|
||||
explained first.
|
||||
|
||||
### Item
|
||||
|
||||
An **Item** is roughly your (pdf) document, only that an item may span
|
||||
multiple files, which are called **attachments**. And an item has
|
||||
**meta data** associated:
|
||||
|
||||
- a **correspondent**: the other side of the communication. It can be
|
||||
an organization or a person.
|
||||
- a **concerning person** or **equipment**: a person or thing that
|
||||
this item is about. Maybe it is an insurance contract about your
|
||||
car.
|
||||
- **tag**: an item can be tagged with custom tags. A tag can have a
|
||||
*category*. This is intended for grouping tags, for example a
|
||||
category `doctype` could be used to group tags like `bill`,
|
||||
`contract`, `receipt` etc. Usually an item is not tagged with more
|
||||
than one tag of a category.
|
||||
- an **item date**: this is the date of the document – if this is not
|
||||
set, the created date of the item is used.
|
||||
- a **due date**: an optional date indicating that something has to be
|
||||
done (e.g. paying a bill, submitting it) about this item until this
|
||||
date
|
||||
- a **direction**: one of "incoming" or "outgoing"
|
||||
- a **name**: some item name, defaults to the file name of the
|
||||
attachments
|
||||
- some **notes**: arbitrary descriptive text. You can use markdown
|
||||
here, which is appropriately formatted in the web application.
|
||||
|
||||
### Collective
|
||||
|
||||
The users of the application are part of a **collective**. A
|
||||
**collective** is a group of users that share access to the same
|
||||
items. The account name is therefore comprised of a *collective name*
|
||||
and a *user name*.
|
||||
|
||||
All users of a collective are equal; they have the same permissions to
|
||||
access all items. The items don't belong to a user, but to the
|
||||
collective.
|
||||
|
||||
That means, to identify yourself when signing in, you have to give the
|
||||
collective name and your user name. By default it is separated by a
|
||||
slash `/`, for example `smith/john`. If your user name is the same as
|
||||
the collective name, you can omit one; so `smith/smith` can be
|
||||
abbreviated to just `smith`.
|
@ -1,235 +0,0 @@
|
||||
---
|
||||
layout: docs
|
||||
title: Installation
|
||||
permalink: doc/install
|
||||
---
|
||||
|
||||
# {{ page.title }}
|
||||
|
||||
This page contains detailed installation instructions. For a quick
|
||||
start, refer to [this page](../getit).
|
||||
|
||||
Docspell has been developed and tested on a GNU/Linux system. It may
|
||||
run on Windows and MacOS machines, too (ghostscript and tesseract are
|
||||
available on these systems). But I've never tried.
|
||||
|
||||
Docspell consists of two components that are started in separate
|
||||
processes:
|
||||
|
||||
1. *REST Server* This is the main application, providing the REST Api
|
||||
and the web application.
|
||||
2. *Joex* (job executor) This is the component that does the document
|
||||
processing.
|
||||
|
||||
They can run on multiple machines. All REST server and Joex instances
|
||||
should be on the same network. It is not strictly required that they
|
||||
can reach each other, but if they can, the components notify each other
|
||||
about new or done work.
|
||||
|
||||
While this is possible, the simple setup is to start both components
|
||||
once on the same machine.
|
||||
|
||||
The [download page](https://github.com/eikek/docspell/releases)
|
||||
provides pre-compiled packages and the [development page](../dev)
|
||||
contains build instructions.
|
||||
|
||||
|
||||
## Prerequisites
|
||||
|
||||
The two components have one prerequisite in common: they both require
|
||||
Java to run. While this is the only requirement for the *REST server*,
|
||||
the *Joex* component requires some more external programs.
|
||||
|
||||
### Java
|
||||
|
||||
Very often, Java is already installed. You can check this by opening a
|
||||
terminal and typing `java -version`. Otherwise install Java using your
|
||||
package manager or see [this site](https://adoptopenjdk.net/) for
|
||||
other options.
|
||||
|
||||
It is enough to install the JRE. The JDK is required, if you want to
|
||||
build docspell from source.
|
||||
|
||||
Docspell has been tested with Java version 1.8 (or sometimes referred
|
||||
to as JRE 8 and JDK 8, respectively). The pre-built packages are also
|
||||
built using JDK 8. But a later version of Java should work as well.
|
||||
|
||||
The next tools are only required on machines running the *Joex*
|
||||
component.
|
||||
|
||||
### External Programs for Joex
|
||||
|
||||
- [Ghostscript](http://pages.cs.wisc.edu/~ghost/) (the `gs` command)
|
||||
is used to extract/convert PDF files into images that are then fed
|
||||
to ocr. It is available on most GNU/Linux distributions.
|
||||
- [Unpaper](https://github.com/Flameeyes/unpaper) is a program that
|
||||
pre-processes images to yield better results when doing ocr. If this
|
||||
is not installed, docspell tries without it. However, it is
|
||||
recommended to install, because it [improves text
|
||||
extraction](https://github.com/tesseract-ocr/tesseract/wiki/ImproveQuality)
|
||||
(at the expense of a longer runtime).
|
||||
- [Tesseract](https://github.com/tesseract-ocr/tesseract) is the tool
|
||||
doing the OCR (converts images into text). It can also convert
|
||||
images into pdf files. It is a widely used open source OCR engine.
|
||||
Tesseract 3 and 4 should work with docspell; you can adapt the
|
||||
command line in the configuration file, if necessary.
|
||||
- [Unoconv](https://github.com/unoconv/unoconv) is used to convert
|
||||
office documents into PDF files. It uses libreoffice/openoffice.
|
||||
- [wkhtmltopdf](https://wkhtmltopdf.org/) is used to convert HTML into
|
||||
PDF files.
|
||||
- [OCRmyPDF](https://github.com/jbarlow83/OCRmyPDF) can be optionally
|
||||
used to convert PDF to PDF files. It adds an OCR layer to scanned
|
||||
PDF files to make them searchable. It also creates PDF/A files from
|
||||
the input pdf.
|
||||
|
||||
The performance of `unoconv` can be improved by starting `unoconv -l`
|
||||
in a separate process. This runs a libreoffice/openoffice listener
|
||||
and therefore avoids starting one each time `unoconv` is called.
|
||||
|
||||
### Example Debian
|
||||
|
||||
On Debian this should install all joex requirements:
|
||||
|
||||
``` bash
|
||||
sudo apt-get install ghostscript tesseract-ocr tesseract-ocr-deu tesseract-ocr-eng unpaper unoconv wkhtmltopdf ocrmypdf
|
||||
```
|
||||
|
||||
|
||||
## Database
|
||||
|
||||
Both components must have access to a SQL database. Docspell has
|
||||
support for these databases:
|
||||
|
||||
- PostgreSQL
|
||||
- MariaDB
|
||||
- H2
|
||||
|
||||
The H2 database is an interesting option for personal and mid-size
|
||||
setups, as it requires no additional work. It is integrated into
|
||||
docspell and works really well. It is also configured as the default
|
||||
database.
|
||||
|
||||
For large installations, PostgreSQL or MariaDB is recommended. Create
|
||||
a database and a user with enough privileges (read, write, create
|
||||
table) to that database.
|
||||
|
||||
When using H2, make sure that all components access the same database
|
||||
– the jdbc url must point to the same file. Then, it is important to
|
||||
add the options
|
||||
`;MODE=PostgreSQL;DATABASE_TO_LOWER=TRUE;AUTO_SERVER=TRUE` at the end
|
||||
of the url. See the [config page](configure#jdbc) for an example.
|
||||
|
||||
|
||||
## Installing from ZIP files
|
||||
|
||||
After extracting the zip files, you'll find a start script in the
|
||||
`bin/` folder.
|
||||
|
||||
|
||||
## Installing from DEB packages
|
||||
|
||||
The DEB packages can be installed on Debian, or Debian based Distros:
|
||||
|
||||
``` bash
|
||||
$ sudo dpkg -i docspell*.deb
|
||||
```
|
||||
|
||||
Then the start scripts are in your `$PATH`. Run `docspell-restserver`
|
||||
or `docspell-joex` from a terminal window.
|
||||
|
||||
The packages come with a systemd unit file that will be installed to
|
||||
autostart the services.
|
||||
|
||||
|
||||
## Running
|
||||
|
||||
Run the start script (in the corresponding `bin/` directory when using
|
||||
the zip files):
|
||||
|
||||
```
|
||||
$ ./docspell-restserver*/bin/docspell-restserver
|
||||
$ ./docspell-joex*/bin/docspell-joex
|
||||
```
|
||||
|
||||
This will startup both components using the default configuration. The
|
||||
configuration should be adapted to your needs. For example, the
|
||||
database connection is configured to use a H2 database in the `/tmp`
|
||||
directory. Please refer to the [configuration page](configure) for how
|
||||
to create a custom config file. Once you have your config file, simply
|
||||
pass it as argument to the command:
|
||||
|
||||
```
|
||||
$ ./docspell-restserver*/bin/docspell-restserver /path/to/server-config.conf
|
||||
$ ./docspell-joex*/bin/docspell-joex /path/to/joex-config.conf
|
||||
```
|
||||
|
||||
After starting the rest server, you can reach the web application at
|
||||
path `/app`, so using default values it would be
|
||||
`http://localhost:7880/app`.
|
||||
|
||||
You should be able to create a new account and sign in. Check the
|
||||
[configuration page](configure) to further customize docspell.
|
||||
|
||||
|
||||
### Options
|
||||
|
||||
The start scripts support some options to configure the JVM. One often
|
||||
used setting is the maximum heap size of the JVM. By default, java
|
||||
determines it based on properties of the current machine. You can
|
||||
specify it by giving java startup options to the command:
|
||||
|
||||
```
|
||||
$ ./docspell-restserver*/bin/docspell-restserver -J-Xmx1G -- /path/to/server-config.conf
|
||||
```
|
||||
|
||||
This would limit the maximum heap to 1GB. The double dash (`--`) separates
|
||||
internal options and the arguments to the program. Another frequently
|
||||
used option is to change the default temp directory. Usually it is
|
||||
`/tmp`, but it may be desired to have a dedicated temp directory,
|
||||
which can be configured:
|
||||
|
||||
```
|
||||
$ ./docspell-restserver*/bin/docspell-restserver -J-Xmx1G -Djava.io.tmpdir=/path/to/othertemp -- /path/to/server-config.conf
|
||||
```
|
||||
|
||||
The command:
|
||||
|
||||
```
|
||||
$ ./docspell-restserver*/bin/docspell-restserver -h
|
||||
```
|
||||
|
||||
gives an overview of supported options.
|
||||
|
||||
|
||||
## Raspberry Pi, and similar
|
||||
|
||||
Both components can run next to each other on a raspberry pi or
|
||||
similar device.
|
||||
|
||||
|
||||
### REST Server
|
||||
|
||||
The REST server component runs very well on the Raspberry Pi and
|
||||
similar devices. It doesn't require much resources, because the heavy
|
||||
work is done by the joex components.
|
||||
|
||||
|
||||
### Joex
|
||||
|
||||
Running the joex component on the Raspberry Pi is possible, but will
|
||||
result in long processing times for OCR. Files that don't require OCR
|
||||
are no problem.
|
||||
|
||||
Tested on a RPi model 3 (4 cores, 1G RAM) processing a PDF (scanned
|
||||
with 300dpi) with two pages took 9:52. You can speed it up
|
||||
considerably by uninstalling the `unpaper` command, because this step
|
||||
takes quite long. This, of course, reduces the quality of OCR. But
|
||||
without `unpaper` the same sample pdf was then processed in 1:24, a
|
||||
speedup of 8 minutes.
|
||||
|
||||
You should limit the joex pool size to 1 and, depending on your model
|
||||
and the amount of RAM, set a heap size of at least 500M
|
||||
(`-J-Xmx500M`).
|
||||
|
||||
For personal setups, when you don't need the processing results asap,
|
||||
this can work well enough.
|
@ -1,20 +0,0 @@
|
||||
---
|
||||
layout: docs
|
||||
title: Tools
|
||||
permalink: doc/tools
|
||||
---
|
||||
|
||||
# {{ page.title }}
|
||||
|
||||
The `tools/` folder contains some scripts and other resources intended
|
||||
for integrating docspell.
|
||||
|
||||
- [ds.sh](ds) A script to quickly upload files from the command
|
||||
line.
|
||||
- [Consume Directory](consumedir) A script to watch a directory
|
||||
for new files and upload them to docspell.
|
||||
- [Browser Extension](browserext) An extension for firefox to
|
||||
upload files from your browser via *right-click -> upload to
|
||||
docspell*.
|
||||
- [SMTP Gateway](smtpgateway) Start a SMTP server that forwards all
|
||||
mails to docspell.
|
@ -1,107 +0,0 @@
|
||||
---
|
||||
layout: docs
|
||||
title: Quickstart
|
||||
permalink: getit
|
||||
---
|
||||
|
||||
# {{ page.title }}
|
||||
|
||||
There are the following quick ways to get docspell to run on your
|
||||
machine:
|
||||
|
||||
- [Download, Unpack, Run](#without-docker) You can download
|
||||
pre-compiled binaries from the [Release
|
||||
Page](https://github.com/eikek/docspell/releases). There are `deb`
|
||||
packages and generic zip files.
|
||||
- [With Docker](#with-docker)
|
||||
- [NixOs Module](doc/nix#docspell-as-a-service-on-nixos)
|
||||
|
||||
Check the [demo videos](demo) to see the basic idea. Refer to the
|
||||
[documentation](doc) for more information on how to use docspell.
|
||||
|
||||
|
||||
## Download, Unpack, Run
|
||||
|
||||
### Prerequisite
|
||||
|
||||
Install Java (use your package manager or look
|
||||
[here](https://adoptopenjdk.net/)).
|
||||
|
||||
OCR functionality requires the following tools:
|
||||
|
||||
- [tesseract](https://github.com/tesseract-ocr/tesseract),
|
||||
- [ghostscript](http://pages.cs.wisc.edu/~ghost/) and possibly
|
||||
- [unpaper](https://github.com/Flameeyes/unpaper).
|
||||
|
||||
The last is not really required, but improves OCR.
|
||||
|
||||
PDF conversion requires the following tools:
|
||||
|
||||
- [unoconv](https://github.com/unoconv/unoconv)
|
||||
- [wkhtmltopdf](https://wkhtmltopdf.org/)
|
||||
|
||||
|
||||
### Using zip files
|
||||
|
||||
You need to download the two files:
|
||||
|
||||
- [docspell-restserver-{{site.version}}.zip](https://github.com/eikek/docspell/releases/download/v{{site.version}}/docspell-restserver-{{site.version}}.zip)
|
||||
- [docspell-joex-{{site.version}}.zip](https://github.com/eikek/docspell/releases/download/v{{site.version}}/docspell-joex-{{site.version}}.zip)
|
||||
|
||||
|
||||
1. Unzip both files:
|
||||
``` bash
|
||||
$ unzip docspell-*.zip
|
||||
```
|
||||
2. Open two terminal windows and navigate to the directory
|
||||
containing the zip files.
|
||||
3. Start both components executing:
|
||||
``` bash
|
||||
$ ./docspell-restserver*/bin/docspell-restserver
|
||||
```
|
||||
in one terminal and
|
||||
``` bash
|
||||
$ ./docspell-joex*/bin/docspell-joex
|
||||
```
|
||||
in the other.
|
||||
4. Point your browser to: <http://localhost:7880/app>
|
||||
5. Register a new account, sign in and try it.
|
||||
|
||||
Note, that this setup doesn't include watching a directory. You can
|
||||
use the [`consumedir.sh`](doc/tools/consumedir) tool for this or use
|
||||
the docker variant below.
|
||||
|
||||
## With Docker
|
||||
|
||||
There is a [docker-compose](https://docs.docker.com/compose/) setup
|
||||
available in the `/docker` folder.
|
||||
|
||||
1. Clone the github repository
|
||||
```bash
|
||||
$ git clone https://github.com/eikek/docspell
|
||||
```
|
||||
2. Change into the `docker` directory:
|
||||
```bash
|
||||
$ cd docspell/docker
|
||||
```
|
||||
3. Run `docker-compose up`:
|
||||
|
||||
```bash
|
||||
$ export DOCSPELL_HEADER_VALUE="my-secret-123"
|
||||
$ docker-compose up
|
||||
```
|
||||
|
||||
The environment variable defines a secret that is shared between
|
||||
some containers. You can define whatever you like. Please see the
|
||||
[`consumedir.sh`](doc/tools/consumedir#docker) docs for additional
|
||||
info.
|
||||
4. Go to <http://localhost:7880>, sign up and log in. When signing up,
|
||||
you can choose the same name for collective and user. Then login
|
||||
with this name and the password.
|
||||
|
||||
5. (Optional) Create a folder `./docs/<collective-name>` (the name you
|
||||
chose for the collective at registration) and place files in there
|
||||
for importing them.
|
||||
|
||||
The directory contains a file `docspell.conf` that you can
|
||||
[modify](doc/configure) as needed.
|
@ -1,13 +0,0 @@
|
||||
---
|
||||
layout: homeFeatures
|
||||
features:
|
||||
- first: ["Stow documents away", "Most of the time documents (emails, postal mail) are received or created. It should be fast to stow them away, knowing that they can be found if necessary.", "uploading"]
|
||||
- second: ["Automatic Tagging", "All documents are analyzed and tagged automatically. It may not always be correct; results can be reviewed and corrected.", "metadata"]
|
||||
- third: ["Work with them", "Search for documents by their meta data or via full-text search. Send them via e-mail. Add your own tags, names etc to better match your workflow.", "finding"]
|
||||
---
|
||||
|
||||
|
||||
## License
|
||||
|
||||
This project is distributed under the
|
||||
[GPLv3](http://www.gnu.org/licenses/gpl-3.0.html)
|
@ -1,46 +0,0 @@
|
||||
/* Background of the main content area. */
#site-main {
  background: aliceblue;
}

/* Masthead: a static background image stretched over the whole banner. */
#masthead {
  background: url(../img/back-master-small.jpg);
  background-repeat: no-repeat;
  background-size: 100% 100%;
  animation: none;
  height: 26vh;
}

/* Thin border around thumbnails. */
.thumbnail {
  border: 1px solid #aaa;
}

/* Fixed-height container for the feature images. */
.features-image {
  height: 200px;
  position: relative;
}

/* Sidebar colours on documentation pages. */
.docs #sidebar-wrapper .sidebar-nav .sidebar-nav-item.open>a, .docs #sidebar-wrapper .sidebar-nav .sidebar-nav-item.open button {
  background: #495680;
}
.docs #sidebar-wrapper .sidebar-nav .sidebar-nav-item .sub-section {
  background: #172651;
}
.docs #sidebar-wrapper .sidebar-nav .sidebar-nav-item .sub-section a.active {
  font-weight: bold;
}
.docs #sidebar-wrapper {
  background: #172651;
}

.docs #sidebar-wrapper #sidebar-brand {
  background: #495680;

}

/* Underline level-4 headings on documentation pages. */
.docs h4 {
  text-decoration: underline;
}

/* Let thumbnail images fill their container's width. */
.docs .thumbnail img {
  width: 100%;
}
|
@ -1,94 +0,0 @@
|
||||
options:
|
||||
- title: Home
|
||||
url: index.html
|
||||
|
||||
- title: Demo
|
||||
url: demo
|
||||
|
||||
- title: Quickstart
|
||||
url: getit
|
||||
|
||||
- title: Features/Limitations
|
||||
url: features
|
||||
|
||||
- title: Documentation
|
||||
url: doc
|
||||
|
||||
nested_options:
|
||||
- title: Installation
|
||||
url: doc/install
|
||||
|
||||
- title: Configuring
|
||||
url: doc/configure
|
||||
|
||||
- title: Nix/NixOS
|
||||
url: doc/nix
|
||||
|
||||
- title: Reverse Proxy
|
||||
url: doc/reverseproxy
|
||||
|
||||
- title: Meta Data
|
||||
url: doc/metadata
|
||||
|
||||
- title: Finding Items
|
||||
url: doc/finding
|
||||
|
||||
- title: Curate Items
|
||||
url: doc/curate
|
||||
|
||||
- title: Uploads
|
||||
url: doc/uploading
|
||||
|
||||
- title: Processing Queue
|
||||
url: doc/processing
|
||||
|
||||
- title: E-Mail Settings
|
||||
url: doc/emailsettings
|
||||
|
||||
- title: Send via E-Mail
|
||||
url: doc/mailitem
|
||||
|
||||
- title: Notify on due Items
|
||||
url: doc/notifydueitems
|
||||
|
||||
- title: Scan Mailboxes
|
||||
url: doc/scanmailbox
|
||||
|
||||
- title: Joex
|
||||
url: doc/joex
|
||||
|
||||
- title: Tools
|
||||
url: doc/tools
|
||||
|
||||
nested_options:
|
||||
- title: Upload CLI
|
||||
url: doc/tools/ds
|
||||
|
||||
- title: Consume Directory
|
||||
url: doc/tools/consumedir
|
||||
|
||||
- title: Browser Extension (Firefox)
|
||||
url: doc/tools/browserext
|
||||
|
||||
- title: SMTP Gateway
|
||||
url: doc/tools/smtpgateway
|
||||
|
||||
- title: Api
|
||||
url: api
|
||||
|
||||
nested_options:
|
||||
- title: REST Api Doc
|
||||
url: openapi/docspell-openapi.html
|
||||
|
||||
- title: REST OpenApi Spec
|
||||
url: openapi/docspell-openapi.yml
|
||||
|
||||
- title: Development
|
||||
url: dev
|
||||
|
||||
nested_options:
|
||||
- title: ADRs
|
||||
url: dev/adr
|
||||
|
||||
- title: Changelog
|
||||
url : changelog
|
Before Width: | Height: | Size: 1.2 MiB |
Before Width: | Height: | Size: 1.7 MiB |
@ -1 +0,0 @@
|
||||
../../../../../../../artwork/logo-96.png
|
@ -1 +0,0 @@
|
||||
../../../../../../../artwork/logo-only.svg
|
@ -1 +0,0 @@
|
||||
../../../../../../../artwork/stow.svg
|
@ -1 +0,0 @@
|
||||
../../../../../../../artwork/logo-only-36.svg
|
@ -1 +0,0 @@
|
||||
light-navbar-brand.svg
|
Before Width: | Height: | Size: 4.8 KiB |
Before Width: | Height: | Size: 43 KiB |
@ -1 +0,0 @@
|
||||
../../../../../../../artwork/tag.svg
|
@ -1 +0,0 @@
|
||||
../../../../../../../artwork/search.svg
|
Before Width: | Height: | Size: 180 KiB |
40
project/Cmd.scala
Normal file
@ -0,0 +1,40 @@
|
||||
package docspell.build
|
||||
|
||||
import sbt._
|
||||
import scala.sys.process._
|
||||
import java.util.concurrent.atomic.AtomicReference
|
||||
|
||||
/** Helper for running external commands. */
object Cmd {

  /** Exit code and captured output of a finished external process.
    *
    * @param rc  the process exit code
    * @param out all lines written to stdout, joined by newlines
    * @param err all lines written to stderr, joined by newlines
    */
  case class Result(rc: Int, out: String, err: String) {

    /** Returns this result when `rc` equals `success`; otherwise aborts
      * via `sys.error`.
      */
    def throwIfNot(success: Int): Result =
      if (rc != success) sys.error(s"Unsuccessful return: $rc")
      else this
  }

  /** Runs `cmd` in working directory `wd`, forwards its captured output
    * to `logger` (stdout at info level, stderr at error level) and fails
    * if the exit code is not 0.
    *
    * A stream that produced no output is not logged, so a quiet command
    * does not leave empty info/error lines in the build log.
    */
  def run(cmd: Seq[String], wd: File, logger: Logger): Unit = {
    val res = Cmd.exec(cmd, Some(wd))
    if (res.out.nonEmpty) logger.info(res.out)
    if (res.err.nonEmpty) logger.error(res.err)
    res.throwIfNot(0)
    ()
  }

  /** Runs `cmd` (optionally in working directory `wd`), blocks until it
    * exits and returns its exit code together with the captured output.
    */
  def exec(cmd: Seq[String], wd: Option[File]): Result = {
    val capt = new Capture
    val rc   = Process(cmd, wd).!(capt.logger)
    Result(rc, capt.out.get.mkString("\n"), capt.err.get.mkString("\n"))
  }

  /** Collects the stdout and stderr lines of a process, in arrival order. */
  final private class Capture {
    val err = new AtomicReference[List[String]](Nil)
    val out = new AtomicReference[List[String]](Nil)

    // Each callback appends the new line to the end of its list.
    val logger = ProcessLogger(
      line => out.getAndAccumulate(List(line), _ ++ _),
      line => err.getAndAccumulate(List(line), _ ++ _)
    )

  }
}
|
106
project/ZolaPlugin.scala
Normal file
@ -0,0 +1,106 @@
|
||||
package docspell.build
|
||||
|
||||
import sbt._
|
||||
import sbt.Keys._
|
||||
import scala.sys.process._
|
||||
|
||||
/** sbt plugin providing tasks to build and check the web site with the
  * `zola` static site generator.
  */
object ZolaPlugin extends AutoPlugin {

  object autoImport {
    val zolaRootDir   = settingKey[File]("The root directory of zola")
    val zolaOutputDir = settingKey[File]("The directory to put the final site")
    val zolaCommand   = settingKey[String]("The zola executable")
    val zolaTestBaseUrl =
      settingKey[String]("The base-url to use when building the test site.")

    val zolaBuild = taskKey[Unit](
      "Build the site using zola, which is then available in 'zolaOutputDir'."
    )
    val zolaBuildTest = taskKey[Unit](
      "Build the site using zola, which is then available in 'zolaOutputDir'. " +
        "It uses a different base-url. So the final site can be tested using " +
        "'python -m SimpleHTTPServer 1234' for example."
    )
    val zolaCheck = taskKey[Unit]("Runs zola check to check links")
  }

  import autoImport._

  /** Default values for the settings and the task implementations. */
  def zolaSettings: Seq[Setting[_]] =
    Seq(
      zolaRootDir := baseDirectory.value / "site",
      zolaOutputDir := target.value / "zola-site",
      zolaCommand := "zola",
      zolaTestBaseUrl := "http://localhost:1234",
      zolaBuild := {
        val logger = streams.value.log
        logger.info("Building web site using zola ...")
        buildSite(zolaCommand.value, zolaRootDir.value, zolaOutputDir.value, None, logger)
        logger.info("Website built")
      },
      zolaBuildTest := {
        val logger  = streams.value.log
        val baseurl = zolaTestBaseUrl.value
        logger.info("Building web site (test) using zola ...")
        buildSite(
          zolaCommand.value,
          zolaRootDir.value,
          zolaOutputDir.value,
          Some(baseurl),
          logger
        )
        logger.info(s"Website built. Check it with base-url $baseurl")
      },
      zolaCheck := {
        val logger = streams.value.log
        logger.info("Checking web site using zola ...")
        checkSite(zolaCommand.value, zolaRootDir.value, logger)
      }
    )

  override def projectSettings: Seq[Setting[_]] =
    zolaSettings

  /** Runs `zola build` in `inDir`, writing the generated site to `outDir`.
    *
    * @param zolaCmd the zola executable
    * @param inDir   the zola root directory; its parent directory is where
    *                `yarn install` and `elm make` are run
    * @param outDir  the directory passed to zola via `-o`
    * @param base    when defined, passed to zola as `--base-url`
    * @param logger  receives the output of the external commands
    */
  def buildSite(
      zolaCmd: String,
      inDir: File,
      outDir: File,
      base: Option[String],
      logger: Logger
  ): Unit = {
    val baseUrl = base match {
      case Some(url) =>
        // NOTE(review): yarn/elm are skipped when a base-url is given, so
        // a test build relies on an already compiled bundle.js. Confirm
        // this is intended.
        Seq("--base-url", url)
      case None =>
        runYarnInstall("yarn", inDir.getParentFile, logger)
        // The bundle has to go into the zola *source* tree
        // (<inDir>/static/js), the same location `build.sh` uses, so that
        // zola picks it up as a static asset. Writing it below `outDir`
        // would put it inside the directory zola generates.
        runElmCompile("elm", inDir.getParentFile, inDir, logger)
        Seq.empty
    }
    Cmd.run(
      Seq(zolaCmd, "build", "-o", outDir.absolutePath.toString) ++ baseUrl,
      inDir,
      logger
    )
  }

  /** Runs `zola check` in `inDir`. */
  def checkSite(zolaCmd: String, inDir: File, logger: Logger): Unit =
    Cmd.run(Seq(zolaCmd, "check"), inDir, logger)

  /** Runs `yarn install` in `inDir`. */
  def runYarnInstall(yarnCmd: String, inDir: File, logger: Logger): Unit =
    Cmd.run(Seq(yarnCmd, "install"), inDir, logger)

  /** Compiles `<inDir>/elm/Main.elm` with `--optimize`.
    *
    * @param elmCmd  the elm executable
    * @param inDir   working directory for `elm make`
    * @param zolaOut directory under which `static/js/bundle.js` is written
    * @param logger  receives the output of the external command
    */
  def runElmCompile(elmCmd: String, inDir: File, zolaOut: File, logger: Logger): Unit =
    Cmd.run(
      Seq(
        elmCmd,
        "make",
        "--output",
        (zolaOut / "static" / "js" / "bundle.js").absolutePath.toString,
        "--optimize",
        (inDir / "elm" / "Main.elm").toString
      ),
      inDir,
      logger
    )

}
|
@ -1,14 +1,20 @@
|
||||
with import <nixpkgs> { };
|
||||
let
|
||||
initScript = writeScript "docspell-build-init" ''
|
||||
nixpkgsUnstable = builtins.fetchTarball {
|
||||
url = "https://github.com/NixOS/nixpkgs-channels/archive/nixos-unstable.tar.gz";
|
||||
};
|
||||
pkgsUnstable = import nixpkgsUnstable { };
|
||||
initScript = pkgsUnstable.writeScript "docspell-build-init" ''
|
||||
export LD_LIBRARY_PATH=
|
||||
${bash}/bin/bash -c sbt
|
||||
${pkgsUnstable.bash}/bin/bash -c sbt
|
||||
'';
|
||||
in
|
||||
in with pkgsUnstable;
|
||||
|
||||
buildFHSUserEnv {
|
||||
name = "docspell-sbt";
|
||||
targetPkgs = pkgs: with pkgs; [
|
||||
netcat jdk8 wget which zsh dpkg sbt git elmPackages.elm ncurses fakeroot mc jekyll
|
||||
netcat jdk8 wget which zsh dpkg sbt git elmPackages.elm ncurses fakeroot mc
|
||||
zola yarn
|
||||
|
||||
# haskells http client needs this (to download elm packages)
|
||||
iana-etc
|
||||
];
|
||||
|
@ -1,9 +1,9 @@
|
||||
addSbtPlugin("ch.epfl.scala" % "sbt-scalafix" % "0.9.19")
|
||||
addSbtPlugin("com.47deg" % "sbt-microsites" % "1.2.1")
|
||||
addSbtPlugin("com.eed3si9n" % "sbt-buildinfo" % "0.9.0")
|
||||
addSbtPlugin("com.github.eikek" % "sbt-openapi-schema" % "0.6.1")
|
||||
addSbtPlugin("com.github.gseitz" % "sbt-release" % "1.0.13")
|
||||
addSbtPlugin("com.jsuereth" % "sbt-pgp" % "2.0.1")
|
||||
addSbtPlugin("com.typesafe.sbt" % "sbt-ghpages" % "0.6.3")
|
||||
addSbtPlugin("com.typesafe.sbt" % "sbt-git" % "1.0.0")
|
||||
addSbtPlugin("com.typesafe.sbt" % "sbt-native-packager" % "1.7.4")
|
||||
addSbtPlugin("io.spray" % "sbt-revolver" % "0.9.1")
|
||||
|
30
website/README.md
Normal file
@ -0,0 +1,30 @@
|
||||
# Website
|
||||
|
||||
This is the docspell website and documentation.
|
||||
|
||||
## Building
|
||||
|
||||
The website is created using [zola](https://github.com/getzola/zola)
|
||||
static site generator. The (very minimal) dynamic parts are written in
|
||||
Elm.
|
||||
|
||||
The `build.sh` script builds the site.
|
||||
|
||||
|
||||
## Development
|
||||
|
||||
Install things by running `yarn install`.
|
||||
|
||||
Open two terminals. In the first, run:
|
||||
|
||||
``` shell
|
||||
nix-shell --run ./run-elm.sh
|
||||
```
|
||||
|
||||
and in the second
|
||||
|
||||
``` shell
|
||||
nix-shell --run "cd site && zola serve"
|
||||
```
|
||||
|
||||
Open browser at `localhost:1111`.
|
10
website/build.sh
Executable file
@ -0,0 +1,10 @@
|
||||
#!/usr/bin/env bash
#
# Builds the website: installs the JS dependencies, compiles the Elm code
# into site/static/js/bundle.js and runs `zola build` inside site/.

set -e

# Work from the directory containing this script so the relative paths
# below resolve no matter where the script is called from.
cd "$(dirname "${BASH_SOURCE[0]}")"

yarn install
elm make --output site/static/js/bundle.js --optimize elm/Main.elm

# Run zola in a subshell: the working directory of this script stays
# untouched, and a failing `cd site` now aborts the script via `set -e`
# instead of being ignored as a non-final command of an `&&` list.
(cd site && zola build)

echo "Site is in site/public."
|
9
website/elm-analyse.json
Normal file
@ -0,0 +1,9 @@
|
||||
{
|
||||
"excludedPaths": [
|
||||
"modules/webapp/target/elm-src/"
|
||||
],
|
||||
"checks" : {
|
||||
"ImportAll": false,
|
||||
"SingleFieldRecord": false
|
||||
}
|
||||
}
|
28
website/elm.json
Normal file
@ -0,0 +1,28 @@
|
||||
{
|
||||
"type": "application",
|
||||
"source-directories": [
|
||||
"elm"
|
||||
],
|
||||
"elm-version": "0.19.1",
|
||||
"dependencies": {
|
||||
"direct": {
|
||||
"elm/browser": "1.0.2",
|
||||
"elm/core": "1.0.5",
|
||||
"elm/html": "1.0.0",
|
||||
"elm/random": "1.0.0",
|
||||
"elm-community/random-extra": "3.1.0",
|
||||
"elm-explorations/markdown": "1.0.0"
|
||||
},
|
||||
"indirect": {
|
||||
"elm/json": "1.1.3",
|
||||
"elm/time": "1.0.0",
|
||||
"elm/url": "1.0.0",
|
||||
"elm/virtual-dom": "1.0.2",
|
||||
"owanturist/elm-union-find": "1.0.0"
|
||||
}
|
||||
},
|
||||
"test-dependencies": {
|
||||
"direct": {},
|
||||
"indirect": {}
|
||||
}
|
||||
}
|
36
website/elm/ExtraAttr.elm
Normal file
@ -0,0 +1,36 @@
|
||||
module ExtraAttr exposing (..)
|
||||
|
||||
import Html exposing (..)
|
||||
import Html.Attributes exposing (..)
|
||||
|
||||
|
||||
{-| The `aria-expanded` attribute with the value "true" or "false".
-}
ariaExpanded : Bool -> Attribute msg
ariaExpanded flag =
    let
        value =
            if flag then
                "true"

            else
                "false"
    in
    attribute "aria-expanded" value
|
||||
|
||||
|
||||
{-| The `aria-hidden` attribute with the value "true" or "false".
-}
ariaHidden : Bool -> Attribute msg
ariaHidden flag =
    case flag of
        True ->
            attribute "aria-hidden" "true"

        False ->
            attribute "aria-hidden" "false"
|
||||
|
||||
|
||||
{-| The `aria-label` attribute carrying the given text.
-}
ariaLabel : String -> Attribute msg
ariaLabel =
    attribute "aria-label"
|
||||
|
||||
|
||||
{-| The `role` attribute carrying the given role name.
-}
role : String -> Attribute msg
role name =
    name
        |> attribute "role"
|
116
website/elm/Feature.elm
Normal file
@ -0,0 +1,116 @@
|
||||
module Feature exposing (..)
|
||||
|
||||
import Html exposing (..)
|
||||
import Html.Attributes exposing (..)
|
||||
import Markdown
|
||||
|
||||
|
||||
{-| One entry of the feature list: the `src` of its image, its heading
text and a description that is rendered as markdown.
-}
type alias Feature =
    { image : String
    , header : String
    , description : String
    }
|
||||
|
||||
|
||||
{-| Render one feature as a two-column box.

For an even `index` the image goes into the first column and the text
into the second; for an odd `index` the two are swapped.

-}
featureBox : Int -> Feature -> Html msg
featureBox index f =
    let
        picture =
            [ figure [ class "image is-2by1 feature-image" ]
                [ img [ src f.image ] []
                ]
            ]

        description =
            [ h2 [ class "title" ]
                [ text f.header
                ]
            , Markdown.toHtml []
                f.description
            ]

        ( firstColumn, secondColumn ) =
            if isOdd index then
                ( description, picture )

            else
                ( picture, description )
    in
    div [ class "columns is-vcentered box mb-5" ]
        -- NOTE(review): Bulma spells this modifier `is-three-quarters`;
        -- `is-three-quarter` presumably has no effect. Confirm intent.
        [ div [ class "column is-three-quarter" ] firstColumn
        , div [ class "column" ] secondColumn
        ]
|
||||
|
||||
|
||||
{-| The feature entries. Each `description` is a markdown string.
-}
features : List Feature
features =
    [ { image = "img/user-feature.png"
      , header = "Multi-User per Account"
      , description = """
Each account (a *collective*) can have multiple users that share the
same files. For example, everyone in your family can work with your
files while using their own account with their own settings.
"""
      }
    , { image = "img/ocr-feature.png"
      , header = "Text Extraction with OCR"
      , description = """
Text is extracted from all files. For scanned documents/images, OCR is used by utilising tesseract. The text is analysed and is available for full-text search.
"""
      }
    , { image = "img/analyze-feature.png"
      , header = "Text Analysis"
      , description = """
The extracted text is analyzed and is used to find properties that can be annotated to your documents automatically.
"""
      }
    , { image = "img/filetype-feature.svg"
      , header = "Support for many files"
      , description = """
Docspell can read many file types. ZIP and EML (e-mail file format) files are extracted and their contents imported.
"""
      }
    , { image = "img/convertpdf-feature.svg"
      , header = "Conversion to PDF"
      , description = """
All files are converted to PDF. Don't worry about the originals. Original files are stored, too and can be downloaded untouched. When creating PDFs from image data (often returned from scanners), the resulting PDF contains the extracted text and is searchable.
"""
      }
    , { image = "img/fts-feature.png"
      , header = "Full-Text Search"
      , description = """
The extracted text of all files and some properties, like names and notes, are available for full-text search. Full-text search can also be used to further constrain the results of the search-menu where you can search by tags, correspondent, etc.
"""
      }
    , { image = "img/sendmail-feature.png"
      , header = "Send via E-Mail"
      , description = """

Users can define SMTP settings in the app and are then able to send items out via E-Mail. This is often useful to share with other people. There is e-mail-address completion from your address book, of course.

"""
      }
    , { image = "img/scanmailbox-feature.png"
      , header = "Import Mailboxes"
      , description = """
Users can define IMAP settings so that docspell can import their e-mails. This can be done periodically based on a schedule. Imported mails can be moved away into another folder or deleted.
"""
      }
    , { image = "img/notify-feature.png"
      , header = "Notifications"
      , description = """
Users can be notified by e-mail for documents whose due-date comes closer.
"""
      }
    ]
|
||||
|
||||
|
||||
{-| True when `num` is not divisible by 2.
-}
isOdd : Int -> Bool
isOdd num =
    modBy 2 num /= 0
|
91
website/elm/GetStarted.elm
Normal file
@ -0,0 +1,91 @@
|
||||
module GetStarted exposing (..)
|
||||
|
||||
import Html exposing (..)
|
||||
import Html.Attributes exposing (..)
|
||||
import Icons
|
||||
import Markdown
|
||||
|
||||
|
||||
{-| Render the "Get Started" section of the landing page.

The given `version` is spliced into the download link of the source
archive (`.../archive/v<version>.zip`). The result is a list of three
containers: the step-by-step instructions (rendered from Markdown), a
hint for non-docker installations and a "where to go from here" box.

-}
getStarted : String -> List (Html msg)
getStarted version =
    let
        intro =
            """Docspell consists of several components. The easiest way to get started is probably to use docker and
[docker-compose](https://docs.docker.com/compose/)."""

        -- Links to the documentation are absolute (`/docs/...`) so they
        -- resolve the same way as the other documentation links in this
        -- function, regardless of the URL the page is served from.
        steps =
            """1. Clone the github repository
   ```bash
   $ git clone https://github.com/eikek/docspell
   ```
   Alternatively, [download](https://github.com/eikek/docspell/archive/v"""
                ++ version
                ++ """.zip) the sources and extract the zip file.
2. Change into the `docker` directory:
   ```bash
   $ cd docspell/docker
   ```
3. Run `docker-compose up`:

   ```bash
   $ export DOCSPELL_HEADER_VALUE="my-secret-123"
   $ docker-compose up
   ```

   The environment variable defines a secret that is shared between
   some containers. You can define whatever you like. Please see the
   [`consumedir.sh`](/docs/tools/consumedir#docker) docs for additional
   info.
4. Go to <http://localhost:7880>, sign up and log in. When signing up,
   you can choose the same name for collective and user. Then log in
   with this name and the password.

5. (Optional) Create a folder `./docs/<collective-name>` (the name you
   chose for the collective at registration) and place files in there
   for importing them.

The directory contains a file `docspell.conf` that you can
[modify](/docs/configure) as needed.
"""
    in
    [ div [ class "content container is-size-5" ]
        [ Markdown.toHtml [] intro
        , Markdown.toHtml [] steps
        ]
    , div [ class "content container" ]
        [ div [ class "notification is-info is-light" ]
            [ text "If you don't use docker, there are other ways that are "
            , text "described in the relevant "
            , a [ href "/docs/install" ]
                [ text "documentation page"
                ]
            ]
        ]
    , div [ class "content container" ]
        [ div [ class "notification is-success is-light" ]
            [ div [ class "content is-medium" ]
                [ h3 [ class "title" ]
                    [ text "Where to go from here?"
                    ]
                , ul []
                    [ li []
                        [ text "The "
                        , a [ href "/docs/intro" ]
                            [ text "introduction" ]
                        , text " writes about the goals and basic idea."
                        ]
                    , li []
                        [ text "There is a comprehensive "
                        , a [ href "/docs" ]
                            [ text "documentation"
                            ]
                        , text " available."
                        ]
                    , li []
                        [ text "The source code is hosted on "
                        , a [ href "https://github.com/eikek/docspell" ]
                            [ text "github"
                            ]
                        , text "."
                        ]
                    ]
                ]
            ]
        ]
    ]
|
54
website/elm/Icons.elm
Normal file
@ -0,0 +1,54 @@
|
||||
module Icons exposing (..)

import Html exposing (..)
import Html.Attributes exposing (..)


{-| Build an `img` element whose only attribute is the given `src`.
-}
icon : String -> Html msg
icon path =
    img [ src path ] []


{-| The copyright icon.
-}
copyright : Html msg
copyright =
    icon "icons/copyright-40.svg"


{-| The squared info icon.
-}
infoSquared : Html msg
infoSquared =
    icon "icons/info-square-40.svg"


{-| The refresh icon.
-}
refresh : Html msg
refresh =
    icon "icons/refresh-40.svg"


{-| The logo in its 36px variant.
-}
logo : Html msg
logo =
    icon "icons/logo-only-36.svg"


{-| The "mc" variant of the logo.
-}
logoMC : Html msg
logoMC =
    icon "icons/logo-only-mc.svg"


{-| The logo rendered with an explicit `width` attribute.
-}
logoWidth : Int -> Html msg
logoWidth w =
    img [ src "icons/logo-only.svg", width w ] []


{-| The home icon.
-}
home : Html msg
home =
    icon "icons/home-40.svg"


{-| The icon used for documentation links.
-}
docs : Html msg
docs =
    icon "icons/notes-40.svg"


{-| The github icon. Unlike the icons above, its path is absolute.
-}
github : Html msg
github =
    icon "/icons/github-40.svg"


{-| The green github icon. Its path is absolute as well.
-}
githubGreen : Html msg
githubGreen =
    icon "/icons/github-40-green.svg"
|
324
website/elm/Main.elm
Normal file
@ -0,0 +1,324 @@
|
||||
module Main exposing (..)
|
||||
|
||||
import Browser exposing (Document)
|
||||
import Browser.Navigation exposing (Key)
|
||||
import ExtraAttr exposing (..)
|
||||
import Feature exposing (Feature)
|
||||
import GetStarted
|
||||
import Html exposing (..)
|
||||
import Html.Attributes exposing (..)
|
||||
import Html.Events exposing (..)
|
||||
import Icons
|
||||
import Random
|
||||
import Random.List
|
||||
|
||||
|
||||
|
||||
-- MAIN
|
||||
|
||||
|
||||
{-| Program entry point: hands this module's `init`, `view`, `update`
and `subscriptions` functions to `Browser.element`.
-}
main : Program Flags Model Msg
main =
    Browser.element
        { subscriptions = subscriptions
        , update = update
        , view = view
        , init = init
        }
|
||||
|
||||
|
||||
|
||||
--- Model
|
||||
|
||||
|
||||
{-| Values handed to `init` when the program starts. The `version` is
shown in the footer and passed on to the "Get Started" section.
-}
type alias Flags =
    { version : String }
|
||||
|
||||
|
||||
{-| Application state.

  - `navbarOpen`: whether the navbar menu carries the `is-active` class
  - `features`: the features currently rendered on the page
  - `flags`: the start-up flags, kept for later use in the views

-}
type alias Model =
    { navbarOpen : Bool, features : List Feature, flags : Flags }
|
||||
|
||||
|
||||
{-| Messages handled by `update`.

  - `ToggleNavbarMenu`: flip the `navbarOpen` flag
  - `ShuffleFeatures`: request a new random selection of features
  - `ListShuffled`: carries the selection produced by that request

-}
type Msg
    = ToggleNavbarMenu
    | ShuffleFeatures
    | ListShuffled (List Feature)
|
||||
|
||||
|
||||
|
||||
--- Init
|
||||
|
||||
|
||||
{-| Upper bound on the number of features kept in the model, both by
`init` and after shuffling.
-}
viewFeatureCount : Int
viewFeatureCount =
    10
|
||||
|
||||
|
||||
{-| Initial state: navbar closed, the first `viewFeatureCount` entries
of `Feature.features` selected, flags stored as given. No command is
issued.
-}
init : Flags -> ( Model, Cmd Msg )
init flags =
    let
        initialModel =
            { navbarOpen = False
            , features = List.take viewFeatureCount Feature.features
            , flags = flags
            }
    in
    ( initialModel, Cmd.none )
|
||||
|
||||
|
||||
{-| Command that shuffles `Feature.features`, keeps the first
`viewFeatureCount` entries and delivers them as `ListShuffled`.
-}
shuffleFeatures : Cmd Msg
shuffleFeatures =
    let
        selection =
            Random.map
                (List.take viewFeatureCount)
                (Random.List.shuffle Feature.features)
    in
    Random.generate ListShuffled selection
|
||||
|
||||
|
||||
|
||||
--- Update
|
||||
|
||||
|
||||
{-| State transitions.

Toggling the navbar and receiving a shuffled list only change the model;
`ShuffleFeatures` leaves the model alone and issues `shuffleFeatures`.

-}
update : Msg -> Model -> ( Model, Cmd Msg )
update msg model =
    case msg of
        ListShuffled shuffled ->
            ( { model | features = shuffled }, Cmd.none )

        ShuffleFeatures ->
            ( model, shuffleFeatures )

        ToggleNavbarMenu ->
            ( { model | navbarOpen = not model.navbarOpen }, Cmd.none )
|
||||
|
||||
|
||||
{-| The application has no subscriptions, whatever the model is.
-}
subscriptions : Model -> Sub Msg
subscriptions =
    always Sub.none
|
||||
|
||||
|
||||
|
||||
--- View
|
||||
|
||||
|
||||
{-| Render the whole landing page: main hero, feature hero, one box per
feature in the model followed by a pointer to the full feature list, the
"Get Started" hero with its instructions, and the footer.
-}
view : Model -> Html Msg
view model =
    let
        featureBoxes =
            List.indexedMap Feature.featureBox model.features

        -- Trailing box that links to the complete feature list.
        moreFeaturesBox =
            div [ class "columns box" ]
                [ div [ class "column is-full" ]
                    [ div [ class "content has-text-centered is-medium" ]
                        [ text "A more complete list can be found "
                        , a [ href "/docs/features" ] [ text "here" ]
                        , text "."
                        ]
                    ]
                ]
    in
    node "body"
        []
        [ mainHero model
        , featureHero model
        , section [ class "section" ]
            [ div [ class "container" ]
                (featureBoxes ++ [ moreFeaturesBox ])
            ]
        , getStartedHero model
        , div [ class "section" ]
            (GetStarted.getStarted model.flags.version)
        , footHero model
        ]
|
||||
|
||||
|
||||
{-| Render the page footer: product name with the version taken from the
flags, followed by links to the license, the source code and the author,
separated by bullets.
-}
footHero : Model -> Html Msg
footHero model =
    let
        -- Separator placed between the footer entries.
        bullet =
            span [ class "pr-1 pl-1" ]
                [ text " • "
                ]

        -- Link opening in a new tab. `rel "noopener"` keeps the opened
        -- page from getting a `window.opener` handle to this one.
        externalLink url label =
            a
                [ href url
                , target "_blank"
                , rel "noopener"
                ]
                [ text label
                ]
    in
    footer
        [ id "footer"
        , class "footer"
        ]
        [ div [ class "has-text-centered" ]
            [ span []
                [ text ("Docspell, " ++ model.flags.version)
                ]
            , bullet
            , externalLink "https://spdx.org/licenses/GPL-3.0-or-later.html" "GPLv3+"
            , bullet
            , externalLink "https://github.com/eikek/docspell" "Source Code"
            , bullet
            , span []
                [ text "© 2020 "
                ]
            , externalLink "https://github.com/eikek" "@eikek"
            ]
        ]
|
||||
|
||||
|
||||
{-| Render the banner heading the "Get Started" section. Its id,
`get-started`, is the target of the "Get Started" button in the main
hero. The model is not used.
-}
getStartedHero : Model -> Html Msg
getStartedHero _ =
    let
        heading =
            h2 [ class "title" ] [ text "Get Started" ]
    in
    section [ id "get-started", class "hero is-primary is-bold" ]
        [ div [ class "hero-body" ]
            [ div [ class "container" ] [ heading ]
            ]
        ]
|
||||
|
||||
|
||||
{-| Render the banner heading the feature list. Its id,
`feature-selection`, is the target of the "Features" button in the main
hero. The model is not used.
-}
featureHero : Model -> Html Msg
featureHero _ =
    let
        heading =
            h2 [ class "title" ] [ text "Feature Selection" ]
    in
    section [ id "feature-selection", class "hero is-info is-bold" ]
        [ div [ class "hero-body" ]
            [ div [ class "container" ] [ heading ]
            ]
        ]
|
||||
|
||||
|
||||
{-| Render the full-height hero at the top of the page.

It consists of the navbar (brand, burger button, menu), the centered
body with logo, title, tagline and the two call-to-action buttons, and
the photo credit at the bottom. The burger button sends
`ToggleNavbarMenu`; `model.navbarOpen` drives the `is-active` class of
both the burger and the menu as well as the burger's expanded state.

-}
mainHero : Model -> Html Msg
mainHero model =
    let
        navbarBrand =
            div [ class "navbar-brand" ]
                [ a
                    [ class "navbar-item"
                    , href "/"
                    ]
                    [ span [ class "icon is-large" ]
                        [ Icons.logo
                        ]
                    , text "Docspell"
                    ]
                , a
                    [ role "button"
                    , onClick ToggleNavbarMenu
                    , classList
                        [ ( "navbar-burger", True )
                        , ( "is-active", model.navbarOpen )
                        ]
                    , ariaLabel "menu"

                    -- Report the real menu state instead of a constant.
                    , ariaExpanded model.navbarOpen
                    ]
                    [ span [ ariaHidden True ] []
                    , span [ ariaHidden True ] []
                    , span [ ariaHidden True ] []
                    ]
                ]

        navbarMenu =
            div
                [ classList
                    [ ( "navbar-menu", True )
                    , ( "is-active", model.navbarOpen )
                    ]
                ]
                [ div [ class "navbar-start" ]
                    [ a
                        [ href "docs/"
                        , class "navbar-item"
                        ]
                        [ span [ class "icon" ]
                            [ Icons.docs
                            ]
                        , span []
                            [ text "Documentation"
                            ]
                        ]
                    , a
                        [ target "_blank"

                        -- Keep the opened tab from accessing `window.opener`.
                        , rel "noopener"
                        , href "https://github.com/eikek/docspell"
                        , class "navbar-item"
                        ]
                        [ span [ class "icon" ]
                            [ Icons.github
                            ]
                        , span []
                            [ text "Github"
                            ]
                        ]
                    ]
                ]

        heroBody =
            div [ class "hero-body" ]
                [ div
                    [ class "container has-text-centered"
                    ]
                    [ Icons.logoWidth 112
                    , h1 [ class "title main-title is-2" ]
                        [ text "Docspell"
                        ]
                    , h2 [ class "subtitle is-3" ]
                        [ text "Simple document organizer"
                        ]
                    , p [ class "content is-medium" ]
                        [ text "Docspell can assist in organizing your piles of "
                        , text "digital documents, resulting from scanners, e-mails "
                        , text "and other sources with minimal effort."
                        ]
                    , div [ class " buttons is-centered" ]
                        [ a
                            [ class "button is-primary is-medium"
                            , href "#get-started"
                            ]
                            [ text "Get Started"
                            ]
                        , a
                            [ class "button is-info is-medium"
                            , href "#feature-selection"
                            ]
                            [ text "Features"
                            ]
                        ]
                    ]
                ]

        heroFoot =
            div [ class "hero-foot" ]
                [ span [ class "unsplash-credit" ]
                    [ text "Photo by "
                    , a
                        [ href "https://unsplash.com/@numericcitizen"
                        , target "_blank"
                        , rel "noopener"
                        ]
                        [ text "JF Martin"
                        ]
                    ]
                ]
    in
    section
        [ id "hero-main"
        , class "hero is-fullheight is-primary"
        ]
        [ div [ class "hero-head" ]
            [ nav [ class "navbar" ]
                [ navbarBrand
                , navbarMenu
                ]
            ]
        , heroBody
        , heroFoot
        ]
|
6
website/package.json
Normal file
@ -0,0 +1,6 @@
|
||||
{
|
||||
"license": "GPL-3.0-or-later",
|
||||
"dependencies": {
|
||||
"bulma": "^0.9.0"
|
||||
}
|
||||
}
|
9
website/run-elm.sh
Executable file
@ -0,0 +1,9 @@
|
||||
#!/usr/bin/env bash
#
# Compile the Elm sources once, then recompile whenever a file below
# `elm/` has been written.

# Build the JavaScript bundle. A function is used instead of a command
# kept in a string, so the arguments are never subject to word splitting
# or globbing.
build() {
    elm make --output site/static/js/bundle.js --optimize elm/Main.elm
}

build

# inotifywait prints one line per close_write event; each line triggers
# a rebuild. `read -r` keeps backslashes in file names intact.
inotifywait -m -e close_write -r elm/ |
    while read -r _event; do
        build
    done
|
15
website/shell.nix
Normal file
@ -0,0 +1,15 @@
|
||||
let
  # The nixos-unstable branch is fetched from the main nixpkgs
  # repository; the separate nixpkgs-channels mirror is deprecated.
  nixpkgsUnstable = builtins.fetchTarball {
    url = "https://github.com/NixOS/nixpkgs/archive/nixos-unstable.tar.gz";
  };
  pkgsUnstable = import nixpkgsUnstable { };
in
with pkgsUnstable;

# Development shell for working on the website.
mkShell {
  buildInputs = [
    zola
    yarn
    # canonical attribute name; `inotifyTools` is only an alias
    inotify-tools
  ];
}
|
29
website/site/config.toml
Normal file
@ -0,0 +1,29 @@
|
||||
# The URL the site will be built for
|
||||
base_url = "https://docspell.org"
|
||||
|
||||
# Whether to automatically compile all Sass files in the sass directory
|
||||
compile_sass = true
|
||||
|
||||
# Whether to do syntax highlighting
|
||||
# Theme can be customised by setting the `highlight_theme` variable to a theme supported by Zola
|
||||
highlight_code = true
|
||||
|
||||
highlight_theme = "gruvbox-dark"
|
||||
|
||||
# Whether to build a search index to be used later on by a JavaScript library
|
||||
build_search_index = true
|
||||
|
||||
[link_checker]
|
||||
skip_prefixes = [
|
||||
"http://localhost",
|
||||
"/openapi",
|
||||
"https://www.abisource.com" # has bad ssl config
|
||||
]
|
||||
skip_anchor_prefixes = [
|
||||
"https://github.com",
|
||||
"https://package.elm-lang.org"
|
||||
]
|
||||
|
||||
[extra]
|
||||
# Put all your custom variables here
|
||||
version = "0.9.0-SNAPSHOT"
|
3
website/site/content/doc/_index.md
Normal file
@ -0,0 +1,3 @@
|
||||
+++
|
||||
redirect_to = "/docs"
|
||||
+++
|
9
website/site/content/docs/_index.md
Normal file
@ -0,0 +1,9 @@
|
||||
+++
|
||||
title = "Overview"
|
||||
template = "overview.html"
|
||||
insert_anchor_links = "right"
|
||||
+++
|
||||
|
||||
# Note
|
||||
|
||||
This content is not rendered. Everything is in the template.
|
@ -1,19 +1,20 @@
|
||||
---
|
||||
layout: docs
|
||||
title: Api
|
||||
permalink: api
|
||||
---
|
||||
|
||||
# {{page.title}}
|
||||
+++
|
||||
title = "Api"
|
||||
description = "Contains documentation about the REST API."
|
||||
weight = 70
|
||||
insert_anchor_links = "right"
|
||||
[extra]
|
||||
mktoc = true
|
||||
+++
|
||||
|
||||
Docspell is designed as a REST server that uses JSON to exchange
|
||||
data. The REST api can be used to integrate docspell into your
|
||||
workflow.
|
||||
|
||||
[Docspell REST Api Doc](openapi/docspell-openapi.html)
|
||||
[Docspell REST Api Doc](/openapi/docspell-openapi.html)
|
||||
|
||||
The "raw" `openapi.yml` specification file can be found
|
||||
[here](openapi/docspell-openapi.yml).
|
||||
[here](/openapi/docspell-openapi.yml).
|
||||
|
||||
The routes can be divided into protected and unprotected routes. The
|
||||
unprotected, or open routes are at `/open/*` while the protected
|
||||
@ -56,7 +57,7 @@ These examples use the great command line tool
|
||||
|
||||
### Login
|
||||
|
||||
```
|
||||
``` bash
|
||||
$ curl -X POST -d '{"account": "smith", "password": "test"}' http://localhost:7880/api/v1/open/auth/login
|
||||
{"collective":"smith"
|
||||
,"user":"smith"
|
||||
@ -69,7 +70,7 @@ $ curl -X POST -d '{"account": "smith", "password": "test"}' http://localhost:78
|
||||
|
||||
### Get new token
|
||||
|
||||
```
|
||||
``` bash
|
||||
$ curl -XPOST -H 'X-Docspell-Auth: 1568142350115-ZWlrZS9laWtl-$2a$10$rGZUFDAVNIKh4Tj6u6tlI.-O2euwCvmBT0TlyDmIHR1ZsLQPAI=' http://localhost:7880/api/v1/sec/auth/session
|
||||
{"collective":"smith"
|
||||
,"user":"smith"
|
||||
@ -82,7 +83,7 @@ $ curl -XPOST -H 'X-Docspell-Auth: 1568142350115-ZWlrZS9laWtl-$2a$10$rGZUFDAVNIK
|
||||
|
||||
### Get some insights
|
||||
|
||||
```
|
||||
``` bash
|
||||
$ curl -H 'X-Docspell-Auth: 1568142446077-ZWlrZS9laWtl-$2a$10$3B0teJ9rMpsBJPzHfZZPoO-WeA1bkfEONBN8fyzWE8DeaAHtUc=' http://localhost:7880/api/v1/sec/collective/insights
|
||||
{"incomingCount":3
|
||||
,"outgoingCount":1
|
@ -1,10 +1,11 @@
|
||||
---
|
||||
layout: docs
|
||||
title: Configuring
|
||||
permalink: doc/configure
|
||||
---
|
||||
|
||||
# {{ page.title }}
|
||||
+++
|
||||
title = "Configuration"
|
||||
insert_anchor_links = "right"
|
||||
description = "Describes how to configure docspell and lists the default settings of both components."
|
||||
weight = 40
|
||||
[extra]
|
||||
mktoc = true
|
||||
+++
|
||||
|
||||
Docspell's executable can take one argument – a configuration file. If
|
||||
that is not given, the defaults are used. The config file overrides
|
||||
@ -13,13 +14,13 @@ necessary.
|
||||
|
||||
This applies to the restserver and the joex as well.
|
||||
|
||||
## Important Config Options
|
||||
# Important Config Options
|
||||
|
||||
The configuration of both components uses separate namespaces. The
|
||||
configuration for the REST server is below `docspell.server`, while
|
||||
the one for joex is below `docspell.joex`.
|
||||
|
||||
### JDBC
|
||||
## JDBC
|
||||
|
||||
This configures the connection to the database. This has to be
|
||||
specified for the rest server and joex. By default, a H2 database in
|
||||
@ -27,7 +28,7 @@ the current `/tmp` directory is configured.
|
||||
|
||||
The config looks like this (both components):
|
||||
|
||||
```
|
||||
``` conf
|
||||
docspell.joex.jdbc {
|
||||
url = ...
|
||||
user = ...
|
||||
@ -55,7 +56,7 @@ include these options:
|
||||
;MODE=PostgreSQL;DATABASE_TO_LOWER=TRUE;AUTO_SERVER=TRUE
|
||||
```
|
||||
|
||||
#### Examples
|
||||
### Examples
|
||||
|
||||
PostgreSQL:
|
||||
```
|
||||
@ -73,14 +74,14 @@ url = "jdbc:h2:///path/to/a/file.db;MODE=PostgreSQL;DATABASE_TO_LOWER=TRUE;AUTO_
|
||||
```
|
||||
|
||||
|
||||
### Full-Text Search: SOLR
|
||||
## Full-Text Search: SOLR
|
||||
|
||||
[Apache SOLR](https://lucene.apache.org/solr) is used to provide the
|
||||
full-text search. Both docspell components must provide the same
|
||||
connection setup. This is defined in the `full-text-search.solr`
|
||||
subsection:
|
||||
|
||||
```
|
||||
``` conf
|
||||
...
|
||||
full-text-search {
|
||||
enabled = true
|
||||
@ -108,7 +109,7 @@ While the `full-text-search.solr` options are the same for joex and
|
||||
the restserver, there are some settings that differ. The restserver
|
||||
has this additional setting, that may be of interest:
|
||||
|
||||
```
|
||||
``` conf
|
||||
full-text-search {
|
||||
recreate-key = "test123"
|
||||
}
|
||||
@ -117,7 +118,7 @@ full-text-search {
|
||||
This key is required if you want docspell to drop and re-create the
|
||||
entire index. This is possible via a REST call:
|
||||
|
||||
``` shell
|
||||
``` bash
|
||||
$ curl -XPOST http://localhost:7880/api/v1/open/fts/reIndexAll/test123
|
||||
```
|
||||
|
||||
@ -138,13 +139,13 @@ The solr index doesn't contain any new information, it can be
|
||||
regenerated any time using the above REST call. Thus it doesn't need
|
||||
to be backed up.
|
||||
|
||||
### Bind
|
||||
## Bind
|
||||
|
||||
The host and port the http server binds to. This applies to both
|
||||
components. The joex component also exposes a small REST api to
|
||||
inspect its state and notify the scheduler.
|
||||
|
||||
```
|
||||
``` conf
|
||||
docspell.server.bind {
|
||||
address = localhost
|
||||
port = 7880
|
||||
@ -158,7 +159,7 @@ docspell.joex.bind {
|
||||
By default, it binds to `localhost` and some predefined port. This
|
||||
must be changed, if components are on different machines.
|
||||
|
||||
### baseurl
|
||||
## Baseurl
|
||||
|
||||
The base url is an important setting that defines the http URL where
|
||||
the corresponding component can be reached. It applies to both
|
||||
@ -174,7 +175,7 @@ docspell.server.baseurl = ...
|
||||
docspell.joex.baseurl = ...
|
||||
```
|
||||
|
||||
#### Examples
|
||||
### Examples
|
||||
|
||||
```
|
||||
docspell.server.baseurl = "https://docspell.example.com"
|
||||
@ -182,19 +183,19 @@ docspell.joex.baseurl = "http://192.168.101.10"
|
||||
```
|
||||
|
||||
|
||||
### app-id
|
||||
## App-id
|
||||
|
||||
The `app-id` is the identifier of the corresponding instance. It *must
|
||||
be unique* for all instances. By default the REST server uses `rest1`
|
||||
and joex `joex1`. It is recommended to overwrite this setting to have
|
||||
an explicit and stable identifier.
|
||||
|
||||
```
|
||||
``` conf
|
||||
docspell.server.app-id = "rest1"
|
||||
docspell.joex.app-id = "joex1"
|
||||
```
|
||||
|
||||
### registration options
|
||||
## Registration Options
|
||||
|
||||
This defines if and how new users can create accounts. There are 3
|
||||
options:
|
||||
@ -205,7 +206,7 @@ options:
|
||||
|
||||
This applies only to the REST server component.
|
||||
|
||||
```
|
||||
``` conf
|
||||
docspell.server.signup {
|
||||
mode = "open"
|
||||
|
||||
@ -227,11 +228,11 @@ this. If it is not set, then invitation won't work. New invitation
|
||||
keys can be generated from within the web application or via REST
|
||||
calls (using `curl`, for example).
|
||||
|
||||
```
|
||||
``` bash
|
||||
curl -X POST -d '{"password":"blabla"}' "http://localhost:7880/api/v1/open/signup/newinvite"
|
||||
```
|
||||
|
||||
### Authentication
|
||||
## Authentication
|
||||
|
||||
Authentication works in two ways:
|
||||
|
||||
@ -245,7 +246,7 @@ token can be given as a normal http header or via a cookie header.
|
||||
|
||||
These settings apply only to the REST server.
|
||||
|
||||
```
|
||||
``` conf
|
||||
docspell.server.auth {
|
||||
server-secret = "hex:caffee" # or "b64:Y2FmZmVlCg=="
|
||||
session-valid = "5 minutes"
|
||||
@ -264,7 +265,7 @@ just some minutes, the web application obtains new ones
|
||||
periodically. So a short time is recommended.
|
||||
|
||||
|
||||
## File Format
|
||||
# File Format
|
||||
|
||||
The format of the configuration files can be
|
||||
[HOCON](https://github.com/lightbend/config/blob/master/HOCON.md#hocon-human-optimized-config-object-notation),
|
||||
@ -272,27 +273,25 @@ JSON or whatever the used [config
|
||||
library](https://github.com/lightbend/config) understands. The default
|
||||
values below are in HOCON format, which is recommended, since it
|
||||
allows comments and has some [advanced
|
||||
features](https://github.com/lightbend/config/blob/master/README.md#features-of-hocon). Please
|
||||
refer to their documentation for more on this.
|
||||
features](https://github.com/lightbend/config#features-of-hocon).
|
||||
Please refer to their documentation for more on this.
|
||||
|
||||
Here are the default configurations.
|
||||
|
||||
|
||||
## Default Config
|
||||
# Default Config
|
||||
## Rest Server
|
||||
|
||||
### Rest Server
|
||||
{{ incl_conf(path="templates/shortcodes/server.conf") }}
|
||||
|
||||
```
|
||||
{% include server.conf %}
|
||||
```
|
||||
|
||||
### Joex
|
||||
## Joex
|
||||
|
||||
```
|
||||
{% include joex.conf %}
|
||||
```
|
||||
|
||||
## Logging
|
||||
{{ incl_conf(path="templates/shortcodes/joex.conf") }}
|
||||
|
||||
|
||||
# Logging
|
||||
|
||||
By default, docspell logs to stdout. This works well, when managed by
|
||||
systemd or other inits. Logging is done by
|
||||
@ -302,7 +301,7 @@ for how to configure logging.
|
||||
If you created your logback config file, it can be added as argument
|
||||
to the executable using this syntax:
|
||||
|
||||
```
|
||||
``` bash
|
||||
/path/to/docspell -Dlogback.configurationFile=/path/to/your/logging-config-file
|
||||
```
|
||||
|
9
website/site/content/docs/dev/_index.md
Normal file
@ -0,0 +1,9 @@
|
||||
+++
|
||||
title = "Development"
|
||||
description = "Contains build instructions and other internal notes."
|
||||
weight = 300
|
||||
sort_by = "weight"
|
||||
insert_anchor_links = "right"
|
||||
template = "pages.html"
|
||||
redirect_to = "/docs/dev/building"
|
||||
+++
|
@ -1,18 +1,15 @@
|
||||
---
|
||||
layout: docs
|
||||
title: Use Markdown Architectural Decision Records
|
||||
permalink: dev/adr/0000
|
||||
---
|
||||
+++
|
||||
title = "Use Markdown Architectural Decision Records"
|
||||
weight = 10
|
||||
+++
|
||||
|
||||
# Use Markdown Architectural Decision Records
|
||||
|
||||
## Context and Problem Statement
|
||||
# Context and Problem Statement
|
||||
|
||||
We want to [record architectural decisions](https://adr.github.io/)
|
||||
made in this project. Which format and structure should these records
|
||||
follow?
|
||||
|
||||
## Considered Options
|
||||
# Considered Options
|
||||
|
||||
* [MADR](https://adr.github.io/madr/) 2.1.0 - The Markdown Architectural Decision Records
|
||||
* [Michael Nygard's template](http://thinkrelevance.com/blog/2011/11/15/documenting-architecture-decisions) - The first incarnation of the term "ADR"
|
||||
@ -23,7 +20,7 @@ follow?
|
||||
<https://github.com/joelparkerhenderson/architecture_decision_record>
|
||||
* Formless - No conventions for file format and structure
|
||||
|
||||
## Decision Outcome
|
||||
# Decision Outcome
|
||||
|
||||
Chosen option: "MADR 2.1.0", because
|
||||
|
@ -1,29 +1,26 @@
|
||||
---
|
||||
layout: docs
|
||||
title: Components
|
||||
permalink: dev/adr/0001_components
|
||||
---
|
||||
+++
|
||||
title = "Components"
|
||||
weight = 20
|
||||
+++
|
||||
|
||||
# Components
|
||||
|
||||
## Context and Problem Statement
|
||||
# Context and Problem Statement
|
||||
|
||||
How should the application be structured into its main components? The
|
||||
goal is to be able to have multiple rest servers/webapps and multiple
|
||||
document processor components working together.
|
||||
|
||||
|
||||
## Decision Outcome
|
||||
# Decision Outcome
|
||||
|
||||
The following are the "main" modules. There may be more helper modules
|
||||
and libraries that support implementing a feature.
|
||||
|
||||
### store
|
||||
## store
|
||||
|
||||
The code related to database access. It also provides the job
|
||||
queue. It is designed as a library.
|
||||
|
||||
### joex
|
||||
## joex
|
||||
|
||||
Joex stands for "job executor".
|
||||
|
||||
@ -37,7 +34,7 @@ It provides the document processing code.
|
||||
It provides a http rest server to get insight into the joex state
|
||||
and also to be notified for new jobs.
|
||||
|
||||
### backend
|
||||
## backend
|
||||
|
||||
It provides all the logic, except document processing, as a set of
|
||||
"operations". An operation can be directly mapped to a rest
|
||||
@ -45,7 +42,7 @@ endpoint.
|
||||
|
||||
It is designed as a library.
|
||||
|
||||
### rest api
|
||||
## rest api
|
||||
|
||||
This module contains the specification for the rest server as an
|
||||
`openapi.yml` file. It is packaged as a scala library that also
|
||||
@ -54,7 +51,7 @@ provides types and conversions to/from json.
|
||||
The idea is that the `rest server` module can depend on it as well as
|
||||
rest clients.
|
||||
|
||||
### rest server
|
||||
## rest server
|
||||
|
||||
This is the main application. It directly depends on the `backend`
|
||||
module, and each rest endpoint maps to a "backend operation". It is
|
||||
@ -62,6 +59,6 @@ also responsible for converting the json data inside http requests
|
||||
to/from types recognized by the `backend` module.
|
||||
|
||||
|
||||
### webapp
|
||||
## webapp
|
||||
|
||||
This module provides the user interface as a web application.
|
@ -1,12 +1,9 @@
|
||||
---
|
||||
layout: docs
|
||||
title: Component Interaction
|
||||
permalink: dev/adr/0002_component_interaction
|
||||
---
|
||||
+++
|
||||
title = "Component Interaction"
|
||||
weight = 30
|
||||
+++
|
||||
|
||||
# Component Interaction
|
||||
|
||||
## Context and Problem Statement
|
||||
# Context and Problem Statement
|
||||
|
||||
There are multiple web applications with their rest servers and there
|
||||
are multiple document processors. These processes must communicate:
|
||||
@ -18,14 +15,14 @@ are multiple document processors. These processes must communicate:
|
||||
user that is currently logged in. Since it's not known which
|
||||
rest-server the user is using right now, all must be notified.
|
||||
|
||||
## Considered Options
|
||||
# Considered Options
|
||||
|
||||
1. JMS (ActiveMQ or similar): Message Broker as another active
|
||||
component
|
||||
2. Akka: using a cluster
|
||||
3. DB: Register with "call back urls"
|
||||
|
||||
## Decision Outcome
|
||||
# Decision Outcome
|
||||
|
||||
Choosing option 3: DB as central synchronisation point.
|
||||
|
||||
@ -42,7 +39,7 @@ It works roughly like this:
|
||||
- each component has db access
|
||||
- rest servers can list all processors and vice versa
|
||||
|
||||
### Positive Consequences
|
||||
## Positive Consequences
|
||||
|
||||
- complexity of the whole application is not touched
|
||||
- since a lot of data must be transferred to the document processors,
|
||||
@ -53,7 +50,7 @@ It works roughly like this:
|
||||
- no additional knowledge required
|
||||
- simple to understand and so not hard to debug
|
||||
|
||||
### Negative Consequences
|
||||
## Negative Consequences
|
||||
|
||||
- all components must have db access. this also is a security con,
|
||||
because if one of those processes is hacked, db access is
|
@ -1,13 +1,10 @@
|
||||
---
|
||||
layout: docs
|
||||
title: Encryption
|
||||
permalink: dev/adr/0003_encryption
|
||||
---
|
||||
|
||||
# Encryption
|
||||
+++
|
||||
title = "Encryption"
|
||||
weight = 40
|
||||
+++
|
||||
|
||||
|
||||
## Context and Problem Statement
|
||||
# Context and Problem Statement
|
||||
|
||||
Since docspell may store important documents, it should be possible to
|
||||
encrypt them on the server. It should be (almost) transparent to the
|
||||
@ -29,21 +26,21 @@ So in short, encryption means:
|
||||
such that files can be downloaded in clear form
|
||||
|
||||
|
||||
## Decision Drivers
|
||||
# Decision Drivers
|
||||
|
||||
* major driver is to provide most possible privacy for users
|
||||
* even at the expense of less features; currently I think that the
|
||||
associated meta data is enough for finding documents (i.e. full text
|
||||
search is not needed)
|
||||
|
||||
## Considered Options
|
||||
# Considered Options
|
||||
|
||||
It is clear, that only blobs (file contents) can be encrypted, but not
|
||||
the associated metadata. And the extracted text must be encrypted,
|
||||
too, obviously.
|
||||
|
||||
|
||||
### Public Key Encryption (PKE)
|
||||
## Public Key Encryption (PKE)
|
||||
|
||||
With PKE, the server can automatically encrypt files using
|
||||
publicly available key data. It wouldn't require a user to provide a
|
||||
@ -68,7 +65,7 @@ attacks and also for accidental leakage (for example, if a bug in the
|
||||
software would access a file of another user).
|
||||
|
||||
|
||||
### No Encryption
|
||||
## No Encryption
|
||||
|
||||
If only blobs are encrypted, against which type of attack would it
|
||||
provide protection?
|
||||
@ -87,7 +84,7 @@ But the downside is, that it increases complexity *a lot*. And since
|
||||
this is a personal tool for personal use, is it worth the effort?
|
||||
|
||||
|
||||
## Decision Outcome
|
||||
# Decision Outcome
|
||||
|
||||
No encryption, because of its complexity.
|
||||
|
@ -1,12 +1,9 @@
|
||||
---
|
||||
layout: docs
|
||||
title: ISO8601 vs Millis
|
||||
permalink: dev/adr/0004_iso8601vsEpoch
|
||||
---
|
||||
+++
|
||||
title = "ISO8601 vs Millis as Date-Time transfer"
|
||||
weight = 50
|
||||
+++
|
||||
|
||||
# ISO8601 vs Millis as Date-Time transfer
|
||||
|
||||
## Context and Problem Statement
|
||||
# Context and Problem Statement
|
||||
|
||||
The question is whether the REST Api should return an ISO8601
|
||||
formatted string in UTC timezone, or the unix time (number of
|
||||
@ -29,7 +26,7 @@ information in a ISO8601 string than in the epoch millis.
|
||||
|
||||
To avoid confusion, all date/time values should use the same encoding.
|
||||
|
||||
## Decision Outcome
|
||||
# Decision Outcome
|
||||
|
||||
I go with the epoch time. Every timestamp/date-time value is
|
||||
transferred as a Unix timestamp.
|
@ -1,12 +1,9 @@
|
||||
---
|
||||
layout: docs
|
||||
title: Joex - Job Executor
|
||||
permalink: dev/adr/0005_job-executor
|
||||
---
|
||||
+++
|
||||
title = "Joex - Job Executor"
|
||||
weight = 60
|
||||
+++
|
||||
|
||||
# Job Executor
|
||||
|
||||
## Context and Problem Statement
|
||||
# Context and Problem Statement
|
||||
|
||||
Docspell is a multi-user application. When processing user's
|
||||
documents, there must be some thought on how to distribute all the
|
||||
@ -40,7 +37,7 @@ associated task. This is used to run the document processing jobs
|
||||
program was not installed or whatever). In such a case it is good
|
||||
to know that the task will be retried later.
|
||||
|
||||
## Considered Options
|
||||
# Considered Options
|
||||
|
||||
In contrast to other ADRs this is just some sketching of thoughts for
|
||||
the current implementation.
|
||||
@ -71,7 +68,7 @@ the current implementation.
|
||||
jobs that are due at the specific point in time ignoring the
|
||||
priority hint.
|
||||
|
||||
### More Details
|
||||
## More Details
|
||||
|
||||
A job has these properties
|
||||
|
||||
@ -115,7 +112,7 @@ Once there are no jobs in the queue the executor goes into sleep and
|
||||
must be woken to run again. If a job is submitted, the executors are
|
||||
notified.
|
||||
|
||||
### Stuck Jobs
|
||||
## Stuck Jobs
|
||||
|
||||
A job is going into _stuck_ state, if the task has failed. In this
|
||||
state, the task is rerun after a while until a maximum retry count is
|
@ -1,12 +1,9 @@
|
||||
---
|
||||
layout: docs
|
||||
title: More File Types
|
||||
permalink: dev/adr/0006_more-file-types
|
||||
---
|
||||
+++
|
||||
title = "More File Types"
|
||||
weight = 70
|
||||
+++
|
||||
|
||||
# More File Types
|
||||
|
||||
## Context and Problem Statement
|
||||
# Context and Problem Statement
|
||||
|
||||
Docspell currently only supports PDF files. This has simplified early
|
||||
development and design a lot and so helped with starting the project.
|
||||
@ -38,7 +35,7 @@ since the conversion may not always create best results, the original
|
||||
file should be kept.
|
||||
|
||||
|
||||
## Decision Drivers
|
||||
# Decision Drivers
|
||||
|
||||
People expect that software like Docspell support the most common
|
||||
document types, like all the “office documents” (`docx`, `rtf`, `odt`,
|
||||
@ -47,15 +44,15 @@ those files instead of PDF. Some (older) scanners may not be able to
|
||||
scan into PDF files but only to image files.
|
||||
|
||||
|
||||
## Considered Options
|
||||
# Considered Options
|
||||
|
||||
This ADR does not evaluate different options. It rather documents why
|
||||
this feature is realized and the thoughts that led to how it is
|
||||
implemented.
|
||||
|
||||
## Realization
|
||||
# Realization
|
||||
|
||||
### Data Model
|
||||
## Data Model
|
||||
|
||||
The `attachment` table holds one file. There will be another table
|
||||
`attachment_source` that holds the original file. It looks like this:
|
||||
@ -90,7 +87,7 @@ pointing to an `attachment_source` is also not correct, because it
|
||||
allows the same `attachment_source` record to be associated with many
|
||||
`attachment` records. This would do even more harm, in my opinion.
|
||||
|
||||
### Migration
|
||||
## Migration
|
||||
|
||||
Creating a new table and not altering existing ones, should simplify
|
||||
data migration.
|
||||
@ -101,7 +98,7 @@ inserted into the new table. This presents the trivial case where the
|
||||
attachment and source are the same.
|
||||
|
||||
|
||||
### Processing
|
||||
## Processing
|
||||
|
||||
The first step in processing is now converting the file into a pdf. If
|
||||
it already is a pdf, nothing is done. This step is before text
|
||||
@ -113,7 +110,7 @@ If conversion is not supported for the input file, it is skipped. If
|
||||
conversion fails, the error is propagated to let the retry mechanism
|
||||
take care.
|
||||
|
||||
#### What types?
|
||||
### What types?
|
||||
|
||||
Which file types should be supported? At a first step, all major
|
||||
office documents, common images, plain text (i.e. markdown) and html
|
||||
@ -124,20 +121,18 @@ There is always the preference to use jvm internal libraries in order
|
||||
to be more platform independent and to reduce external dependencies.
|
||||
But this is not always possible (like doing OCR).
|
||||
|
||||
<div class="thumbnail">
|
||||
<img src="./img/process-files.png" title="Overview processing files">
|
||||
</div>
|
||||
{{ figure(file="process-files.png") }}
|
||||
|
||||
#### Conversion
|
||||
### Conversion
|
||||
|
||||
- Office documents (`doc`, `docx`, `xls`, `xlsx`, `odt`, `ods`):
|
||||
unoconv (see [ADR 9](0009_convert_office_docs))
|
||||
- HTML (`html`): wkhtmltopdf (see [ADR 7](0007_convert_html_files))
|
||||
unoconv (see [ADR 9](@/docs/dev/adr/0009_convert_office_docs.md))
|
||||
- HTML (`html`): wkhtmltopdf (see [ADR 7](@/docs/dev/adr/0007_convert_html_files.md))
|
||||
- Text/Markdown (`txt`, `md`): Java-Lib flexmark + wkhtmltopdf
|
||||
- Images (`jpg`, `png`, `tif`): Tesseract (see [ADR
|
||||
10](0010_convert_image_files))
|
||||
10](@/docs/dev/adr/0010_convert_image_files.md))
|
||||
|
||||
#### Text Extraction
|
||||
### Text Extraction
|
||||
|
||||
- Office documents (`doc`, `docx`, `xls`, `xlsx`): Apache Poi
|
||||
- Office documents (`odt`, `ods`): Apache Tika (including the sources)
|
||||
@ -146,10 +141,10 @@ But this is not always possible (like doing OCR).
|
||||
- Text/Markdown: n.a.
|
||||
- PDF: Apache PDFBox or Tesseract
|
||||
|
||||
## Links
|
||||
# Links
|
||||
|
||||
* [Convert HTML Files](0007_convert_html_files)
|
||||
* [Convert Plain Text](0008_convert_plain_text)
|
||||
* [Convert Office Documents](0009_convert_office_docs)
|
||||
* [Convert Image Files](0010_convert_image_files)
|
||||
* [Extract Text from Files](0011_extract_text)
|
||||
* [Convert HTML Files](@/docs/dev/adr/0007_convert_html_files.md)
|
||||
* [Convert Plain Text](@/docs/dev/adr/0008_convert_plain_text.md)
|
||||
* [Convert Office Documents](@/docs/dev/adr/0009_convert_office_docs.md)
|
||||
* [Convert Image Files](@/docs/dev/adr/0010_convert_image_files.md)
|
||||
* [Extract Text from Files](@/docs/dev/adr/0011_extract_text.md)
|
@ -1,12 +1,9 @@
|
||||
---
|
||||
layout: docs
|
||||
title: Convert HTML Files
|
||||
permalink: dev/adr/0007_convert_html_files
|
||||
---
|
||||
+++
|
||||
title = "Convert HTML Files"
|
||||
weight = 80
|
||||
+++
|
||||
|
||||
# {{ page.title }}
|
||||
|
||||
## Context and Problem Statement
|
||||
# Context and Problem Statement
|
||||
|
||||
How can HTML documents be converted into a PDF file that looks as much
|
||||
as possible like the original?
|
||||
@ -17,7 +14,7 @@ has a better outcome, then an external tool is fine, too.
|
||||
Since Docspell is free software, the tools must also be free.
|
||||
|
||||
|
||||
## Considered Options
|
||||
# Considered Options
|
||||
|
||||
* [pandoc](https://pandoc.org/) external command
|
||||
* [wkhtmltopdf](https://wkhtmltopdf.org/) external command
|
||||
@ -25,9 +22,7 @@ Since Docspell is free software, the tools must also be free.
|
||||
|
||||
Native (firefox) view:
|
||||
|
||||
<div class="thumbnail">
|
||||
<img src="./img/example-html-native.jpg" title="Native view of an HTML example file">
|
||||
</div>
|
||||
{{ figure(file="example-html-native.jpg") }}
|
||||
|
||||
Note: the example html is from
|
||||
[here](https://www.sparksuite.com/open-source/invoice.html).
|
||||
@ -36,36 +31,28 @@ I downloaded the HTML file to disk together with its resources (using
|
||||
*Save as...* in the browser).
|
||||
|
||||
|
||||
### Pandoc
|
||||
## Pandoc
|
||||
|
||||
<div class="thumbnail">
|
||||
<img src="./img/example-html-pandoc-latex.jpg" title="Pandoc (Latex) HTML->PDF">
|
||||
</div>
|
||||
{{ figure(file="example-html-pandoc-latex.jpg") }}
|
||||
|
||||
<div class="thumbnail">
|
||||
<img src="./img/example-html-pandoc-html.jpg" title="Pandoc (html) HTML->PDF">
|
||||
</div>
|
||||
{{ figure(file="example-html-pandoc-html.jpg") }}
|
||||
|
||||
Not showing the version using `context` pdf-engine, since it looked
|
||||
very similar to the latex variant.
|
||||
|
||||
|
||||
### wkhtmltopdf
|
||||
## wkhtmltopdf
|
||||
|
||||
<div class="thumbnail">
|
||||
<img src="./img/example-html-wkhtmltopdf.jpg" title="wkhtmltopdf HTML->PDF">
|
||||
</div>
|
||||
{{ figure(file="example-html-wkhtmltopdf.jpg") }}
|
||||
|
||||
|
||||
### Unoconv
|
||||
## Unoconv
|
||||
|
||||
|
||||
<div class="thumbnail">
|
||||
<img src="./img/example-html-unoconv.jpg" title="Unoconv HTML->PDF">
|
||||
</div>
|
||||
{{ figure(file="example-html-unoconv.jpg") }}
|
||||
|
||||
|
||||
## Decision Outcome
|
||||
# Decision Outcome
|
||||
|
||||
wkhtmltopdf.
|
||||
|
@ -1,12 +1,9 @@
|
||||
---
|
||||
layout: docs
|
||||
title: Convert Text Files
|
||||
permalink: dev/adr/0008_convert_plain_text
|
||||
---
|
||||
+++
|
||||
title = "Convert Text Files"
|
||||
weight = 90
|
||||
+++
|
||||
|
||||
# {{ page.title }}
|
||||
|
||||
## Context and Problem Statement
|
||||
# Context and Problem Statement
|
||||
|
||||
How can plain text and markdown documents be converted into PDF
|
||||
files?
|
||||
@ -97,15 +94,15 @@ the end.
|
||||
```
|
||||
|
||||
|
||||
## Considered Options
|
||||
# Considered Options
|
||||
|
||||
* [flexmark](https://github.com/vsch/flexmark-java) for markdown to
|
||||
HTML, then use existing machinery described in [adr
|
||||
7](./0007_convert_html_files)
|
||||
7](@/docs/dev/adr/0007_convert_html_files.md)
|
||||
* [pandoc](https://pandoc.org/) external command
|
||||
|
||||
|
||||
### flexmark markdown library for java
|
||||
## flexmark markdown library for java
|
||||
|
||||
Process files with [flexmark](https://github.com/vsch/flexmark-java)
|
||||
and then create a PDF from the resulting html.
|
||||
@ -136,17 +133,13 @@ def renderMarkdown(): ExitCode = {
|
||||
Then run the result through `wkhtmltopdf`.
|
||||
|
||||
Markdown file:
|
||||
<div class="thumbnail">
|
||||
<img src="./img/example-md-java.jpg" title="Flexmark/wkhtmltopdf MD->PDF">
|
||||
</div>
|
||||
{{ figure(file="example-md-java.jpg") }}
|
||||
|
||||
TXT file:
|
||||
<div class="thumbnail">
|
||||
<img src="./img/example-txt-java.jpg" title="Flexmark/wkhtmltopdf TXT->PDF">
|
||||
</div>
|
||||
{{ figure(file="example-txt-java.jpg") }}
|
||||
|
||||
|
||||
### pandoc
|
||||
## pandoc
|
||||
|
||||
Command:
|
||||
|
||||
@ -155,27 +148,19 @@ pandoc -f markdown -t html -o test.pdf microsite/docs/doc.md
|
||||
```
|
||||
|
||||
Markdown/Latex:
|
||||
<div class="thumbnail">
|
||||
<img src="./img/example-md-pandoc-latex.jpg" title="Pandoc (Latex) MD->PDF">
|
||||
</div>
|
||||
{{ figure(file="example-md-pandoc-latex.jpg") }}
|
||||
|
||||
Markdown/Html:
|
||||
<div class="thumbnail">
|
||||
<img src="./img/example-md-pandoc-html.jpg" title="Pandoc (html) MD->PDF">
|
||||
</div>
|
||||
{{ figure(file="example-md-pandoc-html.jpg") }}
|
||||
|
||||
Text/Latex:
|
||||
<div class="thumbnail">
|
||||
<img src="./img/example-txt-pandoc-latex.jpg" title="Pandoc (Latex) TXT->PDF">
|
||||
</div>
|
||||
{{ figure(file="example-txt-pandoc-latex.jpg") }}
|
||||
|
||||
Text/Html:
|
||||
<div class="thumbnail">
|
||||
<img src="./img/example-txt-pandoc-html.jpg" title="Pandoc (html) TXT->PDF">
|
||||
</div>
|
||||
{{ figure(file="example-txt-pandoc-html.jpg") }}
|
||||
|
||||
|
||||
## Decision Outcome
|
||||
# Decision Outcome
|
||||
|
||||
Java library "flexmark".
|
||||
|
@ -1,12 +1,9 @@
|
||||
---
|
||||
layout: docs
|
||||
title: Convert Office Documents
|
||||
permalink: dev/adr/0009_convert_office_docs
|
||||
---
|
||||
+++
|
||||
title = "Convert Office Documents"
|
||||
weight = 100
|
||||
+++
|
||||
|
||||
# {{ page.title }}
|
||||
|
||||
## Context and Problem Statement
|
||||
# Context and Problem Statement
|
||||
|
||||
How can office documents, like `docx` or `odt` be converted into a PDF
|
||||
file that looks as much as possible like the original?
|
||||
@ -16,13 +13,13 @@ has a better outcome, then an external tool is fine, too.
|
||||
|
||||
Since Docspell is free software, the tools must also be free.
|
||||
|
||||
## Considered Options
|
||||
# Considered Options
|
||||
|
||||
* [Apache POI](https://poi.apache.org) together with
|
||||
[this](https://search.maven.org/artifact/fr.opensagres.xdocreport/org.apache.poi.xwpf.converter.pdf/1.0.6/jar)
|
||||
library
|
||||
* [pandoc](https://pandoc.org/) external command
|
||||
* [abiword]() external command
|
||||
* [abiword](https://www.abisource.com/) external command
|
||||
* [Unoconv](https://github.com/unoconv/unoconv) external command
|
||||
|
||||
To choose an option, some documents are converted to pdf and compared.
|
||||
@ -34,11 +31,9 @@ Here is the native view to compare with:
|
||||
|
||||
ODT:
|
||||
|
||||
<div class="thumbnail">
|
||||
<img src="./img/example-odt-native.jpg" title="Native view of an ODT example file">
|
||||
</div>
|
||||
{{ figure(file="example-odt-native.jpg") }}
|
||||
|
||||
### `XWPFConverter`
|
||||
## `XWPFConverter`
|
||||
|
||||
I couldn't get any example to work. There were exceptions:
|
||||
|
||||
@ -69,7 +64,7 @@ The project (not Apache Poi, the other) seems unmaintained. I could
|
||||
not find any website and the artifact in maven central is from 2016.
|
||||
|
||||
|
||||
### Pandoc
|
||||
## Pandoc
|
||||
|
||||
I know pandoc as a very great tool when converting between markup
|
||||
documents. So this tries it with office documents. It supports `docx`
|
||||
@ -93,9 +88,7 @@ pandoc -f odt -o test.pdf example.odt
|
||||
|
||||
Results ODT:
|
||||
|
||||
<div class="thumbnail">
|
||||
<img src="./img/example-odt-pandoc-latex.jpg" title="Pandoc (Latex) ODT->PDF">
|
||||
</div>
|
||||
{{ figure(file="example-odt-pandoc-latex.jpg") }}
|
||||
|
||||
|
||||
```
|
||||
@ -104,9 +97,7 @@ pandoc -f odt -o test.pdf example.docx
|
||||
|
||||
Results DOCX:
|
||||
|
||||
<div class="thumbnail">
|
||||
<img src="./img/example-docx-pandoc-latex.jpg" title="Pandoc (Latex) DOCX->PDF">
|
||||
</div>
|
||||
{{ figure(file="example-docx-pandoc-latex.jpg") }}
|
||||
|
||||
|
||||
----
|
||||
@ -119,16 +110,12 @@ pandoc -f odt -t context -o test.pdf example.odt
|
||||
|
||||
Results ODT:
|
||||
|
||||
<div class="thumbnail">
|
||||
<img src="./img/example-odt-pandoc-context.jpg" title="Pandoc (Context) ODT->PDF">
|
||||
</div>
|
||||
{{ figure(file="example-odt-pandoc-context.jpg") }}
|
||||
|
||||
|
||||
Results DOCX:
|
||||
|
||||
<div class="thumbnail">
|
||||
<img src="./img/example-docx-pandoc-context.jpg" title="Pandoc (Context) DOCX->PDF">
|
||||
</div>
|
||||
{{ figure(file="example-docx-pandoc-context.jpg") }}
|
||||
|
||||
|
||||
----
|
||||
@ -141,15 +128,11 @@ pandoc -f odt -t ms -o test.pdf example.odt
|
||||
|
||||
Results ODT:
|
||||
|
||||
<div class="thumbnail">
|
||||
<img src="./img/example-odt-pandoc-ms.jpg" title="Pandoc (MS) ODT->PDF">
|
||||
</div>
|
||||
{{ figure(file="example-odt-pandoc-ms.jpg") }}
|
||||
|
||||
Results DOCX:
|
||||
|
||||
<div class="thumbnail">
|
||||
<img src="./img/example-docx-pandoc-ms.jpg" title="Pandoc (MS) DOCX->PDF">
|
||||
</div>
|
||||
{{ figure(file="example-docx-pandoc-ms.jpg") }}
|
||||
|
||||
|
||||
---
|
||||
@ -162,18 +145,14 @@ $ pandoc --extract-media . -f odt -t html -o test.pdf example.odt
|
||||
|
||||
Results ODT:
|
||||
|
||||
<div class="thumbnail">
|
||||
<img src="./img/example-odt-pandoc-html.jpg" title="Pandoc (html) ODT->PDF">
|
||||
</div>
|
||||
{{ figure(file="example-odt-pandoc-html.jpg") }}
|
||||
|
||||
Results DOCX:
|
||||
|
||||
<div class="thumbnail">
|
||||
<img src="./img/example-docx-pandoc-html.jpg" title="Pandoc (html) DOCX->PDF">
|
||||
</div>
|
||||
{{ figure(file="example-docx-pandoc-html.jpg") }}
|
||||
|
||||
|
||||
### Abiword
|
||||
## Abiword
|
||||
|
||||
Trying with:
|
||||
|
||||
@ -183,15 +162,13 @@ abiword --to=pdf example.odt
|
||||
|
||||
Results:
|
||||
|
||||
<div class="thumbnail">
|
||||
<img src="./img/example-odt-abiword.jpg" title="Abiword ODT->PDF">
|
||||
</div>
|
||||
{{ figure(file="example-odt-abiword.jpg") }}
|
||||
|
||||
|
||||
Trying with a `docx` file failed. It worked with a `doc` file.
|
||||
|
||||
|
||||
### Unoconv
|
||||
## Unoconv
|
||||
|
||||
Unoconv relies on libreoffice/openoffice, so installing it will result
|
||||
in installing parts of libreoffice, which is a very large dependency.
|
||||
@ -204,17 +181,13 @@ unoconv -f pdf example.odt
|
||||
|
||||
Results ODT:
|
||||
|
||||
<div class="thumbnail">
|
||||
<img src="./img/example-odt-unoconv.jpg" title="Unoconv ODT->PDF">
|
||||
</div>
|
||||
{{ figure(file="example-odt-unoconv.jpg") }}
|
||||
|
||||
Results DOCX:
|
||||
|
||||
<div class="thumbnail">
|
||||
<img src="./img/example-docx-unoconv.jpg" title="Unoconv ODT->PDF">
|
||||
</div>
|
||||
{{ figure(file="example-docx-unoconv.jpg") }}
|
||||
|
||||
## Decision Outcome
|
||||
# Decision Outcome
|
||||
|
||||
Unoconv.
|
||||
|
@ -1,12 +1,9 @@
|
||||
---
|
||||
layout: docs
|
||||
title: Convert Image Files
|
||||
permalink: dev/adr/0010_convert_image_files
|
||||
---
|
||||
+++
|
||||
title = "Convert Image Files"
|
||||
weight = 110
|
||||
+++
|
||||
|
||||
# {{ page.title }}
|
||||
|
||||
## Context and Problem Statement
|
||||
# Context and Problem Statement
|
||||
|
||||
How to convert image files properly to pdf?
|
||||
|
||||
@ -21,9 +18,9 @@ though:
|
||||
The focus is on document images, maybe from digital cameras or
|
||||
scanners.
|
||||
|
||||
## Considered Options
|
||||
# Considered Options
|
||||
|
||||
* [pdfbox]() library
|
||||
* [pdfbox](https://pdfbox.apache.org/) library
|
||||
* [imagemagick](https://www.imagemagick.org/) external command
|
||||
* [img2pdf](https://github.com/josch/img2pdf) external command
|
||||
* [tesseract](https://github.com/tesseract-ocr/tesseract) external command
|
||||
@ -50,7 +47,7 @@ Size:
|
||||
- letter-en.png 191k
|
||||
- letter-en.tiff 4.0M
|
||||
|
||||
### pdfbox
|
||||
## pdfbox
|
||||
|
||||
Using a java library is preferred, if the quality is good enough.
|
||||
There is an
|
||||
@ -99,7 +96,7 @@ Size:
|
||||
- letter-en.png 142k
|
||||
- letter-en.tiff 142k
|
||||
|
||||
### img2pdf
|
||||
## img2pdf
|
||||
|
||||
This is a python tool that adds the image into the pdf without
|
||||
reencoding.
|
||||
@ -120,7 +117,7 @@ Size:
|
||||
- letter-en.png 191k
|
||||
- letter-en.tiff 192k
|
||||
|
||||
### ImageMagick
|
||||
## ImageMagick
|
||||
|
||||
The well known imagemagick tool can convert images to pdfs, too.
|
||||
|
||||
@ -141,7 +138,7 @@ Size:
|
||||
- letter-en.tiff 5.1M
|
||||
|
||||
|
||||
### Tesseract
|
||||
## Tesseract
|
||||
|
||||
Docspell already relies on tesseract for doing OCR. And in contrast to
|
||||
all other candidates, it can create PDFs that are searchable. Of
|
||||
@ -175,7 +172,7 @@ Size:
|
||||
- letter-en.tiff 183k
|
||||
|
||||
|
||||
## Decision
|
||||
# Decision
|
||||
|
||||
Tesseract.
|
||||
|
@ -1,12 +1,10 @@
|
||||
---
|
||||
layout: docs
|
||||
title: Extract Text from Files
|
||||
permalink: dev/adr/0011_extract_text
|
||||
---
|
||||
+++
|
||||
title = "Extract Text from Files"
|
||||
weight = 120
|
||||
+++
|
||||
|
||||
# Extract Text from Files
|
||||
|
||||
## Context and Problem Statement
|
||||
# Context and Problem Statement
|
||||
|
||||
With support for more file types there must be a way to extract text
|
||||
from all of them. It is better to extract text from the source files,
|
||||
@ -16,15 +14,15 @@ There are multiple options and multiple file types. Again, most
|
||||
priority is to use a java/scala library to reduce external
|
||||
dependencies.
|
||||
|
||||
## Considered Options
|
||||
# Considered Options
|
||||
|
||||
### MS Office Documents
|
||||
## MS Office Documents
|
||||
|
||||
There is only one library I know: [Apache
|
||||
POI](https://poi.apache.org/). It supports `doc(x)` and `xls(x)`.
|
||||
However, it doesn't support open-document format (odt and ods).
|
||||
|
||||
### OpenDocument Format
|
||||
## OpenDocument Format
|
||||
|
||||
There are two libraries:
|
||||
|
||||
@ -43,12 +41,12 @@ text. I created tests that extracted text from my odt/ods files. It
|
||||
worked at first sight, but running the tests in a loop resulted in
|
||||
strange nullpointer exceptions (it only worked the first run).
|
||||
|
||||
### Richtext
|
||||
## Richtext
|
||||
|
||||
Richtext is supported by the jdk (using `RichtextEditorKit` from
|
||||
swing).
|
||||
|
||||
### PDF
|
||||
## PDF
|
||||
|
||||
For "image" pdf files, tesseract is used. For "text" PDF files, the
|
||||
library [Apache PDFBox](https://pdfbox.apache.org) can be used.
|
||||
@ -56,20 +54,20 @@ library [Apache PDFBox](https://pdfbox.apache.org) can be used.
|
||||
There also is [iText](https://github.com/itext/itext7) with an AGPL
|
||||
license.
|
||||
|
||||
### Images
|
||||
## Images
|
||||
|
||||
For images and "image" PDF files, there is already tesseract in place.
|
||||
|
||||
### HTML
|
||||
## HTML
|
||||
|
||||
HTML must be converted into a PDF file before text can be extracted.
|
||||
|
||||
### Text/Markdown
|
||||
## Text/Markdown
|
||||
|
||||
These files can be used as-is, obviously.
|
||||
|
||||
|
||||
## Decision Outcome
|
||||
# Decision Outcome
|
||||
|
||||
- MS Office files: POI library
|
||||
- Open Document files: Tika, but integrating the few source files that
|
@ -1,12 +1,9 @@
|
||||
---
|
||||
layout: docs
|
||||
title: Periodic Tasks
|
||||
permalink: dev/adr/0012_periodic_tasks
|
||||
---
|
||||
+++
|
||||
title = "Periodic Tasks"
|
||||
weight = 130
|
||||
+++
|
||||
|
||||
# Periodic Tasks
|
||||
|
||||
## Context and Problem Statement
|
||||
# Context and Problem Statement
|
||||
|
||||
Currently there is a `Scheduler` that consumes tasks off a queue in
|
||||
the database. This allows multiple job executors running in parallel
|
||||
@ -27,7 +24,7 @@ than once. If a periodic tasks takes longer than the time between
|
||||
runs, it must wait for the next interval.
|
||||
|
||||
|
||||
## Considered Options
|
||||
# Considered Options
|
||||
|
||||
1. Adding a `timer` and `nextrun` field to the current `job` table
|
||||
2. Creating a separate table for periodic tasks
|
@ -1,13 +1,10 @@
|
||||
---
|
||||
layout: docs
|
||||
title: Archive Files
|
||||
permalink: dev/adr/0013_archive_files
|
||||
---
|
||||
|
||||
# {{ page.title }}
|
||||
+++
|
||||
title = "Archive Files"
|
||||
weight = 140
|
||||
+++
|
||||
|
||||
|
||||
## Context and Problem Statement
|
||||
# Context and Problem Statement
|
||||
|
||||
Docspell should have support for files that contain the actual files
|
||||
that matter, like zip files and other such things. It should extract
|
||||
@ -20,7 +17,7 @@ the file unmodified.
|
||||
On the other hand, files in there need to be text analysed and
|
||||
converted to pdf files.
|
||||
|
||||
## Decision Outcome
|
||||
# Decision Outcome
|
||||
|
||||
There is currently a table `attachment_source` which holds references
|
||||
to "original" files. These are the files as uploaded by the user,
|
||||
@ -40,6 +37,6 @@ Archive may contain other archives. Then the inner archives will not
|
||||
be saved. The archive file is extracted recursively, until there is no
|
||||
known archive file found.
|
||||
|
||||
## Initial Support
|
||||
# Initial Support
|
||||
|
||||
Initial support is implemented for ZIP and EML (e-mail files) files.
|
@ -1,14 +1,11 @@
|
||||
---
|
||||
layout: docs
|
||||
title: Fulltext Search Engine
|
||||
permalink: dev/adr/0014_fulltext_search_engine
|
||||
---
|
||||
|
||||
# {{ page.title }}
|
||||
+++
|
||||
title = "Fulltext Search Engine"
|
||||
weight = 150
|
||||
+++
|
||||
|
||||
It should be possible to search the contents of all documents.
|
||||
|
||||
## Context and Problem Statement
|
||||
# Context and Problem Statement
|
||||
|
||||
To allow searching the documents' contents efficiently, a separate
|
||||
index is necessary. The "de facto standard" for fulltext search on the
|
||||
@ -21,14 +18,14 @@ feature, it shouldn't have a huge impact on the application, i.e. if
|
||||
the fulltext search component is down or broken, docspell should still
|
||||
work (just the fulltext search is then not working).
|
||||
|
||||
## Considered Options
|
||||
# Considered Options
|
||||
|
||||
* [Apache SOLR](https://lucene.apache.org/solr)
|
||||
* [ElasticSearch](https://www.elastic.co/elasticsearch/)
|
||||
* [PostgreSQL](https://www.postgresql.org/docs/12/textsearch.html)
|
||||
* All of them or a subset
|
||||
|
||||
## Decision Outcome
|
||||
# Decision Outcome
|
||||
|
||||
If docspell is running on PostgreSQL, it would be nice to also use it
|
||||
for fulltext search to save the cost of running another component. But
|
@ -1,12 +1,9 @@
|
||||
---
|
||||
layout: docs
|
||||
title: Convert PDF Files
|
||||
permalink: dev/adr/0015_convert_pdf_files
|
||||
---
|
||||
+++
|
||||
title = "Convert PDF Files"
|
||||
weight = 160
|
||||
+++
|
||||
|
||||
# {{ page.title }}
|
||||
|
||||
## Context and Problem Statement
|
||||
# Context and Problem Statement
|
||||
|
||||
Some PDFs contain only images (when coming from a scanner) and
|
||||
therefore one is not able to click into the pdf and select text for
|
||||
@ -18,20 +15,20 @@ For images, this works already as tesseract is used to create the PDF
|
||||
files. Tesseract creates the files with an additional text layer
|
||||
containing the OCRed text.
|
||||
|
||||
## Considered Options
|
||||
# Considered Options
|
||||
|
||||
* [ocrmypdf](https://github.com/jbarlow83/OCRmyPDF) OCRmyPDF adds an
|
||||
OCR text layer to scanned PDF files, allowing them to be searched
|
||||
|
||||
|
||||
### ocrmypdf
|
||||
## ocrmypdf
|
||||
|
||||
This is a very nice python tool, that uses tesseract to do OCR on each
|
||||
page and add the extracted text as a pdf text layer to the page.
|
||||
Additionally it creates PDF/A type pdfs, which are great for
|
||||
archiving. This fixes exactly the things stated above.
|
||||
|
||||
#### Integration
|
||||
### Integration
|
||||
|
||||
Docspell already has this built in for images. When converting images
|
||||
to a PDF (which is done early in processing), the process creates a
|
||||
@ -61,7 +58,7 @@ converted file containing the OCR-ed text as a pdf layer. If ocrmypdf
|
||||
is disabled, the converted file and the source file are the same for
|
||||
PDFs.
|
||||
|
||||
## Decision Outcome
|
||||
# Decision Outcome
|
||||
|
||||
Add ocrmypdf as an optional conversion from PDF to PDF. Ocrmypdf is
|
||||
distributed under the GPL-3 license.
|
14
website/site/content/docs/dev/adr/_index.md
Normal file
@ -0,0 +1,14 @@
|
||||
+++
|
||||
title = "ADRs"
|
||||
description = "Contains some ADRs, which are internal notes on decisions made."
|
||||
weight = 300
|
||||
sort_by = "weight"
|
||||
insert_anchor_links = "right"
|
||||
template = "pages.html"
|
||||
[extra]
|
||||
mktoc = true
|
||||
+++
|
||||
|
||||
This contains a list of ADRs, most of which are from very early on. It
|
||||
often just contains notes that could go nowhere else, but still should
|
||||
be captured.
|
Before Width: | Height: | Size: 385 KiB After Width: | Height: | Size: 385 KiB |
Before Width: | Height: | Size: 443 KiB After Width: | Height: | Size: 443 KiB |
Before Width: | Height: | Size: 291 KiB After Width: | Height: | Size: 291 KiB |
Before Width: | Height: | Size: 353 KiB After Width: | Height: | Size: 353 KiB |
Before Width: | Height: | Size: 292 KiB After Width: | Height: | Size: 292 KiB |
Before Width: | Height: | Size: 145 KiB After Width: | Height: | Size: 145 KiB |
Before Width: | Height: | Size: 167 KiB After Width: | Height: | Size: 167 KiB |
Before Width: | Height: | Size: 135 KiB After Width: | Height: | Size: 135 KiB |
Before Width: | Height: | Size: 148 KiB After Width: | Height: | Size: 148 KiB |
Before Width: | Height: | Size: 142 KiB After Width: | Height: | Size: 142 KiB |
Before Width: | Height: | Size: 586 KiB After Width: | Height: | Size: 586 KiB |
Before Width: | Height: | Size: 479 KiB After Width: | Height: | Size: 479 KiB |
Before Width: | Height: | Size: 280 KiB After Width: | Height: | Size: 280 KiB |
Before Width: | Height: | Size: 270 KiB After Width: | Height: | Size: 270 KiB |
Before Width: | Height: | Size: 363 KiB After Width: | Height: | Size: 363 KiB |
Before Width: | Height: | Size: 418 KiB After Width: | Height: | Size: 418 KiB |
Before Width: | Height: | Size: 500 KiB After Width: | Height: | Size: 500 KiB |
Before Width: | Height: | Size: 349 KiB After Width: | Height: | Size: 349 KiB |
Before Width: | Height: | Size: 350 KiB After Width: | Height: | Size: 350 KiB |
Before Width: | Height: | Size: 296 KiB After Width: | Height: | Size: 296 KiB |
Before Width: | Height: | Size: 176 KiB After Width: | Height: | Size: 176 KiB |
Before Width: | Height: | Size: 174 KiB After Width: | Height: | Size: 174 KiB |
Before Width: | Height: | Size: 155 KiB After Width: | Height: | Size: 155 KiB |
Before Width: | Height: | Size: 49 KiB After Width: | Height: | Size: 49 KiB |
@ -1,7 +1,7 @@
|
||||
---
|
||||
layout: docs
|
||||
title: Short Title
|
||||
---
|
||||
+++
|
||||
title = "Short Title"
|
||||
draft = true
|
||||
+++
|
||||
|
||||
# [short title of solved problem and solution]
|
||||
|
22
website/site/content/docs/dev/building.md
Normal file
@ -0,0 +1,22 @@
|
||||
+++
|
||||
title = "Building Docspell"
|
||||
weight = 0
|
||||
+++
|
||||
|
||||
|
||||
You must install [sbt](https://scala-sbt.org) and [Elm](https://elm-lang.org).
|
||||
|
||||
Clone the sources and run:
|
||||
|
||||
- `make` to compile all sources (Elm + Scala)
|
||||
- `make-zip` to create zip packages
|
||||
- `make-deb` to create debian packages
|
||||
- `make-tools` to create a zip containing the script in `tools/`
|
||||
- `make-pkg` for a clean compile + building all packages (zip + deb)
|
||||
|
||||
The `zip` and `deb` files can be found afterwards in:
|
||||
|
||||
```
|
||||
modules/restserver/target/universal
|
||||
modules/joex/target/universal
|
||||
```
|
@ -1,33 +1,9 @@
|
||||
---
|
||||
layout: docs
|
||||
title: Development
|
||||
permalink: dev
|
||||
---
|
||||
+++
|
||||
title = "Tips & Setup"
|
||||
weight = 20
|
||||
+++
|
||||
|
||||
|
||||
# {{page.title}}
|
||||
|
||||
|
||||
## Building
|
||||
|
||||
[Sbt](https://scala-sbt.org) is used to build the application. Clone
|
||||
the sources and run:
|
||||
|
||||
- `make` to compile all sources (Elm + Scala)
|
||||
- `make-zip` to create zip packages
|
||||
- `make-deb` to create debian packages
|
||||
- `make-tools` to create a zip containing the script in `tools/`
|
||||
- `make-pkg` for a clean compile + building all packages (zip + deb)
|
||||
|
||||
The zip files can be found afterwards in:
|
||||
|
||||
```
|
||||
modules/restserver/target/universal
|
||||
modules/joex/target/universal
|
||||
```
|
||||
|
||||
|
||||
## Starting Servers with `reStart`
|
||||
# Starting Servers with `reStart`
|
||||
|
||||
When developing, it's very convenient to use the [revolver sbt
|
||||
plugin](https://github.com/spray/sbt-revolver). Start the sbt console
|
||||
@ -53,14 +29,14 @@ sbt:docspell-root> reStart
|
||||
```
|
||||
|
||||
|
||||
## Custom config file
|
||||
# Custom config file
|
||||
|
||||
The sbt build is set up such that a file `dev.conf` in the directory
|
||||
`local` (at root of the source tree) is picked up as config file, if
|
||||
it exists. So you can create a custom config file for development. For
|
||||
example, a custom database for development may be set up this way:
|
||||
|
||||
```
|
||||
``` conf
|
||||
#jdbcurl = "jdbc:h2:///home/dev/workspace/projects/docspell/local/docspell-demo.db;MODE=PostgreSQL;DATABASE_TO_LOWER=TRUE;AUTO_SERVER=TRUE"
|
||||
jdbcurl = "jdbc:postgresql://localhost:5432/docspelldev"
|
||||
#jdbcurl = "jdbc:mariadb://localhost:3306/docspelldev"
|
||||
@ -87,17 +63,17 @@ docspell.joex {
|
||||
}
|
||||
```
|
||||
|
||||
## Nix Expressions
|
||||
# Nix Expressions
|
||||
|
||||
The directory `/nix` contains nix expressions to install docspell via
|
||||
the nix package manager and to integrate it into NixOS.
|
||||
|
||||
### Testing NixOS Modules
|
||||
## Testing NixOS Modules
|
||||
|
||||
The modules can be built by building the `configuration-test.nix` file
|
||||
together with some nixpkgs version. For example:
|
||||
|
||||
``` shell
|
||||
``` bash
|
||||
nixos-rebuild build-vm -I nixos-config=./configuration-test.nix \
|
||||
-I nixpkgs=https://github.com/NixOS/nixpkgs-channels/archive/nixos-19.09.tar.gz
|
||||
```
|
||||
@ -108,21 +84,27 @@ the system configuration can be found behind the `./result/system`
|
||||
symlink. So it is possible to look at the generated systemd config for
|
||||
example:
|
||||
|
||||
``` shell
|
||||
``` bash
|
||||
cat result/system/etc/systemd/system/docspell-joex.service
|
||||
```
|
||||
|
||||
And with some more commands (there probably is an easier way…) the
|
||||
config file can be checked:
|
||||
|
||||
``` shell
|
||||
``` bash
|
||||
cat result/system/etc/systemd/system/docspell-joex.service | grep ExecStart | cut -d'=' -f2 | xargs cat | tail -n1 | awk '{print $NF}'| sed 's/.$//' | xargs cat | jq
|
||||
```
|
||||
|
||||
To see the module in action, the vm can be started (the first line
|
||||
sets more memory for the vm):
|
||||
|
||||
``` shell
|
||||
``` bash
|
||||
export QEMU_OPTS="-m 2048"
|
||||
./result/bin/run-docspelltest-vm
|
||||
```
|
||||
|
||||
|
||||
# Background Info
|
||||
|
||||
There is a list of [ADRs](@/docs/dev/adr/_index.md) containing
|
||||
internal/background info for various topics.
|
@ -1,8 +1,9 @@
|
||||
---
|
||||
layout: docs
|
||||
title: Features and Limitations
|
||||
permalink: features
|
||||
---
|
||||
+++
|
||||
title = "Features and Limitations"
|
||||
weight = 10
|
||||
insert_anchor_links = "right"
|
||||
description = "A list of features and limitations."
|
||||
+++
|
||||
|
||||
# Features
|
||||
|
||||
@ -11,27 +12,28 @@ permalink: features
|
||||
account)
|
||||
- Handle multiple documents as one unit
|
||||
- OCR using [tesseract](https://github.com/tesseract-ocr/tesseract)
|
||||
- [Full-Text Search](doc/finding#full-text-search) based on [Apache
|
||||
SOLR](https://lucene.apache.org/solr)
|
||||
- [Full-Text Search](@/docs/webapp/finding.md#full-text-search) based
|
||||
on [Apache SOLR](https://lucene.apache.org/solr)
|
||||
- Conversion to PDF: all files are converted into a PDF file. PDFs
|
||||
with only images (as often returned from scanners) are converted
|
||||
into searchable PDF/A pdfs.
|
||||
- Non-destructive: your uploaded files are never modified and can
|
||||
always be downloaded untouched
|
||||
- Text is analysed to find and attach meta data automatically
|
||||
- [Manage document processing](doc/processing): cancel jobs, set
|
||||
priorities
|
||||
- [Manage document processing](@/docs/webapp/processing.md): cancel
|
||||
jobs, set priorities
|
||||
- Everything available via a [documented](https://www.openapis.org/)
|
||||
[REST Api](api); allows to [generate
|
||||
[REST Api](@/docs/api/_index.md); allows you to [generate
|
||||
clients](https://openapi-generator.tech/docs/generators) for
|
||||
(almost) any language
|
||||
- mobile-friendly Web-UI
|
||||
- [Create “share-urls”](doc/uploading#anonymous-upload) to upload files
|
||||
anonymously
|
||||
- [Send documents via e-mail](doc/mailitem)
|
||||
- [E-Mail notification](doc/notifydueitems) for documents with due dates
|
||||
- [Read your mailboxes](doc/scanmailbox) via IMAP to import mails into
|
||||
docspell
|
||||
- [Create “share-urls”](@/docs/webapp/uploading.md#anonymous-upload)
|
||||
to upload files anonymously
|
||||
- [Send documents via e-mail](@/docs/webapp/mailitem.md)
|
||||
- [E-Mail notification](@/docs/webapp/notifydueitems.md) for documents
|
||||
with due dates
|
||||
- [Read your mailboxes](@/docs/webapp/scanmailbox.md) via IMAP to
|
||||
import mails into docspell
|
||||
- REST server and document processing are separate applications which
|
||||
can be scaled-out independently
|
||||
- Everything stored in a SQL database: PostgreSQL, MariaDB or H2
|
||||
@ -50,14 +52,14 @@ permalink: features
|
||||
- zip
|
||||
- [eml](https://en.wikipedia.org/wiki/Email#Filename_extensions)
|
||||
(e-mail files in plain text MIME)
|
||||
- [Tooling](doc/tools):
|
||||
- [Watch a folder](doc/tools/consumedir): watch folders for changes
|
||||
and send files to docspell
|
||||
- [Simple CLI for uploading files](doc/tools/ds)
|
||||
- [Firefox plugin](doc/tools/browserext): right click on a link and
|
||||
send the file to docspell
|
||||
- [SMTP Gateway](doc/tools/smtpgateway): Setup a SMTP server that
|
||||
delivers mails directly to docspell.
|
||||
- Tooling:
|
||||
- [Watch a folder](@/docs/tools/consumedir.md): watch folders for
|
||||
changes and send files to docspell
|
||||
- [Simple CLI for uploading files](@/docs/tools/ds.md)
|
||||
- [Firefox plugin](@/docs/tools/browserext.md): right click on a
|
||||
link and send the file to docspell
|
||||
- [SMTP Gateway](@/docs/tools/smtpgateway.md): Set up an SMTP server
|
||||
that delivers mails directly to docspell.
|
||||
- License: GPLv3
|
||||
|
||||
|
9
website/site/content/docs/install/_index.md
Normal file
@ -0,0 +1,9 @@
|
||||
+++
|
||||
title = "Installation and Deployment"
|
||||
description = "There are multiple ways to install Docspell. This section contains detailed instructions."
|
||||
weight = 30
|
||||
sort_by = "weight"
|
||||
insert_anchor_links = "right"
|
||||
template = "pages.html"
|
||||
redirect_to = "/docs/install/quickstart"
|
||||
+++
|
@ -1,10 +1,96 @@
|
||||
---
|
||||
layout: docs
|
||||
title: Nix/NixOS
|
||||
permalink: doc/nix
|
||||
---
|
||||
+++
|
||||
title = "Installing"
|
||||
weight = 20
|
||||
+++
|
||||
|
||||
# {{ page.title }}
|
||||
# Docker
|
||||
|
||||
There is a [docker-compose](https://docs.docker.com/compose/) setup
|
||||
available in the `/docker` folder. This setup is also taking care of
|
||||
all the necessary [prerequisites](@/docs/install/prereq.md) and
|
||||
creates a container to watch a directory for incoming files. It's only
|
||||
a few steps:
|
||||
|
||||
1. Clone the github repository
|
||||
```bash
|
||||
$ git clone https://github.com/eikek/docspell
|
||||
```
|
||||
2. Change into the `docker` directory:
|
||||
```bash
|
||||
$ cd docspell/docker
|
||||
```
|
||||
3. Run `docker-compose up`:
|
||||
|
||||
```bash
|
||||
$ export DOCSPELL_HEADER_VALUE="my-secret-123"
|
||||
$ docker-compose up
|
||||
```
|
||||
|
||||
The environment variable defines a secret that is shared between
|
||||
some containers. You can define whatever you like. Please see the
|
||||
[consumedir.sh](@/docs/tools/consumedir.md#docker) docs for
|
||||
additional info.
|
||||
4. Go to `http://localhost:7880`, sign up and log in. When signing up,
|
||||
you can choose the same name for collective and user. Then log in
|
||||
with this name and the password.
|
||||
|
||||
5. (Optional) Create a folder `./docs/<collective-name>` (the name you
|
||||
chose for the collective at registration) and place files in there
|
||||
for importing them.
|
||||
|
||||
The directory contains a file `docspell.conf` that you can
|
||||
[modify](@/docs/configure/_index.md) as needed.
|
||||
|
||||
# Download, Unpack, Run
|
||||
|
||||
You can install via zip or deb archives. Please see the
|
||||
[prerequisites](@/docs/install/prereq.md) first.
|
||||
|
||||
## Using zip files
|
||||
|
||||
You need to download the two files:
|
||||
|
||||
- [docspell-restserver-{{version()}}.zip](https://github.com/eikek/docspell/releases/download/v{{version()}}/docspell-restserver-{{version()}}.zip)
|
||||
- [docspell-joex-{{version()}}.zip](https://github.com/eikek/docspell/releases/download/v{{version()}}/docspell-joex-{{version()}}.zip)
|
||||
|
||||
|
||||
1. Unzip both files:
|
||||
``` bash
|
||||
$ unzip docspell-*.zip
|
||||
```
|
||||
2. Open two terminal windows and navigate to the directory
|
||||
containing the zip files.
|
||||
3. Start both components executing:
|
||||
``` bash
|
||||
$ ./docspell-restserver*/bin/docspell-restserver
|
||||
```
|
||||
in one terminal and
|
||||
``` bash
|
||||
$ ./docspell-joex*/bin/docspell-joex
|
||||
```
|
||||
in the other.
|
||||
4. Point your browser to: <http://localhost:7880/app>
|
||||
5. Register a new account, sign in and try it.
|
||||
|
||||
Note that this setup doesn't include watching a directory. You can
|
||||
use the [consumedir.sh](@/docs/tools/consumedir.md) tool for this or
|
||||
use the docker variant above.
|
||||
|
||||
## Using deb files
|
||||
|
||||
The DEB packages can be installed on Debian, or Debian-based distros:
|
||||
|
||||
``` bash
|
||||
$ sudo dpkg -i docspell*.deb
|
||||
```
|
||||
|
||||
Then the start scripts are in your `$PATH`. Run `docspell-restserver`
|
||||
or `docspell-joex` from a terminal window.
|
||||
|
||||
The packages come with a systemd unit file that will be installed to
|
||||
autostart the services.
|
||||
|
||||
# Nix
|
||||
|
||||
## Install via Nix
|
||||
|
||||
@ -56,15 +142,16 @@ $ nix-env -iA nixpkgs.docspell.server nixpkgs.docspell.joex nixpkgs.docspell.too
|
||||
|
||||
You may need to replace `nixpkgs` with `nixos` when you're on NixOS.
|
||||
|
||||
The expression `docspell.currentPkg` refers to the most current release
|
||||
of Docspell. So even if you use the tarball of the current master
|
||||
branch, the `release.nix` file only contains derivations for releases.
|
||||
The expression `docspell.currentPkg` is a shortcut for selecting the
|
||||
most current release. For example it translates to `docspell.pkg
|
||||
docspell.cfg.v@PVERSION@` – if the current version is `@VERSION@`.
|
||||
The expression `docspell.currentPkg` refers to the most current
|
||||
release of Docspell. So even if you use the tarball of the current
|
||||
master branch, the `release.nix` file only contains derivations for
|
||||
releases. The expression `docspell.currentPkg` is a shortcut for
|
||||
selecting the most current release. For example it translates to
|
||||
`docspell.pkg docspell.cfg.v{{ pversion() }}` – if the current version
|
||||
is `{{version()}}`.
|
||||
|
||||
|
||||
## Docspell as a service on NixOS
|
||||
## Docspell on NixOS {#nixos}
|
||||
|
||||
If you are running [NixOS](https://nixos.org), there is a module
|
||||
definition for installing Docspell as a service using systemd.
|
||||
@ -132,7 +219,7 @@ The modules files are only applicable to the newest version of
|
||||
Docspell. If you really need an older version, checkout the
|
||||
appropriate commit.
|
||||
|
||||
## NixOs Example
|
||||
### NixOS Example
|
||||
|
||||
This is an example system configuration that installs docspell with a
|
||||
postgres database. This snippet can be used to create a vm (using
|