docspell/website/site/content/docs/dev/adr/process-files.puml

44 lines
739 B
Plaintext
Raw Normal View History

2020-07-27 20:13:22 +00:00
@startuml
' Diagram: which conversion path each supported input file type takes to
' reach a PDF and/or a plain-text (TXT) representation.

' --- Rendering settings ---
scale 1200 width
title: Processing Files
skinparam monochrome true
skinparam backgroundColor white
' Corner radius is selected per stereotype. Only <<Input>> is applied to an
' element in this diagram; no element here carries <<Output>>.
skinparam rectangle {
roundCorner<<Input>> 25
roundCorner<<Output>> 5
}
' --- Input side: one file element per supported source format ---
rectangle Input <<Input>> {
file "html"
file "plaintext"
file "image"
file "msoffice"
file "rtf"
file "odf"
file "pdf"
}
' --- Output side: conversion targets ---
' toBoth: a single step yields PDF and TXT together.
node toBoth [
PDF + TXT
]
' toPdf: a PDF result; text is then derived from it (see toPdf --> toTxt).
node toPdf [
PDF
]
' toTxt: the extracted plain text.
node toTxt [
TXT
]
' --- Conversion edges ---
' Each edge label names the tool used for that step.
' NOTE(review): labels use two bracket styles, <...> and [...]. Presumably
' <...> marks an external program and [...] an in-process library - confirm.
image --> toBoth:<tesseract>
html --> toPdf:<wkhtmltopdf>
' Any PDF produced by a conversion above/below gets its text extracted.
toPdf --> toTxt:[pdfbox]
' plaintext is first turned into html (an input-to-input edge), which then
' follows the html --> toPdf path.
plaintext --> html:[flexmark]
' Office-like formats have two independent edges: one to PDF, one to TXT.
msoffice --> toPdf:<unoconv>
msoffice --> toTxt:[poi]
rtf --> toTxt:[jdk]
rtf --> toPdf:<unoconv>
odf --> toTxt:[tika]
odf --> toPdf:<unoconv>
' pdf input has two alternative edges to TXT.
pdf --> toTxt:<tesseract>
pdf --> toTxt:[pdfbox]
' plaintext is already text, hence the [identity] label. This is the only
' edge written with `->` instead of `-->`.
' NOTE(review): in PlantUML the dash count usually only influences layout
' (arrow length/direction) - confirm this is intentional.
plaintext -> toTxt:[identity]
@enduml