mirror of
https://github.com/TheAnachronism/docspell.git
synced 2024-11-13 02:31:10 +00:00
44 lines
739 B
Plaintext
44 lines
739 B
Plaintext
@startuml
' Processing Files
'
' Shows which conversion step is applied to each kind of input file.
' Every edge goes from a source (an input file type or an intermediate
' result) to the result it is converted into; the edge label names the
' converter used for that step.
' NOTE(review): labels use two bracket styles, <name> and [name]. This
' presumably distinguishes external programs from in-process libraries;
' confirm against the project documentation before relying on it.

scale 1200 width
title: Processing Files

' Global look: black and white on a white canvas.
skinparam monochrome true
skinparam backgroundColor white

' Corner radius per stereotype for rectangles.
skinparam rectangle {
  roundCorner<<Input>> 25
  roundCorner<<Output>> 5
}

' All input file types accepted by the pipeline, grouped in one container.
rectangle Input <<Input>> {
  file "html"
  file "plaintext"
  file "image"
  file "msoffice"
  file "rtf"
  file "odf"
  file "pdf"
}

' Conversion results. The label text is kept unindented so that no
' leading whitespace can end up in the rendered node.
node toBoth [
PDF + TXT
]
node toPdf [
PDF
]
node toTxt [
TXT
]

' Conversion edges, one per (source, result, converter) combination.
image --> toBoth:<tesseract>
html --> toPdf:<wkhtmltopdf>
toPdf --> toTxt:[pdfbox]
plaintext --> html:[flexmark]
msoffice --> toPdf:<unoconv>
msoffice --> toTxt:[poi]
rtf --> toTxt:[jdk]
rtf --> toPdf:<unoconv>
odf --> toTxt:[tika]
odf --> toPdf:<unoconv>
pdf --> toTxt:<tesseract>
pdf --> toTxt:[pdfbox]
' Single-dash arrow kept as in the original; it affects layout direction only.
plaintext -> toTxt:[identity]
@enduml