From 5fe532001b196cb5a38fa19c154ae9da63b74620 Mon Sep 17 00:00:00 2001 From: Eike Kettner Date: Mon, 23 Nov 2020 14:31:58 +0100 Subject: [PATCH 1/4] Allow to specify document language with the request --- .../main/scala/docspell/backend/ops/OUpload.scala | 15 ++++++++++++--- .../scala/docspell/common/ScanMailboxArgs.scala | 4 +++- .../joex/scanmailbox/ScanMailboxTask.scala | 3 ++- .../src/main/resources/docspell-openapi.yml | 12 ++++++++++++ .../docspell/restserver/conv/Conversions.scala | 8 ++++++-- .../restserver/routes/ScanMailboxRoutes.scala | 6 ++++-- 6 files changed, 39 insertions(+), 9 deletions(-) diff --git a/modules/backend/src/main/scala/docspell/backend/ops/OUpload.scala b/modules/backend/src/main/scala/docspell/backend/ops/OUpload.scala index 8a0fc672..9183868e 100644 --- a/modules/backend/src/main/scala/docspell/backend/ops/OUpload.scala +++ b/modules/backend/src/main/scala/docspell/backend/ops/OUpload.scala @@ -67,7 +67,8 @@ object OUpload { validFileTypes: Seq[MimeType], skipDuplicates: Boolean, fileFilter: Glob, - tags: List[String] + tags: List[String], + language: Option[Language] ) case class UploadData[F[_]]( @@ -125,11 +126,19 @@ object OUpload { _ <- checkExistingItem(itemId, account.collective) files <- right(data.files.traverse(saveFile).map(_.flatten)) _ <- checkFileList(files) - lang <- right(store.transact(RCollective.findLanguage(account.collective))) + lang <- data.meta.language match { + case Some(lang) => right(lang.pure[F]) + case None => + right( + store + .transact(RCollective.findLanguage(account.collective)) + .map(_.getOrElse(Language.German)) + ) + } meta = ProcessItemArgs.ProcessMeta( account.collective, itemId, - lang.getOrElse(Language.German), + lang, data.meta.direction, data.meta.sourceAbbrev, data.meta.folderId, diff --git a/modules/common/src/main/scala/docspell/common/ScanMailboxArgs.scala b/modules/common/src/main/scala/docspell/common/ScanMailboxArgs.scala index 781a3589..df9fa87d 100644 --- 
a/modules/common/src/main/scala/docspell/common/ScanMailboxArgs.scala +++ b/modules/common/src/main/scala/docspell/common/ScanMailboxArgs.scala @@ -35,7 +35,9 @@ case class ScanMailboxArgs( // set a list of tags to apply to new item tags: Option[List[String]], // a glob filter for the mail subject - subjectFilter: Option[Glob] + subjectFilter: Option[Glob], + // the language for extraction and analysis + language: Option[Language] ) object ScanMailboxArgs { diff --git a/modules/joex/src/main/scala/docspell/joex/scanmailbox/ScanMailboxTask.scala b/modules/joex/src/main/scala/docspell/joex/scanmailbox/ScanMailboxTask.scala index 3478dd17..4390da3a 100644 --- a/modules/joex/src/main/scala/docspell/joex/scanmailbox/ScanMailboxTask.scala +++ b/modules/joex/src/main/scala/docspell/joex/scanmailbox/ScanMailboxTask.scala @@ -277,7 +277,8 @@ object ScanMailboxTask { Seq.empty, true, args.fileFilter.getOrElse(Glob.all), - args.tags.getOrElse(Nil) + args.tags.getOrElse(Nil), + args.language ) data = OUpload.UploadData( multiple = false, diff --git a/modules/restapi/src/main/resources/docspell-openapi.yml b/modules/restapi/src/main/resources/docspell-openapi.yml index bff39091..4d0cbb04 100644 --- a/modules/restapi/src/main/resources/docspell-openapi.yml +++ b/modules/restapi/src/main/resources/docspell-openapi.yml @@ -3826,6 +3826,12 @@ components: A glob to filter attachments to import by subject. type: string format: glob + language: + description: | + The language used for text extraction and analysis when + processing mails. + type: string + format: language ImapSettingsList: description: | @@ -4601,6 +4607,9 @@ components: The `tags` input allows to provide tags that should be applied to the item being created. This only works if the tags already exist. It is possible to specify their ids or names. + + The `language` of the document may be specified, otherwise the + one from settings is used. 
required: - multiple properties: @@ -4621,6 +4630,9 @@ components: fileFilter: type: string format: glob + language: + type: string + format: language Collective: description: | diff --git a/modules/restserver/src/main/scala/docspell/restserver/conv/Conversions.scala b/modules/restserver/src/main/scala/docspell/restserver/conv/Conversions.scala index 35a88eaa..48728535 100644 --- a/modules/restserver/src/main/scala/docspell/restserver/conv/Conversions.scala +++ b/modules/restserver/src/main/scala/docspell/restserver/conv/Conversions.scala @@ -326,13 +326,17 @@ trait Conversions { validFileTypes, m.skipDuplicates.getOrElse(false), m.fileFilter.getOrElse(Glob.all), - m.tags.map(_.items).getOrElse(Nil) + m.tags.map(_.items).getOrElse(Nil), + m.language ) ) ) ) .getOrElse( - (true, UploadMeta(None, sourceName, None, validFileTypes, false, Glob.all, Nil)) + ( + true, + UploadMeta(None, sourceName, None, validFileTypes, false, Glob.all, Nil, None) + ) .pure[F] ) diff --git a/modules/restserver/src/main/scala/docspell/restserver/routes/ScanMailboxRoutes.scala b/modules/restserver/src/main/scala/docspell/restserver/routes/ScanMailboxRoutes.scala index 36ffbd1d..596fd3f6 100644 --- a/modules/restserver/src/main/scala/docspell/restserver/routes/ScanMailboxRoutes.scala +++ b/modules/restserver/src/main/scala/docspell/restserver/routes/ScanMailboxRoutes.scala @@ -116,7 +116,8 @@ object ScanMailboxRoutes { settings.itemFolder, settings.fileFilter, settings.tags.map(_.items), - settings.subjectFilter + settings.subjectFilter, + settings.language ) ) ) @@ -147,6 +148,7 @@ object ScanMailboxRoutes { task.args.itemFolder, task.args.tags.map(StringList.apply), task.args.fileFilter, - task.args.subjectFilter + task.args.subjectFilter, + task.args.language ) } From 7fee52f6f2fd7815c1470069b14ed25e435460cc Mon Sep 17 00:00:00 2001 From: Eike Kettner Date: Mon, 23 Nov 2020 21:00:19 +0100 Subject: [PATCH 2/4] Add language to scan-mailbox-form --- .../src/main/elm/Comp/ScanMailboxForm.elm 
| 60 +++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/modules/webapp/src/main/elm/Comp/ScanMailboxForm.elm b/modules/webapp/src/main/elm/Comp/ScanMailboxForm.elm index 2a72e531..eb8bd811 100644 --- a/modules/webapp/src/main/elm/Comp/ScanMailboxForm.elm +++ b/modules/webapp/src/main/elm/Comp/ScanMailboxForm.elm @@ -20,12 +20,14 @@ import Api.Model.Tag exposing (Tag) import Api.Model.TagList exposing (TagList) import Comp.CalEventInput import Comp.Dropdown exposing (isDropdownChangeMsg) +import Comp.FixedDropdown import Comp.IntField import Comp.StringListInput import Comp.YesNoDimmer import Data.CalEvent exposing (CalEvent) import Data.Direction exposing (Direction(..)) import Data.Flags exposing (Flags) +import Data.Language exposing (Language) import Data.UiSettings exposing (UiSettings) import Data.Validated exposing (Validated(..)) import Html exposing (..) @@ -64,6 +66,8 @@ type alias Model = , existingTags : List String , fileFilter : Maybe String , subjectFilter : Maybe String + , languageModel : Comp.FixedDropdown.Model Language + , language : Maybe Language } @@ -96,6 +100,8 @@ type Msg | TagDropdownMsg (Comp.Dropdown.Msg Tag) | SetFileFilter String | SetSubjectFilter String + | LanguageMsg (Comp.FixedDropdown.Msg Language) + | RemoveLanguage initWith : Flags -> ScanMailboxSettings -> ( Model, Cmd Msg ) @@ -138,6 +144,9 @@ initWith flags s = |> Maybe.withDefault [] , fileFilter = s.fileFilter , subjectFilter = s.subjectFilter + , languageModel = + Comp.FixedDropdown.init (List.map mkLanguageItem Data.Language.all) + , language = Maybe.andThen Data.Language.fromString s.language } , Cmd.batch [ Api.getImapSettings flags "" ConnResp @@ -188,6 +197,9 @@ init flags = , existingTags = [] , fileFilter = Nothing , subjectFilter = Nothing + , languageModel = + Comp.FixedDropdown.init (List.map mkLanguageItem Data.Language.all) + , language = Nothing } , Cmd.batch [ Api.getImapSettings flags "" ConnResp @@ -197,6 +209,11 @@ init flags = ) 
+mkLanguageItem : Language -> Comp.FixedDropdown.Item Language +mkLanguageItem lang = + Comp.FixedDropdown.Item lang (Data.Language.toName lang) + + --- Update @@ -242,6 +259,7 @@ makeSettings model = List.map .id els |> StringList |> Just + , language = Maybe.map Data.Language.toIso3 model.language } in Data.Validated.map3 make @@ -597,6 +615,25 @@ update flags msg model = , Cmd.none ) + LanguageMsg lm -> + let + ( dm, sel ) = + Comp.FixedDropdown.update lm model.languageModel + in + ( { model + | languageModel = dm + , language = Util.Maybe.or [ sel, model.language ] + } + , NoAction + , Cmd.none + ) + + RemoveLanguage -> + ( { model | language = Nothing } + , NoAction + , Cmd.none + ) + --- View @@ -835,6 +872,29 @@ disappear then. [ text "Choose tags that should be applied to items." ] ] + , div [ class "field" ] + [ label [] + [ text "Language" + ] + , div [ class "ui action input" ] + [ Html.map LanguageMsg + (Comp.FixedDropdown.viewStyled "fluid" + (Maybe.map mkLanguageItem model.language) + model.languageModel + ) + , a + [ class "ui icon button" + , href "#" + , onClick RemoveLanguage + ] + [ i [ class "delete icon" ] [] + ] + ] + , div [ class "small-info" ] + [ text "Used for text extraction and text analysis. The " + , text "collective's default language is used, if not specified here." + ] + ] , div [ class "ui dividing header" ] [ text "Schedule" ] From 2384af1347c0e2a95f63a33c66186dc789b7c771 Mon Sep 17 00:00:00 2001 From: Eike Kettner Date: Mon, 23 Nov 2020 21:12:33 +0100 Subject: [PATCH 3/4] Fix centering dropzone title --- modules/webapp/src/main/elm/Comp/Dropzone.elm | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/modules/webapp/src/main/elm/Comp/Dropzone.elm b/modules/webapp/src/main/elm/Comp/Dropzone.elm index 802d1353..b34381ec 100644 --- a/modules/webapp/src/main/elm/Comp/Dropzone.elm +++ b/modules/webapp/src/main/elm/Comp/Dropzone.elm @@ -17,7 +17,6 @@ import File.Select import Html exposing (..) 
import Html.Attributes exposing (..) import Html.Events exposing (..) -import Json.Decode as D import Util.Html exposing (onDragEnter, onDragLeave, onDragOver, onDropFiles) @@ -119,9 +118,7 @@ view model = ] [ div [ class "ui icon header" ] [ i [ class "mouse pointer icon" ] [] - , div [ class "content" ] - [ text "Drop files here" - ] + , text "Drop files here" ] , div [ class "ui horizontal divider" ] [ text "Or" From c1c92f09328c6413c407858ea0263404a26520e1 Mon Sep 17 00:00:00 2001 From: Eike Kettner Date: Mon, 23 Nov 2020 21:12:45 +0100 Subject: [PATCH 4/4] Add language to upload page --- .../webapp/src/main/elm/Page/Upload/Data.elm | 15 +++++++++++++++ .../webapp/src/main/elm/Page/Upload/Update.elm | 18 +++++++++++++++++- .../webapp/src/main/elm/Page/Upload/View.elm | 13 +++++++++++++ 3 files changed, 45 insertions(+), 1 deletion(-) diff --git a/modules/webapp/src/main/elm/Page/Upload/Data.elm b/modules/webapp/src/main/elm/Page/Upload/Data.elm index 896591ef..b7d0c23d 100644 --- a/modules/webapp/src/main/elm/Page/Upload/Data.elm +++ b/modules/webapp/src/main/elm/Page/Upload/Data.elm @@ -9,11 +9,14 @@ module Page.Upload.Data exposing , isIdle , isLoading , isSuccessAll + , mkLanguageItem , uploadAllTracker ) import Api.Model.BasicResult exposing (BasicResult) import Comp.Dropzone +import Comp.FixedDropdown +import Data.Language exposing (Language) import Dict exposing (Dict) import File exposing (File) import Http @@ -30,6 +33,8 @@ type alias Model = , loading : Dict String Int , dropzone : Comp.Dropzone.Model , skipDuplicates : Bool + , languageModel : Comp.FixedDropdown.Model Language + , language : Maybe Language } @@ -49,6 +54,11 @@ dropzoneSettings = } +mkLanguageItem : Language -> Comp.FixedDropdown.Item Language +mkLanguageItem lang = + Comp.FixedDropdown.Item lang (Data.Language.toName lang) + + emptyModel : Model emptyModel = { incoming = True @@ -59,6 +69,10 @@ emptyModel = , loading = Dict.empty , dropzone = Comp.Dropzone.init dropzoneSettings , 
skipDuplicates = True + , languageModel = + Comp.FixedDropdown.init + (List.map mkLanguageItem Data.Language.all) + , language = Nothing } @@ -71,6 +85,7 @@ type Msg | Clear | DropzoneMsg Comp.Dropzone.Msg | ToggleSkipDuplicates + | LanguageMsg (Comp.FixedDropdown.Msg Language) isLoading : Model -> File -> Bool diff --git a/modules/webapp/src/main/elm/Page/Upload/Update.elm b/modules/webapp/src/main/elm/Page/Upload/Update.elm index 3929435d..beac5223 100644 --- a/modules/webapp/src/main/elm/Page/Upload/Update.elm +++ b/modules/webapp/src/main/elm/Page/Upload/Update.elm @@ -3,13 +3,15 @@ module Page.Upload.Update exposing (update) import Api import Api.Model.ItemUploadMeta import Comp.Dropzone +import Comp.FixedDropdown import Data.Flags exposing (Flags) +import Data.Language import Dict import Http import Page.Upload.Data exposing (..) -import Ports import Set exposing (Set) import Util.File exposing (makeFileId) +import Util.Maybe update : Maybe String -> Flags -> Msg -> Model -> ( Model, Cmd Msg, Sub Msg ) @@ -39,6 +41,7 @@ update sourceId flags msg model = else Just "outgoing" + , language = Maybe.map Data.Language.toIso3 model.language } fileids = @@ -151,6 +154,19 @@ update sourceId flags msg model = in ( { model | files = nextFiles, dropzone = m2 }, Cmd.map DropzoneMsg c2, Sub.none ) + LanguageMsg lm -> + let + ( dm, sel ) = + Comp.FixedDropdown.update lm model.languageModel + in + ( { model + | languageModel = dm + , language = Util.Maybe.or [ sel, model.language ] + } + , Cmd.none + , Sub.none + ) + setCompleted : Model -> String -> Set String setCompleted model fileid = diff --git a/modules/webapp/src/main/elm/Page/Upload/View.elm b/modules/webapp/src/main/elm/Page/Upload/View.elm index 2712b619..b5b96ae3 100644 --- a/modules/webapp/src/main/elm/Page/Upload/View.elm +++ b/modules/webapp/src/main/elm/Page/Upload/View.elm @@ -1,6 +1,7 @@ module Page.Upload.View exposing (view) import Comp.Dropzone +import Comp.FixedDropdown import Comp.Progress import Dict 
import File exposing (File) @@ -219,5 +220,17 @@ renderForm model = , label [] [ text "Skip files already present in docspell" ] ] ] + , div [ class "inline field" ] + [ label [] [ text "Language:" ] + , Html.map LanguageMsg + (Comp.FixedDropdown.view + (Maybe.map mkLanguageItem model.language) + model.languageModel + ) + , div [ class "small-info" ] + [ text "Used for text extraction and analysis. The collective's " + , text "default language is used if not specified here." + ] + ] ] ]