mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-04-05 10:59:33 +00:00
Merge branch 'eikek:master' into master
This commit is contained in:
commit
35f53b7107
2
.github/workflows/ci.yml
vendored
2
.github/workflows/ci.yml
vendored
@ -20,7 +20,7 @@ jobs:
|
|||||||
working-directory: modules/webapp
|
working-directory: modules/webapp
|
||||||
- name: Fetch tags
|
- name: Fetch tags
|
||||||
run: git fetch --depth=100 origin +refs/tags/*:refs/tags/*
|
run: git fetch --depth=100 origin +refs/tags/*:refs/tags/*
|
||||||
- uses: olafurpg/setup-scala@v12
|
- uses: olafurpg/setup-scala@v13
|
||||||
with:
|
with:
|
||||||
java-version: ${{ matrix.java }}
|
java-version: ${{ matrix.java }}
|
||||||
- name: Coursier cache
|
- name: Coursier cache
|
||||||
|
2
.github/workflows/release-nightly.yml
vendored
2
.github/workflows/release-nightly.yml
vendored
@ -14,7 +14,7 @@ jobs:
|
|||||||
- uses: actions/checkout@v2.3.4
|
- uses: actions/checkout@v2.3.4
|
||||||
with:
|
with:
|
||||||
fetch-depth: 0
|
fetch-depth: 0
|
||||||
- uses: olafurpg/setup-scala@v12
|
- uses: olafurpg/setup-scala@v13
|
||||||
with:
|
with:
|
||||||
java-version: ${{ matrix.java }}
|
java-version: ${{ matrix.java }}
|
||||||
- uses: jorelali/setup-elm@v3
|
- uses: jorelali/setup-elm@v3
|
||||||
|
2
.github/workflows/release.yml
vendored
2
.github/workflows/release.yml
vendored
@ -14,7 +14,7 @@ jobs:
|
|||||||
- uses: actions/checkout@v2.3.4
|
- uses: actions/checkout@v2.3.4
|
||||||
with:
|
with:
|
||||||
fetch-depth: 0
|
fetch-depth: 0
|
||||||
- uses: olafurpg/setup-scala@v12
|
- uses: olafurpg/setup-scala@v13
|
||||||
with:
|
with:
|
||||||
java-version: ${{ matrix.java }}
|
java-version: ${{ matrix.java }}
|
||||||
- uses: jorelali/setup-elm@v3
|
- uses: jorelali/setup-elm@v3
|
||||||
|
45
Changelog.md
45
Changelog.md
@ -1,15 +1,58 @@
|
|||||||
# Changelog
|
# Changelog
|
||||||
|
|
||||||
|
## v0.25.1
|
||||||
|
|
||||||
|
*Jul 29, 2021*
|
||||||
|
|
||||||
|
- Fix solr fulltext search by adding the new japanese content field
|
||||||
|
|
||||||
|
The SOLR fulltext search is broken in 0.25.0, so this is a fixup
|
||||||
|
release.
|
||||||
|
|
||||||
## v0.25.0
|
## v0.25.0
|
||||||
|
|
||||||
*Unreleased*
|
*Jul 29, 2021*
|
||||||
|
|
||||||
|
- Introducing a new CLI tool (#345) that replaces all the shell
|
||||||
|
scripts from the `tools/` directory! https://github.com/docspell/dsc
|
||||||
|
- UI changes:
|
||||||
|
- year separators are now more prominent (#950)
|
||||||
|
- fixes a bug in the item counter in detail view when an item is
|
||||||
|
deleted (#920)
|
||||||
|
- German translation improvements (#901)
|
||||||
|
- The number of selected files is shown in upload page (#896)
|
||||||
|
- The created date of an item can now be used in queries (#925, #958)
|
||||||
|
- Setting tags api has been improved (#955)
|
||||||
|
- Task for converting pdfs is now behind the admin secret (#949)
|
||||||
|
- Task for generating preview images is now behind the admin secret (#915)
|
||||||
|
- respond with 404 when the source-id is not correct (#931)
|
||||||
|
- Update of core libraries (#890)
|
||||||
|
- Add Japanese to the list of document languages. Thanks @wallace11
|
||||||
|
for helping out (#948, #962)
|
||||||
|
- Fix setting the folder from metadata when processing a file and
|
||||||
|
allow to specifiy it by name or id (#940)
|
||||||
|
- Fixes docspell config file in docker-compose setup (#909)
|
||||||
|
- Fixes selecting the next job in the job executor (#898)
|
||||||
|
- Fixes a bug that prevents uploading more than one file at once
|
||||||
|
(#938)
|
||||||
|
|
||||||
### Rest API Changes
|
### Rest API Changes
|
||||||
|
|
||||||
|
- Removed `sec/item/convertallpdfs` endpoint in favor for new
|
||||||
|
`admin/attachments/convertallpdfs` endpoint which is now an admin
|
||||||
|
task
|
||||||
- Removed `sec/collective/previews` endpoint, in favor for new
|
- Removed `sec/collective/previews` endpoint, in favor for new
|
||||||
`admin/attachments/generatePreviews` endpoint which is now an admin
|
`admin/attachments/generatePreviews` endpoint which is now an admin
|
||||||
task to generate previews for all files. The now removed enpoint did
|
task to generate previews for all files. The now removed enpoint did
|
||||||
this only for one collective.
|
this only for one collective.
|
||||||
|
- `/sec/item/{id}/tags`: Setting tags to an item (replacing existing
|
||||||
|
tags) has been changed to allow tags to be specified as names or ids
|
||||||
|
- `/sec/item/{id}/tagsremove`: Added a route to remove tags for a
|
||||||
|
single item
|
||||||
|
|
||||||
|
### Configuration Changes
|
||||||
|
|
||||||
|
None.
|
||||||
|
|
||||||
|
|
||||||
## v0.24.0
|
## v0.24.0
|
||||||
|
@ -30,7 +30,8 @@ fulltext search and has great e-mail integration. Everything is
|
|||||||
accessible via a REST/HTTP api. A mobile friendly SPA web application
|
accessible via a REST/HTTP api. A mobile friendly SPA web application
|
||||||
is the default user interface. An [Android
|
is the default user interface. An [Android
|
||||||
app](https://github.com/docspell/android-client) exists for
|
app](https://github.com/docspell/android-client) exists for
|
||||||
conveniently uploading files from your phone/tablet. The [feature
|
conveniently uploading files from your phone/tablet and a
|
||||||
|
[cli](https://github.com/docspell/dsc). The [feature
|
||||||
overview](https://docspell.org/#feature-selection) lists some more
|
overview](https://docspell.org/#feature-selection) lists some more
|
||||||
points.
|
points.
|
||||||
|
|
||||||
|
@ -88,8 +88,8 @@ val elmSettings = Seq(
|
|||||||
Compile / unmanagedSourceDirectories += (Compile / sourceDirectory).value / "elm",
|
Compile / unmanagedSourceDirectories += (Compile / sourceDirectory).value / "elm",
|
||||||
headerSources / includeFilter := "*.elm",
|
headerSources / includeFilter := "*.elm",
|
||||||
headerMappings := headerMappings.value + (HeaderFileType("elm") -> HeaderCommentStyle(
|
headerMappings := headerMappings.value + (HeaderFileType("elm") -> HeaderCommentStyle(
|
||||||
new CommentBlockCreator("{-", " ", "-}"),
|
new CommentBlockCreator("{-", " ", "-}\n"),
|
||||||
HeaderPattern.commentBetween("\\{\\-", " ", "\\-\\}")
|
HeaderPattern.commentBetween("\\{\\-", " ", "\\-\\}")
|
||||||
))
|
))
|
||||||
)
|
)
|
||||||
val stylesSettings = Seq(
|
val stylesSettings = Seq(
|
||||||
|
@ -30,16 +30,18 @@ services:
|
|||||||
- solr
|
- solr
|
||||||
|
|
||||||
consumedir:
|
consumedir:
|
||||||
image: docspell/tools:latest
|
image: docspell/dsc:latest
|
||||||
container_name: docspell-consumedir
|
container_name: docspell-consumedir
|
||||||
command:
|
command:
|
||||||
- ds-consumedir
|
- dsc
|
||||||
- "-vmdi"
|
- "-d"
|
||||||
- "--path"
|
- "http://docspell-restserver:7880"
|
||||||
- "/opt/docs"
|
- "watch"
|
||||||
- "--iheader"
|
- "--delete"
|
||||||
|
- "-ir"
|
||||||
|
- "--header"
|
||||||
- "Docspell-Integration:$DOCSPELL_HEADER_VALUE"
|
- "Docspell-Integration:$DOCSPELL_HEADER_VALUE"
|
||||||
- "http://docspell-restserver:7880/api/v1/open/integration/item"
|
- "/opt/docs"
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
env_file: ./.env
|
env_file: ./.env
|
||||||
volumes:
|
volumes:
|
||||||
|
@ -29,6 +29,7 @@ RUN JDKPKG="openjdk11"; \
|
|||||||
tesseract-ocr-data-rus \
|
tesseract-ocr-data-rus \
|
||||||
tesseract-ocr-data-ron \
|
tesseract-ocr-data-ron \
|
||||||
tesseract-ocr-data-lav \
|
tesseract-ocr-data-lav \
|
||||||
|
tesseract-ocr-data-jpn \
|
||||||
unpaper \
|
unpaper \
|
||||||
wkhtmltopdf \
|
wkhtmltopdf \
|
||||||
libreoffice \
|
libreoffice \
|
||||||
|
@ -21,9 +21,7 @@ import docspell.common._
|
|||||||
object DateFind {
|
object DateFind {
|
||||||
|
|
||||||
def findDates(text: String, lang: Language): Stream[Pure, NerDateLabel] =
|
def findDates(text: String, lang: Language): Stream[Pure, NerDateLabel] =
|
||||||
TextSplitter
|
splitWords(text, lang)
|
||||||
.splitToken(text, " \t.,\n\r/".toSet)
|
|
||||||
.filter(w => lang != Language.Latvian || w.value != "gada")
|
|
||||||
.sliding(3)
|
.sliding(3)
|
||||||
.filter(_.size == 3)
|
.filter(_.size == 3)
|
||||||
.flatMap(q =>
|
.flatMap(q =>
|
||||||
@ -44,6 +42,20 @@ object DateFind {
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
private[this] val jpnChars =
|
||||||
|
("年月日" + MonthName.getAll(Language.Japanese).map(_.mkString).mkString).toSet
|
||||||
|
|
||||||
|
private def splitWords(text: String, lang: Language): Stream[Pure, Word] = {
|
||||||
|
val stext =
|
||||||
|
if (lang == Language.Japanese) {
|
||||||
|
text.map(c => if (jpnChars.contains(c)) c else ' ')
|
||||||
|
} else text
|
||||||
|
|
||||||
|
TextSplitter
|
||||||
|
.splitToken(stext, " \t.,\n\r/年月日".toSet)
|
||||||
|
.filter(w => lang != Language.Latvian || w.value != "gada")
|
||||||
|
}
|
||||||
|
|
||||||
case class SimpleDate(year: Int, month: Int, day: Int) {
|
case class SimpleDate(year: Int, month: Int, day: Int) {
|
||||||
def toLocalDate: LocalDate =
|
def toLocalDate: LocalDate =
|
||||||
LocalDate.of(if (year < 100) 2000 + year else year, month, day)
|
LocalDate.of(if (year < 100) 2000 + year else year, month, day)
|
||||||
@ -89,6 +101,7 @@ object DateFind {
|
|||||||
case Language.Swedish => ymd.or(dmy).or(mdy)
|
case Language.Swedish => ymd.or(dmy).or(mdy)
|
||||||
case Language.Dutch => dmy.or(ymd).or(mdy)
|
case Language.Dutch => dmy.or(ymd).or(mdy)
|
||||||
case Language.Latvian => dmy.or(lavLong).or(ymd)
|
case Language.Latvian => dmy.or(lavLong).or(ymd)
|
||||||
|
case Language.Japanese => ymd
|
||||||
}
|
}
|
||||||
p.read(parts) match {
|
p.read(parts) match {
|
||||||
case Result.Success(sds, _) =>
|
case Result.Success(sds, _) =>
|
||||||
|
@ -50,6 +50,8 @@ object MonthName {
|
|||||||
russian
|
russian
|
||||||
case Language.Latvian =>
|
case Language.Latvian =>
|
||||||
latvian
|
latvian
|
||||||
|
case Language.Japanese =>
|
||||||
|
japanese
|
||||||
}
|
}
|
||||||
|
|
||||||
private val numbers = List(
|
private val numbers = List(
|
||||||
@ -290,4 +292,19 @@ object MonthName {
|
|||||||
List("novembris", "nov."),
|
List("novembris", "nov."),
|
||||||
List("decembris", "dec.")
|
List("decembris", "dec.")
|
||||||
)
|
)
|
||||||
|
|
||||||
|
private val japanese = List(
|
||||||
|
List("1", "一"),
|
||||||
|
List("2", "二"),
|
||||||
|
List("3", "三"),
|
||||||
|
List("4", "四"),
|
||||||
|
List("5", "五"),
|
||||||
|
List("6", "六"),
|
||||||
|
List("7", "七"),
|
||||||
|
List("8", "八"),
|
||||||
|
List("9", "九"),
|
||||||
|
List("10", "十"),
|
||||||
|
List("11", "十一"),
|
||||||
|
List("12", "十二")
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
@ -143,4 +143,40 @@ class DateFindSpec extends FunSuite {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
test("find japanese dates") {
|
||||||
|
assertEquals(
|
||||||
|
DateFind
|
||||||
|
.findDates("今日の日付は2021.7.21です。", Language.Japanese)
|
||||||
|
.toVector,
|
||||||
|
Vector(
|
||||||
|
NerDateLabel(
|
||||||
|
LocalDate.of(2021, 7, 21),
|
||||||
|
NerLabel("2021.7.21", NerTag.Date, 6, 15)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
assertEquals(
|
||||||
|
DateFind
|
||||||
|
.findDates("今日の日付は2021年7月21日です。", Language.Japanese)
|
||||||
|
.toVector,
|
||||||
|
Vector(
|
||||||
|
NerDateLabel(
|
||||||
|
LocalDate.of(2021, 7, 21),
|
||||||
|
NerLabel("2021年7月21", NerTag.Date, 6, 15)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
assertEquals(
|
||||||
|
DateFind
|
||||||
|
.findDates("年月日2021年7月21日(日)", Language.Japanese)
|
||||||
|
.toVector,
|
||||||
|
Vector(
|
||||||
|
NerDateLabel(
|
||||||
|
LocalDate.of(2021, 7, 21),
|
||||||
|
NerLabel("2021年7月21", NerTag.Date, 3, 12)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -68,14 +68,14 @@ object JobFactory {
|
|||||||
args,
|
args,
|
||||||
"Create preview images",
|
"Create preview images",
|
||||||
now,
|
now,
|
||||||
submitter.getOrElse(DocspellSystem.taskGroup),
|
submitter.getOrElse(DocspellSystem.user),
|
||||||
Priority.Low,
|
Priority.Low,
|
||||||
Some(DocspellSystem.allPreviewTaskTracker)
|
Some(DocspellSystem.allPreviewTaskTracker)
|
||||||
)
|
)
|
||||||
|
|
||||||
def convertAllPdfs[F[_]: Sync](
|
def convertAllPdfs[F[_]: Sync](
|
||||||
collective: Option[Ident],
|
collective: Option[Ident],
|
||||||
account: AccountId,
|
submitter: Option[Ident],
|
||||||
prio: Priority
|
prio: Priority
|
||||||
): F[RJob] =
|
): F[RJob] =
|
||||||
for {
|
for {
|
||||||
@ -84,11 +84,11 @@ object JobFactory {
|
|||||||
job = RJob.newJob(
|
job = RJob.newJob(
|
||||||
id,
|
id,
|
||||||
ConvertAllPdfArgs.taskName,
|
ConvertAllPdfArgs.taskName,
|
||||||
account.collective,
|
collective.getOrElse(DocspellSystem.taskGroup),
|
||||||
ConvertAllPdfArgs(collective),
|
ConvertAllPdfArgs(collective),
|
||||||
s"Convert all pdfs not yet converted",
|
s"Convert all pdfs not yet converted",
|
||||||
now,
|
now,
|
||||||
account.user,
|
submitter.getOrElse(DocspellSystem.user),
|
||||||
prio,
|
prio,
|
||||||
collective
|
collective
|
||||||
.map(c => c / ConvertAllPdfArgs.taskName)
|
.map(c => c / ConvertAllPdfArgs.taskName)
|
||||||
|
@ -24,7 +24,7 @@ import org.log4s.getLogger
|
|||||||
trait OItem[F[_]] {
|
trait OItem[F[_]] {
|
||||||
|
|
||||||
/** Sets the given tags (removing all existing ones). */
|
/** Sets the given tags (removing all existing ones). */
|
||||||
def setTags(item: Ident, tagIds: List[Ident], collective: Ident): F[UpdateResult]
|
def setTags(item: Ident, tagIds: List[String], collective: Ident): F[UpdateResult]
|
||||||
|
|
||||||
/** Sets tags for multiple items. The tags of the items will be
|
/** Sets tags for multiple items. The tags of the items will be
|
||||||
* replaced with the given ones. Same as `setTags` but for multiple
|
* replaced with the given ones. Same as `setTags` but for multiple
|
||||||
@ -32,7 +32,7 @@ trait OItem[F[_]] {
|
|||||||
*/
|
*/
|
||||||
def setTagsMultipleItems(
|
def setTagsMultipleItems(
|
||||||
items: NonEmptyList[Ident],
|
items: NonEmptyList[Ident],
|
||||||
tags: List[Ident],
|
tags: List[String],
|
||||||
collective: Ident
|
collective: Ident
|
||||||
): F[UpdateResult]
|
): F[UpdateResult]
|
||||||
|
|
||||||
@ -181,7 +181,7 @@ trait OItem[F[_]] {
|
|||||||
*/
|
*/
|
||||||
def convertAllPdf(
|
def convertAllPdf(
|
||||||
collective: Option[Ident],
|
collective: Option[Ident],
|
||||||
account: AccountId,
|
submitter: Option[Ident],
|
||||||
notifyJoex: Boolean
|
notifyJoex: Boolean
|
||||||
): F[UpdateResult]
|
): F[UpdateResult]
|
||||||
|
|
||||||
@ -304,19 +304,20 @@ object OItem {
|
|||||||
|
|
||||||
def setTags(
|
def setTags(
|
||||||
item: Ident,
|
item: Ident,
|
||||||
tagIds: List[Ident],
|
tagIds: List[String],
|
||||||
collective: Ident
|
collective: Ident
|
||||||
): F[UpdateResult] =
|
): F[UpdateResult] =
|
||||||
setTagsMultipleItems(NonEmptyList.of(item), tagIds, collective)
|
setTagsMultipleItems(NonEmptyList.of(item), tagIds, collective)
|
||||||
|
|
||||||
def setTagsMultipleItems(
|
def setTagsMultipleItems(
|
||||||
items: NonEmptyList[Ident],
|
items: NonEmptyList[Ident],
|
||||||
tags: List[Ident],
|
tags: List[String],
|
||||||
collective: Ident
|
collective: Ident
|
||||||
): F[UpdateResult] =
|
): F[UpdateResult] =
|
||||||
UpdateResult.fromUpdate(store.transact(for {
|
UpdateResult.fromUpdate(store.transact(for {
|
||||||
k <- RTagItem.deleteItemTags(items, collective)
|
k <- RTagItem.deleteItemTags(items, collective)
|
||||||
res <- items.traverse(i => RTagItem.setAllTags(i, tags))
|
rtags <- RTag.findAllByNameOrId(tags, collective)
|
||||||
|
res <- items.traverse(i => RTagItem.setAllTags(i, rtags.map(_.tagId)))
|
||||||
n = res.fold
|
n = res.fold
|
||||||
} yield k + n))
|
} yield k + n))
|
||||||
|
|
||||||
@ -687,11 +688,11 @@ object OItem {
|
|||||||
|
|
||||||
def convertAllPdf(
|
def convertAllPdf(
|
||||||
collective: Option[Ident],
|
collective: Option[Ident],
|
||||||
account: AccountId,
|
submitter: Option[Ident],
|
||||||
notifyJoex: Boolean
|
notifyJoex: Boolean
|
||||||
): F[UpdateResult] =
|
): F[UpdateResult] =
|
||||||
for {
|
for {
|
||||||
job <- JobFactory.convertAllPdfs[F](collective, account, Priority.Low)
|
job <- JobFactory.convertAllPdfs[F](collective, submitter, Priority.Low)
|
||||||
_ <- queue.insertIfNew(job)
|
_ <- queue.insertIfNew(job)
|
||||||
_ <- if (notifyJoex) joex.notifyAllNodes else ().pure[F]
|
_ <- if (notifyJoex) joex.notifyAllNodes else ().pure[F]
|
||||||
} yield UpdateResult.success
|
} yield UpdateResult.success
|
||||||
|
@ -108,6 +108,11 @@ object Language {
|
|||||||
val iso3 = "lav"
|
val iso3 = "lav"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
case object Japanese extends Language {
|
||||||
|
val iso2 = "ja"
|
||||||
|
val iso3 = "jpn"
|
||||||
|
}
|
||||||
|
|
||||||
val all: List[Language] =
|
val all: List[Language] =
|
||||||
List(
|
List(
|
||||||
German,
|
German,
|
||||||
@ -124,7 +129,8 @@ object Language {
|
|||||||
Swedish,
|
Swedish,
|
||||||
Russian,
|
Russian,
|
||||||
Romanian,
|
Romanian,
|
||||||
Latvian
|
Latvian,
|
||||||
|
Japanese
|
||||||
)
|
)
|
||||||
|
|
||||||
def fromString(str: String): Either[String, Language] = {
|
def fromString(str: String): Either[String, Language] = {
|
||||||
|
@ -0,0 +1,83 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.tika.exception;
|
||||||
|
|
||||||
|
import org.xml.sax.SAXException;
|
||||||
|
|
||||||
|
public class WriteLimitReachedException extends SAXException {
|
||||||
|
|
||||||
|
//in case of (hopefully impossible) cyclic exception
|
||||||
|
private final static int MAX_DEPTH = 100;
|
||||||
|
|
||||||
|
private final int writeLimit;
|
||||||
|
public WriteLimitReachedException(int writeLimit) {
|
||||||
|
this.writeLimit = writeLimit;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getMessage() {
|
||||||
|
return "Your document contained more than " + writeLimit
|
||||||
|
+ " characters, and so your requested limit has been"
|
||||||
|
+ " reached. To receive the full text of the document,"
|
||||||
|
+ " increase your limit. (Text up to the limit is"
|
||||||
|
+ " however available).";
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* Checks whether the given exception (or any of it's root causes) was
|
||||||
|
* thrown by this handler as a signal of reaching the write limit.
|
||||||
|
*
|
||||||
|
* @param t throwable
|
||||||
|
* @return <code>true</code> if the write limit was reached,
|
||||||
|
* <code>false</code> otherwise
|
||||||
|
* @since Apache Tika 2.0
|
||||||
|
*/
|
||||||
|
public static boolean isWriteLimitReached(Throwable t) {
|
||||||
|
return isWriteLimitReached(t, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean isWriteLimitReached(Throwable t, int depth) {
|
||||||
|
if (t == null) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (depth > MAX_DEPTH) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (t instanceof WriteLimitReachedException) {
|
||||||
|
return true;
|
||||||
|
} else {
|
||||||
|
return t.getCause() != null && isWriteLimitReached(t.getCause(), depth + 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void throwIfWriteLimitReached(Exception ex) throws SAXException {
|
||||||
|
throwIfWriteLimitReached(ex, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void throwIfWriteLimitReached(Exception ex, int depth) throws SAXException {
|
||||||
|
if (ex == null) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (depth > MAX_DEPTH) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (ex instanceof WriteLimitReachedException) {
|
||||||
|
throw (SAXException) ex;
|
||||||
|
} else {
|
||||||
|
isWriteLimitReached(ex.getCause(), depth + 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,120 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.tika.parser.odf;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
|
|
||||||
|
import org.xml.sax.Attributes;
|
||||||
|
import org.xml.sax.ContentHandler;
|
||||||
|
import org.xml.sax.SAXException;
|
||||||
|
|
||||||
|
import org.apache.tika.extractor.EmbeddedDocumentExtractor;
|
||||||
|
import org.apache.tika.extractor.EmbeddedDocumentUtil;
|
||||||
|
import org.apache.tika.io.TikaInputStream;
|
||||||
|
import org.apache.tika.metadata.Metadata;
|
||||||
|
import org.apache.tika.metadata.TikaCoreProperties;
|
||||||
|
import org.apache.tika.parser.ParseContext;
|
||||||
|
import org.apache.tika.sax.ContentHandlerDecorator;
|
||||||
|
import org.apache.tika.utils.XMLReaderUtils;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Handler for macros in flat open documents
|
||||||
|
*/
|
||||||
|
class FlatOpenDocumentMacroHandler extends ContentHandlerDecorator {
|
||||||
|
|
||||||
|
static String MODULE = "module";
|
||||||
|
static String NAME = "name";
|
||||||
|
private static String SOURCE_CODE = "source-code";
|
||||||
|
private final ContentHandler contentHandler;
|
||||||
|
private final ParseContext parseContext;
|
||||||
|
private final StringBuilder macroBuffer = new StringBuilder();
|
||||||
|
String macroName = null;
|
||||||
|
boolean inMacro = false;
|
||||||
|
private EmbeddedDocumentExtractor embeddedDocumentExtractor;
|
||||||
|
|
||||||
|
FlatOpenDocumentMacroHandler(ContentHandler contentHandler, ParseContext parseContext) {
|
||||||
|
super(contentHandler);
|
||||||
|
this.contentHandler = contentHandler;
|
||||||
|
this.parseContext = parseContext;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void startElement(String namespaceURI, String localName, String qName, Attributes attrs)
|
||||||
|
throws SAXException {
|
||||||
|
if (MODULE.equals(localName)) {
|
||||||
|
macroName = XMLReaderUtils.getAttrValue(NAME, attrs);
|
||||||
|
} else if (SOURCE_CODE.equals(localName)) {
|
||||||
|
inMacro = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void characters(char[] ch, int start, int length) throws SAXException {
|
||||||
|
if (inMacro) {
|
||||||
|
macroBuffer.append(ch, start, length);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void endElement(String namespaceURI, String localName, String qName)
|
||||||
|
throws SAXException {
|
||||||
|
if (SOURCE_CODE.equals(localName)) {
|
||||||
|
try {
|
||||||
|
handleMacro();
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new SAXException(e);
|
||||||
|
} finally {
|
||||||
|
resetMacroState();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void resetMacroState() {
|
||||||
|
macroBuffer.setLength(0);
|
||||||
|
macroName = null;
|
||||||
|
inMacro = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void handleMacro() throws IOException, SAXException {
|
||||||
|
|
||||||
|
byte[] bytes = macroBuffer.toString().getBytes(StandardCharsets.UTF_8);
|
||||||
|
|
||||||
|
if (embeddedDocumentExtractor == null) {
|
||||||
|
embeddedDocumentExtractor =
|
||||||
|
EmbeddedDocumentUtil.getEmbeddedDocumentExtractor(parseContext);
|
||||||
|
}
|
||||||
|
Metadata embeddedMetadata = new Metadata();
|
||||||
|
if (!isBlank(macroName)) {
|
||||||
|
embeddedMetadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, macroName);
|
||||||
|
}
|
||||||
|
embeddedMetadata.set(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE,
|
||||||
|
TikaCoreProperties.EmbeddedResourceType.MACRO.toString());
|
||||||
|
|
||||||
|
if (embeddedDocumentExtractor.shouldParseEmbedded(embeddedMetadata)) {
|
||||||
|
try (InputStream is = TikaInputStream.get(bytes)) {
|
||||||
|
embeddedDocumentExtractor
|
||||||
|
.parseEmbedded(is, contentHandler, embeddedMetadata, false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean isBlank(String s) {
|
||||||
|
return s == null || s.trim().isEmpty();
|
||||||
|
}
|
||||||
|
}
|
@ -1,31 +1,32 @@
|
|||||||
/*
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
* this work for additional information regarding copyright ownership.
|
* this work for additional information regarding copyright ownership.
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
* (the "License"); you may not use this file except in compliance with
|
* (the "License"); you may not use this file except in compliance with
|
||||||
* the License. You may obtain a copy of the License at
|
* the License. You may obtain a copy of the License at
|
||||||
*
|
*
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
*
|
*
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* See the License for the specific language governing permissions and
|
* See the License for the specific language governing permissions and
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
package org.apache.tika.parser.odf;
|
package org.apache.tika.parser.odf;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.StringReader;
|
||||||
|
import java.util.Locale;
|
||||||
|
|
||||||
import org.apache.tika.sax.ContentHandlerDecorator;
|
|
||||||
import org.xml.sax.Attributes;
|
import org.xml.sax.Attributes;
|
||||||
import org.xml.sax.ContentHandler;
|
import org.xml.sax.ContentHandler;
|
||||||
import org.xml.sax.InputSource;
|
import org.xml.sax.InputSource;
|
||||||
import org.xml.sax.SAXException;
|
import org.xml.sax.SAXException;
|
||||||
import org.xml.sax.helpers.AttributesImpl;
|
import org.xml.sax.helpers.AttributesImpl;
|
||||||
|
|
||||||
import java.io.IOException;
|
import org.apache.tika.sax.ContentHandlerDecorator;
|
||||||
import java.io.StringReader;
|
|
||||||
import java.util.Locale;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Content handler decorator that:<ul>
|
* Content handler decorator that:<ul>
|
||||||
@ -35,14 +36,11 @@ import java.util.Locale;
|
|||||||
*/
|
*/
|
||||||
public class NSNormalizerContentHandler extends ContentHandlerDecorator {
|
public class NSNormalizerContentHandler extends ContentHandlerDecorator {
|
||||||
|
|
||||||
private static final String OLD_NS =
|
private static final String OLD_NS = "http://openoffice.org/2000/";
|
||||||
"http://openoffice.org/2000/";
|
|
||||||
|
|
||||||
private static final String NEW_NS =
|
private static final String NEW_NS = "urn:oasis:names:tc:opendocument:xmlns:";
|
||||||
"urn:oasis:names:tc:opendocument:xmlns:";
|
|
||||||
|
|
||||||
private static final String DTD_PUBLIC_ID =
|
private static final String DTD_PUBLIC_ID = "-//OpenOffice.org//DTD OfficeDocument 1.0//EN";
|
||||||
"-//OpenOffice.org//DTD OfficeDocument 1.0//EN";
|
|
||||||
|
|
||||||
public NSNormalizerContentHandler(ContentHandler handler) {
|
public NSNormalizerContentHandler(ContentHandler handler) {
|
||||||
super(handler);
|
super(handler);
|
||||||
@ -57,27 +55,24 @@ public class NSNormalizerContentHandler extends ContentHandlerDecorator {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void startElement(
|
public void startElement(String namespaceURI, String localName, String qName, Attributes atts)
|
||||||
String namespaceURI, String localName, String qName,
|
throws SAXException {
|
||||||
Attributes atts) throws SAXException {
|
|
||||||
AttributesImpl natts = new AttributesImpl();
|
AttributesImpl natts = new AttributesImpl();
|
||||||
for (int i = 0; i < atts.getLength(); i++) {
|
for (int i = 0; i < atts.getLength(); i++) {
|
||||||
natts.addAttribute(
|
natts.addAttribute(mapOldNS(atts.getURI(i)), atts.getLocalName(i), atts.getQName(i),
|
||||||
mapOldNS(atts.getURI(i)), atts.getLocalName(i),
|
atts.getType(i), atts.getValue(i));
|
||||||
atts.getQName(i), atts.getType(i), atts.getValue(i));
|
|
||||||
}
|
}
|
||||||
super.startElement(mapOldNS(namespaceURI), localName, qName, atts);
|
super.startElement(mapOldNS(namespaceURI), localName, qName, atts);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void endElement(String namespaceURI, String localName, String qName)
|
public void endElement(String namespaceURI, String localName, String qName)
|
||||||
throws SAXException {
|
throws SAXException {
|
||||||
super.endElement(mapOldNS(namespaceURI), localName, qName);
|
super.endElement(mapOldNS(namespaceURI), localName, qName);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void startPrefixMapping(String prefix, String uri)
|
public void startPrefixMapping(String prefix, String uri) throws SAXException {
|
||||||
throws SAXException {
|
|
||||||
super.startPrefixMapping(prefix, mapOldNS(uri));
|
super.startPrefixMapping(prefix, mapOldNS(uri));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -87,13 +82,13 @@ public class NSNormalizerContentHandler extends ContentHandlerDecorator {
|
|||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public InputSource resolveEntity(String publicId, String systemId)
|
public InputSource resolveEntity(String publicId, String systemId)
|
||||||
throws IOException, SAXException {
|
throws IOException, SAXException {
|
||||||
if ((systemId != null && systemId.toLowerCase(Locale.ROOT).endsWith(".dtd"))
|
if ((systemId != null && systemId.toLowerCase(Locale.ROOT).endsWith(".dtd")) ||
|
||||||
|| DTD_PUBLIC_ID.equals(publicId)) {
|
DTD_PUBLIC_ID.equals(publicId)) {
|
||||||
return new InputSource(new StringReader(""));
|
return new InputSource(new StringReader(""));
|
||||||
} else {
|
} else {
|
||||||
return super.resolveEntity(publicId, systemId);
|
return super.resolveEntity(publicId, systemId);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
@ -0,0 +1,564 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.tika.parser.odf;
|
||||||
|
|
||||||
|
import static org.apache.tika.sax.XHTMLContentHandler.XHTML;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.util.BitSet;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Stack;
|
||||||
|
import javax.xml.namespace.QName;
|
||||||
|
|
||||||
|
import org.apache.commons.codec.binary.Base64;
|
||||||
|
import org.xml.sax.Attributes;
|
||||||
|
import org.xml.sax.ContentHandler;
|
||||||
|
import org.xml.sax.SAXException;
|
||||||
|
import org.xml.sax.helpers.AttributesImpl;
|
||||||
|
|
||||||
|
import org.apache.tika.extractor.EmbeddedDocumentExtractor;
|
||||||
|
import org.apache.tika.extractor.EmbeddedDocumentUtil;
|
||||||
|
import org.apache.tika.io.TikaInputStream;
|
||||||
|
import org.apache.tika.metadata.Metadata;
|
||||||
|
import org.apache.tika.parser.ParseContext;
|
||||||
|
import org.apache.tika.sax.ElementMappingContentHandler;
|
||||||
|
import org.apache.tika.sax.XHTMLContentHandler;
|
||||||
|
import org.apache.tika.utils.StringUtils;
|
||||||
|
|
||||||
|
/*
|
||||||
|
Handler for the body element or odt flat files and content.xml of
|
||||||
|
traditional compressed odt files
|
||||||
|
*/
|
||||||
|
class OpenDocumentBodyHandler extends ElementMappingContentHandler {
|
||||||
|
|
||||||
|
public static final String TEXT_NS = "urn:oasis:names:tc:opendocument:xmlns:text:1.0";
|
||||||
|
public static final String TABLE_NS = "urn:oasis:names:tc:opendocument:xmlns:table:1.0";
|
||||||
|
public static final String STYLE_NS = "urn:oasis:names:tc:opendocument:xmlns:style:1.0";
|
||||||
|
public static final String FORMATTING_OBJECTS_NS =
|
||||||
|
"urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0";
|
||||||
|
public static final String OFFICE_NS = "urn:oasis:names:tc:opendocument:xmlns:office:1.0";
|
||||||
|
public static final String SVG_NS = "urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0";
|
||||||
|
public static final String PRESENTATION_NS =
|
||||||
|
"urn:oasis:names:tc:opendocument:xmlns:presentation:1.0";
|
||||||
|
public static final String DRAW_NS = "urn:oasis:names:tc:opendocument:xmlns:drawing:1.0";
|
||||||
|
public static final String XLINK_NS = "http://www.w3.org/1999/xlink";
|
||||||
|
protected static final char[] TAB = new char[]{'\t'};
|
||||||
|
private static final String BINARY_DATA = "binary-data";
|
||||||
|
private static final Attributes EMPTY_ATTRIBUTES = new AttributesImpl();
|
||||||
|
/**
|
||||||
|
* Mappings between ODF tag names and XHTML tag names
|
||||||
|
* (including attributes). All other tag names/attributes are ignored
|
||||||
|
* and left out from event stream.
|
||||||
|
*/
|
||||||
|
private static final HashMap<QName, TargetElement> MAPPINGS =
|
||||||
|
new HashMap<>();
|
||||||
|
private static final char[] SPACE = new char[]{' '};
|
||||||
|
private static final String CLASS = "class";
|
||||||
|
private static final Attributes ANNOTATION_ATTRIBUTES = buildAttributes(CLASS, "annotation");
|
||||||
|
private static final Attributes NOTE_ATTRIBUTES = buildAttributes(CLASS, "note");
|
||||||
|
private static final Attributes NOTES_ATTRIBUTES = buildAttributes(CLASS, "notes");
|
||||||
|
|
||||||
|
static {
|
||||||
|
// general mappings of text:-tags
|
||||||
|
MAPPINGS.put(new QName(TEXT_NS, "p"), new TargetElement(XHTML, "p"));
|
||||||
|
// text:h-tags are mapped specifically in startElement/endElement
|
||||||
|
MAPPINGS.put(new QName(TEXT_NS, "line-break"), new TargetElement(XHTML, "br"));
|
||||||
|
MAPPINGS.put(new QName(TEXT_NS, "list-item"), new TargetElement(XHTML, "li"));
|
||||||
|
MAPPINGS.put(new QName(TEXT_NS, "note"), new TargetElement(XHTML, "span"));
|
||||||
|
MAPPINGS.put(new QName(OFFICE_NS, "annotation"), new TargetElement(XHTML,
|
||||||
|
"span"));
|
||||||
|
MAPPINGS.put(new QName(PRESENTATION_NS, "notes"), new TargetElement(XHTML,
|
||||||
|
"span"));
|
||||||
|
MAPPINGS.put(new QName(DRAW_NS, "object"), new TargetElement(XHTML,
|
||||||
|
"object"));
|
||||||
|
MAPPINGS.put(new QName(DRAW_NS, "text-box"), new TargetElement(XHTML, "div"));
|
||||||
|
MAPPINGS.put(new QName(SVG_NS, "title"), new TargetElement(XHTML, "span"));
|
||||||
|
MAPPINGS.put(new QName(SVG_NS, "desc"), new TargetElement(XHTML, "span"));
|
||||||
|
MAPPINGS.put(new QName(TEXT_NS, "span"), new TargetElement(XHTML, "span"));
|
||||||
|
|
||||||
|
final HashMap<QName, QName> aAttsMapping = new HashMap<>();
|
||||||
|
aAttsMapping.put(new QName(XLINK_NS, "href"), new QName("href"));
|
||||||
|
aAttsMapping.put(new QName(XLINK_NS, "title"), new QName("title"));
|
||||||
|
MAPPINGS.put(new QName(TEXT_NS, "a"), new TargetElement(XHTML, "a",
|
||||||
|
aAttsMapping));
|
||||||
|
MAPPINGS.put(new QName(DRAW_NS, "a"), new TargetElement(XHTML, "a",
|
||||||
|
aAttsMapping));
|
||||||
|
|
||||||
|
// create HTML tables from table:-tags
|
||||||
|
MAPPINGS.put(new QName(TABLE_NS, "table"), new TargetElement(XHTML, "table"));
|
||||||
|
// repeating of rows is ignored; for columns, see below!
|
||||||
|
MAPPINGS.put(new QName(TABLE_NS, "table-row"), new TargetElement(XHTML, "tr"));
|
||||||
|
// special mapping for rowspan/colspan attributes
|
||||||
|
final HashMap<QName, QName> tableCellAttsMapping = new HashMap<>();
|
||||||
|
tableCellAttsMapping
|
||||||
|
.put(new QName(TABLE_NS, "number-columns-spanned"), new QName("colspan"));
|
||||||
|
tableCellAttsMapping.put(new QName(TABLE_NS, "number-rows-spanned"), new QName("rowspan"));
|
||||||
|
/* TODO: The following is not correct, the cell should be repeated not spanned!
|
||||||
|
* Code generates a HTML cell, spanning all repeated columns, to make the cell look correct.
|
||||||
|
* Problems may occur when both spanning and repeating is given, which is not allowed by
|
||||||
|
* spec.
|
||||||
|
* Cell spanning instead of repeating is not a problem, because OpenOffice uses it
|
||||||
|
* only for empty cells.
|
||||||
|
*/
|
||||||
|
tableCellAttsMapping
|
||||||
|
.put(new QName(TABLE_NS, "number-columns-repeated"), new QName("colspan"));
|
||||||
|
MAPPINGS.put(new QName(TABLE_NS, "table-cell"),
|
||||||
|
new TargetElement(XHTML, "td", tableCellAttsMapping));
|
||||||
|
}
|
||||||
|
|
||||||
|
private final ContentHandler handler;
|
||||||
|
private final ParseContext parseContext;
|
||||||
|
private final BitSet textNodeStack = new BitSet();
|
||||||
|
//have we written the start style tags
|
||||||
|
//yet for the current text style
|
||||||
|
boolean hasWrittenStartStyleTags = false;
|
||||||
|
//if we're in a binary-data tag
|
||||||
|
boolean inBinaryData = false;
|
||||||
|
private EmbeddedDocumentExtractor embeddedDocumentExtractor;
|
||||||
|
private StringBuilder base64BinaryDataBuffer = new StringBuilder();
|
||||||
|
private int nodeDepth = 0;
|
||||||
|
private int completelyFiltered = 0;
|
||||||
|
private Stack<String> headingStack = new Stack<>();
|
||||||
|
private Map<String, TextStyle> paragraphTextStyleMap = new HashMap<>();
|
||||||
|
private Map<String, TextStyle> textStyleMap = new HashMap<>();
|
||||||
|
private Map<String, ListStyle> listStyleMap = new HashMap<>();
|
||||||
|
private String currParagraphStyleName; //paragraph style name
|
||||||
|
private TextStyle currTextStyle; //this is the text style for particular spans/paragraphs
|
||||||
|
private String currTextStyleName;
|
||||||
|
private Stack<ListStyle> listStyleStack = new Stack<>();
|
||||||
|
private ListStyle listStyle;
|
||||||
|
// True if we are currently in the named style:
|
||||||
|
private boolean curUnderlined;
|
||||||
|
private boolean curBold;
|
||||||
|
private boolean curItalic;
|
||||||
|
private int pDepth = 0;
|
||||||
|
OpenDocumentBodyHandler(ContentHandler handler, ParseContext parseContext) {
|
||||||
|
super(handler, MAPPINGS);
|
||||||
|
this.handler = handler;
|
||||||
|
this.parseContext = parseContext;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Attributes buildAttributes(String key, String value) {
|
||||||
|
AttributesImpl attrs = new AttributesImpl();
|
||||||
|
attrs.addAttribute("", key, key, "CDATA", value);
|
||||||
|
return attrs;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void characters(char[] ch, int start, int length) throws SAXException {
|
||||||
|
if (inBinaryData) {
|
||||||
|
base64BinaryDataBuffer.append(ch, start, length);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// only forward content of tags from text:-namespace
|
||||||
|
if (completelyFiltered == 0 && nodeDepth > 0 && textNodeStack.get(nodeDepth - 1)) {
|
||||||
|
if (!hasWrittenStartStyleTags) {
|
||||||
|
updateStyleTags();
|
||||||
|
hasWrittenStartStyleTags = true;
|
||||||
|
}
|
||||||
|
super.characters(ch, start, length);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// helper for checking tags which need complete filtering
|
||||||
|
// (with sub-tags)
|
||||||
|
private boolean needsCompleteFiltering(String namespaceURI, String localName) {
|
||||||
|
if (TEXT_NS.equals(namespaceURI)) {
|
||||||
|
return localName.endsWith("-template") || localName.endsWith("-style");
|
||||||
|
}
|
||||||
|
return TABLE_NS.equals(namespaceURI) && "covered-table-cell".equals(localName);
|
||||||
|
}
|
||||||
|
//<p> can appear inside comments and other things that are already inside <p>
|
||||||
|
//we need to track our pDepth and only output <p> if we're at the main level
|
||||||
|
|
||||||
|
// map the heading level to <hX> HTML tags
|
||||||
|
private String getXHTMLHeaderTagName(Attributes atts) {
|
||||||
|
String depthStr = atts.getValue(TEXT_NS, "outline-level");
|
||||||
|
if (depthStr == null) {
|
||||||
|
return "h1";
|
||||||
|
}
|
||||||
|
|
||||||
|
int depth = Integer.parseInt(depthStr);
|
||||||
|
if (depth >= 6) {
|
||||||
|
return "h6";
|
||||||
|
} else if (depth <= 1) {
|
||||||
|
return "h1";
|
||||||
|
} else {
|
||||||
|
return "h" + depth;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check if a node is a text node
|
||||||
|
*/
|
||||||
|
private boolean isTextNode(String namespaceURI, String localName) {
|
||||||
|
if (TEXT_NS.equals(namespaceURI) && !localName.equals("page-number") &&
|
||||||
|
!localName.equals("page-count")) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
if (SVG_NS.equals(namespaceURI)) {
|
||||||
|
return "title".equals(localName) || "desc".equals(localName);
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void startList(String name) throws SAXException {
|
||||||
|
String elementName = "ul";
|
||||||
|
if (name != null) {
|
||||||
|
ListStyle style = listStyleMap.get(name);
|
||||||
|
elementName = style != null ? style.getTag() : "ul";
|
||||||
|
listStyleStack.push(style);
|
||||||
|
}
|
||||||
|
handler.startElement(XHTML, elementName, elementName, EMPTY_ATTRIBUTES);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void endList() throws SAXException {
|
||||||
|
String elementName = "ul";
|
||||||
|
if (!listStyleStack.isEmpty()) {
|
||||||
|
ListStyle style = listStyleStack.pop();
|
||||||
|
elementName = style != null ? style.getTag() : "ul";
|
||||||
|
}
|
||||||
|
handler.endElement(XHTML, elementName, elementName);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void startSpan(String name) throws SAXException {
|
||||||
|
if (name == null) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
currTextStyle = textStyleMap.get(name);
|
||||||
|
hasWrittenStartStyleTags = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void startParagraph(String styleName) throws SAXException {
|
||||||
|
if (pDepth == 0) {
|
||||||
|
handler.startElement(XHTML, "p", "p", EMPTY_ATTRIBUTES);
|
||||||
|
if (styleName != null) {
|
||||||
|
currTextStyle = paragraphTextStyleMap.get(styleName);
|
||||||
|
}
|
||||||
|
hasWrittenStartStyleTags = false;
|
||||||
|
} else {
|
||||||
|
handler.characters(SPACE, 0, SPACE.length);
|
||||||
|
}
|
||||||
|
pDepth++;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void endParagraph() throws SAXException {
|
||||||
|
closeStyleTags();
|
||||||
|
if (pDepth == 1) {
|
||||||
|
handler.endElement(XHTML, "p", "p");
|
||||||
|
} else {
|
||||||
|
handler.characters(SPACE, 0, SPACE.length);
|
||||||
|
}
|
||||||
|
pDepth--;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private void updateStyleTags() throws SAXException {
|
||||||
|
|
||||||
|
if (currTextStyle == null) {
|
||||||
|
closeStyleTags();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (currTextStyle.bold != curBold) {
|
||||||
|
// Enforce nesting -- must close s and i tags
|
||||||
|
if (curUnderlined) {
|
||||||
|
handler.endElement(XHTML, "u", "u");
|
||||||
|
curUnderlined = false;
|
||||||
|
}
|
||||||
|
if (curItalic) {
|
||||||
|
handler.endElement(XHTML, "i", "i");
|
||||||
|
curItalic = false;
|
||||||
|
}
|
||||||
|
if (currTextStyle.bold) {
|
||||||
|
handler.startElement(XHTML, "b", "b", EMPTY_ATTRIBUTES);
|
||||||
|
} else {
|
||||||
|
handler.endElement(XHTML, "b", "b");
|
||||||
|
}
|
||||||
|
curBold = currTextStyle.bold;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (currTextStyle.italic != curItalic) {
|
||||||
|
// Enforce nesting -- must close s tag
|
||||||
|
if (curUnderlined) {
|
||||||
|
handler.endElement(XHTML, "u", "u");
|
||||||
|
curUnderlined = false;
|
||||||
|
}
|
||||||
|
if (currTextStyle.italic) {
|
||||||
|
handler.startElement(XHTML, "i", "i", EMPTY_ATTRIBUTES);
|
||||||
|
} else {
|
||||||
|
handler.endElement(XHTML, "i", "i");
|
||||||
|
}
|
||||||
|
curItalic = currTextStyle.italic;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (currTextStyle.underlined != curUnderlined) {
|
||||||
|
if (currTextStyle.underlined) {
|
||||||
|
handler.startElement(XHTML, "u", "u", EMPTY_ATTRIBUTES);
|
||||||
|
} else {
|
||||||
|
handler.endElement(XHTML, "u", "u");
|
||||||
|
}
|
||||||
|
curUnderlined = currTextStyle.underlined;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void endSpan() throws SAXException {
|
||||||
|
updateStyleTags();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void closeStyleTags() throws SAXException {
|
||||||
|
// Close any still open style tags
|
||||||
|
if (curUnderlined) {
|
||||||
|
handler.endElement(XHTML, "u", "u");
|
||||||
|
curUnderlined = false;
|
||||||
|
}
|
||||||
|
if (curItalic) {
|
||||||
|
handler.endElement(XHTML, "i", "i");
|
||||||
|
curItalic = false;
|
||||||
|
}
|
||||||
|
if (curBold) {
|
||||||
|
handler.endElement(XHTML, "b", "b");
|
||||||
|
curBold = false;
|
||||||
|
}
|
||||||
|
currTextStyle = null;
|
||||||
|
hasWrittenStartStyleTags = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void startElement(String namespaceURI, String localName, String qName, Attributes attrs)
|
||||||
|
throws SAXException {
|
||||||
|
|
||||||
|
if (DRAW_NS.equals(namespaceURI) && "image".equals(localName)) {
|
||||||
|
String link = attrs.getValue(XLINK_NS, "href");
|
||||||
|
AttributesImpl attr = new AttributesImpl();
|
||||||
|
if (!StringUtils.isEmpty(link)) {
|
||||||
|
attr.addAttribute("", "src", "src", "CDATA", "embedded:" + link);
|
||||||
|
}
|
||||||
|
handler.startElement(XHTMLContentHandler.XHTML, "img", "img", attr);
|
||||||
|
handler.endElement(XHTMLContentHandler.XHTML, "img", "img");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (BINARY_DATA.equals(localName)) {
|
||||||
|
inBinaryData = true;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// keep track of current node type. If it is a text node,
|
||||||
|
// a bit at the current depth its set in textNodeStack.
|
||||||
|
// characters() checks the top bit to determine, if the
|
||||||
|
// actual node is a text node to print out nodeDepth contains
|
||||||
|
// the depth of the current node and also marks top of stack.
|
||||||
|
assert nodeDepth >= 0;
|
||||||
|
|
||||||
|
// Set styles
|
||||||
|
if (STYLE_NS.equals(namespaceURI) && "style".equals(localName)) {
|
||||||
|
String family = attrs.getValue(STYLE_NS, "family");
|
||||||
|
if ("text".equals(family)) {
|
||||||
|
currTextStyle = new TextStyle();
|
||||||
|
currTextStyleName = attrs.getValue(STYLE_NS, "name");
|
||||||
|
} else if ("paragraph".equals(family)) {
|
||||||
|
currTextStyle = new TextStyle();
|
||||||
|
currParagraphStyleName = attrs.getValue(STYLE_NS, "name");
|
||||||
|
}
|
||||||
|
} else if (TEXT_NS.equals(namespaceURI) && "list-style".equals(localName)) {
|
||||||
|
listStyle = new ListStyle();
|
||||||
|
String name = attrs.getValue(STYLE_NS, "name");
|
||||||
|
listStyleMap.put(name, listStyle);
|
||||||
|
} else if (currTextStyle != null && STYLE_NS.equals(namespaceURI) &&
|
||||||
|
"text-properties".equals(localName)) {
|
||||||
|
String fontStyle = attrs.getValue(FORMATTING_OBJECTS_NS, "font-style");
|
||||||
|
if ("italic".equals(fontStyle) || "oblique".equals(fontStyle)) {
|
||||||
|
currTextStyle.italic = true;
|
||||||
|
}
|
||||||
|
String fontWeight = attrs.getValue(FORMATTING_OBJECTS_NS, "font-weight");
|
||||||
|
if ("bold".equals(fontWeight) || "bolder".equals(fontWeight) ||
|
||||||
|
(fontWeight != null && Character.isDigit(fontWeight.charAt(0)) &&
|
||||||
|
Integer.parseInt(fontWeight) > 500)) {
|
||||||
|
currTextStyle.bold = true;
|
||||||
|
}
|
||||||
|
String underlineStyle = attrs.getValue(STYLE_NS, "text-underline-style");
|
||||||
|
if (underlineStyle != null && !underlineStyle.equals("none")) {
|
||||||
|
currTextStyle.underlined = true;
|
||||||
|
}
|
||||||
|
} else if (listStyle != null && TEXT_NS.equals(namespaceURI)) {
|
||||||
|
if ("list-level-style-bullet".equals(localName)) {
|
||||||
|
listStyle.ordered = false;
|
||||||
|
} else if ("list-level-style-number".equals(localName)) {
|
||||||
|
listStyle.ordered = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
textNodeStack.set(nodeDepth++, isTextNode(namespaceURI, localName));
|
||||||
|
// filter *all* content of some tags
|
||||||
|
assert completelyFiltered >= 0;
|
||||||
|
|
||||||
|
if (needsCompleteFiltering(namespaceURI, localName)) {
|
||||||
|
completelyFiltered++;
|
||||||
|
}
|
||||||
|
// call next handler if no filtering
|
||||||
|
if (completelyFiltered == 0) {
|
||||||
|
// special handling of text:h, that are directly passed
|
||||||
|
// to incoming handler
|
||||||
|
if (TEXT_NS.equals(namespaceURI) && "h".equals(localName)) {
|
||||||
|
final String el = headingStack.push(getXHTMLHeaderTagName(attrs));
|
||||||
|
handler.startElement(XHTMLContentHandler.XHTML, el, el, EMPTY_ATTRIBUTES);
|
||||||
|
} else if (TEXT_NS.equals(namespaceURI) && "list".equals(localName)) {
|
||||||
|
startList(attrs.getValue(TEXT_NS, "style-name"));
|
||||||
|
} else if (TEXT_NS.equals(namespaceURI) && "span".equals(localName)) {
|
||||||
|
startSpan(attrs.getValue(TEXT_NS, "style-name"));
|
||||||
|
} else if (TEXT_NS.equals(namespaceURI) && "p".equals(localName)) {
|
||||||
|
startParagraph(attrs.getValue(TEXT_NS, "style-name"));
|
||||||
|
} else if (TEXT_NS.equals(namespaceURI) && "s".equals(localName)) {
|
||||||
|
handler.characters(SPACE, 0, 1);
|
||||||
|
} else if ("annotation".equals(localName)) {
|
||||||
|
closeStyleTags();
|
||||||
|
handler.startElement(XHTML, "span", "p", ANNOTATION_ATTRIBUTES);
|
||||||
|
} else if ("note".equals(localName)) {
|
||||||
|
closeStyleTags();
|
||||||
|
handler.startElement(XHTML, "span", "p", NOTE_ATTRIBUTES);
|
||||||
|
} else if ("notes".equals(localName)) {
|
||||||
|
closeStyleTags();
|
||||||
|
handler.startElement(XHTML, "span", "p", NOTES_ATTRIBUTES);
|
||||||
|
} else {
|
||||||
|
super.startElement(namespaceURI, localName, qName, attrs);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void endElement(String namespaceURI, String localName, String qName)
|
||||||
|
throws SAXException {
|
||||||
|
if (BINARY_DATA.equals(localName)) {
|
||||||
|
inBinaryData = false;
|
||||||
|
try {
|
||||||
|
processBinaryData();
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new SAXException(e);
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (STYLE_NS.equals(namespaceURI) && "style".equals(localName)) {
|
||||||
|
if (currTextStyle != null && currTextStyleName != null) {
|
||||||
|
textStyleMap.put(currTextStyleName, currTextStyle);
|
||||||
|
currTextStyleName = null;
|
||||||
|
currTextStyle = null;
|
||||||
|
} else if (currTextStyle != null && currParagraphStyleName != null) {
|
||||||
|
paragraphTextStyleMap.put(currParagraphStyleName, currTextStyle);
|
||||||
|
currParagraphStyleName = null;
|
||||||
|
currTextStyle = null;
|
||||||
|
}
|
||||||
|
} else if (TEXT_NS.equals(namespaceURI) && "list-style".equals(localName)) {
|
||||||
|
listStyle = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
// call next handler if no filtering
|
||||||
|
if (completelyFiltered == 0) {
|
||||||
|
// special handling of text:h, that are directly passed
|
||||||
|
// to incoming handler
|
||||||
|
if (TEXT_NS.equals(namespaceURI) && "h".equals(localName)) {
|
||||||
|
final String el = headingStack.pop();
|
||||||
|
handler.endElement(namespaceURI, el, el);
|
||||||
|
} else if (TEXT_NS.equals(namespaceURI) && "list".equals(localName)) {
|
||||||
|
endList();
|
||||||
|
} else if (TEXT_NS.equals(namespaceURI) && "span".equals(localName)) {
|
||||||
|
currTextStyle = null;
|
||||||
|
hasWrittenStartStyleTags = false;
|
||||||
|
} else if (TEXT_NS.equals(namespaceURI) && "p".equals(localName)) {
|
||||||
|
endParagraph();
|
||||||
|
} else if ("annotation".equals(localName) || "note".equals(localName) ||
|
||||||
|
"notes".equals(localName)) {
|
||||||
|
closeStyleTags();
|
||||||
|
handler.endElement(namespaceURI, localName, localName);
|
||||||
|
} else {
|
||||||
|
super.endElement(namespaceURI, localName, qName);
|
||||||
|
}
|
||||||
|
|
||||||
|
// special handling of tabulators
|
||||||
|
if (TEXT_NS.equals(namespaceURI) &&
|
||||||
|
("tab-stop".equals(localName) || "tab".equals(localName))) {
|
||||||
|
this.characters(TAB, 0, TAB.length);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// revert filter for *all* content of some tags
|
||||||
|
if (needsCompleteFiltering(namespaceURI, localName)) {
|
||||||
|
completelyFiltered--;
|
||||||
|
}
|
||||||
|
assert completelyFiltered >= 0;
|
||||||
|
|
||||||
|
// reduce current node depth
|
||||||
|
nodeDepth--;
|
||||||
|
assert nodeDepth >= 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void processBinaryData() throws IOException, SAXException {
|
||||||
|
|
||||||
|
//TODO: figure out whether we're in an inline image or a regular
|
||||||
|
//attachment and add that info to the embedded metadata
|
||||||
|
|
||||||
|
byte[] bytes = Base64.decodeBase64(base64BinaryDataBuffer.toString());
|
||||||
|
//clear state before parsing
|
||||||
|
base64BinaryDataBuffer.setLength(0);
|
||||||
|
inBinaryData = false;
|
||||||
|
|
||||||
|
if (embeddedDocumentExtractor == null) {
|
||||||
|
embeddedDocumentExtractor =
|
||||||
|
EmbeddedDocumentUtil.getEmbeddedDocumentExtractor(parseContext);
|
||||||
|
}
|
||||||
|
Metadata embeddedMetadata = new Metadata();
|
||||||
|
if (embeddedDocumentExtractor.shouldParseEmbedded(embeddedMetadata)) {
|
||||||
|
try (InputStream is = TikaInputStream.get(bytes)) {
|
||||||
|
embeddedDocumentExtractor.parseEmbedded(is, handler, embeddedMetadata, false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void startPrefixMapping(String prefix, String uri) {
|
||||||
|
// remove prefix mappings as they should not occur in XHTML
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void endPrefixMapping(String prefix) {
|
||||||
|
// remove prefix mappings as they should not occur in XHTML
|
||||||
|
}
|
||||||
|
|
||||||
|
private interface Style {
|
||||||
|
}
|
||||||
|
|
||||||
|
private static class TextStyle implements Style {
|
||||||
|
public boolean italic;
|
||||||
|
public boolean bold;
|
||||||
|
public boolean underlined;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return "TextStyle{" + "italic=" + italic + ", bold=" + bold + ", underlined=" +
|
||||||
|
underlined + '}';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static class ListStyle implements Style {
|
||||||
|
public boolean ordered;
|
||||||
|
|
||||||
|
public String getTag() {
|
||||||
|
return ordered ? "ol" : "ul";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
@ -16,591 +16,47 @@
|
|||||||
*/
|
*/
|
||||||
package org.apache.tika.parser.odf;
|
package org.apache.tika.parser.odf;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
import org.apache.commons.io.input.CloseShieldInputStream;
|
import org.apache.commons.io.input.CloseShieldInputStream;
|
||||||
|
import org.xml.sax.ContentHandler;
|
||||||
|
import org.xml.sax.SAXException;
|
||||||
|
import org.xml.sax.helpers.DefaultHandler;
|
||||||
|
|
||||||
import org.apache.tika.exception.TikaException;
|
import org.apache.tika.exception.TikaException;
|
||||||
import org.apache.tika.metadata.Metadata;
|
import org.apache.tika.metadata.Metadata;
|
||||||
import org.apache.tika.mime.MediaType;
|
import org.apache.tika.mime.MediaType;
|
||||||
import org.apache.tika.parser.AbstractParser;
|
import org.apache.tika.parser.AbstractParser;
|
||||||
import org.apache.tika.parser.ParseContext;
|
import org.apache.tika.parser.ParseContext;
|
||||||
import org.apache.tika.sax.ElementMappingContentHandler;
|
|
||||||
import org.apache.tika.sax.ElementMappingContentHandler.TargetElement;
|
|
||||||
import org.apache.tika.sax.OfflineContentHandler;
|
import org.apache.tika.sax.OfflineContentHandler;
|
||||||
import org.apache.tika.sax.XHTMLContentHandler;
|
import org.apache.tika.sax.XHTMLContentHandler;
|
||||||
import org.apache.tika.utils.XMLReaderUtils;
|
import org.apache.tika.utils.XMLReaderUtils;
|
||||||
import org.xml.sax.Attributes;
|
|
||||||
import org.xml.sax.ContentHandler;
|
|
||||||
import org.xml.sax.SAXException;
|
|
||||||
import org.xml.sax.helpers.AttributesImpl;
|
|
||||||
import org.xml.sax.helpers.DefaultHandler;
|
|
||||||
|
|
||||||
import javax.xml.namespace.QName;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.InputStream;
|
|
||||||
import java.util.BitSet;
|
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.Set;
|
|
||||||
import java.util.Stack;
|
|
||||||
|
|
||||||
import static org.apache.tika.sax.XHTMLContentHandler.XHTML;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Parser for ODF <code>content.xml</code> files.
|
* Parser for ODF <code>content.xml</code> files.
|
||||||
*/
|
*/
|
||||||
public class OpenDocumentContentParser extends AbstractParser {
|
public class OpenDocumentContentParser extends AbstractParser {
|
||||||
private interface Style {
|
|
||||||
}
|
|
||||||
|
|
||||||
private static class TextStyle implements Style {
|
|
||||||
public boolean italic;
|
|
||||||
public boolean bold;
|
|
||||||
public boolean underlined;
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String toString() {
|
|
||||||
return "TextStyle{" +
|
|
||||||
"italic=" + italic +
|
|
||||||
", bold=" + bold +
|
|
||||||
", underlined=" + underlined +
|
|
||||||
'}';
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private static class ListStyle implements Style {
|
|
||||||
public boolean ordered;
|
|
||||||
|
|
||||||
public String getTag() {
|
|
||||||
return ordered ? "ol" : "ul";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private static final class OpenDocumentElementMappingContentHandler extends
|
|
||||||
ElementMappingContentHandler {
|
|
||||||
private static final char[] SPACE = new char[]{ ' '};
|
|
||||||
private static final String CLASS = "class";
|
|
||||||
private static final Attributes ANNOTATION_ATTRIBUTES = buildAttributes(CLASS, "annotation");
|
|
||||||
private static final Attributes NOTE_ATTRIBUTES = buildAttributes(CLASS, "note");
|
|
||||||
private static final Attributes NOTES_ATTRIBUTES = buildAttributes(CLASS, "notes");
|
|
||||||
|
|
||||||
private static Attributes buildAttributes(String key, String value) {
|
|
||||||
AttributesImpl attrs = new AttributesImpl();
|
|
||||||
attrs.addAttribute("", key, key, "CDATA", value);
|
|
||||||
return attrs;
|
|
||||||
}
|
|
||||||
|
|
||||||
private final ContentHandler handler;
|
|
||||||
private final BitSet textNodeStack = new BitSet();
|
|
||||||
private int nodeDepth = 0;
|
|
||||||
private int completelyFiltered = 0;
|
|
||||||
private Stack<String> headingStack = new Stack<String>();
|
|
||||||
private Map<String, TextStyle> paragraphTextStyleMap = new HashMap<String, TextStyle>();
|
|
||||||
private Map<String, TextStyle> textStyleMap = new HashMap<String, TextStyle>();
|
|
||||||
private Map<String, ListStyle> listStyleMap = new HashMap<String, ListStyle>();
|
|
||||||
private String currParagraphStyleName; //paragraph style name
|
|
||||||
private TextStyle currTextStyle; //this is the text style for particular spans/paragraphs
|
|
||||||
private String currTextStyleName;
|
|
||||||
|
|
||||||
private Stack<ListStyle> listStyleStack = new Stack<ListStyle>();
|
|
||||||
private ListStyle listStyle;
|
|
||||||
|
|
||||||
// True if we are currently in the named style:
|
|
||||||
private boolean curUnderlined;
|
|
||||||
private boolean curBold;
|
|
||||||
private boolean curItalic;
|
|
||||||
|
|
||||||
//have we written the start style tags
|
|
||||||
//yet for the current text style
|
|
||||||
boolean hasWrittenStartStyleTags = false;
|
|
||||||
|
|
||||||
private int pDepth = 0; //<p> can appear inside comments and other things that are already inside <p>
|
|
||||||
//we need to track our pDepth and only output <p> if we're at the main level
|
|
||||||
|
|
||||||
|
|
||||||
private OpenDocumentElementMappingContentHandler(ContentHandler handler,
|
|
||||||
Map<QName, TargetElement> mappings) {
|
|
||||||
super(handler, mappings);
|
|
||||||
this.handler = handler;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
public void characters(char[] ch, int start, int length)
        throws SAXException {
    // only forward content of tags from text:-namespace
    if (completelyFiltered == 0 && nodeDepth > 0
            && textNodeStack.get(nodeDepth - 1)) {
        // Lazily emit the <b>/<i>/<u> tags for the current style just
        // before the first character data of a styled run.
        if (!hasWrittenStartStyleTags) {
            updateStyleTags();
            hasWrittenStartStyleTags = true;
        }
        super.characters(ch, start, length);
    }
}
|
|
||||||
|
|
||||||
// helper for checking tags which need complete filtering
|
|
||||||
// (with sub-tags)
|
|
||||||
private boolean needsCompleteFiltering(
|
|
||||||
String namespaceURI, String localName) {
|
|
||||||
if (TEXT_NS.equals(namespaceURI)) {
|
|
||||||
return localName.endsWith("-template")
|
|
||||||
|| localName.endsWith("-style");
|
|
||||||
}
|
|
||||||
return TABLE_NS.equals(namespaceURI) && "covered-table-cell".equals(localName);
|
|
||||||
}
|
|
||||||
|
|
||||||
// map the heading level to <hX> HTML tags
|
|
||||||
private String getXHTMLHeaderTagName(Attributes atts) {
|
|
||||||
String depthStr = atts.getValue(TEXT_NS, "outline-level");
|
|
||||||
if (depthStr == null) {
|
|
||||||
return "h1";
|
|
||||||
}
|
|
||||||
|
|
||||||
int depth = Integer.parseInt(depthStr);
|
|
||||||
if (depth >= 6) {
|
|
||||||
return "h6";
|
|
||||||
} else if (depth <= 1) {
|
|
||||||
return "h1";
|
|
||||||
} else {
|
|
||||||
return "h" + depth;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Check if a node is a text node
|
|
||||||
*/
|
|
||||||
private boolean isTextNode(String namespaceURI, String localName) {
|
|
||||||
if (TEXT_NS.equals(namespaceURI) && !localName.equals("page-number") && !localName.equals("page-count")) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
if (SVG_NS.equals(namespaceURI)) {
|
|
||||||
return "title".equals(localName) ||
|
|
||||||
"desc".equals(localName);
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
private void startList(String name) throws SAXException {
|
|
||||||
String elementName = "ul";
|
|
||||||
if (name != null) {
|
|
||||||
ListStyle style = listStyleMap.get(name);
|
|
||||||
elementName = style != null ? style.getTag() : "ul";
|
|
||||||
listStyleStack.push(style);
|
|
||||||
}
|
|
||||||
handler.startElement(XHTML, elementName, elementName, EMPTY_ATTRIBUTES);
|
|
||||||
}
|
|
||||||
|
|
||||||
private void endList() throws SAXException {
|
|
||||||
String elementName = "ul";
|
|
||||||
if (!listStyleStack.isEmpty()) {
|
|
||||||
ListStyle style = listStyleStack.pop();
|
|
||||||
elementName = style != null ? style.getTag() : "ul";
|
|
||||||
}
|
|
||||||
handler.endElement(XHTML, elementName, elementName);
|
|
||||||
}
|
|
||||||
|
|
||||||
private void startSpan(String name) throws SAXException {
|
|
||||||
if (name == null) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
currTextStyle = textStyleMap.get(name);
|
|
||||||
hasWrittenStartStyleTags = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
 * Starts a text:p. A real XHTML <p> is only emitted at the outermost
 * level; nested paragraphs (e.g. inside annotations) are rendered as
 * a single space instead, tracked via pDepth.
 */
private void startParagraph(String styleName) throws SAXException {
    if (pDepth == 0) {
        handler.startElement(XHTML, "p", "p", EMPTY_ATTRIBUTES);
        if (styleName != null) {
            currTextStyle = paragraphTextStyleMap.get(styleName);
        }
        hasWrittenStartStyleTags = false;
    } else {
        // Nested paragraph: separate with a space instead of <p>.
        handler.characters(SPACE, 0, SPACE.length);
    }
    pDepth++;
}
|
|
||||||
|
|
||||||
/**
 * Ends a text:p, closing any open style tags first. Mirrors
 * startParagraph(): only the outermost paragraph emits </p>.
 */
private void endParagraph() throws SAXException {
    closeStyleTags();
    if (pDepth == 1) {
        handler.endElement(XHTML, "p", "p");
    } else {
        // Nested paragraph was rendered as a space; end it the same way.
        handler.characters(SPACE, 0, SPACE.length);
    }
    pDepth--;

}
|
|
||||||
|
|
||||||
/**
 * Synchronizes the open XHTML style tags with currTextStyle, emitting
 * or closing <b>, <i> and <u> as needed. Tags are kept strictly nested
 * (b outside i outside u), so toggling an outer style first closes any
 * inner tags that are open; the inner checks below then reopen them.
 */
private void updateStyleTags() throws SAXException {

    if (currTextStyle == null) {
        // No style for the current run: close everything still open.
        closeStyleTags();
        return;
    }
    if (currTextStyle.bold != curBold) {
        // Enforce nesting -- must close s and i tags
        if (curUnderlined) {
            handler.endElement(XHTML, "u", "u");
            curUnderlined = false;
        }
        if (curItalic) {
            handler.endElement(XHTML, "i", "i");
            curItalic = false;
        }
        if (currTextStyle.bold) {
            handler.startElement(XHTML, "b", "b", EMPTY_ATTRIBUTES);
        } else {
            handler.endElement(XHTML, "b", "b");
        }
        curBold = currTextStyle.bold;
    }

    if (currTextStyle.italic != curItalic) {
        // Enforce nesting -- must close s tag
        if (curUnderlined) {
            handler.endElement(XHTML, "u", "u");
            curUnderlined = false;
        }
        if (currTextStyle.italic) {
            handler.startElement(XHTML, "i", "i", EMPTY_ATTRIBUTES);
        } else {
            handler.endElement(XHTML, "i", "i");
        }
        curItalic = currTextStyle.italic;
    }

    if (currTextStyle.underlined != curUnderlined) {
        if (currTextStyle.underlined) {
            handler.startElement(XHTML, "u", "u", EMPTY_ATTRIBUTES);
        } else {
            handler.endElement(XHTML, "u", "u");
        }
        curUnderlined = currTextStyle.underlined;
    }
}
|
|
||||||
|
|
||||||
// Ends a text:span by re-synchronizing the open style tags with the
// current text style (set back by the caller in endElement()).
private void endSpan() throws SAXException {
    updateStyleTags();
}
|
|
||||||
|
|
||||||
/**
 * Closes any still-open style tags, innermost first (u, then i, then b,
 * matching the nesting order enforced by updateStyleTags()), and resets
 * the tracked style state.
 */
private void closeStyleTags() throws SAXException {
    // Close any still open style tags
    if (curUnderlined) {
        handler.endElement(XHTML, "u", "u");
        curUnderlined = false;
    }
    if (curItalic) {
        handler.endElement(XHTML, "i", "i");
        curItalic = false;
    }
    if (curBold) {
        handler.endElement(XHTML, "b", "b");
        curBold = false;
    }
    currTextStyle = null;
    hasWrittenStartStyleTags = false;
}
|
|
||||||
|
|
||||||
@Override
public void startElement(
        String namespaceURI, String localName, String qName,
        Attributes attrs) throws SAXException {
    // keep track of current node type. If it is a text node,
    // a bit at the current depth is set in textNodeStack.
    // characters() checks the top bit to determine if the
    // actual node is a text node to print out. nodeDepth contains
    // the depth of the current node and also marks top of stack.
    assert nodeDepth >= 0;

    // Set styles
    if (STYLE_NS.equals(namespaceURI) && "style".equals(localName)) {
        String family = attrs.getValue(STYLE_NS, "family");
        if ("text".equals(family)) {
            currTextStyle = new TextStyle();
            currTextStyleName = attrs.getValue(STYLE_NS, "name");
        } else if ("paragraph".equals(family)) {
            currTextStyle = new TextStyle();
            currParagraphStyleName = attrs.getValue(STYLE_NS, "name");
        }
    } else if (TEXT_NS.equals(namespaceURI) && "list-style".equals(localName)) {
        listStyle = new ListStyle();
        String name = attrs.getValue(STYLE_NS, "name");
        listStyleMap.put(name, listStyle);
    } else if (currTextStyle != null && STYLE_NS.equals(namespaceURI)
            && "text-properties".equals(localName)) {
        // Fill in the style currently being defined from its attributes.
        String fontStyle = attrs.getValue(FORMATTING_OBJECTS_NS, "font-style");
        if ("italic".equals(fontStyle) || "oblique".equals(fontStyle)) {
            currTextStyle.italic = true;
        }
        String fontWeight = attrs.getValue(FORMATTING_OBJECTS_NS, "font-weight");
        // Numeric weights above 500 also count as bold.
        if ("bold".equals(fontWeight) || "bolder".equals(fontWeight)
                || (fontWeight != null && Character.isDigit(fontWeight.charAt(0))
                && Integer.valueOf(fontWeight) > 500)) {
            currTextStyle.bold = true;
        }
        String underlineStyle = attrs.getValue(STYLE_NS, "text-underline-style");
        if (underlineStyle != null && !underlineStyle.equals("none")) {
            currTextStyle.underlined = true;
        }
    } else if (listStyle != null && TEXT_NS.equals(namespaceURI)) {
        // Bullet vs. numbered decides <ul> vs. <ol> later on.
        if ("list-level-style-bullet".equals(localName)) {
            listStyle.ordered = false;
        } else if ("list-level-style-number".equals(localName)) {
            listStyle.ordered = true;
        }
    }

    textNodeStack.set(nodeDepth++,
            isTextNode(namespaceURI, localName));
    // filter *all* content of some tags
    assert completelyFiltered >= 0;

    if (needsCompleteFiltering(namespaceURI, localName)) {
        completelyFiltered++;
    }
    // call next handler if no filtering
    if (completelyFiltered == 0) {
        // special handling of text:h, that are directly passed
        // to incoming handler
        if (TEXT_NS.equals(namespaceURI) && "h".equals(localName)) {
            final String el = headingStack.push(getXHTMLHeaderTagName(attrs));
            handler.startElement(XHTMLContentHandler.XHTML, el, el, EMPTY_ATTRIBUTES);
        } else if (TEXT_NS.equals(namespaceURI) && "list".equals(localName)) {
            startList(attrs.getValue(TEXT_NS, "style-name"));
        } else if (TEXT_NS.equals(namespaceURI) && "span".equals(localName)) {
            startSpan(attrs.getValue(TEXT_NS, "style-name"));
        } else if (TEXT_NS.equals(namespaceURI) && "p".equals(localName)) {
            startParagraph(attrs.getValue(TEXT_NS, "style-name"));
        } else if (TEXT_NS.equals(namespaceURI) && "s".equals(localName)) {
            // text:s is an explicit space
            handler.characters(SPACE, 0, 1);
        } else if ("annotation".equals(localName)) {
            closeStyleTags();
            // NOTE(review): localName "span" vs qName "p" looks inconsistent
            // here and below -- confirm intended before changing.
            handler.startElement(XHTML, "span", "p", ANNOTATION_ATTRIBUTES);
        } else if ("note".equals(localName)) {
            closeStyleTags();
            handler.startElement(XHTML, "span", "p", NOTE_ATTRIBUTES);
        } else if ("notes".equals(localName)) {
            closeStyleTags();
            handler.startElement(XHTML, "span", "p", NOTES_ATTRIBUTES);
        } else {
            super.startElement(namespaceURI, localName, qName, attrs);
        }
    }
}
|
|
||||||
|
|
||||||
@Override
public void endElement(
        String namespaceURI, String localName, String qName)
        throws SAXException {
    // Register a finished style definition under its recorded name.
    if (STYLE_NS.equals(namespaceURI) && "style".equals(localName)) {
        if (currTextStyle != null && currTextStyleName != null) {
            textStyleMap.put(currTextStyleName, currTextStyle);
            currTextStyleName = null;
            currTextStyle = null;
        } else if (currTextStyle != null && currParagraphStyleName != null) {
            paragraphTextStyleMap.put(currParagraphStyleName, currTextStyle);
            currParagraphStyleName = null;
            currTextStyle = null;
        }
    } else if (TEXT_NS.equals(namespaceURI) && "list-style".equals(localName)) {
        listStyle = null;
    }

    // call next handler if no filtering
    if (completelyFiltered == 0) {
        // special handling of text:h, that are directly passed
        // to incoming handler
        if (TEXT_NS.equals(namespaceURI) && "h".equals(localName)) {
            final String el = headingStack.pop();
            handler.endElement(XHTMLContentHandler.XHTML, el, el);
        } else if (TEXT_NS.equals(namespaceURI) && "list".equals(localName)) {
            endList();
        } else if (TEXT_NS.equals(namespaceURI) && "span".equals(localName)) {
            currTextStyle = null;
            hasWrittenStartStyleTags = false;
        } else if (TEXT_NS.equals(namespaceURI) && "p".equals(localName)) {
            endParagraph();
        } else if ("annotation".equals(localName) || "note".equals(localName) ||
                "notes".equals(localName)) {
            closeStyleTags();
            // NOTE(review): startElement opened these as (XHTML, "span", "p");
            // ending with ("", localName, localName) is asymmetric -- confirm intended.
            handler.endElement("", localName, localName);
        } else {
            super.endElement(namespaceURI, localName, qName);
        }

        // special handling of tabulators
        if (TEXT_NS.equals(namespaceURI)
                && ("tab-stop".equals(localName)
                || "tab".equals(localName))) {
            this.characters(TAB, 0, TAB.length);
        }
    }

    // revert filter for *all* content of some tags
    if (needsCompleteFiltering(namespaceURI, localName)) {
        completelyFiltered--;
    }
    assert completelyFiltered >= 0;

    // reduce current node depth
    nodeDepth--;
    assert nodeDepth >= 0;
}
|
|
||||||
|
|
||||||
@Override
public void startPrefixMapping(String prefix, String uri) {
    // Intentionally dropped: namespace prefix mappings
    // should not occur in the XHTML output.
}
|
|
||||||
|
|
||||||
@Override
public void endPrefixMapping(String prefix) {
    // Intentionally dropped: namespace prefix mappings
    // should not occur in the XHTML output.
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// ODF text content namespace (text:*)
public static final String TEXT_NS =
        "urn:oasis:names:tc:opendocument:xmlns:text:1.0";

// ODF table namespace (table:*)
public static final String TABLE_NS =
        "urn:oasis:names:tc:opendocument:xmlns:table:1.0";

// ODF style definition namespace (style:*)
public static final String STYLE_NS =
        "urn:oasis:names:tc:opendocument:xmlns:style:1.0";

// XSL-FO compatible formatting attributes (fo:*), e.g. font-weight
public static final String FORMATTING_OBJECTS_NS =
        "urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0";

// ODF office document structure namespace (office:*)
public static final String OFFICE_NS =
        "urn:oasis:names:tc:opendocument:xmlns:office:1.0";

// SVG-compatible namespace (svg:*), used for title/desc text
public static final String SVG_NS =
        "urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0";

// ODF presentation namespace (presentation:*), e.g. speaker notes
public static final String PRESENTATION_NS =
        "urn:oasis:names:tc:opendocument:xmlns:presentation:1.0";

// ODF drawing namespace (draw:*), e.g. embedded objects and text boxes
public static final String DRAW_NS =
        "urn:oasis:names:tc:opendocument:xmlns:drawing:1.0";

// XLink namespace used for hyperlink attributes (xlink:href etc.)
public static final String XLINK_NS = "http://www.w3.org/1999/xlink";

// Single tab character, emitted for text:tab / text:tab-stop
protected static final char[] TAB = new char[]{'\t'};

// Shared empty attribute list for generated XHTML start tags
private static final Attributes EMPTY_ATTRIBUTES = new AttributesImpl();
|
|
||||||
|
|
||||||
/**
 * Mappings between ODF tag names and XHTML tag names
 * (including attributes). All other tag names/attributes are ignored
 * and left out from event stream.
 */
private static final HashMap<QName, TargetElement> MAPPINGS =
        new HashMap<QName, TargetElement>();

static {
    // general mappings of text:-tags
    MAPPINGS.put(
            new QName(TEXT_NS, "p"),
            new TargetElement(XHTML, "p"));
    // text:h-tags are mapped specifically in startElement/endElement
    MAPPINGS.put(
            new QName(TEXT_NS, "line-break"),
            new TargetElement(XHTML, "br"));
    MAPPINGS.put(
            new QName(TEXT_NS, "list-item"),
            new TargetElement(XHTML, "li"));
    MAPPINGS.put(
            new QName(TEXT_NS, "note"),
            new TargetElement(XHTML, "span"));
    MAPPINGS.put(
            new QName(OFFICE_NS, "annotation"),
            new TargetElement(XHTML, "span"));
    MAPPINGS.put(
            new QName(PRESENTATION_NS, "notes"),
            new TargetElement(XHTML, "span"));
    MAPPINGS.put(
            new QName(DRAW_NS, "object"),
            new TargetElement(XHTML, "object"));
    MAPPINGS.put(
            new QName(DRAW_NS, "text-box"),
            new TargetElement(XHTML, "div"));
    MAPPINGS.put(
            new QName(SVG_NS, "title"),
            new TargetElement(XHTML, "span"));
    MAPPINGS.put(
            new QName(SVG_NS, "desc"),
            new TargetElement(XHTML, "span"));
    MAPPINGS.put(
            new QName(TEXT_NS, "span"),
            new TargetElement(XHTML, "span"));

    // text:a hyperlinks keep their href/title, renamed from xlink:*
    final HashMap<QName, QName> aAttsMapping =
            new HashMap<QName, QName>();
    aAttsMapping.put(
            new QName(XLINK_NS, "href"),
            new QName("href"));
    aAttsMapping.put(
            new QName(XLINK_NS, "title"),
            new QName("title"));
    MAPPINGS.put(
            new QName(TEXT_NS, "a"),
            new TargetElement(XHTML, "a", aAttsMapping));

    // create HTML tables from table:-tags
    MAPPINGS.put(
            new QName(TABLE_NS, "table"),
            new TargetElement(XHTML, "table"));
    // repeating of rows is ignored; for columns, see below!
    MAPPINGS.put(
            new QName(TABLE_NS, "table-row"),
            new TargetElement(XHTML, "tr"));
    // special mapping for rowspan/colspan attributes
    final HashMap<QName, QName> tableCellAttsMapping =
            new HashMap<QName, QName>();
    tableCellAttsMapping.put(
            new QName(TABLE_NS, "number-columns-spanned"),
            new QName("colspan"));
    tableCellAttsMapping.put(
            new QName(TABLE_NS, "number-rows-spanned"),
            new QName("rowspan"));
    /* TODO: The following is not correct, the cell should be repeated not spanned!
     * Code generates a HTML cell, spanning all repeated columns, to make the cell look correct.
     * Problems may occur when both spanning and repeating is given, which is not allowed by spec.
     * Cell spanning instead of repeating is not a problem, because OpenOffice uses it
     * only for empty cells.
     */
    tableCellAttsMapping.put(
            new QName(TABLE_NS, "number-columns-repeated"),
            new QName("colspan"));
    MAPPINGS.put(
            new QName(TABLE_NS, "table-cell"),
            new TargetElement(XHTML, "td", tableCellAttsMapping));
}
|
|
||||||
|
|
||||||
public Set<MediaType> getSupportedTypes(ParseContext context) {
|
public Set<MediaType> getSupportedTypes(ParseContext context) {
|
||||||
return Collections.emptySet(); // not a top-level parser
|
return Collections.emptySet(); // not a top-level parser
|
||||||
}
|
}
|
||||||
|
|
||||||
public void parse(
|
public void parse(InputStream stream, ContentHandler handler, Metadata metadata,
|
||||||
InputStream stream, ContentHandler handler,
|
ParseContext context) throws IOException, SAXException, TikaException {
|
||||||
Metadata metadata, ParseContext context)
|
parseInternal(stream, new XHTMLContentHandler(handler, metadata), metadata, context);
|
||||||
throws IOException, SAXException, TikaException {
|
|
||||||
parseInternal(stream,
|
|
||||||
new XHTMLContentHandler(handler, metadata),
|
|
||||||
metadata, context);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void parseInternal(
|
void parseInternal(InputStream stream, final ContentHandler handler, Metadata metadata,
|
||||||
InputStream stream, final ContentHandler handler,
|
ParseContext context) throws IOException, SAXException, TikaException {
|
||||||
Metadata metadata, ParseContext context)
|
|
||||||
throws IOException, SAXException, TikaException {
|
|
||||||
|
|
||||||
DefaultHandler dh = new OpenDocumentElementMappingContentHandler(handler, MAPPINGS);
|
DefaultHandler dh = new OpenDocumentBodyHandler(handler, context);
|
||||||
|
|
||||||
|
|
||||||
XMLReaderUtils.parseSAX(
|
XMLReaderUtils.parseSAX(new CloseShieldInputStream(stream),
|
||||||
new CloseShieldInputStream(stream),
|
new OfflineContentHandler(new NSNormalizerContentHandler(dh)), context);
|
||||||
new OfflineContentHandler(
|
|
||||||
new NSNormalizerContentHandler(dh)),
|
|
||||||
context);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,60 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.tika.parser.odf;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.xml.sax.Attributes;
|
||||||
|
import org.xml.sax.ContentHandler;
|
||||||
|
import org.xml.sax.SAXException;
|
||||||
|
|
||||||
|
import org.apache.tika.parser.ParseContext;
|
||||||
|
import org.apache.tika.utils.XMLReaderUtils;
|
||||||
|
|
||||||
|
|
||||||
|
class OpenDocumentMacroHandler extends FlatOpenDocumentMacroHandler {
|
||||||
|
|
||||||
|
OpenDocumentMacroHandler(ContentHandler contentHandler, ParseContext parseContext) {
|
||||||
|
super(contentHandler, parseContext);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void startElement(String namespaceURI, String localName, String qName, Attributes attrs)
|
||||||
|
throws SAXException {
|
||||||
|
//in the compressed odf, there should only be one element in this file.
|
||||||
|
if (MODULE.equalsIgnoreCase(localName)) {
|
||||||
|
inMacro = true;
|
||||||
|
macroName = XMLReaderUtils.getAttrValue(NAME, attrs);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void endElement(String namespaceURI, String localName, String qName)
|
||||||
|
throws SAXException {
|
||||||
|
if (MODULE.equals(localName)) {
|
||||||
|
try {
|
||||||
|
handleMacro();
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new SAXException(e);
|
||||||
|
} finally {
|
||||||
|
//this shouldn't be necessary in the compressed odf files
|
||||||
|
resetMacroState();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,45 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.tika.parser.odf;
|
||||||
|
|
||||||
|
import org.xml.sax.Attributes;
|
||||||
|
import org.xml.sax.SAXException;
|
||||||
|
|
||||||
|
import org.apache.tika.exception.EncryptedDocumentException;
|
||||||
|
import org.apache.tika.sax.ContentHandlerDecorator;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* For now, this only looks for any encryption-data elements.
|
||||||
|
* If found this will throw an EncryptedDocumentException wrapped
|
||||||
|
* in a SAXException.
|
||||||
|
*
|
||||||
|
* If desired, we can add to this to actually extract information
|
||||||
|
* necessary for decryption. Please open an issue or pull
|
||||||
|
* request for this added functionality.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
class OpenDocumentManifestHandler extends ContentHandlerDecorator {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void startElement(
|
||||||
|
String namespaceURI, String localName, String qName,
|
||||||
|
Attributes attrs) throws SAXException {
|
||||||
|
if (localName.equals("encryption-data")) {
|
||||||
|
throw new SAXException(new EncryptedDocumentException());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -16,12 +16,21 @@
|
|||||||
*/
|
*/
|
||||||
package org.apache.tika.parser.odf;
|
package org.apache.tika.parser.odf;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.xml.sax.ContentHandler;
|
||||||
|
import org.xml.sax.SAXException;
|
||||||
|
|
||||||
import org.apache.tika.exception.TikaException;
|
import org.apache.tika.exception.TikaException;
|
||||||
import org.apache.tika.metadata.DublinCore;
|
import org.apache.tika.metadata.DublinCore;
|
||||||
import org.apache.tika.metadata.MSOffice;
|
|
||||||
import org.apache.tika.metadata.Metadata;
|
import org.apache.tika.metadata.Metadata;
|
||||||
import org.apache.tika.metadata.Office;
|
import org.apache.tika.metadata.Office;
|
||||||
import org.apache.tika.metadata.OfficeOpenXMLCore;
|
import org.apache.tika.metadata.OfficeOpenXMLCore;
|
||||||
|
import org.apache.tika.metadata.OfficeOpenXMLExtended;
|
||||||
import org.apache.tika.metadata.PagedText;
|
import org.apache.tika.metadata.PagedText;
|
||||||
import org.apache.tika.metadata.Property;
|
import org.apache.tika.metadata.Property;
|
||||||
import org.apache.tika.metadata.TikaCoreProperties;
|
import org.apache.tika.metadata.TikaCoreProperties;
|
||||||
@ -36,11 +45,6 @@ import org.apache.tika.sax.xpath.CompositeMatcher;
|
|||||||
import org.apache.tika.sax.xpath.Matcher;
|
import org.apache.tika.sax.xpath.Matcher;
|
||||||
import org.apache.tika.sax.xpath.MatchingContentHandler;
|
import org.apache.tika.sax.xpath.MatchingContentHandler;
|
||||||
import org.apache.tika.sax.xpath.XPathParser;
|
import org.apache.tika.sax.xpath.XPathParser;
|
||||||
import org.xml.sax.ContentHandler;
|
|
||||||
import org.xml.sax.SAXException;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.InputStream;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Parser for OpenDocument <code>meta.xml</code> files.
|
* Parser for OpenDocument <code>meta.xml</code> files.
|
||||||
@ -54,68 +58,54 @@ public class OpenDocumentMetaParser extends XMLParser {
|
|||||||
private static final String META_NS = "urn:oasis:names:tc:opendocument:xmlns:meta:1.0";
|
private static final String META_NS = "urn:oasis:names:tc:opendocument:xmlns:meta:1.0";
|
||||||
private static final XPathParser META_XPATH = new XPathParser("meta", META_NS);
|
private static final XPathParser META_XPATH = new XPathParser("meta", META_NS);
|
||||||
|
|
||||||
/**
|
private static ContentHandler getDublinCoreHandler(Metadata metadata, Property property,
|
||||||
* @see OfficeOpenXMLCore#SUBJECT
|
String element) {
|
||||||
* @deprecated use OfficeOpenXMLCore#SUBJECT
|
return new ElementMetadataHandler(DublinCore.NAMESPACE_URI_DC, element, metadata, property);
|
||||||
*/
|
|
||||||
@Deprecated
|
|
||||||
private static final Property TRANSITION_INITIAL_CREATOR_TO_INITIAL_AUTHOR =
|
|
||||||
Property.composite(Office.INITIAL_AUTHOR,
|
|
||||||
new Property[]{Property.externalText("initial-creator")});
|
|
||||||
|
|
||||||
private static ContentHandler getDublinCoreHandler(
|
|
||||||
Metadata metadata, Property property, String element) {
|
|
||||||
return new ElementMetadataHandler(
|
|
||||||
DublinCore.NAMESPACE_URI_DC, element,
|
|
||||||
metadata, property);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static ContentHandler getMeta(
|
private static ContentHandler getMeta(ContentHandler ch, Metadata md, Property property,
|
||||||
ContentHandler ch, Metadata md, Property property, String element) {
|
String element) {
|
||||||
Matcher matcher = new CompositeMatcher(
|
Matcher matcher = new CompositeMatcher(META_XPATH.parse("//meta:" + element),
|
||||||
META_XPATH.parse("//meta:" + element),
|
META_XPATH.parse("//meta:" + element + "//text()"));
|
||||||
META_XPATH.parse("//meta:" + element + "//text()"));
|
|
||||||
ContentHandler branch =
|
ContentHandler branch =
|
||||||
new MatchingContentHandler(new MetadataHandler(md, property), matcher);
|
new MatchingContentHandler(new MetadataHandler(md, property), matcher);
|
||||||
return new TeeContentHandler(ch, branch);
|
return new TeeContentHandler(ch, branch);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static ContentHandler getUserDefined(
|
private static ContentHandler getUserDefined(ContentHandler ch, Metadata md) {
|
||||||
ContentHandler ch, Metadata md) {
|
Matcher matcher = new CompositeMatcher(META_XPATH.parse("//meta:user-defined/@meta:name"),
|
||||||
Matcher matcher = new CompositeMatcher(
|
META_XPATH.parse("//meta:user-defined//text()"));
|
||||||
META_XPATH.parse("//meta:user-defined/@meta:name"),
|
// eg <meta:user-defined meta:name="Info1">Text1</meta:user-defined> becomes
|
||||||
META_XPATH.parse("//meta:user-defined//text()"));
|
// custom:Info1=Text1
|
||||||
// eg <meta:user-defined meta:name="Info1">Text1</meta:user-defined> becomes custom:Info1=Text1
|
|
||||||
ContentHandler branch = new MatchingContentHandler(
|
ContentHandler branch = new MatchingContentHandler(
|
||||||
new AttributeDependantMetadataHandler(md, "meta:name", Metadata.USER_DEFINED_METADATA_NAME_PREFIX),
|
new AttributeDependantMetadataHandler(md, "meta:name",
|
||||||
matcher);
|
Office.USER_DEFINED_METADATA_NAME_PREFIX), matcher);
|
||||||
return new TeeContentHandler(ch, branch);
|
return new TeeContentHandler(ch, branch);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Deprecated
|
@Deprecated
|
||||||
private static ContentHandler getStatistic(
|
private static ContentHandler getStatistic(ContentHandler ch, Metadata md, String name,
|
||||||
ContentHandler ch, Metadata md, String name, String attribute) {
|
String attribute) {
|
||||||
Matcher matcher =
|
Matcher matcher = META_XPATH.parse("//meta:document-statistic/@meta:" + attribute);
|
||||||
META_XPATH.parse("//meta:document-statistic/@meta:" + attribute);
|
|
||||||
ContentHandler branch = new MatchingContentHandler(
|
ContentHandler branch = new MatchingContentHandler(
|
||||||
new AttributeMetadataHandler(META_NS, attribute, md, name), matcher);
|
new AttributeMetadataHandler(META_NS, attribute, md, name), matcher);
|
||||||
return new TeeContentHandler(ch, branch);
|
return new TeeContentHandler(ch, branch);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static ContentHandler getStatistic(
|
private static ContentHandler getStatistic(ContentHandler ch, Metadata md, Property property,
|
||||||
ContentHandler ch, Metadata md, Property property, String attribute) {
|
String attribute) {
|
||||||
Matcher matcher =
|
Matcher matcher = META_XPATH.parse("//meta:document-statistic/@meta:" + attribute);
|
||||||
META_XPATH.parse("//meta:document-statistic/@meta:" + attribute);
|
|
||||||
ContentHandler branch = new MatchingContentHandler(
|
ContentHandler branch = new MatchingContentHandler(
|
||||||
new AttributeMetadataHandler(META_NS, attribute, md, property), matcher);
|
new AttributeMetadataHandler(META_NS, attribute, md, property), matcher);
|
||||||
return new TeeContentHandler(ch, branch);
|
return new TeeContentHandler(ch, branch);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected ContentHandler getContentHandler(ContentHandler ch, Metadata md, ParseContext context) {
|
static ContentHandler getContentHandler(Metadata md, ParseContext context,
|
||||||
|
ContentHandler... handlers) {
|
||||||
// We can no longer extend DcXMLParser due to the handling of dc:subject and dc:date
|
// We can no longer extend DcXMLParser due to the handling of dc:subject and dc:date
|
||||||
// Process the Dublin Core Attributes
|
// Process the Dublin Core Attributes
|
||||||
ch = new TeeContentHandler(super.getContentHandler(ch, md, context),
|
ContentHandler ch =
|
||||||
getDublinCoreHandler(md, TikaCoreProperties.TITLE, "title"),
|
new TeeContentHandler(getDublinCoreHandler(md, TikaCoreProperties.TITLE, "title"),
|
||||||
getDublinCoreHandler(md, TikaCoreProperties.CREATOR, "creator"),
|
getDublinCoreHandler(md, TikaCoreProperties.CREATOR, "creator"),
|
||||||
getDublinCoreHandler(md, TikaCoreProperties.DESCRIPTION, "description"),
|
getDublinCoreHandler(md, TikaCoreProperties.DESCRIPTION, "description"),
|
||||||
getDublinCoreHandler(md, TikaCoreProperties.PUBLISHER, "publisher"),
|
getDublinCoreHandler(md, TikaCoreProperties.PUBLISHER, "publisher"),
|
||||||
@ -129,19 +119,20 @@ public class OpenDocumentMetaParser extends XMLParser {
|
|||||||
// Process the OO Meta Attributes
|
// Process the OO Meta Attributes
|
||||||
ch = getMeta(ch, md, TikaCoreProperties.CREATED, "creation-date");
|
ch = getMeta(ch, md, TikaCoreProperties.CREATED, "creation-date");
|
||||||
// ODF uses dc:date for modified
|
// ODF uses dc:date for modified
|
||||||
ch = new TeeContentHandler(ch, new ElementMetadataHandler(
|
ch = new TeeContentHandler(ch,
|
||||||
DublinCore.NAMESPACE_URI_DC, "date",
|
new ElementMetadataHandler(DublinCore.NAMESPACE_URI_DC, "date", md,
|
||||||
md, TikaCoreProperties.MODIFIED));
|
TikaCoreProperties.MODIFIED));
|
||||||
|
|
||||||
// ODF uses dc:subject for description
|
// ODF uses dc:subject for description
|
||||||
ch = new TeeContentHandler(ch, new ElementMetadataHandler(
|
ch = new TeeContentHandler(ch,
|
||||||
DublinCore.NAMESPACE_URI_DC, "subject",
|
new ElementMetadataHandler(DublinCore.NAMESPACE_URI_DC, "subject", md,
|
||||||
md, TikaCoreProperties.TRANSITION_SUBJECT_TO_OO_SUBJECT));
|
OfficeOpenXMLCore.SUBJECT));
|
||||||
ch = getMeta(ch, md, TikaCoreProperties.TRANSITION_KEYWORDS_TO_DC_SUBJECT, "keyword");
|
|
||||||
|
|
||||||
ch = getMeta(ch, md, Property.externalText(MSOffice.EDIT_TIME), "editing-duration");
|
ch = getMeta(ch, md, Office.KEYWORDS, "keyword");
|
||||||
|
|
||||||
|
ch = getMeta(ch, md, OfficeOpenXMLExtended.TOTAL_TIME, "editing-duration");
|
||||||
ch = getMeta(ch, md, Property.externalText("editing-cycles"), "editing-cycles");
|
ch = getMeta(ch, md, Property.externalText("editing-cycles"), "editing-cycles");
|
||||||
ch = getMeta(ch, md, TRANSITION_INITIAL_CREATOR_TO_INITIAL_AUTHOR, "initial-creator");
|
ch = getMeta(ch, md, TikaCoreProperties.CREATOR, "initial-creator");
|
||||||
ch = getMeta(ch, md, Property.externalText("generator"), "generator");
|
ch = getMeta(ch, md, Property.externalText("generator"), "generator");
|
||||||
|
|
||||||
// Process the user defined Meta Attributes
|
// Process the user defined Meta Attributes
|
||||||
@ -157,43 +148,48 @@ public class OpenDocumentMetaParser extends XMLParser {
|
|||||||
ch = getStatistic(ch, md, Office.WORD_COUNT, "word-count");
|
ch = getStatistic(ch, md, Office.WORD_COUNT, "word-count");
|
||||||
ch = getStatistic(ch, md, Office.CHARACTER_COUNT, "character-count");
|
ch = getStatistic(ch, md, Office.CHARACTER_COUNT, "character-count");
|
||||||
|
|
||||||
// Legacy, Tika-1.0 style attributes
|
if (handlers != null && handlers.length > 0) {
|
||||||
// TODO Remove these in Tika 2.0
|
ContentHandler[] newHandlers = new ContentHandler[handlers.length + 1];
|
||||||
ch = getStatistic(ch, md, MSOffice.OBJECT_COUNT, "object-count");
|
newHandlers[0] = ch;
|
||||||
ch = getStatistic(ch, md, MSOffice.IMAGE_COUNT, "image-count");
|
System.arraycopy(handlers, 0, newHandlers, 1, handlers.length);
|
||||||
ch = getStatistic(ch, md, MSOffice.PAGE_COUNT, "page-count");
|
ch = new TeeContentHandler(newHandlers);
|
||||||
ch = getStatistic(ch, md, MSOffice.TABLE_COUNT, "table-count");
|
}
|
||||||
ch = getStatistic(ch, md, MSOffice.PARAGRAPH_COUNT, "paragraph-count");
|
|
||||||
ch = getStatistic(ch, md, MSOffice.WORD_COUNT, "word-count");
|
|
||||||
ch = getStatistic(ch, md, MSOffice.CHARACTER_COUNT, "character-count");
|
|
||||||
|
|
||||||
// Legacy Statistics Attributes, replaced with real keys above
|
|
||||||
// TODO Remove these shortly, eg after Tika 1.1 (TIKA-770)
|
|
||||||
ch = getStatistic(ch, md, "nbPage", "page-count");
|
|
||||||
ch = getStatistic(ch, md, "nbPara", "paragraph-count");
|
|
||||||
ch = getStatistic(ch, md, "nbWord", "word-count");
|
|
||||||
ch = getStatistic(ch, md, "nbCharacter", "character-count");
|
|
||||||
ch = getStatistic(ch, md, "nbTab", "table-count");
|
|
||||||
ch = getStatistic(ch, md, "nbObject", "object-count");
|
|
||||||
ch = getStatistic(ch, md, "nbImg", "image-count");
|
|
||||||
|
|
||||||
// Normalise the rest
|
// Normalise the rest
|
||||||
ch = new NSNormalizerContentHandler(ch);
|
ch = new NSNormalizerContentHandler(ch);
|
||||||
return ch;
|
return ch;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected ContentHandler getContentHandler(ContentHandler ch, Metadata md,
|
||||||
|
ParseContext context) {
|
||||||
|
return getContentHandler(md, context, super.getContentHandler(ch, md, context));
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void parse(
|
public void parse(InputStream stream, ContentHandler handler, Metadata metadata,
|
||||||
InputStream stream, ContentHandler handler,
|
ParseContext context) throws IOException, SAXException, TikaException {
|
||||||
Metadata metadata, ParseContext context)
|
|
||||||
throws IOException, SAXException, TikaException {
|
|
||||||
super.parse(stream, handler, metadata, context);
|
super.parse(stream, handler, metadata, context);
|
||||||
// Copy subject to description for OO2
|
// Copy subject to description for OO2
|
||||||
String odfSubject = metadata.get(OfficeOpenXMLCore.SUBJECT);
|
String odfSubject = metadata.get(OfficeOpenXMLCore.SUBJECT);
|
||||||
if (odfSubject != null && !odfSubject.equals("") &&
|
if (odfSubject != null && !odfSubject.equals("") &&
|
||||||
(metadata.get(TikaCoreProperties.DESCRIPTION) == null || metadata.get(TikaCoreProperties.DESCRIPTION).equals(""))) {
|
(metadata.get(TikaCoreProperties.DESCRIPTION) == null ||
|
||||||
|
metadata.get(TikaCoreProperties.DESCRIPTION).equals(""))) {
|
||||||
metadata.set(TikaCoreProperties.DESCRIPTION, odfSubject);
|
metadata.set(TikaCoreProperties.DESCRIPTION, odfSubject);
|
||||||
}
|
}
|
||||||
|
//reset the dc:subject to include both keywords and subject
|
||||||
|
//We can't relying on composite keys in the MatchingContentHandlers
|
||||||
|
//because those are "setting" not "adding" to the Metadata object
|
||||||
|
List<String> subjects = new ArrayList<>();
|
||||||
|
if (metadata.getValues(Office.KEYWORDS) != null) {
|
||||||
|
subjects.addAll(Arrays.asList(metadata.getValues(Office.KEYWORDS)));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (metadata.getValues(OfficeOpenXMLCore.SUBJECT) != null) {
|
||||||
|
subjects.addAll(Arrays.asList(metadata.getValues(OfficeOpenXMLCore.SUBJECT)));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (subjects.size() > 0) {
|
||||||
|
metadata.set(TikaCoreProperties.SUBJECT, subjects.toArray(new String[0]));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -16,37 +16,44 @@
|
|||||||
*/
|
*/
|
||||||
package org.apache.tika.parser.odf;
|
package org.apache.tika.parser.odf;
|
||||||
|
|
||||||
|
import static java.nio.charset.StandardCharsets.UTF_8;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.Enumeration;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.zip.ZipEntry;
|
||||||
|
import java.util.zip.ZipFile;
|
||||||
|
import java.util.zip.ZipInputStream;
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.commons.io.input.CloseShieldInputStream;
|
||||||
|
import org.xml.sax.ContentHandler;
|
||||||
|
import org.xml.sax.SAXException;
|
||||||
|
import org.xml.sax.helpers.DefaultHandler;
|
||||||
|
|
||||||
|
import org.apache.tika.config.Field;
|
||||||
|
import org.apache.tika.exception.EncryptedDocumentException;
|
||||||
import org.apache.tika.exception.TikaException;
|
import org.apache.tika.exception.TikaException;
|
||||||
import org.apache.tika.extractor.EmbeddedDocumentExtractor;
|
import org.apache.tika.exception.WriteLimitReachedException;
|
||||||
import org.apache.tika.extractor.EmbeddedDocumentUtil;
|
import org.apache.tika.extractor.EmbeddedDocumentUtil;
|
||||||
import org.apache.tika.io.TikaInputStream;
|
import org.apache.tika.io.TikaInputStream;
|
||||||
import org.apache.tika.metadata.Metadata;
|
import org.apache.tika.metadata.Metadata;
|
||||||
import org.apache.tika.metadata.TikaCoreProperties;
|
import org.apache.tika.metadata.TikaCoreProperties;
|
||||||
import org.apache.tika.metadata.TikaMetadataKeys;
|
|
||||||
import org.apache.tika.mime.MediaType;
|
import org.apache.tika.mime.MediaType;
|
||||||
import org.apache.tika.parser.AbstractParser;
|
import org.apache.tika.parser.AbstractParser;
|
||||||
import org.apache.tika.parser.ParseContext;
|
import org.apache.tika.parser.ParseContext;
|
||||||
import org.apache.tika.parser.Parser;
|
import org.apache.tika.parser.Parser;
|
||||||
import org.apache.tika.sax.EmbeddedContentHandler;
|
import org.apache.tika.sax.EmbeddedContentHandler;
|
||||||
import org.apache.tika.sax.EndDocumentShieldingContentHandler;
|
import org.apache.tika.sax.EndDocumentShieldingContentHandler;
|
||||||
|
import org.apache.tika.sax.OfflineContentHandler;
|
||||||
import org.apache.tika.sax.XHTMLContentHandler;
|
import org.apache.tika.sax.XHTMLContentHandler;
|
||||||
import org.xml.sax.ContentHandler;
|
import org.apache.tika.utils.XMLReaderUtils;
|
||||||
import org.xml.sax.SAXException;
|
|
||||||
import org.xml.sax.helpers.DefaultHandler;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.InputStream;
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.Enumeration;
|
|
||||||
import java.util.HashSet;
|
|
||||||
import java.util.Set;
|
|
||||||
import java.util.zip.ZipEntry;
|
|
||||||
import java.util.zip.ZipFile;
|
|
||||||
import java.util.zip.ZipInputStream;
|
|
||||||
|
|
||||||
import static java.nio.charset.StandardCharsets.UTF_8;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* OpenOffice parser
|
* OpenOffice parser
|
||||||
@ -58,47 +65,48 @@ public class OpenDocumentParser extends AbstractParser {
|
|||||||
*/
|
*/
|
||||||
private static final long serialVersionUID = -6410276875438618287L;
|
private static final long serialVersionUID = -6410276875438618287L;
|
||||||
|
|
||||||
private static final Set<MediaType> SUPPORTED_TYPES =
|
private static final Set<MediaType> SUPPORTED_TYPES = Collections.unmodifiableSet(
|
||||||
Collections.unmodifiableSet(new HashSet<MediaType>(Arrays.asList(
|
new HashSet<>(Arrays.asList(MediaType.application("vnd.sun.xml.writer"),
|
||||||
MediaType.application("vnd.sun.xml.writer"),
|
MediaType.application("vnd.oasis.opendocument.text"),
|
||||||
MediaType.application("vnd.oasis.opendocument.text"),
|
MediaType.application("vnd.oasis.opendocument.graphics"),
|
||||||
MediaType.application("vnd.oasis.opendocument.graphics"),
|
MediaType.application("vnd.oasis.opendocument.presentation"),
|
||||||
MediaType.application("vnd.oasis.opendocument.presentation"),
|
MediaType.application("vnd.oasis.opendocument.spreadsheet"),
|
||||||
MediaType.application("vnd.oasis.opendocument.spreadsheet"),
|
MediaType.application("vnd.oasis.opendocument.chart"),
|
||||||
MediaType.application("vnd.oasis.opendocument.chart"),
|
MediaType.application("vnd.oasis.opendocument.image"),
|
||||||
MediaType.application("vnd.oasis.opendocument.image"),
|
MediaType.application("vnd.oasis.opendocument.formula"),
|
||||||
MediaType.application("vnd.oasis.opendocument.formula"),
|
MediaType.application("vnd.oasis.opendocument.text-master"),
|
||||||
MediaType.application("vnd.oasis.opendocument.text-master"),
|
MediaType.application("vnd.oasis.opendocument.text-web"),
|
||||||
MediaType.application("vnd.oasis.opendocument.text-web"),
|
MediaType.application("vnd.oasis.opendocument.text-template"),
|
||||||
MediaType.application("vnd.oasis.opendocument.text-template"),
|
MediaType.application("vnd.oasis.opendocument.graphics-template"),
|
||||||
MediaType.application("vnd.oasis.opendocument.graphics-template"),
|
MediaType.application("vnd.oasis.opendocument.presentation-template"),
|
||||||
MediaType.application("vnd.oasis.opendocument.presentation-template"),
|
MediaType.application("vnd.oasis.opendocument.spreadsheet-template"),
|
||||||
MediaType.application("vnd.oasis.opendocument.spreadsheet-template"),
|
MediaType.application("vnd.oasis.opendocument.chart-template"),
|
||||||
MediaType.application("vnd.oasis.opendocument.chart-template"),
|
MediaType.application("vnd.oasis.opendocument.image-template"),
|
||||||
MediaType.application("vnd.oasis.opendocument.image-template"),
|
MediaType.application("vnd.oasis.opendocument.formula-template"),
|
||||||
MediaType.application("vnd.oasis.opendocument.formula-template"),
|
MediaType.application("x-vnd.oasis.opendocument.text"),
|
||||||
MediaType.application("x-vnd.oasis.opendocument.text"),
|
MediaType.application("x-vnd.oasis.opendocument.graphics"),
|
||||||
MediaType.application("x-vnd.oasis.opendocument.graphics"),
|
MediaType.application("x-vnd.oasis.opendocument.presentation"),
|
||||||
MediaType.application("x-vnd.oasis.opendocument.presentation"),
|
MediaType.application("x-vnd.oasis.opendocument.spreadsheet"),
|
||||||
MediaType.application("x-vnd.oasis.opendocument.spreadsheet"),
|
MediaType.application("x-vnd.oasis.opendocument.chart"),
|
||||||
MediaType.application("x-vnd.oasis.opendocument.chart"),
|
MediaType.application("x-vnd.oasis.opendocument.image"),
|
||||||
MediaType.application("x-vnd.oasis.opendocument.image"),
|
MediaType.application("x-vnd.oasis.opendocument.formula"),
|
||||||
MediaType.application("x-vnd.oasis.opendocument.formula"),
|
MediaType.application("x-vnd.oasis.opendocument.text-master"),
|
||||||
MediaType.application("x-vnd.oasis.opendocument.text-master"),
|
MediaType.application("x-vnd.oasis.opendocument.text-web"),
|
||||||
MediaType.application("x-vnd.oasis.opendocument.text-web"),
|
MediaType.application("x-vnd.oasis.opendocument.text-template"),
|
||||||
MediaType.application("x-vnd.oasis.opendocument.text-template"),
|
MediaType.application("x-vnd.oasis.opendocument.graphics-template"),
|
||||||
MediaType.application("x-vnd.oasis.opendocument.graphics-template"),
|
MediaType.application("x-vnd.oasis.opendocument.presentation-template"),
|
||||||
MediaType.application("x-vnd.oasis.opendocument.presentation-template"),
|
MediaType.application("x-vnd.oasis.opendocument.spreadsheet-template"),
|
||||||
MediaType.application("x-vnd.oasis.opendocument.spreadsheet-template"),
|
MediaType.application("x-vnd.oasis.opendocument.chart-template"),
|
||||||
MediaType.application("x-vnd.oasis.opendocument.chart-template"),
|
MediaType.application("x-vnd.oasis.opendocument.image-template"),
|
||||||
MediaType.application("x-vnd.oasis.opendocument.image-template"),
|
MediaType.application("x-vnd.oasis.opendocument.formula-template"))));
|
||||||
MediaType.application("x-vnd.oasis.opendocument.formula-template"))));
|
|
||||||
|
|
||||||
private static final String META_NAME = "meta.xml";
|
private static final String META_NAME = "meta.xml";
|
||||||
|
private static final String MANIFEST_NAME = "META-INF/manifest.xml";
|
||||||
|
|
||||||
private Parser meta = new OpenDocumentMetaParser();
|
private Parser meta = new OpenDocumentMetaParser();
|
||||||
|
|
||||||
private Parser content = new OpenDocumentContentParser();
|
private Parser content = new OpenDocumentContentParser();
|
||||||
|
private boolean extractMacros = false;
|
||||||
|
|
||||||
public Parser getMetaParser() {
|
public Parser getMetaParser() {
|
||||||
return meta;
|
return meta;
|
||||||
@ -120,10 +128,10 @@ public class OpenDocumentParser extends AbstractParser {
|
|||||||
return SUPPORTED_TYPES;
|
return SUPPORTED_TYPES;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void parse(
|
public void parse(InputStream stream, ContentHandler baseHandler, Metadata metadata,
|
||||||
InputStream stream, ContentHandler baseHandler,
|
ParseContext context) throws IOException, SAXException, TikaException {
|
||||||
Metadata metadata, ParseContext context)
|
|
||||||
throws IOException, SAXException, TikaException {
|
EmbeddedDocumentUtil embeddedDocumentUtil = new EmbeddedDocumentUtil(context);
|
||||||
|
|
||||||
// Open the Zip stream
|
// Open the Zip stream
|
||||||
// Use a File if we can, and an already open zip is even better
|
// Use a File if we can, and an already open zip is even better
|
||||||
@ -145,85 +153,129 @@ public class OpenDocumentParser extends AbstractParser {
|
|||||||
|
|
||||||
// Prepare to handle the content
|
// Prepare to handle the content
|
||||||
XHTMLContentHandler xhtml = new XHTMLContentHandler(baseHandler, metadata);
|
XHTMLContentHandler xhtml = new XHTMLContentHandler(baseHandler, metadata);
|
||||||
|
xhtml.startDocument();
|
||||||
// As we don't know which of the metadata or the content
|
// As we don't know which of the metadata or the content
|
||||||
// we'll hit first, catch the endDocument call initially
|
// we'll hit first, catch the endDocument call initially
|
||||||
EndDocumentShieldingContentHandler handler =
|
EndDocumentShieldingContentHandler handler = new EndDocumentShieldingContentHandler(xhtml);
|
||||||
new EndDocumentShieldingContentHandler(xhtml);
|
|
||||||
|
|
||||||
if (zipFile != null) {
|
try {
|
||||||
try {
|
if (zipFile != null) {
|
||||||
handleZipFile(zipFile, metadata, context, handler);
|
try {
|
||||||
} finally {
|
handleZipFile(zipFile, metadata, context, handler, embeddedDocumentUtil);
|
||||||
//Do we want to close silently == catch an exception here?
|
} finally {
|
||||||
zipFile.close();
|
//Do we want to close silently == catch an exception here?
|
||||||
|
zipFile.close();
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
try {
|
||||||
|
handleZipStream(zipStream, metadata, context, handler, embeddedDocumentUtil);
|
||||||
|
} finally {
|
||||||
|
//Do we want to close silently == catch an exception here?
|
||||||
|
zipStream.close();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} catch (SAXException e) {
|
||||||
try {
|
if (e.getCause() instanceof EncryptedDocumentException) {
|
||||||
handleZipStream(zipStream, metadata, context, handler);
|
throw (EncryptedDocumentException)e.getCause();
|
||||||
} finally {
|
|
||||||
//Do we want to close silently == catch an exception here?
|
|
||||||
zipStream.close();
|
|
||||||
}
|
}
|
||||||
|
throw e;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Only now call the end document
|
// Only now call the end document
|
||||||
if (handler.getEndDocumentWasCalled()) {
|
if (handler.isEndDocumentWasCalled()) {
|
||||||
handler.reallyEndDocument();
|
handler.reallyEndDocument();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void handleZipStream(ZipInputStream zipStream, Metadata metadata, ParseContext context, EndDocumentShieldingContentHandler handler) throws IOException, TikaException, SAXException {
|
@Field
|
||||||
ZipEntry entry = zipStream.getNextEntry();
|
public void setExtractMacros(boolean extractMacros) {
|
||||||
if (entry == null) {
|
this.extractMacros = extractMacros;
|
||||||
throw new IOException("No entries found in ZipInputStream");
|
|
||||||
}
|
|
||||||
do {
|
|
||||||
handleZipEntry(entry, zipStream, metadata, context, handler);
|
|
||||||
entry = zipStream.getNextEntry();
|
|
||||||
} while (entry != null);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private void handleZipFile(ZipFile zipFile, Metadata metadata,
|
private void handleZipStream(ZipInputStream zipStream, Metadata metadata, ParseContext context,
|
||||||
ParseContext context, EndDocumentShieldingContentHandler handler)
|
EndDocumentShieldingContentHandler handler,
|
||||||
throws IOException, TikaException, SAXException {
|
EmbeddedDocumentUtil embeddedDocumentUtil)
|
||||||
|
throws IOException, TikaException, SAXException {
|
||||||
|
ZipEntry entry = zipStream.getNextEntry();
|
||||||
|
if (entry == null) {
|
||||||
|
throw new IOException("No entries found in ZipInputStream");
|
||||||
|
}
|
||||||
|
List<SAXException> exceptions = new ArrayList<>();
|
||||||
|
do {
|
||||||
|
try {
|
||||||
|
handleZipEntry(entry, zipStream, metadata, context, handler,
|
||||||
|
embeddedDocumentUtil);
|
||||||
|
} catch (SAXException e) {
|
||||||
|
WriteLimitReachedException.throwIfWriteLimitReached(e);
|
||||||
|
if (e.getCause() instanceof EncryptedDocumentException) {
|
||||||
|
throw (EncryptedDocumentException)e.getCause();
|
||||||
|
} else {
|
||||||
|
exceptions.add(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
entry = zipStream.getNextEntry();
|
||||||
|
} while (entry != null);
|
||||||
|
|
||||||
|
if (exceptions.size() > 0) {
|
||||||
|
throw exceptions.get(0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void handleZipFile(ZipFile zipFile, Metadata metadata, ParseContext context,
|
||||||
|
EndDocumentShieldingContentHandler handler,
|
||||||
|
EmbeddedDocumentUtil embeddedDocumentUtil)
|
||||||
|
throws IOException, TikaException, SAXException {
|
||||||
// If we can, process the metadata first, then the
|
// If we can, process the metadata first, then the
|
||||||
// rest of the file afterwards (TIKA-1353)
|
// rest of the file afterwards (TIKA-1353)
|
||||||
// Only possible to guarantee that when opened from a file not a stream
|
// Only possible to guarantee that when opened from a file not a stream
|
||||||
|
|
||||||
ZipEntry entry = zipFile.getEntry(META_NAME);
|
ZipEntry entry = zipFile.getEntry(MANIFEST_NAME);
|
||||||
if (entry != null) {
|
if (entry != null) {
|
||||||
handleZipEntry(entry, zipFile.getInputStream(entry), metadata, context, handler);
|
handleZipEntry(entry, zipFile.getInputStream(entry), metadata, context,
|
||||||
|
handler, embeddedDocumentUtil);
|
||||||
|
}
|
||||||
|
|
||||||
|
entry = zipFile.getEntry(META_NAME);
|
||||||
|
if (entry != null) {
|
||||||
|
handleZipEntry(entry, zipFile.getInputStream(entry), metadata, context,
|
||||||
|
handler, embeddedDocumentUtil);
|
||||||
}
|
}
|
||||||
|
|
||||||
Enumeration<? extends ZipEntry> entries = zipFile.entries();
|
Enumeration<? extends ZipEntry> entries = zipFile.entries();
|
||||||
while (entries.hasMoreElements()) {
|
while (entries.hasMoreElements()) {
|
||||||
entry = entries.nextElement();
|
entry = entries.nextElement();
|
||||||
if (!META_NAME.equals(entry.getName())) {
|
if (!META_NAME.equals(entry.getName())) {
|
||||||
handleZipEntry(entry, zipFile.getInputStream(entry), metadata, context, handler);
|
handleZipEntry(entry, zipFile.getInputStream(entry), metadata,
|
||||||
|
context, handler, embeddedDocumentUtil);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
private void handleZipEntry(ZipEntry entry, InputStream zip, Metadata metadata,
|
|
||||||
ParseContext context, EndDocumentShieldingContentHandler handler)
|
|
||||||
throws IOException, SAXException, TikaException {
|
|
||||||
if (entry == null) return;
|
|
||||||
|
|
||||||
if (entry.getName().equals("mimetype")) {
|
private void handleZipEntry(ZipEntry entry, InputStream zip, Metadata metadata,
|
||||||
|
ParseContext context, ContentHandler handler,
|
||||||
|
EmbeddedDocumentUtil embeddedDocumentUtil)
|
||||||
|
throws IOException, SAXException, TikaException {
|
||||||
|
|
||||||
|
|
||||||
|
if (entry.getName().contains("manifest.xml")) {
|
||||||
|
checkForEncryption(zip, context);
|
||||||
|
} else if (entry.getName().equals("mimetype")) {
|
||||||
String type = IOUtils.toString(zip, UTF_8);
|
String type = IOUtils.toString(zip, UTF_8);
|
||||||
metadata.set(Metadata.CONTENT_TYPE, type);
|
metadata.set(Metadata.CONTENT_TYPE, type);
|
||||||
} else if (entry.getName().equals(META_NAME)) {
|
} else if (entry.getName().equals(META_NAME)) {
|
||||||
meta.parse(zip, new DefaultHandler(), metadata, context);
|
meta.parse(zip, new DefaultHandler(), metadata, context);
|
||||||
} else if (entry.getName().endsWith("content.xml")) {
|
} else if (entry.getName().endsWith("content.xml")) {
|
||||||
if (content instanceof OpenDocumentContentParser) {
|
if (content instanceof OpenDocumentContentParser) {
|
||||||
((OpenDocumentContentParser) content).parseInternal(zip, handler, metadata, context);
|
((OpenDocumentContentParser) content)
|
||||||
|
.parseInternal(zip, handler, metadata, context);
|
||||||
} else {
|
} else {
|
||||||
// Foreign content parser was set:
|
// Foreign content parser was set:
|
||||||
content.parse(zip, handler, metadata, context);
|
content.parse(zip, handler, metadata, context);
|
||||||
}
|
}
|
||||||
} else if (entry.getName().endsWith("styles.xml")) {
|
} else if (entry.getName().endsWith("styles.xml")) {
|
||||||
if (content instanceof OpenDocumentContentParser) {
|
if (content instanceof OpenDocumentContentParser) {
|
||||||
((OpenDocumentContentParser) content).parseInternal(zip, handler, metadata, context);
|
((OpenDocumentContentParser) content)
|
||||||
|
.parseInternal(zip, handler, metadata, context);
|
||||||
} else {
|
} else {
|
||||||
// Foreign content parser was set:
|
// Foreign content parser was set:
|
||||||
content.parse(zip, handler, metadata, context);
|
content.parse(zip, handler, metadata, context);
|
||||||
@ -231,26 +283,87 @@ public class OpenDocumentParser extends AbstractParser {
|
|||||||
} else {
|
} else {
|
||||||
String embeddedName = entry.getName();
|
String embeddedName = entry.getName();
|
||||||
//scrape everything under Thumbnails/ and Pictures/
|
//scrape everything under Thumbnails/ and Pictures/
|
||||||
if (embeddedName.contains("Thumbnails/") ||
|
if (embeddedName.contains("Thumbnails/") || embeddedName.contains("Pictures/")) {
|
||||||
embeddedName.contains("Pictures/")) {
|
|
||||||
EmbeddedDocumentExtractor embeddedDocumentExtractor =
|
|
||||||
EmbeddedDocumentUtil.getEmbeddedDocumentExtractor(context);
|
|
||||||
Metadata embeddedMetadata = new Metadata();
|
Metadata embeddedMetadata = new Metadata();
|
||||||
embeddedMetadata.set(TikaCoreProperties.ORIGINAL_RESOURCE_NAME, entry.getName());
|
TikaInputStream stream = TikaInputStream.get(zip);
|
||||||
/* if (embeddedName.startsWith("Thumbnails/")) {
|
|
||||||
|
embeddedMetadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, entry.getName());
|
||||||
|
if (embeddedName.startsWith("Thumbnails/")) {
|
||||||
embeddedMetadata.set(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE,
|
embeddedMetadata.set(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE,
|
||||||
TikaCoreProperties.EmbeddedResourceType.THUMBNAIL);
|
TikaCoreProperties.EmbeddedResourceType.THUMBNAIL.toString());
|
||||||
}*/
|
}
|
||||||
|
|
||||||
if (embeddedName.contains("Pictures/")) {
|
if (embeddedName.contains("Pictures/")) {
|
||||||
embeddedMetadata.set(TikaMetadataKeys.EMBEDDED_RESOURCE_TYPE,
|
embeddedMetadata.set(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE,
|
||||||
TikaCoreProperties.EmbeddedResourceType.INLINE.toString());
|
TikaCoreProperties.EmbeddedResourceType.INLINE.toString());
|
||||||
|
|
||||||
|
MediaType embeddedMimeType =
|
||||||
|
embeddedDocumentUtil.getDetector().detect(stream, embeddedMetadata);
|
||||||
|
if (embeddedMimeType != null) {
|
||||||
|
embeddedMetadata.set(Metadata.CONTENT_TYPE, embeddedMimeType.toString());
|
||||||
|
}
|
||||||
|
stream.reset();
|
||||||
}
|
}
|
||||||
if (embeddedDocumentExtractor.shouldParseEmbedded(embeddedMetadata)) {
|
|
||||||
embeddedDocumentExtractor.parseEmbedded(zip,
|
if (embeddedDocumentUtil.shouldParseEmbedded(embeddedMetadata)) {
|
||||||
new EmbeddedContentHandler(handler), embeddedMetadata, false);
|
embeddedDocumentUtil.parseEmbedded(stream, new EmbeddedContentHandler(handler),
|
||||||
|
embeddedMetadata, false);
|
||||||
}
|
}
|
||||||
|
} else if (extractMacros && embeddedName.contains("Basic/")) {
|
||||||
|
//process all files under Basic/; let maybeHandleMacro figure
|
||||||
|
//out if it is a macro or not
|
||||||
|
maybeHandleMacro(zip, embeddedName, handler, context);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void maybeHandleMacro(InputStream is, String embeddedName, ContentHandler handler,
|
||||||
|
ParseContext context)
|
||||||
|
throws TikaException, IOException, SAXException {
|
||||||
|
//should probably run XMLRootExtractor on the inputstream
|
||||||
|
//or read the macro manifest for the names of the macros
|
||||||
|
//rather than relying on the script file name
|
||||||
|
if (ignoreScriptFile(embeddedName)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
Metadata embeddedMetadata = new Metadata();
|
||||||
|
embeddedMetadata.set(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE,
|
||||||
|
TikaCoreProperties.EmbeddedResourceType.MACRO.toString());
|
||||||
|
handler = new OpenDocumentMacroHandler(handler, context);
|
||||||
|
XMLReaderUtils.parseSAX(new CloseShieldInputStream(is),
|
||||||
|
new OfflineContentHandler(new EmbeddedContentHandler(handler)), context);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void checkForEncryption(InputStream stream, ParseContext context)
|
||||||
|
throws SAXException, TikaException, IOException {
|
||||||
|
try {
|
||||||
|
XMLReaderUtils.parseSAX(new CloseShieldInputStream(stream),
|
||||||
|
new OfflineContentHandler(new EmbeddedContentHandler(
|
||||||
|
new OpenDocumentManifestHandler())), context);
|
||||||
|
} catch (SAXException e) {
|
||||||
|
if (e.getCause() != null
|
||||||
|
&& e.getCause() instanceof EncryptedDocumentException) {
|
||||||
|
throw (EncryptedDocumentException)e.getCause();
|
||||||
|
}
|
||||||
|
//otherwise...swallow
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean ignoreScriptFile(String embeddedName) {
|
||||||
|
if (embeddedName.contains("Basic/")) {
|
||||||
|
if (embeddedName.contains("script-lb.xml")) {
|
||||||
|
return true;
|
||||||
|
} else if (embeddedName.contains("script-lc.xml")) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
//shouldn't ever get here, but if it isn't under Basic/, ignore it
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -16,13 +16,14 @@
|
|||||||
*/
|
*/
|
||||||
package org.apache.tika.parser.xml;
|
package org.apache.tika.parser.xml;
|
||||||
|
|
||||||
import org.apache.tika.metadata.Metadata;
|
|
||||||
import org.apache.tika.metadata.Property;
|
|
||||||
import org.xml.sax.helpers.DefaultHandler;
|
|
||||||
|
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.xml.sax.helpers.DefaultHandler;
|
||||||
|
|
||||||
|
import org.apache.tika.metadata.Metadata;
|
||||||
|
import org.apache.tika.metadata.Property;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Base class for SAX handlers that map SAX events into document metadata.
|
* Base class for SAX handlers that map SAX events into document metadata.
|
||||||
*
|
*
|
||||||
@ -39,11 +40,12 @@ class AbstractMetadataHandler extends DefaultHandler {
|
|||||||
this.property = null;
|
this.property = null;
|
||||||
this.name = name;
|
this.name = name;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected AbstractMetadataHandler(Metadata metadata, Property property) {
|
protected AbstractMetadataHandler(Metadata metadata, Property property) {
|
||||||
this.metadata = metadata;
|
this.metadata = metadata;
|
||||||
this.property = property;
|
this.property = property;
|
||||||
this.name = property.getName();
|
this.name = property.getName();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Adds the given metadata value. The value is ignored if it is
|
* Adds the given metadata value. The value is ignored if it is
|
||||||
@ -59,9 +61,9 @@ class AbstractMetadataHandler extends DefaultHandler {
|
|||||||
List<String> previous = Arrays.asList(metadata.getValues(name));
|
List<String> previous = Arrays.asList(metadata.getValues(name));
|
||||||
if (!previous.contains(value)) {
|
if (!previous.contains(value)) {
|
||||||
if (property != null) {
|
if (property != null) {
|
||||||
metadata.add(property, value);
|
metadata.add(property, value);
|
||||||
} else {
|
} else {
|
||||||
metadata.add(name, value);
|
metadata.add(name, value);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@ -69,23 +71,23 @@ class AbstractMetadataHandler extends DefaultHandler {
|
|||||||
String previous = metadata.get(name);
|
String previous = metadata.get(name);
|
||||||
if (previous != null && previous.length() > 0) {
|
if (previous != null && previous.length() > 0) {
|
||||||
if (!previous.equals(value)) {
|
if (!previous.equals(value)) {
|
||||||
if (property != null) {
|
if (property != null) {
|
||||||
if (property.isMultiValuePermitted()) {
|
if (property.isMultiValuePermitted()) {
|
||||||
metadata.add(property, value);
|
metadata.add(property, value);
|
||||||
} else {
|
} else {
|
||||||
// Replace the existing value if isMultiValuePermitted is false
|
// Replace the existing value if isMultiValuePermitted is false
|
||||||
metadata.set(property, value);
|
metadata.set(property, value);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
metadata.add(name, value);
|
metadata.add(name, value);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (property != null) {
|
if (property != null) {
|
||||||
metadata.set(property, value);
|
metadata.set(property, value);
|
||||||
} else {
|
} else {
|
||||||
metadata.set(name, value);
|
metadata.set(name, value);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -16,15 +16,16 @@
|
|||||||
*/
|
*/
|
||||||
package org.apache.tika.parser.xml;
|
package org.apache.tika.parser.xml;
|
||||||
|
|
||||||
import org.apache.tika.metadata.Metadata;
|
|
||||||
import org.xml.sax.Attributes;
|
import org.xml.sax.Attributes;
|
||||||
import org.xml.sax.helpers.DefaultHandler;
|
import org.xml.sax.helpers.DefaultHandler;
|
||||||
|
|
||||||
|
import org.apache.tika.metadata.Metadata;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This adds a Metadata entry for a given node.
|
* This adds a Metadata entry for a given node.
|
||||||
* The textual content of the node is used as the
|
* The textual content of the node is used as the
|
||||||
* value, and the Metadata name is taken from
|
* value, and the Metadata name is taken from
|
||||||
* an attribute, with a prefix if required.
|
* an attribute, with a prefix if required.
|
||||||
*/
|
*/
|
||||||
public class AttributeDependantMetadataHandler extends DefaultHandler {
|
public class AttributeDependantMetadataHandler extends DefaultHandler {
|
||||||
|
|
||||||
@ -32,20 +33,20 @@ public class AttributeDependantMetadataHandler extends DefaultHandler {
|
|||||||
|
|
||||||
private final String nameHoldingAttribute;
|
private final String nameHoldingAttribute;
|
||||||
private final String namePrefix;
|
private final String namePrefix;
|
||||||
|
private final StringBuilder buffer = new StringBuilder();
|
||||||
private String name;
|
private String name;
|
||||||
|
|
||||||
private final StringBuilder buffer = new StringBuilder();
|
public AttributeDependantMetadataHandler(Metadata metadata, String nameHoldingAttribute,
|
||||||
|
String namePrefix) {
|
||||||
public AttributeDependantMetadataHandler(Metadata metadata, String nameHoldingAttribute, String namePrefix) {
|
|
||||||
this.metadata = metadata;
|
this.metadata = metadata;
|
||||||
this.nameHoldingAttribute = nameHoldingAttribute;
|
this.nameHoldingAttribute = nameHoldingAttribute;
|
||||||
this.namePrefix = namePrefix;
|
this.namePrefix = namePrefix;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void addMetadata(String value) {
|
public void addMetadata(String value) {
|
||||||
if(name == null || name.length() == 0) {
|
if (name == null || name.length() == 0) {
|
||||||
// We didn't find the attribute which holds the name
|
// We didn't find the attribute which holds the name
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (value.length() > 0) {
|
if (value.length() > 0) {
|
||||||
String previous = metadata.get(name);
|
String previous = metadata.get(name);
|
||||||
@ -61,20 +62,19 @@ public class AttributeDependantMetadataHandler extends DefaultHandler {
|
|||||||
buffer.setLength(0);
|
buffer.setLength(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void startElement(
|
public void startElement(String uri, String localName, String name, Attributes attributes) {
|
||||||
String uri, String localName, String name, Attributes attributes) {
|
|
||||||
String rawName = attributes.getValue(nameHoldingAttribute);
|
String rawName = attributes.getValue(nameHoldingAttribute);
|
||||||
if (rawName != null) {
|
if (rawName != null) {
|
||||||
if (namePrefix == null) {
|
if (namePrefix == null) {
|
||||||
this.name = rawName;
|
this.name = rawName;
|
||||||
} else {
|
} else {
|
||||||
this.name = namePrefix + rawName;
|
this.name = namePrefix + rawName;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// All other attributes are ignored
|
// All other attributes are ignored
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public void characters(char[] ch, int start, int length) {
|
public void characters(char[] ch, int start, int length) {
|
||||||
buffer.append(ch, start, length);
|
buffer.append(ch, start, length);
|
||||||
}
|
}
|
||||||
|
@ -16,11 +16,12 @@
|
|||||||
*/
|
*/
|
||||||
package org.apache.tika.parser.xml;
|
package org.apache.tika.parser.xml;
|
||||||
|
|
||||||
import org.apache.tika.metadata.Metadata;
|
|
||||||
import org.apache.tika.metadata.Property;
|
|
||||||
import org.xml.sax.Attributes;
|
import org.xml.sax.Attributes;
|
||||||
import org.xml.sax.SAXException;
|
import org.xml.sax.SAXException;
|
||||||
|
|
||||||
|
import org.apache.tika.metadata.Metadata;
|
||||||
|
import org.apache.tika.metadata.Property;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* SAX event handler that maps the contents of an XML attribute into
|
* SAX event handler that maps the contents of an XML attribute into
|
||||||
* a metadata field.
|
* a metadata field.
|
||||||
@ -33,26 +34,25 @@ public class AttributeMetadataHandler extends AbstractMetadataHandler {
|
|||||||
|
|
||||||
private final String localName;
|
private final String localName;
|
||||||
|
|
||||||
public AttributeMetadataHandler(
|
public AttributeMetadataHandler(String uri, String localName, Metadata metadata, String name) {
|
||||||
String uri, String localName, Metadata metadata, String name) {
|
|
||||||
super(metadata, name);
|
super(metadata, name);
|
||||||
this.uri = uri;
|
this.uri = uri;
|
||||||
this.localName = localName;
|
this.localName = localName;
|
||||||
}
|
}
|
||||||
public AttributeMetadataHandler(
|
|
||||||
String uri, String localName, Metadata metadata, Property property) {
|
public AttributeMetadataHandler(String uri, String localName, Metadata metadata,
|
||||||
super(metadata, property);
|
Property property) {
|
||||||
this.uri = uri;
|
super(metadata, property);
|
||||||
this.localName = localName;
|
this.uri = uri;
|
||||||
}
|
this.localName = localName;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void startElement(
|
public void startElement(String uri, String localName, String qName, Attributes attributes)
|
||||||
String uri, String localName, String qName, Attributes attributes)
|
throws SAXException {
|
||||||
throws SAXException {
|
|
||||||
for (int i = 0; i < attributes.getLength(); i++) {
|
for (int i = 0; i < attributes.getLength(); i++) {
|
||||||
if (attributes.getURI(i).equals(this.uri)
|
if (attributes.getURI(i).equals(this.uri) &&
|
||||||
&& attributes.getLocalName(i).equals(this.localName)) {
|
attributes.getLocalName(i).equals(this.localName)) {
|
||||||
addMetadata(attributes.getValue(i).trim());
|
addMetadata(attributes.getValue(i).trim());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -16,45 +16,45 @@
|
|||||||
*/
|
*/
|
||||||
package org.apache.tika.parser.xml;
|
package org.apache.tika.parser.xml;
|
||||||
|
|
||||||
|
import org.xml.sax.ContentHandler;
|
||||||
|
|
||||||
import org.apache.tika.metadata.DublinCore;
|
import org.apache.tika.metadata.DublinCore;
|
||||||
import org.apache.tika.metadata.Metadata;
|
import org.apache.tika.metadata.Metadata;
|
||||||
import org.apache.tika.metadata.Property;
|
import org.apache.tika.metadata.Property;
|
||||||
import org.apache.tika.metadata.TikaCoreProperties;
|
import org.apache.tika.metadata.TikaCoreProperties;
|
||||||
import org.apache.tika.parser.ParseContext;
|
import org.apache.tika.parser.ParseContext;
|
||||||
import org.apache.tika.sax.TeeContentHandler;
|
import org.apache.tika.sax.TeeContentHandler;
|
||||||
import org.xml.sax.ContentHandler;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Dublin Core metadata parser
|
* Dublin Core metadata parser
|
||||||
*/
|
*/
|
||||||
public class DcXMLParser extends XMLParser {
|
public class DcXMLParser extends XMLParser {
|
||||||
|
|
||||||
/** Serial version UID */
|
/**
|
||||||
|
* Serial version UID
|
||||||
|
*/
|
||||||
private static final long serialVersionUID = 4905318835463880819L;
|
private static final long serialVersionUID = 4905318835463880819L;
|
||||||
|
|
||||||
private static ContentHandler getDublinCoreHandler(
|
private static ContentHandler getDublinCoreHandler(Metadata metadata, Property property,
|
||||||
Metadata metadata, Property property, String element) {
|
String element) {
|
||||||
return new ElementMetadataHandler(
|
return new ElementMetadataHandler(DublinCore.NAMESPACE_URI_DC, element, metadata, property);
|
||||||
DublinCore.NAMESPACE_URI_DC, element,
|
|
||||||
metadata, property);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected ContentHandler getContentHandler(
|
protected ContentHandler getContentHandler(ContentHandler handler, Metadata metadata,
|
||||||
ContentHandler handler, Metadata metadata, ParseContext context) {
|
ParseContext context) {
|
||||||
return new TeeContentHandler(
|
return new TeeContentHandler(super.getContentHandler(handler, metadata, context),
|
||||||
super.getContentHandler(handler, metadata, context),
|
getDublinCoreHandler(metadata, TikaCoreProperties.TITLE, "title"),
|
||||||
getDublinCoreHandler(metadata, TikaCoreProperties.TITLE, "title"),
|
getDublinCoreHandler(metadata, TikaCoreProperties.SUBJECT, "subject"),
|
||||||
getDublinCoreHandler(metadata, TikaCoreProperties.KEYWORDS, "subject"),
|
getDublinCoreHandler(metadata, TikaCoreProperties.CREATOR, "creator"),
|
||||||
getDublinCoreHandler(metadata, TikaCoreProperties.CREATOR, "creator"),
|
getDublinCoreHandler(metadata, TikaCoreProperties.DESCRIPTION, "description"),
|
||||||
getDublinCoreHandler(metadata, TikaCoreProperties.DESCRIPTION, "description"),
|
getDublinCoreHandler(metadata, TikaCoreProperties.PUBLISHER, "publisher"),
|
||||||
getDublinCoreHandler(metadata, TikaCoreProperties.PUBLISHER, "publisher"),
|
getDublinCoreHandler(metadata, TikaCoreProperties.CONTRIBUTOR, "contributor"),
|
||||||
getDublinCoreHandler(metadata, TikaCoreProperties.CONTRIBUTOR, "contributor"),
|
getDublinCoreHandler(metadata, TikaCoreProperties.CREATED, "date"),
|
||||||
getDublinCoreHandler(metadata, TikaCoreProperties.CREATED, "date"),
|
getDublinCoreHandler(metadata, TikaCoreProperties.TYPE, "type"),
|
||||||
getDublinCoreHandler(metadata, TikaCoreProperties.TYPE, "type"),
|
getDublinCoreHandler(metadata, TikaCoreProperties.FORMAT, "format"),
|
||||||
getDublinCoreHandler(metadata, TikaCoreProperties.FORMAT, "format"),
|
getDublinCoreHandler(metadata, TikaCoreProperties.IDENTIFIER, "identifier"),
|
||||||
getDublinCoreHandler(metadata, TikaCoreProperties.IDENTIFIER, "identifier"),
|
getDublinCoreHandler(metadata, TikaCoreProperties.LANGUAGE, "language"),
|
||||||
getDublinCoreHandler(metadata, TikaCoreProperties.LANGUAGE, "language"),
|
getDublinCoreHandler(metadata, TikaCoreProperties.RIGHTS, "rights"));
|
||||||
getDublinCoreHandler(metadata, TikaCoreProperties.RIGHTS, "rights"));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -16,13 +16,14 @@
|
|||||||
*/
|
*/
|
||||||
package org.apache.tika.parser.xml;
|
package org.apache.tika.parser.xml;
|
||||||
|
|
||||||
import org.apache.tika.metadata.Metadata;
|
import java.util.Arrays;
|
||||||
import org.apache.tika.metadata.Property;
|
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
import org.xml.sax.Attributes;
|
import org.xml.sax.Attributes;
|
||||||
|
|
||||||
import java.util.Arrays;
|
import org.apache.tika.metadata.Metadata;
|
||||||
|
import org.apache.tika.metadata.Property;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* SAX event handler that maps the contents of an XML element into
|
* SAX event handler that maps the contents of an XML element into
|
||||||
@ -44,21 +45,17 @@ public class ElementMetadataHandler extends AbstractMetadataHandler {
|
|||||||
private final Metadata metadata;
|
private final Metadata metadata;
|
||||||
|
|
||||||
private final String name;
|
private final String name;
|
||||||
private Property targetProperty;
|
|
||||||
|
|
||||||
private final boolean allowDuplicateValues;
|
private final boolean allowDuplicateValues;
|
||||||
private final boolean allowEmptyValues;
|
private final boolean allowEmptyValues;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The buffer used to capture characters when inside a bag li element.
|
* The buffer used to capture characters when inside a bag li element.
|
||||||
*/
|
*/
|
||||||
private final StringBuilder bufferBagged = new StringBuilder();
|
private final StringBuilder bufferBagged = new StringBuilder();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The buffer used to capture characters inside standard elements.
|
* The buffer used to capture characters inside standard elements.
|
||||||
*/
|
*/
|
||||||
private final StringBuilder bufferBagless = new StringBuilder();
|
private final StringBuilder bufferBagless = new StringBuilder();
|
||||||
|
private Property targetProperty;
|
||||||
/**
|
/**
|
||||||
* Whether or not the value was found in a standard element structure or inside a bag.
|
* Whether or not the value was found in a standard element structure or inside a bag.
|
||||||
*/
|
*/
|
||||||
@ -70,13 +67,12 @@ public class ElementMetadataHandler extends AbstractMetadataHandler {
|
|||||||
/**
|
/**
|
||||||
* Constructor for string metadata keys.
|
* Constructor for string metadata keys.
|
||||||
*
|
*
|
||||||
* @param uri the uri of the namespace of the element
|
* @param uri the uri of the namespace of the element
|
||||||
* @param localName the local name of the element
|
* @param localName the local name of the element
|
||||||
* @param metadata the Tika metadata object to populate
|
* @param metadata the Tika metadata object to populate
|
||||||
* @param name the Tika metadata field key
|
* @param name the Tika metadata field key
|
||||||
*/
|
*/
|
||||||
public ElementMetadataHandler(
|
public ElementMetadataHandler(String uri, String localName, Metadata metadata, String name) {
|
||||||
String uri, String localName, Metadata metadata, String name) {
|
|
||||||
super(metadata, name);
|
super(metadata, name);
|
||||||
this.uri = uri;
|
this.uri = uri;
|
||||||
this.localName = localName;
|
this.localName = localName;
|
||||||
@ -91,15 +87,15 @@ public class ElementMetadataHandler extends AbstractMetadataHandler {
|
|||||||
* Constructor for string metadata keys which allows change of behavior
|
* Constructor for string metadata keys which allows change of behavior
|
||||||
* for duplicate and empty entry values.
|
* for duplicate and empty entry values.
|
||||||
*
|
*
|
||||||
* @param uri the uri of the namespace of the element
|
* @param uri the uri of the namespace of the element
|
||||||
* @param localName the local name of the element
|
* @param localName the local name of the element
|
||||||
* @param metadata the Tika metadata object to populate
|
* @param metadata the Tika metadata object to populate
|
||||||
* @param name the Tika metadata field key
|
* @param name the Tika metadata field key
|
||||||
* @param allowDuplicateValues add duplicate values to the Tika metadata
|
* @param allowDuplicateValues add duplicate values to the Tika metadata
|
||||||
* @param allowEmptyValues add empty values to the Tika metadata
|
* @param allowEmptyValues add empty values to the Tika metadata
|
||||||
*/
|
*/
|
||||||
public ElementMetadataHandler(
|
public ElementMetadataHandler(String uri, String localName, Metadata metadata, String name,
|
||||||
String uri, String localName, Metadata metadata, String name, boolean allowDuplicateValues, boolean allowEmptyValues) {
|
boolean allowDuplicateValues, boolean allowEmptyValues) {
|
||||||
super(metadata, name);
|
super(metadata, name);
|
||||||
this.uri = uri;
|
this.uri = uri;
|
||||||
this.localName = localName;
|
this.localName = localName;
|
||||||
@ -113,13 +109,13 @@ public class ElementMetadataHandler extends AbstractMetadataHandler {
|
|||||||
/**
|
/**
|
||||||
* Constructor for Property metadata keys.
|
* Constructor for Property metadata keys.
|
||||||
*
|
*
|
||||||
* @param uri the uri of the namespace of the element
|
* @param uri the uri of the namespace of the element
|
||||||
* @param localName the local name of the element
|
* @param localName the local name of the element
|
||||||
* @param metadata the Tika metadata object to populate
|
* @param metadata the Tika metadata object to populate
|
||||||
* @param targetProperty the Tika metadata Property key
|
* @param targetProperty the Tika metadata Property key
|
||||||
*/
|
*/
|
||||||
public ElementMetadataHandler(
|
public ElementMetadataHandler(String uri, String localName, Metadata metadata,
|
||||||
String uri, String localName, Metadata metadata, Property targetProperty) {
|
Property targetProperty) {
|
||||||
super(metadata, targetProperty);
|
super(metadata, targetProperty);
|
||||||
this.uri = uri;
|
this.uri = uri;
|
||||||
this.localName = localName;
|
this.localName = localName;
|
||||||
@ -135,15 +131,16 @@ public class ElementMetadataHandler extends AbstractMetadataHandler {
|
|||||||
* Constructor for Property metadata keys which allows change of behavior
|
* Constructor for Property metadata keys which allows change of behavior
|
||||||
* for duplicate and empty entry values.
|
* for duplicate and empty entry values.
|
||||||
*
|
*
|
||||||
* @param uri the uri of the namespace of the element
|
* @param uri the uri of the namespace of the element
|
||||||
* @param localName the local name of the element
|
* @param localName the local name of the element
|
||||||
* @param metadata the Tika metadata object to populate
|
* @param metadata the Tika metadata object to populate
|
||||||
* @param targetProperty the Tika metadata Property key
|
* @param targetProperty the Tika metadata Property key
|
||||||
* @param allowDuplicateValues add duplicate values to the Tika metadata
|
* @param allowDuplicateValues add duplicate values to the Tika metadata
|
||||||
* @param allowEmptyValues add empty values to the Tika metadata
|
* @param allowEmptyValues add empty values to the Tika metadata
|
||||||
*/
|
*/
|
||||||
public ElementMetadataHandler(
|
public ElementMetadataHandler(String uri, String localName, Metadata metadata,
|
||||||
String uri, String localName, Metadata metadata, Property targetProperty, boolean allowDuplicateValues, boolean allowEmptyValues) {
|
Property targetProperty, boolean allowDuplicateValues,
|
||||||
|
boolean allowEmptyValues) {
|
||||||
super(metadata, targetProperty);
|
super(metadata, targetProperty);
|
||||||
this.uri = uri;
|
this.uri = uri;
|
||||||
this.localName = localName;
|
this.localName = localName;
|
||||||
@ -162,16 +159,13 @@ public class ElementMetadataHandler extends AbstractMetadataHandler {
|
|||||||
protected boolean isMatchingElement(String uri, String localName) {
|
protected boolean isMatchingElement(String uri, String localName) {
|
||||||
// match if we're inside the parent element or within some bag element
|
// match if we're inside the parent element or within some bag element
|
||||||
return (uri.equals(this.uri) && localName.equals(this.localName)) ||
|
return (uri.equals(this.uri) && localName.equals(this.localName)) ||
|
||||||
(parentMatchLevel > 0 &&
|
(parentMatchLevel > 0 &&
|
||||||
((uri.equals(URI_RDF) && localName.equals(LOCAL_NAME_RDF_BAG)) ||
|
((uri.equals(URI_RDF) && localName.equals(LOCAL_NAME_RDF_BAG)) ||
|
||||||
(uri.equals(URI_RDF) && localName.equals(LOCAL_NAME_RDF_LI))
|
(uri.equals(URI_RDF) && localName.equals(LOCAL_NAME_RDF_LI))));
|
||||||
)
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void startElement(
|
public void startElement(String uri, String localName, String name, Attributes attributes) {
|
||||||
String uri, String localName, String name, Attributes attributes) {
|
|
||||||
if (isMatchingElement(uri, localName)) {
|
if (isMatchingElement(uri, localName)) {
|
||||||
matchLevel++;
|
matchLevel++;
|
||||||
}
|
}
|
||||||
@ -230,7 +224,8 @@ public class ElementMetadataHandler extends AbstractMetadataHandler {
|
|||||||
value = "";
|
value = "";
|
||||||
}
|
}
|
||||||
String[] previous = metadata.getValues(name);
|
String[] previous = metadata.getValues(name);
|
||||||
if (previous == null || !Arrays.asList(previous).contains(value) || allowDuplicateValues) {
|
if (previous == null || !Arrays.asList(previous).contains(value) ||
|
||||||
|
allowDuplicateValues) {
|
||||||
metadata.add(targetProperty, value);
|
metadata.add(targetProperty, value);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -16,64 +16,68 @@
|
|||||||
*/
|
*/
|
||||||
package org.apache.tika.parser.xml;
|
package org.apache.tika.parser.xml;
|
||||||
|
|
||||||
import org.apache.commons.codec.binary.Base64;
|
|
||||||
import org.apache.tika.extractor.EmbeddedDocumentExtractor;
|
|
||||||
import org.apache.tika.extractor.EmbeddedDocumentUtil;
|
|
||||||
import org.apache.tika.metadata.Metadata;
|
|
||||||
import org.apache.tika.metadata.TikaMetadataKeys;
|
|
||||||
import org.apache.tika.mime.MediaType;
|
|
||||||
import org.apache.tika.parser.ParseContext;
|
|
||||||
import org.xml.sax.Attributes;
|
|
||||||
import org.xml.sax.ContentHandler;
|
|
||||||
import org.xml.sax.SAXException;
|
|
||||||
import org.xml.sax.helpers.DefaultHandler;
|
|
||||||
|
|
||||||
import java.io.ByteArrayInputStream;
|
import java.io.ByteArrayInputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
|
import org.apache.commons.codec.binary.Base64;
|
||||||
|
import org.xml.sax.Attributes;
|
||||||
|
import org.xml.sax.ContentHandler;
|
||||||
|
import org.xml.sax.SAXException;
|
||||||
|
import org.xml.sax.helpers.DefaultHandler;
|
||||||
|
|
||||||
|
import org.apache.tika.extractor.EmbeddedDocumentExtractor;
|
||||||
|
import org.apache.tika.extractor.EmbeddedDocumentUtil;
|
||||||
|
import org.apache.tika.metadata.Metadata;
|
||||||
|
import org.apache.tika.metadata.TikaCoreProperties;
|
||||||
|
import org.apache.tika.mime.MediaType;
|
||||||
|
import org.apache.tika.parser.ParseContext;
|
||||||
|
|
||||||
public class FictionBookParser extends XMLParser {
|
public class FictionBookParser extends XMLParser {
|
||||||
private static final long serialVersionUID = 4195954546491524374L;
|
private static final long serialVersionUID = 4195954546491524374L;
|
||||||
|
|
||||||
private static final Set<MediaType> SUPPORTED_TYPES =
|
private static final Set<MediaType> SUPPORTED_TYPES =
|
||||||
Collections.singleton(MediaType.application("x-fictionbook+xml"));
|
Collections.singleton(MediaType.application("x-fictionbook+xml"));
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Set<MediaType> getSupportedTypes(ParseContext context) {
|
public Set<MediaType> getSupportedTypes(ParseContext context) {
|
||||||
return SUPPORTED_TYPES;
|
return SUPPORTED_TYPES;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected ContentHandler getContentHandler(ContentHandler handler, Metadata metadata, ParseContext context) {
|
protected ContentHandler getContentHandler(ContentHandler handler, Metadata metadata,
|
||||||
|
ParseContext context) {
|
||||||
return new BinaryElementsDataHandler(
|
return new BinaryElementsDataHandler(
|
||||||
EmbeddedDocumentUtil.getEmbeddedDocumentExtractor(context), handler);
|
EmbeddedDocumentUtil.getEmbeddedDocumentExtractor(context), handler);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static class BinaryElementsDataHandler extends DefaultHandler {
|
private static class BinaryElementsDataHandler extends DefaultHandler {
|
||||||
private static final String ELEMENT_BINARY = "binary";
|
private static final String ELEMENT_BINARY = "binary";
|
||||||
|
|
||||||
private boolean binaryMode = false;
|
|
||||||
private static final String ATTRIBUTE_ID = "id";
|
private static final String ATTRIBUTE_ID = "id";
|
||||||
|
private static final String ATTRIBUTE_CONTENT_TYPE = "content-type";
|
||||||
private final EmbeddedDocumentExtractor partExtractor;
|
private final EmbeddedDocumentExtractor partExtractor;
|
||||||
private final ContentHandler handler;
|
private final ContentHandler handler;
|
||||||
private final StringBuilder binaryData = new StringBuilder();
|
private final StringBuilder binaryData = new StringBuilder();
|
||||||
|
private boolean binaryMode = false;
|
||||||
private Metadata metadata;
|
private Metadata metadata;
|
||||||
private static final String ATTRIBUTE_CONTENT_TYPE = "content-type";
|
|
||||||
|
|
||||||
private BinaryElementsDataHandler(EmbeddedDocumentExtractor partExtractor, ContentHandler handler) {
|
private BinaryElementsDataHandler(EmbeddedDocumentExtractor partExtractor,
|
||||||
|
ContentHandler handler) {
|
||||||
this.partExtractor = partExtractor;
|
this.partExtractor = partExtractor;
|
||||||
this.handler = handler;
|
this.handler = handler;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
|
public void startElement(String uri, String localName, String qName, Attributes attributes)
|
||||||
|
throws SAXException {
|
||||||
binaryMode = ELEMENT_BINARY.equals(localName);
|
binaryMode = ELEMENT_BINARY.equals(localName);
|
||||||
if (binaryMode) {
|
if (binaryMode) {
|
||||||
binaryData.setLength(0);
|
binaryData.setLength(0);
|
||||||
metadata = new Metadata();
|
metadata = new Metadata();
|
||||||
|
|
||||||
metadata.set(TikaMetadataKeys.RESOURCE_NAME_KEY, attributes.getValue(ATTRIBUTE_ID));
|
metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY,
|
||||||
|
attributes.getValue(ATTRIBUTE_ID));
|
||||||
metadata.set(Metadata.CONTENT_TYPE, attributes.getValue(ATTRIBUTE_CONTENT_TYPE));
|
metadata.set(Metadata.CONTENT_TYPE, attributes.getValue(ATTRIBUTE_CONTENT_TYPE));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -83,11 +87,8 @@ public class FictionBookParser extends XMLParser {
|
|||||||
if (binaryMode) {
|
if (binaryMode) {
|
||||||
try {
|
try {
|
||||||
partExtractor.parseEmbedded(
|
partExtractor.parseEmbedded(
|
||||||
new ByteArrayInputStream(Base64.decodeBase64(binaryData.toString())),
|
new ByteArrayInputStream(Base64.decodeBase64(binaryData.toString())),
|
||||||
handler,
|
handler, metadata, true);
|
||||||
metadata,
|
|
||||||
true
|
|
||||||
);
|
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new SAXException("IOException in parseEmbedded", e);
|
throw new SAXException("IOException in parseEmbedded", e);
|
||||||
}
|
}
|
||||||
|
@ -16,19 +16,20 @@
|
|||||||
*/
|
*/
|
||||||
package org.apache.tika.parser.xml;
|
package org.apache.tika.parser.xml;
|
||||||
|
|
||||||
import org.apache.tika.metadata.Metadata;
|
|
||||||
import org.apache.tika.metadata.Property;
|
|
||||||
import org.xml.sax.Attributes;
|
import org.xml.sax.Attributes;
|
||||||
import org.xml.sax.helpers.DefaultHandler;
|
import org.xml.sax.helpers.DefaultHandler;
|
||||||
|
|
||||||
|
import org.apache.tika.metadata.Metadata;
|
||||||
|
import org.apache.tika.metadata.Property;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This adds Metadata entries with a specified name for
|
* This adds Metadata entries with a specified name for
|
||||||
* the textual content of a node (if present), and
|
* the textual content of a node (if present), and
|
||||||
* all attribute values passed through the matcher
|
* all attribute values passed through the matcher
|
||||||
* (but not their names).
|
* (but not their names).
|
||||||
*
|
*
|
||||||
* @deprecated Use the {@link AttributeMetadataHandler} and
|
* @deprecated Use the {@link AttributeMetadataHandler} and
|
||||||
* {@link ElementMetadataHandler} classes instead
|
* {@link ElementMetadataHandler} classes instead
|
||||||
*/
|
*/
|
||||||
public class MetadataHandler extends DefaultHandler {
|
public class MetadataHandler extends DefaultHandler {
|
||||||
|
|
||||||
@ -44,11 +45,12 @@ public class MetadataHandler extends DefaultHandler {
|
|||||||
this.property = null;
|
this.property = null;
|
||||||
this.name = name;
|
this.name = name;
|
||||||
}
|
}
|
||||||
|
|
||||||
public MetadataHandler(Metadata metadata, Property property) {
|
public MetadataHandler(Metadata metadata, Property property) {
|
||||||
this.metadata = metadata;
|
this.metadata = metadata;
|
||||||
this.property = property;
|
this.property = property;
|
||||||
this.name = property.getName();
|
this.name = property.getName();
|
||||||
}
|
}
|
||||||
|
|
||||||
public void addMetadata(String value) {
|
public void addMetadata(String value) {
|
||||||
if (value.length() > 0) {
|
if (value.length() > 0) {
|
||||||
@ -56,11 +58,11 @@ public class MetadataHandler extends DefaultHandler {
|
|||||||
if (previous != null && previous.length() > 0) {
|
if (previous != null && previous.length() > 0) {
|
||||||
value = previous + ", " + value;
|
value = previous + ", " + value;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (this.property != null) {
|
if (this.property != null) {
|
||||||
metadata.set(property, value);
|
metadata.set(property, value);
|
||||||
} else {
|
} else {
|
||||||
metadata.set(name, value);
|
metadata.set(name, value);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -70,14 +72,13 @@ public class MetadataHandler extends DefaultHandler {
|
|||||||
buffer.setLength(0);
|
buffer.setLength(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void startElement(
|
public void startElement(String uri, String localName, String name, Attributes attributes) {
|
||||||
String uri, String localName, String name, Attributes attributes) {
|
|
||||||
for (int i = 0; i < attributes.getLength(); i++) {
|
for (int i = 0; i < attributes.getLength(); i++) {
|
||||||
addMetadata(attributes.getValue(i));
|
addMetadata(attributes.getValue(i));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public void characters(char[] ch, int start, int length) {
|
public void characters(char[] ch, int start, int length) {
|
||||||
buffer.append(ch, start, length);
|
buffer.append(ch, start, length);
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,34 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.tika.parser.xml;
|
||||||
|
|
||||||
|
import org.xml.sax.ContentHandler;
|
||||||
|
|
||||||
|
import org.apache.tika.metadata.Metadata;
|
||||||
|
import org.apache.tika.parser.ParseContext;
|
||||||
|
import org.apache.tika.sax.TextAndAttributeContentHandler;
|
||||||
|
|
||||||
|
public class TextAndAttributeXMLParser extends XMLParser {
|
||||||
|
|
||||||
|
private static final long serialVersionUID = 7796914007312429473L;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected ContentHandler getContentHandler(ContentHandler handler, Metadata metadata,
|
||||||
|
ParseContext context) {
|
||||||
|
return new TextAndAttributeContentHandler(handler, true);
|
||||||
|
}
|
||||||
|
}
|
@ -16,7 +16,17 @@
|
|||||||
*/
|
*/
|
||||||
package org.apache.tika.parser.xml;
|
package org.apache.tika.parser.xml;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
import org.apache.commons.io.input.CloseShieldInputStream;
|
import org.apache.commons.io.input.CloseShieldInputStream;
|
||||||
|
import org.xml.sax.ContentHandler;
|
||||||
|
import org.xml.sax.SAXException;
|
||||||
|
|
||||||
import org.apache.tika.exception.TikaException;
|
import org.apache.tika.exception.TikaException;
|
||||||
import org.apache.tika.metadata.Metadata;
|
import org.apache.tika.metadata.Metadata;
|
||||||
import org.apache.tika.mime.MediaType;
|
import org.apache.tika.mime.MediaType;
|
||||||
@ -28,52 +38,41 @@ import org.apache.tika.sax.TaggedContentHandler;
|
|||||||
import org.apache.tika.sax.TextContentHandler;
|
import org.apache.tika.sax.TextContentHandler;
|
||||||
import org.apache.tika.sax.XHTMLContentHandler;
|
import org.apache.tika.sax.XHTMLContentHandler;
|
||||||
import org.apache.tika.utils.XMLReaderUtils;
|
import org.apache.tika.utils.XMLReaderUtils;
|
||||||
import org.xml.sax.ContentHandler;
|
|
||||||
import org.xml.sax.SAXException;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.InputStream;
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.HashSet;
|
|
||||||
import java.util.Set;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* XML parser.
|
* XML parser.
|
||||||
*/
|
*/
|
||||||
public class XMLParser extends AbstractParser {
|
public class XMLParser extends AbstractParser {
|
||||||
|
|
||||||
/** Serial version UID */
|
/**
|
||||||
|
* Serial version UID
|
||||||
|
*/
|
||||||
private static final long serialVersionUID = -6028836725280212837L;
|
private static final long serialVersionUID = -6028836725280212837L;
|
||||||
|
|
||||||
private static final Set<MediaType> SUPPORTED_TYPES =
|
private static final Set<MediaType> SUPPORTED_TYPES = Collections.unmodifiableSet(
|
||||||
Collections.unmodifiableSet(new HashSet<MediaType>(Arrays.asList(
|
new HashSet<>(
|
||||||
MediaType.application("xml"),
|
Arrays.asList(MediaType.application("xml"), MediaType.image("svg+xml"))));
|
||||||
MediaType.image("svg+xml"))));
|
|
||||||
|
|
||||||
public Set<MediaType> getSupportedTypes(ParseContext context) {
|
public Set<MediaType> getSupportedTypes(ParseContext context) {
|
||||||
return SUPPORTED_TYPES;
|
return SUPPORTED_TYPES;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void parse(
|
public void parse(InputStream stream, ContentHandler handler, Metadata metadata,
|
||||||
InputStream stream, ContentHandler handler,
|
ParseContext context) throws IOException, SAXException, TikaException {
|
||||||
Metadata metadata, ParseContext context)
|
|
||||||
throws IOException, SAXException, TikaException {
|
|
||||||
if (metadata.get(Metadata.CONTENT_TYPE) == null) {
|
if (metadata.get(Metadata.CONTENT_TYPE) == null) {
|
||||||
metadata.set(Metadata.CONTENT_TYPE, "application/xml");
|
metadata.set(Metadata.CONTENT_TYPE, "application/xml");
|
||||||
}
|
}
|
||||||
|
|
||||||
final XHTMLContentHandler xhtml =
|
final XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
|
||||||
new XHTMLContentHandler(handler, metadata);
|
|
||||||
xhtml.startDocument();
|
xhtml.startDocument();
|
||||||
xhtml.startElement("p");
|
xhtml.startElement("p");
|
||||||
|
|
||||||
TaggedContentHandler tagged = new TaggedContentHandler(handler);
|
TaggedContentHandler tagged = new TaggedContentHandler(handler);
|
||||||
try {
|
try {
|
||||||
XMLReaderUtils.parseSAX(
|
XMLReaderUtils.parseSAX(new CloseShieldInputStream(stream), new OfflineContentHandler(
|
||||||
new CloseShieldInputStream(stream),
|
new EmbeddedContentHandler(
|
||||||
new OfflineContentHandler(new EmbeddedContentHandler(
|
getContentHandler(tagged, metadata, context))),
|
||||||
getContentHandler(tagged, metadata, context))), context);
|
context);
|
||||||
} catch (SAXException e) {
|
} catch (SAXException e) {
|
||||||
tagged.throwIfCauseOf(e);
|
tagged.throwIfCauseOf(e);
|
||||||
throw new TikaException("XML parse error", e);
|
throw new TikaException("XML parse error", e);
|
||||||
@ -83,8 +82,8 @@ public class XMLParser extends AbstractParser {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
protected ContentHandler getContentHandler(
|
protected ContentHandler getContentHandler(ContentHandler handler, Metadata metadata,
|
||||||
ContentHandler handler, Metadata metadata, ParseContext context) {
|
ParseContext context) {
|
||||||
return new TextContentHandler(handler, true);
|
return new TextContentHandler(handler, true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,206 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.tika.utils;
|
||||||
|
|
||||||
|
public class StringUtils {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The empty String {@code ""}.
|
||||||
|
*
|
||||||
|
* @since 2.0
|
||||||
|
*/
|
||||||
|
public static final String EMPTY = "";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A String for a space character.
|
||||||
|
*
|
||||||
|
* @since 3.2
|
||||||
|
*/
|
||||||
|
public static final String SPACE = " ";
|
||||||
|
|
||||||
|
static int PAD_LIMIT = 10000;
|
||||||
|
|
||||||
|
public static boolean isEmpty(final CharSequence cs) {
|
||||||
|
return cs == null || cs.length() == 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static boolean isBlank(final String s) {
|
||||||
|
return s == null || s.trim().length() == 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* <p>Left pad a String with a specified String.</p>
|
||||||
|
*
|
||||||
|
* <p>Pad to a size of {@code size}.</p>
|
||||||
|
*
|
||||||
|
* <pre>
|
||||||
|
* StringUtils.leftPad(null, *, *) = null
|
||||||
|
* StringUtils.leftPad("", 3, "z") = "zzz"
|
||||||
|
* StringUtils.leftPad("bat", 3, "yz") = "bat"
|
||||||
|
* StringUtils.leftPad("bat", 5, "yz") = "yzbat"
|
||||||
|
* StringUtils.leftPad("bat", 8, "yz") = "yzyzybat"
|
||||||
|
* StringUtils.leftPad("bat", 1, "yz") = "bat"
|
||||||
|
* StringUtils.leftPad("bat", -1, "yz") = "bat"
|
||||||
|
* StringUtils.leftPad("bat", 5, null) = " bat"
|
||||||
|
* StringUtils.leftPad("bat", 5, "") = " bat"
|
||||||
|
* </pre>
|
||||||
|
*
|
||||||
|
* @param str the String to pad out, may be null
|
||||||
|
* @param size the size to pad to
|
||||||
|
* @param padStr the String to pad with, null or empty treated as single space
|
||||||
|
* @return left padded String or original String if no padding is necessary,
|
||||||
|
* {@code null} if null String input
|
||||||
|
*/
|
||||||
|
public static String leftPad(final String str, final int size, String padStr) {
|
||||||
|
if (str == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
if (isEmpty(padStr)) {
|
||||||
|
padStr = SPACE;
|
||||||
|
}
|
||||||
|
final int padLen = padStr.length();
|
||||||
|
final int strLen = str.length();
|
||||||
|
final int pads = size - strLen;
|
||||||
|
if (pads <= 0) {
|
||||||
|
return str; // returns original String when possible
|
||||||
|
}
|
||||||
|
if (padLen == 1 && pads <= PAD_LIMIT) {
|
||||||
|
return leftPad(str, size, padStr.charAt(0));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (pads == padLen) {
|
||||||
|
return padStr.concat(str);
|
||||||
|
} else if (pads < padLen) {
|
||||||
|
return padStr.substring(0, pads).concat(str);
|
||||||
|
} else {
|
||||||
|
final char[] padding = new char[pads];
|
||||||
|
final char[] padChars = padStr.toCharArray();
|
||||||
|
for (int i = 0; i < pads; i++) {
|
||||||
|
padding[i] = padChars[i % padLen];
|
||||||
|
}
|
||||||
|
return new String(padding).concat(str);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public static String leftPad(final String str, final int size, final char padChar) {
|
||||||
|
if (str == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
final int pads = size - str.length();
|
||||||
|
if (pads <= 0) {
|
||||||
|
return str; // returns original String when possible
|
||||||
|
}
|
||||||
|
if (pads > PAD_LIMIT) {
|
||||||
|
return leftPad(str, size, String.valueOf(padChar));
|
||||||
|
}
|
||||||
|
return repeat(padChar, pads).concat(str);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* <p>Returns padding using the specified delimiter repeated
|
||||||
|
* to a given length.</p>
|
||||||
|
*
|
||||||
|
* <pre>
|
||||||
|
* StringUtils.repeat('e', 0) = ""
|
||||||
|
* StringUtils.repeat('e', 3) = "eee"
|
||||||
|
* StringUtils.repeat('e', -2) = ""
|
||||||
|
* </pre>
|
||||||
|
*
|
||||||
|
* <p>Note: this method does not support padding with
|
||||||
|
* <a href="http://www.unicode.org/glossary/#supplementary_character">Unicode Supplementary Characters</a>
|
||||||
|
* as they require a pair of {@code char}s to be represented.
|
||||||
|
* If you are needing to support full I18N of your applications
|
||||||
|
* consider using {@link #repeat(String, int)} instead.
|
||||||
|
* </p>
|
||||||
|
*
|
||||||
|
* @param ch character to repeat
|
||||||
|
* @param repeat number of times to repeat char, negative treated as zero
|
||||||
|
* @return String with repeated character
|
||||||
|
* @see #repeat(String, int)
|
||||||
|
*/
|
||||||
|
public static String repeat(final char ch, final int repeat) {
|
||||||
|
if (repeat <= 0) {
|
||||||
|
return EMPTY;
|
||||||
|
}
|
||||||
|
final char[] buf = new char[repeat];
|
||||||
|
for (int i = repeat - 1; i >= 0; i--) {
|
||||||
|
buf[i] = ch;
|
||||||
|
}
|
||||||
|
return new String(buf);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Padding
|
||||||
|
//-----------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
|
||||||
|
* <p>Repeat a String {@code repeat} times to form a
|
||||||
|
* new String.</p>
|
||||||
|
*
|
||||||
|
* <pre>
|
||||||
|
* StringUtils.repeat(null, 2) = null
|
||||||
|
* StringUtils.repeat("", 0) = ""
|
||||||
|
* StringUtils.repeat("", 2) = ""
|
||||||
|
* StringUtils.repeat("a", 3) = "aaa"
|
||||||
|
* StringUtils.repeat("ab", 2) = "abab"
|
||||||
|
* StringUtils.repeat("a", -2) = ""
|
||||||
|
* </pre>
|
||||||
|
*
|
||||||
|
* @param str the String to repeat, may be null
|
||||||
|
* @param repeat number of times to repeat str, negative treated as zero
|
||||||
|
* @return a new String consisting of the original String repeated,
|
||||||
|
* {@code null} if null String input
|
||||||
|
*/
|
||||||
|
public static String repeat(final String str, final int repeat) {
|
||||||
|
// Performance tuned for 2.0 (JDK1.4)
|
||||||
|
|
||||||
|
if (str == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
if (repeat <= 0) {
|
||||||
|
return EMPTY;
|
||||||
|
}
|
||||||
|
final int inputLength = str.length();
|
||||||
|
if (repeat == 1 || inputLength == 0) {
|
||||||
|
return str;
|
||||||
|
}
|
||||||
|
if (inputLength == 1 && repeat <= PAD_LIMIT) {
|
||||||
|
return repeat(str.charAt(0), repeat);
|
||||||
|
}
|
||||||
|
|
||||||
|
final int outputLength = inputLength * repeat;
|
||||||
|
switch (inputLength) {
|
||||||
|
case 1:
|
||||||
|
return repeat(str.charAt(0), repeat);
|
||||||
|
case 2:
|
||||||
|
final char ch0 = str.charAt(0);
|
||||||
|
final char ch1 = str.charAt(1);
|
||||||
|
final char[] output2 = new char[outputLength];
|
||||||
|
for (int i = repeat * 2 - 2; i >= 0; i--, i--) {
|
||||||
|
output2[i] = ch0;
|
||||||
|
output2[i + 1] = ch1;
|
||||||
|
}
|
||||||
|
return new String(output2);
|
||||||
|
default:
|
||||||
|
final StringBuilder buf = new StringBuilder(outputLength);
|
||||||
|
for (int i = 0; i < repeat; i++) {
|
||||||
|
buf.append(str);
|
||||||
|
}
|
||||||
|
return buf.toString();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -16,7 +16,7 @@ import munit._
|
|||||||
class OdfExtractTest extends FunSuite {
|
class OdfExtractTest extends FunSuite {
|
||||||
|
|
||||||
val files = List(
|
val files = List(
|
||||||
ExampleFiles.examples_sample_odt -> 6372,
|
ExampleFiles.examples_sample_odt -> 6367,
|
||||||
ExampleFiles.examples_sample_ods -> 717
|
ExampleFiles.examples_sample_ods -> 717
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -20,7 +20,7 @@ import fs2.Stream
|
|||||||
import docspell.common._
|
import docspell.common._
|
||||||
|
|
||||||
import org.apache.tika.config.TikaConfig
|
import org.apache.tika.config.TikaConfig
|
||||||
import org.apache.tika.metadata.{HttpHeaders, Metadata, TikaMetadataKeys}
|
import org.apache.tika.metadata.{HttpHeaders, Metadata, TikaCoreProperties}
|
||||||
import org.apache.tika.mime.MediaType
|
import org.apache.tika.mime.MediaType
|
||||||
import org.apache.tika.parser.txt.Icu4jEncodingDetector
|
import org.apache.tika.parser.txt.Icu4jEncodingDetector
|
||||||
|
|
||||||
@ -40,7 +40,7 @@ object TikaMimetype {
|
|||||||
|
|
||||||
private def makeMetadata(hint: MimeTypeHint): Metadata = {
|
private def makeMetadata(hint: MimeTypeHint): Metadata = {
|
||||||
val md = new Metadata
|
val md = new Metadata
|
||||||
hint.filename.foreach(md.set(TikaMetadataKeys.RESOURCE_NAME_KEY, _))
|
hint.filename.foreach(md.set(TikaCoreProperties.RESOURCE_NAME_KEY, _))
|
||||||
hint.advertised.foreach(md.set(HttpHeaders.CONTENT_TYPE, _))
|
hint.advertised.foreach(md.set(HttpHeaders.CONTENT_TYPE, _))
|
||||||
md
|
md
|
||||||
}
|
}
|
||||||
|
@ -108,7 +108,13 @@ object SolrSetup {
|
|||||||
"Add latvian content field",
|
"Add latvian content field",
|
||||||
addContentField(Language.Latvian)
|
addContentField(Language.Latvian)
|
||||||
),
|
),
|
||||||
SolrMigration.reIndexAll(13, "Re-Index after adding latvian content field")
|
SolrMigration.reIndexAll(13, "Re-Index after adding latvian content field"),
|
||||||
|
SolrMigration[F](
|
||||||
|
14,
|
||||||
|
"Add japanese content field",
|
||||||
|
addContentField(Language.Japanese)
|
||||||
|
),
|
||||||
|
SolrMigration.reIndexAll(15, "Re-Index after adding japanese content field")
|
||||||
)
|
)
|
||||||
|
|
||||||
def addFolderField: F[Unit] =
|
def addFolderField: F[Unit] =
|
||||||
|
@ -2,7 +2,7 @@ openapi: 3.0.0
|
|||||||
|
|
||||||
info:
|
info:
|
||||||
title: Docspell JOEX
|
title: Docspell JOEX
|
||||||
version: 0.25.0-SNAPSHOT
|
version: 0.26.0-SNAPSHOT
|
||||||
description: |
|
description: |
|
||||||
This is the remote API to the job executor component of Docspell.
|
This is the remote API to the job executor component of Docspell.
|
||||||
Docspell is a free document management system focused on small
|
Docspell is a free document management system focused on small
|
||||||
|
@ -53,6 +53,7 @@ object ItemQuery {
|
|||||||
case object ItemId extends StringAttr
|
case object ItemId extends StringAttr
|
||||||
case object Date extends DateAttr
|
case object Date extends DateAttr
|
||||||
case object DueDate extends DateAttr
|
case object DueDate extends DateAttr
|
||||||
|
case object CreatedDate extends DateAttr
|
||||||
case object AttachCount extends IntAttr
|
case object AttachCount extends IntAttr
|
||||||
|
|
||||||
object Correspondent {
|
object Correspondent {
|
||||||
|
@ -31,6 +31,9 @@ object AttrParser {
|
|||||||
val dueDate: P[Attr.DateAttr] =
|
val dueDate: P[Attr.DateAttr] =
|
||||||
P.ignoreCase(C.due).as(Attr.DueDate)
|
P.ignoreCase(C.due).as(Attr.DueDate)
|
||||||
|
|
||||||
|
val created: P[Attr.DateAttr] =
|
||||||
|
P.ignoreCase(C.created).as(Attr.CreatedDate)
|
||||||
|
|
||||||
val corrOrgId: P[Attr.StringAttr] =
|
val corrOrgId: P[Attr.StringAttr] =
|
||||||
P.ignoreCase(C.corrOrgId)
|
P.ignoreCase(C.corrOrgId)
|
||||||
.as(Attr.Correspondent.OrgId)
|
.as(Attr.Correspondent.OrgId)
|
||||||
@ -78,7 +81,7 @@ object AttrParser {
|
|||||||
attachCountAttr
|
attachCountAttr
|
||||||
|
|
||||||
val dateAttr: P[Attr.DateAttr] =
|
val dateAttr: P[Attr.DateAttr] =
|
||||||
P.oneOf(List(date, dueDate))
|
P.oneOf(List(date, dueDate, created))
|
||||||
|
|
||||||
val stringAttr: P[Attr.StringAttr] =
|
val stringAttr: P[Attr.StringAttr] =
|
||||||
P.oneOf(
|
P.oneOf(
|
||||||
|
@ -23,6 +23,8 @@ object Constants {
|
|||||||
val corrOrgName = "corr.org.name"
|
val corrOrgName = "corr.org.name"
|
||||||
val corrPersId = "corr.pers.id"
|
val corrPersId = "corr.pers.id"
|
||||||
val corrPersName = "corr.pers.name"
|
val corrPersName = "corr.pers.name"
|
||||||
|
val created = "created"
|
||||||
|
val createdIn = "createdIn"
|
||||||
val customField = "f"
|
val customField = "f"
|
||||||
val customFieldId = "f.id"
|
val customFieldId = "f.id"
|
||||||
val date = "date"
|
val date = "date"
|
||||||
|
@ -35,6 +35,9 @@ object MacroParser {
|
|||||||
val dueDateRangeMacro: P[Expr.DateRangeMacro] =
|
val dueDateRangeMacro: P[Expr.DateRangeMacro] =
|
||||||
dateRangeMacroImpl(C.dueIn, Attr.DueDate)
|
dateRangeMacroImpl(C.dueIn, Attr.DueDate)
|
||||||
|
|
||||||
|
val createdDateRangeMacro: P[Expr.DateRangeMacro] =
|
||||||
|
dateRangeMacroImpl(C.createdIn, Attr.CreatedDate)
|
||||||
|
|
||||||
val yearDateMacro: P[Expr.YearMacro] =
|
val yearDateMacro: P[Expr.YearMacro] =
|
||||||
yearMacroImpl(C.year, Attr.Date)
|
yearMacroImpl(C.year, Attr.Date)
|
||||||
|
|
||||||
@ -52,6 +55,7 @@ object MacroParser {
|
|||||||
namesMacro,
|
namesMacro,
|
||||||
dateRangeMacro,
|
dateRangeMacro,
|
||||||
dueDateRangeMacro,
|
dueDateRangeMacro,
|
||||||
|
createdDateRangeMacro,
|
||||||
yearDateMacro,
|
yearDateMacro,
|
||||||
corrMacro,
|
corrMacro,
|
||||||
concMacro
|
concMacro
|
||||||
|
@ -56,6 +56,14 @@ class SimpleExprParserTest extends FunSuite with ValueHelper {
|
|||||||
p.parseAll("due<2021-03-14"),
|
p.parseAll("due<2021-03-14"),
|
||||||
Right(dateExpr(Operator.Lt, Attr.DueDate, ld(2021, 3, 14)))
|
Right(dateExpr(Operator.Lt, Attr.DueDate, ld(2021, 3, 14)))
|
||||||
)
|
)
|
||||||
|
assertEquals(
|
||||||
|
p.parseAll("created:2021-03-14"),
|
||||||
|
Right(dateExpr(Operator.Like, Attr.CreatedDate, ld(2021, 3, 14)))
|
||||||
|
)
|
||||||
|
assertEquals(
|
||||||
|
p.parseAll("created<2021-03-14"),
|
||||||
|
Right(dateExpr(Operator.Lt, Attr.CreatedDate, ld(2021, 3, 14)))
|
||||||
|
)
|
||||||
assertEquals(
|
assertEquals(
|
||||||
p.parseAll("due~=2021-03-14,2021-03-13"),
|
p.parseAll("due~=2021-03-14,2021-03-13"),
|
||||||
Right(Expr.InDateExpr(Attr.DueDate, Nel.of(ld(2021, 3, 14), ld(2021, 3, 13))))
|
Right(Expr.InDateExpr(Attr.DueDate, Nel.of(ld(2021, 3, 14), ld(2021, 3, 13))))
|
||||||
|
@ -2,7 +2,7 @@ openapi: 3.0.0
|
|||||||
|
|
||||||
info:
|
info:
|
||||||
title: Docspell
|
title: Docspell
|
||||||
version: 0.25.0-SNAPSHOT
|
version: 0.26.0-SNAPSHOT
|
||||||
description: |
|
description: |
|
||||||
This is the remote API to Docspell. Docspell is a free document
|
This is the remote API to Docspell. Docspell is a free document
|
||||||
management system focused on small groups or families.
|
management system focused on small groups or families.
|
||||||
@ -1350,6 +1350,37 @@ paths:
|
|||||||
schema:
|
schema:
|
||||||
$ref: "#/components/schemas/BasicResult"
|
$ref: "#/components/schemas/BasicResult"
|
||||||
|
|
||||||
|
/admin/attachments/convertallpdfs:
|
||||||
|
post:
|
||||||
|
operationId: "admin-attachments-convertallpdf"
|
||||||
|
tags: [Attachment, Admin]
|
||||||
|
summary: Convert all PDF files not yet converted
|
||||||
|
description: |
|
||||||
|
Docspell converts PDF files into PDF/A files by default, if
|
||||||
|
the OcrMyPDF tool is configured.
|
||||||
|
|
||||||
|
This endpoint can be used to submit a task that runs this on
|
||||||
|
all files that have not been converted yet in this way.
|
||||||
|
|
||||||
|
This conversion tool has been added in version 0.9.0 and so
|
||||||
|
older files can be "migrated" this way, or maybe after
|
||||||
|
enabling the tool (it is optional).
|
||||||
|
|
||||||
|
The task finds all files collective and submits a task for
|
||||||
|
each file to convert. These tasks are submitted with a low
|
||||||
|
priority so that normal processing can still proceed.
|
||||||
|
|
||||||
|
The body of the request should be empty.
|
||||||
|
security:
|
||||||
|
- adminHeader: []
|
||||||
|
responses:
|
||||||
|
200:
|
||||||
|
description: Ok
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: "#/components/schemas/BasicResult"
|
||||||
|
|
||||||
/sec/source:
|
/sec/source:
|
||||||
get:
|
get:
|
||||||
operationId: "sec-source-get-all"
|
operationId: "sec-source-get-all"
|
||||||
@ -1428,33 +1459,6 @@ paths:
|
|||||||
schema:
|
schema:
|
||||||
$ref: "#/components/schemas/BasicResult"
|
$ref: "#/components/schemas/BasicResult"
|
||||||
|
|
||||||
/sec/item/convertallpdfs:
|
|
||||||
post:
|
|
||||||
operationId: "sec-item-convert-all-pdfs"
|
|
||||||
tags: [ Item ]
|
|
||||||
summary: Convert all non-converted pdfs.
|
|
||||||
description: |
|
|
||||||
Submits a job that will find all pdf files that have not been
|
|
||||||
converted and converts them using the ocrmypdf tool (if
|
|
||||||
enabled). This tool has been added in version 0.9.0 and so
|
|
||||||
older files can be "migrated" this way, or maybe after
|
|
||||||
enabling the tool.
|
|
||||||
|
|
||||||
The task finds all files of the current collective and submits
|
|
||||||
task for each file to convert. These tasks are submitted with
|
|
||||||
a low priority so that normal processing can still proceed.
|
|
||||||
|
|
||||||
The body of the request should be empty.
|
|
||||||
security:
|
|
||||||
- authTokenHeader: []
|
|
||||||
responses:
|
|
||||||
200:
|
|
||||||
description: Ok
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
$ref: "#/components/schemas/BasicResult"
|
|
||||||
|
|
||||||
/sec/item/search:
|
/sec/item/search:
|
||||||
get:
|
get:
|
||||||
operationId: "sec-item-search-by-get"
|
operationId: "sec-item-search-by-get"
|
||||||
@ -1624,6 +1628,8 @@ paths:
|
|||||||
Update the tags associated to an item. This will remove all
|
Update the tags associated to an item. This will remove all
|
||||||
existing ones and sets the given tags, such that after this
|
existing ones and sets the given tags, such that after this
|
||||||
returns, the item has exactly the tags as given.
|
returns, the item has exactly the tags as given.
|
||||||
|
|
||||||
|
Tags may be specified as names or ids.
|
||||||
security:
|
security:
|
||||||
- authTokenHeader: []
|
- authTokenHeader: []
|
||||||
parameters:
|
parameters:
|
||||||
@ -1632,7 +1638,7 @@ paths:
|
|||||||
content:
|
content:
|
||||||
application/json:
|
application/json:
|
||||||
schema:
|
schema:
|
||||||
$ref: "#/components/schemas/ReferenceList"
|
$ref: "#/components/schemas/StringList"
|
||||||
responses:
|
responses:
|
||||||
200:
|
200:
|
||||||
description: Ok
|
description: Ok
|
||||||
@ -1668,7 +1674,7 @@ paths:
|
|||||||
$ref: "#/components/schemas/BasicResult"
|
$ref: "#/components/schemas/BasicResult"
|
||||||
|
|
||||||
/sec/item/{id}/taglink:
|
/sec/item/{id}/taglink:
|
||||||
post:
|
put:
|
||||||
operationId: "sec-item-link-tags"
|
operationId: "sec-item-link-tags"
|
||||||
tags: [Item]
|
tags: [Item]
|
||||||
summary: Link existing tags to an item.
|
summary: Link existing tags to an item.
|
||||||
@ -1721,6 +1727,31 @@ paths:
|
|||||||
schema:
|
schema:
|
||||||
$ref: "#/components/schemas/BasicResult"
|
$ref: "#/components/schemas/BasicResult"
|
||||||
|
|
||||||
|
/sec/item/{id}/tagsremove:
|
||||||
|
post:
|
||||||
|
operationId: "sec-item-remove-tags"
|
||||||
|
tags: [ Item ]
|
||||||
|
summary: Remove tags from an item
|
||||||
|
description: |
|
||||||
|
Remove the given tags from the item. The tags can be specified
|
||||||
|
via ids or names.
|
||||||
|
security:
|
||||||
|
- authTokenHeader: []
|
||||||
|
parameters:
|
||||||
|
- $ref: "#/components/parameters/id"
|
||||||
|
requestBody:
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: "#/components/schemas/StringList"
|
||||||
|
responses:
|
||||||
|
200:
|
||||||
|
description: Ok
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: "#/components/schemas/BasicResult"
|
||||||
|
|
||||||
/sec/item/{id}/direction:
|
/sec/item/{id}/direction:
|
||||||
put:
|
put:
|
||||||
operationId: "sec-item-set-direction"
|
operationId: "sec-item-set-direction"
|
||||||
|
@ -194,13 +194,21 @@ object AttachmentRoutes {
|
|||||||
val dsl = Http4sDsl[F]
|
val dsl = Http4sDsl[F]
|
||||||
import dsl._
|
import dsl._
|
||||||
|
|
||||||
HttpRoutes.of { case POST -> Root / "generatePreviews" =>
|
HttpRoutes.of {
|
||||||
for {
|
case POST -> Root / "generatePreviews" =>
|
||||||
res <- backend.item.generateAllPreviews(MakePreviewArgs.StoreMode.Replace, true)
|
for {
|
||||||
resp <- Ok(
|
res <- backend.item.generateAllPreviews(MakePreviewArgs.StoreMode.Replace, true)
|
||||||
Conversions.basicResult(res, "Generate all previews task submitted.")
|
resp <- Ok(
|
||||||
)
|
Conversions.basicResult(res, "Generate all previews task submitted.")
|
||||||
} yield resp
|
)
|
||||||
|
} yield resp
|
||||||
|
|
||||||
|
case POST -> Root / "convertallpdfs" =>
|
||||||
|
for {
|
||||||
|
res <-
|
||||||
|
backend.item.convertAllPdf(None, None, true)
|
||||||
|
resp <- Ok(Conversions.basicResult(res, "Convert all PDFs task submitted"))
|
||||||
|
} yield resp
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -59,9 +59,12 @@ object ItemMultiRoutes extends MultiIdSupport {
|
|||||||
for {
|
for {
|
||||||
json <- req.as[ItemsAndRefs]
|
json <- req.as[ItemsAndRefs]
|
||||||
items <- readIds[F](json.items)
|
items <- readIds[F](json.items)
|
||||||
tags <- json.refs.traverse(readId[F])
|
res <- backend.item.setTagsMultipleItems(
|
||||||
res <- backend.item.setTagsMultipleItems(items, tags, user.account.collective)
|
items,
|
||||||
resp <- Ok(Conversions.basicResult(res, "Tags updated"))
|
json.refs,
|
||||||
|
user.account.collective
|
||||||
|
)
|
||||||
|
resp <- Ok(Conversions.basicResult(res, "Tags updated"))
|
||||||
} yield resp
|
} yield resp
|
||||||
|
|
||||||
case req @ POST -> Root / "tags" =>
|
case req @ POST -> Root / "tags" =>
|
||||||
|
@ -47,13 +47,6 @@ object ItemRoutes {
|
|||||||
import dsl._
|
import dsl._
|
||||||
|
|
||||||
HttpRoutes.of {
|
HttpRoutes.of {
|
||||||
case POST -> Root / "convertallpdfs" =>
|
|
||||||
for {
|
|
||||||
res <-
|
|
||||||
backend.item.convertAllPdf(user.account.collective.some, user.account, true)
|
|
||||||
resp <- Ok(Conversions.basicResult(res, "Task submitted"))
|
|
||||||
} yield resp
|
|
||||||
|
|
||||||
case GET -> Root / "search" :? QP.Query(q) :? QP.Limit(limit) :? QP.Offset(
|
case GET -> Root / "search" :? QP.Query(q) :? QP.Limit(limit) :? QP.Offset(
|
||||||
offset
|
offset
|
||||||
) :? QP.WithDetails(detailFlag) =>
|
) :? QP.WithDetails(detailFlag) =>
|
||||||
@ -153,8 +146,8 @@ object ItemRoutes {
|
|||||||
|
|
||||||
case req @ PUT -> Root / Ident(id) / "tags" =>
|
case req @ PUT -> Root / Ident(id) / "tags" =>
|
||||||
for {
|
for {
|
||||||
tags <- req.as[ReferenceList].map(_.items)
|
tags <- req.as[StringList].map(_.items)
|
||||||
res <- backend.item.setTags(id, tags.map(_.id), user.account.collective)
|
res <- backend.item.setTags(id, tags, user.account.collective)
|
||||||
resp <- Ok(Conversions.basicResult(res, "Tags updated"))
|
resp <- Ok(Conversions.basicResult(res, "Tags updated"))
|
||||||
} yield resp
|
} yield resp
|
||||||
|
|
||||||
@ -180,6 +173,17 @@ object ItemRoutes {
|
|||||||
resp <- Ok(Conversions.basicResult(res, "Tags linked"))
|
resp <- Ok(Conversions.basicResult(res, "Tags linked"))
|
||||||
} yield resp
|
} yield resp
|
||||||
|
|
||||||
|
case req @ POST -> Root / Ident(id) / "tagsremove" =>
|
||||||
|
for {
|
||||||
|
json <- req.as[StringList]
|
||||||
|
res <- backend.item.removeTagsMultipleItems(
|
||||||
|
NonEmptyList.of(id),
|
||||||
|
json.items,
|
||||||
|
user.account.collective
|
||||||
|
)
|
||||||
|
resp <- Ok(Conversions.basicResult(res, "Tags removed"))
|
||||||
|
} yield resp
|
||||||
|
|
||||||
case req @ PUT -> Root / Ident(id) / "direction" =>
|
case req @ PUT -> Root / Ident(id) / "direction" =>
|
||||||
for {
|
for {
|
||||||
dir <- req.as[DirectionValue]
|
dir <- req.as[DirectionValue]
|
||||||
|
@ -171,12 +171,16 @@ object ItemQueryGenerator {
|
|||||||
tables.item.id.in(select.withSelect(Nel.of(RItem.as("i").id.s)))
|
tables.item.id.in(select.withSelect(Nel.of(RItem.as("i").id.s)))
|
||||||
|
|
||||||
case Expr.AttachId(id) =>
|
case Expr.AttachId(id) =>
|
||||||
tables.item.id.in(
|
val idWildcard = QueryWildcard(id)
|
||||||
Select(
|
val query =
|
||||||
select(RAttachment.T.itemId),
|
if (id == idWildcard) {
|
||||||
from(RAttachment.T),
|
|
||||||
RAttachment.T.id.cast[String] === id
|
RAttachment.T.id.cast[String] === id
|
||||||
).distinct
|
} else {
|
||||||
|
RAttachment.T.id.cast[String].like(idWildcard)
|
||||||
|
}
|
||||||
|
|
||||||
|
tables.item.id.in(
|
||||||
|
Select(select(RAttachment.T.itemId), from(RAttachment.T), query).distinct
|
||||||
)
|
)
|
||||||
|
|
||||||
case Expr.Fulltext(_) =>
|
case Expr.Fulltext(_) =>
|
||||||
@ -228,6 +232,8 @@ object ItemQueryGenerator {
|
|||||||
coalesce(tables.item.itemDate.s, tables.item.created.s).s
|
coalesce(tables.item.itemDate.s, tables.item.created.s).s
|
||||||
case Attr.DueDate =>
|
case Attr.DueDate =>
|
||||||
tables.item.dueDate.s
|
tables.item.dueDate.s
|
||||||
|
case Attr.CreatedDate =>
|
||||||
|
tables.item.created.s
|
||||||
}
|
}
|
||||||
|
|
||||||
private def stringColumn(tables: Tables)(attr: Attr.StringAttr): Column[String] =
|
private def stringColumn(tables: Tables)(attr: Attr.StringAttr): Column[String] =
|
||||||
|
@ -11,6 +11,7 @@ import java.time.LocalDate
|
|||||||
import docspell.common._
|
import docspell.common._
|
||||||
import docspell.query.ItemQueryParser
|
import docspell.query.ItemQueryParser
|
||||||
import docspell.store.qb.DSL._
|
import docspell.store.qb.DSL._
|
||||||
|
import docspell.store.qb.Select
|
||||||
import docspell.store.qb.generator.{ItemQueryGenerator, Tables}
|
import docspell.store.qb.generator.{ItemQueryGenerator, Tables}
|
||||||
import docspell.store.queries.AttachCountTable
|
import docspell.store.queries.AttachCountTable
|
||||||
import docspell.store.records._
|
import docspell.store.records._
|
||||||
@ -56,4 +57,31 @@ class ItemQueryGeneratorTest extends FunSuite {
|
|||||||
assertEquals(cond, expect)
|
assertEquals(cond, expect)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
test("attach.id with wildcard") {
|
||||||
|
val q = ItemQueryParser.parseUnsafe("attach.id=abcde*")
|
||||||
|
val cond = ItemQueryGenerator(now, tables, Ident.unsafe("coll"))(q)
|
||||||
|
val expect = tables.item.id.in(
|
||||||
|
Select(
|
||||||
|
select(RAttachment.T.itemId),
|
||||||
|
from(RAttachment.T),
|
||||||
|
RAttachment.T.id.cast[String].like("abcde%")
|
||||||
|
).distinct
|
||||||
|
)
|
||||||
|
|
||||||
|
assertEquals(cond, expect)
|
||||||
|
}
|
||||||
|
|
||||||
|
test("attach.id with equals") {
|
||||||
|
val q = ItemQueryParser.parseUnsafe("attach.id=abcde")
|
||||||
|
val cond = ItemQueryGenerator(now, tables, Ident.unsafe("coll"))(q)
|
||||||
|
val expect = tables.item.id.in(
|
||||||
|
Select(
|
||||||
|
select(RAttachment.T.itemId),
|
||||||
|
from(RAttachment.T),
|
||||||
|
RAttachment.T.id.cast[String] === "abcde"
|
||||||
|
).distinct
|
||||||
|
)
|
||||||
|
|
||||||
|
assertEquals(cond, expect)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
{-
|
{-
|
||||||
Copyright 2020 Docspell Contributors
|
Copyright 2020 Docspell Contributors
|
||||||
|
|
||||||
SPDX-License-Identifier: GPL-3.0-or-later
|
SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
-}
|
-}
|
||||||
|
|
||||||
|
|
||||||
module Api exposing
|
module Api exposing
|
||||||
( addConcEquip
|
( addConcEquip
|
||||||
, addConcPerson
|
, addConcPerson
|
||||||
@ -1782,12 +1783,12 @@ itemDetail flags id receive =
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
setTags : Flags -> String -> ReferenceList -> (Result Http.Error BasicResult -> msg) -> Cmd msg
|
setTags : Flags -> String -> StringList -> (Result Http.Error BasicResult -> msg) -> Cmd msg
|
||||||
setTags flags item tags receive =
|
setTags flags item tags receive =
|
||||||
Http2.authPut
|
Http2.authPut
|
||||||
{ url = flags.config.baseUrl ++ "/api/v1/sec/item/" ++ item ++ "/tags"
|
{ url = flags.config.baseUrl ++ "/api/v1/sec/item/" ++ item ++ "/tags"
|
||||||
, account = getAccount flags
|
, account = getAccount flags
|
||||||
, body = Http.jsonBody (Api.Model.ReferenceList.encode tags)
|
, body = Http.jsonBody (Api.Model.StringList.encode tags)
|
||||||
, expect = Http.expectJson receive Api.Model.BasicResult.decoder
|
, expect = Http.expectJson receive Api.Model.BasicResult.decoder
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
{-
|
{-
|
||||||
Copyright 2020 Docspell Contributors
|
Copyright 2020 Docspell Contributors
|
||||||
|
|
||||||
SPDX-License-Identifier: GPL-3.0-or-later
|
SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
-}
|
-}
|
||||||
|
|
||||||
|
|
||||||
module App.Data exposing
|
module App.Data exposing
|
||||||
( Model
|
( Model
|
||||||
, Msg(..)
|
, Msg(..)
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
{-
|
{-
|
||||||
Copyright 2020 Docspell Contributors
|
Copyright 2020 Docspell Contributors
|
||||||
|
|
||||||
SPDX-License-Identifier: GPL-3.0-or-later
|
SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
-}
|
-}
|
||||||
|
|
||||||
|
|
||||||
module App.Update exposing
|
module App.Update exposing
|
||||||
( initPage
|
( initPage
|
||||||
, update
|
, update
|
||||||
@ -330,10 +331,18 @@ updateItemDetail lmsg model =
|
|||||||
|
|
||||||
( hm, hc, hs ) =
|
( hm, hc, hs ) =
|
||||||
updateHome (Page.Home.Data.SetLinkTarget result.linkTarget) model_
|
updateHome (Page.Home.Data.SetLinkTarget result.linkTarget) model_
|
||||||
|
|
||||||
|
( hm1, hc1, hs1 ) =
|
||||||
|
case result.removedItem of
|
||||||
|
Just removedId ->
|
||||||
|
updateHome (Page.Home.Data.RemoveItem removedId) hm
|
||||||
|
|
||||||
|
Nothing ->
|
||||||
|
( hm, hc, hs )
|
||||||
in
|
in
|
||||||
( hm
|
( hm1
|
||||||
, Cmd.batch [ Cmd.map ItemDetailMsg result.cmd, hc ]
|
, Cmd.batch [ Cmd.map ItemDetailMsg result.cmd, hc, hc1 ]
|
||||||
, Sub.batch [ Sub.map ItemDetailMsg result.sub, hs ]
|
, Sub.batch [ Sub.map ItemDetailMsg result.sub, hs, hs1 ]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
{-
|
{-
|
||||||
Copyright 2020 Docspell Contributors
|
Copyright 2020 Docspell Contributors
|
||||||
|
|
||||||
SPDX-License-Identifier: GPL-3.0-or-later
|
SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
-}
|
-}
|
||||||
|
|
||||||
|
|
||||||
module App.View2 exposing (view)
|
module App.View2 exposing (view)
|
||||||
|
|
||||||
import Api.Model.AuthResult exposing (AuthResult)
|
import Api.Model.AuthResult exposing (AuthResult)
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
{-
|
{-
|
||||||
Copyright 2020 Docspell Contributors
|
Copyright 2020 Docspell Contributors
|
||||||
|
|
||||||
SPDX-License-Identifier: GPL-3.0-or-later
|
SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
-}
|
-}
|
||||||
|
|
||||||
|
|
||||||
module Comp.AddressForm exposing
|
module Comp.AddressForm exposing
|
||||||
( Model
|
( Model
|
||||||
, Msg(..)
|
, Msg(..)
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
{-
|
{-
|
||||||
Copyright 2020 Docspell Contributors
|
Copyright 2020 Docspell Contributors
|
||||||
|
|
||||||
SPDX-License-Identifier: GPL-3.0-or-later
|
SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
-}
|
-}
|
||||||
|
|
||||||
|
|
||||||
module Comp.AttachmentMeta exposing
|
module Comp.AttachmentMeta exposing
|
||||||
( Model
|
( Model
|
||||||
, Msg
|
, Msg
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
{-
|
{-
|
||||||
Copyright 2020 Docspell Contributors
|
Copyright 2020 Docspell Contributors
|
||||||
|
|
||||||
SPDX-License-Identifier: GPL-3.0-or-later
|
SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
-}
|
-}
|
||||||
|
|
||||||
|
|
||||||
module Comp.Basic exposing
|
module Comp.Basic exposing
|
||||||
( editLinkLabel
|
( editLinkLabel
|
||||||
, editLinkTableCell
|
, editLinkTableCell
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
{-
|
{-
|
||||||
Copyright 2020 Docspell Contributors
|
Copyright 2020 Docspell Contributors
|
||||||
|
|
||||||
SPDX-License-Identifier: GPL-3.0-or-later
|
SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
-}
|
-}
|
||||||
|
|
||||||
|
|
||||||
module Comp.BasicSizeField exposing
|
module Comp.BasicSizeField exposing
|
||||||
( Msg
|
( Msg
|
||||||
, update
|
, update
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
{-
|
{-
|
||||||
Copyright 2020 Docspell Contributors
|
Copyright 2020 Docspell Contributors
|
||||||
|
|
||||||
SPDX-License-Identifier: GPL-3.0-or-later
|
SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
-}
|
-}
|
||||||
|
|
||||||
|
|
||||||
module Comp.CalEventInput exposing
|
module Comp.CalEventInput exposing
|
||||||
( Model
|
( Model
|
||||||
, Msg
|
, Msg
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
{-
|
{-
|
||||||
Copyright 2020 Docspell Contributors
|
Copyright 2020 Docspell Contributors
|
||||||
|
|
||||||
SPDX-License-Identifier: GPL-3.0-or-later
|
SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
-}
|
-}
|
||||||
|
|
||||||
|
|
||||||
module Comp.ChangePasswordForm exposing
|
module Comp.ChangePasswordForm exposing
|
||||||
( Model
|
( Model
|
||||||
, Msg(..)
|
, Msg(..)
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
{-
|
{-
|
||||||
Copyright 2020 Docspell Contributors
|
Copyright 2020 Docspell Contributors
|
||||||
|
|
||||||
SPDX-License-Identifier: GPL-3.0-or-later
|
SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
-}
|
-}
|
||||||
|
|
||||||
|
|
||||||
module Comp.ClassifierSettingsForm exposing
|
module Comp.ClassifierSettingsForm exposing
|
||||||
( Model
|
( Model
|
||||||
, Msg
|
, Msg
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
{-
|
{-
|
||||||
Copyright 2020 Docspell Contributors
|
Copyright 2020 Docspell Contributors
|
||||||
|
|
||||||
SPDX-License-Identifier: GPL-3.0-or-later
|
SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
-}
|
-}
|
||||||
|
|
||||||
|
|
||||||
module Comp.CollectiveSettingsForm exposing
|
module Comp.CollectiveSettingsForm exposing
|
||||||
( Model
|
( Model
|
||||||
, Msg
|
, Msg
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
{-
|
{-
|
||||||
Copyright 2020 Docspell Contributors
|
Copyright 2020 Docspell Contributors
|
||||||
|
|
||||||
SPDX-License-Identifier: GPL-3.0-or-later
|
SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
-}
|
-}
|
||||||
|
|
||||||
|
|
||||||
module Comp.ColorTagger exposing
|
module Comp.ColorTagger exposing
|
||||||
( Model
|
( Model
|
||||||
, Msg
|
, Msg
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
{-
|
{-
|
||||||
Copyright 2020 Docspell Contributors
|
Copyright 2020 Docspell Contributors
|
||||||
|
|
||||||
SPDX-License-Identifier: GPL-3.0-or-later
|
SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
-}
|
-}
|
||||||
|
|
||||||
|
|
||||||
module Comp.ConfirmModal exposing
|
module Comp.ConfirmModal exposing
|
||||||
( Settings
|
( Settings
|
||||||
, defaultSettings
|
, defaultSettings
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
{-
|
{-
|
||||||
Copyright 2020 Docspell Contributors
|
Copyright 2020 Docspell Contributors
|
||||||
|
|
||||||
SPDX-License-Identifier: GPL-3.0-or-later
|
SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
-}
|
-}
|
||||||
|
|
||||||
|
|
||||||
module Comp.ContactField exposing
|
module Comp.ContactField exposing
|
||||||
( Model
|
( Model
|
||||||
, Msg(..)
|
, Msg(..)
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
{-
|
{-
|
||||||
Copyright 2020 Docspell Contributors
|
Copyright 2020 Docspell Contributors
|
||||||
|
|
||||||
SPDX-License-Identifier: GPL-3.0-or-later
|
SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
-}
|
-}
|
||||||
|
|
||||||
|
|
||||||
module Comp.CustomFieldForm exposing
|
module Comp.CustomFieldForm exposing
|
||||||
( Model
|
( Model
|
||||||
, Msg
|
, Msg
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
{-
|
{-
|
||||||
Copyright 2020 Docspell Contributors
|
Copyright 2020 Docspell Contributors
|
||||||
|
|
||||||
SPDX-License-Identifier: GPL-3.0-or-later
|
SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
-}
|
-}
|
||||||
|
|
||||||
|
|
||||||
module Comp.CustomFieldInput exposing
|
module Comp.CustomFieldInput exposing
|
||||||
( FieldResult(..)
|
( FieldResult(..)
|
||||||
, Model
|
, Model
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
{-
|
{-
|
||||||
Copyright 2020 Docspell Contributors
|
Copyright 2020 Docspell Contributors
|
||||||
|
|
||||||
SPDX-License-Identifier: GPL-3.0-or-later
|
SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
-}
|
-}
|
||||||
|
|
||||||
|
|
||||||
module Comp.CustomFieldManage exposing
|
module Comp.CustomFieldManage exposing
|
||||||
( Model
|
( Model
|
||||||
, Msg
|
, Msg
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
{-
|
{-
|
||||||
Copyright 2020 Docspell Contributors
|
Copyright 2020 Docspell Contributors
|
||||||
|
|
||||||
SPDX-License-Identifier: GPL-3.0-or-later
|
SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
-}
|
-}
|
||||||
|
|
||||||
|
|
||||||
module Comp.CustomFieldMultiInput exposing
|
module Comp.CustomFieldMultiInput exposing
|
||||||
( Model
|
( Model
|
||||||
, Msg
|
, Msg
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
{-
|
{-
|
||||||
Copyright 2020 Docspell Contributors
|
Copyright 2020 Docspell Contributors
|
||||||
|
|
||||||
SPDX-License-Identifier: GPL-3.0-or-later
|
SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
-}
|
-}
|
||||||
|
|
||||||
|
|
||||||
module Comp.CustomFieldTable exposing
|
module Comp.CustomFieldTable exposing
|
||||||
( Action(..)
|
( Action(..)
|
||||||
, Model
|
, Model
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
{-
|
{-
|
||||||
Copyright 2020 Docspell Contributors
|
Copyright 2020 Docspell Contributors
|
||||||
|
|
||||||
SPDX-License-Identifier: GPL-3.0-or-later
|
SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
-}
|
-}
|
||||||
|
|
||||||
|
|
||||||
module Comp.DatePicker exposing
|
module Comp.DatePicker exposing
|
||||||
( Msg
|
( Msg
|
||||||
, defaultSettings
|
, defaultSettings
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
{-
|
{-
|
||||||
Copyright 2020 Docspell Contributors
|
Copyright 2020 Docspell Contributors
|
||||||
|
|
||||||
SPDX-License-Identifier: GPL-3.0-or-later
|
SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
-}
|
-}
|
||||||
|
|
||||||
|
|
||||||
module Comp.DetailEdit exposing
|
module Comp.DetailEdit exposing
|
||||||
( Model
|
( Model
|
||||||
, Msg
|
, Msg
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
{-
|
{-
|
||||||
Copyright 2020 Docspell Contributors
|
Copyright 2020 Docspell Contributors
|
||||||
|
|
||||||
SPDX-License-Identifier: GPL-3.0-or-later
|
SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
-}
|
-}
|
||||||
|
|
||||||
|
|
||||||
module Comp.Dropdown exposing
|
module Comp.Dropdown exposing
|
||||||
( Model
|
( Model
|
||||||
, Msg(..)
|
, Msg(..)
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
{-
|
{-
|
||||||
Copyright 2020 Docspell Contributors
|
Copyright 2020 Docspell Contributors
|
||||||
|
|
||||||
SPDX-License-Identifier: GPL-3.0-or-later
|
SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
-}
|
-}
|
||||||
|
|
||||||
|
|
||||||
-- inspired from here: https://ellie-app.com/3T5mNms7SwKa1
|
-- inspired from here: https://ellie-app.com/3T5mNms7SwKa1
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
{-
|
{-
|
||||||
Copyright 2020 Docspell Contributors
|
Copyright 2020 Docspell Contributors
|
||||||
|
|
||||||
SPDX-License-Identifier: GPL-3.0-or-later
|
SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
-}
|
-}
|
||||||
|
|
||||||
|
|
||||||
module Comp.EmailInput exposing
|
module Comp.EmailInput exposing
|
||||||
( Model
|
( Model
|
||||||
, Msg
|
, Msg
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
{-
|
{-
|
||||||
Copyright 2020 Docspell Contributors
|
Copyright 2020 Docspell Contributors
|
||||||
|
|
||||||
SPDX-License-Identifier: GPL-3.0-or-later
|
SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
-}
|
-}
|
||||||
|
|
||||||
|
|
||||||
module Comp.EmailSettingsForm exposing
|
module Comp.EmailSettingsForm exposing
|
||||||
( Model
|
( Model
|
||||||
, Msg
|
, Msg
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
{-
|
{-
|
||||||
Copyright 2020 Docspell Contributors
|
Copyright 2020 Docspell Contributors
|
||||||
|
|
||||||
SPDX-License-Identifier: GPL-3.0-or-later
|
SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
-}
|
-}
|
||||||
|
|
||||||
|
|
||||||
module Comp.EmailSettingsManage exposing
|
module Comp.EmailSettingsManage exposing
|
||||||
( Model
|
( Model
|
||||||
, Msg
|
, Msg
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
{-
|
{-
|
||||||
Copyright 2020 Docspell Contributors
|
Copyright 2020 Docspell Contributors
|
||||||
|
|
||||||
SPDX-License-Identifier: GPL-3.0-or-later
|
SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
-}
|
-}
|
||||||
|
|
||||||
|
|
||||||
module Comp.EmailSettingsTable exposing
|
module Comp.EmailSettingsTable exposing
|
||||||
( Model
|
( Model
|
||||||
, Msg
|
, Msg
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
{-
|
{-
|
||||||
Copyright 2020 Docspell Contributors
|
Copyright 2020 Docspell Contributors
|
||||||
|
|
||||||
SPDX-License-Identifier: GPL-3.0-or-later
|
SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
-}
|
-}
|
||||||
|
|
||||||
|
|
||||||
module Comp.EquipmentForm exposing
|
module Comp.EquipmentForm exposing
|
||||||
( Model
|
( Model
|
||||||
, Msg(..)
|
, Msg(..)
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
{-
|
{-
|
||||||
Copyright 2020 Docspell Contributors
|
Copyright 2020 Docspell Contributors
|
||||||
|
|
||||||
SPDX-License-Identifier: GPL-3.0-or-later
|
SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
-}
|
-}
|
||||||
|
|
||||||
|
|
||||||
module Comp.EquipmentManage exposing
|
module Comp.EquipmentManage exposing
|
||||||
( Model
|
( Model
|
||||||
, Msg(..)
|
, Msg(..)
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
{-
|
{-
|
||||||
Copyright 2020 Docspell Contributors
|
Copyright 2020 Docspell Contributors
|
||||||
|
|
||||||
SPDX-License-Identifier: GPL-3.0-or-later
|
SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
-}
|
-}
|
||||||
|
|
||||||
|
|
||||||
module Comp.EquipmentTable exposing
|
module Comp.EquipmentTable exposing
|
||||||
( Model
|
( Model
|
||||||
, Msg(..)
|
, Msg(..)
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
{-
|
{-
|
||||||
Copyright 2020 Docspell Contributors
|
Copyright 2020 Docspell Contributors
|
||||||
|
|
||||||
SPDX-License-Identifier: GPL-3.0-or-later
|
SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
-}
|
-}
|
||||||
|
|
||||||
|
|
||||||
module Comp.ExpandCollapse exposing
|
module Comp.ExpandCollapse exposing
|
||||||
( collapseToggle
|
( collapseToggle
|
||||||
, expandToggle
|
, expandToggle
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
{-
|
{-
|
||||||
Copyright 2020 Docspell Contributors
|
Copyright 2020 Docspell Contributors
|
||||||
|
|
||||||
SPDX-License-Identifier: GPL-3.0-or-later
|
SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
-}
|
-}
|
||||||
|
|
||||||
|
|
||||||
module Comp.FieldListSelect exposing
|
module Comp.FieldListSelect exposing
|
||||||
( Model
|
( Model
|
||||||
, Msg
|
, Msg
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
{-
|
{-
|
||||||
Copyright 2020 Docspell Contributors
|
Copyright 2020 Docspell Contributors
|
||||||
|
|
||||||
SPDX-License-Identifier: GPL-3.0-or-later
|
SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
-}
|
-}
|
||||||
|
|
||||||
|
|
||||||
module Comp.FixedDropdown exposing
|
module Comp.FixedDropdown exposing
|
||||||
( Item
|
( Item
|
||||||
, Model
|
, Model
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
{-
|
{-
|
||||||
Copyright 2020 Docspell Contributors
|
Copyright 2020 Docspell Contributors
|
||||||
|
|
||||||
SPDX-License-Identifier: GPL-3.0-or-later
|
SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
-}
|
-}
|
||||||
|
|
||||||
|
|
||||||
module Comp.FolderDetail exposing
|
module Comp.FolderDetail exposing
|
||||||
( Model
|
( Model
|
||||||
, Msg
|
, Msg
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
{-
|
{-
|
||||||
Copyright 2020 Docspell Contributors
|
Copyright 2020 Docspell Contributors
|
||||||
|
|
||||||
SPDX-License-Identifier: GPL-3.0-or-later
|
SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
-}
|
-}
|
||||||
|
|
||||||
|
|
||||||
module Comp.FolderManage exposing
|
module Comp.FolderManage exposing
|
||||||
( Model
|
( Model
|
||||||
, Msg
|
, Msg
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
{-
|
{-
|
||||||
Copyright 2020 Docspell Contributors
|
Copyright 2020 Docspell Contributors
|
||||||
|
|
||||||
SPDX-License-Identifier: GPL-3.0-or-later
|
SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
-}
|
-}
|
||||||
|
|
||||||
|
|
||||||
module Comp.FolderSelect exposing
|
module Comp.FolderSelect exposing
|
||||||
( Model
|
( Model
|
||||||
, Msg
|
, Msg
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
{-
|
{-
|
||||||
Copyright 2020 Docspell Contributors
|
Copyright 2020 Docspell Contributors
|
||||||
|
|
||||||
SPDX-License-Identifier: GPL-3.0-or-later
|
SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
-}
|
-}
|
||||||
|
|
||||||
|
|
||||||
module Comp.FolderTable exposing
|
module Comp.FolderTable exposing
|
||||||
( Action(..)
|
( Action(..)
|
||||||
, Model
|
, Model
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
{-
|
{-
|
||||||
Copyright 2020 Docspell Contributors
|
Copyright 2020 Docspell Contributors
|
||||||
|
|
||||||
SPDX-License-Identifier: GPL-3.0-or-later
|
SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
-}
|
-}
|
||||||
|
|
||||||
|
|
||||||
module Comp.ImapSettingsForm exposing
|
module Comp.ImapSettingsForm exposing
|
||||||
( Model
|
( Model
|
||||||
, Msg
|
, Msg
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
{-
|
{-
|
||||||
Copyright 2020 Docspell Contributors
|
Copyright 2020 Docspell Contributors
|
||||||
|
|
||||||
SPDX-License-Identifier: GPL-3.0-or-later
|
SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
-}
|
-}
|
||||||
|
|
||||||
|
|
||||||
module Comp.ImapSettingsManage exposing
|
module Comp.ImapSettingsManage exposing
|
||||||
( Model
|
( Model
|
||||||
, Msg
|
, Msg
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
{-
|
{-
|
||||||
Copyright 2020 Docspell Contributors
|
Copyright 2020 Docspell Contributors
|
||||||
|
|
||||||
SPDX-License-Identifier: GPL-3.0-or-later
|
SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
-}
|
-}
|
||||||
|
|
||||||
|
|
||||||
module Comp.ImapSettingsTable exposing
|
module Comp.ImapSettingsTable exposing
|
||||||
( Model
|
( Model
|
||||||
, Msg
|
, Msg
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
{-
|
{-
|
||||||
Copyright 2020 Docspell Contributors
|
Copyright 2020 Docspell Contributors
|
||||||
|
|
||||||
SPDX-License-Identifier: GPL-3.0-or-later
|
SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
-}
|
-}
|
||||||
|
|
||||||
|
|
||||||
module Comp.IntField exposing
|
module Comp.IntField exposing
|
||||||
( Model
|
( Model
|
||||||
, Msg
|
, Msg
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
{-
|
{-
|
||||||
Copyright 2020 Docspell Contributors
|
Copyright 2020 Docspell Contributors
|
||||||
|
|
||||||
SPDX-License-Identifier: GPL-3.0-or-later
|
SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
-}
|
-}
|
||||||
|
|
||||||
|
|
||||||
module Comp.ItemCard exposing
|
module Comp.ItemCard exposing
|
||||||
( Model
|
( Model
|
||||||
, Msg
|
, Msg
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
{-
|
{-
|
||||||
Copyright 2020 Docspell Contributors
|
Copyright 2020 Docspell Contributors
|
||||||
|
|
||||||
SPDX-License-Identifier: GPL-3.0-or-later
|
SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
-}
|
-}
|
||||||
|
|
||||||
|
|
||||||
module Comp.ItemCardList exposing
|
module Comp.ItemCardList exposing
|
||||||
( Model
|
( Model
|
||||||
, Msg(..)
|
, Msg(..)
|
||||||
@ -45,6 +46,7 @@ type Msg
|
|||||||
= SetResults ItemLightList
|
= SetResults ItemLightList
|
||||||
| AddResults ItemLightList
|
| AddResults ItemLightList
|
||||||
| ItemCardMsg ItemLight Comp.ItemCard.Msg
|
| ItemCardMsg ItemLight Comp.ItemCard.Msg
|
||||||
|
| RemoveItem String
|
||||||
|
|
||||||
|
|
||||||
init : Model
|
init : Model
|
||||||
@ -144,6 +146,13 @@ updateDrag dm _ msg model =
|
|||||||
result.selection
|
result.selection
|
||||||
result.linkTarget
|
result.linkTarget
|
||||||
|
|
||||||
|
RemoveItem id ->
|
||||||
|
UpdateResult { model | results = removeItemById id model.results }
|
||||||
|
Cmd.none
|
||||||
|
dm
|
||||||
|
Data.ItemSelection.Inactive
|
||||||
|
Comp.LinkTarget.LinkNone
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
--- View2
|
--- View2
|
||||||
@ -170,13 +179,13 @@ viewGroup2 : Texts -> Model -> ViewConfig -> UiSettings -> ItemLightGroup -> Htm
|
|||||||
viewGroup2 texts model cfg settings group =
|
viewGroup2 texts model cfg settings group =
|
||||||
div [ class "ds-item-group" ]
|
div [ class "ds-item-group" ]
|
||||||
[ div
|
[ div
|
||||||
[ class "flex py-0 mt-2 flex flex-row items-center"
|
[ class "flex py-1 mt-2 mb-2 flex flex-row items-center"
|
||||||
, class "bg-white dark:bg-bluegray-800 text-lg z-35"
|
, class "bg-white dark:bg-bluegray-800 text-xl font-bold z-35"
|
||||||
, class "relative sticky top-10"
|
, class "relative sticky top-10"
|
||||||
]
|
]
|
||||||
[ hr
|
[ hr
|
||||||
[ class S.border
|
[ class S.border2
|
||||||
, class "flex-grow"
|
, class "w-16"
|
||||||
]
|
]
|
||||||
[]
|
[]
|
||||||
, div [ class "px-6" ]
|
, div [ class "px-6" ]
|
||||||
@ -186,7 +195,7 @@ viewGroup2 texts model cfg settings group =
|
|||||||
]
|
]
|
||||||
]
|
]
|
||||||
, hr
|
, hr
|
||||||
[ class S.border
|
[ class S.border2
|
||||||
, class "flex-grow"
|
, class "flex-grow"
|
||||||
]
|
]
|
||||||
[]
|
[]
|
||||||
@ -231,3 +240,15 @@ isMultiSelectMode cfg =
|
|||||||
|
|
||||||
Data.ItemSelection.Inactive ->
|
Data.ItemSelection.Inactive ->
|
||||||
False
|
False
|
||||||
|
|
||||||
|
|
||||||
|
removeItemById : String -> ItemLightList -> ItemLightList
|
||||||
|
removeItemById id list =
|
||||||
|
let
|
||||||
|
filterItem item =
|
||||||
|
item.id /= id
|
||||||
|
|
||||||
|
filterGroup group =
|
||||||
|
{ group | items = List.filter filterItem group.items }
|
||||||
|
in
|
||||||
|
{ list | groups = List.map filterGroup list.groups }
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
{-
|
{-
|
||||||
Copyright 2020 Docspell Contributors
|
Copyright 2020 Docspell Contributors
|
||||||
|
|
||||||
SPDX-License-Identifier: GPL-3.0-or-later
|
SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
-}
|
-}
|
||||||
|
|
||||||
|
|
||||||
module Comp.ItemDetail exposing
|
module Comp.ItemDetail exposing
|
||||||
( Model
|
( Model
|
||||||
, emptyModel
|
, emptyModel
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
{-
|
{-
|
||||||
Copyright 2020 Docspell Contributors
|
Copyright 2020 Docspell Contributors
|
||||||
|
|
||||||
SPDX-License-Identifier: GPL-3.0-or-later
|
SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
-}
|
-}
|
||||||
|
|
||||||
|
|
||||||
module Comp.ItemDetail.AddFilesForm exposing (view)
|
module Comp.ItemDetail.AddFilesForm exposing (view)
|
||||||
|
|
||||||
import Comp.Dropzone
|
import Comp.Dropzone
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
{-
|
{-
|
||||||
Copyright 2020 Docspell Contributors
|
Copyright 2020 Docspell Contributors
|
||||||
|
|
||||||
SPDX-License-Identifier: GPL-3.0-or-later
|
SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
-}
|
-}
|
||||||
|
|
||||||
|
|
||||||
module Comp.ItemDetail.ConfirmModalView exposing (view)
|
module Comp.ItemDetail.ConfirmModalView exposing (view)
|
||||||
|
|
||||||
import Comp.ConfirmModal
|
import Comp.ConfirmModal
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
{-
|
{-
|
||||||
Copyright 2020 Docspell Contributors
|
Copyright 2020 Docspell Contributors
|
||||||
|
|
||||||
SPDX-License-Identifier: GPL-3.0-or-later
|
SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
-}
|
-}
|
||||||
|
|
||||||
|
|
||||||
module Comp.ItemDetail.EditForm exposing (formTabs, view2)
|
module Comp.ItemDetail.EditForm exposing (formTabs, view2)
|
||||||
|
|
||||||
import Comp.CustomFieldMultiInput
|
import Comp.CustomFieldMultiInput
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
{-
|
{-
|
||||||
Copyright 2020 Docspell Contributors
|
Copyright 2020 Docspell Contributors
|
||||||
|
|
||||||
SPDX-License-Identifier: GPL-3.0-or-later
|
SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
-}
|
-}
|
||||||
|
|
||||||
|
|
||||||
module Comp.ItemDetail.FieldTabState exposing (EditTab(..), allTabs, findTab, tabName, tabState)
|
module Comp.ItemDetail.FieldTabState exposing (EditTab(..), allTabs, findTab, tabName, tabState)
|
||||||
|
|
||||||
import Comp.CustomFieldMultiInput
|
import Comp.CustomFieldMultiInput
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
{-
|
{-
|
||||||
Copyright 2020 Docspell Contributors
|
Copyright 2020 Docspell Contributors
|
||||||
|
|
||||||
SPDX-License-Identifier: GPL-3.0-or-later
|
SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
-}
|
-}
|
||||||
|
|
||||||
|
|
||||||
module Comp.ItemDetail.FormChange exposing
|
module Comp.ItemDetail.FormChange exposing
|
||||||
( FormChange(..)
|
( FormChange(..)
|
||||||
, multiUpdate
|
, multiUpdate
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
{-
|
{-
|
||||||
Copyright 2020 Docspell Contributors
|
Copyright 2020 Docspell Contributors
|
||||||
|
|
||||||
SPDX-License-Identifier: GPL-3.0-or-later
|
SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
-}
|
-}
|
||||||
|
|
||||||
|
|
||||||
module Comp.ItemDetail.ItemInfoHeader exposing (view)
|
module Comp.ItemDetail.ItemInfoHeader exposing (view)
|
||||||
|
|
||||||
import Api.Model.IdName exposing (IdName)
|
import Api.Model.IdName exposing (IdName)
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
{-
|
{-
|
||||||
Copyright 2020 Docspell Contributors
|
Copyright 2020 Docspell Contributors
|
||||||
|
|
||||||
SPDX-License-Identifier: GPL-3.0-or-later
|
SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
-}
|
-}
|
||||||
|
|
||||||
|
|
||||||
module Comp.ItemDetail.Model exposing
|
module Comp.ItemDetail.Model exposing
|
||||||
( AttachmentRename
|
( AttachmentRename
|
||||||
, ConfirmModalValue(..)
|
, ConfirmModalValue(..)
|
||||||
@ -275,7 +276,7 @@ type Msg
|
|||||||
| ItemModalCancelled
|
| ItemModalCancelled
|
||||||
| RequestDelete
|
| RequestDelete
|
||||||
| SaveResp (Result Http.Error BasicResult)
|
| SaveResp (Result Http.Error BasicResult)
|
||||||
| DeleteResp (Result Http.Error BasicResult)
|
| DeleteResp String (Result Http.Error BasicResult)
|
||||||
| GetItemResp (Result Http.Error ItemDetail)
|
| GetItemResp (Result Http.Error ItemDetail)
|
||||||
| GetProposalResp (Result Http.Error ItemProposals)
|
| GetProposalResp (Result Http.Error ItemProposals)
|
||||||
| RemoveDueDate
|
| RemoveDueDate
|
||||||
@ -351,22 +352,23 @@ type alias UpdateResult =
|
|||||||
, cmd : Cmd Msg
|
, cmd : Cmd Msg
|
||||||
, sub : Sub Msg
|
, sub : Sub Msg
|
||||||
, linkTarget : LinkTarget
|
, linkTarget : LinkTarget
|
||||||
|
, removedItem : Maybe String
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
resultModel : Model -> UpdateResult
|
resultModel : Model -> UpdateResult
|
||||||
resultModel model =
|
resultModel model =
|
||||||
UpdateResult model Cmd.none Sub.none Comp.LinkTarget.LinkNone
|
UpdateResult model Cmd.none Sub.none Comp.LinkTarget.LinkNone Nothing
|
||||||
|
|
||||||
|
|
||||||
resultModelCmd : ( Model, Cmd Msg ) -> UpdateResult
|
resultModelCmd : ( Model, Cmd Msg ) -> UpdateResult
|
||||||
resultModelCmd ( model, cmd ) =
|
resultModelCmd ( model, cmd ) =
|
||||||
UpdateResult model cmd Sub.none Comp.LinkTarget.LinkNone
|
UpdateResult model cmd Sub.none Comp.LinkTarget.LinkNone Nothing
|
||||||
|
|
||||||
|
|
||||||
resultModelCmdSub : ( Model, Cmd Msg, Sub Msg ) -> UpdateResult
|
resultModelCmdSub : ( Model, Cmd Msg, Sub Msg ) -> UpdateResult
|
||||||
resultModelCmdSub ( model, cmd, sub ) =
|
resultModelCmdSub ( model, cmd, sub ) =
|
||||||
UpdateResult model cmd sub Comp.LinkTarget.LinkNone
|
UpdateResult model cmd sub Comp.LinkTarget.LinkNone Nothing
|
||||||
|
|
||||||
|
|
||||||
personMatchesOrg : Model -> Bool
|
personMatchesOrg : Model -> Bool
|
||||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user