Merge branch 'eikek:master' into master

monnypython 2021-08-05 12:23:51 +02:00 committed by GitHub
commit 35f53b7107
349 changed files with 3468 additions and 1756 deletions

View File

@ -20,7 +20,7 @@ jobs:
working-directory: modules/webapp
- name: Fetch tags
run: git fetch --depth=100 origin +refs/tags/*:refs/tags/*
- uses: olafurpg/setup-scala@v12
- uses: olafurpg/setup-scala@v13
with:
java-version: ${{ matrix.java }}
- name: Coursier cache

View File

@ -14,7 +14,7 @@ jobs:
- uses: actions/checkout@v2.3.4
with:
fetch-depth: 0
- uses: olafurpg/setup-scala@v12
- uses: olafurpg/setup-scala@v13
with:
java-version: ${{ matrix.java }}
- uses: jorelali/setup-elm@v3

View File

@ -14,7 +14,7 @@ jobs:
- uses: actions/checkout@v2.3.4
with:
fetch-depth: 0
- uses: olafurpg/setup-scala@v12
- uses: olafurpg/setup-scala@v13
with:
java-version: ${{ matrix.java }}
- uses: jorelali/setup-elm@v3

View File

@ -1,15 +1,58 @@
# Changelog
## v0.25.1
*Jul 29, 2021*
- Fix SOLR fulltext search by adding the new Japanese content field
The SOLR fulltext search is broken in 0.25.0, so this is a fixup
release.
## v0.25.0
*Unreleased*
*Jul 29, 2021*
- Introducing a new CLI tool (#345) that replaces all the shell
scripts from the `tools/` directory (a usage sketch follows this
list)! https://github.com/docspell/dsc
- UI changes:
- year separators are now more prominent (#950)
- fixes a bug in the item counter in detail view when an item is
deleted (#920)
- German translation improvements (#901)
- The number of selected files is shown on the upload page (#896)
- The created date of an item can now be used in queries (#925, #958)
- The API for setting tags has been improved (#955)
- Task for converting pdfs is now behind the admin secret (#949)
- Task for generating preview images is now behind the admin secret (#915)
- Respond with 404 when the source-id is not correct (#931)
- Update of core libraries (#890)
- Add Japanese to the list of document languages. Thanks @wallace11
for helping out (#948, #962)
- Fix setting the folder from metadata when processing a file and
allow specifying it by name or id (#940)
- Fixes docspell config file in docker-compose setup (#909)
- Fixes selecting the next job in the job executor (#898)
- Fixes a bug that prevents uploading more than one file at once
(#938)
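A minimal usage sketch of the new `dsc` client (not part of the
changelog itself), mirroring the flags used in the docker-compose
change further below; the server URL and watch directory are
placeholders and the flag descriptions in the comments are
best-effort assumptions:

```bash
# upload everything dropped into /opt/docs via the integration endpoint
# -d points dsc at the restserver
# -i use the integration endpoint, -r recurse into subdirectories
# --delete removes files after a successful upload
dsc -d "http://docspell-restserver:7880" \
  watch --delete -ir \
  --header "Docspell-Integration:$DOCSPELL_HEADER_VALUE" \
  /opt/docs
```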
### Rest API Changes
- Removed `sec/item/convertallpdfs` endpoint in favor of the new
`admin/attachments/convertallpdfs` endpoint, which is now an admin
task
- Removed `sec/collective/previews` endpoint in favor of the new
`admin/attachments/generatePreviews` endpoint, which is now an admin
task that generates previews for all files. The now-removed endpoint
did this only for one collective.
- `/sec/item/{id}/tags`: Setting tags on an item (replacing existing
tags) has been changed to allow tags to be specified as names or ids
(see the sketch below)
- `/sec/item/{id}/tagsremove`: Added a route to remove tags for a
single item
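A hedged sketch of the two tag endpoints above; the HTTP verbs, the
JSON body shape, the auth header, and all values are illustrative
assumptions rather than quotes from the API spec:

```bash
# replace all tags of an item, mixing a tag name and a tag id
curl -X PUT "http://localhost:7880/api/v1/sec/item/<item-id>/tags" \
  -H "X-Docspell-Auth: <session-token>" \
  -H "Content-Type: application/json" \
  -d '{"items": ["invoice", "<tag-id>"]}'

# remove the given tags from a single item
curl -X POST "http://localhost:7880/api/v1/sec/item/<item-id>/tagsremove" \
  -H "X-Docspell-Auth: <session-token>" \
  -H "Content-Type: application/json" \
  -d '{"items": ["invoice"]}'
```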
### Configuration Changes
None.
## v0.24.0

View File

@ -30,7 +30,8 @@ fulltext search and has great e-mail integration. Everything is
accessible via a REST/HTTP api. A mobile friendly SPA web application
is the default user interface. An [Android
app](https://github.com/docspell/android-client) exists for
conveniently uploading files from your phone/tablet. The [feature
conveniently uploading files from your phone/tablet and a
[cli](https://github.com/docspell/dsc). The [feature
overview](https://docspell.org/#feature-selection) lists some more
points.

View File

@ -88,8 +88,8 @@ val elmSettings = Seq(
Compile / unmanagedSourceDirectories += (Compile / sourceDirectory).value / "elm",
headerSources / includeFilter := "*.elm",
headerMappings := headerMappings.value + (HeaderFileType("elm") -> HeaderCommentStyle(
new CommentBlockCreator("{-", " ", "-}"),
HeaderPattern.commentBetween("\\{\\-", " ", "\\-\\}")
new CommentBlockCreator("{-", " ", "-}\n"),
HeaderPattern.commentBetween("\\{\\-", " ", "\\-\\}")
))
)
val stylesSettings = Seq(

View File

@ -30,16 +30,18 @@ services:
- solr
consumedir:
image: docspell/tools:latest
image: docspell/dsc:latest
container_name: docspell-consumedir
command:
- ds-consumedir
- "-vmdi"
- "--path"
- "/opt/docs"
- "--iheader"
- dsc
- "-d"
- "http://docspell-restserver:7880"
- "watch"
- "--delete"
- "-ir"
- "--header"
- "Docspell-Integration:$DOCSPELL_HEADER_VALUE"
- "http://docspell-restserver:7880/api/v1/open/integration/item"
- "/opt/docs"
restart: unless-stopped
env_file: ./.env
volumes:

View File

@ -29,6 +29,7 @@ RUN JDKPKG="openjdk11"; \
tesseract-ocr-data-rus \
tesseract-ocr-data-ron \
tesseract-ocr-data-lav \
tesseract-ocr-data-jpn \
unpaper \
wkhtmltopdf \
libreoffice \

View File

@ -21,9 +21,7 @@ import docspell.common._
object DateFind {
def findDates(text: String, lang: Language): Stream[Pure, NerDateLabel] =
TextSplitter
.splitToken(text, " \t.,\n\r/".toSet)
.filter(w => lang != Language.Latvian || w.value != "gada")
splitWords(text, lang)
.sliding(3)
.filter(_.size == 3)
.flatMap(q =>
@ -44,6 +42,20 @@ object DateFind {
)
)
private[this] val jpnChars =
("年月日" + MonthName.getAll(Language.Japanese).map(_.mkString).mkString).toSet
private def splitWords(text: String, lang: Language): Stream[Pure, Word] = {
val stext =
if (lang == Language.Japanese) {
text.map(c => if (jpnChars.contains(c)) c else ' ')
} else text
TextSplitter
.splitToken(stext, " \t.,\n\r/年月日".toSet)
.filter(w => lang != Language.Latvian || w.value != "gada")
}
case class SimpleDate(year: Int, month: Int, day: Int) {
def toLocalDate: LocalDate =
LocalDate.of(if (year < 100) 2000 + year else year, month, day)
@ -89,6 +101,7 @@ object DateFind {
case Language.Swedish => ymd.or(dmy).or(mdy)
case Language.Dutch => dmy.or(ymd).or(mdy)
case Language.Latvian => dmy.or(lavLong).or(ymd)
case Language.Japanese => ymd
}
p.read(parts) match {
case Result.Success(sds, _) =>

View File

@ -50,6 +50,8 @@ object MonthName {
russian
case Language.Latvian =>
latvian
case Language.Japanese =>
japanese
}
private val numbers = List(
@ -290,4 +292,19 @@ object MonthName {
List("novembris", "nov."),
List("decembris", "dec.")
)
private val japanese = List(
List("1", "一"),
List("2", "二"),
List("3", "三"),
List("4", "四"),
List("5", "五"),
List("6", "六"),
List("7", "七"),
List("8", "八"),
List("9", "九"),
List("10", "十"),
List("11", "十一"),
List("12", "十二")
)
}

View File

@ -143,4 +143,40 @@ class DateFindSpec extends FunSuite {
)
}
test("find japanese dates") {
assertEquals(
DateFind
.findDates("今日の日付は2021.7.21です。", Language.Japanese)
.toVector,
Vector(
NerDateLabel(
LocalDate.of(2021, 7, 21),
NerLabel("2021.7.21", NerTag.Date, 6, 15)
)
)
)
assertEquals(
DateFind
.findDates("今日の日付は2021年7月21日です。", Language.Japanese)
.toVector,
Vector(
NerDateLabel(
LocalDate.of(2021, 7, 21),
NerLabel("2021年7月21", NerTag.Date, 6, 15)
)
)
)
assertEquals(
DateFind
.findDates("年月日2021年7月21日", Language.Japanese)
.toVector,
Vector(
NerDateLabel(
LocalDate.of(2021, 7, 21),
NerLabel("2021年7月21", NerTag.Date, 3, 12)
)
)
)
}
}

View File

@ -68,14 +68,14 @@ object JobFactory {
args,
"Create preview images",
now,
submitter.getOrElse(DocspellSystem.taskGroup),
submitter.getOrElse(DocspellSystem.user),
Priority.Low,
Some(DocspellSystem.allPreviewTaskTracker)
)
def convertAllPdfs[F[_]: Sync](
collective: Option[Ident],
account: AccountId,
submitter: Option[Ident],
prio: Priority
): F[RJob] =
for {
@ -84,11 +84,11 @@ object JobFactory {
job = RJob.newJob(
id,
ConvertAllPdfArgs.taskName,
account.collective,
collective.getOrElse(DocspellSystem.taskGroup),
ConvertAllPdfArgs(collective),
s"Convert all pdfs not yet converted",
now,
account.user,
submitter.getOrElse(DocspellSystem.user),
prio,
collective
.map(c => c / ConvertAllPdfArgs.taskName)

View File

@ -24,7 +24,7 @@ import org.log4s.getLogger
trait OItem[F[_]] {
/** Sets the given tags (removing all existing ones). */
def setTags(item: Ident, tagIds: List[Ident], collective: Ident): F[UpdateResult]
def setTags(item: Ident, tagIds: List[String], collective: Ident): F[UpdateResult]
/** Sets tags for multiple items. The tags of the items will be
* replaced with the given ones. Same as `setTags` but for multiple
@ -32,7 +32,7 @@ trait OItem[F[_]] {
*/
def setTagsMultipleItems(
items: NonEmptyList[Ident],
tags: List[Ident],
tags: List[String],
collective: Ident
): F[UpdateResult]
@ -181,7 +181,7 @@ trait OItem[F[_]] {
*/
def convertAllPdf(
collective: Option[Ident],
account: AccountId,
submitter: Option[Ident],
notifyJoex: Boolean
): F[UpdateResult]
@ -304,19 +304,20 @@ object OItem {
def setTags(
item: Ident,
tagIds: List[Ident],
tagIds: List[String],
collective: Ident
): F[UpdateResult] =
setTagsMultipleItems(NonEmptyList.of(item), tagIds, collective)
def setTagsMultipleItems(
items: NonEmptyList[Ident],
tags: List[Ident],
tags: List[String],
collective: Ident
): F[UpdateResult] =
UpdateResult.fromUpdate(store.transact(for {
k <- RTagItem.deleteItemTags(items, collective)
res <- items.traverse(i => RTagItem.setAllTags(i, tags))
k <- RTagItem.deleteItemTags(items, collective)
rtags <- RTag.findAllByNameOrId(tags, collective)
res <- items.traverse(i => RTagItem.setAllTags(i, rtags.map(_.tagId)))
n = res.fold
} yield k + n))
@ -687,11 +688,11 @@ object OItem {
def convertAllPdf(
collective: Option[Ident],
account: AccountId,
submitter: Option[Ident],
notifyJoex: Boolean
): F[UpdateResult] =
for {
job <- JobFactory.convertAllPdfs[F](collective, account, Priority.Low)
job <- JobFactory.convertAllPdfs[F](collective, submitter, Priority.Low)
_ <- queue.insertIfNew(job)
_ <- if (notifyJoex) joex.notifyAllNodes else ().pure[F]
} yield UpdateResult.success

View File

@ -108,6 +108,11 @@ object Language {
val iso3 = "lav"
}
case object Japanese extends Language {
val iso2 = "ja"
val iso3 = "jpn"
}
val all: List[Language] =
List(
German,
@ -124,7 +129,8 @@ object Language {
Swedish,
Russian,
Romanian,
Latvian
Latvian,
Japanese
)
def fromString(str: String): Either[String, Language] = {

View File

@ -0,0 +1,83 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.exception;
import org.xml.sax.SAXException;
public class WriteLimitReachedException extends SAXException {
//in case of (hopefully impossible) cyclic exception
private final static int MAX_DEPTH = 100;
private final int writeLimit;
public WriteLimitReachedException(int writeLimit) {
this.writeLimit = writeLimit;
}
@Override
public String getMessage() {
return "Your document contained more than " + writeLimit
+ " characters, and so your requested limit has been"
+ " reached. To receive the full text of the document,"
+ " increase your limit. (Text up to the limit is"
+ " however available).";
}
/**
* Checks whether the given exception (or any of it's root causes) was
* thrown by this handler as a signal of reaching the write limit.
*
* @param t throwable
* @return <code>true</code> if the write limit was reached,
* <code>false</code> otherwise
* @since Apache Tika 2.0
*/
public static boolean isWriteLimitReached(Throwable t) {
return isWriteLimitReached(t, 0);
}
private static boolean isWriteLimitReached(Throwable t, int depth) {
if (t == null) {
return false;
}
if (depth > MAX_DEPTH) {
return false;
}
if (t instanceof WriteLimitReachedException) {
return true;
} else {
return t.getCause() != null && isWriteLimitReached(t.getCause(), depth + 1);
}
}
public static void throwIfWriteLimitReached(Exception ex) throws SAXException {
throwIfWriteLimitReached(ex, 0);
}
private static void throwIfWriteLimitReached(Exception ex, int depth) throws SAXException {
if (ex == null) {
return;
}
if (depth > MAX_DEPTH) {
return;
}
if (ex instanceof WriteLimitReachedException) {
throw (SAXException) ex;
} else {
isWriteLimitReached(ex.getCause(), depth + 1);
}
}
}

View File

@ -0,0 +1,120 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.parser.odf;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.apache.tika.extractor.EmbeddedDocumentExtractor;
import org.apache.tika.extractor.EmbeddedDocumentUtil;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.sax.ContentHandlerDecorator;
import org.apache.tika.utils.XMLReaderUtils;
/**
* Handler for macros in flat open documents
*/
class FlatOpenDocumentMacroHandler extends ContentHandlerDecorator {
static String MODULE = "module";
static String NAME = "name";
private static String SOURCE_CODE = "source-code";
private final ContentHandler contentHandler;
private final ParseContext parseContext;
private final StringBuilder macroBuffer = new StringBuilder();
String macroName = null;
boolean inMacro = false;
private EmbeddedDocumentExtractor embeddedDocumentExtractor;
FlatOpenDocumentMacroHandler(ContentHandler contentHandler, ParseContext parseContext) {
super(contentHandler);
this.contentHandler = contentHandler;
this.parseContext = parseContext;
}
@Override
public void startElement(String namespaceURI, String localName, String qName, Attributes attrs)
throws SAXException {
if (MODULE.equals(localName)) {
macroName = XMLReaderUtils.getAttrValue(NAME, attrs);
} else if (SOURCE_CODE.equals(localName)) {
inMacro = true;
}
}
@Override
public void characters(char[] ch, int start, int length) throws SAXException {
if (inMacro) {
macroBuffer.append(ch, start, length);
}
}
@Override
public void endElement(String namespaceURI, String localName, String qName)
throws SAXException {
if (SOURCE_CODE.equals(localName)) {
try {
handleMacro();
} catch (IOException e) {
throw new SAXException(e);
} finally {
resetMacroState();
}
}
}
protected void resetMacroState() {
macroBuffer.setLength(0);
macroName = null;
inMacro = false;
}
protected void handleMacro() throws IOException, SAXException {
byte[] bytes = macroBuffer.toString().getBytes(StandardCharsets.UTF_8);
if (embeddedDocumentExtractor == null) {
embeddedDocumentExtractor =
EmbeddedDocumentUtil.getEmbeddedDocumentExtractor(parseContext);
}
Metadata embeddedMetadata = new Metadata();
if (!isBlank(macroName)) {
embeddedMetadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, macroName);
}
embeddedMetadata.set(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE,
TikaCoreProperties.EmbeddedResourceType.MACRO.toString());
if (embeddedDocumentExtractor.shouldParseEmbedded(embeddedMetadata)) {
try (InputStream is = TikaInputStream.get(bytes)) {
embeddedDocumentExtractor
.parseEmbedded(is, contentHandler, embeddedMetadata, false);
}
}
}
private static boolean isBlank(String s) {
return s == null || s.trim().isEmpty();
}
}

View File

@ -1,31 +1,32 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.parser.odf;
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.parser.odf;
import java.io.IOException;
import java.io.StringReader;
import java.util.Locale;
import org.apache.tika.sax.ContentHandlerDecorator;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.AttributesImpl;
import java.io.IOException;
import java.io.StringReader;
import java.util.Locale;
import org.apache.tika.sax.ContentHandlerDecorator;
/**
* Content handler decorator that:<ul>
@ -35,14 +36,11 @@ import java.util.Locale;
*/
public class NSNormalizerContentHandler extends ContentHandlerDecorator {
private static final String OLD_NS =
"http://openoffice.org/2000/";
private static final String OLD_NS = "http://openoffice.org/2000/";
private static final String NEW_NS =
"urn:oasis:names:tc:opendocument:xmlns:";
private static final String NEW_NS = "urn:oasis:names:tc:opendocument:xmlns:";
private static final String DTD_PUBLIC_ID =
"-//OpenOffice.org//DTD OfficeDocument 1.0//EN";
private static final String DTD_PUBLIC_ID = "-//OpenOffice.org//DTD OfficeDocument 1.0//EN";
public NSNormalizerContentHandler(ContentHandler handler) {
super(handler);
@ -57,27 +55,24 @@ public class NSNormalizerContentHandler extends ContentHandlerDecorator {
}
@Override
public void startElement(
String namespaceURI, String localName, String qName,
Attributes atts) throws SAXException {
public void startElement(String namespaceURI, String localName, String qName, Attributes atts)
throws SAXException {
AttributesImpl natts = new AttributesImpl();
for (int i = 0; i < atts.getLength(); i++) {
natts.addAttribute(
mapOldNS(atts.getURI(i)), atts.getLocalName(i),
atts.getQName(i), atts.getType(i), atts.getValue(i));
natts.addAttribute(mapOldNS(atts.getURI(i)), atts.getLocalName(i), atts.getQName(i),
atts.getType(i), atts.getValue(i));
}
super.startElement(mapOldNS(namespaceURI), localName, qName, atts);
}
@Override
public void endElement(String namespaceURI, String localName, String qName)
throws SAXException {
throws SAXException {
super.endElement(mapOldNS(namespaceURI), localName, qName);
}
@Override
public void startPrefixMapping(String prefix, String uri)
throws SAXException {
public void startPrefixMapping(String prefix, String uri) throws SAXException {
super.startPrefixMapping(prefix, mapOldNS(uri));
}
@ -87,9 +82,9 @@ public class NSNormalizerContentHandler extends ContentHandlerDecorator {
*/
@Override
public InputSource resolveEntity(String publicId, String systemId)
throws IOException, SAXException {
if ((systemId != null && systemId.toLowerCase(Locale.ROOT).endsWith(".dtd"))
|| DTD_PUBLIC_ID.equals(publicId)) {
throws IOException, SAXException {
if ((systemId != null && systemId.toLowerCase(Locale.ROOT).endsWith(".dtd")) ||
DTD_PUBLIC_ID.equals(publicId)) {
return new InputSource(new StringReader(""));
} else {
return super.resolveEntity(publicId, systemId);

View File

@ -0,0 +1,564 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.parser.odf;
import static org.apache.tika.sax.XHTMLContentHandler.XHTML;
import java.io.IOException;
import java.io.InputStream;
import java.util.BitSet;
import java.util.HashMap;
import java.util.Map;
import java.util.Stack;
import javax.xml.namespace.QName;
import org.apache.commons.codec.binary.Base64;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.AttributesImpl;
import org.apache.tika.extractor.EmbeddedDocumentExtractor;
import org.apache.tika.extractor.EmbeddedDocumentUtil;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.sax.ElementMappingContentHandler;
import org.apache.tika.sax.XHTMLContentHandler;
import org.apache.tika.utils.StringUtils;
/*
Handler for the body element or odt flat files and content.xml of
traditional compressed odt files
*/
class OpenDocumentBodyHandler extends ElementMappingContentHandler {
public static final String TEXT_NS = "urn:oasis:names:tc:opendocument:xmlns:text:1.0";
public static final String TABLE_NS = "urn:oasis:names:tc:opendocument:xmlns:table:1.0";
public static final String STYLE_NS = "urn:oasis:names:tc:opendocument:xmlns:style:1.0";
public static final String FORMATTING_OBJECTS_NS =
"urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0";
public static final String OFFICE_NS = "urn:oasis:names:tc:opendocument:xmlns:office:1.0";
public static final String SVG_NS = "urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0";
public static final String PRESENTATION_NS =
"urn:oasis:names:tc:opendocument:xmlns:presentation:1.0";
public static final String DRAW_NS = "urn:oasis:names:tc:opendocument:xmlns:drawing:1.0";
public static final String XLINK_NS = "http://www.w3.org/1999/xlink";
protected static final char[] TAB = new char[]{'\t'};
private static final String BINARY_DATA = "binary-data";
private static final Attributes EMPTY_ATTRIBUTES = new AttributesImpl();
/**
* Mappings between ODF tag names and XHTML tag names
* (including attributes). All other tag names/attributes are ignored
* and left out from event stream.
*/
private static final HashMap<QName, TargetElement> MAPPINGS =
new HashMap<>();
private static final char[] SPACE = new char[]{' '};
private static final String CLASS = "class";
private static final Attributes ANNOTATION_ATTRIBUTES = buildAttributes(CLASS, "annotation");
private static final Attributes NOTE_ATTRIBUTES = buildAttributes(CLASS, "note");
private static final Attributes NOTES_ATTRIBUTES = buildAttributes(CLASS, "notes");
static {
// general mappings of text:-tags
MAPPINGS.put(new QName(TEXT_NS, "p"), new TargetElement(XHTML, "p"));
// text:h-tags are mapped specifically in startElement/endElement
MAPPINGS.put(new QName(TEXT_NS, "line-break"), new TargetElement(XHTML, "br"));
MAPPINGS.put(new QName(TEXT_NS, "list-item"), new TargetElement(XHTML, "li"));
MAPPINGS.put(new QName(TEXT_NS, "note"), new TargetElement(XHTML, "span"));
MAPPINGS.put(new QName(OFFICE_NS, "annotation"), new TargetElement(XHTML,
"span"));
MAPPINGS.put(new QName(PRESENTATION_NS, "notes"), new TargetElement(XHTML,
"span"));
MAPPINGS.put(new QName(DRAW_NS, "object"), new TargetElement(XHTML,
"object"));
MAPPINGS.put(new QName(DRAW_NS, "text-box"), new TargetElement(XHTML, "div"));
MAPPINGS.put(new QName(SVG_NS, "title"), new TargetElement(XHTML, "span"));
MAPPINGS.put(new QName(SVG_NS, "desc"), new TargetElement(XHTML, "span"));
MAPPINGS.put(new QName(TEXT_NS, "span"), new TargetElement(XHTML, "span"));
final HashMap<QName, QName> aAttsMapping = new HashMap<>();
aAttsMapping.put(new QName(XLINK_NS, "href"), new QName("href"));
aAttsMapping.put(new QName(XLINK_NS, "title"), new QName("title"));
MAPPINGS.put(new QName(TEXT_NS, "a"), new TargetElement(XHTML, "a",
aAttsMapping));
MAPPINGS.put(new QName(DRAW_NS, "a"), new TargetElement(XHTML, "a",
aAttsMapping));
// create HTML tables from table:-tags
MAPPINGS.put(new QName(TABLE_NS, "table"), new TargetElement(XHTML, "table"));
// repeating of rows is ignored; for columns, see below!
MAPPINGS.put(new QName(TABLE_NS, "table-row"), new TargetElement(XHTML, "tr"));
// special mapping for rowspan/colspan attributes
final HashMap<QName, QName> tableCellAttsMapping = new HashMap<>();
tableCellAttsMapping
.put(new QName(TABLE_NS, "number-columns-spanned"), new QName("colspan"));
tableCellAttsMapping.put(new QName(TABLE_NS, "number-rows-spanned"), new QName("rowspan"));
/* TODO: The following is not correct, the cell should be repeated not spanned!
* Code generates a HTML cell, spanning all repeated columns, to make the cell look correct.
* Problems may occur when both spanning and repeating is given, which is not allowed by
* spec.
* Cell spanning instead of repeating is not a problem, because OpenOffice uses it
* only for empty cells.
*/
tableCellAttsMapping
.put(new QName(TABLE_NS, "number-columns-repeated"), new QName("colspan"));
MAPPINGS.put(new QName(TABLE_NS, "table-cell"),
new TargetElement(XHTML, "td", tableCellAttsMapping));
}
private final ContentHandler handler;
private final ParseContext parseContext;
private final BitSet textNodeStack = new BitSet();
//have we written the start style tags
//yet for the current text style
boolean hasWrittenStartStyleTags = false;
//if we're in a binary-data tag
boolean inBinaryData = false;
private EmbeddedDocumentExtractor embeddedDocumentExtractor;
private StringBuilder base64BinaryDataBuffer = new StringBuilder();
private int nodeDepth = 0;
private int completelyFiltered = 0;
private Stack<String> headingStack = new Stack<>();
private Map<String, TextStyle> paragraphTextStyleMap = new HashMap<>();
private Map<String, TextStyle> textStyleMap = new HashMap<>();
private Map<String, ListStyle> listStyleMap = new HashMap<>();
private String currParagraphStyleName; //paragraph style name
private TextStyle currTextStyle; //this is the text style for particular spans/paragraphs
private String currTextStyleName;
private Stack<ListStyle> listStyleStack = new Stack<>();
private ListStyle listStyle;
// True if we are currently in the named style:
private boolean curUnderlined;
private boolean curBold;
private boolean curItalic;
private int pDepth = 0;
OpenDocumentBodyHandler(ContentHandler handler, ParseContext parseContext) {
super(handler, MAPPINGS);
this.handler = handler;
this.parseContext = parseContext;
}
private static Attributes buildAttributes(String key, String value) {
AttributesImpl attrs = new AttributesImpl();
attrs.addAttribute("", key, key, "CDATA", value);
return attrs;
}
@Override
public void characters(char[] ch, int start, int length) throws SAXException {
if (inBinaryData) {
base64BinaryDataBuffer.append(ch, start, length);
return;
}
// only forward content of tags from text:-namespace
if (completelyFiltered == 0 && nodeDepth > 0 && textNodeStack.get(nodeDepth - 1)) {
if (!hasWrittenStartStyleTags) {
updateStyleTags();
hasWrittenStartStyleTags = true;
}
super.characters(ch, start, length);
}
}
// helper for checking tags which need complete filtering
// (with sub-tags)
private boolean needsCompleteFiltering(String namespaceURI, String localName) {
if (TEXT_NS.equals(namespaceURI)) {
return localName.endsWith("-template") || localName.endsWith("-style");
}
return TABLE_NS.equals(namespaceURI) && "covered-table-cell".equals(localName);
}
//<p> can appear inside comments and other things that are already inside <p>
//we need to track our pDepth and only output <p> if we're at the main level
// map the heading level to <hX> HTML tags
private String getXHTMLHeaderTagName(Attributes atts) {
String depthStr = atts.getValue(TEXT_NS, "outline-level");
if (depthStr == null) {
return "h1";
}
int depth = Integer.parseInt(depthStr);
if (depth >= 6) {
return "h6";
} else if (depth <= 1) {
return "h1";
} else {
return "h" + depth;
}
}
/**
* Check if a node is a text node
*/
private boolean isTextNode(String namespaceURI, String localName) {
if (TEXT_NS.equals(namespaceURI) && !localName.equals("page-number") &&
!localName.equals("page-count")) {
return true;
}
if (SVG_NS.equals(namespaceURI)) {
return "title".equals(localName) || "desc".equals(localName);
}
return false;
}
private void startList(String name) throws SAXException {
String elementName = "ul";
if (name != null) {
ListStyle style = listStyleMap.get(name);
elementName = style != null ? style.getTag() : "ul";
listStyleStack.push(style);
}
handler.startElement(XHTML, elementName, elementName, EMPTY_ATTRIBUTES);
}
private void endList() throws SAXException {
String elementName = "ul";
if (!listStyleStack.isEmpty()) {
ListStyle style = listStyleStack.pop();
elementName = style != null ? style.getTag() : "ul";
}
handler.endElement(XHTML, elementName, elementName);
}
private void startSpan(String name) throws SAXException {
if (name == null) {
return;
}
currTextStyle = textStyleMap.get(name);
hasWrittenStartStyleTags = false;
}
private void startParagraph(String styleName) throws SAXException {
if (pDepth == 0) {
handler.startElement(XHTML, "p", "p", EMPTY_ATTRIBUTES);
if (styleName != null) {
currTextStyle = paragraphTextStyleMap.get(styleName);
}
hasWrittenStartStyleTags = false;
} else {
handler.characters(SPACE, 0, SPACE.length);
}
pDepth++;
}
private void endParagraph() throws SAXException {
closeStyleTags();
if (pDepth == 1) {
handler.endElement(XHTML, "p", "p");
} else {
handler.characters(SPACE, 0, SPACE.length);
}
pDepth--;
}
private void updateStyleTags() throws SAXException {
if (currTextStyle == null) {
closeStyleTags();
return;
}
if (currTextStyle.bold != curBold) {
// Enforce nesting -- must close s and i tags
if (curUnderlined) {
handler.endElement(XHTML, "u", "u");
curUnderlined = false;
}
if (curItalic) {
handler.endElement(XHTML, "i", "i");
curItalic = false;
}
if (currTextStyle.bold) {
handler.startElement(XHTML, "b", "b", EMPTY_ATTRIBUTES);
} else {
handler.endElement(XHTML, "b", "b");
}
curBold = currTextStyle.bold;
}
if (currTextStyle.italic != curItalic) {
// Enforce nesting -- must close s tag
if (curUnderlined) {
handler.endElement(XHTML, "u", "u");
curUnderlined = false;
}
if (currTextStyle.italic) {
handler.startElement(XHTML, "i", "i", EMPTY_ATTRIBUTES);
} else {
handler.endElement(XHTML, "i", "i");
}
curItalic = currTextStyle.italic;
}
if (currTextStyle.underlined != curUnderlined) {
if (currTextStyle.underlined) {
handler.startElement(XHTML, "u", "u", EMPTY_ATTRIBUTES);
} else {
handler.endElement(XHTML, "u", "u");
}
curUnderlined = currTextStyle.underlined;
}
}
private void endSpan() throws SAXException {
updateStyleTags();
}
private void closeStyleTags() throws SAXException {
// Close any still open style tags
if (curUnderlined) {
handler.endElement(XHTML, "u", "u");
curUnderlined = false;
}
if (curItalic) {
handler.endElement(XHTML, "i", "i");
curItalic = false;
}
if (curBold) {
handler.endElement(XHTML, "b", "b");
curBold = false;
}
currTextStyle = null;
hasWrittenStartStyleTags = false;
}
@Override
public void startElement(String namespaceURI, String localName, String qName, Attributes attrs)
throws SAXException {
if (DRAW_NS.equals(namespaceURI) && "image".equals(localName)) {
String link = attrs.getValue(XLINK_NS, "href");
AttributesImpl attr = new AttributesImpl();
if (!StringUtils.isEmpty(link)) {
attr.addAttribute("", "src", "src", "CDATA", "embedded:" + link);
}
handler.startElement(XHTMLContentHandler.XHTML, "img", "img", attr);
handler.endElement(XHTMLContentHandler.XHTML, "img", "img");
}
if (BINARY_DATA.equals(localName)) {
inBinaryData = true;
return;
}
// keep track of current node type. If it is a text node,
// a bit at the current depth its set in textNodeStack.
// characters() checks the top bit to determine, if the
// actual node is a text node to print out nodeDepth contains
// the depth of the current node and also marks top of stack.
assert nodeDepth >= 0;
// Set styles
if (STYLE_NS.equals(namespaceURI) && "style".equals(localName)) {
String family = attrs.getValue(STYLE_NS, "family");
if ("text".equals(family)) {
currTextStyle = new TextStyle();
currTextStyleName = attrs.getValue(STYLE_NS, "name");
} else if ("paragraph".equals(family)) {
currTextStyle = new TextStyle();
currParagraphStyleName = attrs.getValue(STYLE_NS, "name");
}
} else if (TEXT_NS.equals(namespaceURI) && "list-style".equals(localName)) {
listStyle = new ListStyle();
String name = attrs.getValue(STYLE_NS, "name");
listStyleMap.put(name, listStyle);
} else if (currTextStyle != null && STYLE_NS.equals(namespaceURI) &&
"text-properties".equals(localName)) {
String fontStyle = attrs.getValue(FORMATTING_OBJECTS_NS, "font-style");
if ("italic".equals(fontStyle) || "oblique".equals(fontStyle)) {
currTextStyle.italic = true;
}
String fontWeight = attrs.getValue(FORMATTING_OBJECTS_NS, "font-weight");
if ("bold".equals(fontWeight) || "bolder".equals(fontWeight) ||
(fontWeight != null && Character.isDigit(fontWeight.charAt(0)) &&
Integer.parseInt(fontWeight) > 500)) {
currTextStyle.bold = true;
}
String underlineStyle = attrs.getValue(STYLE_NS, "text-underline-style");
if (underlineStyle != null && !underlineStyle.equals("none")) {
currTextStyle.underlined = true;
}
} else if (listStyle != null && TEXT_NS.equals(namespaceURI)) {
if ("list-level-style-bullet".equals(localName)) {
listStyle.ordered = false;
} else if ("list-level-style-number".equals(localName)) {
listStyle.ordered = true;
}
}
textNodeStack.set(nodeDepth++, isTextNode(namespaceURI, localName));
// filter *all* content of some tags
assert completelyFiltered >= 0;
if (needsCompleteFiltering(namespaceURI, localName)) {
completelyFiltered++;
}
// call next handler if no filtering
if (completelyFiltered == 0) {
// special handling of text:h, that are directly passed
// to incoming handler
if (TEXT_NS.equals(namespaceURI) && "h".equals(localName)) {
final String el = headingStack.push(getXHTMLHeaderTagName(attrs));
handler.startElement(XHTMLContentHandler.XHTML, el, el, EMPTY_ATTRIBUTES);
} else if (TEXT_NS.equals(namespaceURI) && "list".equals(localName)) {
startList(attrs.getValue(TEXT_NS, "style-name"));
} else if (TEXT_NS.equals(namespaceURI) && "span".equals(localName)) {
startSpan(attrs.getValue(TEXT_NS, "style-name"));
} else if (TEXT_NS.equals(namespaceURI) && "p".equals(localName)) {
startParagraph(attrs.getValue(TEXT_NS, "style-name"));
} else if (TEXT_NS.equals(namespaceURI) && "s".equals(localName)) {
handler.characters(SPACE, 0, 1);
} else if ("annotation".equals(localName)) {
closeStyleTags();
handler.startElement(XHTML, "span", "p", ANNOTATION_ATTRIBUTES);
} else if ("note".equals(localName)) {
closeStyleTags();
handler.startElement(XHTML, "span", "p", NOTE_ATTRIBUTES);
} else if ("notes".equals(localName)) {
closeStyleTags();
handler.startElement(XHTML, "span", "p", NOTES_ATTRIBUTES);
} else {
super.startElement(namespaceURI, localName, qName, attrs);
}
}
}
@Override
public void endElement(String namespaceURI, String localName, String qName)
throws SAXException {
if (BINARY_DATA.equals(localName)) {
inBinaryData = false;
try {
processBinaryData();
} catch (IOException e) {
throw new SAXException(e);
}
return;
}
if (STYLE_NS.equals(namespaceURI) && "style".equals(localName)) {
if (currTextStyle != null && currTextStyleName != null) {
textStyleMap.put(currTextStyleName, currTextStyle);
currTextStyleName = null;
currTextStyle = null;
} else if (currTextStyle != null && currParagraphStyleName != null) {
paragraphTextStyleMap.put(currParagraphStyleName, currTextStyle);
currParagraphStyleName = null;
currTextStyle = null;
}
} else if (TEXT_NS.equals(namespaceURI) && "list-style".equals(localName)) {
listStyle = null;
}
// call next handler if no filtering
if (completelyFiltered == 0) {
// special handling of text:h, that are directly passed
// to incoming handler
if (TEXT_NS.equals(namespaceURI) && "h".equals(localName)) {
final String el = headingStack.pop();
handler.endElement(namespaceURI, el, el);
} else if (TEXT_NS.equals(namespaceURI) && "list".equals(localName)) {
endList();
} else if (TEXT_NS.equals(namespaceURI) && "span".equals(localName)) {
currTextStyle = null;
hasWrittenStartStyleTags = false;
} else if (TEXT_NS.equals(namespaceURI) && "p".equals(localName)) {
endParagraph();
} else if ("annotation".equals(localName) || "note".equals(localName) ||
"notes".equals(localName)) {
closeStyleTags();
handler.endElement(namespaceURI, localName, localName);
} else {
super.endElement(namespaceURI, localName, qName);
}
// special handling of tabulators
if (TEXT_NS.equals(namespaceURI) &&
("tab-stop".equals(localName) || "tab".equals(localName))) {
this.characters(TAB, 0, TAB.length);
}
}
// revert filter for *all* content of some tags
if (needsCompleteFiltering(namespaceURI, localName)) {
completelyFiltered--;
}
assert completelyFiltered >= 0;
// reduce current node depth
nodeDepth--;
assert nodeDepth >= 0;
}
private void processBinaryData() throws IOException, SAXException {
//TODO: figure out whether we're in an inline image or a regular
//attachment and add that info to the embedded metadata
byte[] bytes = Base64.decodeBase64(base64BinaryDataBuffer.toString());
//clear state before parsing
base64BinaryDataBuffer.setLength(0);
inBinaryData = false;
if (embeddedDocumentExtractor == null) {
embeddedDocumentExtractor =
EmbeddedDocumentUtil.getEmbeddedDocumentExtractor(parseContext);
}
Metadata embeddedMetadata = new Metadata();
if (embeddedDocumentExtractor.shouldParseEmbedded(embeddedMetadata)) {
try (InputStream is = TikaInputStream.get(bytes)) {
embeddedDocumentExtractor.parseEmbedded(is, handler, embeddedMetadata, false);
}
}
}
@Override
public void startPrefixMapping(String prefix, String uri) {
// remove prefix mappings as they should not occur in XHTML
}
@Override
public void endPrefixMapping(String prefix) {
// remove prefix mappings as they should not occur in XHTML
}
private interface Style {
}
private static class TextStyle implements Style {
public boolean italic;
public boolean bold;
public boolean underlined;
@Override
public String toString() {
return "TextStyle{" + "italic=" + italic + ", bold=" + bold + ", underlined=" +
underlined + '}';
}
}
private static class ListStyle implements Style {
public boolean ordered;
public String getTag() {
return ordered ? "ol" : "ul";
}
}
}

View File

@ -16,591 +16,47 @@
*/
package org.apache.tika.parser.odf;
import java.io.IOException;
import java.io.InputStream;
import java.util.Collections;
import java.util.Set;
import org.apache.commons.io.input.CloseShieldInputStream;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.sax.ElementMappingContentHandler;
import org.apache.tika.sax.ElementMappingContentHandler.TargetElement;
import org.apache.tika.sax.OfflineContentHandler;
import org.apache.tika.sax.XHTMLContentHandler;
import org.apache.tika.utils.XMLReaderUtils;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.AttributesImpl;
import org.xml.sax.helpers.DefaultHandler;
import javax.xml.namespace.QName;
import java.io.IOException;
import java.io.InputStream;
import java.util.BitSet;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.Stack;
import static org.apache.tika.sax.XHTMLContentHandler.XHTML;
/**
* Parser for ODF <code>content.xml</code> files.
*/
public class OpenDocumentContentParser extends AbstractParser {
private interface Style {
}
private static class TextStyle implements Style {
public boolean italic;
public boolean bold;
public boolean underlined;
@Override
public String toString() {
return "TextStyle{" +
"italic=" + italic +
", bold=" + bold +
", underlined=" + underlined +
'}';
}
}
private static class ListStyle implements Style {
public boolean ordered;
public String getTag() {
return ordered ? "ol" : "ul";
}
}
private static final class OpenDocumentElementMappingContentHandler extends
ElementMappingContentHandler {
private static final char[] SPACE = new char[]{ ' '};
private static final String CLASS = "class";
private static final Attributes ANNOTATION_ATTRIBUTES = buildAttributes(CLASS, "annotation");
private static final Attributes NOTE_ATTRIBUTES = buildAttributes(CLASS, "note");
private static final Attributes NOTES_ATTRIBUTES = buildAttributes(CLASS, "notes");
private static Attributes buildAttributes(String key, String value) {
AttributesImpl attrs = new AttributesImpl();
attrs.addAttribute("", key, key, "CDATA", value);
return attrs;
}
private final ContentHandler handler;
private final BitSet textNodeStack = new BitSet();
private int nodeDepth = 0;
private int completelyFiltered = 0;
private Stack<String> headingStack = new Stack<String>();
private Map<String, TextStyle> paragraphTextStyleMap = new HashMap<String, TextStyle>();
private Map<String, TextStyle> textStyleMap = new HashMap<String, TextStyle>();
private Map<String, ListStyle> listStyleMap = new HashMap<String, ListStyle>();
private String currParagraphStyleName; //paragraph style name
private TextStyle currTextStyle; //this is the text style for particular spans/paragraphs
private String currTextStyleName;
private Stack<ListStyle> listStyleStack = new Stack<ListStyle>();
private ListStyle listStyle;
// True if we are currently in the named style:
private boolean curUnderlined;
private boolean curBold;
private boolean curItalic;
//have we written the start style tags
//yet for the current text style
boolean hasWrittenStartStyleTags = false;
private int pDepth = 0; //<p> can appear inside comments and other things that are already inside <p>
//we need to track our pDepth and only output <p> if we're at the main level
private OpenDocumentElementMappingContentHandler(ContentHandler handler,
Map<QName, TargetElement> mappings) {
super(handler, mappings);
this.handler = handler;
}
@Override
public void characters(char[] ch, int start, int length)
throws SAXException {
// only forward content of tags from text:-namespace
if (completelyFiltered == 0 && nodeDepth > 0
&& textNodeStack.get(nodeDepth - 1)) {
if (!hasWrittenStartStyleTags) {
updateStyleTags();
hasWrittenStartStyleTags = true;
}
super.characters(ch, start, length);
}
}
// helper for checking tags which need complete filtering
// (with sub-tags)
private boolean needsCompleteFiltering(
String namespaceURI, String localName) {
if (TEXT_NS.equals(namespaceURI)) {
return localName.endsWith("-template")
|| localName.endsWith("-style");
}
return TABLE_NS.equals(namespaceURI) && "covered-table-cell".equals(localName);
}
// map the heading level to <hX> HTML tags
private String getXHTMLHeaderTagName(Attributes atts) {
String depthStr = atts.getValue(TEXT_NS, "outline-level");
if (depthStr == null) {
return "h1";
}
int depth = Integer.parseInt(depthStr);
if (depth >= 6) {
return "h6";
} else if (depth <= 1) {
return "h1";
} else {
return "h" + depth;
}
}
/**
* Check if a node is a text node
*/
private boolean isTextNode(String namespaceURI, String localName) {
if (TEXT_NS.equals(namespaceURI) && !localName.equals("page-number") && !localName.equals("page-count")) {
return true;
}
if (SVG_NS.equals(namespaceURI)) {
return "title".equals(localName) ||
"desc".equals(localName);
}
return false;
}
private void startList(String name) throws SAXException {
String elementName = "ul";
if (name != null) {
ListStyle style = listStyleMap.get(name);
elementName = style != null ? style.getTag() : "ul";
listStyleStack.push(style);
}
handler.startElement(XHTML, elementName, elementName, EMPTY_ATTRIBUTES);
}
private void endList() throws SAXException {
String elementName = "ul";
if (!listStyleStack.isEmpty()) {
ListStyle style = listStyleStack.pop();
elementName = style != null ? style.getTag() : "ul";
}
handler.endElement(XHTML, elementName, elementName);
}
private void startSpan(String name) throws SAXException {
if (name == null) {
return;
}
currTextStyle = textStyleMap.get(name);
hasWrittenStartStyleTags = false;
}
private void startParagraph(String styleName) throws SAXException {
if (pDepth == 0) {
handler.startElement(XHTML, "p", "p", EMPTY_ATTRIBUTES);
if (styleName != null) {
currTextStyle = paragraphTextStyleMap.get(styleName);
}
hasWrittenStartStyleTags = false;
} else {
handler.characters(SPACE, 0, SPACE.length);
}
pDepth++;
}
private void endParagraph() throws SAXException {
closeStyleTags();
if (pDepth == 1) {
handler.endElement(XHTML, "p", "p");
} else {
handler.characters(SPACE, 0, SPACE.length);
}
pDepth--;
}
private void updateStyleTags() throws SAXException {
if (currTextStyle == null) {
closeStyleTags();
return;
}
if (currTextStyle.bold != curBold) {
// Enforce nesting -- must close s and i tags
if (curUnderlined) {
handler.endElement(XHTML, "u", "u");
curUnderlined = false;
}
if (curItalic) {
handler.endElement(XHTML, "i", "i");
curItalic = false;
}
if (currTextStyle.bold) {
handler.startElement(XHTML, "b", "b", EMPTY_ATTRIBUTES);
} else {
handler.endElement(XHTML, "b", "b");
}
curBold = currTextStyle.bold;
}
if (currTextStyle.italic != curItalic) {
// Enforce nesting -- must close s tag
if (curUnderlined) {
handler.endElement(XHTML, "u", "u");
curUnderlined = false;
}
if (currTextStyle.italic) {
handler.startElement(XHTML, "i", "i", EMPTY_ATTRIBUTES);
} else {
handler.endElement(XHTML, "i", "i");
}
curItalic = currTextStyle.italic;
}
if (currTextStyle.underlined != curUnderlined) {
if (currTextStyle.underlined) {
handler.startElement(XHTML, "u", "u", EMPTY_ATTRIBUTES);
} else {
handler.endElement(XHTML, "u", "u");
}
curUnderlined = currTextStyle.underlined;
}
}
private void endSpan() throws SAXException {
updateStyleTags();
}
private void closeStyleTags() throws SAXException {
// Close any still open style tags
if (curUnderlined) {
handler.endElement(XHTML,"u", "u");
curUnderlined = false;
}
if (curItalic) {
handler.endElement(XHTML,"i", "i");
curItalic = false;
}
if (curBold) {
handler.endElement(XHTML,"b", "b");
curBold = false;
}
currTextStyle = null;
hasWrittenStartStyleTags = false;
}
@Override
public void startElement(
String namespaceURI, String localName, String qName,
Attributes attrs) throws SAXException {
// keep track of current node type. If it is a text node,
// a bit at the current depth its set in textNodeStack.
// characters() checks the top bit to determine, if the
// actual node is a text node to print out nodeDepth contains
// the depth of the current node and also marks top of stack.
assert nodeDepth >= 0;
// Set styles
if (STYLE_NS.equals(namespaceURI) && "style".equals(localName)) {
String family = attrs.getValue(STYLE_NS, "family");
if ("text".equals(family)) {
currTextStyle = new TextStyle();
currTextStyleName = attrs.getValue(STYLE_NS, "name");
} else if ("paragraph".equals(family)) {
currTextStyle = new TextStyle();
currParagraphStyleName = attrs.getValue(STYLE_NS, "name");
}
} else if (TEXT_NS.equals(namespaceURI) && "list-style".equals(localName)) {
listStyle = new ListStyle();
String name = attrs.getValue(STYLE_NS, "name");
listStyleMap.put(name, listStyle);
} else if (currTextStyle != null && STYLE_NS.equals(namespaceURI)
&& "text-properties".equals(localName)) {
String fontStyle = attrs.getValue(FORMATTING_OBJECTS_NS, "font-style");
if ("italic".equals(fontStyle) || "oblique".equals(fontStyle)) {
currTextStyle.italic = true;
}
String fontWeight = attrs.getValue(FORMATTING_OBJECTS_NS, "font-weight");
if ("bold".equals(fontWeight) || "bolder".equals(fontWeight)
|| (fontWeight != null && Character.isDigit(fontWeight.charAt(0))
&& Integer.valueOf(fontWeight) > 500)) {
currTextStyle.bold = true;
}
String underlineStyle = attrs.getValue(STYLE_NS, "text-underline-style");
if (underlineStyle != null && !underlineStyle.equals("none")) {
currTextStyle.underlined = true;
}
} else if (listStyle != null && TEXT_NS.equals(namespaceURI)) {
if ("list-level-style-bullet".equals(localName)) {
listStyle.ordered = false;
} else if ("list-level-style-number".equals(localName)) {
listStyle.ordered = true;
}
}
textNodeStack.set(nodeDepth++,
isTextNode(namespaceURI, localName));
// filter *all* content of some tags
assert completelyFiltered >= 0;
if (needsCompleteFiltering(namespaceURI, localName)) {
completelyFiltered++;
}
// call next handler if no filtering
if (completelyFiltered == 0) {
// special handling of text:h, that are directly passed
// to incoming handler
if (TEXT_NS.equals(namespaceURI) && "h".equals(localName)) {
final String el = headingStack.push(getXHTMLHeaderTagName(attrs));
handler.startElement(XHTMLContentHandler.XHTML, el, el, EMPTY_ATTRIBUTES);
} else if (TEXT_NS.equals(namespaceURI) && "list".equals(localName)) {
startList(attrs.getValue(TEXT_NS, "style-name"));
} else if (TEXT_NS.equals(namespaceURI) && "span".equals(localName)) {
startSpan(attrs.getValue(TEXT_NS, "style-name"));
} else if (TEXT_NS.equals(namespaceURI) && "p".equals(localName)) {
startParagraph(attrs.getValue(TEXT_NS, "style-name"));
} else if (TEXT_NS.equals(namespaceURI) && "s".equals(localName)) {
handler.characters(SPACE, 0, 1);
} else if ("annotation".equals(localName)) {
closeStyleTags();
handler.startElement(XHTML, "span", "p", ANNOTATION_ATTRIBUTES);
} else if ("note".equals(localName)) {
closeStyleTags();
handler.startElement(XHTML, "span", "p", NOTE_ATTRIBUTES);
} else if ("notes".equals(localName)) {
closeStyleTags();
handler.startElement(XHTML, "span", "p", NOTES_ATTRIBUTES);
} else {
super.startElement(namespaceURI, localName, qName, attrs);
}
}
}
@Override
public void endElement(
String namespaceURI, String localName, String qName)
throws SAXException {
if (STYLE_NS.equals(namespaceURI) && "style".equals(localName)) {
if (currTextStyle != null && currTextStyleName != null) {
textStyleMap.put(currTextStyleName, currTextStyle);
currTextStyleName = null;
currTextStyle = null;
} else if (currTextStyle != null && currParagraphStyleName != null) {
paragraphTextStyleMap.put(currParagraphStyleName, currTextStyle);
currParagraphStyleName = null;
currTextStyle = null;
}
} else if (TEXT_NS.equals(namespaceURI) && "list-style".equals(localName)) {
listStyle = null;
}
// call next handler if no filtering
if (completelyFiltered == 0) {
// special handling of text:h, that are directly passed
// to incoming handler
if (TEXT_NS.equals(namespaceURI) && "h".equals(localName)) {
final String el = headingStack.pop();
handler.endElement(XHTMLContentHandler.XHTML, el, el);
} else if (TEXT_NS.equals(namespaceURI) && "list".equals(localName)) {
endList();
} else if (TEXT_NS.equals(namespaceURI) && "span".equals(localName)) {
currTextStyle = null;
hasWrittenStartStyleTags = false;
} else if (TEXT_NS.equals(namespaceURI) && "p".equals(localName)) {
endParagraph();
} else if ("annotation".equals(localName) || "note".equals(localName) ||
"notes".equals(localName)) {
closeStyleTags();
handler.endElement("", localName, localName);
} else {
super.endElement(namespaceURI, localName, qName);
}
// special handling of tabulators
if (TEXT_NS.equals(namespaceURI)
&& ("tab-stop".equals(localName)
|| "tab".equals(localName))) {
this.characters(TAB, 0, TAB.length);
}
}
// revert filter for *all* content of some tags
if (needsCompleteFiltering(namespaceURI, localName)) {
completelyFiltered--;
}
assert completelyFiltered >= 0;
// reduce current node depth
nodeDepth--;
assert nodeDepth >= 0;
}
@Override
public void startPrefixMapping(String prefix, String uri) {
// remove prefix mappings as they should not occur in XHTML
}
@Override
public void endPrefixMapping(String prefix) {
// remove prefix mappings as they should not occur in XHTML
}
}
public static final String TEXT_NS =
"urn:oasis:names:tc:opendocument:xmlns:text:1.0";
public static final String TABLE_NS =
"urn:oasis:names:tc:opendocument:xmlns:table:1.0";
public static final String STYLE_NS =
"urn:oasis:names:tc:opendocument:xmlns:style:1.0";
public static final String FORMATTING_OBJECTS_NS =
"urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0";
public static final String OFFICE_NS =
"urn:oasis:names:tc:opendocument:xmlns:office:1.0";
public static final String SVG_NS =
"urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0";
public static final String PRESENTATION_NS =
"urn:oasis:names:tc:opendocument:xmlns:presentation:1.0";
public static final String DRAW_NS =
"urn:oasis:names:tc:opendocument:xmlns:drawing:1.0";
public static final String XLINK_NS = "http://www.w3.org/1999/xlink";
protected static final char[] TAB = new char[]{'\t'};
private static final Attributes EMPTY_ATTRIBUTES = new AttributesImpl();
/**
* Mappings between ODF tag names and XHTML tag names
* (including attributes). All other tag names/attributes are ignored
* and left out from event stream.
*/
private static final HashMap<QName, TargetElement> MAPPINGS =
new HashMap<QName, TargetElement>();
static {
// general mappings of text:-tags
MAPPINGS.put(
new QName(TEXT_NS, "p"),
new TargetElement(XHTML, "p"));
// text:h-tags are mapped specifically in startElement/endElement
MAPPINGS.put(
new QName(TEXT_NS, "line-break"),
new TargetElement(XHTML, "br"));
MAPPINGS.put(
new QName(TEXT_NS, "list-item"),
new TargetElement(XHTML, "li"));
MAPPINGS.put(
new QName(TEXT_NS, "note"),
new TargetElement(XHTML, "span"));
MAPPINGS.put(
new QName(OFFICE_NS, "annotation"),
new TargetElement(XHTML, "span"));
MAPPINGS.put(
new QName(PRESENTATION_NS, "notes"),
new TargetElement(XHTML, "span"));
MAPPINGS.put(
new QName(DRAW_NS, "object"),
new TargetElement(XHTML, "object"));
MAPPINGS.put(
new QName(DRAW_NS, "text-box"),
new TargetElement(XHTML, "div"));
MAPPINGS.put(
new QName(SVG_NS, "title"),
new TargetElement(XHTML, "span"));
MAPPINGS.put(
new QName(SVG_NS, "desc"),
new TargetElement(XHTML, "span"));
MAPPINGS.put(
new QName(TEXT_NS, "span"),
new TargetElement(XHTML, "span"));
final HashMap<QName, QName> aAttsMapping =
new HashMap<QName, QName>();
aAttsMapping.put(
new QName(XLINK_NS, "href"),
new QName("href"));
aAttsMapping.put(
new QName(XLINK_NS, "title"),
new QName("title"));
MAPPINGS.put(
new QName(TEXT_NS, "a"),
new TargetElement(XHTML, "a", aAttsMapping));
// create HTML tables from table:-tags
MAPPINGS.put(
new QName(TABLE_NS, "table"),
new TargetElement(XHTML, "table"));
// repeating of rows is ignored; for columns, see below!
MAPPINGS.put(
new QName(TABLE_NS, "table-row"),
new TargetElement(XHTML, "tr"));
// special mapping for rowspan/colspan attributes
final HashMap<QName, QName> tableCellAttsMapping =
new HashMap<QName, QName>();
tableCellAttsMapping.put(
new QName(TABLE_NS, "number-columns-spanned"),
new QName("colspan"));
tableCellAttsMapping.put(
new QName(TABLE_NS, "number-rows-spanned"),
new QName("rowspan"));
/* TODO: The following is not correct, the cell should be repeated not spanned!
* Code generates a HTML cell, spanning all repeated columns, to make the cell look correct.
* Problems may occur when both spanning and repeating is given, which is not allowed by spec.
* Cell spanning instead of repeating is not a problem, because OpenOffice uses it
* only for empty cells.
*/
tableCellAttsMapping.put(
new QName(TABLE_NS, "number-columns-repeated"),
new QName("colspan"));
MAPPINGS.put(
new QName(TABLE_NS, "table-cell"),
new TargetElement(XHTML, "td", tableCellAttsMapping));
}
public Set<MediaType> getSupportedTypes(ParseContext context) {
return Collections.emptySet(); // not a top-level parser
}
public void parse(
InputStream stream, ContentHandler handler,
Metadata metadata, ParseContext context)
throws IOException, SAXException, TikaException {
parseInternal(stream,
new XHTMLContentHandler(handler, metadata),
metadata, context);
public void parse(InputStream stream, ContentHandler handler, Metadata metadata,
ParseContext context) throws IOException, SAXException, TikaException {
parseInternal(stream, new XHTMLContentHandler(handler, metadata), metadata, context);
}
void parseInternal(
InputStream stream, final ContentHandler handler,
Metadata metadata, ParseContext context)
throws IOException, SAXException, TikaException {
void parseInternal(InputStream stream, final ContentHandler handler, Metadata metadata,
ParseContext context) throws IOException, SAXException, TikaException {
DefaultHandler dh = new OpenDocumentElementMappingContentHandler(handler, MAPPINGS);
DefaultHandler dh = new OpenDocumentBodyHandler(handler, context);
XMLReaderUtils.parseSAX(
new CloseShieldInputStream(stream),
new OfflineContentHandler(
new NSNormalizerContentHandler(dh)),
context);
XMLReaderUtils.parseSAX(new CloseShieldInputStream(stream),
new OfflineContentHandler(new NSNormalizerContentHandler(dh)), context);
}
}

View File

@ -0,0 +1,60 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.parser.odf;
import java.io.IOException;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.utils.XMLReaderUtils;
class OpenDocumentMacroHandler extends FlatOpenDocumentMacroHandler {
OpenDocumentMacroHandler(ContentHandler contentHandler, ParseContext parseContext) {
super(contentHandler, parseContext);
}
@Override
public void startElement(String namespaceURI, String localName, String qName, Attributes attrs)
throws SAXException {
//in the compressed odf, there should only be one element in this file.
if (MODULE.equalsIgnoreCase(localName)) {
inMacro = true;
macroName = XMLReaderUtils.getAttrValue(NAME, attrs);
}
}
@Override
public void endElement(String namespaceURI, String localName, String qName)
throws SAXException {
if (MODULE.equals(localName)) {
try {
handleMacro();
} catch (IOException e) {
throw new SAXException(e);
} finally {
//this shouldn't be necessary in the compressed odf files
resetMacroState();
}
}
}
}

View File

@ -0,0 +1,45 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.parser.odf;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.apache.tika.exception.EncryptedDocumentException;
import org.apache.tika.sax.ContentHandlerDecorator;
/**
* For now, this only looks for any encryption-data elements.
 * If found, this will throw an EncryptedDocumentException wrapped
 * in a SAXException.
 *
 * If desired, this could be extended to actually extract the information
 * necessary for decryption. Please open an issue or pull
 * request if you need this functionality.
*
*/
class OpenDocumentManifestHandler extends ContentHandlerDecorator {
@Override
public void startElement(
String namespaceURI, String localName, String qName,
Attributes attrs) throws SAXException {
if (localName.equals("encryption-data")) {
throw new SAXException(new EncryptedDocumentException());
}
}
}
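
A minimal usage sketch (not part of this diff) of how the manifest check surfaces to callers: OpenDocumentParser, changed later in this diff, rethrows the wrapped EncryptedDocumentException, so parsing a password-protected document fails fast. The file name is hypothetical.

```java
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Paths;

import org.apache.tika.exception.EncryptedDocumentException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.odf.OpenDocumentParser;
import org.apache.tika.sax.BodyContentHandler;

public class EncryptedOdtCheck {
    public static void main(String[] args) throws Exception {
        OpenDocumentParser parser = new OpenDocumentParser();
        // "encrypted.odt" is a hypothetical, password-protected document
        try (InputStream in = Files.newInputStream(Paths.get("encrypted.odt"))) {
            parser.parse(in, new BodyContentHandler(), new Metadata(), new ParseContext());
        } catch (EncryptedDocumentException e) {
            // raised via the encryption-data check on META-INF/manifest.xml
            System.out.println("Document is encrypted, cannot extract text");
        }
    }
}
```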

View File

@ -16,12 +16,21 @@
*/
package org.apache.tika.parser.odf;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.DublinCore;
import org.apache.tika.metadata.MSOffice;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Office;
import org.apache.tika.metadata.OfficeOpenXMLCore;
import org.apache.tika.metadata.OfficeOpenXMLExtended;
import org.apache.tika.metadata.PagedText;
import org.apache.tika.metadata.Property;
import org.apache.tika.metadata.TikaCoreProperties;
@ -36,11 +45,6 @@ import org.apache.tika.sax.xpath.CompositeMatcher;
import org.apache.tika.sax.xpath.Matcher;
import org.apache.tika.sax.xpath.MatchingContentHandler;
import org.apache.tika.sax.xpath.XPathParser;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import java.io.IOException;
import java.io.InputStream;
/**
* Parser for OpenDocument <code>meta.xml</code> files.
@ -54,68 +58,54 @@ public class OpenDocumentMetaParser extends XMLParser {
private static final String META_NS = "urn:oasis:names:tc:opendocument:xmlns:meta:1.0";
private static final XPathParser META_XPATH = new XPathParser("meta", META_NS);
/**
* @see OfficeOpenXMLCore#SUBJECT
* @deprecated use OfficeOpenXMLCore#SUBJECT
*/
@Deprecated
private static final Property TRANSITION_INITIAL_CREATOR_TO_INITIAL_AUTHOR =
Property.composite(Office.INITIAL_AUTHOR,
new Property[]{Property.externalText("initial-creator")});
private static ContentHandler getDublinCoreHandler(
Metadata metadata, Property property, String element) {
return new ElementMetadataHandler(
DublinCore.NAMESPACE_URI_DC, element,
metadata, property);
private static ContentHandler getDublinCoreHandler(Metadata metadata, Property property,
String element) {
return new ElementMetadataHandler(DublinCore.NAMESPACE_URI_DC, element, metadata, property);
}
private static ContentHandler getMeta(
ContentHandler ch, Metadata md, Property property, String element) {
Matcher matcher = new CompositeMatcher(
META_XPATH.parse("//meta:" + element),
META_XPATH.parse("//meta:" + element + "//text()"));
private static ContentHandler getMeta(ContentHandler ch, Metadata md, Property property,
String element) {
Matcher matcher = new CompositeMatcher(META_XPATH.parse("//meta:" + element),
META_XPATH.parse("//meta:" + element + "//text()"));
ContentHandler branch =
new MatchingContentHandler(new MetadataHandler(md, property), matcher);
new MatchingContentHandler(new MetadataHandler(md, property), matcher);
return new TeeContentHandler(ch, branch);
}
private static ContentHandler getUserDefined(
ContentHandler ch, Metadata md) {
Matcher matcher = new CompositeMatcher(
META_XPATH.parse("//meta:user-defined/@meta:name"),
META_XPATH.parse("//meta:user-defined//text()"));
// eg <meta:user-defined meta:name="Info1">Text1</meta:user-defined> becomes custom:Info1=Text1
private static ContentHandler getUserDefined(ContentHandler ch, Metadata md) {
Matcher matcher = new CompositeMatcher(META_XPATH.parse("//meta:user-defined/@meta:name"),
META_XPATH.parse("//meta:user-defined//text()"));
// eg <meta:user-defined meta:name="Info1">Text1</meta:user-defined> becomes
// custom:Info1=Text1
ContentHandler branch = new MatchingContentHandler(
new AttributeDependantMetadataHandler(md, "meta:name", Metadata.USER_DEFINED_METADATA_NAME_PREFIX),
matcher);
new AttributeDependantMetadataHandler(md, "meta:name",
Office.USER_DEFINED_METADATA_NAME_PREFIX), matcher);
return new TeeContentHandler(ch, branch);
}
@Deprecated
private static ContentHandler getStatistic(
ContentHandler ch, Metadata md, String name, String attribute) {
Matcher matcher =
META_XPATH.parse("//meta:document-statistic/@meta:" + attribute);
private static ContentHandler getStatistic(ContentHandler ch, Metadata md, String name,
String attribute) {
Matcher matcher = META_XPATH.parse("//meta:document-statistic/@meta:" + attribute);
ContentHandler branch = new MatchingContentHandler(
new AttributeMetadataHandler(META_NS, attribute, md, name), matcher);
new AttributeMetadataHandler(META_NS, attribute, md, name), matcher);
return new TeeContentHandler(ch, branch);
}
private static ContentHandler getStatistic(
ContentHandler ch, Metadata md, Property property, String attribute) {
Matcher matcher =
META_XPATH.parse("//meta:document-statistic/@meta:" + attribute);
private static ContentHandler getStatistic(ContentHandler ch, Metadata md, Property property,
String attribute) {
Matcher matcher = META_XPATH.parse("//meta:document-statistic/@meta:" + attribute);
ContentHandler branch = new MatchingContentHandler(
new AttributeMetadataHandler(META_NS, attribute, md, property), matcher);
new AttributeMetadataHandler(META_NS, attribute, md, property), matcher);
return new TeeContentHandler(ch, branch);
}
protected ContentHandler getContentHandler(ContentHandler ch, Metadata md, ParseContext context) {
static ContentHandler getContentHandler(Metadata md, ParseContext context,
ContentHandler... handlers) {
// We can no longer extend DcXMLParser due to the handling of dc:subject and dc:date
// Process the Dublin Core Attributes
ch = new TeeContentHandler(super.getContentHandler(ch, md, context),
getDublinCoreHandler(md, TikaCoreProperties.TITLE, "title"),
ContentHandler ch =
new TeeContentHandler(getDublinCoreHandler(md, TikaCoreProperties.TITLE, "title"),
getDublinCoreHandler(md, TikaCoreProperties.CREATOR, "creator"),
getDublinCoreHandler(md, TikaCoreProperties.DESCRIPTION, "description"),
getDublinCoreHandler(md, TikaCoreProperties.PUBLISHER, "publisher"),
@ -129,19 +119,20 @@ public class OpenDocumentMetaParser extends XMLParser {
// Process the OO Meta Attributes
ch = getMeta(ch, md, TikaCoreProperties.CREATED, "creation-date");
// ODF uses dc:date for modified
ch = new TeeContentHandler(ch, new ElementMetadataHandler(
DublinCore.NAMESPACE_URI_DC, "date",
md, TikaCoreProperties.MODIFIED));
ch = new TeeContentHandler(ch,
new ElementMetadataHandler(DublinCore.NAMESPACE_URI_DC, "date", md,
TikaCoreProperties.MODIFIED));
// ODF uses dc:subject for description
ch = new TeeContentHandler(ch, new ElementMetadataHandler(
DublinCore.NAMESPACE_URI_DC, "subject",
md, TikaCoreProperties.TRANSITION_SUBJECT_TO_OO_SUBJECT));
ch = getMeta(ch, md, TikaCoreProperties.TRANSITION_KEYWORDS_TO_DC_SUBJECT, "keyword");
ch = new TeeContentHandler(ch,
new ElementMetadataHandler(DublinCore.NAMESPACE_URI_DC, "subject", md,
OfficeOpenXMLCore.SUBJECT));
ch = getMeta(ch, md, Property.externalText(MSOffice.EDIT_TIME), "editing-duration");
ch = getMeta(ch, md, Office.KEYWORDS, "keyword");
ch = getMeta(ch, md, OfficeOpenXMLExtended.TOTAL_TIME, "editing-duration");
ch = getMeta(ch, md, Property.externalText("editing-cycles"), "editing-cycles");
ch = getMeta(ch, md, TRANSITION_INITIAL_CREATOR_TO_INITIAL_AUTHOR, "initial-creator");
ch = getMeta(ch, md, TikaCoreProperties.CREATOR, "initial-creator");
ch = getMeta(ch, md, Property.externalText("generator"), "generator");
// Process the user defined Meta Attributes
@ -157,43 +148,48 @@ public class OpenDocumentMetaParser extends XMLParser {
ch = getStatistic(ch, md, Office.WORD_COUNT, "word-count");
ch = getStatistic(ch, md, Office.CHARACTER_COUNT, "character-count");
// Legacy, Tika-1.0 style attributes
// TODO Remove these in Tika 2.0
ch = getStatistic(ch, md, MSOffice.OBJECT_COUNT, "object-count");
ch = getStatistic(ch, md, MSOffice.IMAGE_COUNT, "image-count");
ch = getStatistic(ch, md, MSOffice.PAGE_COUNT, "page-count");
ch = getStatistic(ch, md, MSOffice.TABLE_COUNT, "table-count");
ch = getStatistic(ch, md, MSOffice.PARAGRAPH_COUNT, "paragraph-count");
ch = getStatistic(ch, md, MSOffice.WORD_COUNT, "word-count");
ch = getStatistic(ch, md, MSOffice.CHARACTER_COUNT, "character-count");
// Legacy Statistics Attributes, replaced with real keys above
// TODO Remove these shortly, eg after Tika 1.1 (TIKA-770)
ch = getStatistic(ch, md, "nbPage", "page-count");
ch = getStatistic(ch, md, "nbPara", "paragraph-count");
ch = getStatistic(ch, md, "nbWord", "word-count");
ch = getStatistic(ch, md, "nbCharacter", "character-count");
ch = getStatistic(ch, md, "nbTab", "table-count");
ch = getStatistic(ch, md, "nbObject", "object-count");
ch = getStatistic(ch, md, "nbImg", "image-count");
if (handlers != null && handlers.length > 0) {
ContentHandler[] newHandlers = new ContentHandler[handlers.length + 1];
newHandlers[0] = ch;
System.arraycopy(handlers, 0, newHandlers, 1, handlers.length);
ch = new TeeContentHandler(newHandlers);
}
// Normalise the rest
ch = new NSNormalizerContentHandler(ch);
return ch;
}
protected ContentHandler getContentHandler(ContentHandler ch, Metadata md,
ParseContext context) {
return getContentHandler(md, context, super.getContentHandler(ch, md, context));
}
@Override
public void parse(
InputStream stream, ContentHandler handler,
Metadata metadata, ParseContext context)
throws IOException, SAXException, TikaException {
public void parse(InputStream stream, ContentHandler handler, Metadata metadata,
ParseContext context) throws IOException, SAXException, TikaException {
super.parse(stream, handler, metadata, context);
// Copy subject to description for OO2
String odfSubject = metadata.get(OfficeOpenXMLCore.SUBJECT);
if (odfSubject != null && !odfSubject.equals("") &&
(metadata.get(TikaCoreProperties.DESCRIPTION) == null || metadata.get(TikaCoreProperties.DESCRIPTION).equals(""))) {
(metadata.get(TikaCoreProperties.DESCRIPTION) == null ||
metadata.get(TikaCoreProperties.DESCRIPTION).equals(""))) {
metadata.set(TikaCoreProperties.DESCRIPTION, odfSubject);
}
//reset the dc:subject to include both keywords and subject
//We can't rely on composite keys in the MatchingContentHandlers
//because those are "setting" not "adding" to the Metadata object
List<String> subjects = new ArrayList<>();
if (metadata.getValues(Office.KEYWORDS) != null) {
subjects.addAll(Arrays.asList(metadata.getValues(Office.KEYWORDS)));
}
if (metadata.getValues(OfficeOpenXMLCore.SUBJECT) != null) {
subjects.addAll(Arrays.asList(metadata.getValues(OfficeOpenXMLCore.SUBJECT)));
}
if (subjects.size() > 0) {
metadata.set(TikaCoreProperties.SUBJECT, subjects.toArray(new String[0]));
}
}
}
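
A minimal sketch (not part of this diff) of the reworked metadata mapping, assuming meta.xml has already been pulled out of an ODF zip (hypothetical path); keywords and dc:subject now both end up under TikaCoreProperties.SUBJECT.

```java
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Paths;

import org.xml.sax.helpers.DefaultHandler;

import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.odf.OpenDocumentMetaParser;

public class OdfMetaExample {
    public static void main(String[] args) throws Exception {
        Metadata metadata = new Metadata();
        OpenDocumentMetaParser metaParser = new OpenDocumentMetaParser();
        // "meta.xml" is assumed to be the metadata stream extracted from an .odt zip
        try (InputStream in = Files.newInputStream(Paths.get("meta.xml"))) {
            metaParser.parse(in, new DefaultHandler(), metadata, new ParseContext());
        }
        System.out.println(metadata.get(TikaCoreProperties.CREATED));
        // combined keywords + dc:subject, as set at the end of parse(...)
        for (String subject : metadata.getValues(TikaCoreProperties.SUBJECT)) {
            System.out.println(subject);
        }
    }
}
```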

View File

@ -16,37 +16,44 @@
*/
package org.apache.tika.parser.odf;
import static java.nio.charset.StandardCharsets.UTF_8;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Enumeration;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import java.util.zip.ZipInputStream;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.input.CloseShieldInputStream;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
import org.apache.tika.config.Field;
import org.apache.tika.exception.EncryptedDocumentException;
import org.apache.tika.exception.TikaException;
import org.apache.tika.extractor.EmbeddedDocumentExtractor;
import org.apache.tika.exception.WriteLimitReachedException;
import org.apache.tika.extractor.EmbeddedDocumentUtil;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.metadata.TikaMetadataKeys;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.sax.EmbeddedContentHandler;
import org.apache.tika.sax.EndDocumentShieldingContentHandler;
import org.apache.tika.sax.OfflineContentHandler;
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.Collections;
import java.util.Enumeration;
import java.util.HashSet;
import java.util.Set;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import java.util.zip.ZipInputStream;
import static java.nio.charset.StandardCharsets.UTF_8;
import org.apache.tika.utils.XMLReaderUtils;
/**
* OpenOffice parser
@ -58,47 +65,48 @@ public class OpenDocumentParser extends AbstractParser {
*/
private static final long serialVersionUID = -6410276875438618287L;
private static final Set<MediaType> SUPPORTED_TYPES =
Collections.unmodifiableSet(new HashSet<MediaType>(Arrays.asList(
MediaType.application("vnd.sun.xml.writer"),
MediaType.application("vnd.oasis.opendocument.text"),
MediaType.application("vnd.oasis.opendocument.graphics"),
MediaType.application("vnd.oasis.opendocument.presentation"),
MediaType.application("vnd.oasis.opendocument.spreadsheet"),
MediaType.application("vnd.oasis.opendocument.chart"),
MediaType.application("vnd.oasis.opendocument.image"),
MediaType.application("vnd.oasis.opendocument.formula"),
MediaType.application("vnd.oasis.opendocument.text-master"),
MediaType.application("vnd.oasis.opendocument.text-web"),
MediaType.application("vnd.oasis.opendocument.text-template"),
MediaType.application("vnd.oasis.opendocument.graphics-template"),
MediaType.application("vnd.oasis.opendocument.presentation-template"),
MediaType.application("vnd.oasis.opendocument.spreadsheet-template"),
MediaType.application("vnd.oasis.opendocument.chart-template"),
MediaType.application("vnd.oasis.opendocument.image-template"),
MediaType.application("vnd.oasis.opendocument.formula-template"),
MediaType.application("x-vnd.oasis.opendocument.text"),
MediaType.application("x-vnd.oasis.opendocument.graphics"),
MediaType.application("x-vnd.oasis.opendocument.presentation"),
MediaType.application("x-vnd.oasis.opendocument.spreadsheet"),
MediaType.application("x-vnd.oasis.opendocument.chart"),
MediaType.application("x-vnd.oasis.opendocument.image"),
MediaType.application("x-vnd.oasis.opendocument.formula"),
MediaType.application("x-vnd.oasis.opendocument.text-master"),
MediaType.application("x-vnd.oasis.opendocument.text-web"),
MediaType.application("x-vnd.oasis.opendocument.text-template"),
MediaType.application("x-vnd.oasis.opendocument.graphics-template"),
MediaType.application("x-vnd.oasis.opendocument.presentation-template"),
MediaType.application("x-vnd.oasis.opendocument.spreadsheet-template"),
MediaType.application("x-vnd.oasis.opendocument.chart-template"),
MediaType.application("x-vnd.oasis.opendocument.image-template"),
MediaType.application("x-vnd.oasis.opendocument.formula-template"))));
private static final Set<MediaType> SUPPORTED_TYPES = Collections.unmodifiableSet(
new HashSet<>(Arrays.asList(MediaType.application("vnd.sun.xml.writer"),
MediaType.application("vnd.oasis.opendocument.text"),
MediaType.application("vnd.oasis.opendocument.graphics"),
MediaType.application("vnd.oasis.opendocument.presentation"),
MediaType.application("vnd.oasis.opendocument.spreadsheet"),
MediaType.application("vnd.oasis.opendocument.chart"),
MediaType.application("vnd.oasis.opendocument.image"),
MediaType.application("vnd.oasis.opendocument.formula"),
MediaType.application("vnd.oasis.opendocument.text-master"),
MediaType.application("vnd.oasis.opendocument.text-web"),
MediaType.application("vnd.oasis.opendocument.text-template"),
MediaType.application("vnd.oasis.opendocument.graphics-template"),
MediaType.application("vnd.oasis.opendocument.presentation-template"),
MediaType.application("vnd.oasis.opendocument.spreadsheet-template"),
MediaType.application("vnd.oasis.opendocument.chart-template"),
MediaType.application("vnd.oasis.opendocument.image-template"),
MediaType.application("vnd.oasis.opendocument.formula-template"),
MediaType.application("x-vnd.oasis.opendocument.text"),
MediaType.application("x-vnd.oasis.opendocument.graphics"),
MediaType.application("x-vnd.oasis.opendocument.presentation"),
MediaType.application("x-vnd.oasis.opendocument.spreadsheet"),
MediaType.application("x-vnd.oasis.opendocument.chart"),
MediaType.application("x-vnd.oasis.opendocument.image"),
MediaType.application("x-vnd.oasis.opendocument.formula"),
MediaType.application("x-vnd.oasis.opendocument.text-master"),
MediaType.application("x-vnd.oasis.opendocument.text-web"),
MediaType.application("x-vnd.oasis.opendocument.text-template"),
MediaType.application("x-vnd.oasis.opendocument.graphics-template"),
MediaType.application("x-vnd.oasis.opendocument.presentation-template"),
MediaType.application("x-vnd.oasis.opendocument.spreadsheet-template"),
MediaType.application("x-vnd.oasis.opendocument.chart-template"),
MediaType.application("x-vnd.oasis.opendocument.image-template"),
MediaType.application("x-vnd.oasis.opendocument.formula-template"))));
private static final String META_NAME = "meta.xml";
private static final String MANIFEST_NAME = "META-INF/manifest.xml";
private Parser meta = new OpenDocumentMetaParser();
private Parser content = new OpenDocumentContentParser();
private boolean extractMacros = false;
public Parser getMetaParser() {
return meta;
@ -120,10 +128,10 @@ public class OpenDocumentParser extends AbstractParser {
return SUPPORTED_TYPES;
}
public void parse(
InputStream stream, ContentHandler baseHandler,
Metadata metadata, ParseContext context)
throws IOException, SAXException, TikaException {
public void parse(InputStream stream, ContentHandler baseHandler, Metadata metadata,
ParseContext context) throws IOException, SAXException, TikaException {
EmbeddedDocumentUtil embeddedDocumentUtil = new EmbeddedDocumentUtil(context);
// Open the Zip stream
// Use a File if we can, and an already open zip is even better
@ -145,85 +153,129 @@ public class OpenDocumentParser extends AbstractParser {
// Prepare to handle the content
XHTMLContentHandler xhtml = new XHTMLContentHandler(baseHandler, metadata);
xhtml.startDocument();
// As we don't know which of the metadata or the content
// we'll hit first, catch the endDocument call initially
EndDocumentShieldingContentHandler handler =
new EndDocumentShieldingContentHandler(xhtml);
EndDocumentShieldingContentHandler handler = new EndDocumentShieldingContentHandler(xhtml);
if (zipFile != null) {
try {
handleZipFile(zipFile, metadata, context, handler);
} finally {
//Do we want to close silently == catch an exception here?
zipFile.close();
try {
if (zipFile != null) {
try {
handleZipFile(zipFile, metadata, context, handler, embeddedDocumentUtil);
} finally {
//Do we want to close silently == catch an exception here?
zipFile.close();
}
} else {
try {
handleZipStream(zipStream, metadata, context, handler, embeddedDocumentUtil);
} finally {
//Do we want to close silently == catch an exception here?
zipStream.close();
}
}
} else {
try {
handleZipStream(zipStream, metadata, context, handler);
} finally {
//Do we want to close silently == catch an exception here?
zipStream.close();
} catch (SAXException e) {
if (e.getCause() instanceof EncryptedDocumentException) {
throw (EncryptedDocumentException)e.getCause();
}
throw e;
}
// Only now call the end document
if (handler.getEndDocumentWasCalled()) {
if (handler.isEndDocumentWasCalled()) {
handler.reallyEndDocument();
}
}
private void handleZipStream(ZipInputStream zipStream, Metadata metadata, ParseContext context, EndDocumentShieldingContentHandler handler) throws IOException, TikaException, SAXException {
ZipEntry entry = zipStream.getNextEntry();
if (entry == null) {
throw new IOException("No entries found in ZipInputStream");
}
do {
handleZipEntry(entry, zipStream, metadata, context, handler);
entry = zipStream.getNextEntry();
} while (entry != null);
@Field
public void setExtractMacros(boolean extractMacros) {
this.extractMacros = extractMacros;
}
private void handleZipFile(ZipFile zipFile, Metadata metadata,
ParseContext context, EndDocumentShieldingContentHandler handler)
throws IOException, TikaException, SAXException {
private void handleZipStream(ZipInputStream zipStream, Metadata metadata, ParseContext context,
EndDocumentShieldingContentHandler handler,
EmbeddedDocumentUtil embeddedDocumentUtil)
throws IOException, TikaException, SAXException {
ZipEntry entry = zipStream.getNextEntry();
if (entry == null) {
throw new IOException("No entries found in ZipInputStream");
}
List<SAXException> exceptions = new ArrayList<>();
do {
try {
handleZipEntry(entry, zipStream, metadata, context, handler,
embeddedDocumentUtil);
} catch (SAXException e) {
WriteLimitReachedException.throwIfWriteLimitReached(e);
if (e.getCause() instanceof EncryptedDocumentException) {
throw (EncryptedDocumentException)e.getCause();
} else {
exceptions.add(e);
}
}
entry = zipStream.getNextEntry();
} while (entry != null);
if (exceptions.size() > 0) {
throw exceptions.get(0);
}
}
private void handleZipFile(ZipFile zipFile, Metadata metadata, ParseContext context,
EndDocumentShieldingContentHandler handler,
EmbeddedDocumentUtil embeddedDocumentUtil)
throws IOException, TikaException, SAXException {
// If we can, process the metadata first, then the
// rest of the file afterwards (TIKA-1353)
// Only possible to guarantee that when opened from a file, not a stream
ZipEntry entry = zipFile.getEntry(META_NAME);
ZipEntry entry = zipFile.getEntry(MANIFEST_NAME);
if (entry != null) {
handleZipEntry(entry, zipFile.getInputStream(entry), metadata, context, handler);
handleZipEntry(entry, zipFile.getInputStream(entry), metadata, context,
handler, embeddedDocumentUtil);
}
entry = zipFile.getEntry(META_NAME);
if (entry != null) {
handleZipEntry(entry, zipFile.getInputStream(entry), metadata, context,
handler, embeddedDocumentUtil);
}
Enumeration<? extends ZipEntry> entries = zipFile.entries();
while (entries.hasMoreElements()) {
entry = entries.nextElement();
if (!META_NAME.equals(entry.getName())) {
handleZipEntry(entry, zipFile.getInputStream(entry), metadata, context, handler);
handleZipEntry(entry, zipFile.getInputStream(entry), metadata,
context, handler, embeddedDocumentUtil);
}
}
}
private void handleZipEntry(ZipEntry entry, InputStream zip, Metadata metadata,
ParseContext context, EndDocumentShieldingContentHandler handler)
throws IOException, SAXException, TikaException {
if (entry == null) return;
if (entry.getName().equals("mimetype")) {
private void handleZipEntry(ZipEntry entry, InputStream zip, Metadata metadata,
ParseContext context, ContentHandler handler,
EmbeddedDocumentUtil embeddedDocumentUtil)
throws IOException, SAXException, TikaException {
if (entry.getName().contains("manifest.xml")) {
checkForEncryption(zip, context);
} else if (entry.getName().equals("mimetype")) {
String type = IOUtils.toString(zip, UTF_8);
metadata.set(Metadata.CONTENT_TYPE, type);
} else if (entry.getName().equals(META_NAME)) {
meta.parse(zip, new DefaultHandler(), metadata, context);
} else if (entry.getName().endsWith("content.xml")) {
if (content instanceof OpenDocumentContentParser) {
((OpenDocumentContentParser) content).parseInternal(zip, handler, metadata, context);
((OpenDocumentContentParser) content)
.parseInternal(zip, handler, metadata, context);
} else {
// Foreign content parser was set:
content.parse(zip, handler, metadata, context);
}
} else if (entry.getName().endsWith("styles.xml")) {
if (content instanceof OpenDocumentContentParser) {
((OpenDocumentContentParser) content).parseInternal(zip, handler, metadata, context);
((OpenDocumentContentParser) content)
.parseInternal(zip, handler, metadata, context);
} else {
// Foreign content parser was set:
content.parse(zip, handler, metadata, context);
@ -231,26 +283,87 @@ public class OpenDocumentParser extends AbstractParser {
} else {
String embeddedName = entry.getName();
//scrape everything under Thumbnails/ and Pictures/
if (embeddedName.contains("Thumbnails/") ||
embeddedName.contains("Pictures/")) {
EmbeddedDocumentExtractor embeddedDocumentExtractor =
EmbeddedDocumentUtil.getEmbeddedDocumentExtractor(context);
if (embeddedName.contains("Thumbnails/") || embeddedName.contains("Pictures/")) {
Metadata embeddedMetadata = new Metadata();
embeddedMetadata.set(TikaCoreProperties.ORIGINAL_RESOURCE_NAME, entry.getName());
/* if (embeddedName.startsWith("Thumbnails/")) {
TikaInputStream stream = TikaInputStream.get(zip);
embeddedMetadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, entry.getName());
if (embeddedName.startsWith("Thumbnails/")) {
embeddedMetadata.set(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE,
TikaCoreProperties.EmbeddedResourceType.THUMBNAIL);
}*/
TikaCoreProperties.EmbeddedResourceType.THUMBNAIL.toString());
}
if (embeddedName.contains("Pictures/")) {
embeddedMetadata.set(TikaMetadataKeys.EMBEDDED_RESOURCE_TYPE,
TikaCoreProperties.EmbeddedResourceType.INLINE.toString());
embeddedMetadata.set(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE,
TikaCoreProperties.EmbeddedResourceType.INLINE.toString());
MediaType embeddedMimeType =
embeddedDocumentUtil.getDetector().detect(stream, embeddedMetadata);
if (embeddedMimeType != null) {
embeddedMetadata.set(Metadata.CONTENT_TYPE, embeddedMimeType.toString());
}
stream.reset();
}
if (embeddedDocumentExtractor.shouldParseEmbedded(embeddedMetadata)) {
embeddedDocumentExtractor.parseEmbedded(zip,
new EmbeddedContentHandler(handler), embeddedMetadata, false);
if (embeddedDocumentUtil.shouldParseEmbedded(embeddedMetadata)) {
embeddedDocumentUtil.parseEmbedded(stream, new EmbeddedContentHandler(handler),
embeddedMetadata, false);
}
} else if (extractMacros && embeddedName.contains("Basic/")) {
//process all files under Basic/; let maybeHandleMacro figure
//out if it is a macro or not
maybeHandleMacro(zip, embeddedName, handler, context);
}
}
}
private void maybeHandleMacro(InputStream is, String embeddedName, ContentHandler handler,
ParseContext context)
throws TikaException, IOException, SAXException {
//should probably run XMLRootExtractor on the inputstream
//or read the macro manifest for the names of the macros
//rather than relying on the script file name
if (ignoreScriptFile(embeddedName)) {
return;
}
Metadata embeddedMetadata = new Metadata();
embeddedMetadata.set(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE,
TikaCoreProperties.EmbeddedResourceType.MACRO.toString());
handler = new OpenDocumentMacroHandler(handler, context);
XMLReaderUtils.parseSAX(new CloseShieldInputStream(is),
new OfflineContentHandler(new EmbeddedContentHandler(handler)), context);
}
private void checkForEncryption(InputStream stream, ParseContext context)
throws SAXException, TikaException, IOException {
try {
XMLReaderUtils.parseSAX(new CloseShieldInputStream(stream),
new OfflineContentHandler(new EmbeddedContentHandler(
new OpenDocumentManifestHandler())), context);
} catch (SAXException e) {
if (e.getCause() != null
&& e.getCause() instanceof EncryptedDocumentException) {
throw (EncryptedDocumentException)e.getCause();
}
//otherwise...swallow
}
}
private boolean ignoreScriptFile(String embeddedName) {
if (embeddedName.contains("Basic/")) {
if (embeddedName.contains("script-lb.xml")) {
return true;
} else if (embeddedName.contains("script-lc.xml")) {
return true;
}
} else {
//shouldn't ever get here, but if it isn't under Basic/, ignore it
return true;
}
return false;
}
}
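
A minimal end-to-end sketch (not part of this diff) for the parser as changed above: extract text and basic metadata from an ODT file, optionally switching on the new macro extraction. The input file name is hypothetical.

```java
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Paths;

import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.odf.OpenDocumentParser;
import org.apache.tika.sax.BodyContentHandler;

public class OdtParseExample {
    public static void main(String[] args) throws Exception {
        OpenDocumentParser parser = new OpenDocumentParser();
        parser.setExtractMacros(true); // new in this diff; macros are reported as embedded MACRO resources
        Metadata metadata = new Metadata();
        BodyContentHandler handler = new BodyContentHandler(-1); // -1 disables the write limit
        try (InputStream in = Files.newInputStream(Paths.get("sample.odt"))) { // hypothetical input
            parser.parse(in, handler, metadata, new ParseContext());
        }
        System.out.println(metadata.get(Metadata.CONTENT_TYPE)); // taken from the "mimetype" zip entry
        System.out.println(metadata.get(TikaCoreProperties.CREATOR));
        System.out.println(handler.toString());
    }
}
```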

View File

@ -16,13 +16,14 @@
*/
package org.apache.tika.parser.xml;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Property;
import org.xml.sax.helpers.DefaultHandler;
import java.util.Arrays;
import java.util.List;
import org.xml.sax.helpers.DefaultHandler;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Property;
/**
* Base class for SAX handlers that map SAX events into document metadata.
*
@ -39,11 +40,12 @@ class AbstractMetadataHandler extends DefaultHandler {
this.property = null;
this.name = name;
}
protected AbstractMetadataHandler(Metadata metadata, Property property) {
this.metadata = metadata;
this.property = property;
this.name = property.getName();
}
this.metadata = metadata;
this.property = property;
this.name = property.getName();
}
/**
* Adds the given metadata value. The value is ignored if it is
@ -59,9 +61,9 @@ class AbstractMetadataHandler extends DefaultHandler {
List<String> previous = Arrays.asList(metadata.getValues(name));
if (!previous.contains(value)) {
if (property != null) {
metadata.add(property, value);
metadata.add(property, value);
} else {
metadata.add(name, value);
metadata.add(name, value);
}
}
} else {
@ -69,23 +71,23 @@ class AbstractMetadataHandler extends DefaultHandler {
String previous = metadata.get(name);
if (previous != null && previous.length() > 0) {
if (!previous.equals(value)) {
if (property != null) {
if (property.isMultiValuePermitted()) {
metadata.add(property, value);
} else {
// Replace the existing value if isMultiValuePermitted is false
metadata.set(property, value);
}
} else {
metadata.add(name, value);
}
if (property != null) {
if (property.isMultiValuePermitted()) {
metadata.add(property, value);
} else {
// Replace the existing value if isMultiValuePermitted is false
metadata.set(property, value);
}
} else {
metadata.add(name, value);
}
}
} else {
if (property != null) {
metadata.set(property, value);
} else {
metadata.set(name, value);
}
if (property != null) {
metadata.set(property, value);
} else {
metadata.set(name, value);
}
}
}
}

View File

@ -16,15 +16,16 @@
*/
package org.apache.tika.parser.xml;
import org.apache.tika.metadata.Metadata;
import org.xml.sax.Attributes;
import org.xml.sax.helpers.DefaultHandler;
import org.apache.tika.metadata.Metadata;
/**
* This adds a Metadata entry for a given node.
* The textual content of the node is used as the
* value, and the Metadata name is taken from
* an attribute, with a prefix if required.
* value, and the Metadata name is taken from
* an attribute, with a prefix if required.
*/
public class AttributeDependantMetadataHandler extends DefaultHandler {
@ -32,20 +33,20 @@ public class AttributeDependantMetadataHandler extends DefaultHandler {
private final String nameHoldingAttribute;
private final String namePrefix;
private final StringBuilder buffer = new StringBuilder();
private String name;
private final StringBuilder buffer = new StringBuilder();
public AttributeDependantMetadataHandler(Metadata metadata, String nameHoldingAttribute, String namePrefix) {
public AttributeDependantMetadataHandler(Metadata metadata, String nameHoldingAttribute,
String namePrefix) {
this.metadata = metadata;
this.nameHoldingAttribute = nameHoldingAttribute;
this.namePrefix = namePrefix;
}
public void addMetadata(String value) {
if(name == null || name.length() == 0) {
// We didn't find the attribute which holds the name
return;
if (name == null || name.length() == 0) {
// We didn't find the attribute which holds the name
return;
}
if (value.length() > 0) {
String previous = metadata.get(name);
@ -61,15 +62,14 @@ public class AttributeDependantMetadataHandler extends DefaultHandler {
buffer.setLength(0);
}
public void startElement(
String uri, String localName, String name, Attributes attributes) {
public void startElement(String uri, String localName, String name, Attributes attributes) {
String rawName = attributes.getValue(nameHoldingAttribute);
if (rawName != null) {
if (namePrefix == null) {
this.name = rawName;
} else {
this.name = namePrefix + rawName;
}
if (namePrefix == null) {
this.name = rawName;
} else {
this.name = namePrefix + rawName;
}
}
// All other attributes are ignored
}

View File

@ -16,11 +16,12 @@
*/
package org.apache.tika.parser.xml;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Property;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Property;
/**
* SAX event handler that maps the contents of an XML attribute into
* a metadata field.
@ -33,26 +34,25 @@ public class AttributeMetadataHandler extends AbstractMetadataHandler {
private final String localName;
public AttributeMetadataHandler(
String uri, String localName, Metadata metadata, String name) {
public AttributeMetadataHandler(String uri, String localName, Metadata metadata, String name) {
super(metadata, name);
this.uri = uri;
this.localName = localName;
}
public AttributeMetadataHandler(
String uri, String localName, Metadata metadata, Property property) {
super(metadata, property);
this.uri = uri;
this.localName = localName;
}
public AttributeMetadataHandler(String uri, String localName, Metadata metadata,
Property property) {
super(metadata, property);
this.uri = uri;
this.localName = localName;
}
@Override
public void startElement(
String uri, String localName, String qName, Attributes attributes)
throws SAXException {
public void startElement(String uri, String localName, String qName, Attributes attributes)
throws SAXException {
for (int i = 0; i < attributes.getLength(); i++) {
if (attributes.getURI(i).equals(this.uri)
&& attributes.getLocalName(i).equals(this.localName)) {
if (attributes.getURI(i).equals(this.uri) &&
attributes.getLocalName(i).equals(this.localName)) {
addMetadata(attributes.getValue(i).trim());
}
}

View File

@ -16,45 +16,45 @@
*/
package org.apache.tika.parser.xml;
import org.xml.sax.ContentHandler;
import org.apache.tika.metadata.DublinCore;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Property;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.sax.TeeContentHandler;
import org.xml.sax.ContentHandler;
/**
* Dublin Core metadata parser
*/
public class DcXMLParser extends XMLParser {
/** Serial version UID */
/**
* Serial version UID
*/
private static final long serialVersionUID = 4905318835463880819L;
private static ContentHandler getDublinCoreHandler(
Metadata metadata, Property property, String element) {
return new ElementMetadataHandler(
DublinCore.NAMESPACE_URI_DC, element,
metadata, property);
private static ContentHandler getDublinCoreHandler(Metadata metadata, Property property,
String element) {
return new ElementMetadataHandler(DublinCore.NAMESPACE_URI_DC, element, metadata, property);
}
protected ContentHandler getContentHandler(
ContentHandler handler, Metadata metadata, ParseContext context) {
return new TeeContentHandler(
super.getContentHandler(handler, metadata, context),
getDublinCoreHandler(metadata, TikaCoreProperties.TITLE, "title"),
getDublinCoreHandler(metadata, TikaCoreProperties.KEYWORDS, "subject"),
getDublinCoreHandler(metadata, TikaCoreProperties.CREATOR, "creator"),
getDublinCoreHandler(metadata, TikaCoreProperties.DESCRIPTION, "description"),
getDublinCoreHandler(metadata, TikaCoreProperties.PUBLISHER, "publisher"),
getDublinCoreHandler(metadata, TikaCoreProperties.CONTRIBUTOR, "contributor"),
getDublinCoreHandler(metadata, TikaCoreProperties.CREATED, "date"),
getDublinCoreHandler(metadata, TikaCoreProperties.TYPE, "type"),
getDublinCoreHandler(metadata, TikaCoreProperties.FORMAT, "format"),
getDublinCoreHandler(metadata, TikaCoreProperties.IDENTIFIER, "identifier"),
getDublinCoreHandler(metadata, TikaCoreProperties.LANGUAGE, "language"),
getDublinCoreHandler(metadata, TikaCoreProperties.RIGHTS, "rights"));
protected ContentHandler getContentHandler(ContentHandler handler, Metadata metadata,
ParseContext context) {
return new TeeContentHandler(super.getContentHandler(handler, metadata, context),
getDublinCoreHandler(metadata, TikaCoreProperties.TITLE, "title"),
getDublinCoreHandler(metadata, TikaCoreProperties.SUBJECT, "subject"),
getDublinCoreHandler(metadata, TikaCoreProperties.CREATOR, "creator"),
getDublinCoreHandler(metadata, TikaCoreProperties.DESCRIPTION, "description"),
getDublinCoreHandler(metadata, TikaCoreProperties.PUBLISHER, "publisher"),
getDublinCoreHandler(metadata, TikaCoreProperties.CONTRIBUTOR, "contributor"),
getDublinCoreHandler(metadata, TikaCoreProperties.CREATED, "date"),
getDublinCoreHandler(metadata, TikaCoreProperties.TYPE, "type"),
getDublinCoreHandler(metadata, TikaCoreProperties.FORMAT, "format"),
getDublinCoreHandler(metadata, TikaCoreProperties.IDENTIFIER, "identifier"),
getDublinCoreHandler(metadata, TikaCoreProperties.LANGUAGE, "language"),
getDublinCoreHandler(metadata, TikaCoreProperties.RIGHTS, "rights"));
}
}
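
A minimal sketch (not part of this diff) showing the effect of mapping dc:subject to TikaCoreProperties.SUBJECT instead of the old KEYWORDS key, assuming a standalone Dublin Core XML file (hypothetical path).

```java
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Paths;

import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.xml.DcXMLParser;
import org.apache.tika.sax.BodyContentHandler;

public class DcXmlExample {
    public static void main(String[] args) throws Exception {
        Metadata metadata = new Metadata();
        DcXMLParser parser = new DcXMLParser();
        // "record.xml" is a hypothetical XML document carrying dc:* elements
        try (InputStream in = Files.newInputStream(Paths.get("record.xml"))) {
            parser.parse(in, new BodyContentHandler(), metadata, new ParseContext());
        }
        System.out.println(metadata.get(TikaCoreProperties.TITLE));
        // dc:subject values now land here rather than under the old KEYWORDS key
        for (String subject : metadata.getValues(TikaCoreProperties.SUBJECT)) {
            System.out.println(subject);
        }
    }
}
```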

View File

@ -16,13 +16,14 @@
*/
package org.apache.tika.parser.xml;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Property;
import java.util.Arrays;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.Attributes;
import java.util.Arrays;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Property;
/**
* SAX event handler that maps the contents of an XML element into
@ -44,21 +45,17 @@ public class ElementMetadataHandler extends AbstractMetadataHandler {
private final Metadata metadata;
private final String name;
private Property targetProperty;
private final boolean allowDuplicateValues;
private final boolean allowEmptyValues;
/**
* The buffer used to capture characters when inside a bag li element.
*/
private final StringBuilder bufferBagged = new StringBuilder();
/**
* The buffer used to capture characters inside standard elements.
*/
private final StringBuilder bufferBagless = new StringBuilder();
private Property targetProperty;
/**
* Whether or not the value was found in a standard element structure or inside a bag.
*/
@ -70,13 +67,12 @@ public class ElementMetadataHandler extends AbstractMetadataHandler {
/**
* Constructor for string metadata keys.
*
* @param uri the uri of the namespace of the element
* @param uri the uri of the namespace of the element
* @param localName the local name of the element
* @param metadata the Tika metadata object to populate
* @param name the Tika metadata field key
* @param metadata the Tika metadata object to populate
* @param name the Tika metadata field key
*/
public ElementMetadataHandler(
String uri, String localName, Metadata metadata, String name) {
public ElementMetadataHandler(String uri, String localName, Metadata metadata, String name) {
super(metadata, name);
this.uri = uri;
this.localName = localName;
@ -91,15 +87,15 @@ public class ElementMetadataHandler extends AbstractMetadataHandler {
* Constructor for string metadata keys which allows change of behavior
* for duplicate and empty entry values.
*
* @param uri the uri of the namespace of the element
* @param localName the local name of the element
* @param metadata the Tika metadata object to populate
* @param name the Tika metadata field key
* @param uri the uri of the namespace of the element
* @param localName the local name of the element
* @param metadata the Tika metadata object to populate
* @param name the Tika metadata field key
* @param allowDuplicateValues add duplicate values to the Tika metadata
* @param allowEmptyValues add empty values to the Tika metadata
* @param allowEmptyValues add empty values to the Tika metadata
*/
public ElementMetadataHandler(
String uri, String localName, Metadata metadata, String name, boolean allowDuplicateValues, boolean allowEmptyValues) {
public ElementMetadataHandler(String uri, String localName, Metadata metadata, String name,
boolean allowDuplicateValues, boolean allowEmptyValues) {
super(metadata, name);
this.uri = uri;
this.localName = localName;
@ -113,13 +109,13 @@ public class ElementMetadataHandler extends AbstractMetadataHandler {
/**
* Constructor for Property metadata keys.
*
* @param uri the uri of the namespace of the element
* @param localName the local name of the element
* @param metadata the Tika metadata object to populate
* @param uri the uri of the namespace of the element
* @param localName the local name of the element
* @param metadata the Tika metadata object to populate
* @param targetProperty the Tika metadata Property key
*/
public ElementMetadataHandler(
String uri, String localName, Metadata metadata, Property targetProperty) {
public ElementMetadataHandler(String uri, String localName, Metadata metadata,
Property targetProperty) {
super(metadata, targetProperty);
this.uri = uri;
this.localName = localName;
@ -135,15 +131,16 @@ public class ElementMetadataHandler extends AbstractMetadataHandler {
* Constructor for Property metadata keys which allows change of behavior
* for duplicate and empty entry values.
*
* @param uri the uri of the namespace of the element
* @param localName the local name of the element
* @param metadata the Tika metadata object to populate
* @param targetProperty the Tika metadata Property key
* @param uri the uri of the namespace of the element
* @param localName the local name of the element
* @param metadata the Tika metadata object to populate
* @param targetProperty the Tika metadata Property key
* @param allowDuplicateValues add duplicate values to the Tika metadata
* @param allowEmptyValues add empty values to the Tika metadata
* @param allowEmptyValues add empty values to the Tika metadata
*/
public ElementMetadataHandler(
String uri, String localName, Metadata metadata, Property targetProperty, boolean allowDuplicateValues, boolean allowEmptyValues) {
public ElementMetadataHandler(String uri, String localName, Metadata metadata,
Property targetProperty, boolean allowDuplicateValues,
boolean allowEmptyValues) {
super(metadata, targetProperty);
this.uri = uri;
this.localName = localName;
@ -162,16 +159,13 @@ public class ElementMetadataHandler extends AbstractMetadataHandler {
protected boolean isMatchingElement(String uri, String localName) {
// match if we're inside the parent element or within some bag element
return (uri.equals(this.uri) && localName.equals(this.localName)) ||
(parentMatchLevel > 0 &&
((uri.equals(URI_RDF) && localName.equals(LOCAL_NAME_RDF_BAG)) ||
(uri.equals(URI_RDF) && localName.equals(LOCAL_NAME_RDF_LI))
)
);
(parentMatchLevel > 0 &&
((uri.equals(URI_RDF) && localName.equals(LOCAL_NAME_RDF_BAG)) ||
(uri.equals(URI_RDF) && localName.equals(LOCAL_NAME_RDF_LI))));
}
@Override
public void startElement(
String uri, String localName, String name, Attributes attributes) {
public void startElement(String uri, String localName, String name, Attributes attributes) {
if (isMatchingElement(uri, localName)) {
matchLevel++;
}
@ -230,7 +224,8 @@ public class ElementMetadataHandler extends AbstractMetadataHandler {
value = "";
}
String[] previous = metadata.getValues(name);
if (previous == null || !Arrays.asList(previous).contains(value) || allowDuplicateValues) {
if (previous == null || !Arrays.asList(previous).contains(value) ||
allowDuplicateValues) {
metadata.add(targetProperty, value);
}
}

View File

@ -16,64 +16,68 @@
*/
package org.apache.tika.parser.xml;
import org.apache.commons.codec.binary.Base64;
import org.apache.tika.extractor.EmbeddedDocumentExtractor;
import org.apache.tika.extractor.EmbeddedDocumentUtil;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaMetadataKeys;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.ParseContext;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.util.Collections;
import java.util.Set;
import org.apache.commons.codec.binary.Base64;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
import org.apache.tika.extractor.EmbeddedDocumentExtractor;
import org.apache.tika.extractor.EmbeddedDocumentUtil;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.ParseContext;
public class FictionBookParser extends XMLParser {
private static final long serialVersionUID = 4195954546491524374L;
private static final Set<MediaType> SUPPORTED_TYPES =
Collections.singleton(MediaType.application("x-fictionbook+xml"));
Collections.singleton(MediaType.application("x-fictionbook+xml"));
@Override
public Set<MediaType> getSupportedTypes(ParseContext context) {
return SUPPORTED_TYPES;
}
@Override
protected ContentHandler getContentHandler(ContentHandler handler, Metadata metadata, ParseContext context) {
protected ContentHandler getContentHandler(ContentHandler handler, Metadata metadata,
ParseContext context) {
return new BinaryElementsDataHandler(
EmbeddedDocumentUtil.getEmbeddedDocumentExtractor(context), handler);
EmbeddedDocumentUtil.getEmbeddedDocumentExtractor(context), handler);
}
private static class BinaryElementsDataHandler extends DefaultHandler {
private static final String ELEMENT_BINARY = "binary";
private boolean binaryMode = false;
private static final String ATTRIBUTE_ID = "id";
private static final String ATTRIBUTE_CONTENT_TYPE = "content-type";
private final EmbeddedDocumentExtractor partExtractor;
private final ContentHandler handler;
private final StringBuilder binaryData = new StringBuilder();
private boolean binaryMode = false;
private Metadata metadata;
private static final String ATTRIBUTE_CONTENT_TYPE = "content-type";
private BinaryElementsDataHandler(EmbeddedDocumentExtractor partExtractor, ContentHandler handler) {
private BinaryElementsDataHandler(EmbeddedDocumentExtractor partExtractor,
ContentHandler handler) {
this.partExtractor = partExtractor;
this.handler = handler;
}
@Override
public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
public void startElement(String uri, String localName, String qName, Attributes attributes)
throws SAXException {
binaryMode = ELEMENT_BINARY.equals(localName);
if (binaryMode) {
binaryData.setLength(0);
metadata = new Metadata();
metadata.set(TikaMetadataKeys.RESOURCE_NAME_KEY, attributes.getValue(ATTRIBUTE_ID));
metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY,
attributes.getValue(ATTRIBUTE_ID));
metadata.set(Metadata.CONTENT_TYPE, attributes.getValue(ATTRIBUTE_CONTENT_TYPE));
}
}
@ -83,11 +87,8 @@ public class FictionBookParser extends XMLParser {
if (binaryMode) {
try {
partExtractor.parseEmbedded(
new ByteArrayInputStream(Base64.decodeBase64(binaryData.toString())),
handler,
metadata,
true
);
new ByteArrayInputStream(Base64.decodeBase64(binaryData.toString())),
handler, metadata, true);
} catch (IOException e) {
throw new SAXException("IOException in parseEmbedded", e);
}

View File

@ -16,19 +16,20 @@
*/
package org.apache.tika.parser.xml;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Property;
import org.xml.sax.Attributes;
import org.xml.sax.helpers.DefaultHandler;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Property;
/**
* This adds Metadata entries with a specified name for
* the textual content of a node (if present), and
* all attribute values passed through the matcher
* (but not their names).
* the textual content of a node (if present), and
* all attribute values passed through the matcher
* (but not their names).
*
* @deprecated Use the {@link AttributeMetadataHandler} and
* {@link ElementMetadataHandler} classes instead
* {@link ElementMetadataHandler} classes instead
*/
public class MetadataHandler extends DefaultHandler {
@ -44,11 +45,12 @@ public class MetadataHandler extends DefaultHandler {
this.property = null;
this.name = name;
}
public MetadataHandler(Metadata metadata, Property property) {
this.metadata = metadata;
this.property = property;
this.name = property.getName();
}
this.metadata = metadata;
this.property = property;
this.name = property.getName();
}
public void addMetadata(String value) {
if (value.length() > 0) {
@ -58,9 +60,9 @@ public class MetadataHandler extends DefaultHandler {
}
if (this.property != null) {
metadata.set(property, value);
metadata.set(property, value);
} else {
metadata.set(name, value);
metadata.set(name, value);
}
}
}
@ -70,8 +72,7 @@ public class MetadataHandler extends DefaultHandler {
buffer.setLength(0);
}
public void startElement(
String uri, String localName, String name, Attributes attributes) {
public void startElement(String uri, String localName, String name, Attributes attributes) {
for (int i = 0; i < attributes.getLength(); i++) {
addMetadata(attributes.getValue(i));
}

View File

@ -0,0 +1,34 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.parser.xml;
import org.xml.sax.ContentHandler;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.sax.TextAndAttributeContentHandler;
public class TextAndAttributeXMLParser extends XMLParser {
private static final long serialVersionUID = 7796914007312429473L;
@Override
protected ContentHandler getContentHandler(ContentHandler handler, Metadata metadata,
ParseContext context) {
return new TextAndAttributeContentHandler(handler, true);
}
}
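
A minimal sketch (not part of this diff) for the new TextAndAttributeXMLParser: unlike the plain XMLParser, attribute content is included in the extracted text. The input file name is hypothetical, and the exact output formatting is an assumption.

```java
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Paths;

import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.xml.TextAndAttributeXMLParser;
import org.apache.tika.sax.BodyContentHandler;

public class AttributeXmlExample {
    public static void main(String[] args) throws Exception {
        TextAndAttributeXMLParser parser = new TextAndAttributeXMLParser();
        BodyContentHandler handler = new BodyContentHandler(-1);
        // "notes.xml" is a hypothetical input file with data carried in attributes
        try (InputStream in = Files.newInputStream(Paths.get("notes.xml"))) {
            parser.parse(in, handler, new Metadata(), new ParseContext());
        }
        // element text plus attribute names/values, per TextAndAttributeContentHandler
        System.out.println(handler.toString());
    }
}
```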

View File

@ -16,7 +16,17 @@
*/
package org.apache.tika.parser.xml;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
import org.apache.commons.io.input.CloseShieldInputStream;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
@ -28,52 +38,41 @@ import org.apache.tika.sax.TaggedContentHandler;
import org.apache.tika.sax.TextContentHandler;
import org.apache.tika.sax.XHTMLContentHandler;
import org.apache.tika.utils.XMLReaderUtils;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
/**
* XML parser.
*/
public class XMLParser extends AbstractParser {
/** Serial version UID */
/**
* Serial version UID
*/
private static final long serialVersionUID = -6028836725280212837L;
private static final Set<MediaType> SUPPORTED_TYPES =
Collections.unmodifiableSet(new HashSet<MediaType>(Arrays.asList(
MediaType.application("xml"),
MediaType.image("svg+xml"))));
private static final Set<MediaType> SUPPORTED_TYPES = Collections.unmodifiableSet(
new HashSet<>(
Arrays.asList(MediaType.application("xml"), MediaType.image("svg+xml"))));
public Set<MediaType> getSupportedTypes(ParseContext context) {
return SUPPORTED_TYPES;
}
public void parse(
InputStream stream, ContentHandler handler,
Metadata metadata, ParseContext context)
throws IOException, SAXException, TikaException {
public void parse(InputStream stream, ContentHandler handler, Metadata metadata,
ParseContext context) throws IOException, SAXException, TikaException {
if (metadata.get(Metadata.CONTENT_TYPE) == null) {
metadata.set(Metadata.CONTENT_TYPE, "application/xml");
}
final XHTMLContentHandler xhtml =
new XHTMLContentHandler(handler, metadata);
final XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
xhtml.startDocument();
xhtml.startElement("p");
TaggedContentHandler tagged = new TaggedContentHandler(handler);
try {
XMLReaderUtils.parseSAX(
new CloseShieldInputStream(stream),
new OfflineContentHandler(new EmbeddedContentHandler(
getContentHandler(tagged, metadata, context))), context);
XMLReaderUtils.parseSAX(new CloseShieldInputStream(stream), new OfflineContentHandler(
new EmbeddedContentHandler(
getContentHandler(tagged, metadata, context))),
context);
} catch (SAXException e) {
tagged.throwIfCauseOf(e);
throw new TikaException("XML parse error", e);
@ -83,8 +82,8 @@ public class XMLParser extends AbstractParser {
}
}
protected ContentHandler getContentHandler(
ContentHandler handler, Metadata metadata, ParseContext context) {
protected ContentHandler getContentHandler(ContentHandler handler, Metadata metadata,
ParseContext context) {
return new TextContentHandler(handler, true);
}
}

View File

@ -0,0 +1,206 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.utils;
public class StringUtils {
/**
* The empty String {@code ""}.
*
* @since 2.0
*/
public static final String EMPTY = "";
/**
* A String for a space character.
*
* @since 3.2
*/
public static final String SPACE = " ";
static int PAD_LIMIT = 10000;
public static boolean isEmpty(final CharSequence cs) {
return cs == null || cs.length() == 0;
}
public static boolean isBlank(final String s) {
return s == null || s.trim().length() == 0;
}
/**
* <p>Left pad a String with a specified String.</p>
*
* <p>Pad to a size of {@code size}.</p>
*
* <pre>
* StringUtils.leftPad(null, *, *) = null
* StringUtils.leftPad("", 3, "z") = "zzz"
* StringUtils.leftPad("bat", 3, "yz") = "bat"
* StringUtils.leftPad("bat", 5, "yz") = "yzbat"
* StringUtils.leftPad("bat", 8, "yz") = "yzyzybat"
* StringUtils.leftPad("bat", 1, "yz") = "bat"
* StringUtils.leftPad("bat", -1, "yz") = "bat"
* StringUtils.leftPad("bat", 5, null) = " bat"
* StringUtils.leftPad("bat", 5, "") = " bat"
* </pre>
*
* @param str the String to pad out, may be null
* @param size the size to pad to
* @param padStr the String to pad with, null or empty treated as single space
* @return left padded String or original String if no padding is necessary,
* {@code null} if null String input
*/
public static String leftPad(final String str, final int size, String padStr) {
if (str == null) {
return null;
}
if (isEmpty(padStr)) {
padStr = SPACE;
}
final int padLen = padStr.length();
final int strLen = str.length();
final int pads = size - strLen;
if (pads <= 0) {
return str; // returns original String when possible
}
if (padLen == 1 && pads <= PAD_LIMIT) {
return leftPad(str, size, padStr.charAt(0));
}
if (pads == padLen) {
return padStr.concat(str);
} else if (pads < padLen) {
return padStr.substring(0, pads).concat(str);
} else {
final char[] padding = new char[pads];
final char[] padChars = padStr.toCharArray();
for (int i = 0; i < pads; i++) {
padding[i] = padChars[i % padLen];
}
return new String(padding).concat(str);
}
}
public static String leftPad(final String str, final int size, final char padChar) {
if (str == null) {
return null;
}
final int pads = size - str.length();
if (pads <= 0) {
return str; // returns original String when possible
}
if (pads > PAD_LIMIT) {
return leftPad(str, size, String.valueOf(padChar));
}
return repeat(padChar, pads).concat(str);
}
/**
* <p>Returns padding using the specified delimiter repeated
* to a given length.</p>
*
* <pre>
* StringUtils.repeat('e', 0) = ""
* StringUtils.repeat('e', 3) = "eee"
* StringUtils.repeat('e', -2) = ""
* </pre>
*
* <p>Note: this method does not support padding with
* <a href="http://www.unicode.org/glossary/#supplementary_character">Unicode Supplementary Characters</a>
* as they require a pair of {@code char}s to be represented.
* If you are needing to support full I18N of your applications
* consider using {@link #repeat(String, int)} instead.
* </p>
*
* @param ch character to repeat
* @param repeat number of times to repeat char, negative treated as zero
* @return String with repeated character
* @see #repeat(String, int)
*/
public static String repeat(final char ch, final int repeat) {
if (repeat <= 0) {
return EMPTY;
}
final char[] buf = new char[repeat];
for (int i = repeat - 1; i >= 0; i--) {
buf[i] = ch;
}
return new String(buf);
}
// Padding
//-----------------------------------------------------------------------
/**
* <p>Repeat a String {@code repeat} times to form a
* new String.</p>
*
* <pre>
* StringUtils.repeat(null, 2) = null
* StringUtils.repeat("", 0) = ""
* StringUtils.repeat("", 2) = ""
* StringUtils.repeat("a", 3) = "aaa"
* StringUtils.repeat("ab", 2) = "abab"
* StringUtils.repeat("a", -2) = ""
* </pre>
*
* @param str the String to repeat, may be null
* @param repeat number of times to repeat str, negative treated as zero
* @return a new String consisting of the original String repeated,
* {@code null} if null String input
*/
public static String repeat(final String str, final int repeat) {
// Performance tuned for 2.0 (JDK1.4)
if (str == null) {
return null;
}
if (repeat <= 0) {
return EMPTY;
}
final int inputLength = str.length();
if (repeat == 1 || inputLength == 0) {
return str;
}
if (inputLength == 1 && repeat <= PAD_LIMIT) {
return repeat(str.charAt(0), repeat);
}
final int outputLength = inputLength * repeat;
switch (inputLength) {
case 1:
return repeat(str.charAt(0), repeat);
case 2:
final char ch0 = str.charAt(0);
final char ch1 = str.charAt(1);
final char[] output2 = new char[outputLength];
for (int i = repeat * 2 - 2; i >= 0; i--, i--) {
output2[i] = ch0;
output2[i + 1] = ch1;
}
return new String(output2);
default:
final StringBuilder buf = new StringBuilder(outputLength);
for (int i = 0; i < repeat; i++) {
buf.append(str);
}
return buf.toString();
}
}
}
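
For reference, a quick sanity check of the padding helpers above (a sketch, not part of the diff), exercised from Scala:

import org.apache.tika.utils.StringUtils

object StringUtilsCheck {
  def main(args: Array[String]): Unit = {
    // left-pad with a single char: "7" -> "007"
    assert(StringUtils.leftPad("7", 3, '0') == "007")
    // repeat a two-char pattern three times
    assert(StringUtils.repeat("ab", 3) == "ababab")
    // blank means null, empty or whitespace-only
    assert(StringUtils.isBlank("   "))
    println("ok")
  }
}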

View File

@ -16,7 +16,7 @@ import munit._
class OdfExtractTest extends FunSuite {
val files = List(
ExampleFiles.examples_sample_odt -> 6372,
ExampleFiles.examples_sample_odt -> 6367,
ExampleFiles.examples_sample_ods -> 717
)

View File

@ -20,7 +20,7 @@ import fs2.Stream
import docspell.common._
import org.apache.tika.config.TikaConfig
import org.apache.tika.metadata.{HttpHeaders, Metadata, TikaMetadataKeys}
import org.apache.tika.metadata.{HttpHeaders, Metadata, TikaCoreProperties}
import org.apache.tika.mime.MediaType
import org.apache.tika.parser.txt.Icu4jEncodingDetector
@ -40,7 +40,7 @@ object TikaMimetype {
private def makeMetadata(hint: MimeTypeHint): Metadata = {
val md = new Metadata
hint.filename.foreach(md.set(TikaMetadataKeys.RESOURCE_NAME_KEY, _))
hint.filename.foreach(md.set(TikaCoreProperties.RESOURCE_NAME_KEY, _))
hint.advertised.foreach(md.set(HttpHeaders.CONTENT_TYPE, _))
md
}

View File

@ -108,7 +108,13 @@ object SolrSetup {
"Add latvian content field",
addContentField(Language.Latvian)
),
SolrMigration.reIndexAll(13, "Re-Index after adding latvian content field")
SolrMigration.reIndexAll(13, "Re-Index after adding latvian content field"),
SolrMigration[F](
14,
"Add japanese content field",
addContentField(Language.Japanese)
),
SolrMigration.reIndexAll(15, "Re-Index after adding japanese content field")
)
def addFolderField: F[Unit] =

View File

@ -2,7 +2,7 @@ openapi: 3.0.0
info:
title: Docspell JOEX
version: 0.25.0-SNAPSHOT
version: 0.26.0-SNAPSHOT
description: |
This is the remote API to the job executor component of Docspell.
Docspell is a free document management system focused on small

View File

@ -53,6 +53,7 @@ object ItemQuery {
case object ItemId extends StringAttr
case object Date extends DateAttr
case object DueDate extends DateAttr
case object CreatedDate extends DateAttr
case object AttachCount extends IntAttr
object Correspondent {

View File

@ -31,6 +31,9 @@ object AttrParser {
val dueDate: P[Attr.DateAttr] =
P.ignoreCase(C.due).as(Attr.DueDate)
val created: P[Attr.DateAttr] =
P.ignoreCase(C.created).as(Attr.CreatedDate)
val corrOrgId: P[Attr.StringAttr] =
P.ignoreCase(C.corrOrgId)
.as(Attr.Correspondent.OrgId)
@ -78,7 +81,7 @@ object AttrParser {
attachCountAttr
val dateAttr: P[Attr.DateAttr] =
P.oneOf(List(date, dueDate))
P.oneOf(List(date, dueDate, created))
val stringAttr: P[Attr.StringAttr] =
P.oneOf(

View File

@ -23,6 +23,8 @@ object Constants {
val corrOrgName = "corr.org.name"
val corrPersId = "corr.pers.id"
val corrPersName = "corr.pers.name"
val created = "created"
val createdIn = "createdIn"
val customField = "f"
val customFieldId = "f.id"
val date = "date"

View File

@ -35,6 +35,9 @@ object MacroParser {
val dueDateRangeMacro: P[Expr.DateRangeMacro] =
dateRangeMacroImpl(C.dueIn, Attr.DueDate)
val createdDateRangeMacro: P[Expr.DateRangeMacro] =
dateRangeMacroImpl(C.createdIn, Attr.CreatedDate)
val yearDateMacro: P[Expr.YearMacro] =
yearMacroImpl(C.year, Attr.Date)
@ -52,6 +55,7 @@ object MacroParser {
namesMacro,
dateRangeMacro,
dueDateRangeMacro,
createdDateRangeMacro,
yearDateMacro,
corrMacro,
concMacro

View File

@ -56,6 +56,14 @@ class SimpleExprParserTest extends FunSuite with ValueHelper {
p.parseAll("due<2021-03-14"),
Right(dateExpr(Operator.Lt, Attr.DueDate, ld(2021, 3, 14)))
)
assertEquals(
p.parseAll("created:2021-03-14"),
Right(dateExpr(Operator.Like, Attr.CreatedDate, ld(2021, 3, 14)))
)
assertEquals(
p.parseAll("created<2021-03-14"),
Right(dateExpr(Operator.Lt, Attr.CreatedDate, ld(2021, 3, 14)))
)
assertEquals(
p.parseAll("due~=2021-03-14,2021-03-13"),
Right(Expr.InDateExpr(Attr.DueDate, Nel.of(ld(2021, 3, 14), ld(2021, 3, 13))))
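
As a small sketch (not from the diff) of what the new attribute enables for end users, the parser now accepts the created date with the usual operators, mirroring the tests above:

import docspell.query.ItemQueryParser

// both forms are accepted by the parser after this change
val onDay  = ItemQueryParser.parseUnsafe("created:2021-07-29")
val before = ItemQueryParser.parseUnsafe("created<2021-01-01")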

View File

@ -2,7 +2,7 @@ openapi: 3.0.0
info:
title: Docspell
version: 0.25.0-SNAPSHOT
version: 0.26.0-SNAPSHOT
description: |
This is the remote API to Docspell. Docspell is a free document
management system focused on small groups or families.
@ -1350,6 +1350,37 @@ paths:
schema:
$ref: "#/components/schemas/BasicResult"
/admin/attachments/convertallpdfs:
post:
operationId: "admin-attachments-convertallpdf"
tags: [Attachment, Admin]
summary: Convert all PDF files not yet converted
description: |
Docspell converts PDF files into PDF/A files by default, if
the OcrMyPDF tool is configured.
This endpoint submits a task that applies this conversion to
all files that have not been converted yet.
The conversion tool was added in version 0.9.0, so older
files can be "migrated" this way, or converted later after
enabling the (optional) tool.
The task finds all files across all collectives and submits a
task for each file to convert. These tasks are submitted with
a low priority so that normal processing can still proceed.
The body of the request should be empty.
security:
- adminHeader: []
responses:
200:
description: Ok
content:
application/json:
schema:
$ref: "#/components/schemas/BasicResult"
/sec/source:
get:
operationId: "sec-source-get-all"
@ -1428,33 +1459,6 @@ paths:
schema:
$ref: "#/components/schemas/BasicResult"
/sec/item/convertallpdfs:
post:
operationId: "sec-item-convert-all-pdfs"
tags: [ Item ]
summary: Convert all non-converted pdfs.
description: |
Submits a job that finds all pdf files that have not been
converted and converts them using the ocrmypdf tool (if
enabled). This tool was added in version 0.9.0, so older
files can be "migrated" this way, or converted after
enabling the tool.
The task finds all files of the current collective and submits
a task for each file to convert. These tasks are submitted with
a low priority so that normal processing can still proceed.
The body of the request should be empty.
security:
- authTokenHeader: []
responses:
200:
description: Ok
content:
application/json:
schema:
$ref: "#/components/schemas/BasicResult"
/sec/item/search:
get:
operationId: "sec-item-search-by-get"
@ -1624,6 +1628,8 @@ paths:
Update the tags associated with an item. This removes all
existing tags and sets the given ones, so that after this
returns the item has exactly the given tags.
Tags may be specified as names or ids.
security:
- authTokenHeader: []
parameters:
@ -1632,7 +1638,7 @@ paths:
content:
application/json:
schema:
$ref: "#/components/schemas/ReferenceList"
$ref: "#/components/schemas/StringList"
responses:
200:
description: Ok
@ -1668,7 +1674,7 @@ paths:
$ref: "#/components/schemas/BasicResult"
/sec/item/{id}/taglink:
post:
put:
operationId: "sec-item-link-tags"
tags: [Item]
summary: Link existing tags to an item.
@ -1721,6 +1727,31 @@ paths:
schema:
$ref: "#/components/schemas/BasicResult"
/sec/item/{id}/tagsremove:
post:
operationId: "sec-item-remove-tags"
tags: [ Item ]
summary: Remove tags from an item
description: |
Remove the given tags from the item. The tags can be specified
via ids or names.
security:
- authTokenHeader: []
parameters:
- $ref: "#/components/parameters/id"
requestBody:
content:
application/json:
schema:
$ref: "#/components/schemas/StringList"
responses:
200:
description: Ok
content:
application/json:
schema:
$ref: "#/components/schemas/BasicResult"
/sec/item/{id}/direction:
put:
operationId: "sec-item-set-direction"

View File

@ -194,13 +194,21 @@ object AttachmentRoutes {
val dsl = Http4sDsl[F]
import dsl._
HttpRoutes.of { case POST -> Root / "generatePreviews" =>
for {
res <- backend.item.generateAllPreviews(MakePreviewArgs.StoreMode.Replace, true)
resp <- Ok(
Conversions.basicResult(res, "Generate all previews task submitted.")
)
} yield resp
HttpRoutes.of {
case POST -> Root / "generatePreviews" =>
for {
res <- backend.item.generateAllPreviews(MakePreviewArgs.StoreMode.Replace, true)
resp <- Ok(
Conversions.basicResult(res, "Generate all previews task submitted.")
)
} yield resp
case POST -> Root / "convertallpdfs" =>
for {
res <-
backend.item.convertAllPdf(None, None, true)
resp <- Ok(Conversions.basicResult(res, "Convert all PDFs task submitted"))
} yield resp
}
}

View File

@ -59,9 +59,12 @@ object ItemMultiRoutes extends MultiIdSupport {
for {
json <- req.as[ItemsAndRefs]
items <- readIds[F](json.items)
tags <- json.refs.traverse(readId[F])
res <- backend.item.setTagsMultipleItems(items, tags, user.account.collective)
resp <- Ok(Conversions.basicResult(res, "Tags updated"))
res <- backend.item.setTagsMultipleItems(
items,
json.refs,
user.account.collective
)
resp <- Ok(Conversions.basicResult(res, "Tags updated"))
} yield resp
case req @ POST -> Root / "tags" =>

View File

@ -47,13 +47,6 @@ object ItemRoutes {
import dsl._
HttpRoutes.of {
case POST -> Root / "convertallpdfs" =>
for {
res <-
backend.item.convertAllPdf(user.account.collective.some, user.account, true)
resp <- Ok(Conversions.basicResult(res, "Task submitted"))
} yield resp
case GET -> Root / "search" :? QP.Query(q) :? QP.Limit(limit) :? QP.Offset(
offset
) :? QP.WithDetails(detailFlag) =>
@ -153,8 +146,8 @@ object ItemRoutes {
case req @ PUT -> Root / Ident(id) / "tags" =>
for {
tags <- req.as[ReferenceList].map(_.items)
res <- backend.item.setTags(id, tags.map(_.id), user.account.collective)
tags <- req.as[StringList].map(_.items)
res <- backend.item.setTags(id, tags, user.account.collective)
resp <- Ok(Conversions.basicResult(res, "Tags updated"))
} yield resp
@ -180,6 +173,17 @@ object ItemRoutes {
resp <- Ok(Conversions.basicResult(res, "Tags linked"))
} yield resp
case req @ POST -> Root / Ident(id) / "tagsremove" =>
for {
json <- req.as[StringList]
res <- backend.item.removeTagsMultipleItems(
NonEmptyList.of(id),
json.items,
user.account.collective
)
resp <- Ok(Conversions.basicResult(res, "Tags removed"))
} yield resp
case req @ PUT -> Root / Ident(id) / "direction" =>
for {
dir <- req.as[DirectionValue]

View File

@ -171,12 +171,16 @@ object ItemQueryGenerator {
tables.item.id.in(select.withSelect(Nel.of(RItem.as("i").id.s)))
case Expr.AttachId(id) =>
tables.item.id.in(
Select(
select(RAttachment.T.itemId),
from(RAttachment.T),
val idWildcard = QueryWildcard(id)
val query =
if (id == idWildcard) {
RAttachment.T.id.cast[String] === id
).distinct
} else {
RAttachment.T.id.cast[String].like(idWildcard)
}
tables.item.id.in(
Select(select(RAttachment.T.itemId), from(RAttachment.T), query).distinct
)
case Expr.Fulltext(_) =>
@ -228,6 +232,8 @@ object ItemQueryGenerator {
coalesce(tables.item.itemDate.s, tables.item.created.s).s
case Attr.DueDate =>
tables.item.dueDate.s
case Attr.CreatedDate =>
tables.item.created.s
}
private def stringColumn(tables: Tables)(attr: Attr.StringAttr): Column[String] =

View File

@ -11,6 +11,7 @@ import java.time.LocalDate
import docspell.common._
import docspell.query.ItemQueryParser
import docspell.store.qb.DSL._
import docspell.store.qb.Select
import docspell.store.qb.generator.{ItemQueryGenerator, Tables}
import docspell.store.queries.AttachCountTable
import docspell.store.records._
@ -56,4 +57,31 @@ class ItemQueryGeneratorTest extends FunSuite {
assertEquals(cond, expect)
}
test("attach.id with wildcard") {
val q = ItemQueryParser.parseUnsafe("attach.id=abcde*")
val cond = ItemQueryGenerator(now, tables, Ident.unsafe("coll"))(q)
val expect = tables.item.id.in(
Select(
select(RAttachment.T.itemId),
from(RAttachment.T),
RAttachment.T.id.cast[String].like("abcde%")
).distinct
)
assertEquals(cond, expect)
}
test("attach.id with equals") {
val q = ItemQueryParser.parseUnsafe("attach.id=abcde")
val cond = ItemQueryGenerator(now, tables, Ident.unsafe("coll"))(q)
val expect = tables.item.id.in(
Select(
select(RAttachment.T.itemId),
from(RAttachment.T),
RAttachment.T.id.cast[String] === "abcde"
).distinct
)
assertEquals(cond, expect)
}
}

View File

@ -1,9 +1,10 @@
{-
Copyright 2020 Docspell Contributors
Copyright 2020 Docspell Contributors
SPDX-License-Identifier: GPL-3.0-or-later
SPDX-License-Identifier: GPL-3.0-or-later
-}
module Api exposing
( addConcEquip
, addConcPerson
@ -1782,12 +1783,12 @@ itemDetail flags id receive =
}
setTags : Flags -> String -> ReferenceList -> (Result Http.Error BasicResult -> msg) -> Cmd msg
setTags : Flags -> String -> StringList -> (Result Http.Error BasicResult -> msg) -> Cmd msg
setTags flags item tags receive =
Http2.authPut
{ url = flags.config.baseUrl ++ "/api/v1/sec/item/" ++ item ++ "/tags"
, account = getAccount flags
, body = Http.jsonBody (Api.Model.ReferenceList.encode tags)
, body = Http.jsonBody (Api.Model.StringList.encode tags)
, expect = Http.expectJson receive Api.Model.BasicResult.decoder
}

View File

@ -1,9 +1,10 @@
{-
Copyright 2020 Docspell Contributors
Copyright 2020 Docspell Contributors
SPDX-License-Identifier: GPL-3.0-or-later
SPDX-License-Identifier: GPL-3.0-or-later
-}
module App.Data exposing
( Model
, Msg(..)

View File

@ -1,9 +1,10 @@
{-
Copyright 2020 Docspell Contributors
Copyright 2020 Docspell Contributors
SPDX-License-Identifier: GPL-3.0-or-later
SPDX-License-Identifier: GPL-3.0-or-later
-}
module App.Update exposing
( initPage
, update
@ -330,10 +331,18 @@ updateItemDetail lmsg model =
( hm, hc, hs ) =
updateHome (Page.Home.Data.SetLinkTarget result.linkTarget) model_
( hm1, hc1, hs1 ) =
case result.removedItem of
Just removedId ->
updateHome (Page.Home.Data.RemoveItem removedId) hm
Nothing ->
( hm, hc, hs )
in
( hm
, Cmd.batch [ Cmd.map ItemDetailMsg result.cmd, hc ]
, Sub.batch [ Sub.map ItemDetailMsg result.sub, hs ]
( hm1
, Cmd.batch [ Cmd.map ItemDetailMsg result.cmd, hc, hc1 ]
, Sub.batch [ Sub.map ItemDetailMsg result.sub, hs, hs1 ]
)

View File

@ -1,9 +1,10 @@
{-
Copyright 2020 Docspell Contributors
Copyright 2020 Docspell Contributors
SPDX-License-Identifier: GPL-3.0-or-later
SPDX-License-Identifier: GPL-3.0-or-later
-}
module App.View2 exposing (view)
import Api.Model.AuthResult exposing (AuthResult)

View File

@ -1,9 +1,10 @@
{-
Copyright 2020 Docspell Contributors
Copyright 2020 Docspell Contributors
SPDX-License-Identifier: GPL-3.0-or-later
SPDX-License-Identifier: GPL-3.0-or-later
-}
module Comp.AddressForm exposing
( Model
, Msg(..)

View File

@ -1,9 +1,10 @@
{-
Copyright 2020 Docspell Contributors
Copyright 2020 Docspell Contributors
SPDX-License-Identifier: GPL-3.0-or-later
SPDX-License-Identifier: GPL-3.0-or-later
-}
module Comp.AttachmentMeta exposing
( Model
, Msg

View File

@ -1,9 +1,10 @@
{-
Copyright 2020 Docspell Contributors
Copyright 2020 Docspell Contributors
SPDX-License-Identifier: GPL-3.0-or-later
SPDX-License-Identifier: GPL-3.0-or-later
-}
module Comp.Basic exposing
( editLinkLabel
, editLinkTableCell

View File

@ -1,9 +1,10 @@
{-
Copyright 2020 Docspell Contributors
Copyright 2020 Docspell Contributors
SPDX-License-Identifier: GPL-3.0-or-later
SPDX-License-Identifier: GPL-3.0-or-later
-}
module Comp.BasicSizeField exposing
( Msg
, update

View File

@ -1,9 +1,10 @@
{-
Copyright 2020 Docspell Contributors
Copyright 2020 Docspell Contributors
SPDX-License-Identifier: GPL-3.0-or-later
SPDX-License-Identifier: GPL-3.0-or-later
-}
module Comp.CalEventInput exposing
( Model
, Msg

View File

@ -1,9 +1,10 @@
{-
Copyright 2020 Docspell Contributors
Copyright 2020 Docspell Contributors
SPDX-License-Identifier: GPL-3.0-or-later
SPDX-License-Identifier: GPL-3.0-or-later
-}
module Comp.ChangePasswordForm exposing
( Model
, Msg(..)

View File

@ -1,9 +1,10 @@
{-
Copyright 2020 Docspell Contributors
Copyright 2020 Docspell Contributors
SPDX-License-Identifier: GPL-3.0-or-later
SPDX-License-Identifier: GPL-3.0-or-later
-}
module Comp.ClassifierSettingsForm exposing
( Model
, Msg

View File

@ -1,9 +1,10 @@
{-
Copyright 2020 Docspell Contributors
Copyright 2020 Docspell Contributors
SPDX-License-Identifier: GPL-3.0-or-later
SPDX-License-Identifier: GPL-3.0-or-later
-}
module Comp.CollectiveSettingsForm exposing
( Model
, Msg

View File

@ -1,9 +1,10 @@
{-
Copyright 2020 Docspell Contributors
Copyright 2020 Docspell Contributors
SPDX-License-Identifier: GPL-3.0-or-later
SPDX-License-Identifier: GPL-3.0-or-later
-}
module Comp.ColorTagger exposing
( Model
, Msg

View File

@ -1,9 +1,10 @@
{-
Copyright 2020 Docspell Contributors
Copyright 2020 Docspell Contributors
SPDX-License-Identifier: GPL-3.0-or-later
SPDX-License-Identifier: GPL-3.0-or-later
-}
module Comp.ConfirmModal exposing
( Settings
, defaultSettings

View File

@ -1,9 +1,10 @@
{-
Copyright 2020 Docspell Contributors
Copyright 2020 Docspell Contributors
SPDX-License-Identifier: GPL-3.0-or-later
SPDX-License-Identifier: GPL-3.0-or-later
-}
module Comp.ContactField exposing
( Model
, Msg(..)

View File

@ -1,9 +1,10 @@
{-
Copyright 2020 Docspell Contributors
Copyright 2020 Docspell Contributors
SPDX-License-Identifier: GPL-3.0-or-later
SPDX-License-Identifier: GPL-3.0-or-later
-}
module Comp.CustomFieldForm exposing
( Model
, Msg

View File

@ -1,9 +1,10 @@
{-
Copyright 2020 Docspell Contributors
Copyright 2020 Docspell Contributors
SPDX-License-Identifier: GPL-3.0-or-later
SPDX-License-Identifier: GPL-3.0-or-later
-}
module Comp.CustomFieldInput exposing
( FieldResult(..)
, Model

View File

@ -1,9 +1,10 @@
{-
Copyright 2020 Docspell Contributors
Copyright 2020 Docspell Contributors
SPDX-License-Identifier: GPL-3.0-or-later
SPDX-License-Identifier: GPL-3.0-or-later
-}
module Comp.CustomFieldManage exposing
( Model
, Msg

View File

@ -1,9 +1,10 @@
{-
Copyright 2020 Docspell Contributors
Copyright 2020 Docspell Contributors
SPDX-License-Identifier: GPL-3.0-or-later
SPDX-License-Identifier: GPL-3.0-or-later
-}
module Comp.CustomFieldMultiInput exposing
( Model
, Msg

View File

@ -1,9 +1,10 @@
{-
Copyright 2020 Docspell Contributors
Copyright 2020 Docspell Contributors
SPDX-License-Identifier: GPL-3.0-or-later
SPDX-License-Identifier: GPL-3.0-or-later
-}
module Comp.CustomFieldTable exposing
( Action(..)
, Model

View File

@ -1,9 +1,10 @@
{-
Copyright 2020 Docspell Contributors
Copyright 2020 Docspell Contributors
SPDX-License-Identifier: GPL-3.0-or-later
SPDX-License-Identifier: GPL-3.0-or-later
-}
module Comp.DatePicker exposing
( Msg
, defaultSettings

View File

@ -1,9 +1,10 @@
{-
Copyright 2020 Docspell Contributors
Copyright 2020 Docspell Contributors
SPDX-License-Identifier: GPL-3.0-or-later
SPDX-License-Identifier: GPL-3.0-or-later
-}
module Comp.DetailEdit exposing
( Model
, Msg

View File

@ -1,9 +1,10 @@
{-
Copyright 2020 Docspell Contributors
Copyright 2020 Docspell Contributors
SPDX-License-Identifier: GPL-3.0-or-later
SPDX-License-Identifier: GPL-3.0-or-later
-}
module Comp.Dropdown exposing
( Model
, Msg(..)

View File

@ -1,9 +1,10 @@
{-
Copyright 2020 Docspell Contributors
Copyright 2020 Docspell Contributors
SPDX-License-Identifier: GPL-3.0-or-later
SPDX-License-Identifier: GPL-3.0-or-later
-}
-- inspired from here: https://ellie-app.com/3T5mNms7SwKa1

View File

@ -1,9 +1,10 @@
{-
Copyright 2020 Docspell Contributors
Copyright 2020 Docspell Contributors
SPDX-License-Identifier: GPL-3.0-or-later
SPDX-License-Identifier: GPL-3.0-or-later
-}
module Comp.EmailInput exposing
( Model
, Msg

View File

@ -1,9 +1,10 @@
{-
Copyright 2020 Docspell Contributors
Copyright 2020 Docspell Contributors
SPDX-License-Identifier: GPL-3.0-or-later
SPDX-License-Identifier: GPL-3.0-or-later
-}
module Comp.EmailSettingsForm exposing
( Model
, Msg

View File

@ -1,9 +1,10 @@
{-
Copyright 2020 Docspell Contributors
Copyright 2020 Docspell Contributors
SPDX-License-Identifier: GPL-3.0-or-later
SPDX-License-Identifier: GPL-3.0-or-later
-}
module Comp.EmailSettingsManage exposing
( Model
, Msg

View File

@ -1,9 +1,10 @@
{-
Copyright 2020 Docspell Contributors
Copyright 2020 Docspell Contributors
SPDX-License-Identifier: GPL-3.0-or-later
SPDX-License-Identifier: GPL-3.0-or-later
-}
module Comp.EmailSettingsTable exposing
( Model
, Msg

View File

@ -1,9 +1,10 @@
{-
Copyright 2020 Docspell Contributors
Copyright 2020 Docspell Contributors
SPDX-License-Identifier: GPL-3.0-or-later
SPDX-License-Identifier: GPL-3.0-or-later
-}
module Comp.EquipmentForm exposing
( Model
, Msg(..)

View File

@ -1,9 +1,10 @@
{-
Copyright 2020 Docspell Contributors
Copyright 2020 Docspell Contributors
SPDX-License-Identifier: GPL-3.0-or-later
SPDX-License-Identifier: GPL-3.0-or-later
-}
module Comp.EquipmentManage exposing
( Model
, Msg(..)

View File

@ -1,9 +1,10 @@
{-
Copyright 2020 Docspell Contributors
Copyright 2020 Docspell Contributors
SPDX-License-Identifier: GPL-3.0-or-later
SPDX-License-Identifier: GPL-3.0-or-later
-}
module Comp.EquipmentTable exposing
( Model
, Msg(..)

View File

@ -1,9 +1,10 @@
{-
Copyright 2020 Docspell Contributors
Copyright 2020 Docspell Contributors
SPDX-License-Identifier: GPL-3.0-or-later
SPDX-License-Identifier: GPL-3.0-or-later
-}
module Comp.ExpandCollapse exposing
( collapseToggle
, expandToggle

View File

@ -1,9 +1,10 @@
{-
Copyright 2020 Docspell Contributors
Copyright 2020 Docspell Contributors
SPDX-License-Identifier: GPL-3.0-or-later
SPDX-License-Identifier: GPL-3.0-or-later
-}
module Comp.FieldListSelect exposing
( Model
, Msg

View File

@ -1,9 +1,10 @@
{-
Copyright 2020 Docspell Contributors
Copyright 2020 Docspell Contributors
SPDX-License-Identifier: GPL-3.0-or-later
SPDX-License-Identifier: GPL-3.0-or-later
-}
module Comp.FixedDropdown exposing
( Item
, Model

View File

@ -1,9 +1,10 @@
{-
Copyright 2020 Docspell Contributors
Copyright 2020 Docspell Contributors
SPDX-License-Identifier: GPL-3.0-or-later
SPDX-License-Identifier: GPL-3.0-or-later
-}
module Comp.FolderDetail exposing
( Model
, Msg

View File

@ -1,9 +1,10 @@
{-
Copyright 2020 Docspell Contributors
Copyright 2020 Docspell Contributors
SPDX-License-Identifier: GPL-3.0-or-later
SPDX-License-Identifier: GPL-3.0-or-later
-}
module Comp.FolderManage exposing
( Model
, Msg

View File

@ -1,9 +1,10 @@
{-
Copyright 2020 Docspell Contributors
Copyright 2020 Docspell Contributors
SPDX-License-Identifier: GPL-3.0-or-later
SPDX-License-Identifier: GPL-3.0-or-later
-}
module Comp.FolderSelect exposing
( Model
, Msg

View File

@ -1,9 +1,10 @@
{-
Copyright 2020 Docspell Contributors
Copyright 2020 Docspell Contributors
SPDX-License-Identifier: GPL-3.0-or-later
SPDX-License-Identifier: GPL-3.0-or-later
-}
module Comp.FolderTable exposing
( Action(..)
, Model

View File

@ -1,9 +1,10 @@
{-
Copyright 2020 Docspell Contributors
Copyright 2020 Docspell Contributors
SPDX-License-Identifier: GPL-3.0-or-later
SPDX-License-Identifier: GPL-3.0-or-later
-}
module Comp.ImapSettingsForm exposing
( Model
, Msg

View File

@ -1,9 +1,10 @@
{-
Copyright 2020 Docspell Contributors
Copyright 2020 Docspell Contributors
SPDX-License-Identifier: GPL-3.0-or-later
SPDX-License-Identifier: GPL-3.0-or-later
-}
module Comp.ImapSettingsManage exposing
( Model
, Msg

View File

@ -1,9 +1,10 @@
{-
Copyright 2020 Docspell Contributors
Copyright 2020 Docspell Contributors
SPDX-License-Identifier: GPL-3.0-or-later
SPDX-License-Identifier: GPL-3.0-or-later
-}
module Comp.ImapSettingsTable exposing
( Model
, Msg

View File

@ -1,9 +1,10 @@
{-
Copyright 2020 Docspell Contributors
Copyright 2020 Docspell Contributors
SPDX-License-Identifier: GPL-3.0-or-later
SPDX-License-Identifier: GPL-3.0-or-later
-}
module Comp.IntField exposing
( Model
, Msg

View File

@ -1,9 +1,10 @@
{-
Copyright 2020 Docspell Contributors
Copyright 2020 Docspell Contributors
SPDX-License-Identifier: GPL-3.0-or-later
SPDX-License-Identifier: GPL-3.0-or-later
-}
module Comp.ItemCard exposing
( Model
, Msg

View File

@ -1,9 +1,10 @@
{-
Copyright 2020 Docspell Contributors
Copyright 2020 Docspell Contributors
SPDX-License-Identifier: GPL-3.0-or-later
SPDX-License-Identifier: GPL-3.0-or-later
-}
module Comp.ItemCardList exposing
( Model
, Msg(..)
@ -45,6 +46,7 @@ type Msg
= SetResults ItemLightList
| AddResults ItemLightList
| ItemCardMsg ItemLight Comp.ItemCard.Msg
| RemoveItem String
init : Model
@ -144,6 +146,13 @@ updateDrag dm _ msg model =
result.selection
result.linkTarget
RemoveItem id ->
UpdateResult { model | results = removeItemById id model.results }
Cmd.none
dm
Data.ItemSelection.Inactive
Comp.LinkTarget.LinkNone
--- View2
@ -170,13 +179,13 @@ viewGroup2 : Texts -> Model -> ViewConfig -> UiSettings -> ItemLightGroup -> Htm
viewGroup2 texts model cfg settings group =
div [ class "ds-item-group" ]
[ div
[ class "flex py-0 mt-2 flex flex-row items-center"
, class "bg-white dark:bg-bluegray-800 text-lg z-35"
[ class "flex py-1 mt-2 mb-2 flex flex-row items-center"
, class "bg-white dark:bg-bluegray-800 text-xl font-bold z-35"
, class "relative sticky top-10"
]
[ hr
[ class S.border
, class "flex-grow"
[ class S.border2
, class "w-16"
]
[]
, div [ class "px-6" ]
@ -186,7 +195,7 @@ viewGroup2 texts model cfg settings group =
]
]
, hr
[ class S.border
[ class S.border2
, class "flex-grow"
]
[]
@ -231,3 +240,15 @@ isMultiSelectMode cfg =
Data.ItemSelection.Inactive ->
False
removeItemById : String -> ItemLightList -> ItemLightList
removeItemById id list =
let
filterItem item =
item.id /= id
filterGroup group =
{ group | items = List.filter filterItem group.items }
in
{ list | groups = List.map filterGroup list.groups }

View File

@ -1,9 +1,10 @@
{-
Copyright 2020 Docspell Contributors
Copyright 2020 Docspell Contributors
SPDX-License-Identifier: GPL-3.0-or-later
SPDX-License-Identifier: GPL-3.0-or-later
-}
module Comp.ItemDetail exposing
( Model
, emptyModel

View File

@ -1,9 +1,10 @@
{-
Copyright 2020 Docspell Contributors
Copyright 2020 Docspell Contributors
SPDX-License-Identifier: GPL-3.0-or-later
SPDX-License-Identifier: GPL-3.0-or-later
-}
module Comp.ItemDetail.AddFilesForm exposing (view)
import Comp.Dropzone

View File

@ -1,9 +1,10 @@
{-
Copyright 2020 Docspell Contributors
Copyright 2020 Docspell Contributors
SPDX-License-Identifier: GPL-3.0-or-later
SPDX-License-Identifier: GPL-3.0-or-later
-}
module Comp.ItemDetail.ConfirmModalView exposing (view)
import Comp.ConfirmModal

View File

@ -1,9 +1,10 @@
{-
Copyright 2020 Docspell Contributors
Copyright 2020 Docspell Contributors
SPDX-License-Identifier: GPL-3.0-or-later
SPDX-License-Identifier: GPL-3.0-or-later
-}
module Comp.ItemDetail.EditForm exposing (formTabs, view2)
import Comp.CustomFieldMultiInput

View File

@ -1,9 +1,10 @@
{-
Copyright 2020 Docspell Contributors
Copyright 2020 Docspell Contributors
SPDX-License-Identifier: GPL-3.0-or-later
SPDX-License-Identifier: GPL-3.0-or-later
-}
module Comp.ItemDetail.FieldTabState exposing (EditTab(..), allTabs, findTab, tabName, tabState)
import Comp.CustomFieldMultiInput

View File

@ -1,9 +1,10 @@
{-
Copyright 2020 Docspell Contributors
Copyright 2020 Docspell Contributors
SPDX-License-Identifier: GPL-3.0-or-later
SPDX-License-Identifier: GPL-3.0-or-later
-}
module Comp.ItemDetail.FormChange exposing
( FormChange(..)
, multiUpdate

View File

@ -1,9 +1,10 @@
{-
Copyright 2020 Docspell Contributors
Copyright 2020 Docspell Contributors
SPDX-License-Identifier: GPL-3.0-or-later
SPDX-License-Identifier: GPL-3.0-or-later
-}
module Comp.ItemDetail.ItemInfoHeader exposing (view)
import Api.Model.IdName exposing (IdName)

View File

@ -1,9 +1,10 @@
{-
Copyright 2020 Docspell Contributors
Copyright 2020 Docspell Contributors
SPDX-License-Identifier: GPL-3.0-or-later
SPDX-License-Identifier: GPL-3.0-or-later
-}
module Comp.ItemDetail.Model exposing
( AttachmentRename
, ConfirmModalValue(..)
@ -275,7 +276,7 @@ type Msg
| ItemModalCancelled
| RequestDelete
| SaveResp (Result Http.Error BasicResult)
| DeleteResp (Result Http.Error BasicResult)
| DeleteResp String (Result Http.Error BasicResult)
| GetItemResp (Result Http.Error ItemDetail)
| GetProposalResp (Result Http.Error ItemProposals)
| RemoveDueDate
@ -351,22 +352,23 @@ type alias UpdateResult =
, cmd : Cmd Msg
, sub : Sub Msg
, linkTarget : LinkTarget
, removedItem : Maybe String
}
resultModel : Model -> UpdateResult
resultModel model =
UpdateResult model Cmd.none Sub.none Comp.LinkTarget.LinkNone
UpdateResult model Cmd.none Sub.none Comp.LinkTarget.LinkNone Nothing
resultModelCmd : ( Model, Cmd Msg ) -> UpdateResult
resultModelCmd ( model, cmd ) =
UpdateResult model cmd Sub.none Comp.LinkTarget.LinkNone
UpdateResult model cmd Sub.none Comp.LinkTarget.LinkNone Nothing
resultModelCmdSub : ( Model, Cmd Msg, Sub Msg ) -> UpdateResult
resultModelCmdSub ( model, cmd, sub ) =
UpdateResult model cmd sub Comp.LinkTarget.LinkNone
UpdateResult model cmd sub Comp.LinkTarget.LinkNone Nothing
personMatchesOrg : Model -> Bool

Some files were not shown because too many files have changed in this diff.