mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-03-28 17:55:06 +00:00
Apply scalafmt to all files
This commit is contained in:
parent
6a9ec42a03
commit
5c37efeaba
@ -32,8 +32,8 @@ object BackendApp {
|
||||
def create[F[_]: ConcurrentEffect: ContextShift](
|
||||
cfg: Config,
|
||||
store: Store[F],
|
||||
httpClientEc: ExecutionContext,
|
||||
blocker: Blocker
|
||||
httpClientEc: ExecutionContext,
|
||||
blocker: Blocker
|
||||
): Resource[F, BackendApp[F]] =
|
||||
for {
|
||||
queue <- JobQueue(store)
|
||||
|
@ -176,7 +176,7 @@ object OItem {
|
||||
|
||||
def findByFileSource(checksum: String, sourceId: Ident): F[Vector[RItem]] =
|
||||
store.transact((for {
|
||||
coll <- OptionT(RSource.findCollective(sourceId))
|
||||
coll <- OptionT(RSource.findCollective(sourceId))
|
||||
items <- OptionT.liftF(QItem.findByChecksum(checksum, coll))
|
||||
} yield items).getOrElse(Vector.empty))
|
||||
|
||||
|
@ -113,10 +113,10 @@ object OMail {
|
||||
|
||||
def createSettings(accId: AccountId, s: SmtpSettings): F[AddResult] =
|
||||
(for {
|
||||
ru <- OptionT(store.transact(s.toRecord(accId).value))
|
||||
ru <- OptionT(store.transact(s.toRecord(accId).value))
|
||||
ins = RUserEmail.insert(ru)
|
||||
exists = RUserEmail.exists(ru.uid, ru.name)
|
||||
res <- OptionT.liftF(store.add(ins, exists))
|
||||
res <- OptionT.liftF(store.add(ins, exists))
|
||||
} yield res).getOrElse(AddResult.Failure(new Exception("User not found")))
|
||||
|
||||
def updateSettings(accId: AccountId, name: Ident, data: SmtpSettings): F[Int] = {
|
||||
@ -143,8 +143,10 @@ object OMail {
|
||||
for {
|
||||
_ <- OptionT.liftF(store.transact(RItem.existsById(m.item))).filter(identity)
|
||||
ras <- OptionT.liftF(
|
||||
store.transact(RAttachment.findByItemAndCollectiveWithMeta(m.item, accId.collective))
|
||||
)
|
||||
store.transact(
|
||||
RAttachment.findByItemAndCollectiveWithMeta(m.item, accId.collective)
|
||||
)
|
||||
)
|
||||
} yield {
|
||||
val addAttach = m.attach.filter(ras).map { a =>
|
||||
Attach[F](Stream.emit(a._2).through(store.bitpeace.fetchData2(RangeDef.all)))
|
||||
@ -169,15 +171,15 @@ object OMail {
|
||||
def storeMail(msgId: String, cfg: RUserEmail): F[Either[SendResult, Ident]] = {
|
||||
val save = for {
|
||||
data <- RSentMail.forItem(
|
||||
m.item,
|
||||
accId,
|
||||
msgId,
|
||||
cfg.mailFrom,
|
||||
name,
|
||||
m.subject,
|
||||
m.recipients,
|
||||
m.body
|
||||
)
|
||||
m.item,
|
||||
accId,
|
||||
msgId,
|
||||
cfg.mailFrom,
|
||||
name,
|
||||
m.subject,
|
||||
m.recipients,
|
||||
m.body
|
||||
)
|
||||
_ <- OptionT.liftF(RSentMail.insert(data._1))
|
||||
_ <- OptionT.liftF(RSentMailItem.insert(data._2))
|
||||
} yield data._1.id
|
||||
@ -195,7 +197,7 @@ object OMail {
|
||||
mail <- createMail(mailCfg)
|
||||
mid <- OptionT.liftF(sendMail(mailCfg.toMailConfig, mail))
|
||||
res <- mid.traverse(id => OptionT.liftF(storeMail(id, mailCfg)))
|
||||
conv = res.fold(identity, _.fold(identity, id => SendResult.Success(id)))
|
||||
conv = res.fold(identity, _.fold(identity, id => SendResult.Success(id)))
|
||||
} yield conv).getOrElse(SendResult.NotFound)
|
||||
}
|
||||
|
||||
|
@ -19,9 +19,9 @@ object AccountId {
|
||||
case n if n > 0 && input.length > 2 =>
|
||||
val coll = input.substring(0, n)
|
||||
val user = input.substring(n + 1)
|
||||
Ident.fromString(coll).
|
||||
flatMap(collId => Ident.fromString(user).
|
||||
map(userId => AccountId(collId, userId)))
|
||||
Ident
|
||||
.fromString(coll)
|
||||
.flatMap(collId => Ident.fromString(user).map(userId => AccountId(collId, userId)))
|
||||
case _ =>
|
||||
invalid
|
||||
}
|
||||
|
@ -12,5 +12,4 @@ object BaseJsonCodecs {
|
||||
implicit val decodeInstantEpoch: Decoder[Instant] =
|
||||
Decoder.decodeLong.map(Instant.ofEpochMilli)
|
||||
|
||||
|
||||
}
|
||||
|
@ -21,32 +21,29 @@ object CollectiveState {
|
||||
* action. */
|
||||
case object Blocked extends CollectiveState
|
||||
|
||||
|
||||
def fromString(s: String): Either[String, CollectiveState] =
|
||||
s.toLowerCase match {
|
||||
case "active" => Right(Active)
|
||||
case "active" => Right(Active)
|
||||
case "readonly" => Right(ReadOnly)
|
||||
case "closed" => Right(Closed)
|
||||
case "blocked" => Right(Blocked)
|
||||
case _ => Left(s"Unknown state: $s")
|
||||
case "closed" => Right(Closed)
|
||||
case "blocked" => Right(Blocked)
|
||||
case _ => Left(s"Unknown state: $s")
|
||||
}
|
||||
|
||||
def unsafe(str: String): CollectiveState =
|
||||
fromString(str).fold(sys.error, identity)
|
||||
|
||||
def asString(state: CollectiveState): String = state match {
|
||||
case Active => "active"
|
||||
case Blocked => "blocked"
|
||||
case Closed => "closed"
|
||||
case Active => "active"
|
||||
case Blocked => "blocked"
|
||||
case Closed => "closed"
|
||||
case ReadOnly => "readonly"
|
||||
}
|
||||
|
||||
|
||||
|
||||
implicit val collectiveStateEncoder: Encoder[CollectiveState] =
|
||||
Encoder.encodeString.contramap(CollectiveState.asString)
|
||||
|
||||
implicit val collectiveStateDecoder: Decoder[CollectiveState] =
|
||||
Decoder.decodeString.emap(CollectiveState.fromString)
|
||||
|
||||
}
|
||||
}
|
||||
|
@ -10,22 +10,22 @@ sealed trait ContactKind { self: Product =>
|
||||
object ContactKind {
|
||||
val all = List()
|
||||
|
||||
case object Phone extends ContactKind
|
||||
case object Mobile extends ContactKind
|
||||
case object Fax extends ContactKind
|
||||
case object Email extends ContactKind
|
||||
case object Phone extends ContactKind
|
||||
case object Mobile extends ContactKind
|
||||
case object Fax extends ContactKind
|
||||
case object Email extends ContactKind
|
||||
case object Docspell extends ContactKind
|
||||
case object Website extends ContactKind
|
||||
case object Website extends ContactKind
|
||||
|
||||
def fromString(s: String): Either[String, ContactKind] =
|
||||
s.toLowerCase match {
|
||||
case "phone" => Right(Phone)
|
||||
case "mobile" => Right(Mobile)
|
||||
case "fax" => Right(Fax)
|
||||
case "email" => Right(Email)
|
||||
case "phone" => Right(Phone)
|
||||
case "mobile" => Right(Mobile)
|
||||
case "fax" => Right(Fax)
|
||||
case "email" => Right(Email)
|
||||
case "docspell" => Right(Docspell)
|
||||
case "website" => Right(Website)
|
||||
case _ => Left(s"Not a state value: $s")
|
||||
case "website" => Right(Website)
|
||||
case _ => Left(s"Not a state value: $s")
|
||||
}
|
||||
|
||||
def unsafe(str: String): ContactKind =
|
||||
@ -34,7 +34,6 @@ object ContactKind {
|
||||
def asString(s: ContactKind): String =
|
||||
s.asString.toLowerCase
|
||||
|
||||
|
||||
implicit val contactKindEncoder: Encoder[ContactKind] =
|
||||
Encoder.encodeString.contramap(_.asString)
|
||||
|
||||
|
@ -49,6 +49,6 @@ object Duration {
|
||||
def stopTime[F[_]: Sync]: F[F[Duration]] =
|
||||
for {
|
||||
now <- Timestamp.current[F]
|
||||
end = Timestamp.current[F]
|
||||
end = Timestamp.current[F]
|
||||
} yield end.map(e => Duration.millis(e.toMillis - now.toMillis))
|
||||
}
|
||||
|
@ -10,48 +10,41 @@ sealed trait JobState { self: Product =>
|
||||
object JobState {
|
||||
|
||||
/** Waiting for being executed. */
|
||||
case object Waiting extends JobState {
|
||||
}
|
||||
case object Waiting extends JobState {}
|
||||
|
||||
/** A scheduler has picked up this job and will pass it to the next
|
||||
* free slot. */
|
||||
case object Scheduled extends JobState {
|
||||
}
|
||||
case object Scheduled extends JobState {}
|
||||
|
||||
/** Is currently executing */
|
||||
case object Running extends JobState {
|
||||
}
|
||||
case object Running extends JobState {}
|
||||
|
||||
/** Finished with failure and is being retried. */
|
||||
case object Stuck extends JobState {
|
||||
}
|
||||
case object Stuck extends JobState {}
|
||||
|
||||
/** Finished finally with a failure */
|
||||
case object Failed extends JobState {
|
||||
}
|
||||
case object Failed extends JobState {}
|
||||
|
||||
/** Finished by cancellation. */
|
||||
case object Cancelled extends JobState {
|
||||
}
|
||||
case object Cancelled extends JobState {}
|
||||
|
||||
/** Finished with success */
|
||||
case object Success extends JobState {
|
||||
}
|
||||
case object Success extends JobState {}
|
||||
|
||||
val all: Set[JobState] = Set(Waiting, Scheduled, Running, Stuck, Failed, Cancelled, Success)
|
||||
val all: Set[JobState] = Set(Waiting, Scheduled, Running, Stuck, Failed, Cancelled, Success)
|
||||
val queued: Set[JobState] = Set(Waiting, Scheduled, Stuck)
|
||||
val done: Set[JobState] = Set(Failed, Cancelled, Success)
|
||||
val done: Set[JobState] = Set(Failed, Cancelled, Success)
|
||||
|
||||
def parse(str: String): Either[String, JobState] =
|
||||
str.toLowerCase match {
|
||||
case "waiting" => Right(Waiting)
|
||||
case "waiting" => Right(Waiting)
|
||||
case "scheduled" => Right(Scheduled)
|
||||
case "running" => Right(Running)
|
||||
case "stuck" => Right(Stuck)
|
||||
case "failed" => Right(Failed)
|
||||
case "running" => Right(Running)
|
||||
case "stuck" => Right(Stuck)
|
||||
case "failed" => Right(Failed)
|
||||
case "cancelled" => Right(Cancelled)
|
||||
case "success" => Right(Success)
|
||||
case _ => Left(s"Not a job state: $str")
|
||||
case "success" => Right(Success)
|
||||
case _ => Left(s"Not a job state: $str")
|
||||
}
|
||||
|
||||
def unsafe(str: String): JobState =
|
||||
@ -60,7 +53,6 @@ object JobState {
|
||||
def asString(state: JobState): String =
|
||||
state.name
|
||||
|
||||
|
||||
implicit val jobStateEncoder: Encoder[JobState] =
|
||||
Encoder.encodeString.contramap(_.name)
|
||||
|
||||
|
@ -51,8 +51,8 @@ case class LenientUri(
|
||||
def open[F[_]: Sync]: Either[String, Resource[F, HttpURLConnection]] =
|
||||
toJavaUrl.map { url =>
|
||||
Resource
|
||||
.make(Sync[F].delay(url.openConnection().asInstanceOf[HttpURLConnection]))(
|
||||
conn => Sync[F].delay(conn.disconnect())
|
||||
.make(Sync[F].delay(url.openConnection().asInstanceOf[HttpURLConnection]))(conn =>
|
||||
Sync[F].delay(conn.disconnect())
|
||||
)
|
||||
}
|
||||
|
||||
@ -61,17 +61,16 @@ case class LenientUri(
|
||||
.emit(Either.catchNonFatal(new URL(asString)))
|
||||
.covary[F]
|
||||
.rethrow
|
||||
.flatMap(
|
||||
url => fs2.io.readInputStream(Sync[F].delay(url.openStream()), chunkSize, blocker, true)
|
||||
.flatMap(url =>
|
||||
fs2.io.readInputStream(Sync[F].delay(url.openStream()), chunkSize, blocker, true)
|
||||
)
|
||||
|
||||
def host: Option[String] =
|
||||
authority.map(
|
||||
a =>
|
||||
a.indexOf(':') match {
|
||||
case -1 => a
|
||||
case n => a.substring(0, n)
|
||||
}
|
||||
authority.map(a =>
|
||||
a.indexOf(':') match {
|
||||
case -1 => a
|
||||
case n => a.substring(0, n)
|
||||
}
|
||||
)
|
||||
|
||||
def asString: String = {
|
||||
|
@ -8,13 +8,11 @@ import io.circe.generic.semiauto._
|
||||
|
||||
case class MetaProposalList private (proposals: List[MetaProposal]) {
|
||||
|
||||
def isEmpty: Boolean = proposals.isEmpty
|
||||
def isEmpty: Boolean = proposals.isEmpty
|
||||
def nonEmpty: Boolean = proposals.nonEmpty
|
||||
|
||||
def hasResults(mt: MetaProposalType, mts: MetaProposalType*): Boolean = {
|
||||
(mts :+ mt).map(mtp => proposals.exists(_.proposalType == mtp)).
|
||||
reduce(_ && _)
|
||||
}
|
||||
def hasResults(mt: MetaProposalType, mts: MetaProposalType*): Boolean =
|
||||
(mts :+ mt).map(mtp => proposals.exists(_.proposalType == mtp)).reduce(_ && _)
|
||||
|
||||
def hasResultsAll: Boolean =
|
||||
proposals.map(_.proposalType).toSet == MetaProposalType.all.toSet
|
||||
@ -23,7 +21,7 @@ case class MetaProposalList private (proposals: List[MetaProposal]) {
|
||||
proposals.foldLeft(Set.empty[MetaProposalType])(_ + _.proposalType)
|
||||
|
||||
def fillEmptyFrom(ml: MetaProposalList): MetaProposalList = {
|
||||
val list = ml.proposals.foldLeft(proposals){ (mine, mp) =>
|
||||
val list = ml.proposals.foldLeft(proposals) { (mine, mp) =>
|
||||
if (hasResults(mp.proposalType)) mine
|
||||
else mp :: mine
|
||||
}
|
||||
@ -48,21 +46,24 @@ object MetaProposalList {
|
||||
fromSeq1(mt, refs.map(ref => Candidate(ref, Set(label))))
|
||||
|
||||
def fromSeq1(mt: MetaProposalType, refs: Seq[Candidate]): MetaProposalList =
|
||||
NonEmptyList.fromList(refs.toList).
|
||||
map(nl => MetaProposalList.of(MetaProposal(mt, nl))).
|
||||
getOrElse(empty)
|
||||
NonEmptyList
|
||||
.fromList(refs.toList)
|
||||
.map(nl => MetaProposalList.of(MetaProposal(mt, nl)))
|
||||
.getOrElse(empty)
|
||||
|
||||
def fromMap(m: Map[MetaProposalType, MetaProposal]): MetaProposalList = {
|
||||
def fromMap(m: Map[MetaProposalType, MetaProposal]): MetaProposalList =
|
||||
new MetaProposalList(m.toList.map({ case (k, v) => v.copy(proposalType = k) }))
|
||||
}
|
||||
|
||||
def flatten(ml: Seq[MetaProposalList]): MetaProposalList = {
|
||||
val init: Map[MetaProposalType, MetaProposal] = Map.empty
|
||||
|
||||
def updateMap(map: Map[MetaProposalType, MetaProposal], mp: MetaProposal): Map[MetaProposalType, MetaProposal] =
|
||||
def updateMap(
|
||||
map: Map[MetaProposalType, MetaProposal],
|
||||
mp: MetaProposal
|
||||
): Map[MetaProposalType, MetaProposal] =
|
||||
map.get(mp.proposalType) match {
|
||||
case Some(mp0) => map.updated(mp.proposalType, mp0.addIdRef(mp.values.toList))
|
||||
case None => map.updated(mp.proposalType, mp)
|
||||
case None => map.updated(mp.proposalType, mp)
|
||||
}
|
||||
|
||||
val merged = ml.foldLeft(init) { (map, el) =>
|
||||
|
@ -10,25 +10,25 @@ sealed trait MetaProposalType { self: Product =>
|
||||
|
||||
object MetaProposalType {
|
||||
|
||||
case object CorrOrg extends MetaProposalType
|
||||
case object CorrOrg extends MetaProposalType
|
||||
case object CorrPerson extends MetaProposalType
|
||||
case object ConcPerson extends MetaProposalType
|
||||
case object ConcEquip extends MetaProposalType
|
||||
case object DocDate extends MetaProposalType
|
||||
case object DueDate extends MetaProposalType
|
||||
case object ConcEquip extends MetaProposalType
|
||||
case object DocDate extends MetaProposalType
|
||||
case object DueDate extends MetaProposalType
|
||||
|
||||
val all: List[MetaProposalType] =
|
||||
List(CorrOrg, CorrPerson, ConcPerson, ConcEquip)
|
||||
|
||||
def fromString(str: String): Either[String, MetaProposalType] =
|
||||
str.toLowerCase match {
|
||||
case "corrorg" => Right(CorrOrg)
|
||||
case "corrorg" => Right(CorrOrg)
|
||||
case "corrperson" => Right(CorrPerson)
|
||||
case "concperson" => Right(ConcPerson)
|
||||
case "concequip" => Right(ConcEquip)
|
||||
case "docdate" => Right(DocDate)
|
||||
case "duedate" => Right(DueDate)
|
||||
case _ => Left(s"Invalid item-proposal-type: $str")
|
||||
case "concequip" => Right(ConcEquip)
|
||||
case "docdate" => Right(DocDate)
|
||||
case "duedate" => Right(DueDate)
|
||||
case _ => Left(s"Invalid item-proposal-type: $str")
|
||||
}
|
||||
|
||||
def unsafe(str: String): MetaProposalType =
|
||||
|
@ -11,31 +11,30 @@ sealed trait NerTag { self: Product =>
|
||||
object NerTag {
|
||||
|
||||
case object Organization extends NerTag
|
||||
case object Person extends NerTag
|
||||
case object Location extends NerTag
|
||||
case object Misc extends NerTag
|
||||
case object Email extends NerTag
|
||||
case object Website extends NerTag
|
||||
case object Date extends NerTag
|
||||
case object Person extends NerTag
|
||||
case object Location extends NerTag
|
||||
case object Misc extends NerTag
|
||||
case object Email extends NerTag
|
||||
case object Website extends NerTag
|
||||
case object Date extends NerTag
|
||||
|
||||
val all: List[NerTag] = List(Organization, Person, Location)
|
||||
|
||||
def fromString(str: String): Either[String, NerTag] =
|
||||
str.toLowerCase match {
|
||||
case "organization" => Right(Organization)
|
||||
case "person" => Right(Person)
|
||||
case "location" => Right(Location)
|
||||
case "misc" => Right(Misc)
|
||||
case "email" => Right(Email)
|
||||
case "website" => Right(Website)
|
||||
case "date" => Right(Date)
|
||||
case _ => Left(s"Invalid ner tag: $str")
|
||||
case "person" => Right(Person)
|
||||
case "location" => Right(Location)
|
||||
case "misc" => Right(Misc)
|
||||
case "email" => Right(Email)
|
||||
case "website" => Right(Website)
|
||||
case "date" => Right(Date)
|
||||
case _ => Left(s"Invalid ner tag: $str")
|
||||
}
|
||||
|
||||
def unsafe(str: String): NerTag =
|
||||
fromString(str).fold(sys.error, identity)
|
||||
|
||||
|
||||
implicit val jsonDecoder: Decoder[NerTag] =
|
||||
Decoder.decodeString.emap(fromString)
|
||||
implicit val jsonEncoder: Encoder[NerTag] =
|
||||
|
@ -24,12 +24,14 @@ object Implicits {
|
||||
ConfigReader[String].emap(reason(Ident.fromString))
|
||||
|
||||
implicit val byteVectorReader: ConfigReader[ByteVector] =
|
||||
ConfigReader[String].emap(reason(str => {
|
||||
ConfigReader[String].emap(reason { str =>
|
||||
if (str.startsWith("hex:")) ByteVector.fromHex(str.drop(4)).toRight("Invalid hex value.")
|
||||
else if (str.startsWith("b64:")) ByteVector.fromBase64(str.drop(4)).toRight("Invalid Base64 string.")
|
||||
else if (str.startsWith("b64:"))
|
||||
ByteVector.fromBase64(str.drop(4)).toRight("Invalid Base64 string.")
|
||||
else ByteVector.encodeUtf8(str).left.map(ex => s"Invalid utf8 string: ${ex.getMessage}")
|
||||
}))
|
||||
})
|
||||
|
||||
def reason[A: ClassTag](f: String => Either[String, A]): String => Either[FailureReason, A] =
|
||||
in => f(in).left.map(str => CannotConvert(in, implicitly[ClassTag[A]].runtimeClass.toString, str))
|
||||
in =>
|
||||
f(in).left.map(str => CannotConvert(in, implicitly[ClassTag[A]].runtimeClass.toString, str))
|
||||
}
|
||||
|
@ -2,9 +2,6 @@ package docspell.common
|
||||
|
||||
package object syntax {
|
||||
|
||||
object all extends EitherSyntax
|
||||
with StreamSyntax
|
||||
with StringSyntax
|
||||
with LoggerSyntax
|
||||
object all extends EitherSyntax with StreamSyntax with StringSyntax with LoggerSyntax
|
||||
|
||||
}
|
||||
|
@ -16,7 +16,6 @@ object QueryParam {
|
||||
implicit val queryStringDecoder: QueryParamDecoder[QueryString] =
|
||||
QueryParamDecoder[String].map(s => QueryString(s.trim.toLowerCase))
|
||||
|
||||
|
||||
// implicit val booleanDecoder: QueryParamDecoder[Boolean] =
|
||||
// QueryParamDecoder.fromUnsafeCast(qp => Option(qp.value).exists(_.equalsIgnoreCase("true")))(
|
||||
// "Boolean"
|
||||
|
@ -139,8 +139,7 @@ object ItemRoutes {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
final implicit class OptionString(opt: Option[String]) {
|
||||
implicit final class OptionString(opt: Option[String]) {
|
||||
def notEmpty: Option[String] =
|
||||
opt.map(_.trim).filter(_.nonEmpty)
|
||||
}
|
||||
|
@ -24,13 +24,13 @@ object MailSendRoutes {
|
||||
HttpRoutes.of {
|
||||
case req @ POST -> Root / Ident(name) / Ident(id) =>
|
||||
for {
|
||||
in <- req.as[SimpleMail]
|
||||
in <- req.as[SimpleMail]
|
||||
mail = convertIn(id, in)
|
||||
res <- mail.traverse(m => backend.mail.sendMail(user.account, name, m))
|
||||
res <- mail.traverse(m => backend.mail.sendMail(user.account, name, m))
|
||||
resp <- res.fold(
|
||||
err => Ok(BasicResult(false, s"Invalid mail data: $err")),
|
||||
res => Ok(convertOut(res))
|
||||
)
|
||||
err => Ok(BasicResult(false, s"Invalid mail data: $err")),
|
||||
res => Ok(convertOut(res))
|
||||
)
|
||||
} yield resp
|
||||
}
|
||||
}
|
||||
@ -39,7 +39,7 @@ object MailSendRoutes {
|
||||
for {
|
||||
rec <- s.recipients.traverse(EmilUtil.readMailAddress)
|
||||
fileIds <- s.attachmentIds.traverse(Ident.fromString)
|
||||
sel = if (s.addAllAttachments) AttachSelection.All else AttachSelection.Selected(fileIds)
|
||||
sel = if (s.addAllAttachments) AttachSelection.All else AttachSelection.Selected(fileIds)
|
||||
} yield ItemMail(item, s.subject, rec, s.body, sel)
|
||||
|
||||
def convertOut(res: SendResult): BasicResult =
|
||||
|
@ -29,7 +29,7 @@ object MailSettingsRoutes {
|
||||
case GET -> Root :? QueryParam.QueryOpt(q) =>
|
||||
for {
|
||||
list <- backend.mail.getSettings(user.account, q.map(_.q))
|
||||
res = list.map(convert)
|
||||
res = list.map(convert)
|
||||
resp <- Ok(EmailSettingsList(res.toList))
|
||||
} yield resp
|
||||
|
||||
@ -45,13 +45,13 @@ object MailSettingsRoutes {
|
||||
ru = makeSettings(in)
|
||||
up <- OptionT.liftF(ru.traverse(r => backend.mail.createSettings(user.account, r)))
|
||||
resp <- OptionT.liftF(
|
||||
Ok(
|
||||
up.fold(
|
||||
err => BasicResult(false, err),
|
||||
ar => Conversions.basicResult(ar, "Mail settings stored.")
|
||||
)
|
||||
)
|
||||
)
|
||||
Ok(
|
||||
up.fold(
|
||||
err => BasicResult(false, err),
|
||||
ar => Conversions.basicResult(ar, "Mail settings stored.")
|
||||
)
|
||||
)
|
||||
)
|
||||
} yield resp).getOrElseF(NotFound())
|
||||
|
||||
case req @ PUT -> Root / Ident(name) =>
|
||||
@ -60,24 +60,24 @@ object MailSettingsRoutes {
|
||||
ru = makeSettings(in)
|
||||
up <- OptionT.liftF(ru.traverse(r => backend.mail.updateSettings(user.account, name, r)))
|
||||
resp <- OptionT.liftF(
|
||||
Ok(
|
||||
up.fold(
|
||||
err => BasicResult(false, err),
|
||||
n =>
|
||||
if (n > 0) BasicResult(true, "Mail settings stored.")
|
||||
else BasicResult(false, "Mail settings could not be saved")
|
||||
)
|
||||
)
|
||||
)
|
||||
Ok(
|
||||
up.fold(
|
||||
err => BasicResult(false, err),
|
||||
n =>
|
||||
if (n > 0) BasicResult(true, "Mail settings stored.")
|
||||
else BasicResult(false, "Mail settings could not be saved")
|
||||
)
|
||||
)
|
||||
)
|
||||
} yield resp).getOrElseF(NotFound())
|
||||
|
||||
case DELETE -> Root / Ident(name) =>
|
||||
for {
|
||||
n <- backend.mail.deleteSettings(user.account, name)
|
||||
resp <- Ok(
|
||||
if (n > 0) BasicResult(true, "Mail settings removed")
|
||||
else BasicResult(false, "Mail settings could not be removed")
|
||||
)
|
||||
if (n > 0) BasicResult(true, "Mail settings removed")
|
||||
else BasicResult(false, "Mail settings could not be removed")
|
||||
)
|
||||
} yield resp
|
||||
}
|
||||
|
||||
|
@ -23,7 +23,7 @@ object SentMailRoutes {
|
||||
HttpRoutes.of {
|
||||
case GET -> Root / "item" / Ident(id) =>
|
||||
for {
|
||||
all <- backend.mail.getSentMailsForItem(user.account, id)
|
||||
all <- backend.mail.getSentMailsForItem(user.account, id)
|
||||
resp <- Ok(SentMails(all.map(convert).toList))
|
||||
} yield resp
|
||||
|
||||
@ -35,7 +35,7 @@ object SentMailRoutes {
|
||||
|
||||
case DELETE -> Root / "mail" / Ident(mailId) =>
|
||||
for {
|
||||
n <- backend.mail.deleteSentMail(user.account, mailId)
|
||||
n <- backend.mail.deleteSentMail(user.account, mailId)
|
||||
resp <- Ok(BasicResult(n > 0, s"Mails deleted: $n"))
|
||||
} yield resp
|
||||
}
|
||||
|
@ -37,7 +37,7 @@ object TemplateRoutes {
|
||||
new InnerRoutes[F] {
|
||||
def doc =
|
||||
HttpRoutes.of[F] {
|
||||
case GET -> Root =>
|
||||
case GET -> Root =>
|
||||
for {
|
||||
templ <- docTemplate
|
||||
resp <- Ok(DocData().render(templ), `Content-Type`(`text/html`))
|
||||
|
@ -194,8 +194,9 @@ object QItem {
|
||||
IC.cid.prefix("i").is(q.collective),
|
||||
IC.state.prefix("i").isOneOf(q.states),
|
||||
IC.incoming.prefix("i").isOrDiscard(q.direction),
|
||||
name.map(n => or(IC.name.prefix("i").lowerLike(n), IC.notes.prefix("i").lowerLike(n))).
|
||||
getOrElse(Fragment.empty),
|
||||
name
|
||||
.map(n => or(IC.name.prefix("i").lowerLike(n), IC.notes.prefix("i").lowerLike(n)))
|
||||
.getOrElse(Fragment.empty),
|
||||
RPerson.Columns.pid.prefix("p0").isOrDiscard(q.corrPerson),
|
||||
ROrganization.Columns.oid.prefix("o0").isOrDiscard(q.corrOrg),
|
||||
RPerson.Columns.pid.prefix("p1").isOrDiscard(q.concPerson),
|
||||
|
@ -8,12 +8,12 @@ object RFileMeta {
|
||||
val table = fr"filemeta"
|
||||
|
||||
object Columns {
|
||||
val id = Column("id")
|
||||
val id = Column("id")
|
||||
val timestamp = Column("timestamp")
|
||||
val mimetype = Column("mimetype")
|
||||
val length = Column("length")
|
||||
val checksum = Column("checksum")
|
||||
val chunks = Column("chunks")
|
||||
val mimetype = Column("mimetype")
|
||||
val length = Column("length")
|
||||
val checksum = Column("checksum")
|
||||
val chunks = Column("chunks")
|
||||
val chunksize = Column("chunksize")
|
||||
|
||||
val all = List(id, timestamp, mimetype, length, checksum, chunks, chunksize)
|
||||
|
@ -52,8 +52,16 @@ object RSentMail {
|
||||
for {
|
||||
user <- OptionT(RUser.findByAccount(accId))
|
||||
sm <- OptionT.liftF(
|
||||
RSentMail[ConnectionIO](user.uid, messageId, sender, connName, subject, recipients, body)
|
||||
)
|
||||
RSentMail[ConnectionIO](
|
||||
user.uid,
|
||||
messageId,
|
||||
sender,
|
||||
connName,
|
||||
subject,
|
||||
recipients,
|
||||
body
|
||||
)
|
||||
)
|
||||
si <- OptionT.liftF(RSentMailItem[ConnectionIO](itemId, sm.id, Some(sm.created)))
|
||||
} yield (sm, si)
|
||||
|
||||
|
@ -9,43 +9,47 @@ object Contact {
|
||||
private[this] val protocols = Set("ftp", "http", "https")
|
||||
|
||||
def annotate(text: String): Vector[NerLabel] =
|
||||
TextSplitter.splitToken[Nothing](text, " \t\r\n".toSet).
|
||||
map({ token =>
|
||||
if (isEmailAddress(token.value)) NerLabel(token.value, NerTag.Email, token.begin, token.end).some
|
||||
else if (isWebsite(token.value)) NerLabel(token.value, NerTag.Website, token.begin, token.end).some
|
||||
TextSplitter
|
||||
.splitToken[Nothing](text, " \t\r\n".toSet)
|
||||
.map({ token =>
|
||||
if (isEmailAddress(token.value))
|
||||
NerLabel(token.value, NerTag.Email, token.begin, token.end).some
|
||||
else if (isWebsite(token.value))
|
||||
NerLabel(token.value, NerTag.Website, token.begin, token.end).some
|
||||
else None
|
||||
}).
|
||||
flatMap(_.map(Stream.emit).getOrElse(Stream.empty)).
|
||||
toVector
|
||||
|
||||
})
|
||||
.flatMap(_.map(Stream.emit).getOrElse(Stream.empty))
|
||||
.toVector
|
||||
|
||||
def isEmailAddress(str: String): Boolean = {
|
||||
val atIdx = str.indexOf('@')
|
||||
if (atIdx <= 0 || str.indexOf('@', atIdx + 1) > 0) false
|
||||
else {
|
||||
val name = str.substring(0, atIdx)
|
||||
val dom = str.substring(atIdx + 1)
|
||||
val dom = str.substring(atIdx + 1)
|
||||
Domain.isDomain(dom) && name.forall(c => !c.isWhitespace)
|
||||
}
|
||||
}
|
||||
|
||||
def isWebsite(str: String): Boolean =
|
||||
LenientUri.parse(str).
|
||||
toOption.
|
||||
map(uri => protocols.contains(uri.scheme.head)).
|
||||
getOrElse(Domain.isDomain(str))
|
||||
LenientUri
|
||||
.parse(str)
|
||||
.toOption
|
||||
.map(uri => protocols.contains(uri.scheme.head))
|
||||
.getOrElse(Domain.isDomain(str))
|
||||
|
||||
def isDocspellOpenUpload(str: String): Boolean = {
|
||||
def isUploadPath(p: LenientUri.Path): Boolean =
|
||||
p match {
|
||||
case LenientUri.RootPath => false
|
||||
case LenientUri.RootPath => false
|
||||
case LenientUri.EmptyPath => false
|
||||
case LenientUri.NonEmptyPath(segs) =>
|
||||
Ident.fromString(segs.last).isRight &&
|
||||
segs.init.takeRight(3) == List("open", "upload", "item")
|
||||
}
|
||||
LenientUri.parse(str).
|
||||
toOption.
|
||||
exists(uri => protocols.contains(uri.scheme.head) && isUploadPath(uri.path))
|
||||
LenientUri
|
||||
.parse(str)
|
||||
.toOption
|
||||
.exists(uri => protocols.contains(uri.scheme.head) && isUploadPath(uri.path))
|
||||
}
|
||||
}
|
||||
|
@ -11,7 +11,7 @@ private[text] object Tld {
|
||||
/**
|
||||
* Some selected TLDs.
|
||||
*/
|
||||
private [this] val known = List(
|
||||
private[this] val known = List(
|
||||
".com",
|
||||
".org",
|
||||
".net",
|
||||
|
@ -10,16 +10,22 @@ import scala.util.Try
|
||||
|
||||
object DateFind {
|
||||
|
||||
def findDates(text: String, lang: Language): Stream[Pure, NerDateLabel] = {
|
||||
TextSplitter.splitToken(text, " \t.,\n\r/".toSet).
|
||||
sliding(3).
|
||||
filter(_.length == 3).
|
||||
map(q => SimpleDate.fromParts(q.toList, lang).
|
||||
map(sd => NerDateLabel(sd.toLocalDate,
|
||||
NerLabel(text.substring(q(0).begin, q(2).end), NerTag.Date, q(0).begin, q(1).end)))).
|
||||
collect({ case Some(d) => d })
|
||||
}
|
||||
|
||||
def findDates(text: String, lang: Language): Stream[Pure, NerDateLabel] =
|
||||
TextSplitter
|
||||
.splitToken(text, " \t.,\n\r/".toSet)
|
||||
.sliding(3)
|
||||
.filter(_.length == 3)
|
||||
.map(q =>
|
||||
SimpleDate
|
||||
.fromParts(q.toList, lang)
|
||||
.map(sd =>
|
||||
NerDateLabel(
|
||||
sd.toLocalDate,
|
||||
NerLabel(text.substring(q(0).begin, q(2).end), NerTag.Date, q(0).begin, q(1).end)
|
||||
)
|
||||
)
|
||||
)
|
||||
.collect({ case Some(d) => d })
|
||||
|
||||
private case class SimpleDate(year: Int, month: Int, day: Int) {
|
||||
def toLocalDate: LocalDate =
|
||||
@ -27,13 +33,13 @@ object DateFind {
|
||||
}
|
||||
|
||||
private object SimpleDate {
|
||||
val p0 = readYear >> readMonth >> readDay map {
|
||||
val p0 = (readYear >> readMonth >> readDay).map {
|
||||
case ((y, m), d) => SimpleDate(y, m, d)
|
||||
}
|
||||
val p1 = readDay >> readMonth >> readYear map {
|
||||
val p1 = (readDay >> readMonth >> readYear).map {
|
||||
case ((d, m), y) => SimpleDate(y, m, d)
|
||||
}
|
||||
val p2 = readMonth >> readDay >> readYear map {
|
||||
val p2 = (readMonth >> readDay >> readYear).map {
|
||||
case ((m, d), y) => SimpleDate(y, m, d)
|
||||
}
|
||||
|
||||
@ -46,14 +52,14 @@ object DateFind {
|
||||
p.read(parts).toOption
|
||||
}
|
||||
|
||||
|
||||
def readYear: Reader[Int] = {
|
||||
Reader.readFirst(w => w.value.length match {
|
||||
case 2 => Try(w.value.toInt).filter(n => n >= 0).toOption
|
||||
case 4 => Try(w.value.toInt).filter(n => n > 1000).toOption
|
||||
case _ => None
|
||||
})
|
||||
}
|
||||
def readYear: Reader[Int] =
|
||||
Reader.readFirst(w =>
|
||||
w.value.length match {
|
||||
case 2 => Try(w.value.toInt).filter(n => n >= 0).toOption
|
||||
case 4 => Try(w.value.toInt).filter(n => n > 1000).toOption
|
||||
case _ => None
|
||||
}
|
||||
)
|
||||
|
||||
def readMonth: Reader[Int] =
|
||||
Reader.readFirst(w => Some(months.indexWhere(_.contains(w.value))).filter(_ > 0).map(_ + 1))
|
||||
@ -69,10 +75,12 @@ object DateFind {
|
||||
Reader(read.andThen(_.map(f)))
|
||||
|
||||
def or(other: Reader[A]): Reader[A] =
|
||||
Reader(words => read(words) match {
|
||||
case Result.Failure => other.read(words)
|
||||
case s @ Result.Success(_, _) => s
|
||||
})
|
||||
Reader(words =>
|
||||
read(words) match {
|
||||
case Result.Failure => other.read(words)
|
||||
case s @ Result.Success(_, _) => s
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
object Reader {
|
||||
@ -81,12 +89,11 @@ object DateFind {
|
||||
|
||||
def readFirst[A](f: Word => Option[A]): Reader[A] =
|
||||
Reader({
|
||||
case Nil => Result.Failure
|
||||
case Nil => Result.Failure
|
||||
case a :: as => f(a).map(value => Result.Success(value, as)).getOrElse(Result.Failure)
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
sealed trait Result[+A] {
|
||||
def toOption: Option[A]
|
||||
def map[B](f: A => B): Result[B]
|
||||
@ -95,14 +102,14 @@ object DateFind {
|
||||
|
||||
object Result {
|
||||
final case class Success[A](value: A, rest: List[Word]) extends Result[A] {
|
||||
val toOption = Some(value)
|
||||
val toOption = Some(value)
|
||||
def map[B](f: A => B): Result[B] = Success(f(value), rest)
|
||||
def next[B](r: Reader[B]): Result[(A, B)] =
|
||||
r.read(rest).map(b => (value, b))
|
||||
}
|
||||
final case object Failure extends Result[Nothing] {
|
||||
val toOption = None
|
||||
def map[B](f: Nothing => B): Result[B] = this
|
||||
val toOption = None
|
||||
def map[B](f: Nothing => B): Result[B] = this
|
||||
def next[B](r: Reader[B]): Result[(Nothing, B)] = this
|
||||
}
|
||||
}
|
||||
|
@ -14,23 +14,28 @@ import java.net.URL
|
||||
import scala.util.Using
|
||||
|
||||
object StanfordNerClassifier {
|
||||
private [this] val logger = getLogger
|
||||
private[this] val logger = getLogger
|
||||
|
||||
lazy val germanNerClassifier = makeClassifier(Language.German)
|
||||
lazy val germanNerClassifier = makeClassifier(Language.German)
|
||||
lazy val englishNerClassifier = makeClassifier(Language.English)
|
||||
|
||||
def nerAnnotate(lang: Language)(text: String): Vector[NerLabel] = {
|
||||
val nerClassifier = lang match {
|
||||
case Language.English => englishNerClassifier
|
||||
case Language.German => germanNerClassifier
|
||||
case Language.German => germanNerClassifier
|
||||
}
|
||||
nerClassifier.classify(text).asScala.flatMap(a => a.asScala).
|
||||
collect(Function.unlift(label => {
|
||||
nerClassifier
|
||||
.classify(text)
|
||||
.asScala
|
||||
.flatMap(a => a.asScala)
|
||||
.collect(Function.unlift { label =>
|
||||
val tag = label.get(classOf[CoreAnnotations.AnswerAnnotation])
|
||||
NerTag.fromString(Option(tag).getOrElse("")).toOption.
|
||||
map(t => NerLabel(label.word(), t, label.beginPosition(), label.endPosition()))
|
||||
})).
|
||||
toVector
|
||||
NerTag
|
||||
.fromString(Option(tag).getOrElse(""))
|
||||
.toOption
|
||||
.map(t => NerLabel(label.word(), t, label.beginPosition(), label.endPosition()))
|
||||
})
|
||||
.toVector
|
||||
}
|
||||
|
||||
private def makeClassifier(lang: Language): AbstractSequenceClassifier[CoreLabel] = {
|
||||
@ -48,7 +53,9 @@ object StanfordNerClassifier {
|
||||
|
||||
check(lang match {
|
||||
case Language.German =>
|
||||
getClass.getResource("/edu/stanford/nlp/models/ner/german.conll.germeval2014.hgc_175m_600.crf.ser.gz")
|
||||
getClass.getResource(
|
||||
"/edu/stanford/nlp/models/ner/german.conll.germeval2014.hgc_175m_600.crf.ser.gz"
|
||||
)
|
||||
case Language.English =>
|
||||
getClass.getResource("/edu/stanford/nlp/models/ner/english.all.3class.distsim.crf.ser.gz")
|
||||
})
|
||||
|
@ -5,11 +5,11 @@ import java.nio.file.{Path, Paths}
|
||||
import docspell.common._
|
||||
|
||||
case class Config(
|
||||
allowedContentTypes: Set[MimeType]
|
||||
, ghostscript: Config.Ghostscript
|
||||
, pageRange: Config.PageRange
|
||||
, unpaper: Config.Unpaper
|
||||
, tesseract: Config.Tesseract
|
||||
allowedContentTypes: Set[MimeType],
|
||||
ghostscript: Config.Ghostscript,
|
||||
pageRange: Config.PageRange,
|
||||
unpaper: Config.Unpaper,
|
||||
tesseract: Config.Tesseract
|
||||
) {
|
||||
|
||||
def isAllowed(mt: MimeType): Boolean =
|
||||
@ -22,7 +22,7 @@ object Config {
|
||||
case class Command(program: String, args: Seq[String], timeout: Duration) {
|
||||
|
||||
def mapArgs(f: String => String): Command =
|
||||
Command(program, args map f, timeout)
|
||||
Command(program, args.map(f), timeout)
|
||||
|
||||
def toCmd: List[String] =
|
||||
program :: args.toList
|
||||
@ -44,23 +44,23 @@ object Config {
|
||||
),
|
||||
pageRange = PageRange(10),
|
||||
ghostscript = Ghostscript(
|
||||
Command("gs", Seq("-dNOPAUSE"
|
||||
, "-dBATCH"
|
||||
, "-dSAFER"
|
||||
, "-sDEVICE=tiffscaled8"
|
||||
, "-sOutputFile={{outfile}}"
|
||||
, "{{infile}}"),
|
||||
Duration.seconds(30)),
|
||||
Paths.get(System.getProperty("java.io.tmpdir")).
|
||||
resolve("docspell-extraction")),
|
||||
unpaper = Unpaper(Command("unpaper"
|
||||
, Seq("{{infile}}", "{{outfile}}")
|
||||
, Duration.seconds(30))),
|
||||
Command(
|
||||
"gs",
|
||||
Seq(
|
||||
"-dNOPAUSE",
|
||||
"-dBATCH",
|
||||
"-dSAFER",
|
||||
"-sDEVICE=tiffscaled8",
|
||||
"-sOutputFile={{outfile}}",
|
||||
"{{infile}}"
|
||||
),
|
||||
Duration.seconds(30)
|
||||
),
|
||||
Paths.get(System.getProperty("java.io.tmpdir")).resolve("docspell-extraction")
|
||||
),
|
||||
unpaper = Unpaper(Command("unpaper", Seq("{{infile}}", "{{outfile}}"), Duration.seconds(30))),
|
||||
tesseract = Tesseract(
|
||||
Command("tesseract", Seq("{{file}}"
|
||||
, "stdout"
|
||||
, "-l"
|
||||
, "{{lang}}"),
|
||||
Duration.minutes(1)))
|
||||
Command("tesseract", Seq("{{file}}", "stdout", "-l", "{{lang}}"), Duration.minutes(1))
|
||||
)
|
||||
)
|
||||
}
|
||||
|
@ -11,71 +11,106 @@ object Ocr {
|
||||
|
||||
/** Extract the text of all pages in the given pdf file.
|
||||
*/
|
||||
def extractPdf[F[_]: Sync: ContextShift](pdf: Stream[F, Byte], blocker: Blocker, lang: String, config: Config): Stream[F, String] =
|
||||
def extractPdf[F[_]: Sync: ContextShift](
|
||||
pdf: Stream[F, Byte],
|
||||
blocker: Blocker,
|
||||
lang: String,
|
||||
config: Config
|
||||
): Stream[F, String] =
|
||||
File.withTempDir(config.ghostscript.workingDir, "extractpdf") { wd =>
|
||||
runGhostscript(pdf, config, wd, blocker).
|
||||
flatMap({ tmpImg =>
|
||||
runGhostscript(pdf, config, wd, blocker)
|
||||
.flatMap({ tmpImg =>
|
||||
runTesseractFile(tmpImg, blocker, lang, config)
|
||||
}).
|
||||
fold1(_ + "\n\n\n" + _)
|
||||
})
|
||||
.fold1(_ + "\n\n\n" + _)
|
||||
}
|
||||
|
||||
/** Extract the text from the given image file
|
||||
*/
|
||||
def extractImage[F[_]: Sync: ContextShift](img: Stream[F, Byte], blocker: Blocker, lang: String, config: Config): Stream[F, String] =
|
||||
def extractImage[F[_]: Sync: ContextShift](
|
||||
img: Stream[F, Byte],
|
||||
blocker: Blocker,
|
||||
lang: String,
|
||||
config: Config
|
||||
): Stream[F, String] =
|
||||
runTesseractStdin(img, blocker, lang, config)
|
||||
|
||||
|
||||
def extractPdFFile[F[_]: Sync: ContextShift](pdf: Path, blocker: Blocker, lang: String, config: Config): Stream[F, String] =
|
||||
def extractPdFFile[F[_]: Sync: ContextShift](
|
||||
pdf: Path,
|
||||
blocker: Blocker,
|
||||
lang: String,
|
||||
config: Config
|
||||
): Stream[F, String] =
|
||||
File.withTempDir(config.ghostscript.workingDir, "extractpdf") { wd =>
|
||||
runGhostscriptFile(pdf, config.ghostscript.command, wd, blocker).
|
||||
flatMap({ tif =>
|
||||
runGhostscriptFile(pdf, config.ghostscript.command, wd, blocker)
|
||||
.flatMap({ tif =>
|
||||
runTesseractFile(tif, blocker, lang, config)
|
||||
}).
|
||||
fold1(_ + "\n\n\n" + _)
|
||||
})
|
||||
.fold1(_ + "\n\n\n" + _)
|
||||
}
|
||||
|
||||
def extractImageFile[F[_]: Sync: ContextShift](img: Path, blocker: Blocker, lang: String, config: Config): Stream[F, String] =
|
||||
def extractImageFile[F[_]: Sync: ContextShift](
|
||||
img: Path,
|
||||
blocker: Blocker,
|
||||
lang: String,
|
||||
config: Config
|
||||
): Stream[F, String] =
|
||||
runTesseractFile(img, blocker, lang, config)
|
||||
|
||||
/** Run ghostscript to extract all pdf pages into tiff files. The
|
||||
* files are stored to a temporary location on disk and returned.
|
||||
*/
|
||||
private[text] def runGhostscript[F[_]: Sync: ContextShift](
|
||||
pdf: Stream[F, Byte]
|
||||
, cfg: Config
|
||||
, wd: Path
|
||||
, blocker: Blocker): Stream[F, Path] = {
|
||||
pdf: Stream[F, Byte],
|
||||
cfg: Config,
|
||||
wd: Path,
|
||||
blocker: Blocker
|
||||
): Stream[F, Path] = {
|
||||
val xargs =
|
||||
if (cfg.pageRange.begin > 0) s"-dLastPage=${cfg.pageRange.begin}" +: cfg.ghostscript.command.args
|
||||
if (cfg.pageRange.begin > 0)
|
||||
s"-dLastPage=${cfg.pageRange.begin}" +: cfg.ghostscript.command.args
|
||||
else cfg.ghostscript.command.args
|
||||
val cmd = cfg.ghostscript.command.copy(args = xargs).mapArgs(replace(Map(
|
||||
"{{infile}}" -> "-",
|
||||
"{{outfile}}" -> "%d.tif"
|
||||
)))
|
||||
SystemCommand.execSuccess(cmd, blocker, wd = Some(wd), stdin = pdf).
|
||||
evalMap({ _ =>
|
||||
val cmd = cfg.ghostscript.command
|
||||
.copy(args = xargs)
|
||||
.mapArgs(
|
||||
replace(
|
||||
Map(
|
||||
"{{infile}}" -> "-",
|
||||
"{{outfile}}" -> "%d.tif"
|
||||
)
|
||||
)
|
||||
)
|
||||
SystemCommand
|
||||
.execSuccess(cmd, blocker, wd = Some(wd), stdin = pdf)
|
||||
.evalMap({ _ =>
|
||||
File.listFiles(pathEndsWith(".tif"), wd)
|
||||
}).
|
||||
flatMap(fs => Stream.emits(fs))
|
||||
})
|
||||
.flatMap(fs => Stream.emits(fs))
|
||||
}
|
||||
|
||||
/** Run ghostscript to extract all pdf pages into tiff files. The
|
||||
* files are stored to a temporary location on disk and returned.
|
||||
*/
|
||||
private[text] def runGhostscriptFile[F[_]: Sync: ContextShift](
|
||||
pdf: Path
|
||||
, ghostscript: Config.Command
|
||||
, wd: Path, blocker: Blocker): Stream[F, Path] = {
|
||||
val cmd = ghostscript.mapArgs(replace(Map(
|
||||
"{{infile}}" -> pdf.toAbsolutePath.toString,
|
||||
"{{outfile}}" -> "%d.tif"
|
||||
)))
|
||||
SystemCommand.execSuccess[F](cmd, blocker, wd = Some(wd)).
|
||||
evalMap({ _ =>
|
||||
pdf: Path,
|
||||
ghostscript: Config.Command,
|
||||
wd: Path,
|
||||
blocker: Blocker
|
||||
): Stream[F, Path] = {
|
||||
val cmd = ghostscript.mapArgs(
|
||||
replace(
|
||||
Map(
|
||||
"{{infile}}" -> pdf.toAbsolutePath.toString,
|
||||
"{{outfile}}" -> "%d.tif"
|
||||
)
|
||||
)
|
||||
)
|
||||
SystemCommand
|
||||
.execSuccess[F](cmd, blocker, wd = Some(wd))
|
||||
.evalMap({ _ =>
|
||||
File.listFiles(pathEndsWith(".tif"), wd)
|
||||
}).
|
||||
flatMap(fs => Stream.emits(fs))
|
||||
})
|
||||
.flatMap(fs => Stream.emits(fs))
|
||||
}
|
||||
|
||||
private def pathEndsWith(ext: String): Path => Boolean =
|
||||
@ -84,65 +119,72 @@ object Ocr {
|
||||
/** Run unpaper to optimize the image for ocr. The
|
||||
* files are stored to a temporary location on disk and returned.
|
||||
*/
|
||||
private[text] def runUnpaperFile[F[_]: Sync: ContextShift](img: Path
|
||||
, unpaper: Config.Command
|
||||
, wd: Path, blocker: Blocker): Stream[F, Path] = {
|
||||
val targetFile = img.resolveSibling("u-"+ img.getFileName.toString).toAbsolutePath
|
||||
val cmd = unpaper.mapArgs(replace(Map(
|
||||
"{{infile}}" -> img.toAbsolutePath.toString,
|
||||
"{{outfile}}" -> targetFile.toString
|
||||
)))
|
||||
SystemCommand.execSuccess[F](cmd, blocker, wd = Some(wd)).
|
||||
map(_ => targetFile).
|
||||
handleErrorWith(th => {
|
||||
logger.warn(s"Unpaper command failed: ${th.getMessage}. Using input file for text extraction.")
|
||||
private[text] def runUnpaperFile[F[_]: Sync: ContextShift](
|
||||
img: Path,
|
||||
unpaper: Config.Command,
|
||||
wd: Path,
|
||||
blocker: Blocker
|
||||
): Stream[F, Path] = {
|
||||
val targetFile = img.resolveSibling("u-" + img.getFileName.toString).toAbsolutePath
|
||||
val cmd = unpaper.mapArgs(
|
||||
replace(
|
||||
Map(
|
||||
"{{infile}}" -> img.toAbsolutePath.toString,
|
||||
"{{outfile}}" -> targetFile.toString
|
||||
)
|
||||
)
|
||||
)
|
||||
SystemCommand.execSuccess[F](cmd, blocker, wd = Some(wd)).map(_ => targetFile).handleErrorWith {
|
||||
th =>
|
||||
logger
|
||||
.warn(s"Unpaper command failed: ${th.getMessage}. Using input file for text extraction.")
|
||||
Stream.emit(img)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/** Run tesseract on the given image file and return the extracted
|
||||
* text.
|
||||
*/
|
||||
private[text] def runTesseractFile[F[_]: Sync: ContextShift](
|
||||
img: Path
|
||||
, blocker: Blocker
|
||||
, lang: String
|
||||
, config: Config): Stream[F, String] = {
|
||||
img: Path,
|
||||
blocker: Blocker,
|
||||
lang: String,
|
||||
config: Config
|
||||
): Stream[F, String] =
|
||||
// tesseract cannot cope with absolute filenames
|
||||
// so use the parent as working dir
|
||||
runUnpaperFile(img, config.unpaper.command, img.getParent, blocker).
|
||||
flatMap(uimg => {
|
||||
val cmd = config.tesseract.command.mapArgs(replace(Map(
|
||||
"{{file}}" -> uimg.getFileName.toString
|
||||
, "{{lang}}" -> fixLanguage(lang))))
|
||||
SystemCommand.execSuccess[F](cmd, blocker, wd = Some(uimg.getParent)).map(_.stdout)
|
||||
})
|
||||
}
|
||||
|
||||
runUnpaperFile(img, config.unpaper.command, img.getParent, blocker).flatMap { uimg =>
|
||||
val cmd = config.tesseract.command.mapArgs(
|
||||
replace(Map("{{file}}" -> uimg.getFileName.toString, "{{lang}}" -> fixLanguage(lang)))
|
||||
)
|
||||
SystemCommand.execSuccess[F](cmd, blocker, wd = Some(uimg.getParent)).map(_.stdout)
|
||||
}
|
||||
|
||||
/** Run tesseract on the given image file and return the extracted
|
||||
* text.
|
||||
*/
|
||||
private[text] def runTesseractStdin[F[_]: Sync: ContextShift](
|
||||
img: Stream[F, Byte]
|
||||
, blocker: Blocker
|
||||
, lang: String
|
||||
, config: Config): Stream[F, String] = {
|
||||
val cmd = config.tesseract.command.mapArgs(replace(Map(
|
||||
"{{file}}" -> "stdin"
|
||||
, "{{lang}}" -> fixLanguage(lang))))
|
||||
img: Stream[F, Byte],
|
||||
blocker: Blocker,
|
||||
lang: String,
|
||||
config: Config
|
||||
): Stream[F, String] = {
|
||||
val cmd = config.tesseract.command
|
||||
.mapArgs(replace(Map("{{file}}" -> "stdin", "{{lang}}" -> fixLanguage(lang))))
|
||||
SystemCommand.execSuccess(cmd, blocker, stdin = img).map(_.stdout)
|
||||
}
|
||||
|
||||
private def replace(repl: Map[String, String]): String => String =
|
||||
s => repl.foldLeft(s) { case (res, (k, v)) =>
|
||||
res.replace(k, v)
|
||||
}
|
||||
s =>
|
||||
repl.foldLeft(s) {
|
||||
case (res, (k, v)) =>
|
||||
res.replace(k, v)
|
||||
}
|
||||
|
||||
private def fixLanguage(lang: String): String =
|
||||
lang match {
|
||||
case "de" => "deu"
|
||||
case "en" => "eng"
|
||||
case l => l
|
||||
case l => l
|
||||
}
|
||||
}
|
||||
|
@ -16,57 +16,87 @@ object SystemCommand {
|
||||
|
||||
final case class Result(rc: Int, stdout: String, stderr: String)
|
||||
|
||||
def exec[F[_]: Sync: ContextShift]( cmd: Config.Command
|
||||
, blocker: Blocker
|
||||
, wd: Option[Path] = None
|
||||
, stdin: Stream[F, Byte] = Stream.empty): Stream[F, Result] =
|
||||
startProcess(cmd, wd){ proc =>
|
||||
def exec[F[_]: Sync: ContextShift](
|
||||
cmd: Config.Command,
|
||||
blocker: Blocker,
|
||||
wd: Option[Path] = None,
|
||||
stdin: Stream[F, Byte] = Stream.empty
|
||||
): Stream[F, Result] =
|
||||
startProcess(cmd, wd) { proc =>
|
||||
Stream.eval {
|
||||
for {
|
||||
_ <- writeToProcess(stdin, proc, blocker)
|
||||
term <- Sync[F].delay(proc.waitFor(cmd.timeout.seconds, TimeUnit.SECONDS))
|
||||
_ <- if (term) logger.fdebug(s"Command `${cmd.cmdString}` finished: ${proc.exitValue}")
|
||||
else logger.fwarn(s"Command `${cmd.cmdString}` did not finish in ${cmd.timeout.formatExact}!")
|
||||
_ <- if (!term) timeoutError(proc, cmd) else Sync[F].pure(())
|
||||
out <- if (term) inputStreamToString(proc.getInputStream, blocker) else Sync[F].pure("")
|
||||
err <- if (term) inputStreamToString(proc.getErrorStream, blocker) else Sync[F].pure("")
|
||||
_ <- writeToProcess(stdin, proc, blocker)
|
||||
term <- Sync[F].delay(proc.waitFor(cmd.timeout.seconds, TimeUnit.SECONDS))
|
||||
_ <- if (term) logger.fdebug(s"Command `${cmd.cmdString}` finished: ${proc.exitValue}")
|
||||
else
|
||||
logger.fwarn(
|
||||
s"Command `${cmd.cmdString}` did not finish in ${cmd.timeout.formatExact}!"
|
||||
)
|
||||
_ <- if (!term) timeoutError(proc, cmd) else Sync[F].pure(())
|
||||
out <- if (term) inputStreamToString(proc.getInputStream, blocker) else Sync[F].pure("")
|
||||
err <- if (term) inputStreamToString(proc.getErrorStream, blocker) else Sync[F].pure("")
|
||||
} yield Result(proc.exitValue, out, err)
|
||||
}
|
||||
}
|
||||
|
||||
def execSuccess[F[_]: Sync: ContextShift](cmd: Config.Command, blocker: Blocker, wd: Option[Path] = None, stdin: Stream[F, Byte] = Stream.empty): Stream[F, Result] =
|
||||
def execSuccess[F[_]: Sync: ContextShift](
|
||||
cmd: Config.Command,
|
||||
blocker: Blocker,
|
||||
wd: Option[Path] = None,
|
||||
stdin: Stream[F, Byte] = Stream.empty
|
||||
): Stream[F, Result] =
|
||||
exec(cmd, blocker, wd, stdin).flatMap { r =>
|
||||
if (r.rc != 0) Stream.raiseError[F](new Exception(s"Command `${cmd.cmdString}` returned non-zero exit code ${r.rc}. Stderr: ${r.stderr}"))
|
||||
if (r.rc != 0)
|
||||
Stream.raiseError[F](
|
||||
new Exception(
|
||||
s"Command `${cmd.cmdString}` returned non-zero exit code ${r.rc}. Stderr: ${r.stderr}"
|
||||
)
|
||||
)
|
||||
else Stream.emit(r)
|
||||
}
|
||||
|
||||
private def startProcess[F[_]: Sync,A](cmd: Config.Command, wd: Option[Path])(f: Process => Stream[F,A]): Stream[F, A] = {
|
||||
private def startProcess[F[_]: Sync, A](cmd: Config.Command, wd: Option[Path])(
|
||||
f: Process => Stream[F, A]
|
||||
): Stream[F, A] = {
|
||||
val log = logger.fdebug(s"Running external command: ${cmd.cmdString}")
|
||||
val proc = log *> Sync[F].delay {
|
||||
val pb = new ProcessBuilder(cmd.toCmd.asJava)
|
||||
wd.map(_.toFile).foreach(pb.directory)
|
||||
pb.start()
|
||||
}
|
||||
Stream.bracket(proc)(p => logger.fdebug(s"Closing process: `${cmd.cmdString}`").map { _ =>
|
||||
p.destroy()
|
||||
}).flatMap(f)
|
||||
Stream
|
||||
.bracket(proc)(p =>
|
||||
logger.fdebug(s"Closing process: `${cmd.cmdString}`").map { _ =>
|
||||
p.destroy()
|
||||
}
|
||||
)
|
||||
.flatMap(f)
|
||||
}
|
||||
|
||||
private def inputStreamToString[F[_]: Sync: ContextShift](in: InputStream, blocker: Blocker): F[String] =
|
||||
io.readInputStream(Sync[F].pure(in), 16 * 1024, blocker, closeAfterUse = false).
|
||||
through(text.utf8Decode).
|
||||
chunks.
|
||||
map(_.toVector.mkString).
|
||||
fold1(_ + _).
|
||||
compile.last.
|
||||
map(_.getOrElse(""))
|
||||
private def inputStreamToString[F[_]: Sync: ContextShift](
|
||||
in: InputStream,
|
||||
blocker: Blocker
|
||||
): F[String] =
|
||||
io.readInputStream(Sync[F].pure(in), 16 * 1024, blocker, closeAfterUse = false)
|
||||
.through(text.utf8Decode)
|
||||
.chunks
|
||||
.map(_.toVector.mkString)
|
||||
.fold1(_ + _)
|
||||
.compile
|
||||
.last
|
||||
.map(_.getOrElse(""))
|
||||
|
||||
private def writeToProcess[F[_]: Sync: ContextShift](data: Stream[F, Byte], proc: Process, blocker: Blocker): F[Unit] =
|
||||
data.through(io.writeOutputStream(Sync[F].delay(proc.getOutputStream), blocker)).
|
||||
compile.drain
|
||||
private def writeToProcess[F[_]: Sync: ContextShift](
|
||||
data: Stream[F, Byte],
|
||||
proc: Process,
|
||||
blocker: Blocker
|
||||
): F[Unit] =
|
||||
data.through(io.writeOutputStream(Sync[F].delay(proc.getOutputStream), blocker)).compile.drain
|
||||
|
||||
private def timeoutError[F[_]: Sync](proc: Process, cmd: Config.Command): F[Unit] =
|
||||
Sync[F].delay(proc.destroyForcibly()).attempt *> {
|
||||
Sync[F].raiseError(new Exception(s"Command `${cmd.cmdString}` timed out (${cmd.timeout.formatExact})"))
|
||||
Sync[F].raiseError(
|
||||
new Exception(s"Command `${cmd.cmdString}` timed out (${cmd.timeout.formatExact})")
|
||||
)
|
||||
}
|
||||
}
|
||||
|
@ -12,18 +12,17 @@ object TikaMimetype {
|
||||
private val tika = new TikaConfig().getDetector
|
||||
|
||||
private def convert(mt: MediaType): MimeType =
|
||||
Option(mt).map(_.toString).
|
||||
map(MimeType.parse).
|
||||
flatMap(_.toOption).
|
||||
map(normalize).
|
||||
getOrElse(MimeType.octetStream)
|
||||
Option(mt)
|
||||
.map(_.toString)
|
||||
.map(MimeType.parse)
|
||||
.flatMap(_.toOption)
|
||||
.map(normalize)
|
||||
.getOrElse(MimeType.octetStream)
|
||||
|
||||
private def makeMetadata(hint: MimeTypeHint): Metadata = {
|
||||
val md = new Metadata
|
||||
hint.filename.
|
||||
foreach(md.set(TikaMetadataKeys.RESOURCE_NAME_KEY, _))
|
||||
hint.advertised.
|
||||
foreach(md.set(HttpHeaders.CONTENT_TYPE, _))
|
||||
hint.filename.foreach(md.set(TikaMetadataKeys.RESOURCE_NAME_KEY, _))
|
||||
hint.advertised.foreach(md.set(HttpHeaders.CONTENT_TYPE, _))
|
||||
md
|
||||
}
|
||||
|
||||
@ -33,13 +32,10 @@ object TikaMimetype {
|
||||
case _ => in
|
||||
}
|
||||
|
||||
private def fromBytes(bv: Array[Byte], hint: MimeTypeHint): MimeType = {
|
||||
private def fromBytes(bv: Array[Byte], hint: MimeTypeHint): MimeType =
|
||||
convert(tika.detect(new java.io.ByteArrayInputStream(bv), makeMetadata(hint)))
|
||||
}
|
||||
|
||||
def detect[F[_]: Sync](data: Stream[F, Byte]): F[MimeType] =
|
||||
data.take(1024).
|
||||
compile.toVector.
|
||||
map(bytes => fromBytes(bytes.toArray, MimeTypeHint.none))
|
||||
data.take(1024).compile.toVector.map(bytes => fromBytes(bytes.toArray, MimeTypeHint.none))
|
||||
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user