Apply scalafmt to all files

This commit is contained in:
Eike Kettner 2020-02-09 01:54:11 +01:00
parent 6a9ec42a03
commit 5c37efeaba
32 changed files with 442 additions and 362 deletions

View File

@ -32,8 +32,8 @@ object BackendApp {
def create[F[_]: ConcurrentEffect: ContextShift]( def create[F[_]: ConcurrentEffect: ContextShift](
cfg: Config, cfg: Config,
store: Store[F], store: Store[F],
httpClientEc: ExecutionContext, httpClientEc: ExecutionContext,
blocker: Blocker blocker: Blocker
): Resource[F, BackendApp[F]] = ): Resource[F, BackendApp[F]] =
for { for {
queue <- JobQueue(store) queue <- JobQueue(store)

View File

@ -176,7 +176,7 @@ object OItem {
def findByFileSource(checksum: String, sourceId: Ident): F[Vector[RItem]] = def findByFileSource(checksum: String, sourceId: Ident): F[Vector[RItem]] =
store.transact((for { store.transact((for {
coll <- OptionT(RSource.findCollective(sourceId)) coll <- OptionT(RSource.findCollective(sourceId))
items <- OptionT.liftF(QItem.findByChecksum(checksum, coll)) items <- OptionT.liftF(QItem.findByChecksum(checksum, coll))
} yield items).getOrElse(Vector.empty)) } yield items).getOrElse(Vector.empty))

View File

@ -113,10 +113,10 @@ object OMail {
def createSettings(accId: AccountId, s: SmtpSettings): F[AddResult] = def createSettings(accId: AccountId, s: SmtpSettings): F[AddResult] =
(for { (for {
ru <- OptionT(store.transact(s.toRecord(accId).value)) ru <- OptionT(store.transact(s.toRecord(accId).value))
ins = RUserEmail.insert(ru) ins = RUserEmail.insert(ru)
exists = RUserEmail.exists(ru.uid, ru.name) exists = RUserEmail.exists(ru.uid, ru.name)
res <- OptionT.liftF(store.add(ins, exists)) res <- OptionT.liftF(store.add(ins, exists))
} yield res).getOrElse(AddResult.Failure(new Exception("User not found"))) } yield res).getOrElse(AddResult.Failure(new Exception("User not found")))
def updateSettings(accId: AccountId, name: Ident, data: SmtpSettings): F[Int] = { def updateSettings(accId: AccountId, name: Ident, data: SmtpSettings): F[Int] = {
@ -143,8 +143,10 @@ object OMail {
for { for {
_ <- OptionT.liftF(store.transact(RItem.existsById(m.item))).filter(identity) _ <- OptionT.liftF(store.transact(RItem.existsById(m.item))).filter(identity)
ras <- OptionT.liftF( ras <- OptionT.liftF(
store.transact(RAttachment.findByItemAndCollectiveWithMeta(m.item, accId.collective)) store.transact(
) RAttachment.findByItemAndCollectiveWithMeta(m.item, accId.collective)
)
)
} yield { } yield {
val addAttach = m.attach.filter(ras).map { a => val addAttach = m.attach.filter(ras).map { a =>
Attach[F](Stream.emit(a._2).through(store.bitpeace.fetchData2(RangeDef.all))) Attach[F](Stream.emit(a._2).through(store.bitpeace.fetchData2(RangeDef.all)))
@ -169,15 +171,15 @@ object OMail {
def storeMail(msgId: String, cfg: RUserEmail): F[Either[SendResult, Ident]] = { def storeMail(msgId: String, cfg: RUserEmail): F[Either[SendResult, Ident]] = {
val save = for { val save = for {
data <- RSentMail.forItem( data <- RSentMail.forItem(
m.item, m.item,
accId, accId,
msgId, msgId,
cfg.mailFrom, cfg.mailFrom,
name, name,
m.subject, m.subject,
m.recipients, m.recipients,
m.body m.body
) )
_ <- OptionT.liftF(RSentMail.insert(data._1)) _ <- OptionT.liftF(RSentMail.insert(data._1))
_ <- OptionT.liftF(RSentMailItem.insert(data._2)) _ <- OptionT.liftF(RSentMailItem.insert(data._2))
} yield data._1.id } yield data._1.id
@ -195,7 +197,7 @@ object OMail {
mail <- createMail(mailCfg) mail <- createMail(mailCfg)
mid <- OptionT.liftF(sendMail(mailCfg.toMailConfig, mail)) mid <- OptionT.liftF(sendMail(mailCfg.toMailConfig, mail))
res <- mid.traverse(id => OptionT.liftF(storeMail(id, mailCfg))) res <- mid.traverse(id => OptionT.liftF(storeMail(id, mailCfg)))
conv = res.fold(identity, _.fold(identity, id => SendResult.Success(id))) conv = res.fold(identity, _.fold(identity, id => SendResult.Success(id)))
} yield conv).getOrElse(SendResult.NotFound) } yield conv).getOrElse(SendResult.NotFound)
} }

View File

@ -19,9 +19,9 @@ object AccountId {
case n if n > 0 && input.length > 2 => case n if n > 0 && input.length > 2 =>
val coll = input.substring(0, n) val coll = input.substring(0, n)
val user = input.substring(n + 1) val user = input.substring(n + 1)
Ident.fromString(coll). Ident
flatMap(collId => Ident.fromString(user). .fromString(coll)
map(userId => AccountId(collId, userId))) .flatMap(collId => Ident.fromString(user).map(userId => AccountId(collId, userId)))
case _ => case _ =>
invalid invalid
} }

View File

@ -12,5 +12,4 @@ object BaseJsonCodecs {
implicit val decodeInstantEpoch: Decoder[Instant] = implicit val decodeInstantEpoch: Decoder[Instant] =
Decoder.decodeLong.map(Instant.ofEpochMilli) Decoder.decodeLong.map(Instant.ofEpochMilli)
} }

View File

@ -21,32 +21,29 @@ object CollectiveState {
* action. */ * action. */
case object Blocked extends CollectiveState case object Blocked extends CollectiveState
def fromString(s: String): Either[String, CollectiveState] = def fromString(s: String): Either[String, CollectiveState] =
s.toLowerCase match { s.toLowerCase match {
case "active" => Right(Active) case "active" => Right(Active)
case "readonly" => Right(ReadOnly) case "readonly" => Right(ReadOnly)
case "closed" => Right(Closed) case "closed" => Right(Closed)
case "blocked" => Right(Blocked) case "blocked" => Right(Blocked)
case _ => Left(s"Unknown state: $s") case _ => Left(s"Unknown state: $s")
} }
def unsafe(str: String): CollectiveState = def unsafe(str: String): CollectiveState =
fromString(str).fold(sys.error, identity) fromString(str).fold(sys.error, identity)
def asString(state: CollectiveState): String = state match { def asString(state: CollectiveState): String = state match {
case Active => "active" case Active => "active"
case Blocked => "blocked" case Blocked => "blocked"
case Closed => "closed" case Closed => "closed"
case ReadOnly => "readonly" case ReadOnly => "readonly"
} }
implicit val collectiveStateEncoder: Encoder[CollectiveState] = implicit val collectiveStateEncoder: Encoder[CollectiveState] =
Encoder.encodeString.contramap(CollectiveState.asString) Encoder.encodeString.contramap(CollectiveState.asString)
implicit val collectiveStateDecoder: Decoder[CollectiveState] = implicit val collectiveStateDecoder: Decoder[CollectiveState] =
Decoder.decodeString.emap(CollectiveState.fromString) Decoder.decodeString.emap(CollectiveState.fromString)
} }

View File

@ -10,22 +10,22 @@ sealed trait ContactKind { self: Product =>
object ContactKind { object ContactKind {
val all = List() val all = List()
case object Phone extends ContactKind case object Phone extends ContactKind
case object Mobile extends ContactKind case object Mobile extends ContactKind
case object Fax extends ContactKind case object Fax extends ContactKind
case object Email extends ContactKind case object Email extends ContactKind
case object Docspell extends ContactKind case object Docspell extends ContactKind
case object Website extends ContactKind case object Website extends ContactKind
def fromString(s: String): Either[String, ContactKind] = def fromString(s: String): Either[String, ContactKind] =
s.toLowerCase match { s.toLowerCase match {
case "phone" => Right(Phone) case "phone" => Right(Phone)
case "mobile" => Right(Mobile) case "mobile" => Right(Mobile)
case "fax" => Right(Fax) case "fax" => Right(Fax)
case "email" => Right(Email) case "email" => Right(Email)
case "docspell" => Right(Docspell) case "docspell" => Right(Docspell)
case "website" => Right(Website) case "website" => Right(Website)
case _ => Left(s"Not a state value: $s") case _ => Left(s"Not a state value: $s")
} }
def unsafe(str: String): ContactKind = def unsafe(str: String): ContactKind =
@ -34,7 +34,6 @@ object ContactKind {
def asString(s: ContactKind): String = def asString(s: ContactKind): String =
s.asString.toLowerCase s.asString.toLowerCase
implicit val contactKindEncoder: Encoder[ContactKind] = implicit val contactKindEncoder: Encoder[ContactKind] =
Encoder.encodeString.contramap(_.asString) Encoder.encodeString.contramap(_.asString)

View File

@ -49,6 +49,6 @@ object Duration {
def stopTime[F[_]: Sync]: F[F[Duration]] = def stopTime[F[_]: Sync]: F[F[Duration]] =
for { for {
now <- Timestamp.current[F] now <- Timestamp.current[F]
end = Timestamp.current[F] end = Timestamp.current[F]
} yield end.map(e => Duration.millis(e.toMillis - now.toMillis)) } yield end.map(e => Duration.millis(e.toMillis - now.toMillis))
} }

View File

@ -10,48 +10,41 @@ sealed trait JobState { self: Product =>
object JobState { object JobState {
/** Waiting for being executed. */ /** Waiting for being executed. */
case object Waiting extends JobState { case object Waiting extends JobState {}
}
/** A scheduler has picked up this job and will pass it to the next /** A scheduler has picked up this job and will pass it to the next
* free slot. */ * free slot. */
case object Scheduled extends JobState { case object Scheduled extends JobState {}
}
/** Is currently executing */ /** Is currently executing */
case object Running extends JobState { case object Running extends JobState {}
}
/** Finished with failure and is being retried. */ /** Finished with failure and is being retried. */
case object Stuck extends JobState { case object Stuck extends JobState {}
}
/** Finished finally with a failure */ /** Finished finally with a failure */
case object Failed extends JobState { case object Failed extends JobState {}
}
/** Finished by cancellation. */ /** Finished by cancellation. */
case object Cancelled extends JobState { case object Cancelled extends JobState {}
}
/** Finished with success */ /** Finished with success */
case object Success extends JobState { case object Success extends JobState {}
}
val all: Set[JobState] = Set(Waiting, Scheduled, Running, Stuck, Failed, Cancelled, Success) val all: Set[JobState] = Set(Waiting, Scheduled, Running, Stuck, Failed, Cancelled, Success)
val queued: Set[JobState] = Set(Waiting, Scheduled, Stuck) val queued: Set[JobState] = Set(Waiting, Scheduled, Stuck)
val done: Set[JobState] = Set(Failed, Cancelled, Success) val done: Set[JobState] = Set(Failed, Cancelled, Success)
def parse(str: String): Either[String, JobState] = def parse(str: String): Either[String, JobState] =
str.toLowerCase match { str.toLowerCase match {
case "waiting" => Right(Waiting) case "waiting" => Right(Waiting)
case "scheduled" => Right(Scheduled) case "scheduled" => Right(Scheduled)
case "running" => Right(Running) case "running" => Right(Running)
case "stuck" => Right(Stuck) case "stuck" => Right(Stuck)
case "failed" => Right(Failed) case "failed" => Right(Failed)
case "cancelled" => Right(Cancelled) case "cancelled" => Right(Cancelled)
case "success" => Right(Success) case "success" => Right(Success)
case _ => Left(s"Not a job state: $str") case _ => Left(s"Not a job state: $str")
} }
def unsafe(str: String): JobState = def unsafe(str: String): JobState =
@ -60,7 +53,6 @@ object JobState {
def asString(state: JobState): String = def asString(state: JobState): String =
state.name state.name
implicit val jobStateEncoder: Encoder[JobState] = implicit val jobStateEncoder: Encoder[JobState] =
Encoder.encodeString.contramap(_.name) Encoder.encodeString.contramap(_.name)

View File

@ -51,8 +51,8 @@ case class LenientUri(
def open[F[_]: Sync]: Either[String, Resource[F, HttpURLConnection]] = def open[F[_]: Sync]: Either[String, Resource[F, HttpURLConnection]] =
toJavaUrl.map { url => toJavaUrl.map { url =>
Resource Resource
.make(Sync[F].delay(url.openConnection().asInstanceOf[HttpURLConnection]))( .make(Sync[F].delay(url.openConnection().asInstanceOf[HttpURLConnection]))(conn =>
conn => Sync[F].delay(conn.disconnect()) Sync[F].delay(conn.disconnect())
) )
} }
@ -61,17 +61,16 @@ case class LenientUri(
.emit(Either.catchNonFatal(new URL(asString))) .emit(Either.catchNonFatal(new URL(asString)))
.covary[F] .covary[F]
.rethrow .rethrow
.flatMap( .flatMap(url =>
url => fs2.io.readInputStream(Sync[F].delay(url.openStream()), chunkSize, blocker, true) fs2.io.readInputStream(Sync[F].delay(url.openStream()), chunkSize, blocker, true)
) )
def host: Option[String] = def host: Option[String] =
authority.map( authority.map(a =>
a => a.indexOf(':') match {
a.indexOf(':') match { case -1 => a
case -1 => a case n => a.substring(0, n)
case n => a.substring(0, n) }
}
) )
def asString: String = { def asString: String = {

View File

@ -8,13 +8,11 @@ import io.circe.generic.semiauto._
case class MetaProposalList private (proposals: List[MetaProposal]) { case class MetaProposalList private (proposals: List[MetaProposal]) {
def isEmpty: Boolean = proposals.isEmpty def isEmpty: Boolean = proposals.isEmpty
def nonEmpty: Boolean = proposals.nonEmpty def nonEmpty: Boolean = proposals.nonEmpty
def hasResults(mt: MetaProposalType, mts: MetaProposalType*): Boolean = { def hasResults(mt: MetaProposalType, mts: MetaProposalType*): Boolean =
(mts :+ mt).map(mtp => proposals.exists(_.proposalType == mtp)). (mts :+ mt).map(mtp => proposals.exists(_.proposalType == mtp)).reduce(_ && _)
reduce(_ && _)
}
def hasResultsAll: Boolean = def hasResultsAll: Boolean =
proposals.map(_.proposalType).toSet == MetaProposalType.all.toSet proposals.map(_.proposalType).toSet == MetaProposalType.all.toSet
@ -23,7 +21,7 @@ case class MetaProposalList private (proposals: List[MetaProposal]) {
proposals.foldLeft(Set.empty[MetaProposalType])(_ + _.proposalType) proposals.foldLeft(Set.empty[MetaProposalType])(_ + _.proposalType)
def fillEmptyFrom(ml: MetaProposalList): MetaProposalList = { def fillEmptyFrom(ml: MetaProposalList): MetaProposalList = {
val list = ml.proposals.foldLeft(proposals){ (mine, mp) => val list = ml.proposals.foldLeft(proposals) { (mine, mp) =>
if (hasResults(mp.proposalType)) mine if (hasResults(mp.proposalType)) mine
else mp :: mine else mp :: mine
} }
@ -48,21 +46,24 @@ object MetaProposalList {
fromSeq1(mt, refs.map(ref => Candidate(ref, Set(label)))) fromSeq1(mt, refs.map(ref => Candidate(ref, Set(label))))
def fromSeq1(mt: MetaProposalType, refs: Seq[Candidate]): MetaProposalList = def fromSeq1(mt: MetaProposalType, refs: Seq[Candidate]): MetaProposalList =
NonEmptyList.fromList(refs.toList). NonEmptyList
map(nl => MetaProposalList.of(MetaProposal(mt, nl))). .fromList(refs.toList)
getOrElse(empty) .map(nl => MetaProposalList.of(MetaProposal(mt, nl)))
.getOrElse(empty)
def fromMap(m: Map[MetaProposalType, MetaProposal]): MetaProposalList = { def fromMap(m: Map[MetaProposalType, MetaProposal]): MetaProposalList =
new MetaProposalList(m.toList.map({ case (k, v) => v.copy(proposalType = k) })) new MetaProposalList(m.toList.map({ case (k, v) => v.copy(proposalType = k) }))
}
def flatten(ml: Seq[MetaProposalList]): MetaProposalList = { def flatten(ml: Seq[MetaProposalList]): MetaProposalList = {
val init: Map[MetaProposalType, MetaProposal] = Map.empty val init: Map[MetaProposalType, MetaProposal] = Map.empty
def updateMap(map: Map[MetaProposalType, MetaProposal], mp: MetaProposal): Map[MetaProposalType, MetaProposal] = def updateMap(
map: Map[MetaProposalType, MetaProposal],
mp: MetaProposal
): Map[MetaProposalType, MetaProposal] =
map.get(mp.proposalType) match { map.get(mp.proposalType) match {
case Some(mp0) => map.updated(mp.proposalType, mp0.addIdRef(mp.values.toList)) case Some(mp0) => map.updated(mp.proposalType, mp0.addIdRef(mp.values.toList))
case None => map.updated(mp.proposalType, mp) case None => map.updated(mp.proposalType, mp)
} }
val merged = ml.foldLeft(init) { (map, el) => val merged = ml.foldLeft(init) { (map, el) =>

View File

@ -10,25 +10,25 @@ sealed trait MetaProposalType { self: Product =>
object MetaProposalType { object MetaProposalType {
case object CorrOrg extends MetaProposalType case object CorrOrg extends MetaProposalType
case object CorrPerson extends MetaProposalType case object CorrPerson extends MetaProposalType
case object ConcPerson extends MetaProposalType case object ConcPerson extends MetaProposalType
case object ConcEquip extends MetaProposalType case object ConcEquip extends MetaProposalType
case object DocDate extends MetaProposalType case object DocDate extends MetaProposalType
case object DueDate extends MetaProposalType case object DueDate extends MetaProposalType
val all: List[MetaProposalType] = val all: List[MetaProposalType] =
List(CorrOrg, CorrPerson, ConcPerson, ConcEquip) List(CorrOrg, CorrPerson, ConcPerson, ConcEquip)
def fromString(str: String): Either[String, MetaProposalType] = def fromString(str: String): Either[String, MetaProposalType] =
str.toLowerCase match { str.toLowerCase match {
case "corrorg" => Right(CorrOrg) case "corrorg" => Right(CorrOrg)
case "corrperson" => Right(CorrPerson) case "corrperson" => Right(CorrPerson)
case "concperson" => Right(ConcPerson) case "concperson" => Right(ConcPerson)
case "concequip" => Right(ConcEquip) case "concequip" => Right(ConcEquip)
case "docdate" => Right(DocDate) case "docdate" => Right(DocDate)
case "duedate" => Right(DueDate) case "duedate" => Right(DueDate)
case _ => Left(s"Invalid item-proposal-type: $str") case _ => Left(s"Invalid item-proposal-type: $str")
} }
def unsafe(str: String): MetaProposalType = def unsafe(str: String): MetaProposalType =

View File

@ -11,31 +11,30 @@ sealed trait NerTag { self: Product =>
object NerTag { object NerTag {
case object Organization extends NerTag case object Organization extends NerTag
case object Person extends NerTag case object Person extends NerTag
case object Location extends NerTag case object Location extends NerTag
case object Misc extends NerTag case object Misc extends NerTag
case object Email extends NerTag case object Email extends NerTag
case object Website extends NerTag case object Website extends NerTag
case object Date extends NerTag case object Date extends NerTag
val all: List[NerTag] = List(Organization, Person, Location) val all: List[NerTag] = List(Organization, Person, Location)
def fromString(str: String): Either[String, NerTag] = def fromString(str: String): Either[String, NerTag] =
str.toLowerCase match { str.toLowerCase match {
case "organization" => Right(Organization) case "organization" => Right(Organization)
case "person" => Right(Person) case "person" => Right(Person)
case "location" => Right(Location) case "location" => Right(Location)
case "misc" => Right(Misc) case "misc" => Right(Misc)
case "email" => Right(Email) case "email" => Right(Email)
case "website" => Right(Website) case "website" => Right(Website)
case "date" => Right(Date) case "date" => Right(Date)
case _ => Left(s"Invalid ner tag: $str") case _ => Left(s"Invalid ner tag: $str")
} }
def unsafe(str: String): NerTag = def unsafe(str: String): NerTag =
fromString(str).fold(sys.error, identity) fromString(str).fold(sys.error, identity)
implicit val jsonDecoder: Decoder[NerTag] = implicit val jsonDecoder: Decoder[NerTag] =
Decoder.decodeString.emap(fromString) Decoder.decodeString.emap(fromString)
implicit val jsonEncoder: Encoder[NerTag] = implicit val jsonEncoder: Encoder[NerTag] =

View File

@ -24,12 +24,14 @@ object Implicits {
ConfigReader[String].emap(reason(Ident.fromString)) ConfigReader[String].emap(reason(Ident.fromString))
implicit val byteVectorReader: ConfigReader[ByteVector] = implicit val byteVectorReader: ConfigReader[ByteVector] =
ConfigReader[String].emap(reason(str => { ConfigReader[String].emap(reason { str =>
if (str.startsWith("hex:")) ByteVector.fromHex(str.drop(4)).toRight("Invalid hex value.") if (str.startsWith("hex:")) ByteVector.fromHex(str.drop(4)).toRight("Invalid hex value.")
else if (str.startsWith("b64:")) ByteVector.fromBase64(str.drop(4)).toRight("Invalid Base64 string.") else if (str.startsWith("b64:"))
ByteVector.fromBase64(str.drop(4)).toRight("Invalid Base64 string.")
else ByteVector.encodeUtf8(str).left.map(ex => s"Invalid utf8 string: ${ex.getMessage}") else ByteVector.encodeUtf8(str).left.map(ex => s"Invalid utf8 string: ${ex.getMessage}")
})) })
def reason[A: ClassTag](f: String => Either[String, A]): String => Either[FailureReason, A] = def reason[A: ClassTag](f: String => Either[String, A]): String => Either[FailureReason, A] =
in => f(in).left.map(str => CannotConvert(in, implicitly[ClassTag[A]].runtimeClass.toString, str)) in =>
f(in).left.map(str => CannotConvert(in, implicitly[ClassTag[A]].runtimeClass.toString, str))
} }

View File

@ -2,9 +2,6 @@ package docspell.common
package object syntax { package object syntax {
object all extends EitherSyntax object all extends EitherSyntax with StreamSyntax with StringSyntax with LoggerSyntax
with StreamSyntax
with StringSyntax
with LoggerSyntax
} }

View File

@ -16,7 +16,6 @@ object QueryParam {
implicit val queryStringDecoder: QueryParamDecoder[QueryString] = implicit val queryStringDecoder: QueryParamDecoder[QueryString] =
QueryParamDecoder[String].map(s => QueryString(s.trim.toLowerCase)) QueryParamDecoder[String].map(s => QueryString(s.trim.toLowerCase))
// implicit val booleanDecoder: QueryParamDecoder[Boolean] = // implicit val booleanDecoder: QueryParamDecoder[Boolean] =
// QueryParamDecoder.fromUnsafeCast(qp => Option(qp.value).exists(_.equalsIgnoreCase("true")))( // QueryParamDecoder.fromUnsafeCast(qp => Option(qp.value).exists(_.equalsIgnoreCase("true")))(
// "Boolean" // "Boolean"

View File

@ -139,8 +139,7 @@ object ItemRoutes {
} }
} }
implicit final class OptionString(opt: Option[String]) {
final implicit class OptionString(opt: Option[String]) {
def notEmpty: Option[String] = def notEmpty: Option[String] =
opt.map(_.trim).filter(_.nonEmpty) opt.map(_.trim).filter(_.nonEmpty)
} }

View File

@ -24,13 +24,13 @@ object MailSendRoutes {
HttpRoutes.of { HttpRoutes.of {
case req @ POST -> Root / Ident(name) / Ident(id) => case req @ POST -> Root / Ident(name) / Ident(id) =>
for { for {
in <- req.as[SimpleMail] in <- req.as[SimpleMail]
mail = convertIn(id, in) mail = convertIn(id, in)
res <- mail.traverse(m => backend.mail.sendMail(user.account, name, m)) res <- mail.traverse(m => backend.mail.sendMail(user.account, name, m))
resp <- res.fold( resp <- res.fold(
err => Ok(BasicResult(false, s"Invalid mail data: $err")), err => Ok(BasicResult(false, s"Invalid mail data: $err")),
res => Ok(convertOut(res)) res => Ok(convertOut(res))
) )
} yield resp } yield resp
} }
} }
@ -39,7 +39,7 @@ object MailSendRoutes {
for { for {
rec <- s.recipients.traverse(EmilUtil.readMailAddress) rec <- s.recipients.traverse(EmilUtil.readMailAddress)
fileIds <- s.attachmentIds.traverse(Ident.fromString) fileIds <- s.attachmentIds.traverse(Ident.fromString)
sel = if (s.addAllAttachments) AttachSelection.All else AttachSelection.Selected(fileIds) sel = if (s.addAllAttachments) AttachSelection.All else AttachSelection.Selected(fileIds)
} yield ItemMail(item, s.subject, rec, s.body, sel) } yield ItemMail(item, s.subject, rec, s.body, sel)
def convertOut(res: SendResult): BasicResult = def convertOut(res: SendResult): BasicResult =

View File

@ -29,7 +29,7 @@ object MailSettingsRoutes {
case GET -> Root :? QueryParam.QueryOpt(q) => case GET -> Root :? QueryParam.QueryOpt(q) =>
for { for {
list <- backend.mail.getSettings(user.account, q.map(_.q)) list <- backend.mail.getSettings(user.account, q.map(_.q))
res = list.map(convert) res = list.map(convert)
resp <- Ok(EmailSettingsList(res.toList)) resp <- Ok(EmailSettingsList(res.toList))
} yield resp } yield resp
@ -45,13 +45,13 @@ object MailSettingsRoutes {
ru = makeSettings(in) ru = makeSettings(in)
up <- OptionT.liftF(ru.traverse(r => backend.mail.createSettings(user.account, r))) up <- OptionT.liftF(ru.traverse(r => backend.mail.createSettings(user.account, r)))
resp <- OptionT.liftF( resp <- OptionT.liftF(
Ok( Ok(
up.fold( up.fold(
err => BasicResult(false, err), err => BasicResult(false, err),
ar => Conversions.basicResult(ar, "Mail settings stored.") ar => Conversions.basicResult(ar, "Mail settings stored.")
) )
) )
) )
} yield resp).getOrElseF(NotFound()) } yield resp).getOrElseF(NotFound())
case req @ PUT -> Root / Ident(name) => case req @ PUT -> Root / Ident(name) =>
@ -60,24 +60,24 @@ object MailSettingsRoutes {
ru = makeSettings(in) ru = makeSettings(in)
up <- OptionT.liftF(ru.traverse(r => backend.mail.updateSettings(user.account, name, r))) up <- OptionT.liftF(ru.traverse(r => backend.mail.updateSettings(user.account, name, r)))
resp <- OptionT.liftF( resp <- OptionT.liftF(
Ok( Ok(
up.fold( up.fold(
err => BasicResult(false, err), err => BasicResult(false, err),
n => n =>
if (n > 0) BasicResult(true, "Mail settings stored.") if (n > 0) BasicResult(true, "Mail settings stored.")
else BasicResult(false, "Mail settings could not be saved") else BasicResult(false, "Mail settings could not be saved")
) )
) )
) )
} yield resp).getOrElseF(NotFound()) } yield resp).getOrElseF(NotFound())
case DELETE -> Root / Ident(name) => case DELETE -> Root / Ident(name) =>
for { for {
n <- backend.mail.deleteSettings(user.account, name) n <- backend.mail.deleteSettings(user.account, name)
resp <- Ok( resp <- Ok(
if (n > 0) BasicResult(true, "Mail settings removed") if (n > 0) BasicResult(true, "Mail settings removed")
else BasicResult(false, "Mail settings could not be removed") else BasicResult(false, "Mail settings could not be removed")
) )
} yield resp } yield resp
} }

View File

@ -23,7 +23,7 @@ object SentMailRoutes {
HttpRoutes.of { HttpRoutes.of {
case GET -> Root / "item" / Ident(id) => case GET -> Root / "item" / Ident(id) =>
for { for {
all <- backend.mail.getSentMailsForItem(user.account, id) all <- backend.mail.getSentMailsForItem(user.account, id)
resp <- Ok(SentMails(all.map(convert).toList)) resp <- Ok(SentMails(all.map(convert).toList))
} yield resp } yield resp
@ -35,7 +35,7 @@ object SentMailRoutes {
case DELETE -> Root / "mail" / Ident(mailId) => case DELETE -> Root / "mail" / Ident(mailId) =>
for { for {
n <- backend.mail.deleteSentMail(user.account, mailId) n <- backend.mail.deleteSentMail(user.account, mailId)
resp <- Ok(BasicResult(n > 0, s"Mails deleted: $n")) resp <- Ok(BasicResult(n > 0, s"Mails deleted: $n"))
} yield resp } yield resp
} }

View File

@ -37,7 +37,7 @@ object TemplateRoutes {
new InnerRoutes[F] { new InnerRoutes[F] {
def doc = def doc =
HttpRoutes.of[F] { HttpRoutes.of[F] {
case GET -> Root => case GET -> Root =>
for { for {
templ <- docTemplate templ <- docTemplate
resp <- Ok(DocData().render(templ), `Content-Type`(`text/html`)) resp <- Ok(DocData().render(templ), `Content-Type`(`text/html`))

View File

@ -194,8 +194,9 @@ object QItem {
IC.cid.prefix("i").is(q.collective), IC.cid.prefix("i").is(q.collective),
IC.state.prefix("i").isOneOf(q.states), IC.state.prefix("i").isOneOf(q.states),
IC.incoming.prefix("i").isOrDiscard(q.direction), IC.incoming.prefix("i").isOrDiscard(q.direction),
name.map(n => or(IC.name.prefix("i").lowerLike(n), IC.notes.prefix("i").lowerLike(n))). name
getOrElse(Fragment.empty), .map(n => or(IC.name.prefix("i").lowerLike(n), IC.notes.prefix("i").lowerLike(n)))
.getOrElse(Fragment.empty),
RPerson.Columns.pid.prefix("p0").isOrDiscard(q.corrPerson), RPerson.Columns.pid.prefix("p0").isOrDiscard(q.corrPerson),
ROrganization.Columns.oid.prefix("o0").isOrDiscard(q.corrOrg), ROrganization.Columns.oid.prefix("o0").isOrDiscard(q.corrOrg),
RPerson.Columns.pid.prefix("p1").isOrDiscard(q.concPerson), RPerson.Columns.pid.prefix("p1").isOrDiscard(q.concPerson),

View File

@ -8,12 +8,12 @@ object RFileMeta {
val table = fr"filemeta" val table = fr"filemeta"
object Columns { object Columns {
val id = Column("id") val id = Column("id")
val timestamp = Column("timestamp") val timestamp = Column("timestamp")
val mimetype = Column("mimetype") val mimetype = Column("mimetype")
val length = Column("length") val length = Column("length")
val checksum = Column("checksum") val checksum = Column("checksum")
val chunks = Column("chunks") val chunks = Column("chunks")
val chunksize = Column("chunksize") val chunksize = Column("chunksize")
val all = List(id, timestamp, mimetype, length, checksum, chunks, chunksize) val all = List(id, timestamp, mimetype, length, checksum, chunks, chunksize)

View File

@ -52,8 +52,16 @@ object RSentMail {
for { for {
user <- OptionT(RUser.findByAccount(accId)) user <- OptionT(RUser.findByAccount(accId))
sm <- OptionT.liftF( sm <- OptionT.liftF(
RSentMail[ConnectionIO](user.uid, messageId, sender, connName, subject, recipients, body) RSentMail[ConnectionIO](
) user.uid,
messageId,
sender,
connName,
subject,
recipients,
body
)
)
si <- OptionT.liftF(RSentMailItem[ConnectionIO](itemId, sm.id, Some(sm.created))) si <- OptionT.liftF(RSentMailItem[ConnectionIO](itemId, sm.id, Some(sm.created)))
} yield (sm, si) } yield (sm, si)

View File

@ -9,43 +9,47 @@ object Contact {
private[this] val protocols = Set("ftp", "http", "https") private[this] val protocols = Set("ftp", "http", "https")
def annotate(text: String): Vector[NerLabel] = def annotate(text: String): Vector[NerLabel] =
TextSplitter.splitToken[Nothing](text, " \t\r\n".toSet). TextSplitter
map({ token => .splitToken[Nothing](text, " \t\r\n".toSet)
if (isEmailAddress(token.value)) NerLabel(token.value, NerTag.Email, token.begin, token.end).some .map({ token =>
else if (isWebsite(token.value)) NerLabel(token.value, NerTag.Website, token.begin, token.end).some if (isEmailAddress(token.value))
NerLabel(token.value, NerTag.Email, token.begin, token.end).some
else if (isWebsite(token.value))
NerLabel(token.value, NerTag.Website, token.begin, token.end).some
else None else None
}). })
flatMap(_.map(Stream.emit).getOrElse(Stream.empty)). .flatMap(_.map(Stream.emit).getOrElse(Stream.empty))
toVector .toVector
def isEmailAddress(str: String): Boolean = { def isEmailAddress(str: String): Boolean = {
val atIdx = str.indexOf('@') val atIdx = str.indexOf('@')
if (atIdx <= 0 || str.indexOf('@', atIdx + 1) > 0) false if (atIdx <= 0 || str.indexOf('@', atIdx + 1) > 0) false
else { else {
val name = str.substring(0, atIdx) val name = str.substring(0, atIdx)
val dom = str.substring(atIdx + 1) val dom = str.substring(atIdx + 1)
Domain.isDomain(dom) && name.forall(c => !c.isWhitespace) Domain.isDomain(dom) && name.forall(c => !c.isWhitespace)
} }
} }
def isWebsite(str: String): Boolean = def isWebsite(str: String): Boolean =
LenientUri.parse(str). LenientUri
toOption. .parse(str)
map(uri => protocols.contains(uri.scheme.head)). .toOption
getOrElse(Domain.isDomain(str)) .map(uri => protocols.contains(uri.scheme.head))
.getOrElse(Domain.isDomain(str))
def isDocspellOpenUpload(str: String): Boolean = { def isDocspellOpenUpload(str: String): Boolean = {
def isUploadPath(p: LenientUri.Path): Boolean = def isUploadPath(p: LenientUri.Path): Boolean =
p match { p match {
case LenientUri.RootPath => false case LenientUri.RootPath => false
case LenientUri.EmptyPath => false case LenientUri.EmptyPath => false
case LenientUri.NonEmptyPath(segs) => case LenientUri.NonEmptyPath(segs) =>
Ident.fromString(segs.last).isRight && Ident.fromString(segs.last).isRight &&
segs.init.takeRight(3) == List("open", "upload", "item") segs.init.takeRight(3) == List("open", "upload", "item")
} }
LenientUri.parse(str). LenientUri
toOption. .parse(str)
exists(uri => protocols.contains(uri.scheme.head) && isUploadPath(uri.path)) .toOption
.exists(uri => protocols.contains(uri.scheme.head) && isUploadPath(uri.path))
} }
} }

View File

@ -11,7 +11,7 @@ private[text] object Tld {
/** /**
* Some selected TLDs. * Some selected TLDs.
*/ */
private [this] val known = List( private[this] val known = List(
".com", ".com",
".org", ".org",
".net", ".net",

View File

@ -10,16 +10,22 @@ import scala.util.Try
object DateFind { object DateFind {
def findDates(text: String, lang: Language): Stream[Pure, NerDateLabel] = { def findDates(text: String, lang: Language): Stream[Pure, NerDateLabel] =
TextSplitter.splitToken(text, " \t.,\n\r/".toSet). TextSplitter
sliding(3). .splitToken(text, " \t.,\n\r/".toSet)
filter(_.length == 3). .sliding(3)
map(q => SimpleDate.fromParts(q.toList, lang). .filter(_.length == 3)
map(sd => NerDateLabel(sd.toLocalDate, .map(q =>
NerLabel(text.substring(q(0).begin, q(2).end), NerTag.Date, q(0).begin, q(1).end)))). SimpleDate
collect({ case Some(d) => d }) .fromParts(q.toList, lang)
} .map(sd =>
NerDateLabel(
sd.toLocalDate,
NerLabel(text.substring(q(0).begin, q(2).end), NerTag.Date, q(0).begin, q(1).end)
)
)
)
.collect({ case Some(d) => d })
private case class SimpleDate(year: Int, month: Int, day: Int) { private case class SimpleDate(year: Int, month: Int, day: Int) {
def toLocalDate: LocalDate = def toLocalDate: LocalDate =
@ -27,13 +33,13 @@ object DateFind {
} }
private object SimpleDate { private object SimpleDate {
val p0 = readYear >> readMonth >> readDay map { val p0 = (readYear >> readMonth >> readDay).map {
case ((y, m), d) => SimpleDate(y, m, d) case ((y, m), d) => SimpleDate(y, m, d)
} }
val p1 = readDay >> readMonth >> readYear map { val p1 = (readDay >> readMonth >> readYear).map {
case ((d, m), y) => SimpleDate(y, m, d) case ((d, m), y) => SimpleDate(y, m, d)
} }
val p2 = readMonth >> readDay >> readYear map { val p2 = (readMonth >> readDay >> readYear).map {
case ((m, d), y) => SimpleDate(y, m, d) case ((m, d), y) => SimpleDate(y, m, d)
} }
@ -46,14 +52,14 @@ object DateFind {
p.read(parts).toOption p.read(parts).toOption
} }
def readYear: Reader[Int] =
def readYear: Reader[Int] = { Reader.readFirst(w =>
Reader.readFirst(w => w.value.length match { w.value.length match {
case 2 => Try(w.value.toInt).filter(n => n >= 0).toOption case 2 => Try(w.value.toInt).filter(n => n >= 0).toOption
case 4 => Try(w.value.toInt).filter(n => n > 1000).toOption case 4 => Try(w.value.toInt).filter(n => n > 1000).toOption
case _ => None case _ => None
}) }
} )
def readMonth: Reader[Int] = def readMonth: Reader[Int] =
Reader.readFirst(w => Some(months.indexWhere(_.contains(w.value))).filter(_ > 0).map(_ + 1)) Reader.readFirst(w => Some(months.indexWhere(_.contains(w.value))).filter(_ > 0).map(_ + 1))
@ -69,10 +75,12 @@ object DateFind {
Reader(read.andThen(_.map(f))) Reader(read.andThen(_.map(f)))
def or(other: Reader[A]): Reader[A] = def or(other: Reader[A]): Reader[A] =
Reader(words => read(words) match { Reader(words =>
case Result.Failure => other.read(words) read(words) match {
case s @ Result.Success(_, _) => s case Result.Failure => other.read(words)
}) case s @ Result.Success(_, _) => s
}
)
} }
object Reader { object Reader {
@ -81,12 +89,11 @@ object DateFind {
def readFirst[A](f: Word => Option[A]): Reader[A] = def readFirst[A](f: Word => Option[A]): Reader[A] =
Reader({ Reader({
case Nil => Result.Failure case Nil => Result.Failure
case a :: as => f(a).map(value => Result.Success(value, as)).getOrElse(Result.Failure) case a :: as => f(a).map(value => Result.Success(value, as)).getOrElse(Result.Failure)
}) })
} }
sealed trait Result[+A] { sealed trait Result[+A] {
def toOption: Option[A] def toOption: Option[A]
def map[B](f: A => B): Result[B] def map[B](f: A => B): Result[B]
@ -95,14 +102,14 @@ object DateFind {
object Result { object Result {
final case class Success[A](value: A, rest: List[Word]) extends Result[A] { final case class Success[A](value: A, rest: List[Word]) extends Result[A] {
val toOption = Some(value) val toOption = Some(value)
def map[B](f: A => B): Result[B] = Success(f(value), rest) def map[B](f: A => B): Result[B] = Success(f(value), rest)
def next[B](r: Reader[B]): Result[(A, B)] = def next[B](r: Reader[B]): Result[(A, B)] =
r.read(rest).map(b => (value, b)) r.read(rest).map(b => (value, b))
} }
final case object Failure extends Result[Nothing] { final case object Failure extends Result[Nothing] {
val toOption = None val toOption = None
def map[B](f: Nothing => B): Result[B] = this def map[B](f: Nothing => B): Result[B] = this
def next[B](r: Reader[B]): Result[(Nothing, B)] = this def next[B](r: Reader[B]): Result[(Nothing, B)] = this
} }
} }

View File

@ -14,23 +14,28 @@ import java.net.URL
import scala.util.Using import scala.util.Using
object StanfordNerClassifier { object StanfordNerClassifier {
private [this] val logger = getLogger private[this] val logger = getLogger
lazy val germanNerClassifier = makeClassifier(Language.German) lazy val germanNerClassifier = makeClassifier(Language.German)
lazy val englishNerClassifier = makeClassifier(Language.English) lazy val englishNerClassifier = makeClassifier(Language.English)
def nerAnnotate(lang: Language)(text: String): Vector[NerLabel] = { def nerAnnotate(lang: Language)(text: String): Vector[NerLabel] = {
val nerClassifier = lang match { val nerClassifier = lang match {
case Language.English => englishNerClassifier case Language.English => englishNerClassifier
case Language.German => germanNerClassifier case Language.German => germanNerClassifier
} }
nerClassifier.classify(text).asScala.flatMap(a => a.asScala). nerClassifier
collect(Function.unlift(label => { .classify(text)
.asScala
.flatMap(a => a.asScala)
.collect(Function.unlift { label =>
val tag = label.get(classOf[CoreAnnotations.AnswerAnnotation]) val tag = label.get(classOf[CoreAnnotations.AnswerAnnotation])
NerTag.fromString(Option(tag).getOrElse("")).toOption. NerTag
map(t => NerLabel(label.word(), t, label.beginPosition(), label.endPosition())) .fromString(Option(tag).getOrElse(""))
})). .toOption
toVector .map(t => NerLabel(label.word(), t, label.beginPosition(), label.endPosition()))
})
.toVector
} }
private def makeClassifier(lang: Language): AbstractSequenceClassifier[CoreLabel] = { private def makeClassifier(lang: Language): AbstractSequenceClassifier[CoreLabel] = {
@ -48,7 +53,9 @@ object StanfordNerClassifier {
check(lang match { check(lang match {
case Language.German => case Language.German =>
getClass.getResource("/edu/stanford/nlp/models/ner/german.conll.germeval2014.hgc_175m_600.crf.ser.gz") getClass.getResource(
"/edu/stanford/nlp/models/ner/german.conll.germeval2014.hgc_175m_600.crf.ser.gz"
)
case Language.English => case Language.English =>
getClass.getResource("/edu/stanford/nlp/models/ner/english.all.3class.distsim.crf.ser.gz") getClass.getResource("/edu/stanford/nlp/models/ner/english.all.3class.distsim.crf.ser.gz")
}) })

View File

@ -5,11 +5,11 @@ import java.nio.file.{Path, Paths}
import docspell.common._ import docspell.common._
case class Config( case class Config(
allowedContentTypes: Set[MimeType] allowedContentTypes: Set[MimeType],
, ghostscript: Config.Ghostscript ghostscript: Config.Ghostscript,
, pageRange: Config.PageRange pageRange: Config.PageRange,
, unpaper: Config.Unpaper unpaper: Config.Unpaper,
, tesseract: Config.Tesseract tesseract: Config.Tesseract
) { ) {
def isAllowed(mt: MimeType): Boolean = def isAllowed(mt: MimeType): Boolean =
@ -22,7 +22,7 @@ object Config {
case class Command(program: String, args: Seq[String], timeout: Duration) { case class Command(program: String, args: Seq[String], timeout: Duration) {
def mapArgs(f: String => String): Command = def mapArgs(f: String => String): Command =
Command(program, args map f, timeout) Command(program, args.map(f), timeout)
def toCmd: List[String] = def toCmd: List[String] =
program :: args.toList program :: args.toList
@ -44,23 +44,23 @@ object Config {
), ),
pageRange = PageRange(10), pageRange = PageRange(10),
ghostscript = Ghostscript( ghostscript = Ghostscript(
Command("gs", Seq("-dNOPAUSE" Command(
, "-dBATCH" "gs",
, "-dSAFER" Seq(
, "-sDEVICE=tiffscaled8" "-dNOPAUSE",
, "-sOutputFile={{outfile}}" "-dBATCH",
, "{{infile}}"), "-dSAFER",
Duration.seconds(30)), "-sDEVICE=tiffscaled8",
Paths.get(System.getProperty("java.io.tmpdir")). "-sOutputFile={{outfile}}",
resolve("docspell-extraction")), "{{infile}}"
unpaper = Unpaper(Command("unpaper" ),
, Seq("{{infile}}", "{{outfile}}") Duration.seconds(30)
, Duration.seconds(30))), ),
Paths.get(System.getProperty("java.io.tmpdir")).resolve("docspell-extraction")
),
unpaper = Unpaper(Command("unpaper", Seq("{{infile}}", "{{outfile}}"), Duration.seconds(30))),
tesseract = Tesseract( tesseract = Tesseract(
Command("tesseract", Seq("{{file}}" Command("tesseract", Seq("{{file}}", "stdout", "-l", "{{lang}}"), Duration.minutes(1))
, "stdout" )
, "-l"
, "{{lang}}"),
Duration.minutes(1)))
) )
} }

View File

@ -11,71 +11,106 @@ object Ocr {
/** Extract the text of all pages in the given pdf file. /** Extract the text of all pages in the given pdf file.
*/ */
def extractPdf[F[_]: Sync: ContextShift](pdf: Stream[F, Byte], blocker: Blocker, lang: String, config: Config): Stream[F, String] = def extractPdf[F[_]: Sync: ContextShift](
pdf: Stream[F, Byte],
blocker: Blocker,
lang: String,
config: Config
): Stream[F, String] =
File.withTempDir(config.ghostscript.workingDir, "extractpdf") { wd => File.withTempDir(config.ghostscript.workingDir, "extractpdf") { wd =>
runGhostscript(pdf, config, wd, blocker). runGhostscript(pdf, config, wd, blocker)
flatMap({ tmpImg => .flatMap({ tmpImg =>
runTesseractFile(tmpImg, blocker, lang, config) runTesseractFile(tmpImg, blocker, lang, config)
}). })
fold1(_ + "\n\n\n" + _) .fold1(_ + "\n\n\n" + _)
} }
/** Extract the text from the given image file /** Extract the text from the given image file
*/ */
def extractImage[F[_]: Sync: ContextShift](img: Stream[F, Byte], blocker: Blocker, lang: String, config: Config): Stream[F, String] = def extractImage[F[_]: Sync: ContextShift](
img: Stream[F, Byte],
blocker: Blocker,
lang: String,
config: Config
): Stream[F, String] =
runTesseractStdin(img, blocker, lang, config) runTesseractStdin(img, blocker, lang, config)
def extractPdFFile[F[_]: Sync: ContextShift](
def extractPdFFile[F[_]: Sync: ContextShift](pdf: Path, blocker: Blocker, lang: String, config: Config): Stream[F, String] = pdf: Path,
blocker: Blocker,
lang: String,
config: Config
): Stream[F, String] =
File.withTempDir(config.ghostscript.workingDir, "extractpdf") { wd => File.withTempDir(config.ghostscript.workingDir, "extractpdf") { wd =>
runGhostscriptFile(pdf, config.ghostscript.command, wd, blocker). runGhostscriptFile(pdf, config.ghostscript.command, wd, blocker)
flatMap({ tif => .flatMap({ tif =>
runTesseractFile(tif, blocker, lang, config) runTesseractFile(tif, blocker, lang, config)
}). })
fold1(_ + "\n\n\n" + _) .fold1(_ + "\n\n\n" + _)
} }
def extractImageFile[F[_]: Sync: ContextShift](img: Path, blocker: Blocker, lang: String, config: Config): Stream[F, String] = def extractImageFile[F[_]: Sync: ContextShift](
img: Path,
blocker: Blocker,
lang: String,
config: Config
): Stream[F, String] =
runTesseractFile(img, blocker, lang, config) runTesseractFile(img, blocker, lang, config)
/** Run ghostscript to extract all pdf pages into tiff files. The /** Run ghostscript to extract all pdf pages into tiff files. The
* files are stored to a temporary location on disk and returned. * files are stored to a temporary location on disk and returned.
*/ */
private[text] def runGhostscript[F[_]: Sync: ContextShift]( private[text] def runGhostscript[F[_]: Sync: ContextShift](
pdf: Stream[F, Byte] pdf: Stream[F, Byte],
, cfg: Config cfg: Config,
, wd: Path wd: Path,
, blocker: Blocker): Stream[F, Path] = { blocker: Blocker
): Stream[F, Path] = {
val xargs = val xargs =
if (cfg.pageRange.begin > 0) s"-dLastPage=${cfg.pageRange.begin}" +: cfg.ghostscript.command.args if (cfg.pageRange.begin > 0)
s"-dLastPage=${cfg.pageRange.begin}" +: cfg.ghostscript.command.args
else cfg.ghostscript.command.args else cfg.ghostscript.command.args
val cmd = cfg.ghostscript.command.copy(args = xargs).mapArgs(replace(Map( val cmd = cfg.ghostscript.command
"{{infile}}" -> "-", .copy(args = xargs)
"{{outfile}}" -> "%d.tif" .mapArgs(
))) replace(
SystemCommand.execSuccess(cmd, blocker, wd = Some(wd), stdin = pdf). Map(
evalMap({ _ => "{{infile}}" -> "-",
"{{outfile}}" -> "%d.tif"
)
)
)
SystemCommand
.execSuccess(cmd, blocker, wd = Some(wd), stdin = pdf)
.evalMap({ _ =>
File.listFiles(pathEndsWith(".tif"), wd) File.listFiles(pathEndsWith(".tif"), wd)
}). })
flatMap(fs => Stream.emits(fs)) .flatMap(fs => Stream.emits(fs))
} }
/** Run ghostscript to extract all pdf pages into tiff files. The /** Run ghostscript to extract all pdf pages into tiff files. The
* files are stored to a temporary location on disk and returned. * files are stored to a temporary location on disk and returned.
*/ */
private[text] def runGhostscriptFile[F[_]: Sync: ContextShift]( private[text] def runGhostscriptFile[F[_]: Sync: ContextShift](
pdf: Path pdf: Path,
, ghostscript: Config.Command ghostscript: Config.Command,
, wd: Path, blocker: Blocker): Stream[F, Path] = { wd: Path,
val cmd = ghostscript.mapArgs(replace(Map( blocker: Blocker
"{{infile}}" -> pdf.toAbsolutePath.toString, ): Stream[F, Path] = {
"{{outfile}}" -> "%d.tif" val cmd = ghostscript.mapArgs(
))) replace(
SystemCommand.execSuccess[F](cmd, blocker, wd = Some(wd)). Map(
evalMap({ _ => "{{infile}}" -> pdf.toAbsolutePath.toString,
"{{outfile}}" -> "%d.tif"
)
)
)
SystemCommand
.execSuccess[F](cmd, blocker, wd = Some(wd))
.evalMap({ _ =>
File.listFiles(pathEndsWith(".tif"), wd) File.listFiles(pathEndsWith(".tif"), wd)
}). })
flatMap(fs => Stream.emits(fs)) .flatMap(fs => Stream.emits(fs))
} }
private def pathEndsWith(ext: String): Path => Boolean = private def pathEndsWith(ext: String): Path => Boolean =
@ -84,65 +119,72 @@ object Ocr {
/** Run unpaper to optimize the image for ocr. The /** Run unpaper to optimize the image for ocr. The
* files are stored to a temporary location on disk and returned. * files are stored to a temporary location on disk and returned.
*/ */
private[text] def runUnpaperFile[F[_]: Sync: ContextShift](img: Path private[text] def runUnpaperFile[F[_]: Sync: ContextShift](
, unpaper: Config.Command img: Path,
, wd: Path, blocker: Blocker): Stream[F, Path] = { unpaper: Config.Command,
val targetFile = img.resolveSibling("u-"+ img.getFileName.toString).toAbsolutePath wd: Path,
val cmd = unpaper.mapArgs(replace(Map( blocker: Blocker
"{{infile}}" -> img.toAbsolutePath.toString, ): Stream[F, Path] = {
"{{outfile}}" -> targetFile.toString val targetFile = img.resolveSibling("u-" + img.getFileName.toString).toAbsolutePath
))) val cmd = unpaper.mapArgs(
SystemCommand.execSuccess[F](cmd, blocker, wd = Some(wd)). replace(
map(_ => targetFile). Map(
handleErrorWith(th => { "{{infile}}" -> img.toAbsolutePath.toString,
logger.warn(s"Unpaper command failed: ${th.getMessage}. Using input file for text extraction.") "{{outfile}}" -> targetFile.toString
)
)
)
SystemCommand.execSuccess[F](cmd, blocker, wd = Some(wd)).map(_ => targetFile).handleErrorWith {
th =>
logger
.warn(s"Unpaper command failed: ${th.getMessage}. Using input file for text extraction.")
Stream.emit(img) Stream.emit(img)
}) }
} }
/** Run tesseract on the given image file and return the extracted /** Run tesseract on the given image file and return the extracted
* text. * text.
*/ */
private[text] def runTesseractFile[F[_]: Sync: ContextShift]( private[text] def runTesseractFile[F[_]: Sync: ContextShift](
img: Path img: Path,
, blocker: Blocker blocker: Blocker,
, lang: String lang: String,
, config: Config): Stream[F, String] = { config: Config
): Stream[F, String] =
// tesseract cannot cope with absolute filenames // tesseract cannot cope with absolute filenames
// so use the parent as working dir // so use the parent as working dir
runUnpaperFile(img, config.unpaper.command, img.getParent, blocker). runUnpaperFile(img, config.unpaper.command, img.getParent, blocker).flatMap { uimg =>
flatMap(uimg => { val cmd = config.tesseract.command.mapArgs(
val cmd = config.tesseract.command.mapArgs(replace(Map( replace(Map("{{file}}" -> uimg.getFileName.toString, "{{lang}}" -> fixLanguage(lang)))
"{{file}}" -> uimg.getFileName.toString )
, "{{lang}}" -> fixLanguage(lang)))) SystemCommand.execSuccess[F](cmd, blocker, wd = Some(uimg.getParent)).map(_.stdout)
SystemCommand.execSuccess[F](cmd, blocker, wd = Some(uimg.getParent)).map(_.stdout) }
})
}
/** Run tesseract on the given image file and return the extracted /** Run tesseract on the given image file and return the extracted
* text. * text.
*/ */
private[text] def runTesseractStdin[F[_]: Sync: ContextShift]( private[text] def runTesseractStdin[F[_]: Sync: ContextShift](
img: Stream[F, Byte] img: Stream[F, Byte],
, blocker: Blocker blocker: Blocker,
, lang: String lang: String,
, config: Config): Stream[F, String] = { config: Config
val cmd = config.tesseract.command.mapArgs(replace(Map( ): Stream[F, String] = {
"{{file}}" -> "stdin" val cmd = config.tesseract.command
, "{{lang}}" -> fixLanguage(lang)))) .mapArgs(replace(Map("{{file}}" -> "stdin", "{{lang}}" -> fixLanguage(lang))))
SystemCommand.execSuccess(cmd, blocker, stdin = img).map(_.stdout) SystemCommand.execSuccess(cmd, blocker, stdin = img).map(_.stdout)
} }
private def replace(repl: Map[String, String]): String => String = private def replace(repl: Map[String, String]): String => String =
s => repl.foldLeft(s) { case (res, (k, v)) => s =>
res.replace(k, v) repl.foldLeft(s) {
} case (res, (k, v)) =>
res.replace(k, v)
}
private def fixLanguage(lang: String): String = private def fixLanguage(lang: String): String =
lang match { lang match {
case "de" => "deu" case "de" => "deu"
case "en" => "eng" case "en" => "eng"
case l => l case l => l
} }
} }

View File

@ -16,57 +16,87 @@ object SystemCommand {
final case class Result(rc: Int, stdout: String, stderr: String) final case class Result(rc: Int, stdout: String, stderr: String)
def exec[F[_]: Sync: ContextShift]( cmd: Config.Command def exec[F[_]: Sync: ContextShift](
, blocker: Blocker cmd: Config.Command,
, wd: Option[Path] = None blocker: Blocker,
, stdin: Stream[F, Byte] = Stream.empty): Stream[F, Result] = wd: Option[Path] = None,
startProcess(cmd, wd){ proc => stdin: Stream[F, Byte] = Stream.empty
): Stream[F, Result] =
startProcess(cmd, wd) { proc =>
Stream.eval { Stream.eval {
for { for {
_ <- writeToProcess(stdin, proc, blocker) _ <- writeToProcess(stdin, proc, blocker)
term <- Sync[F].delay(proc.waitFor(cmd.timeout.seconds, TimeUnit.SECONDS)) term <- Sync[F].delay(proc.waitFor(cmd.timeout.seconds, TimeUnit.SECONDS))
_ <- if (term) logger.fdebug(s"Command `${cmd.cmdString}` finished: ${proc.exitValue}") _ <- if (term) logger.fdebug(s"Command `${cmd.cmdString}` finished: ${proc.exitValue}")
else logger.fwarn(s"Command `${cmd.cmdString}` did not finish in ${cmd.timeout.formatExact}!") else
_ <- if (!term) timeoutError(proc, cmd) else Sync[F].pure(()) logger.fwarn(
out <- if (term) inputStreamToString(proc.getInputStream, blocker) else Sync[F].pure("") s"Command `${cmd.cmdString}` did not finish in ${cmd.timeout.formatExact}!"
err <- if (term) inputStreamToString(proc.getErrorStream, blocker) else Sync[F].pure("") )
_ <- if (!term) timeoutError(proc, cmd) else Sync[F].pure(())
out <- if (term) inputStreamToString(proc.getInputStream, blocker) else Sync[F].pure("")
err <- if (term) inputStreamToString(proc.getErrorStream, blocker) else Sync[F].pure("")
} yield Result(proc.exitValue, out, err) } yield Result(proc.exitValue, out, err)
} }
} }
def execSuccess[F[_]: Sync: ContextShift](cmd: Config.Command, blocker: Blocker, wd: Option[Path] = None, stdin: Stream[F, Byte] = Stream.empty): Stream[F, Result] = def execSuccess[F[_]: Sync: ContextShift](
cmd: Config.Command,
blocker: Blocker,
wd: Option[Path] = None,
stdin: Stream[F, Byte] = Stream.empty
): Stream[F, Result] =
exec(cmd, blocker, wd, stdin).flatMap { r => exec(cmd, blocker, wd, stdin).flatMap { r =>
if (r.rc != 0) Stream.raiseError[F](new Exception(s"Command `${cmd.cmdString}` returned non-zero exit code ${r.rc}. Stderr: ${r.stderr}")) if (r.rc != 0)
Stream.raiseError[F](
new Exception(
s"Command `${cmd.cmdString}` returned non-zero exit code ${r.rc}. Stderr: ${r.stderr}"
)
)
else Stream.emit(r) else Stream.emit(r)
} }
private def startProcess[F[_]: Sync,A](cmd: Config.Command, wd: Option[Path])(f: Process => Stream[F,A]): Stream[F, A] = { private def startProcess[F[_]: Sync, A](cmd: Config.Command, wd: Option[Path])(
f: Process => Stream[F, A]
): Stream[F, A] = {
val log = logger.fdebug(s"Running external command: ${cmd.cmdString}") val log = logger.fdebug(s"Running external command: ${cmd.cmdString}")
val proc = log *> Sync[F].delay { val proc = log *> Sync[F].delay {
val pb = new ProcessBuilder(cmd.toCmd.asJava) val pb = new ProcessBuilder(cmd.toCmd.asJava)
wd.map(_.toFile).foreach(pb.directory) wd.map(_.toFile).foreach(pb.directory)
pb.start() pb.start()
} }
Stream.bracket(proc)(p => logger.fdebug(s"Closing process: `${cmd.cmdString}`").map { _ => Stream
p.destroy() .bracket(proc)(p =>
}).flatMap(f) logger.fdebug(s"Closing process: `${cmd.cmdString}`").map { _ =>
p.destroy()
}
)
.flatMap(f)
} }
private def inputStreamToString[F[_]: Sync: ContextShift](in: InputStream, blocker: Blocker): F[String] = private def inputStreamToString[F[_]: Sync: ContextShift](
io.readInputStream(Sync[F].pure(in), 16 * 1024, blocker, closeAfterUse = false). in: InputStream,
through(text.utf8Decode). blocker: Blocker
chunks. ): F[String] =
map(_.toVector.mkString). io.readInputStream(Sync[F].pure(in), 16 * 1024, blocker, closeAfterUse = false)
fold1(_ + _). .through(text.utf8Decode)
compile.last. .chunks
map(_.getOrElse("")) .map(_.toVector.mkString)
.fold1(_ + _)
.compile
.last
.map(_.getOrElse(""))
private def writeToProcess[F[_]: Sync: ContextShift](data: Stream[F, Byte], proc: Process, blocker: Blocker): F[Unit] = private def writeToProcess[F[_]: Sync: ContextShift](
data.through(io.writeOutputStream(Sync[F].delay(proc.getOutputStream), blocker)). data: Stream[F, Byte],
compile.drain proc: Process,
blocker: Blocker
): F[Unit] =
data.through(io.writeOutputStream(Sync[F].delay(proc.getOutputStream), blocker)).compile.drain
private def timeoutError[F[_]: Sync](proc: Process, cmd: Config.Command): F[Unit] = private def timeoutError[F[_]: Sync](proc: Process, cmd: Config.Command): F[Unit] =
Sync[F].delay(proc.destroyForcibly()).attempt *> { Sync[F].delay(proc.destroyForcibly()).attempt *> {
Sync[F].raiseError(new Exception(s"Command `${cmd.cmdString}` timed out (${cmd.timeout.formatExact})")) Sync[F].raiseError(
new Exception(s"Command `${cmd.cmdString}` timed out (${cmd.timeout.formatExact})")
)
} }
} }

View File

@ -12,18 +12,17 @@ object TikaMimetype {
private val tika = new TikaConfig().getDetector private val tika = new TikaConfig().getDetector
private def convert(mt: MediaType): MimeType = private def convert(mt: MediaType): MimeType =
Option(mt).map(_.toString). Option(mt)
map(MimeType.parse). .map(_.toString)
flatMap(_.toOption). .map(MimeType.parse)
map(normalize). .flatMap(_.toOption)
getOrElse(MimeType.octetStream) .map(normalize)
.getOrElse(MimeType.octetStream)
private def makeMetadata(hint: MimeTypeHint): Metadata = { private def makeMetadata(hint: MimeTypeHint): Metadata = {
val md = new Metadata val md = new Metadata
hint.filename. hint.filename.foreach(md.set(TikaMetadataKeys.RESOURCE_NAME_KEY, _))
foreach(md.set(TikaMetadataKeys.RESOURCE_NAME_KEY, _)) hint.advertised.foreach(md.set(HttpHeaders.CONTENT_TYPE, _))
hint.advertised.
foreach(md.set(HttpHeaders.CONTENT_TYPE, _))
md md
} }
@ -33,13 +32,10 @@ object TikaMimetype {
case _ => in case _ => in
} }
private def fromBytes(bv: Array[Byte], hint: MimeTypeHint): MimeType = { private def fromBytes(bv: Array[Byte], hint: MimeTypeHint): MimeType =
convert(tika.detect(new java.io.ByteArrayInputStream(bv), makeMetadata(hint))) convert(tika.detect(new java.io.ByteArrayInputStream(bv), makeMetadata(hint)))
}
def detect[F[_]: Sync](data: Stream[F, Byte]): F[MimeType] = def detect[F[_]: Sync](data: Stream[F, Byte]): F[MimeType] =
data.take(1024). data.take(1024).compile.toVector.map(bytes => fromBytes(bytes.toArray, MimeTypeHint.none))
compile.toVector.
map(bytes => fromBytes(bytes.toArray, MimeTypeHint.none))
} }