Apply scalafmt to all files

This commit is contained in:
Eike Kettner 2020-02-09 01:54:11 +01:00
parent 6a9ec42a03
commit 5c37efeaba
32 changed files with 442 additions and 362 deletions

View File

@ -32,8 +32,8 @@ object BackendApp {
def create[F[_]: ConcurrentEffect: ContextShift](
cfg: Config,
store: Store[F],
httpClientEc: ExecutionContext,
blocker: Blocker
httpClientEc: ExecutionContext,
blocker: Blocker
): Resource[F, BackendApp[F]] =
for {
queue <- JobQueue(store)

View File

@ -176,7 +176,7 @@ object OItem {
def findByFileSource(checksum: String, sourceId: Ident): F[Vector[RItem]] =
store.transact((for {
coll <- OptionT(RSource.findCollective(sourceId))
coll <- OptionT(RSource.findCollective(sourceId))
items <- OptionT.liftF(QItem.findByChecksum(checksum, coll))
} yield items).getOrElse(Vector.empty))

View File

@ -113,10 +113,10 @@ object OMail {
def createSettings(accId: AccountId, s: SmtpSettings): F[AddResult] =
(for {
ru <- OptionT(store.transact(s.toRecord(accId).value))
ru <- OptionT(store.transact(s.toRecord(accId).value))
ins = RUserEmail.insert(ru)
exists = RUserEmail.exists(ru.uid, ru.name)
res <- OptionT.liftF(store.add(ins, exists))
res <- OptionT.liftF(store.add(ins, exists))
} yield res).getOrElse(AddResult.Failure(new Exception("User not found")))
def updateSettings(accId: AccountId, name: Ident, data: SmtpSettings): F[Int] = {
@ -143,8 +143,10 @@ object OMail {
for {
_ <- OptionT.liftF(store.transact(RItem.existsById(m.item))).filter(identity)
ras <- OptionT.liftF(
store.transact(RAttachment.findByItemAndCollectiveWithMeta(m.item, accId.collective))
)
store.transact(
RAttachment.findByItemAndCollectiveWithMeta(m.item, accId.collective)
)
)
} yield {
val addAttach = m.attach.filter(ras).map { a =>
Attach[F](Stream.emit(a._2).through(store.bitpeace.fetchData2(RangeDef.all)))
@ -169,15 +171,15 @@ object OMail {
def storeMail(msgId: String, cfg: RUserEmail): F[Either[SendResult, Ident]] = {
val save = for {
data <- RSentMail.forItem(
m.item,
accId,
msgId,
cfg.mailFrom,
name,
m.subject,
m.recipients,
m.body
)
m.item,
accId,
msgId,
cfg.mailFrom,
name,
m.subject,
m.recipients,
m.body
)
_ <- OptionT.liftF(RSentMail.insert(data._1))
_ <- OptionT.liftF(RSentMailItem.insert(data._2))
} yield data._1.id
@ -195,7 +197,7 @@ object OMail {
mail <- createMail(mailCfg)
mid <- OptionT.liftF(sendMail(mailCfg.toMailConfig, mail))
res <- mid.traverse(id => OptionT.liftF(storeMail(id, mailCfg)))
conv = res.fold(identity, _.fold(identity, id => SendResult.Success(id)))
conv = res.fold(identity, _.fold(identity, id => SendResult.Success(id)))
} yield conv).getOrElse(SendResult.NotFound)
}

View File

@ -19,9 +19,9 @@ object AccountId {
case n if n > 0 && input.length > 2 =>
val coll = input.substring(0, n)
val user = input.substring(n + 1)
Ident.fromString(coll).
flatMap(collId => Ident.fromString(user).
map(userId => AccountId(collId, userId)))
Ident
.fromString(coll)
.flatMap(collId => Ident.fromString(user).map(userId => AccountId(collId, userId)))
case _ =>
invalid
}

View File

@ -12,5 +12,4 @@ object BaseJsonCodecs {
implicit val decodeInstantEpoch: Decoder[Instant] =
Decoder.decodeLong.map(Instant.ofEpochMilli)
}

View File

@ -21,32 +21,29 @@ object CollectiveState {
* action. */
case object Blocked extends CollectiveState
def fromString(s: String): Either[String, CollectiveState] =
s.toLowerCase match {
case "active" => Right(Active)
case "active" => Right(Active)
case "readonly" => Right(ReadOnly)
case "closed" => Right(Closed)
case "blocked" => Right(Blocked)
case _ => Left(s"Unknown state: $s")
case "closed" => Right(Closed)
case "blocked" => Right(Blocked)
case _ => Left(s"Unknown state: $s")
}
/** Converts `str` into a [[CollectiveState]] via `fromString`.
  *
  * Intended for call sites where the input is known to be valid: an
  * unknown value is not returned as a `Left` but raised through
  * `sys.error`, carrying the message produced by `fromString`.
  */
def unsafe(str: String): CollectiveState =
  fromString(str) match {
    case Right(state) => state
    case Left(msg)    => sys.error(msg)
  }
def asString(state: CollectiveState): String = state match {
case Active => "active"
case Blocked => "blocked"
case Closed => "closed"
case Active => "active"
case Blocked => "blocked"
case Closed => "closed"
case ReadOnly => "readonly"
}
implicit val collectiveStateEncoder: Encoder[CollectiveState] =
Encoder.encodeString.contramap(CollectiveState.asString)
implicit val collectiveStateDecoder: Decoder[CollectiveState] =
Decoder.decodeString.emap(CollectiveState.fromString)
}
}

View File

@ -10,22 +10,22 @@ sealed trait ContactKind { self: Product =>
object ContactKind {
val all = List()
case object Phone extends ContactKind
case object Mobile extends ContactKind
case object Fax extends ContactKind
case object Email extends ContactKind
case object Phone extends ContactKind
case object Mobile extends ContactKind
case object Fax extends ContactKind
case object Email extends ContactKind
case object Docspell extends ContactKind
case object Website extends ContactKind
case object Website extends ContactKind
def fromString(s: String): Either[String, ContactKind] =
s.toLowerCase match {
case "phone" => Right(Phone)
case "mobile" => Right(Mobile)
case "fax" => Right(Fax)
case "email" => Right(Email)
case "phone" => Right(Phone)
case "mobile" => Right(Mobile)
case "fax" => Right(Fax)
case "email" => Right(Email)
case "docspell" => Right(Docspell)
case "website" => Right(Website)
case _ => Left(s"Not a state value: $s")
case "website" => Right(Website)
case _ => Left(s"Not a state value: $s")
}
def unsafe(str: String): ContactKind =
@ -34,7 +34,6 @@ object ContactKind {
def asString(s: ContactKind): String =
s.asString.toLowerCase
implicit val contactKindEncoder: Encoder[ContactKind] =
Encoder.encodeString.contramap(_.asString)

View File

@ -49,6 +49,6 @@ object Duration {
def stopTime[F[_]: Sync]: F[F[Duration]] =
for {
now <- Timestamp.current[F]
end = Timestamp.current[F]
end = Timestamp.current[F]
} yield end.map(e => Duration.millis(e.toMillis - now.toMillis))
}

View File

@ -10,48 +10,41 @@ sealed trait JobState { self: Product =>
object JobState {
/** Waiting for being executed. */
case object Waiting extends JobState {
}
case object Waiting extends JobState {}
/** A scheduler has picked up this job and will pass it to the next
* free slot. */
case object Scheduled extends JobState {
}
case object Scheduled extends JobState {}
/** Is currently executing */
case object Running extends JobState {
}
case object Running extends JobState {}
/** Finished with failure and is being retried. */
case object Stuck extends JobState {
}
case object Stuck extends JobState {}
/** Finished finally with a failure */
case object Failed extends JobState {
}
case object Failed extends JobState {}
/** Finished by cancellation. */
case object Cancelled extends JobState {
}
case object Cancelled extends JobState {}
/** Finished with success */
case object Success extends JobState {
}
case object Success extends JobState {}
val all: Set[JobState] = Set(Waiting, Scheduled, Running, Stuck, Failed, Cancelled, Success)
val all: Set[JobState] = Set(Waiting, Scheduled, Running, Stuck, Failed, Cancelled, Success)
val queued: Set[JobState] = Set(Waiting, Scheduled, Stuck)
val done: Set[JobState] = Set(Failed, Cancelled, Success)
val done: Set[JobState] = Set(Failed, Cancelled, Success)
def parse(str: String): Either[String, JobState] =
str.toLowerCase match {
case "waiting" => Right(Waiting)
case "waiting" => Right(Waiting)
case "scheduled" => Right(Scheduled)
case "running" => Right(Running)
case "stuck" => Right(Stuck)
case "failed" => Right(Failed)
case "running" => Right(Running)
case "stuck" => Right(Stuck)
case "failed" => Right(Failed)
case "cancelled" => Right(Cancelled)
case "success" => Right(Success)
case _ => Left(s"Not a job state: $str")
case "success" => Right(Success)
case _ => Left(s"Not a job state: $str")
}
def unsafe(str: String): JobState =
@ -60,7 +53,6 @@ object JobState {
def asString(state: JobState): String =
state.name
implicit val jobStateEncoder: Encoder[JobState] =
Encoder.encodeString.contramap(_.name)

View File

@ -51,8 +51,8 @@ case class LenientUri(
def open[F[_]: Sync]: Either[String, Resource[F, HttpURLConnection]] =
toJavaUrl.map { url =>
Resource
.make(Sync[F].delay(url.openConnection().asInstanceOf[HttpURLConnection]))(
conn => Sync[F].delay(conn.disconnect())
.make(Sync[F].delay(url.openConnection().asInstanceOf[HttpURLConnection]))(conn =>
Sync[F].delay(conn.disconnect())
)
}
@ -61,17 +61,16 @@ case class LenientUri(
.emit(Either.catchNonFatal(new URL(asString)))
.covary[F]
.rethrow
.flatMap(
url => fs2.io.readInputStream(Sync[F].delay(url.openStream()), chunkSize, blocker, true)
.flatMap(url =>
fs2.io.readInputStream(Sync[F].delay(url.openStream()), chunkSize, blocker, true)
)
def host: Option[String] =
authority.map(
a =>
a.indexOf(':') match {
case -1 => a
case n => a.substring(0, n)
}
authority.map(a =>
a.indexOf(':') match {
case -1 => a
case n => a.substring(0, n)
}
)
def asString: String = {

View File

@ -8,13 +8,11 @@ import io.circe.generic.semiauto._
case class MetaProposalList private (proposals: List[MetaProposal]) {
def isEmpty: Boolean = proposals.isEmpty
def isEmpty: Boolean = proposals.isEmpty
def nonEmpty: Boolean = proposals.nonEmpty
def hasResults(mt: MetaProposalType, mts: MetaProposalType*): Boolean = {
(mts :+ mt).map(mtp => proposals.exists(_.proposalType == mtp)).
reduce(_ && _)
}
/** Checks whether this list holds a proposal for every given type.
  *
  * @param mt  the first (mandatory) proposal type to look for
  * @param mts further proposal types that must be present as well
  * @return `true` only if each requested type occurs among `proposals`
  */
def hasResults(mt: MetaProposalType, mts: MetaProposalType*): Boolean = {
  val wanted = mts :+ mt
  wanted.forall(kind => proposals.exists(p => p.proposalType == kind))
}
/** Tells whether the set of proposal types present in this list is
  * exactly the set listed in `MetaProposalType.all`.
  */
def hasResultsAll: Boolean = {
  val present  = proposals.map(_.proposalType).toSet
  val expected = MetaProposalType.all.toSet
  present == expected
}
@ -23,7 +21,7 @@ case class MetaProposalList private (proposals: List[MetaProposal]) {
proposals.foldLeft(Set.empty[MetaProposalType])(_ + _.proposalType)
def fillEmptyFrom(ml: MetaProposalList): MetaProposalList = {
val list = ml.proposals.foldLeft(proposals){ (mine, mp) =>
val list = ml.proposals.foldLeft(proposals) { (mine, mp) =>
if (hasResults(mp.proposalType)) mine
else mp :: mine
}
@ -48,21 +46,24 @@ object MetaProposalList {
fromSeq1(mt, refs.map(ref => Candidate(ref, Set(label))))
def fromSeq1(mt: MetaProposalType, refs: Seq[Candidate]): MetaProposalList =
NonEmptyList.fromList(refs.toList).
map(nl => MetaProposalList.of(MetaProposal(mt, nl))).
getOrElse(empty)
NonEmptyList
.fromList(refs.toList)
.map(nl => MetaProposalList.of(MetaProposal(mt, nl)))
.getOrElse(empty)
def fromMap(m: Map[MetaProposalType, MetaProposal]): MetaProposalList = {
/** Builds a list from a map of proposals.
  *
  * The map key wins over whatever type is stored in the value: every
  * proposal is copied with `proposalType` set to its key.
  */
def fromMap(m: Map[MetaProposalType, MetaProposal]): MetaProposalList = {
  val retagged =
    for ((kind, proposal) <- m.toList) yield proposal.copy(proposalType = kind)
  new MetaProposalList(retagged)
}
}
def flatten(ml: Seq[MetaProposalList]): MetaProposalList = {
val init: Map[MetaProposalType, MetaProposal] = Map.empty
def updateMap(map: Map[MetaProposalType, MetaProposal], mp: MetaProposal): Map[MetaProposalType, MetaProposal] =
def updateMap(
map: Map[MetaProposalType, MetaProposal],
mp: MetaProposal
): Map[MetaProposalType, MetaProposal] =
map.get(mp.proposalType) match {
case Some(mp0) => map.updated(mp.proposalType, mp0.addIdRef(mp.values.toList))
case None => map.updated(mp.proposalType, mp)
case None => map.updated(mp.proposalType, mp)
}
val merged = ml.foldLeft(init) { (map, el) =>

View File

@ -10,25 +10,25 @@ sealed trait MetaProposalType { self: Product =>
object MetaProposalType {
case object CorrOrg extends MetaProposalType
case object CorrOrg extends MetaProposalType
case object CorrPerson extends MetaProposalType
case object ConcPerson extends MetaProposalType
case object ConcEquip extends MetaProposalType
case object DocDate extends MetaProposalType
case object DueDate extends MetaProposalType
case object ConcEquip extends MetaProposalType
case object DocDate extends MetaProposalType
case object DueDate extends MetaProposalType
val all: List[MetaProposalType] =
List(CorrOrg, CorrPerson, ConcPerson, ConcEquip)
def fromString(str: String): Either[String, MetaProposalType] =
str.toLowerCase match {
case "corrorg" => Right(CorrOrg)
case "corrorg" => Right(CorrOrg)
case "corrperson" => Right(CorrPerson)
case "concperson" => Right(ConcPerson)
case "concequip" => Right(ConcEquip)
case "docdate" => Right(DocDate)
case "duedate" => Right(DueDate)
case _ => Left(s"Invalid item-proposal-type: $str")
case "concequip" => Right(ConcEquip)
case "docdate" => Right(DocDate)
case "duedate" => Right(DueDate)
case _ => Left(s"Invalid item-proposal-type: $str")
}
def unsafe(str: String): MetaProposalType =

View File

@ -11,31 +11,30 @@ sealed trait NerTag { self: Product =>
object NerTag {
case object Organization extends NerTag
case object Person extends NerTag
case object Location extends NerTag
case object Misc extends NerTag
case object Email extends NerTag
case object Website extends NerTag
case object Date extends NerTag
case object Person extends NerTag
case object Location extends NerTag
case object Misc extends NerTag
case object Email extends NerTag
case object Website extends NerTag
case object Date extends NerTag
val all: List[NerTag] = List(Organization, Person, Location)
def fromString(str: String): Either[String, NerTag] =
str.toLowerCase match {
case "organization" => Right(Organization)
case "person" => Right(Person)
case "location" => Right(Location)
case "misc" => Right(Misc)
case "email" => Right(Email)
case "website" => Right(Website)
case "date" => Right(Date)
case _ => Left(s"Invalid ner tag: $str")
case "person" => Right(Person)
case "location" => Right(Location)
case "misc" => Right(Misc)
case "email" => Right(Email)
case "website" => Right(Website)
case "date" => Right(Date)
case _ => Left(s"Invalid ner tag: $str")
}
def unsafe(str: String): NerTag =
fromString(str).fold(sys.error, identity)
implicit val jsonDecoder: Decoder[NerTag] =
Decoder.decodeString.emap(fromString)
implicit val jsonEncoder: Encoder[NerTag] =

View File

@ -24,12 +24,14 @@ object Implicits {
ConfigReader[String].emap(reason(Ident.fromString))
implicit val byteVectorReader: ConfigReader[ByteVector] =
ConfigReader[String].emap(reason(str => {
ConfigReader[String].emap(reason { str =>
if (str.startsWith("hex:")) ByteVector.fromHex(str.drop(4)).toRight("Invalid hex value.")
else if (str.startsWith("b64:")) ByteVector.fromBase64(str.drop(4)).toRight("Invalid Base64 string.")
else if (str.startsWith("b64:"))
ByteVector.fromBase64(str.drop(4)).toRight("Invalid Base64 string.")
else ByteVector.encodeUtf8(str).left.map(ex => s"Invalid utf8 string: ${ex.getMessage}")
}))
})
def reason[A: ClassTag](f: String => Either[String, A]): String => Either[FailureReason, A] =
in => f(in).left.map(str => CannotConvert(in, implicitly[ClassTag[A]].runtimeClass.toString, str))
in =>
f(in).left.map(str => CannotConvert(in, implicitly[ClassTag[A]].runtimeClass.toString, str))
}

View File

@ -2,9 +2,6 @@ package docspell.common
package object syntax {
object all extends EitherSyntax
with StreamSyntax
with StringSyntax
with LoggerSyntax
object all extends EitherSyntax with StreamSyntax with StringSyntax with LoggerSyntax
}

View File

@ -16,7 +16,6 @@ object QueryParam {
implicit val queryStringDecoder: QueryParamDecoder[QueryString] =
QueryParamDecoder[String].map(s => QueryString(s.trim.toLowerCase))
// implicit val booleanDecoder: QueryParamDecoder[Boolean] =
// QueryParamDecoder.fromUnsafeCast(qp => Option(qp.value).exists(_.equalsIgnoreCase("true")))(
// "Boolean"

View File

@ -139,8 +139,7 @@ object ItemRoutes {
}
}
final implicit class OptionString(opt: Option[String]) {
/** Syntax extension for optional strings. */
implicit final class OptionString(opt: Option[String]) {

  /** Trims the contained string and drops it if nothing is left.
    *
    * @return the trimmed value, or `None` if there was no value or it
    *         consisted of whitespace only
    */
  def notEmpty: Option[String] =
    for {
      raw <- opt
      trimmed = raw.trim
      if trimmed.nonEmpty
    } yield trimmed
}

View File

@ -24,13 +24,13 @@ object MailSendRoutes {
HttpRoutes.of {
case req @ POST -> Root / Ident(name) / Ident(id) =>
for {
in <- req.as[SimpleMail]
in <- req.as[SimpleMail]
mail = convertIn(id, in)
res <- mail.traverse(m => backend.mail.sendMail(user.account, name, m))
res <- mail.traverse(m => backend.mail.sendMail(user.account, name, m))
resp <- res.fold(
err => Ok(BasicResult(false, s"Invalid mail data: $err")),
res => Ok(convertOut(res))
)
err => Ok(BasicResult(false, s"Invalid mail data: $err")),
res => Ok(convertOut(res))
)
} yield resp
}
}
@ -39,7 +39,7 @@ object MailSendRoutes {
for {
rec <- s.recipients.traverse(EmilUtil.readMailAddress)
fileIds <- s.attachmentIds.traverse(Ident.fromString)
sel = if (s.addAllAttachments) AttachSelection.All else AttachSelection.Selected(fileIds)
sel = if (s.addAllAttachments) AttachSelection.All else AttachSelection.Selected(fileIds)
} yield ItemMail(item, s.subject, rec, s.body, sel)
def convertOut(res: SendResult): BasicResult =

View File

@ -29,7 +29,7 @@ object MailSettingsRoutes {
case GET -> Root :? QueryParam.QueryOpt(q) =>
for {
list <- backend.mail.getSettings(user.account, q.map(_.q))
res = list.map(convert)
res = list.map(convert)
resp <- Ok(EmailSettingsList(res.toList))
} yield resp
@ -45,13 +45,13 @@ object MailSettingsRoutes {
ru = makeSettings(in)
up <- OptionT.liftF(ru.traverse(r => backend.mail.createSettings(user.account, r)))
resp <- OptionT.liftF(
Ok(
up.fold(
err => BasicResult(false, err),
ar => Conversions.basicResult(ar, "Mail settings stored.")
)
)
)
Ok(
up.fold(
err => BasicResult(false, err),
ar => Conversions.basicResult(ar, "Mail settings stored.")
)
)
)
} yield resp).getOrElseF(NotFound())
case req @ PUT -> Root / Ident(name) =>
@ -60,24 +60,24 @@ object MailSettingsRoutes {
ru = makeSettings(in)
up <- OptionT.liftF(ru.traverse(r => backend.mail.updateSettings(user.account, name, r)))
resp <- OptionT.liftF(
Ok(
up.fold(
err => BasicResult(false, err),
n =>
if (n > 0) BasicResult(true, "Mail settings stored.")
else BasicResult(false, "Mail settings could not be saved")
)
)
)
Ok(
up.fold(
err => BasicResult(false, err),
n =>
if (n > 0) BasicResult(true, "Mail settings stored.")
else BasicResult(false, "Mail settings could not be saved")
)
)
)
} yield resp).getOrElseF(NotFound())
case DELETE -> Root / Ident(name) =>
for {
n <- backend.mail.deleteSettings(user.account, name)
resp <- Ok(
if (n > 0) BasicResult(true, "Mail settings removed")
else BasicResult(false, "Mail settings could not be removed")
)
if (n > 0) BasicResult(true, "Mail settings removed")
else BasicResult(false, "Mail settings could not be removed")
)
} yield resp
}

View File

@ -23,7 +23,7 @@ object SentMailRoutes {
HttpRoutes.of {
case GET -> Root / "item" / Ident(id) =>
for {
all <- backend.mail.getSentMailsForItem(user.account, id)
all <- backend.mail.getSentMailsForItem(user.account, id)
resp <- Ok(SentMails(all.map(convert).toList))
} yield resp
@ -35,7 +35,7 @@ object SentMailRoutes {
case DELETE -> Root / "mail" / Ident(mailId) =>
for {
n <- backend.mail.deleteSentMail(user.account, mailId)
n <- backend.mail.deleteSentMail(user.account, mailId)
resp <- Ok(BasicResult(n > 0, s"Mails deleted: $n"))
} yield resp
}

View File

@ -37,7 +37,7 @@ object TemplateRoutes {
new InnerRoutes[F] {
def doc =
HttpRoutes.of[F] {
case GET -> Root =>
case GET -> Root =>
for {
templ <- docTemplate
resp <- Ok(DocData().render(templ), `Content-Type`(`text/html`))

View File

@ -194,8 +194,9 @@ object QItem {
IC.cid.prefix("i").is(q.collective),
IC.state.prefix("i").isOneOf(q.states),
IC.incoming.prefix("i").isOrDiscard(q.direction),
name.map(n => or(IC.name.prefix("i").lowerLike(n), IC.notes.prefix("i").lowerLike(n))).
getOrElse(Fragment.empty),
name
.map(n => or(IC.name.prefix("i").lowerLike(n), IC.notes.prefix("i").lowerLike(n)))
.getOrElse(Fragment.empty),
RPerson.Columns.pid.prefix("p0").isOrDiscard(q.corrPerson),
ROrganization.Columns.oid.prefix("o0").isOrDiscard(q.corrOrg),
RPerson.Columns.pid.prefix("p1").isOrDiscard(q.concPerson),

View File

@ -8,12 +8,12 @@ object RFileMeta {
val table = fr"filemeta"
object Columns {
val id = Column("id")
val id = Column("id")
val timestamp = Column("timestamp")
val mimetype = Column("mimetype")
val length = Column("length")
val checksum = Column("checksum")
val chunks = Column("chunks")
val mimetype = Column("mimetype")
val length = Column("length")
val checksum = Column("checksum")
val chunks = Column("chunks")
val chunksize = Column("chunksize")
val all = List(id, timestamp, mimetype, length, checksum, chunks, chunksize)

View File

@ -52,8 +52,16 @@ object RSentMail {
for {
user <- OptionT(RUser.findByAccount(accId))
sm <- OptionT.liftF(
RSentMail[ConnectionIO](user.uid, messageId, sender, connName, subject, recipients, body)
)
RSentMail[ConnectionIO](
user.uid,
messageId,
sender,
connName,
subject,
recipients,
body
)
)
si <- OptionT.liftF(RSentMailItem[ConnectionIO](itemId, sm.id, Some(sm.created)))
} yield (sm, si)

View File

@ -9,43 +9,47 @@ object Contact {
private[this] val protocols = Set("ftp", "http", "https")
def annotate(text: String): Vector[NerLabel] =
TextSplitter.splitToken[Nothing](text, " \t\r\n".toSet).
map({ token =>
if (isEmailAddress(token.value)) NerLabel(token.value, NerTag.Email, token.begin, token.end).some
else if (isWebsite(token.value)) NerLabel(token.value, NerTag.Website, token.begin, token.end).some
TextSplitter
.splitToken[Nothing](text, " \t\r\n".toSet)
.map({ token =>
if (isEmailAddress(token.value))
NerLabel(token.value, NerTag.Email, token.begin, token.end).some
else if (isWebsite(token.value))
NerLabel(token.value, NerTag.Website, token.begin, token.end).some
else None
}).
flatMap(_.map(Stream.emit).getOrElse(Stream.empty)).
toVector
})
.flatMap(_.map(Stream.emit).getOrElse(Stream.empty))
.toVector
def isEmailAddress(str: String): Boolean = {
val atIdx = str.indexOf('@')
if (atIdx <= 0 || str.indexOf('@', atIdx + 1) > 0) false
else {
val name = str.substring(0, atIdx)
val dom = str.substring(atIdx + 1)
val dom = str.substring(atIdx + 1)
Domain.isDomain(dom) && name.forall(c => !c.isWhitespace)
}
}
def isWebsite(str: String): Boolean =
LenientUri.parse(str).
toOption.
map(uri => protocols.contains(uri.scheme.head)).
getOrElse(Domain.isDomain(str))
LenientUri
.parse(str)
.toOption
.map(uri => protocols.contains(uri.scheme.head))
.getOrElse(Domain.isDomain(str))
def isDocspellOpenUpload(str: String): Boolean = {
def isUploadPath(p: LenientUri.Path): Boolean =
p match {
case LenientUri.RootPath => false
case LenientUri.RootPath => false
case LenientUri.EmptyPath => false
case LenientUri.NonEmptyPath(segs) =>
Ident.fromString(segs.last).isRight &&
segs.init.takeRight(3) == List("open", "upload", "item")
}
LenientUri.parse(str).
toOption.
exists(uri => protocols.contains(uri.scheme.head) && isUploadPath(uri.path))
LenientUri
.parse(str)
.toOption
.exists(uri => protocols.contains(uri.scheme.head) && isUploadPath(uri.path))
}
}

View File

@ -11,7 +11,7 @@ private[text] object Tld {
/**
* Some selected TLDs.
*/
private [this] val known = List(
private[this] val known = List(
".com",
".org",
".net",

View File

@ -10,16 +10,22 @@ import scala.util.Try
object DateFind {
def findDates(text: String, lang: Language): Stream[Pure, NerDateLabel] = {
TextSplitter.splitToken(text, " \t.,\n\r/".toSet).
sliding(3).
filter(_.length == 3).
map(q => SimpleDate.fromParts(q.toList, lang).
map(sd => NerDateLabel(sd.toLocalDate,
NerLabel(text.substring(q(0).begin, q(2).end), NerTag.Date, q(0).begin, q(1).end)))).
collect({ case Some(d) => d })
}
/** Scans `text` for dates.
  *
  * The text is split into tokens at whitespace, `.`, `,` and `/`. Every
  * window of three consecutive tokens is handed to `SimpleDate.fromParts`;
  * windows that it cannot read as a date are dropped.
  *
  * @param text the text to search
  * @param lang the language passed on to `SimpleDate.fromParts`
  * @return one label per recognised date; the label text and its
  *         begin/end offsets cover the span from the start of the first
  *         token to the end of the third token
  */
def findDates(text: String, lang: Language): Stream[Pure, NerDateLabel] =
  TextSplitter
    .splitToken(text, " \t.,\n\r/".toSet)
    .sliding(3)
    .filter(_.length == 3)
    .map { q =>
      val first = q(0)
      val last  = q(2)
      SimpleDate
        .fromParts(q.toList, lang)
        .map(sd =>
          NerDateLabel(
            sd.toLocalDate,
            // The end offset must belong to the third token so that it
            // matches the substring used as label text (it previously
            // pointed at the end of the second token).
            NerLabel(text.substring(first.begin, last.end), NerTag.Date, first.begin, last.end)
          )
        )
    }
    .collect({ case Some(d) => d })
private case class SimpleDate(year: Int, month: Int, day: Int) {
def toLocalDate: LocalDate =
@ -27,13 +33,13 @@ object DateFind {
}
private object SimpleDate {
val p0 = readYear >> readMonth >> readDay map {
val p0 = (readYear >> readMonth >> readDay).map {
case ((y, m), d) => SimpleDate(y, m, d)
}
val p1 = readDay >> readMonth >> readYear map {
val p1 = (readDay >> readMonth >> readYear).map {
case ((d, m), y) => SimpleDate(y, m, d)
}
val p2 = readMonth >> readDay >> readYear map {
val p2 = (readMonth >> readDay >> readYear).map {
case ((m, d), y) => SimpleDate(y, m, d)
}
@ -46,14 +52,14 @@ object DateFind {
p.read(parts).toOption
}
def readYear: Reader[Int] = {
Reader.readFirst(w => w.value.length match {
case 2 => Try(w.value.toInt).filter(n => n >= 0).toOption
case 4 => Try(w.value.toInt).filter(n => n > 1000).toOption
case _ => None
})
}
/** Reads a year from the first word.
  *
  * Only words of length 2 or 4 are considered. A two-character word must
  * parse to a non-negative number, a four-character word to a number
  * greater than 1000. Everything else (including non-numeric input)
  * yields no result.
  */
def readYear: Reader[Int] = {
  def parse(s: String)(valid: Int => Boolean): Option[Int] =
    Try(s.toInt).toOption.filter(valid)

  Reader.readFirst { word =>
    val digits = word.value
    if (digits.length == 2) parse(digits)(_ >= 0)
    else if (digits.length == 4) parse(digits)(_ > 1000)
    else None
  }
}
/** Reads a month from the first word by looking it up in `months`.
  *
  * The position of the first `months` entry containing the word is turned
  * into a 1-based month number.
  * NOTE(review): presumably the first entry of `months` is January —
  * confirm against its definition.
  */
def readMonth: Reader[Int] =
  Reader.readFirst { w =>
    // `indexWhere` yields -1 when nothing matches and 0 for a hit on the
    // first entry, so only negative positions may be rejected. The former
    // `> 0` check silently dropped matches on the first entry.
    Some(months.indexWhere(_.contains(w.value))).filter(_ >= 0).map(_ + 1)
  }
@ -69,10 +75,12 @@ object DateFind {
Reader(read.andThen(_.map(f)))
def or(other: Reader[A]): Reader[A] =
Reader(words => read(words) match {
case Result.Failure => other.read(words)
case s @ Result.Success(_, _) => s
})
Reader(words =>
read(words) match {
case Result.Failure => other.read(words)
case s @ Result.Success(_, _) => s
}
)
}
object Reader {
@ -81,12 +89,11 @@ object DateFind {
def readFirst[A](f: Word => Option[A]): Reader[A] =
Reader({
case Nil => Result.Failure
case Nil => Result.Failure
case a :: as => f(a).map(value => Result.Success(value, as)).getOrElse(Result.Failure)
})
}
sealed trait Result[+A] {
def toOption: Option[A]
def map[B](f: A => B): Result[B]
@ -95,14 +102,14 @@ object DateFind {
object Result {
final case class Success[A](value: A, rest: List[Word]) extends Result[A] {
val toOption = Some(value)
val toOption = Some(value)
def map[B](f: A => B): Result[B] = Success(f(value), rest)
def next[B](r: Reader[B]): Result[(A, B)] =
r.read(rest).map(b => (value, b))
}
final case object Failure extends Result[Nothing] {
val toOption = None
def map[B](f: Nothing => B): Result[B] = this
val toOption = None
def map[B](f: Nothing => B): Result[B] = this
def next[B](r: Reader[B]): Result[(Nothing, B)] = this
}
}

View File

@ -14,23 +14,28 @@ import java.net.URL
import scala.util.Using
object StanfordNerClassifier {
private [this] val logger = getLogger
private[this] val logger = getLogger
lazy val germanNerClassifier = makeClassifier(Language.German)
lazy val germanNerClassifier = makeClassifier(Language.German)
lazy val englishNerClassifier = makeClassifier(Language.English)
def nerAnnotate(lang: Language)(text: String): Vector[NerLabel] = {
val nerClassifier = lang match {
case Language.English => englishNerClassifier
case Language.German => germanNerClassifier
case Language.German => germanNerClassifier
}
nerClassifier.classify(text).asScala.flatMap(a => a.asScala).
collect(Function.unlift(label => {
nerClassifier
.classify(text)
.asScala
.flatMap(a => a.asScala)
.collect(Function.unlift { label =>
val tag = label.get(classOf[CoreAnnotations.AnswerAnnotation])
NerTag.fromString(Option(tag).getOrElse("")).toOption.
map(t => NerLabel(label.word(), t, label.beginPosition(), label.endPosition()))
})).
toVector
NerTag
.fromString(Option(tag).getOrElse(""))
.toOption
.map(t => NerLabel(label.word(), t, label.beginPosition(), label.endPosition()))
})
.toVector
}
private def makeClassifier(lang: Language): AbstractSequenceClassifier[CoreLabel] = {
@ -48,7 +53,9 @@ object StanfordNerClassifier {
check(lang match {
case Language.German =>
getClass.getResource("/edu/stanford/nlp/models/ner/german.conll.germeval2014.hgc_175m_600.crf.ser.gz")
getClass.getResource(
"/edu/stanford/nlp/models/ner/german.conll.germeval2014.hgc_175m_600.crf.ser.gz"
)
case Language.English =>
getClass.getResource("/edu/stanford/nlp/models/ner/english.all.3class.distsim.crf.ser.gz")
})

View File

@ -5,11 +5,11 @@ import java.nio.file.{Path, Paths}
import docspell.common._
case class Config(
allowedContentTypes: Set[MimeType]
, ghostscript: Config.Ghostscript
, pageRange: Config.PageRange
, unpaper: Config.Unpaper
, tesseract: Config.Tesseract
allowedContentTypes: Set[MimeType],
ghostscript: Config.Ghostscript,
pageRange: Config.PageRange,
unpaper: Config.Unpaper,
tesseract: Config.Tesseract
) {
def isAllowed(mt: MimeType): Boolean =
@ -22,7 +22,7 @@ object Config {
case class Command(program: String, args: Seq[String], timeout: Duration) {
def mapArgs(f: String => String): Command =
Command(program, args map f, timeout)
Command(program, args.map(f), timeout)
/** The full command line: the program followed by all its arguments. */
def toCmd: List[String] =
  (program +: args).toList
@ -44,23 +44,23 @@ object Config {
),
pageRange = PageRange(10),
ghostscript = Ghostscript(
Command("gs", Seq("-dNOPAUSE"
, "-dBATCH"
, "-dSAFER"
, "-sDEVICE=tiffscaled8"
, "-sOutputFile={{outfile}}"
, "{{infile}}"),
Duration.seconds(30)),
Paths.get(System.getProperty("java.io.tmpdir")).
resolve("docspell-extraction")),
unpaper = Unpaper(Command("unpaper"
, Seq("{{infile}}", "{{outfile}}")
, Duration.seconds(30))),
Command(
"gs",
Seq(
"-dNOPAUSE",
"-dBATCH",
"-dSAFER",
"-sDEVICE=tiffscaled8",
"-sOutputFile={{outfile}}",
"{{infile}}"
),
Duration.seconds(30)
),
Paths.get(System.getProperty("java.io.tmpdir")).resolve("docspell-extraction")
),
unpaper = Unpaper(Command("unpaper", Seq("{{infile}}", "{{outfile}}"), Duration.seconds(30))),
tesseract = Tesseract(
Command("tesseract", Seq("{{file}}"
, "stdout"
, "-l"
, "{{lang}}"),
Duration.minutes(1)))
Command("tesseract", Seq("{{file}}", "stdout", "-l", "{{lang}}"), Duration.minutes(1))
)
)
}

View File

@ -11,71 +11,106 @@ object Ocr {
/** Extract the text of all pages in the given pdf file.
*/
def extractPdf[F[_]: Sync: ContextShift](pdf: Stream[F, Byte], blocker: Blocker, lang: String, config: Config): Stream[F, String] =
def extractPdf[F[_]: Sync: ContextShift](
pdf: Stream[F, Byte],
blocker: Blocker,
lang: String,
config: Config
): Stream[F, String] =
File.withTempDir(config.ghostscript.workingDir, "extractpdf") { wd =>
runGhostscript(pdf, config, wd, blocker).
flatMap({ tmpImg =>
runGhostscript(pdf, config, wd, blocker)
.flatMap({ tmpImg =>
runTesseractFile(tmpImg, blocker, lang, config)
}).
fold1(_ + "\n\n\n" + _)
})
.fold1(_ + "\n\n\n" + _)
}
/** Extract the text from the given image file
*/
def extractImage[F[_]: Sync: ContextShift](img: Stream[F, Byte], blocker: Blocker, lang: String, config: Config): Stream[F, String] =
def extractImage[F[_]: Sync: ContextShift](
img: Stream[F, Byte],
blocker: Blocker,
lang: String,
config: Config
): Stream[F, String] =
runTesseractStdin(img, blocker, lang, config)
def extractPdFFile[F[_]: Sync: ContextShift](pdf: Path, blocker: Blocker, lang: String, config: Config): Stream[F, String] =
def extractPdFFile[F[_]: Sync: ContextShift](
pdf: Path,
blocker: Blocker,
lang: String,
config: Config
): Stream[F, String] =
File.withTempDir(config.ghostscript.workingDir, "extractpdf") { wd =>
runGhostscriptFile(pdf, config.ghostscript.command, wd, blocker).
flatMap({ tif =>
runGhostscriptFile(pdf, config.ghostscript.command, wd, blocker)
.flatMap({ tif =>
runTesseractFile(tif, blocker, lang, config)
}).
fold1(_ + "\n\n\n" + _)
})
.fold1(_ + "\n\n\n" + _)
}
def extractImageFile[F[_]: Sync: ContextShift](img: Path, blocker: Blocker, lang: String, config: Config): Stream[F, String] =
def extractImageFile[F[_]: Sync: ContextShift](
img: Path,
blocker: Blocker,
lang: String,
config: Config
): Stream[F, String] =
runTesseractFile(img, blocker, lang, config)
/** Run ghostscript on the given pdf data, which is fed to the
* process through stdin, to extract all pdf pages into tiff files.
* The files are written to the working directory `wd` and returned.
*/
private[text] def runGhostscript[F[_]: Sync: ContextShift](
pdf: Stream[F, Byte]
, cfg: Config
, wd: Path
, blocker: Blocker): Stream[F, Path] = {
pdf: Stream[F, Byte],
cfg: Config,
wd: Path,
blocker: Blocker
): Stream[F, Path] = {
val xargs =
if (cfg.pageRange.begin > 0) s"-dLastPage=${cfg.pageRange.begin}" +: cfg.ghostscript.command.args
if (cfg.pageRange.begin > 0)
s"-dLastPage=${cfg.pageRange.begin}" +: cfg.ghostscript.command.args
else cfg.ghostscript.command.args
val cmd = cfg.ghostscript.command.copy(args = xargs).mapArgs(replace(Map(
"{{infile}}" -> "-",
"{{outfile}}" -> "%d.tif"
)))
SystemCommand.execSuccess(cmd, blocker, wd = Some(wd), stdin = pdf).
evalMap({ _ =>
val cmd = cfg.ghostscript.command
.copy(args = xargs)
.mapArgs(
replace(
Map(
"{{infile}}" -> "-",
"{{outfile}}" -> "%d.tif"
)
)
)
SystemCommand
.execSuccess(cmd, blocker, wd = Some(wd), stdin = pdf)
.evalMap({ _ =>
File.listFiles(pathEndsWith(".tif"), wd)
}).
flatMap(fs => Stream.emits(fs))
})
.flatMap(fs => Stream.emits(fs))
}
/** Run ghostscript on the given pdf file to extract all pdf pages
* into tiff files. The files are written to the working directory
* `wd` and returned.
*/
private[text] def runGhostscriptFile[F[_]: Sync: ContextShift](
pdf: Path
, ghostscript: Config.Command
, wd: Path, blocker: Blocker): Stream[F, Path] = {
val cmd = ghostscript.mapArgs(replace(Map(
"{{infile}}" -> pdf.toAbsolutePath.toString,
"{{outfile}}" -> "%d.tif"
)))
SystemCommand.execSuccess[F](cmd, blocker, wd = Some(wd)).
evalMap({ _ =>
pdf: Path,
ghostscript: Config.Command,
wd: Path,
blocker: Blocker
): Stream[F, Path] = {
val cmd = ghostscript.mapArgs(
replace(
Map(
"{{infile}}" -> pdf.toAbsolutePath.toString,
"{{outfile}}" -> "%d.tif"
)
)
)
SystemCommand
.execSuccess[F](cmd, blocker, wd = Some(wd))
.evalMap({ _ =>
File.listFiles(pathEndsWith(".tif"), wd)
}).
flatMap(fs => Stream.emits(fs))
})
.flatMap(fs => Stream.emits(fs))
}
private def pathEndsWith(ext: String): Path => Boolean =
@ -84,65 +119,72 @@ object Ocr {
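/** Run unpaper to optimize the image for ocr. The result is written
* next to the input file, with the file name prefixed by `u-`, and
* returned. If the unpaper command fails, a warning is logged and
* the input file is returned instead.
*/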
private[text] def runUnpaperFile[F[_]: Sync: ContextShift](img: Path
, unpaper: Config.Command
, wd: Path, blocker: Blocker): Stream[F, Path] = {
val targetFile = img.resolveSibling("u-"+ img.getFileName.toString).toAbsolutePath
val cmd = unpaper.mapArgs(replace(Map(
"{{infile}}" -> img.toAbsolutePath.toString,
"{{outfile}}" -> targetFile.toString
)))
SystemCommand.execSuccess[F](cmd, blocker, wd = Some(wd)).
map(_ => targetFile).
handleErrorWith(th => {
logger.warn(s"Unpaper command failed: ${th.getMessage}. Using input file for text extraction.")
private[text] def runUnpaperFile[F[_]: Sync: ContextShift](
img: Path,
unpaper: Config.Command,
wd: Path,
blocker: Blocker
): Stream[F, Path] = {
val targetFile = img.resolveSibling("u-" + img.getFileName.toString).toAbsolutePath
val cmd = unpaper.mapArgs(
replace(
Map(
"{{infile}}" -> img.toAbsolutePath.toString,
"{{outfile}}" -> targetFile.toString
)
)
)
SystemCommand.execSuccess[F](cmd, blocker, wd = Some(wd)).map(_ => targetFile).handleErrorWith {
th =>
logger
.warn(s"Unpaper command failed: ${th.getMessage}. Using input file for text extraction.")
Stream.emit(img)
})
}
}
/** Run tesseract on the given image file and return the extracted
* text. The image is passed through unpaper first.
*/
private[text] def runTesseractFile[F[_]: Sync: ContextShift](
img: Path
, blocker: Blocker
, lang: String
, config: Config): Stream[F, String] = {
img: Path,
blocker: Blocker,
lang: String,
config: Config
): Stream[F, String] =
// tesseract cannot cope with absolute filenames
// so use the parent as working dir
runUnpaperFile(img, config.unpaper.command, img.getParent, blocker).
flatMap(uimg => {
val cmd = config.tesseract.command.mapArgs(replace(Map(
"{{file}}" -> uimg.getFileName.toString
, "{{lang}}" -> fixLanguage(lang))))
SystemCommand.execSuccess[F](cmd, blocker, wd = Some(uimg.getParent)).map(_.stdout)
})
}
runUnpaperFile(img, config.unpaper.command, img.getParent, blocker).flatMap { uimg =>
val cmd = config.tesseract.command.mapArgs(
replace(Map("{{file}}" -> uimg.getFileName.toString, "{{lang}}" -> fixLanguage(lang)))
)
SystemCommand.execSuccess[F](cmd, blocker, wd = Some(uimg.getParent)).map(_.stdout)
}
/** Run tesseract on the given image data, which is fed to the
* process through stdin, and return the extracted text.
*/
private[text] def runTesseractStdin[F[_]: Sync: ContextShift](
img: Stream[F, Byte]
, blocker: Blocker
, lang: String
, config: Config): Stream[F, String] = {
val cmd = config.tesseract.command.mapArgs(replace(Map(
"{{file}}" -> "stdin"
, "{{lang}}" -> fixLanguage(lang))))
img: Stream[F, Byte],
blocker: Blocker,
lang: String,
config: Config
): Stream[F, String] = {
val cmd = config.tesseract.command
.mapArgs(replace(Map("{{file}}" -> "stdin", "{{lang}}" -> fixLanguage(lang))))
SystemCommand.execSuccess(cmd, blocker, stdin = img).map(_.stdout)
}
private def replace(repl: Map[String, String]): String => String =
s => repl.foldLeft(s) { case (res, (k, v)) =>
res.replace(k, v)
}
s =>
repl.foldLeft(s) {
case (res, (k, v)) =>
res.replace(k, v)
}
private def fixLanguage(lang: String): String =
lang match {
case "de" => "deu"
case "en" => "eng"
case l => l
case l => l
}
}

View File

@ -16,57 +16,87 @@ object SystemCommand {
final case class Result(rc: Int, stdout: String, stderr: String)
def exec[F[_]: Sync: ContextShift]( cmd: Config.Command
, blocker: Blocker
, wd: Option[Path] = None
, stdin: Stream[F, Byte] = Stream.empty): Stream[F, Result] =
startProcess(cmd, wd){ proc =>
def exec[F[_]: Sync: ContextShift](
cmd: Config.Command,
blocker: Blocker,
wd: Option[Path] = None,
stdin: Stream[F, Byte] = Stream.empty
): Stream[F, Result] =
startProcess(cmd, wd) { proc =>
Stream.eval {
for {
_ <- writeToProcess(stdin, proc, blocker)
term <- Sync[F].delay(proc.waitFor(cmd.timeout.seconds, TimeUnit.SECONDS))
_ <- if (term) logger.fdebug(s"Command `${cmd.cmdString}` finished: ${proc.exitValue}")
else logger.fwarn(s"Command `${cmd.cmdString}` did not finish in ${cmd.timeout.formatExact}!")
_ <- if (!term) timeoutError(proc, cmd) else Sync[F].pure(())
out <- if (term) inputStreamToString(proc.getInputStream, blocker) else Sync[F].pure("")
err <- if (term) inputStreamToString(proc.getErrorStream, blocker) else Sync[F].pure("")
_ <- writeToProcess(stdin, proc, blocker)
term <- Sync[F].delay(proc.waitFor(cmd.timeout.seconds, TimeUnit.SECONDS))
_ <- if (term) logger.fdebug(s"Command `${cmd.cmdString}` finished: ${proc.exitValue}")
else
logger.fwarn(
s"Command `${cmd.cmdString}` did not finish in ${cmd.timeout.formatExact}!"
)
_ <- if (!term) timeoutError(proc, cmd) else Sync[F].pure(())
out <- if (term) inputStreamToString(proc.getInputStream, blocker) else Sync[F].pure("")
err <- if (term) inputStreamToString(proc.getErrorStream, blocker) else Sync[F].pure("")
} yield Result(proc.exitValue, out, err)
}
}
def execSuccess[F[_]: Sync: ContextShift](cmd: Config.Command, blocker: Blocker, wd: Option[Path] = None, stdin: Stream[F, Byte] = Stream.empty): Stream[F, Result] =
def execSuccess[F[_]: Sync: ContextShift](
cmd: Config.Command,
blocker: Blocker,
wd: Option[Path] = None,
stdin: Stream[F, Byte] = Stream.empty
): Stream[F, Result] =
exec(cmd, blocker, wd, stdin).flatMap { r =>
if (r.rc != 0) Stream.raiseError[F](new Exception(s"Command `${cmd.cmdString}` returned non-zero exit code ${r.rc}. Stderr: ${r.stderr}"))
if (r.rc != 0)
Stream.raiseError[F](
new Exception(
s"Command `${cmd.cmdString}` returned non-zero exit code ${r.rc}. Stderr: ${r.stderr}"
)
)
else Stream.emit(r)
}
private def startProcess[F[_]: Sync,A](cmd: Config.Command, wd: Option[Path])(f: Process => Stream[F,A]): Stream[F, A] = {
private def startProcess[F[_]: Sync, A](cmd: Config.Command, wd: Option[Path])(
f: Process => Stream[F, A]
): Stream[F, A] = {
val log = logger.fdebug(s"Running external command: ${cmd.cmdString}")
val proc = log *> Sync[F].delay {
val pb = new ProcessBuilder(cmd.toCmd.asJava)
wd.map(_.toFile).foreach(pb.directory)
pb.start()
}
Stream.bracket(proc)(p => logger.fdebug(s"Closing process: `${cmd.cmdString}`").map { _ =>
p.destroy()
}).flatMap(f)
Stream
.bracket(proc)(p =>
logger.fdebug(s"Closing process: `${cmd.cmdString}`").map { _ =>
p.destroy()
}
)
.flatMap(f)
}
private def inputStreamToString[F[_]: Sync: ContextShift](in: InputStream, blocker: Blocker): F[String] =
io.readInputStream(Sync[F].pure(in), 16 * 1024, blocker, closeAfterUse = false).
through(text.utf8Decode).
chunks.
map(_.toVector.mkString).
fold1(_ + _).
compile.last.
map(_.getOrElse(""))
private def inputStreamToString[F[_]: Sync: ContextShift](
in: InputStream,
blocker: Blocker
): F[String] =
io.readInputStream(Sync[F].pure(in), 16 * 1024, blocker, closeAfterUse = false)
.through(text.utf8Decode)
.chunks
.map(_.toVector.mkString)
.fold1(_ + _)
.compile
.last
.map(_.getOrElse(""))
private def writeToProcess[F[_]: Sync: ContextShift](data: Stream[F, Byte], proc: Process, blocker: Blocker): F[Unit] =
data.through(io.writeOutputStream(Sync[F].delay(proc.getOutputStream), blocker)).
compile.drain
private def writeToProcess[F[_]: Sync: ContextShift](
data: Stream[F, Byte],
proc: Process,
blocker: Blocker
): F[Unit] =
data.through(io.writeOutputStream(Sync[F].delay(proc.getOutputStream), blocker)).compile.drain
private def timeoutError[F[_]: Sync](proc: Process, cmd: Config.Command): F[Unit] =
Sync[F].delay(proc.destroyForcibly()).attempt *> {
Sync[F].raiseError(new Exception(s"Command `${cmd.cmdString}` timed out (${cmd.timeout.formatExact})"))
Sync[F].raiseError(
new Exception(s"Command `${cmd.cmdString}` timed out (${cmd.timeout.formatExact})")
)
}
}

View File

@ -12,18 +12,17 @@ object TikaMimetype {
private val tika = new TikaConfig().getDetector
private def convert(mt: MediaType): MimeType =
Option(mt).map(_.toString).
map(MimeType.parse).
flatMap(_.toOption).
map(normalize).
getOrElse(MimeType.octetStream)
Option(mt)
.map(_.toString)
.map(MimeType.parse)
.flatMap(_.toOption)
.map(normalize)
.getOrElse(MimeType.octetStream)
private def makeMetadata(hint: MimeTypeHint): Metadata = {
val md = new Metadata
hint.filename.
foreach(md.set(TikaMetadataKeys.RESOURCE_NAME_KEY, _))
hint.advertised.
foreach(md.set(HttpHeaders.CONTENT_TYPE, _))
hint.filename.foreach(md.set(TikaMetadataKeys.RESOURCE_NAME_KEY, _))
hint.advertised.foreach(md.set(HttpHeaders.CONTENT_TYPE, _))
md
}
@ -33,13 +32,10 @@ object TikaMimetype {
case _ => in
}
private def fromBytes(bv: Array[Byte], hint: MimeTypeHint): MimeType = {
private def fromBytes(bv: Array[Byte], hint: MimeTypeHint): MimeType =
convert(tika.detect(new java.io.ByteArrayInputStream(bv), makeMetadata(hint)))
}
def detect[F[_]: Sync](data: Stream[F, Byte]): F[MimeType] =
data.take(1024).
compile.toVector.
map(bytes => fromBytes(bytes.toArray, MimeTypeHint.none))
data.take(1024).compile.toVector.map(bytes => fromBytes(bytes.toArray, MimeTypeHint.none))
}