Store message-id of eml files

This commit is contained in:
Eike Kettner
2020-03-25 21:58:54 +01:00
parent 6b13993257
commit 09ea724c13
5 changed files with 44 additions and 26 deletions

View File

@ -20,12 +20,12 @@ object ReadMail {
/** Pipe that turns the raw bytes of an e-mail into `Binary` values by first
  * parsing the bytes with `bytesToMail` and then expanding the parsed mail
  * with `mailToEntries`.
  *
  * NOTE(review): this listing is a diff rendering without +/- markers, so two
  * alternative bodies follow the signature. Only one of them belongs to any
  * given revision of the file.
  *
  * @param logger logger used for debug output and handed on to `mailToEntries`
  */
def readBytesP[F[_]: ConcurrentEffect: ContextShift](
logger: Logger[F]
): Pipe[F, Byte, Binary[F]] =
// Body variant A: logs a debug message itself and calls `bytesToMail` with the
// stream as an argument (fits the `bytesToMail(data: Stream[F, Byte])` signature;
// presumably the pre-commit version -- confirm against the real diff).
s =>
Stream.eval(logger.debug(s"Converting e-mail into its parts")) >>
bytesToMail(s).flatMap(mailToEntries[F](logger))
// Body variant B: uses `bytesToMail(logger)` as a pipe and does no logging of
// its own (fits the `bytesToMail(logger: Logger[F])` signature; presumably the
// post-commit version -- confirm against the real diff).
_.through(bytesToMail(logger)).flatMap(mailToEntries[F](logger))
// Stream-argument variant of `bytesToMail` (presumably the pre-commit version;
// the diff rendering carries no +/- markers -- confirm).
// Sends the bytes through `Binary.decode` with US-ASCII, combines all decoded
// elements into a single value with `foldMonoid`, and evaluates `read` on that
// value to produce the `Mail`.
def bytesToMail[F[_]: Sync](data: Stream[F, Byte]): Stream[F, Mail[F]] =
data.through(Binary.decode(StandardCharsets.US_ASCII)).foldMonoid.evalMap(read[F])
// Pipe variant of `bytesToMail` (presumably the post-commit version; the diff
// rendering carries no +/- markers -- confirm).
// Emits a debug log message first, then sends the incoming bytes through
// `Binary.decode` with US-ASCII, combines all decoded elements into a single
// value with `foldMonoid`, and evaluates `read` on that value to produce the
// `Mail`.
def bytesToMail[F[_]: Sync](logger: Logger[F]): Pipe[F, Byte, Mail[F]] =
s =>
Stream.eval(logger.debug(s"Converting e-mail file...")) >>
s.through(Binary.decode(StandardCharsets.US_ASCII)).foldMonoid.evalMap(read[F])
def mailToEntries[F[_]: ConcurrentEffect: ContextShift](
logger: Logger[F]
@ -59,20 +59,20 @@ object ReadMail {
}
// Ensures body content begins with an `<html` element.
// Content whose trimmed, lower-cased text already starts with "<html" is
// returned unchanged. Anything else is wrapped between `htmlHeader(charset)`
// and `htmlHeaderEnd`.
//
// NOTE(review): this is a diff rendering without +/- markers. The `val str`
// line and the `else ... match` block each appear twice (old and new layout of
// the same logic); only one copy of each exists in a real revision.
private def fixHtml(cnt: BodyContent): BodyContent = {
val str = cnt.asString.trim.toLowerCase
val str = cnt.asString.trim.toLowerCase
val head = htmlHeader(cnt.charsetOrUtf8)
if (str.startsWith("<html")) cnt
// Layout 1 of the fallback branch (`else cnt match {` on one line).
else cnt match {
case BodyContent.StringContent(s) =>
BodyContent(head + s + htmlHeaderEnd)
case BodyContent.ByteContent(bv, cs) =>
// Header and footer are encoded with the content's charset (or UTF-8) and
// placed around the original bytes; the original `cs` value is passed on.
val begin = ByteVector.view(head.getBytes(cnt.charsetOrUtf8))
val end = ByteVector.view(htmlHeaderEnd.getBytes(cnt.charsetOrUtf8))
BodyContent(begin ++ bv ++ end, cs)
}
// Layout 2 of the same fallback branch (`else` and `cnt match {` on separate lines).
else
cnt match {
case BodyContent.StringContent(s) =>
BodyContent(head + s + htmlHeaderEnd)
case BodyContent.ByteContent(bv, cs) =>
val begin = ByteVector.view(head.getBytes(cnt.charsetOrUtf8))
val end = ByteVector.view(htmlHeaderEnd.getBytes(cnt.charsetOrUtf8))
BodyContent(begin ++ bv ++ end, cs)
}
}
implicit class MimeTypeConv(m: emil.MimeType) {
// Builds a `MimeType` from the primary type, subtype and parameters of the
// wrapped `emil.MimeType`.
def toDocspell: MimeType =
MimeType(m.primary, m.sub, m.params)

View File

@ -120,7 +120,7 @@ object ExtractArchive {
zipData
.through(Zip.unzipP[F](8192, ctx.blocker))
.flatMap(handleEntry(ctx, ra, archive))
.flatMap(handleEntry(ctx, ra, archive, None))
.foldMonoid
.compile
.lastOrError
@ -130,14 +130,19 @@ object ExtractArchive {
ctx: Context[F, _],
archive: Option[RAttachmentArchive]
)(ra: RAttachment): F[Extracted] = {
val email = ctx.store.bitpeace
val email: Stream[F, Byte] = ctx.store.bitpeace
.get(ra.fileId.id)
.unNoneTerminate
.through(ctx.store.bitpeace.fetchData2(RangeDef.all))
email
.through(ReadMail.readBytesP[F](ctx.logger))
.flatMap(handleEntry(ctx, ra, archive))
.through(ReadMail.bytesToMail[F](ctx.logger))
.flatMap { mail =>
val mId = mail.header.messageId
ReadMail
.mailToEntries(ctx.logger)(mail)
.flatMap(handleEntry(ctx, ra, archive, mId))
}
.foldMonoid
.compile
.lastOrError
@ -146,7 +151,8 @@ object ExtractArchive {
def handleEntry[F[_]: Sync](
ctx: Context[F, _],
ra: RAttachment,
archive: Option[RAttachmentArchive]
archive: Option[RAttachmentArchive],
messageId: Option[String]
)(
entry: Binary[F]
): Stream[F, Extracted] = {
@ -163,7 +169,7 @@ object ExtractArchive {
ra.created,
Option(entry.name).map(_.trim).filter(_.nonEmpty)
)
val aa = archive.getOrElse(RAttachmentArchive.of(ra)).copy(id = id)
val aa = archive.getOrElse(RAttachmentArchive.of(ra, messageId)).copy(id = id)
Extracted.of(nra, aa)
}
}