commit 71f0320830 (mirror of https://github.com/TheAnachronism/docspell.git)
@@ -8,7 +8,7 @@ import io.circe.{Decoder, Encoder}

trait Glob {

  /** Matches the input string against this glob. */
  def matches(in: String): Boolean
  def matches(caseSensitive: Boolean)(in: String): Boolean

  /** If this glob consists of multiple segments, it is the same as
    * `matches`. If it is only a single segment, it is matched against

@@ -25,42 +25,6 @@ trait Glob {
}

object Glob {
  private val separator = '/'
  private val anyChar = '|'

  val all = new Glob {
    def matches(in: String) = true
    def matchFilenameOrPath(in: String) = true
    val asString = "*"
  }

  def pattern(pattern: Pattern): Glob =
    PatternGlob(pattern)

  /** A simple glob supporting `*` and `?`. */
  final private case class PatternGlob(pattern: Pattern) extends Glob {
    def matches(in: String): Boolean =
      pattern.parts
        .zipWith(Glob.split(in, Glob.separator))(_.matches(_))
        .forall(identity)

    def matchFilenameOrPath(in: String): Boolean =
      if (pattern.parts.tail.isEmpty) matches(split(in, separator).last)
      else matches(in)

    def asString: String =
      pattern.asString
  }

  final private case class AnyGlob(globs: NonEmptyList[Glob]) extends Glob {
    def matches(in: String) =
      globs.exists(_.matches(in))
    def matchFilenameOrPath(in: String) =
      globs.exists(_.matchFilenameOrPath(in))
    def asString =
      globs.toList.map(_.asString).mkString(anyChar.toString)
  }

  def apply(in: String): Glob = {
    def single(str: String) =
      PatternGlob(Pattern(split(str, separator).map(makeSegment)))
@@ -75,6 +39,42 @@ object Glob {
    }
  }

  private val separator = '/'
  private val anyChar = '|'

  val all = new Glob {
    def matches(caseSensitive: Boolean)(in: String) = true
    def matchFilenameOrPath(in: String) = true
    val asString = "*"
  }

  def pattern(pattern: Pattern): Glob =
    PatternGlob(pattern)

  /** A simple glob supporting `*` and `?`. */
  final private case class PatternGlob(pattern: Pattern) extends Glob {
    def matches(caseSensitive: Boolean)(in: String): Boolean =
      pattern.parts
        .zipWith(Glob.split(in, Glob.separator))(_.matches(caseSensitive)(_))
        .forall(identity)

    def matchFilenameOrPath(in: String): Boolean =
      if (pattern.parts.tail.isEmpty) matches(true)(split(in, separator).last)
      else matches(true)(in)

    def asString: String =
      pattern.asString
  }

  final private case class AnyGlob(globs: NonEmptyList[Glob]) extends Glob {
    def matches(caseSensitive: Boolean)(in: String) =
      globs.exists(_.matches(caseSensitive)(in))
    def matchFilenameOrPath(in: String) =
      globs.exists(_.matchFilenameOrPath(in))
    def asString =
      globs.toList.map(_.asString).mkString(anyChar.toString)
  }

  case class Pattern(parts: NonEmptyList[Segment]) {
    def asString =
      parts.map(_.asString).toList.mkString(separator.toString)
@@ -86,12 +86,12 @@ object Glob {
  }

  case class Segment(tokens: NonEmptyList[Token]) {
    def matches(in: String): Boolean =
      consume(in).exists(_.isEmpty)
    def matches(caseSensitive: Boolean)(in: String): Boolean =
      consume(in, caseSensitive).exists(_.isEmpty)

    def consume(in: String): Option[String] =
    def consume(in: String, caseSensitive: Boolean): Option[String] =
      tokens.foldLeft(in.some) { (rem, token) =>
        rem.flatMap(token.consume)
        rem.flatMap(token.consume(caseSensitive))
      }

    def asString: String =
@@ -103,34 +103,47 @@ object Glob {
  }

  sealed trait Token {
    def consume(str: String): Option[String]
    def consume(caseSensitive: Boolean)(str: String): Option[String]

    def asString: String
  }
  object Token {
    case class Literal(asString: String) extends Token {
      def consume(str: String): Option[String] =
        if (str.startsWith(asString)) str.drop(asString.length).some
      def consume(caseSensitive: Boolean)(str: String): Option[String] =
        if (str.startsWith(asString, caseSensitive)) str.drop(asString.length).some
        else None
    }
    case class Until(value: String) extends Token {
      def consume(str: String): Option[String] =
      def consume(caseSensitive: Boolean)(str: String): Option[String] =
        if (value.isEmpty) Some("")
        else
          str.indexOf(value) match {
            case -1 => None
            case n => str.substring(n + value.length).some
          }
          str
            .findFirst(value, caseSensitive)
            .map(n => str.substring(n + value.length))
      val asString =
        s"*$value"
    }
    case object Single extends Token {
      def consume(str: String): Option[String] =
        if (str.isEmpty()) None
      def consume(caseSensitive: Boolean)(str: String): Option[String] =
        if (str.isEmpty) None
        else Some(str.drop(1))

      val asString = "?"
    }

    implicit final class StringHelper(val str: String) extends AnyVal {
      def findFirst(sub: String, caseSensitive: Boolean): Option[Int] = {
        val vstr = if (caseSensitive) str else str.toLowerCase
        val vsub = if (caseSensitive) sub else sub.toLowerCase
        Option(vstr.indexOf(vsub)).filter(_ >= 0)
      }

      def startsWith(prefix: String, caseSensitive: Boolean): Boolean = {
        val vstr = if (caseSensitive) str else str.toLowerCase
        val vprefix = if (caseSensitive) prefix else prefix.toLowerCase
        vstr.startsWith(vprefix)
      }
    }
  }

  private def split(str: String, sep: Char): NonEmptyList[String] =
@@ -139,6 +152,7 @@ object Glob {
      .getOrElse(NonEmptyList.of(str))

  private def makeSegment(str: String): Segment = {
    @annotation.tailrec
    def loop(rem: String, res: List[Token]): List[Token] =
      if (rem.isEmpty) res
      else
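The change above turns `Glob.matches` into a curried call that takes the case-sensitivity flag first. A minimal usage sketch, assuming the `Glob` companion from this file is in scope; the patterns and inputs below are invented for illustration:

// Sketch only: example patterns and inputs, not taken from the repository.
val glob = Glob("*.pdf|*.txt")

glob.matches(true)("report.pdf")   // true  (case-sensitive)
glob.matches(true)("Report.PDF")   // false (case-sensitive)
glob.matches(false)("Report.PDF")  // true  (case-insensitive)

// A single-segment glob is matched against the last path segment only.
Glob("*.pdf").matchFilenameOrPath("/tmp/in/report.pdf") // true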
@@ -6,8 +6,10 @@ import Glob._
object GlobTest extends SimpleTestSuite {

  test("literals") {
    assert(Glob.pattern(Pattern(Segment(Token.Literal("hello")))).matches("hello"))
    assert(!Glob.pattern(Pattern(Segment(Token.Literal("hello")))).matches("hello1"))
    assert(Glob.pattern(Pattern(Segment(Token.Literal("hello")))).matches(true)("hello"))
    assert(
      !Glob.pattern(Pattern(Segment(Token.Literal("hello")))).matches(true)("hello1")
    )
  }

  test("single wildcards 1") {
@@ -16,19 +18,19 @@ object GlobTest extends SimpleTestSuite {
        Pattern(Segment(Token.Literal("s"), Token.Until("p"), Token.Until("t")))
      )

    assert(glob.matches("snapshot"))
    assert(!glob.matches("snapshots"))
    assert(glob.matches(true)("snapshot"))
    assert(!glob.matches(true)("snapshots"))
  }

  test("single wildcards 2") {
    val glob =
      Glob.pattern(Pattern(Segment(Token.Literal("test."), Token.Until(""))))

    assert(glob.matches("test.txt"))
    assert(glob.matches("test.pdf"))
    assert(glob.matches("test.converted.pdf"))
    assert(!glob.matches("test1.txt"))
    assert(!glob.matches("atest.txt"))
    assert(glob.matches(true)("test.txt"))
    assert(glob.matches(true)("test.pdf"))
    assert(glob.matches(true)("test.converted.pdf"))
    assert(!glob.matches(true)("test1.txt"))
    assert(!glob.matches(true)("atest.txt"))
  }

  test("single parsing") {
@@ -60,12 +62,12 @@ object GlobTest extends SimpleTestSuite {
  }

  test("with splitting") {
    assert(Glob("a/b/*").matches("a/b/hello"))
    assert(!Glob("a/b/*").matches("/a/b/hello"))
    assert(Glob("/a/b/*").matches("/a/b/hello"))
    assert(!Glob("/a/b/*").matches("a/b/hello"))
    assert(!Glob("*/a/b/*").matches("a/b/hello"))
    assert(Glob("*/a/b/*").matches("test/a/b/hello"))
    assert(Glob("a/b/*").matches(true)("a/b/hello"))
    assert(!Glob("a/b/*").matches(true)("/a/b/hello"))
    assert(Glob("/a/b/*").matches(true)("/a/b/hello"))
    assert(!Glob("/a/b/*").matches(true)("a/b/hello"))
    assert(!Glob("*/a/b/*").matches(true)("a/b/hello"))
    assert(Glob("*/a/b/*").matches(true)("test/a/b/hello"))
  }

  test("asString") {
@@ -79,9 +81,9 @@ object GlobTest extends SimpleTestSuite {
  }

  test("simple matches") {
    assert(Glob("/test.*").matches("/test.pdf"))
    assert(!Glob("/test.*").matches("test.pdf"))
    assert(!Glob("test.*").matches("/test.pdf"))
    assert(Glob("/test.*").matches(true)("/test.pdf"))
    assert(!Glob("/test.*").matches(true)("test.pdf"))
    assert(!Glob("test.*").matches(true)("/test.pdf"))
  }

  test("matchFilenameOrPath") {
@@ -100,12 +102,24 @@ object GlobTest extends SimpleTestSuite {
  }

  test("anyglob") {
    assert(Glob("*.pdf|*.txt").matches("test.pdf"))
    assert(Glob("*.pdf|*.txt").matches("test.txt"))
    assert(!Glob("*.pdf|*.txt").matches("test.xls"))
    assert(Glob("*.pdf | *.txt").matches("test.pdf"))
    assert(Glob("*.pdf | mail.html").matches("test.pdf"))
    assert(Glob("*.pdf | mail.html").matches("mail.html"))
    assert(!Glob("*.pdf | mail.html").matches("test.docx"))
    assert(Glob("*.pdf|*.txt").matches(true)("test.pdf"))
    assert(Glob("*.pdf|*.txt").matches(true)("test.txt"))
    assert(!Glob("*.pdf|*.txt").matches(true)("test.xls"))
    assert(Glob("*.pdf | *.txt").matches(true)("test.pdf"))
    assert(Glob("*.pdf | mail.html").matches(true)("test.pdf"))
    assert(Glob("*.pdf | mail.html").matches(true)("mail.html"))
    assert(!Glob("*.pdf | mail.html").matches(true)("test.docx"))
  }

  test("case insensitive") {
    assert(Glob("*hello*").matches(false)("hello world"))
    assert(Glob("*hello*").matches(false)("world hello"))
    assert(Glob("*hello*").matches(false)("Hello world"))
    assert(Glob("*hello*").matches(false)("world Hello"))
    assert(Glob("*hello*").matches(false)("World Hello"))
    assert(Glob("*hello*").matches(false)("Hello World"))
    assert(Glob("*Hello*").matches(false)("world hello"))
    assert(Glob("*heLLo*").matches(false)("Hello world"))
    assert(Glob("*hellO*").matches(false)("world Hello"))
  }
}

@@ -51,11 +51,11 @@ object ReadMail {
    (Stream
      .eval(bodyEntry)
      .flatMap(e => Stream.emits(e.toSeq))
      .filter(a => glob.matches(a.name)) ++
      .filter(a => glob.matches(caseSensitive = false)(a.name)) ++
      Stream
        .eval(TnefExtract.replace(mail))
        .flatMap(m => Stream.emits(m.attachments.all))
        .filter(a => a.filename.exists(glob.matches))
        .filter(a => a.filename.exists(glob.matches(caseSensitive = false)))
        .map(a =>
          Binary(a.filename.getOrElse("noname"), a.mimeType.toLocal, a.content)
        ))

@@ -243,7 +243,9 @@ object ExtractArchive {
    )

    def filterNames(filter: Glob): Extracted =
      copy(files = files.filter(ra => filter.matches(ra.name.getOrElse(""))))
      copy(files =
        files.filter(ra => filter.matches(caseSensitive = false)(ra.name.getOrElse("")))
      )

    def setMeta(m: MetaProposal): Extracted =
      setMeta(MetaProposalList.of(m))

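Both call sites above (mail bodies and attachments in ReadMail, archive entries in ExtractArchive) now opt into case-insensitive matching by passing `caseSensitive = false` explicitly. A small sketch of the effect at such a filter site; the glob and file names are made-up examples:

// Sketch only: illustrative values, not taken from the repository.
val attachFilter = Glob("*.pdf|*.zip")
val names = List("Invoice.PDF", "notes.txt", "scan.zip")

val kept = names.filter(n => attachFilter.matches(caseSensitive = false)(n))
// kept == List("Invoice.PDF", "scan.zip"); the upper-case extension still passes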
@@ -135,15 +135,21 @@ object ScanMailboxTask {

  final private class Impl[F[_]: Sync](cfg: Config.ScanMailbox, ctx: Context[F, Args]) {

    private def logOp[C](f: Logger[F] => F[Unit]): MailOp[F, C, Unit] =
      MailOp(_ => f(ctx.logger))

    def handleFolder[C](a: Access[F, C], upload: OUpload[F])(
        name: String
    ): MailOp[F, C, ScanResult] =
      for {
        _ <- Kleisli.liftF(ctx.logger.info(s"Processing folder $name"))
        folder <- requireFolder(a)(name)
        search <- searchMails(a)(folder)
        headers <- Kleisli.liftF(filterSubjects(search.mails).flatMap(filterMessageIds))
        _ <- headers.traverse(handleOne(ctx.args, a, upload))
        _ <- Kleisli.liftF(ctx.logger.info(s"Processing folder $name"))
        folder <- requireFolder(a)(name)
        search <- searchMails(a)(folder)
        items = search.mails.map(MailHeaderItem(_))
        headers <- Kleisli.liftF(
          filterSubjects(items).flatMap(filterMessageIds)
        )
        _ <- headers.traverse(handleOne(ctx.args, a, upload))
      } yield ScanResult(name, search.mails.size, search.count - search.mails.size)

    def requireFolder[C](a: Access[F, C])(name: String): MailOp[F, C, MailFolder] =
@@ -163,43 +169,39 @@ object ScanMailboxTask {
      }

      for {
        _ <- Kleisli.liftF(
          ctx.logger.debug(
            s"Searching next ${cfg.mailBatchSize} mails in ${folder.name}."
          )
        _ <- logOp(
          _.debug(s"Searching next ${cfg.mailBatchSize} mails in ${folder.name}.")
        )
        query <- Kleisli.liftF(q)
        mails <- a.search(folder, cfg.mailBatchSize)(query)
        _ <- Kleisli.liftF(
          ctx.logger.debug(
        _ <- logOp(
          _.debug(
            s"Found ${mails.count} mails in folder. Reading first ${mails.mails.size}"
          )
        )
      } yield mails
    }

    def filterSubjects(headers: Vector[MailHeader]): F[Vector[MailHeader]] =
    def filterSubjects(headers: Vector[MailHeaderItem]): F[Vector[MailHeaderItem]] =
      ctx.args.subjectFilter match {
        case Some(sf) =>
          def check(mh: MailHeader): F[Option[MailHeader]] =
            if (sf.matches(mh.subject))
              ctx.logger.debug(
                s"Including mail '${mh.subject}', it matches the filter."
              ) *> Option(mh).pure[F]
          def check(mh: MailHeaderItem): F[MailHeaderItem] =
            if (mh.notProcess || sf.matches(caseSensitive = false)(mh.mh.subject))
              mh.pure[F]
            else
              ctx.logger.debug(
                s"Excluding mail '${mh.subject}', it doesn't match the filter."
              ) *> (None: Option[MailHeader]).pure[F]
                s"Excluding mail '${mh.mh.subject}', it doesn't match the subject filter."
              ) *> mh.skip.pure[F]
          ctx.logger.info(
            s"Filtering mails on subject using filter: ${sf.asString}"
          ) *> headers.traverseFilter(check)
          ) *> headers.traverse(check)

        case None =>
          ctx.logger.debug("Not matching on subjects. No filter given") *> headers.pure[F]
      }

    def filterMessageIds(headers: Vector[MailHeader]): F[Vector[MailHeader]] =
      NonEmptyList.fromFoldable(headers.flatMap(_.messageId)) match {
    def filterMessageIds[C](headers: Vector[MailHeaderItem]): F[Vector[MailHeaderItem]] =
      NonEmptyList.fromFoldable(headers.flatMap(_.mh.messageId)) match {
        case Some(nl) =>
          for {
            archives <- ctx.store.transact(
@@ -207,12 +209,14 @@ object ScanMailboxTask {
              .findByMessageIdAndCollective(nl, ctx.args.account.collective)
            )
            existing = archives.flatMap(_.messageId).toSet
            mails = headers.filterNot(mh => mh.messageId.forall(existing.contains))
            _ <- headers.size - mails.size match {
              case 0 => ().pure[F]
              case n =>
                ctx.logger.info(s"Excluded $n mails since items for them already exist.")
            }
            mails <- headers
              .traverse(mh =>
                if (mh.process && mh.mh.messageId.forall(existing.contains))
                  ctx.logger.debug(
                    s"Excluding mail '${mh.mh.subject}' it has been imported in the past.'"
                  ) *> mh.skip.pure[F]
                else mh.pure[F]
              )
          } yield mails

        case None =>
@@ -248,15 +252,21 @@ object ScanMailboxTask {
    def postHandle[C](a: Access[F, C])(mh: MailHeader): MailOp[F, C, Unit] =
      ctx.args.targetFolder match {
        case Some(tf) =>
          a.getOrCreateFolder(None, tf).flatMap(folder => a.moveMail(mh, folder))
          logOp(_.debug(s"Post handling mail: ${mh.subject} - moving to folder: $tf"))
            .flatMap(_ =>
              a.getOrCreateFolder(None, tf).flatMap(folder => a.moveMail(mh, folder))
            )
        case None if ctx.args.deleteMail =>
          a.deleteMail(mh).flatMapF { r =>
            if (r.count == 0)
              ctx.logger.warn(s"Mail '${mh.subject}' could not be deleted")
            else ().pure[F]
          }
          logOp(_.debug(s"Post handling mail: ${mh.subject} - deleting mail.")).flatMap(
            _ =>
              a.deleteMail(mh).flatMapF { r =>
                if (r.count == 0)
                  ctx.logger.warn(s"Mail could not be deleted!")
                else ().pure[F]
              }
          )
        case None =>
          MailOp.pure(())
          logOp(_.debug(s"Post handling mail: ${mh.subject} - no handling defined!"))
      }

    def submitMail(upload: OUpload[F], args: Args)(
@@ -292,13 +302,15 @@ object ScanMailboxTask {
    }

    def handleOne[C](args: Args, a: Access[F, C], upload: OUpload[F])(
        mh: MailHeader
        mh: MailHeaderItem
    ): MailOp[F, C, Unit] =
      for {
        mail <- a.loadMail(mh)
        mail <- a.loadMail(mh.mh)
        res <- mail match {
          case Some(m) =>
          case Some(m) if mh.process =>
            Kleisli.liftF(submitMail(upload, args)(m).attempt)
          case Some(_) =>
            Kleisli.liftF(Either.right(mh).pure[F])
          case None =>
            MailOp.pure[F, C, Either[Throwable, OUpload.UploadResult]](
              Either.left(new Exception(s"Mail not found"))
@@ -307,10 +319,15 @@ object ScanMailboxTask {
        _ <- res.fold(
          ex =>
            Kleisli.liftF(
              ctx.logger.warn(s"Error submitting '${mh.subject}': ${ex.getMessage}")
              ctx.logger.warn(s"Error submitting '${mh.mh.subject}': ${ex.getMessage}")
            ),
          _ => postHandle(a)(mh)
          _ => postHandle(a)(mh.mh)
        )
      } yield ()
  }

  case class MailHeaderItem(mh: MailHeader, process: Boolean = true) {
    def skip = copy(process = false)
    def notProcess = !process
  }
}

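MailHeaderItem lets the subject and message-id filters mark a mail as skipped instead of removing it from the vector, so handleOne can still run postHandle (move or delete) on mails that were not imported. A minimal sketch of that flow; the MailHeader below is a subject-only stand-in invented for illustration:

// Sketch only: stripped-down stand-ins, not the real docspell types.
final case class MailHeader(subject: String)
final case class MailHeaderItem(mh: MailHeader, process: Boolean = true) {
  def skip = copy(process = false)
  def notProcess = !process
}

val subjectFilter = Glob("*invoice*")
val items = Vector(MailHeader("Invoice 42"), MailHeader("Newsletter"))
  .map(MailHeaderItem(_))

// Non-matching mails are only flagged, not dropped, so post-handling still sees them.
val checked = items.map(i =>
  if (i.notProcess || subjectFilter.matches(caseSensitive = false)(i.mh.subject)) i
  else i.skip
)
// checked.map(_.process) == Vector(true, false)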