mirror of
				https://github.com/TheAnachronism/docspell.git
				synced 2025-11-04 12:30:12 +00:00 
			
		
		
		
	Fix looping over already seen mails when they are skipped
When skipping mails due to a filter, it must still enter the post-handling step. Otherwise it will be seen again on next run. Issue: #551
This commit is contained in:
		@@ -135,6 +135,9 @@ object ScanMailboxTask {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
  final private class Impl[F[_]: Sync](cfg: Config.ScanMailbox, ctx: Context[F, Args]) {
 | 
					  final private class Impl[F[_]: Sync](cfg: Config.ScanMailbox, ctx: Context[F, Args]) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    private def logOp[C](f: Logger[F] => F[Unit]): MailOp[F, C, Unit] =
 | 
				
			||||||
 | 
					      MailOp(_ => f(ctx.logger))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def handleFolder[C](a: Access[F, C], upload: OUpload[F])(
 | 
					    def handleFolder[C](a: Access[F, C], upload: OUpload[F])(
 | 
				
			||||||
        name: String
 | 
					        name: String
 | 
				
			||||||
    ): MailOp[F, C, ScanResult] =
 | 
					    ): MailOp[F, C, ScanResult] =
 | 
				
			||||||
@@ -142,7 +145,10 @@ object ScanMailboxTask {
 | 
				
			|||||||
        _      <- Kleisli.liftF(ctx.logger.info(s"Processing folder $name"))
 | 
					        _      <- Kleisli.liftF(ctx.logger.info(s"Processing folder $name"))
 | 
				
			||||||
        folder <- requireFolder(a)(name)
 | 
					        folder <- requireFolder(a)(name)
 | 
				
			||||||
        search <- searchMails(a)(folder)
 | 
					        search <- searchMails(a)(folder)
 | 
				
			||||||
        headers <- Kleisli.liftF(filterSubjects(search.mails).flatMap(filterMessageIds))
 | 
					        items = search.mails.map(MailHeaderItem(_))
 | 
				
			||||||
 | 
					        headers <- Kleisli.liftF(
 | 
				
			||||||
 | 
					          filterSubjects(items).flatMap(filterMessageIds)
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
        _ <- headers.traverse(handleOne(ctx.args, a, upload))
 | 
					        _ <- headers.traverse(handleOne(ctx.args, a, upload))
 | 
				
			||||||
      } yield ScanResult(name, search.mails.size, search.count - search.mails.size)
 | 
					      } yield ScanResult(name, search.mails.size, search.count - search.mails.size)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -163,43 +169,39 @@ object ScanMailboxTask {
 | 
				
			|||||||
      }
 | 
					      }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      for {
 | 
					      for {
 | 
				
			||||||
        _ <- Kleisli.liftF(
 | 
					        _ <- logOp(
 | 
				
			||||||
          ctx.logger.debug(
 | 
					          _.debug(s"Searching next ${cfg.mailBatchSize} mails in ${folder.name}.")
 | 
				
			||||||
            s"Searching next ${cfg.mailBatchSize} mails in ${folder.name}."
 | 
					 | 
				
			||||||
          )
 | 
					 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
        query <- Kleisli.liftF(q)
 | 
					        query <- Kleisli.liftF(q)
 | 
				
			||||||
        mails <- a.search(folder, cfg.mailBatchSize)(query)
 | 
					        mails <- a.search(folder, cfg.mailBatchSize)(query)
 | 
				
			||||||
        _ <- Kleisli.liftF(
 | 
					        _ <- logOp(
 | 
				
			||||||
          ctx.logger.debug(
 | 
					          _.debug(
 | 
				
			||||||
            s"Found ${mails.count} mails in folder. Reading first ${mails.mails.size}"
 | 
					            s"Found ${mails.count} mails in folder. Reading first ${mails.mails.size}"
 | 
				
			||||||
          )
 | 
					          )
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
      } yield mails
 | 
					      } yield mails
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def filterSubjects(headers: Vector[MailHeader]): F[Vector[MailHeader]] =
 | 
					    def filterSubjects(headers: Vector[MailHeaderItem]): F[Vector[MailHeaderItem]] =
 | 
				
			||||||
      ctx.args.subjectFilter match {
 | 
					      ctx.args.subjectFilter match {
 | 
				
			||||||
        case Some(sf) =>
 | 
					        case Some(sf) =>
 | 
				
			||||||
          def check(mh: MailHeader): F[Option[MailHeader]] =
 | 
					          def check(mh: MailHeaderItem): F[MailHeaderItem] =
 | 
				
			||||||
            if (sf.matches(caseSensitive = false)(mh.subject))
 | 
					            if (mh.notProcess || sf.matches(caseSensitive = false)(mh.mh.subject))
 | 
				
			||||||
              ctx.logger.debug(
 | 
					              mh.pure[F]
 | 
				
			||||||
                s"Including mail '${mh.subject}', it matches the filter."
 | 
					 | 
				
			||||||
              ) *> Option(mh).pure[F]
 | 
					 | 
				
			||||||
            else
 | 
					            else
 | 
				
			||||||
              ctx.logger.debug(
 | 
					              ctx.logger.debug(
 | 
				
			||||||
                s"Excluding mail '${mh.subject}', it doesn't match the filter."
 | 
					                s"Excluding mail '${mh.mh.subject}', it doesn't match the subject filter."
 | 
				
			||||||
              ) *> (None: Option[MailHeader]).pure[F]
 | 
					              ) *> mh.skip.pure[F]
 | 
				
			||||||
          ctx.logger.info(
 | 
					          ctx.logger.info(
 | 
				
			||||||
            s"Filtering mails on subject using filter: ${sf.asString}"
 | 
					            s"Filtering mails on subject using filter: ${sf.asString}"
 | 
				
			||||||
          ) *> headers.traverseFilter(check)
 | 
					          ) *> headers.traverse(check)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        case None =>
 | 
					        case None =>
 | 
				
			||||||
          ctx.logger.debug("Not matching on subjects. No filter given") *> headers.pure[F]
 | 
					          ctx.logger.debug("Not matching on subjects. No filter given") *> headers.pure[F]
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def filterMessageIds(headers: Vector[MailHeader]): F[Vector[MailHeader]] =
 | 
					    def filterMessageIds[C](headers: Vector[MailHeaderItem]): F[Vector[MailHeaderItem]] =
 | 
				
			||||||
      NonEmptyList.fromFoldable(headers.flatMap(_.messageId)) match {
 | 
					      NonEmptyList.fromFoldable(headers.flatMap(_.mh.messageId)) match {
 | 
				
			||||||
        case Some(nl) =>
 | 
					        case Some(nl) =>
 | 
				
			||||||
          for {
 | 
					          for {
 | 
				
			||||||
            archives <- ctx.store.transact(
 | 
					            archives <- ctx.store.transact(
 | 
				
			||||||
@@ -207,12 +209,14 @@ object ScanMailboxTask {
 | 
				
			|||||||
                .findByMessageIdAndCollective(nl, ctx.args.account.collective)
 | 
					                .findByMessageIdAndCollective(nl, ctx.args.account.collective)
 | 
				
			||||||
            )
 | 
					            )
 | 
				
			||||||
            existing = archives.flatMap(_.messageId).toSet
 | 
					            existing = archives.flatMap(_.messageId).toSet
 | 
				
			||||||
            mails    = headers.filterNot(mh => mh.messageId.forall(existing.contains))
 | 
					            mails <- headers
 | 
				
			||||||
            _ <- headers.size - mails.size match {
 | 
					              .traverse(mh =>
 | 
				
			||||||
              case 0 => ().pure[F]
 | 
					                if (mh.process && mh.mh.messageId.forall(existing.contains))
 | 
				
			||||||
              case n =>
 | 
					                  ctx.logger.debug(
 | 
				
			||||||
                ctx.logger.info(s"Excluded $n mails since items for them already exist.")
 | 
					                    s"Excluding mail '${mh.mh.subject}' it has been imported in the past.'"
 | 
				
			||||||
            }
 | 
					                  ) *> mh.skip.pure[F]
 | 
				
			||||||
 | 
					                else mh.pure[F]
 | 
				
			||||||
 | 
					              )
 | 
				
			||||||
          } yield mails
 | 
					          } yield mails
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        case None =>
 | 
					        case None =>
 | 
				
			||||||
@@ -248,15 +252,21 @@ object ScanMailboxTask {
 | 
				
			|||||||
    def postHandle[C](a: Access[F, C])(mh: MailHeader): MailOp[F, C, Unit] =
 | 
					    def postHandle[C](a: Access[F, C])(mh: MailHeader): MailOp[F, C, Unit] =
 | 
				
			||||||
      ctx.args.targetFolder match {
 | 
					      ctx.args.targetFolder match {
 | 
				
			||||||
        case Some(tf) =>
 | 
					        case Some(tf) =>
 | 
				
			||||||
 | 
					          logOp(_.debug(s"Post handling mail: ${mh.subject} - moving to folder: $tf"))
 | 
				
			||||||
 | 
					            .flatMap(_ =>
 | 
				
			||||||
              a.getOrCreateFolder(None, tf).flatMap(folder => a.moveMail(mh, folder))
 | 
					              a.getOrCreateFolder(None, tf).flatMap(folder => a.moveMail(mh, folder))
 | 
				
			||||||
 | 
					            )
 | 
				
			||||||
        case None if ctx.args.deleteMail =>
 | 
					        case None if ctx.args.deleteMail =>
 | 
				
			||||||
 | 
					          logOp(_.debug(s"Post handling mail: ${mh.subject} - deleting mail.")).flatMap(
 | 
				
			||||||
 | 
					            _ =>
 | 
				
			||||||
              a.deleteMail(mh).flatMapF { r =>
 | 
					              a.deleteMail(mh).flatMapF { r =>
 | 
				
			||||||
                if (r.count == 0)
 | 
					                if (r.count == 0)
 | 
				
			||||||
              ctx.logger.warn(s"Mail '${mh.subject}' could not be deleted")
 | 
					                  ctx.logger.warn(s"Mail could not be deleted!")
 | 
				
			||||||
                else ().pure[F]
 | 
					                else ().pure[F]
 | 
				
			||||||
              }
 | 
					              }
 | 
				
			||||||
 | 
					          )
 | 
				
			||||||
        case None =>
 | 
					        case None =>
 | 
				
			||||||
          MailOp.pure(())
 | 
					          logOp(_.debug(s"Post handling mail: ${mh.subject} - no handling defined!"))
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def submitMail(upload: OUpload[F], args: Args)(
 | 
					    def submitMail(upload: OUpload[F], args: Args)(
 | 
				
			||||||
@@ -292,13 +302,15 @@ object ScanMailboxTask {
 | 
				
			|||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def handleOne[C](args: Args, a: Access[F, C], upload: OUpload[F])(
 | 
					    def handleOne[C](args: Args, a: Access[F, C], upload: OUpload[F])(
 | 
				
			||||||
        mh: MailHeader
 | 
					        mh: MailHeaderItem
 | 
				
			||||||
    ): MailOp[F, C, Unit] =
 | 
					    ): MailOp[F, C, Unit] =
 | 
				
			||||||
      for {
 | 
					      for {
 | 
				
			||||||
        mail <- a.loadMail(mh)
 | 
					        mail <- a.loadMail(mh.mh)
 | 
				
			||||||
        res <- mail match {
 | 
					        res <- mail match {
 | 
				
			||||||
          case Some(m) =>
 | 
					          case Some(m) if mh.process =>
 | 
				
			||||||
            Kleisli.liftF(submitMail(upload, args)(m).attempt)
 | 
					            Kleisli.liftF(submitMail(upload, args)(m).attempt)
 | 
				
			||||||
 | 
					          case Some(_) =>
 | 
				
			||||||
 | 
					            Kleisli.liftF(Either.right(mh).pure[F])
 | 
				
			||||||
          case None =>
 | 
					          case None =>
 | 
				
			||||||
            MailOp.pure[F, C, Either[Throwable, OUpload.UploadResult]](
 | 
					            MailOp.pure[F, C, Either[Throwable, OUpload.UploadResult]](
 | 
				
			||||||
              Either.left(new Exception(s"Mail not found"))
 | 
					              Either.left(new Exception(s"Mail not found"))
 | 
				
			||||||
@@ -307,10 +319,15 @@ object ScanMailboxTask {
 | 
				
			|||||||
        _ <- res.fold(
 | 
					        _ <- res.fold(
 | 
				
			||||||
          ex =>
 | 
					          ex =>
 | 
				
			||||||
            Kleisli.liftF(
 | 
					            Kleisli.liftF(
 | 
				
			||||||
              ctx.logger.warn(s"Error submitting '${mh.subject}': ${ex.getMessage}")
 | 
					              ctx.logger.warn(s"Error submitting '${mh.mh.subject}': ${ex.getMessage}")
 | 
				
			||||||
            ),
 | 
					            ),
 | 
				
			||||||
          _ => postHandle(a)(mh)
 | 
					          _ => postHandle(a)(mh.mh)
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
      } yield ()
 | 
					      } yield ()
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  case class MailHeaderItem(mh: MailHeader, process: Boolean = true) {
 | 
				
			||||||
 | 
					    def skip       = copy(process = false)
 | 
				
			||||||
 | 
					    def notProcess = !process
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user