Download multiple files as zip

This commit is contained in:
eikek
2022-04-09 14:01:36 +02:00
parent e65b8de686
commit 4488291319
55 changed files with 2328 additions and 38 deletions

View File

@ -187,6 +187,17 @@ docspell.joex {
delete-batch = "100"
}
# Zip files created for downloading multiple files are cached and
# can be cleared periodically.
cleanup-downloads = {
# Whether to enable clearing old download archives.
enabled = true
# The minimum age of a download file to be deleted.
older-than = "14 days"
}
# Removes node entries that are not reachable anymore.
check-nodes {
# Whether this task is enabled

View File

@ -11,12 +11,14 @@ import cats.effect.{Async, Resource}
import docspell.analysis.TextAnalyser
import docspell.backend.fulltext.CreateIndex
import docspell.backend.ops._
import docspell.backend.task.DownloadZipArgs
import docspell.common._
import docspell.config.FtsType
import docspell.ftsclient.FtsClient
import docspell.ftspsql.PsqlFtsClient
import docspell.ftssolr.SolrFtsClient
import docspell.joex.analysis.RegexNerFile
import docspell.joex.download.DownloadZipTask
import docspell.joex.emptytrash.EmptyTrashTask
import docspell.joex.filecopy.{FileCopyTask, FileIntegrityCheckTask}
import docspell.joex.fts.{MigrationTask, ReIndexTask}
@ -54,8 +56,11 @@ final class JoexTasks[F[_]: Async](
upload: OUpload[F],
createIndex: CreateIndex[F],
joex: OJoex[F],
jobs: OJob[F],
itemSearch: OItemSearch[F]
) {
val downloadAll: ODownloadAll[F] =
ODownloadAll(store, jobs, jobStoreModule.jobs)
def get: JobTaskRegistry[F] =
JobTaskRegistry
@ -105,7 +110,7 @@ final class JoexTasks[F[_]: Async](
.withTask(
JobTask.json(
HouseKeepingTask.taskName,
HouseKeepingTask[F](cfg, store, fileRepo),
HouseKeepingTask[F](cfg, store, fileRepo, downloadAll),
HouseKeepingTask.onCancel[F]
)
)
@ -207,6 +212,17 @@ final class JoexTasks[F[_]: Async](
FileIntegrityCheckTask.onCancel[F]
)
)
.withTask(
JobTask.json(
DownloadZipArgs.taskName,
DownloadZipTask[F](
cfg.files.chunkSize,
store,
ODownloadAll(store, jobs, jobStoreModule.jobs)
),
DownloadZipTask.onCancel[F]
)
)
}
object JoexTasks {
@ -233,6 +249,7 @@ object JoexTasks {
updateCheck <- UpdateCheck.resource(httpClient)
notification <- ONotification(store, notificationModule)
fileRepo <- OFileRepository(store, jobStoreModule.jobs)
jobs <- OJob(store, joex, pubSub)
} yield new JoexTasks[F](
cfg,
store,
@ -248,6 +265,7 @@ object JoexTasks {
upload,
createIndex,
joex,
jobs,
itemSearchOps
)

View File

@ -0,0 +1,101 @@
/*
* Copyright 2020 Eike K. & Contributors
*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
package docspell.joex.download
import java.time.format.DateTimeFormatter
import cats.effect._
import cats.syntax.all._
import fs2.{Pipe, Stream}
import docspell.backend.ops.ODownloadAll
import docspell.backend.ops.ODownloadAll.model.DownloadSummary
import docspell.backend.task.DownloadZipArgs
import docspell.common._
import docspell.files.Zip
import docspell.scheduler.Task
import docspell.store.Store
import docspell.store.queries.{ItemFileMeta, QItem}
import docspell.store.records.RDownloadQuery
/** Job task that collects the files matching a download request, packs them into a zip
  * archive, stores that archive in the file repository and records it in a
  * `RDownloadQuery` row.
  */
object DownloadZipTask {
  type Args = DownloadZipArgs

  /** Formats the `year-month` folder prefix of a zip entry.
    *
    * Uses `yyyy` (calendar year). The pattern letter `Y` means week-based year, which
    * yields the neighbouring year for dates in the first/last days of a year.
    */
  private val monthFolderFormat: DateTimeFormatter =
    DateTimeFormatter.ofPattern("yyyy-MM")

  /** Task to run on cancellation; it only logs a warning. */
  def onCancel[F[_]]: Task[F, Args, Unit] =
    Task.log(_.warn(s"Cancelling ${DownloadZipArgs.taskName.id} task"))

  /** Creates the zip task.
    *
    * @param chunkSize
    *   passed on to `Zip.zip`
    * @param store
    *   used to query the files, to read/write file contents and to insert the resulting
    *   row
    * @param downloadOps
    *   used to obtain the summary (id and file count) for the request
    * @return
    *   a task yielding a [[Result]] with the file count taken from the summary
    */
  def apply[F[_]: Async](
      chunkSize: Int,
      store: Store[F],
      downloadOps: ODownloadAll[F]
  ): Task[F, Args, Result] =
    Task { ctx =>
      val req = ctx.args.req
      val query = req.toQuery(ctx.args.accountId)

      // All files selected by the request, turned into (entry name, content) pairs.
      // NOTE(review): the trailing `50` is presumably a batch size for the query —
      // confirm against QItem.findFilesDetailed.
      val allFiles =
        Stream
          .eval(Timestamp.current[F])
          .flatMap(now =>
            store.transact(
              QItem
                .findFilesDetailed(query, req.fileType, now.toUtcDate, req.maxFiles, 50)
            )
          )
          .through(makeEntries(store))

      // Zips the entries and saves the archive for the collective under the
      // `DownloadAll` file category; the stream emits the key(s) of the stored file.
      val storeZipFile =
        allFiles
          .through(Zip.zip(ctx.logger, chunkSize))
          .through(
            store.fileRepo.save(
              ctx.args.accountId.collective,
              FileCategory.DownloadAll,
              MimeTypeHint.advertised("application/zip")
            )
          )

      for {
        _ <- ctx.logger.info(s"Start zipping ${req.itemQueryString}")
        summary <- downloadOps.getSummary(ctx.args.accountId, req)
        _ <- ctx.logger.debug(s"Summary: $summary")
        // `lastOrError` fails the task if the save pipe emitted nothing
        file <- storeZipFile.compile.lastOrError
        row <- createRow(summary, ctx.args.accountId.collective, file)
        _ <- ctx.logger.debug(s"Inserting zip file: $row")
        _ <- store.transact(RDownloadQuery.insert(row))
      } yield Result(summary.fileCount)
    }

  /** Maps each file to a pair of zip entry name and its content stream.
    *
    * The entry name is `<yyyy-MM>/<correspondent>/<file name>` when the item has a
    * correspondent (organization first, otherwise person; any `/` in that name is
    * replaced by `_`), and `<yyyy-MM>/<file name>` otherwise. The file name falls back
    * to `itemFile.name` when `itemFile.fileName` is empty.
    */
  def makeEntries[F[_]](
      store: Store[F]
  ): Pipe[F, ItemFileMeta, (String, Stream[F, Byte])] =
    _.map { itemFile =>
      val name = itemFile.fileName.getOrElse(itemFile.name)
      val month = monthFolderFormat.format(itemFile.date.toUtcDate)
      val entry = itemFile.corrOrg
        .map(_.name)
        .orElse(itemFile.corrPerson.map(_.name))
        .map(_.replace('/', '_'))
        .map(folder => s"$month/$folder/$name")
        .getOrElse(s"$month/$name")

      val bytes = store.fileRepo.getBytes(itemFile.fileMeta.id)
      (entry, bytes)
    }

  /** Builds the row describing a stored archive, stamped with the current time.
    *
    * NOTE(review): the trailing `None` and `0` are presumably "last access" and "access
    * count" — confirm against `RDownloadQuery`.
    */
  def createRow[F[_]: Sync](
      summary: DownloadSummary,
      cid: Ident,
      file: FileKey
  ): F[RDownloadQuery] =
    Timestamp.current[F].map { now =>
      RDownloadQuery(summary.id, cid, file, summary.fileCount, now, None, 0)
    }
}

View File

@ -0,0 +1,24 @@
/*
* Copyright 2020 Eike K. & Contributors
*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
package docspell.joex.download
import docspell.scheduler.JobTaskResultEncoder
import io.circe.Encoder
import io.circe.generic.semiauto.deriveEncoder
/** Result of the download-zip task; it carries a file count. */
final case class Result(fileCount: Int)

object Result {

  /** JSON encoder derived from the case class. */
  implicit val jsonEncoder: Encoder[Result] =
    deriveEncoder[Result]

  /** Encodes the result as JSON and attaches a short message stating the file count. */
  implicit val jobTaskResultEncoder: JobTaskResultEncoder[Result] =
    JobTaskResultEncoder
      .fromJson[Result]
      .withMessage(r => s"Zipped ${r.fileCount} files.")
}

View File

@ -0,0 +1,34 @@
/*
* Copyright 2020 Eike K. & Contributors
*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
package docspell.joex.hk
import cats.effect._
import cats.syntax.all._
import docspell.backend.ops.ODownloadAll
import docspell.common._
import docspell.scheduler._
object CleanupDownloadsTask {

  /** House keeping step that deletes download archives older than the configured age.
    *
    * When the step is disabled in the configuration, it only logs that fact and yields
    * `CleanupResult.disabled`. Otherwise the threshold is the current time minus
    * `cfg.olderThan`, and the count returned by `ops.deleteOlderThan` is reported.
    */
  def apply[F[_]: Sync](
      cfg: HouseKeepingConfig.CleanupDownloads,
      ops: ODownloadAll[F]
  ): Task[F, Unit, CleanupResult] =
    Task { ctx =>
      val log = ctx.logger

      if (!cfg.enabled)
        log
          .info("CleanupDownloads task is disabled in the configuration")
          .as(CleanupResult.disabled)
      else
        Timestamp.current[F].flatMap { now =>
          val threshold = now - cfg.olderThan
          for {
            _ <- log.info(s"Cleanup downloads older than $threshold")
            removed <- ops.deleteOlderThan(threshold)
            _ <- log.info(s"Removed $removed download archives")
          } yield CleanupResult.of(removed)
        }
    }
}

View File

@ -16,6 +16,7 @@ case class HouseKeepingConfig(
cleanupInvites: CleanupInvites,
cleanupJobs: CleanupJobs,
cleanupRememberMe: CleanupRememberMe,
cleanupDownloads: CleanupDownloads,
checkNodes: CheckNodes,
integrityCheck: IntegrityCheck
)
@ -26,6 +27,8 @@ object HouseKeepingConfig {
case class CleanupJobs(enabled: Boolean, olderThan: Duration, deleteBatch: Int)
/** Settings for clearing cached download archives: `enabled` switches the step on or
  * off, `olderThan` is subtracted from the current time to get the deletion threshold.
  */
case class CleanupDownloads(enabled: Boolean, olderThan: Duration)
case class CleanupRememberMe(enabled: Boolean, olderThan: Duration)
case class CheckNodes(enabled: Boolean, minNotFound: Int)

View File

@ -9,7 +9,7 @@ package docspell.joex.hk
import cats.effect._
import cats.implicits._
import docspell.backend.ops.OFileRepository
import docspell.backend.ops.{ODownloadAll, OFileRepository}
import docspell.common._
import docspell.joex.Config
import docspell.joex.filecopy.FileIntegrityCheckTask
@ -29,7 +29,8 @@ object HouseKeepingTask {
def apply[F[_]: Async](
cfg: Config,
store: Store[F],
fileRepo: OFileRepository[F]
fileRepo: OFileRepository[F],
downloadAll: ODownloadAll[F]
): Task[F, Unit, Result] = {
val combined =
(
@ -37,6 +38,7 @@ object HouseKeepingTask {
CleanupInvitesTask(cfg.houseKeeping.cleanupInvites, store),
CleanupJobsTask(cfg.houseKeeping.cleanupJobs, store),
CleanupRememberMeTask(cfg.houseKeeping.cleanupRememberMe, store),
CleanupDownloadsTask(cfg.houseKeeping.cleanupDownloads, downloadAll),
IntegrityCheckTask(cfg.houseKeeping.integrityCheck, store, fileRepo)
).mapN(Result.apply)
@ -63,6 +65,7 @@ object HouseKeepingTask {
cleanupInvites: CleanupResult,
cleanupJobs: CleanupResult,
cleanupRememberMe: CleanupResult,
cleanupDownloads: CleanupResult,
integrityCheck: FileIntegrityCheckTask.Result
)
@ -76,6 +79,7 @@ object HouseKeepingTask {
s"- Invites removed: ${r.cleanupInvites.asString}\n" +
s"- Jobs removed: ${r.cleanupJobs.asString}\n" +
s"- RememberMe removed: ${r.cleanupRememberMe.asString}\n" +
s"- Downloads remove: ${r.cleanupDownloads.asString}\n" +
s"- Integrity check: ok=${r.integrityCheck.ok}, failed=${r.integrityCheck.failedKeys.size}, notFound=${r.integrityCheck.notFoundKeys.size}"
}