eikek 
							
						 
					 
					
						
						
							
						
						9013f2de5b 
					 
					
						
						
							
							Update scalafmt settings  
						
						
						
						
					 
					
						2021-09-22 17:23:24 +02:00 
						 
				 
			
				
					
						
							
							
								eikek 
							
						 
					 
					
						
						
							
						
						9785db0683 
					 
					
						
						
							
							Change license header of all files  
						
						
						
						
					 
					
						2021-09-21 22:35:38 +02:00 
						 
				 
			
				
					
						
							
							
								Scala Steward 
							
						 
					 
					
						
						
							
						
						e4fecefaea 
					 
					
						
						
							
							Reformat with scalafmt 3.0.0  
						
						
						
						
					 
					
						2021-08-19 08:50:30 +02:00 
						 
				 
			
				
					
						
							
							
								eikek 
							
						 
					 
					
						
						
							
						
						1901fe1a8c 
					 
					
						
						
							
							Adopt deprecated APIs from fs2; use fs2.Path  
						
						
						
						
					 
					
						2021-08-07 17:51:56 +02:00 
						 
				 
			
				
					
						
							
							
								Scala Steward 
							
						 
					 
					
						
						
							
						
						558007235b 
					 
					
						
						
							
							Update tika-core to 2.0.0  
						
						... 
						
						
						
						Include new ODF parser from tika-2.0.0 
						
						
					 
					
						2021-07-25 13:08:18 +02:00 
						 
				 
			
				
					
						
							
							
								eikek 
							
						 
					 
					
						
						
							
						
						8e5c88fd32 
					 
					
						
						
							
							Add copyright header to source files  
						
						
						
						
					 
					
						2021-07-04 10:57:53 +02:00 
						 
				 
			
				
					
						
							
							
								eikek 
							
						 
					 
					
						
						
							
						
						02b8078f01 
					 
					
						
						
							
							Use fs2 Files api  
						
						
						
						
					 
					
						2021-06-22 23:17:32 +02:00 
						 
				 
			
				
					
						
							
							
								eikek 
							
						 
					 
					
						
						
							
						
						bd791b4593 
					 
					
						
						
							
							Upgrade code base to CE3  
						
						
						
						
					 
					
						2021-06-22 22:53:34 +02:00 
						 
				 
			
				
					
						
							
							
								Eike Kettner 
							
						 
					 
					
						
						
							
						
						e1bbc2edf5 
					 
					
						
						
							
							Apply autoformat  
						
						
						
						
					 
					
						2021-04-10 16:31:58 +02:00 
						 
				 
			
				
					
						
							
							
								Eike Kettner 
							
						 
					 
					
						
						
							
						
						8c6ad8fc4e 
					 
					
						
						
							
							This test only doesn't work on my ci  
						
						
						
						
					 
					
						2021-03-13 16:57:08 +01:00 
						 
				 
			
				
					
						
							
							
								Eike Kettner 
							
						 
					 
					
						
						
							
						
						6a63694a3e 
					 
					
						
						
							
							Convert unit tests to munit  
						
						
						
						
					 
					
						2021-03-10 19:48:56 +01:00 
						 
				 
			
				
					
						
							
							
								Eike Kettner 
							
						 
					 
					
						
						
							
						
						cfa36a5270 
					 
					
						
						
							
							Fix preview png tests  
						
						... 
						
						
						
						Outcome was checked manually. 
						
						
					 
					
						2021-03-01 00:33:57 +01:00 
						 
				 
			
				
					
						
							
							
								Eike Kettner 
							
						 
					 
					
						
						
							
						
						a77f34b7ba 
					 
					
						
						
							
							Add a processing step to retrieve page counts  
						
						
						
						
					 
					
						2020-11-09 11:08:24 +01:00 
						 
				 
			
				
					
						
							
							
								Eike Kettner 
							
						 
					 
					
						
						
							
						
						f4e50c5229 
					 
					
						
						
							
							Provide endpoints to submit tasks to re-generate previews  
						
						... 
						
						
						
						The scaling factor can be given in the config file. When this changes,
images can be regenerated via POSTing to certain endpoints. It is
possible to regenerate just one attachment preview or all within a
collective. 
						
						
					 
					
						2020-11-09 09:00:02 +01:00 
						 
				 
			
				
					
						
							
							
								Eike Kettner 
							
						 
					 
					
						
						
							
						
						350a271b22 
					 
					
						
						
							
							Add simple pdf page preview function  
						
						
						
						
					 
					
						2020-11-08 01:25:14 +01:00 
						 
				 
			
				
					
						
							
							
								Eike Kettner 
							
						 
					 
					
						
						
							
						
						c658677032 
					 
					
						
						
							
							Autoformat  
						
						
						
						
					 
					
						2020-09-09 00:29:32 +02:00 
						 
				 
			
				
					
						
							
							
								Eike Kettner 
							
						 
					 
					
						
						
							
						
						cec4948710 
					 
					
						
						
							
							Add pdf meta data to extracted text to add it to full-text index  
						
						
						
						
					 
					
						2020-07-19 01:07:49 +02:00 
						 
				 
			
				
					
						
							
							
								Eike Kettner 
							
						 
					 
					
						
						
							
						
						209c068436 
					 
					
						
						
							
							Use keywords in pdfs to search for existing tags  
						
						... 
						
						
						
						During processing, keywords stored in PDF metadata are used to look
them up in the tag database and associate any existing tags to the
item.
See #175  
						
						
					 
					
						2020-07-19 00:28:04 +02:00 
						 
				 
			
				
					
						
							
							
								Eike Kettner 
							
						 
					 
					
						
						
							
						
						da68405f9b 
					 
					
						
						
							
							Extract meta data from pdfs using pdfbox  
						
						
						
						
					 
					
						2020-07-18 23:04:46 +02:00 
						 
				 
			
				
					
						
							
							
								Eike Kettner 
							
						 
					 
					
						
						
							
						
						347a029af8 
					 
					
						
						
							
							Scalafix organize-imports  
						
						
						
						
					 
					
						2020-06-28 21:20:47 +02:00 
						 
				 
			
				
					
						
							
							
								Eike Kettner 
							
						 
					 
					
						
						
							
						
						2e88207ff1 
					 
					
						
						
							
							Post process all extracted text  
						
						... 
						
						
						
						Removes 0 bytes and leading/trailing whitespace 
						
						
					 
					
						2020-05-25 13:56:06 +02:00 
						 
				 
			
				
					
						
							
							
								Eike Kettner 
							
						 
					 
					
						
						
							
						
						ee394eae86 
					 
					
						
						
							
							Try streamline the different impls for MimeType  
						
						
						
						
					 
					
						2020-05-25 09:24:24 +02:00 
						 
				 
			
				
					
						
							
							
								Eike Kettner 
							
						 
					 
					
						
						
							
						
						c41cdeefec 
					 
					
						
						
							
							Update scalafmt to 2.5.1 + scalafmtAll  
						
						
						
						
					 
					
						2020-05-04 23:53:57 +02:00 
						 
				 
			
				
					
						
							
							
								Eike Kettner 
							
						 
					 
					
						
						
							
						
						9656ba62f4 
					 
					
						
						
							
							scalafmtAll  
						
						
						
						
					 
					
						2020-03-26 18:26:00 +01:00 
						 
				 
			
				
					
						
							
							
								Eike Kettner 
							
						 
					 
					
						
						
							
						
						cf7ccd572c 
					 
					
						
						
							
							Improve handling encodings  
						
						... 
						
						
						
						Html and text files are not fixed to be UTF-8. The encoding is now
detected, which may not work for all files. Default/fallback will be
utf-8.
There is still a problem with mails that contain html parts not in
utf8 encoding. The mail text is always returned as a string and the
original encoding is lost. Then the html is stored using utf-8 bytes,
but wkhtmltopdf reads it using latin1. It seems that the `--encoding`
setting doesn't override encoding provided by the document. 
						
						
					 
					
						2020-03-23 22:51:28 +01:00 
						 
				 
			
				
					
						
							
							
								Eike Kettner 
							
						 
					 
					
						
						
							
						
						2f87065b2e 
					 
					
						
						
							
							sbt scalafmtAll  
						
						
						
						
					 
					
						2020-02-25 20:55:00 +01:00 
						 
				 
			
				
					
						
							
							
								Eike Kettner 
							
						 
					 
					
						
						
							
						
						97305d27ff 
					 
					
						
						
							
							Integrate support for more files into processing and upload  
						
						... 
						
						
						
						The restriction that only pdf files can be uploaded is removed. All
files can now be uploaded. The processing may not process all. It is
still possible to restrict file uploads by types via a configuration. 
						
						
					 
					
						2020-02-19 23:27:00 +01:00 
						 
				 
			
				
					
						
							
							
								Eike Kettner 
							
						 
					 
					
						
						
							
						
						9b1349734e 
					 
					
						
						
							
							Convert some files to pdf  
						
						
						
						
					 
					
						2020-02-19 02:03:10 +01:00 
						 
				 
			
				
					
						
							
							
								Eike Kettner 
							
						 
					 
					
						
						
							
						
						5869e2ee6e 
					 
					
						
						
							
							Streamline extern-conv stdin/infile  
						
						
						
						
					 
					
						2020-02-18 12:43:47 +01:00 
						 
				 
			
				
					
						
							
							
								Eike Kettner 
							
						 
					 
					
						
						
							
						
						0dcc00836b 
					 
					
						
						
							
							Make logger configurable in system commands  
						
						
						
						
					 
					
						2020-02-18 12:02:43 +01:00 
						 
				 
			
				
					
						
							
							
								Eike Kettner 
							
						 
					 
					
						
						
							
						
						e0682464b5 
					 
					
						
						
							
							Configure pdf extraction; move Logger and DataType to common  
						
						
						
						
					 
					
						2020-02-17 14:01:36 +01:00 
						 
				 
			
				
					
						
							
							
								Eike Kettner 
							
						 
					 
					
						
						
							
						
						3d615181e0 
					 
					
						
						
							
							Early draft for text extraction  
						
						
						
						
					 
					
						2020-02-17 01:57:22 +01:00 
						 
				 
			
				
					
						
							
							
								Eike Kettner 
							
						 
					 
					
						
						
							
						
						8143a4edcc 
					 
					
						
						
							
							Adding extraction primitives  
						
						
						
						
					 
					
						2020-02-16 21:37:26 +01:00 
						 
				 
			
				
					
						
							
							
								Eike Kettner 
							
						 
					 
					
						
						
							
						
						851ee7ef0f 
					 
					
						
						
							
							Reorganize processing code  
						
						... 
						
						
						
						Use separate modules for
- text extraction
- conversion to pdf
- text analysis 
						
						
					 
					
						2020-02-15 21:25:25 +01:00