From a5d4ee6e6631ace34007274011da12db419e1a97 Mon Sep 17 00:00:00 2001
From: Maxence Lange
Date: Fri, 25 May 2018 10:28:48 -0100
Subject: [PATCH] return accepted mime type

---
 lib/Service/TesseractService.php | 63 ++++++++++++++++++++++++++++++--
 1 file changed, 60 insertions(+), 3 deletions(-)

diff --git a/lib/Service/TesseractService.php b/lib/Service/TesseractService.php
index 97b5ab1..9577b2f 100644
--- a/lib/Service/TesseractService.php
+++ b/lib/Service/TesseractService.php
@@ -29,9 +29,9 @@
 
 use Exception;
 use OC\Files\View;
-use OCA\Files_FullTextSearch\Exceptions\FileNotFoundException;
 use OCP\AppFramework\IAppContainer;
 use OCP\Files\File;
+use OCP\Files\NotFoundException;
 use thiagoalessio\TesseractOCR\TesseractOCR;
 
 class TesseractService {
@@ -58,18 +58,75 @@ public function __construct(
 	}
 
 
+	/**
+	 * Tells whether a file should be handed to the OCR engine, judging by its
+	 * MIME type and, for generic binary content, by its extension.
+	 *
+	 * A MIME type is accepted when it starts with one of the supported image
+	 * types, so values carrying a suffix after the type still match.
+	 *
+	 * @param string $mimeType MIME type reported for the file
+	 * @param string $extension file extension, only consulted when the MIME
+	 *                          type is 'application/octet-stream'
+	 *
+	 * @return bool true when the file is a candidate for OCR
+	 */
+	public function parsedMimeType($mimeType, $extension) {
+		$ocrMimes = [
+			'image/png',
+			'image/jpeg',
+			'image/tiff',
+			'image/vnd.djvu'
+		];
+
+		foreach ($ocrMimes as $mime) {
+			// Prefix match rather than equality.
+			if (strpos($mimeType, $mime) === 0) {
+				return true;
+			}
+		}
+
+		// Generic binary type: the MIME type says nothing, fall back to the extension.
+		if ($mimeType === 'application/octet-stream') {
+			return $this->parsedExtension($extension);
+		}
+
+		return false;
+	}
+
+
+	/**
+	 * Tells whether a file extension is on the list of extensions accepted for OCR.
+	 *
+	 * The list is currently empty (its only entry is commented out), so this
+	 * returns false for every extension.
+	 *
+	 * @param string $extension
+	 *
+	 * @return bool
+	 */
+	private function parsedExtension($extension) {
+		$ocrExtensions = [
+//			'djvu'
+		];
+
+		// Strict comparison so a non-string value can never loosely match an entry.
+		return in_array($extension, $ocrExtensions, true);
+	}
+
+
 	/**
 	 * @param $file
 	 *
 	 * @return string
-	 * @throws FileNotFoundException
+	 * @throws NotFoundException
 	 */
 	public function ocrFile(File $file) {
 
 		try {
 			$path = $this->getAbsolutePath($file);
 		} catch (Exception $e) {
-			throw new FileNotFoundException('file not found');
+			throw new NotFoundException('file not found');
 		}
 
 		$ocr = new TesseractOCR($path);