-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathscr
executable file
·83 lines (65 loc) · 2.06 KB
/
scr
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
#!/bin/bash
# Screenshot OCR tool
#
# Usage: scr COMMAND [ARG] [FOLDER]
#   ocr FILE                    OCR one image; the text is saved in a hidden
#                               file next to the image (DIR/.NAME.txt) and in
#                               the image's Comment / Caption-Abstract metadata
#   ocrall|oa ARG [FOLDER]      OCR every file in FOLDER (ARG is ignored)
#   search|s TEXT [FOLDER]      print "image:matching line" for every hit
#   open|o TEXT [FOLDER]        xdg-open every image whose text matches
#   cat|c TEXT [FOLDER]         print image name and full text of every match
#   figure|fig|f TEXT [FOLDER]  print an HTML <figure> per matching image with
#                               the image inlined as a base64 data URI
# TEXT is a case-insensitive grep pattern. FOLDER is always the third
# argument and defaults to ~/working/screenshots.
#
# requires: imagemagick (convert), tesseract-ocr, exiftool
#           (open also needs xdg-open; figure needs file and base64)
# recommended to be installed with incron
# e.g. /home/iain/working/screenshots IN_CLOSE_WRITE /home/iain/bin/scr ocr $@/$#

default_folder="${HOME}/working/screenshots"

# Turns grep's "dir/.name.png.txt[:line]" back into "dir/name.png[:line]".
# NOTE: the first expression drops the dot after *every* slash on the line,
# so hidden directories in the path (and any "/." in the matched text) are
# rewritten as well.
convertpattern='s/\/\./\//g; s/\.txt\(:\|$\)/\1/'

# Print the command summary.
scr_usage() {
  printf 'Usage: %s COMMAND [ARG] [FOLDER]\n' "${0##*/}"
  printf '  ocr FILE                    OCR a single image\n'
  printf '  ocrall|oa ARG [FOLDER]      OCR all images in FOLDER (ARG is ignored)\n'
  printf '  search|s TEXT [FOLDER]      find TEXT in the OCR results\n'
  printf '  open|o TEXT [FOLDER]        open each matching image\n'
  printf '  cat|c TEXT [FOLDER]         dump all text of matching images\n'
  printf '  figure|fig|f TEXT [FOLDER]  emit base64 HTML figures for matches\n'
}

# OCR one image.
# Arguments: $1 - path of the image
# Outputs:   progress messages on stdout; writes DIR/.NAME.txt and the
#            image's Comment / Caption-Abstract tags
# Returns:   1 if the text file already exists, the file is not a png/jpg
#            (or is one of our own *.lg.png temporaries), or convert /
#            tesseract fail
scr_ocr() {
  local fn=$1
  local txt large lower text

  # text saved in hidden file next to image
  txt="$(dirname -- "$fn")/.$(basename -- "$fn")"
  large="$fn.lg.png"

  echo "Doing OCR on $fn..."
  if [[ -f "$txt.txt" ]]; then
    echo "Already got a txt file"
    return 1
  fi

  lower=$(printf '%s' "$fn" | tr '[:upper:]' '[:lower:]')
  if [[ ! $lower =~ \.(png|jpg)$ ]] || [[ $fn =~ \.lg\.png$ ]]; then
    echo "Not a screenshot"
    return 1
  fi

  # Increase size to improve OCR accuracy
  if ! convert -limit memory 8000MB -modulate 100,0 -resize 400% "$fn" "$large"; then
    rm -f -- "$large"
    return 1
  fi

  # OCR to text file. Stop on failure: carrying on would hand exiftool an
  # empty value, which clears whatever comment the image already had.
  if ! tesseract "$large" "$txt"; then
    rm -f -- "$large"
    return 1
  fi

  # Write text to file metadata (best effort; the .txt file is what the
  # search commands read)
  text=$(cat -- "$txt.txt")
  exiftool -overwrite_original -comment="$text" "$fn"
  exiftool -overwrite_original -Caption-Abstract="$text" "$fn"

  # Delete large version
  rm -f -- "$large"
}

# OCR all images in the given dir.
# Arguments: $1 - folder to scan (hidden files are not visited)
scr_ocr_all() {
  local dir=$1 f

  if [[ ! -d "$dir" ]]; then
    printf '%s: not a directory\n' "$dir" >&2
    return 1
  fi

  for f in "$dir"/*; do
    # An empty folder leaves the unexpanded pattern behind; skip it.
    [[ -e "$f" ]] || continue
    scr_ocr "$f"
  done
}

# Find text in a screenshot dir.
# Arguments: $1 - grep pattern, $2 - folder
# Outputs:   one "image:matching line" per hit
scr_search() {
  local pattern=$1 dir=$2

  # -H keeps the "file:" prefix even when the glob expands to a single
  # file; -e / -- keep patterns and paths starting with "-" from being
  # parsed as options.
  grep -H -d skip -i -e "$pattern" -- "$dir"/.* | sed "$convertpattern" 2>/dev/null
}

# Open each matching image.
# Arguments: $1 - grep pattern, $2 - folder
scr_open() {
  local f

  # File names containing ":" are cut short by the cut below.
  scr_search "$1" "$2" | cut -d: -f 1 | sort -u |
    while IFS= read -r f; do
      xdg-open "$f"
    done
}

# Dump out all text for matching images: image name, then its OCR text.
# Arguments: $1 - grep pattern, $2 - folder
scr_cat() {
  local pattern=$1 dir=$2 f

  grep -l -d skip -i -e "$pattern" -- "$dir"/.* | sort -u |
    while IFS= read -r f; do
      printf '%s\n' "$f" | sed "$convertpattern"
      cat -- "$f"
    done 2>/dev/null
}

# Output base64 encoded HTML figures of each image matching.
# Arguments: $1 - grep pattern, $2 - folder
scr_figure() {
  local f mimetype content

  scr_search "$1" "$2" | cut -d: -f 1 | sort -u |
    while IFS= read -r f; do
      mimetype=$(file -bN --mime-type "$f")
      content=$(base64 -w0 < "$f")
      # printf, not echo: bash's echo prints "\n" literally without -e.
      printf '\n<figure><img src="data:%s;base64,%s" width="100%%" alt="" />\n<figcaption>: </figcaption>\n</figure>\n\n' \
        "$mimetype" "$content"
    done
}

# Dispatch COMMAND [ARG] [FOLDER] to the matching sub-command.
# Returns: the sub-command's status, or 2 for an unknown/missing command
main() {
  local op=$1 arg=$2
  local folder=${3:-$default_folder}

  case "$op" in
    ocr)
      scr_ocr "$arg"
      ;;
    ocrall|oa)
      scr_ocr_all "$folder"
      ;;
    search|s)
      scr_search "$arg" "$folder"
      ;;
    open|o)
      scr_open "$arg" "$folder"
      ;;
    cat|c)
      scr_cat "$arg" "$folder"
      ;;
    figure|fig|f)
      scr_figure "$arg" "$folder"
      ;;
    *)
      scr_usage >&2
      return 2
      ;;
  esac
}

main "$@"