#!/usr/bin/python
# -*- coding: utf-8 -*-
# imagescan.py (forked from jaymzcd/color-analysis)
#
# A color analysis script to help you label your store's products with color
# data automagically. It will take either a single file or scour an entire
# folder for folders of images and do each one individually, printing a
# summary of what it thinks is the correct color value. A work in progress...
#
# ~jaymz | @jaymzcampbell | jaymz.eu
#
# MIT licensed, for what it's worth; copy: http://www.opensource.org/licenses/mit-license.php
import Image
import ImageFilter
import os
import glob
import sys
import colorsys
import re
from copy import copy
from operator import itemgetter
from decimal import Decimal
# Destination of the generated "row id, name, color id" lines.
output = open('colors.csv', 'w')

# A sampled pixel is only analysed when every one of its channels lies
# strictly between these two limits.
LBOUND = 0
UBOUND = 255

# Minimum saturation (in percent) a pixel needs before it is counted in a hue
# bin; keeps washed out pixels from influencing the counts.
MIN_SATURATION = 30

# Base folder for processFolder(); it iterates over the entries directly
# below this path.
FOLDER = '/home/jaymz/documents/crooked-docs/data-export/store-migration/product-images/'

# Output switches: print a human readable summary and/or write the CSV rows.
SUMMARY_FORMAT = True
SQL_FORMAT = True

# One name per 30 degree hue slice, starting at red.
# Names based off: http://bluelobsterart.com/wordpress/wp-content/uploads/2009/03/rgb-color-wheel-lg.jpg
COLOR = [
    'RED', 'ORANGE', 'YELLOW', 'LIME',
    'GREEN', 'TURQUOISE', 'CYAN', 'OCEAN',
    'BLUE', 'VIOLET', 'MAGENTA', 'RASPBERRY',
]

# Prefix for each of the three value bands; the middle band has no prefix.
TONE = ['DARK', '', 'BRIGHT']

# Color ids produced via the createColorSQL.py file, keyed by "<tone> <color>".
# GRAY (37), BLACK (38) and WHITE (39) were added after these and are written
# directly by processImage().
SQL_IDS = {
    'DARK RED': 1, 'RED': 2, 'BRIGHT RED': 3,
    'DARK ORANGE': 4, 'ORANGE': 5, 'BRIGHT ORANGE': 6,
    'DARK YELLOW': 7, 'YELLOW': 8, 'BRIGHT YELLOW': 9,
    'DARK LIME': 10, 'LIME': 11, 'BRIGHT LIME': 12,
    'DARK GREEN': 13, 'GREEN': 14, 'BRIGHT GREEN': 15,
    'DARK TURQUOISE': 16, 'TURQUOISE': 17, 'BRIGHT TURQUOISE': 18,
    'DARK CYAN': 19, 'CYAN': 20, 'BRIGHT CYAN': 21,
    'DARK OCEAN': 22, 'OCEAN': 23, 'BRIGHT OCEAN': 24,
    'DARK BLUE': 25, 'BLUE': 26, 'BRIGHT BLUE': 27,
    'DARK VIOLET': 28, 'VIOLET': 29, 'BRIGHT VIOLET': 30,
    'DARK MAGENTA': 31, 'MAGENTA': 32, 'BRIGHT MAGENTA': 33,
    'DARK RASPBERRY': 34, 'RASPBERRY': 35, 'BRIGHT RASPBERRY': 36,
}

# Running row counter; used as the first column of every CSV line.
pcnt = 0
def trimFloat(val, places=2):
    """Truncate val to places decimal places and return it as a float.

    The digits are cut off, not rounded (0.999 becomes 0.99). The cut is made
    on the fixed-point decimal expansion of repr(val), so values whose repr
    uses scientific notation (e.g. 1e-05) are truncated correctly instead of
    having their exponent sliced apart, and negative or multi-digit values
    keep the requested number of decimals.

    val    -- number to truncate
    places -- number of digits to keep after the decimal point
    """
    number = float(val)
    # format(..., 'f') always yields plain positional notation, never an
    # exponent, which makes the decimal point a reliable place to cut.
    text = format(Decimal(repr(number)), 'f')
    whole, point, fraction = text.partition('.')
    if not point:
        # No fractional part to trim (large magnitudes, inf, nan).
        return number
    return float(whole + point + fraction[:places])
def withinBounds(allowance, _rgb):
    """Tell whether all values in _rgb lie within allowance of one another.

    Every value is converted to a Decimal and quantized to the same number of
    decimal places as allowance before the comparison. Values that are equal
    before quantizing never count as a difference.

    allowance -- largest tolerated difference between any two values
    _rgb      -- iterable of numbers (the r, g, b channels of a pixel)

    Returns False as soon as one pair differs by more than allowance, True
    otherwise (including for empty or single-element input).
    """
    step = Decimal(repr(allowance))
    # Convert each channel exactly once; the raw value is kept alongside the
    # quantized one because the equality test is made on the raw numbers.
    channels = [(raw, Decimal(repr(raw)).quantize(step)) for raw in _rgb]
    for idx, (raw_a, quant_a) in enumerate(channels):
        # The comparison is symmetric, so each unordered pair is checked once.
        for raw_b, quant_b in channels[idx + 1:]:
            if raw_a != raw_b and abs(quant_a - quant_b) > step:
                return False
    return True
def _rankBins(counts):
    """Return the bin indices of counts ordered from fullest to emptiest.

    sorted() is stable, so bins holding equal counts keep their original
    (hue) order and consecutive ranks always name different bins.
    """
    return sorted(range(len(counts)), key=lambda idx: -counts[idx])


def _strongestHue(tone_counts, rankings, place):
    """Return (tone, hue bin, count) of the fullest candidate at rank place.

    For each tone the bin at position place of its ranking is that tone's
    candidate; the candidate with the highest count wins and the first tone
    wins a tie.
    """
    candidates = [ranking[place] for ranking in rankings]
    counts = [bins[idx] for bins, idx in zip(tone_counts, candidates)]
    best = max(counts)
    tone = counts.index(best)
    return tone, candidates[tone], best


def processImage(i, name=None):
    """ Scales the image down to 200x200, blurs it to ease the blending of the
    color values and reduce spikes from anomalies, then samples every 8th
    pixel. Sampled pixels whose channels are within 0.02 of each other are
    tallied as white, black or gray; the rest are converted to HSV and, when
    saturated enough, counted in bins of 30° of hue, separated into three
    bands (dark, mid, bright) based on their value.

    i    -- image object providing resize(), convert(), filter() and getdata()
    name -- label written as the second CSV column; when it is empty or None
            no CSV rows are written for this image

    Prints a summary when SUMMARY_FORMAT is set and appends rows to the
    module level output file when SQL_FORMAT is set. Increments the global
    row counter pcnt. Returns None. """
    global pcnt
    i = i.resize((200, 200))
    i = i.convert("RGB")
    i = i.filter(ImageFilter.BLUR)

    h = []       # HSV tuples of the colored samples
    grays = []   # HSV tuples of the mid-brightness gray samples
    black_count = 0
    white_count = 0
    total_samples = 0
    cnt = 0
    for p in i.getdata():
        cnt += 1
        if cnt != 8:  # take every 8th pixel
            continue
        cnt = 0  # reset sample counter
        if not all(LBOUND < channel < UBOUND for channel in p[:3]):
            continue
        r = trimFloat(float(p[0]) / 255)
        g = trimFloat(float(p[1]) / 255)
        b = trimFloat(float(p[2]) / 255)
        total_samples += 1
        if not withinBounds(0.02, (r, g, b)):
            h.append(colorsys.rgb_to_hsv(r, g, b))
            continue
        # Channels are (nearly) equal: classify by average brightness.
        brightness = (r + g + b) / 3
        if brightness > 0.94:
            white_count += 1
        elif brightness < 0.3:
            black_count += 1
        else:
            grays.append(colorsys.rgb_to_hsv(r, g, b))
    # Sorting fixes the order in which the gray values are summed below.
    grays.sort()

    bin_width = 30  # size of hue slices (degrees)
    bin_count = 360 // bin_width
    darks = [0] * bin_count
    mids = [0] * bin_count
    lites = [0] * bin_count
    for hue_part, sat_part, val_part in h:
        hue = hue_part * 360
        sat = sat_part * 100
        val = val_part * 100
        if sat < MIN_SATURATION:
            continue
        # Shift by half a slice so each bin is centred on its color name.
        bin_number = ((int(hue) + 15) // bin_width) % bin_count
        if val < 33:
            darks[bin_number] += 1
        elif val < 66:
            # Includes exactly 33, which used to fall through to the bright
            # band because both neighbouring tests excluded it.
            mids[bin_number] += 1
        else:
            lites[bin_number] += 1

    if SUMMARY_FORMAT:
        for c, x in enumerate(zip(darks, mids, lites)):
            print('%d %s : %s %d°' % (c, COLOR[c], x, c * bin_width))

    tone_counts = (darks, mids, lites)
    rankings = [_rankBins(counts) for counts in tone_counts]

    # Dominant hue: the fullest bin over all three tones.
    tone, max_hbin, top_count = _strongestHue(tone_counts, rankings, 0)
    pcnt += 1
    if SUMMARY_FORMAT:
        print("\nDominant Hue: %s %s" % (TONE[tone], COLOR[max_hbin]))
    if SQL_FORMAT and name and top_count > 30:
        output.write('%d, %s, %s\n' % (pcnt, name, SQL_IDS[' '.join([TONE[tone], COLOR[max_hbin]]).strip()]))

    # Secondary hue: the fullest among each tone's runner-up bin. Ranking by
    # index (instead of looking a count up with list.index) keeps a tied
    # count from selecting the primary bin a second time.
    tone, max_hbin, top_count = _strongestHue(tone_counts, rankings, 1)
    if SUMMARY_FORMAT:
        print("Secondary Hue: %s %s" % (TONE[tone], COLOR[max_hbin]))
    if SQL_FORMAT and name and top_count > 30:
        pcnt += 1
        output.write('%d, %s, %s\n' % (pcnt, name, SQL_IDS[' '.join([TONE[tone], COLOR[max_hbin]]).strip()]))

    # NOTE(review): grays holds HSV tuples, so this averages hue, saturation
    # and value together rather than a brightness -- confirm that is intended.
    gray_total = [(gray[0] + gray[1] + gray[2]) / 3 for gray in grays]
    # An image without gray samples (or without any usable sample at all)
    # must not abort the run, so empty totals yield 0 instead of dividing.
    if gray_total:
        gray_average = sum(gray_total) / len(gray_total)
    else:
        gray_average = 0.0
    if total_samples:
        black_percent = black_count / float(total_samples) * 100
        gray_percent = len(gray_total) / float(total_samples) * 100
        white_percent = white_count / float(total_samples) * 100
    else:
        black_percent = gray_percent = white_percent = 0.0

    if SUMMARY_FORMAT:
        print("\nAverage Gray: %s (samples: %0.1f%%), White count: %s (%0.1f%%), Black count: %s (%0.1f%%)" % (gray_average, gray_percent, white_count, white_percent, black_count, black_percent))
        print("Total samples taken: %s\n\n" % total_samples)

    # Same guard as the hue rows above: without a name there is nothing to
    # label, so no row is written.
    if SQL_FORMAT and name:
        if black_percent > 10:
            pcnt += 1
            output.write('%d, %s, %d\n' % (pcnt, name, 38))
        if gray_percent > 10:
            pcnt += 1
            output.write('%d, %s, %d\n' % (pcnt, name, 37))
        if white_percent > 30:
            pcnt += 1
            output.write('%d, %s, %d\n' % (pcnt, name, 39))
# Helper functions and the __main__ entry point follow
def processFolder(folder):
    """Analyse one image from every sub-folder found under folder.

    folder is used as a glob prefix (folder + '*'), so it should end with a
    path separator. In each matching directory the file names containing
    "jpg" (and not "._") are sorted and the second one is opened and passed
    to processImage() together with the directory's own name.

    Entries that are not directories, and directories holding fewer than two
    such images, are skipped. Any other failure while handling a directory is
    reported on stderr and the scan continues with the next one.
    """
    for image_folder in glob.glob(folder + '*'):
        if not os.path.isdir(image_folder):
            continue
        try:
            folder_images = sorted(
                image for image in os.listdir(image_folder)
                if "jpg" in image and "._" not in image
            )
            # NOTE(review): the second image is used, not the first --
            # presumably deliberate; confirm against the image export layout.
            if len(folder_images) < 2:
                continue
            j = os.path.join(image_folder, folder_images[1])
            if SUMMARY_FORMAT:
                print("working: " + j)
            processImage(Image.open(j), os.path.basename(image_folder))
        except Exception as err:
            # Best effort: one bad folder must not stop the batch, but say
            # so instead of hiding it. KeyboardInterrupt and SystemExit are
            # not Exception subclasses and still propagate.
            sys.stderr.write("skipping %s: %s\n" % (image_folder, err))
def processFile(_file):
    """Open the image stored at _file and analyse it.

    No name is handed to processImage(), so it runs with its default of None.
    """
    processImage(Image.open(_file))
if __name__ == "__main__":
    # With an argument: analyse that single file (looked up below
    # 'product-images/' unless the argument already mentions that folder).
    # Without one: walk every folder below FOLDER.
    # The argument count is tested explicitly so that an IndexError raised
    # while processing a file is not mistaken for a missing argument.
    try:
        if len(sys.argv) > 1:
            target = sys.argv[1]
            if 'product-images' not in target:
                target = 'product-images/' + target
            processFile(target)
        else:
            processFolder(FOLDER)
    finally:
        # Flush and close the CSV even when processing fails.
        output.close()