111 lines
4.1 KiB

# data sources: CSS docs, jennyscrayoncollection 2017 article on Crayola colors, XKCD color survey
# target: Single C++ header file to provide a struct of color RGB codes and names.
# This file pre-computes the nearest neighbor of every color.
# if an RGB code being searched for is closer than the nearest neighbor, it's the closest color name.
def hex_to_rgb(txt):
if '#' in txt: txt = txt.replace('#', '')
r = txt[0:2]
g = txt[2:4]
b = txt[4:6]
return tuple([int(s, 16) for s in (r,g,b)])
class palette:
def __init__(self, name, filename, priority): = name
self.priority = priority
with open(filename, "r") as f:
print(f"scanning {filename}")
self.colors = {}
for line in'\n'):
if len(line.split('\t')) < 2: continue
name, code = line.split('\t')
#print(name, code)
self.colors[name] = hex_to_rgb(code)
def __repr__(self):
return f"<Palette '{}' - {len(self.colors)} colors, priority = {self.priority}>"
palettes = [
#palette("jenny", "jenny_colors.txt", 3), # I should probably use wikipedia as a source for copyright reasons
palette("crayon", "wikicrayons_colors.txt", 2),
palette("xkcd", "xkcd_colors.txt", 1),
palette("css", "css_colors.txt", 0),
#palette("matplotlib", "matplotlib_colors.txt", 2) # there's like 10 colors total, I think we'll survive without them
all_colors = []
from math import sqrt
def rgbdist(c1, c2):
return sqrt((c1.r - c2.r)**2 + (c1.g - c2.g)**2 + (c1.b - c2.b)**2)
class Color:
def __init__(self, r, g, b, name, prefix, priority):
self.r = r
self.g = g
self.b = b = name
self.prefix = prefix
self.priority = priority
self.nearest_neighbor = None
def __repr__(self):
return f"<Color ({self.r}, {self.g}, {self.b}) - '{self.prefix}:{}', priority = {self.priority}, nearest_neighbor={ if self.nearest_neighbor is not None else None}>"
def nn(self, colors):
nearest = None
nearest_dist = 999999
for c in colors:
dist = rgbdist(self, c)
if dist == 0: continue
if dist < nearest_dist:
nearest = c
nearest_dist = dist
self.nearest_neighbor = nearest
for p in palettes:
prefix =
priority = p.priority
for name, rgb in p.colors.items():
all_colors.append(Color(*rgb, name, prefix, priority))
for c in all_colors:
smallest_dist = 9999999999999
largest_dist = 0
for c in all_colors:
dist = rgbdist(c, c.nearest_neighbor)
if dist > largest_dist: largest_dist = dist
if dist < smallest_dist: smallest_dist = dist
#print(f"{c.prefix}:{} -> {c.nearest_neighbor.prefix}:{}\t{rgbdist(c, c.nearest_neighbor):.2f}")
# questions -
# are there any colors where their nearest neighbor's nearest neighbor isn't them? (There should be)
nonnear_pairs = 0
for c in all_colors:
neighbor = c.nearest_neighbor
their_neighbor = neighbor.nearest_neighbor
if c is not their_neighbor:
#print(f"{c.prefix}:{} -> {neighbor.prefix}:{} -> {their_neighbor.prefix}:{}")
nonnear_pairs += 1
print("Non-near pairs:", nonnear_pairs)
#print(f"{c.prefix}:{} -> {c.nearest_neighbor.prefix}:{}\t{rgbdist(c, c.nearest_neighbor):.2f}")
# Are there duplicates? They should be removed from the palette that won't be used
dupes = 0
for c in all_colors:
for c2 in all_colors:
if c is c2: continue
if c.r == c2.r and c.g == c2.g and c.b == c2.b:
dupes += 1
print("dupes:", dupes, "this many will need to be removed:", dupes / 2)
# What order to put them in? Do we want large radiuses first, or some sort of "common color" table?
# does manhattan distance change any answers over the 16.7M RGB values?
# What's the worst case lookup? (Checking all 1200 colors to find the name?)