McRogueFace/generate_color_table.py

# data sources: CSS docs, jennyscrayoncollection 2017 article on Crayola colors, XKCD color survey

# target: Single C++ header file to provide a struct of color RGB codes and names.
# This file pre-computes the nearest neighbor of every color.
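# If an RGB code being searched for lies within half the nearest-neighbor distance of a color,
# that color is guaranteed to be the closest name (triangle inequality). Merely being closer
# than the nearest neighbor is not enough: the query could still sit nearer to that neighbor.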

def hex_to_rgb(txt):
    """Convert a hex color code into an ``(r, g, b)`` tuple of ints.

    Accepts ``RRGGBB`` in either case, with or without ``#``. Surrounding
    whitespace is ignored so a stray space or carriage return cannot shift
    the channel slices, and the three-digit shorthand ``RGB`` is expanded to
    ``RRGGBB`` (``f80`` -> ``ff8800``). Anything after the sixth digit is
    ignored.

    Raises:
        ValueError: if a channel is missing or is not valid hexadecimal.
    """
    digits = txt.strip().replace('#', '')
    if len(digits) == 3:
        # Shorthand notation: each digit stands for a doubled pair.
        digits = ''.join(ch * 2 for ch in digits)
    return tuple(int(digits[i:i + 2], 16) for i in (0, 2, 4))

class palette:
    """A named set of colors loaded from a tab-separated text file.

    Each usable line holds a color name, a tab, then a hex code. Lines with
    fewer than two tab-separated fields (blank lines, headers) are skipped,
    and any columns after the second are ignored.
    """

    def __init__(self, name, filename, priority):
        """Read ``filename`` and build the name -> RGB mapping.

        Args:
            name: label for this palette.
            filename: path of the tab-separated color file to read.
            priority: stored unchanged on the instance.

        Raises:
            OSError: if the file cannot be opened.
            ValueError: if a hex code cannot be parsed by hex_to_rgb.
        """
        self.name = name
        self.priority = priority
        # color name -> (r, g, b) tuple; a repeated name keeps the last code seen
        self.colors = {}
        with open(filename, "r") as f:
            print(f"scanning {filename}")
            for raw_line in f:
                fields = raw_line.rstrip('\n').split('\t')
                if len(fields) < 2:
                    continue
                # Take only the first two columns so extra trailing columns
                # do not break the unpacking.
                color_name, code = fields[:2]
                self.colors[color_name] = hex_to_rgb(code)

    def __repr__(self):
        return f"<Palette '{self.name}' - {len(self.colors)} colors, priority = {self.priority}>"
    
# Palettes to merge into the lookup table. Each entry is palette(name, filename, priority)
# and reads its tab-separated color file as soon as this list is built.
# NOTE(review): this script only stores `priority` on each palette and copies it onto the
# palette's colors; presumably it is meant to pick a winner among duplicate RGB values —
# confirm before relying on the ordering.
palettes = [
    #palette("jenny", "jenny_colors.txt", 3), # I should probably use wikipedia as a source for copyright reasons
    palette("crayon", "wikicrayons_colors.txt", 2),
    palette("xkcd", "xkcd_colors.txt", 1),
    palette("css", "css_colors.txt", 0),
    #palette("matplotlib", "matplotlib_colors.txt", 2) # there's like 10 colors total, I think we'll survive without them
    ]

# Flat list of Color objects from every palette above; filled in by the loop further down.
all_colors = []

from math import sqrt
def rgbdist(c1, c2):
    """Return the straight-line (Euclidean) distance between two colors in RGB space.

    Both arguments only need ``r``, ``g`` and ``b`` attributes.
    """
    channel_deltas = (c1.r - c2.r, c1.g - c2.g, c1.b - c2.b)
    return sqrt(sum(delta ** 2 for delta in channel_deltas))

class Color:
    """One named color: its RGB channels plus the palette label and priority it came with."""

    def __init__(self, r, g, b, name, prefix, priority):
        """Store the channels and labels; the nearest neighbor stays unset until nn() runs."""
        self.r, self.g, self.b = r, g, b
        self.name, self.prefix, self.priority = name, prefix, priority
        self.nearest_neighbor = None

    def __repr__(self):
        neighbor = self.nearest_neighbor
        neighbor_name = None if neighbor is None else neighbor.name
        return (
            f"<Color ({self.r}, {self.g}, {self.b}) - '{self.prefix}:{self.name}', "
            f"priority = {self.priority}, nearest_neighbor={neighbor_name}>"
        )

    def nn(self, colors):
        """Set ``nearest_neighbor`` to the closest entry of ``colors`` at a non-zero distance.

        Entries at distance 0 (this color itself, or an exact RGB duplicate)
        are ignored. On a tie the earliest entry wins. If nothing qualifies,
        ``nearest_neighbor`` is set to None.
        """
        best, best_dist = None, 999999
        for candidate in colors:
            d = rgbdist(self, candidate)
            if d != 0 and d < best_dist:
                best, best_dist = candidate, d
        self.nearest_neighbor = best
        
# Flatten every palette into all_colors, tagging each color with its palette's
# name and priority, and report the running total after each palette.
for p in palettes:
    prefix, priority = p.name, p.priority
    all_colors += [
        Color(*rgb, name, prefix, priority)
        for name, rgb in p.colors.items()
    ]
    print(f"{prefix}->{len(all_colors)}")

# Every color searches the complete table, its own palette included.
for c in all_colors:
    c.nn(all_colors)

# Range of nearest-neighbor distances across the whole table.
# Debug aid: print c.prefix, c.name, the neighbor's prefix/name and the distance
# for each color to inspect individual pairs.
neighbor_dists = [rgbdist(c, c.nearest_neighbor) for c in all_colors]
# The leading sentinels are the starting bounds, so an empty table leaves them unchanged.
smallest_dist = min([9999999999999, *neighbor_dists])
largest_dist = max([0, *neighbor_dists])
# questions -

# are there any colors where their nearest neighbor's nearest neighbor isn't them? (There should be)
# Count the colors whose nearest neighbor does not point back at them,
# i.e. the neighbor has some other color as its own nearest neighbor.
nonnear_pairs = sum(
    1
    for c in all_colors
    if c.nearest_neighbor.nearest_neighbor is not c
)
print("Non-near pairs:", nonnear_pairs)

# Are there duplicates? They should be removed from the palette that won't be used.
# Group colors by exact RGB value in a single pass instead of comparing every pair.
rgb_counts = {}
for c in all_colors:
    rgb = (c.r, c.g, c.b)
    rgb_counts[rgb] = rgb_counts.get(rgb, 0) + 1
# `dupes` keeps its meaning: the number of ordered (c, c2) pairs of distinct
# entries sharing an RGB value, which is k * (k - 1) for a group of k colors.
dupes = sum(k * (k - 1) for k in rgb_counts.values())
# Keeping one entry per RGB value means dropping k - 1 from each group. Halving the
# pair count only matches this when no value is shared by more than two colors.
removable = sum(k - 1 for k in rgb_counts.values())
print("dupes:", dupes, "this many will need to be removed:", removable)

# What order to put them in? Do we want large radiuses first, or some sort of "common color" table?

# does manhattan distance change any answers over the 16.7M RGB values?

# What's the worst case lookup? (Checking all 1200 colors to find the name?)
Squashed commit of the following: [standardize_color_handling] closes #11 Check the abandoned feature branch for PyLinkedColor, a time-expensive but now abandoned feature to link a color value to a UIDrawable. There are some TODOs left in the PyColor class, but that can go under cleanup. I'm way over time on this, so I'm taking a small victory :) commit 572aa526058ae012f622393eae01c65cbc8dc05e Author: John McCardle <mccardle.john@gmail.com> Date: Sat Mar 30 21:18:26 2024 -0400 More color table updates commit 01706bd59d9b62fe1ea8f8dcce0929da738490dd Author: John McCardle <mccardle.john@gmail.com> Date: Sat Mar 30 21:13:31 2024 -0400 Color wrapup... Cutting PyLinkedColor to simplify my cursedly mortal, finite existence commit 3991ac13d6471e491cbccf2ddb8d36bad528b2f7 Author: John McCardle <mccardle.john@gmail.com> Date: Thu Mar 28 23:50:50 2024 -0400 Still having segfaults with LinkedColor and captions (specifically outline color, but that might not be the actual cause). PyColor shaping back up in simplified form. commit 06e24a1b27c2f1ec520537f3a5b9b08d68d07829 Author: John McCardle <mccardle.john@gmail.com> Date: Thu Mar 28 20:53:49 2024 -0400 LinkedColor now reflecting changes to the linked color value. Needs set method + RGBA / color properties commit 41509dfe9640a67f924c5f843fe6bceb0cdb8f78 Author: John McCardle <mccardle.john@gmail.com> Date: Wed Mar 27 21:10:03 2024 -0400 Addressing issues with PyColor by splitting behavior off into PyLinkedColor commit 13a4ddf41b41dfc123a00468377b4f8fae0da845 Author: John McCardle <mccardle.john@gmail.com> Date: Tue Mar 26 23:02:00 2024 -0400 Build runs again. PyColor objects are being instantiated, with bugs and no test of color changing commit 1601fc7faba53e8d0d5814688b80e5cbfec2a700 Author: John McCardle <mccardle.john@gmail.com> Date: Mon Mar 25 20:48:08 2024 -0400 Still doesn't compile, but now the issue is in UI.h overcoupling. Progress! 
commit 13672c8fdbe7f3db385c93234331bb16267ef18b Author: John McCardle <mccardle.john@gmail.com> Date: Sun Mar 24 21:19:37 2024 -0400 Dabbling around this morning; still not building commit 79090b553f08af7dd03892c2153073d8457a566d Author: John McCardle <mccardle.john@gmail.com> Date: Sun Mar 24 08:36:06 2024 -0400 Unsaved changes from last night commit 2cac6f03c601de4591dbd8205418a1cbfe7e7e9f Author: John McCardle <mccardle.john@gmail.com> Date: Sat Mar 23 23:07:10 2024 -0400 untested PyColor base implementation commit 3728e5fcc8bd745ef0268312a808fab6c82d7d91 Author: John McCardle <mccardle.john@gmail.com> Date: Sat Mar 23 23:06:36 2024 -0400 Color naming prototype 2024-03-31 01:20:40 +00:00			`# data sources: CSS docs, jennyscrayoncollection 2017 article on Crayola colors, XKCD color survey`

			`# target: Single C++ header file to provide a struct of color RGB codes and names.`
			`# This file pre-computes the nearest neighbor of every color.`
			`# if an RGB code being searched for is closer than the nearest neighbor, it's the closest color name.`

			`def hex_to_rgb(txt):`
			`if '#' in txt: txt = txt.replace('#', '')`
			`r = txt[0:2]`
			`g = txt[2:4]`
			`b = txt[4:6]`
			`return tuple([int(s, 16) for s in (r,g,b)])`

			`class palette:`
			`def __init__(self, name, filename, priority):`
			`self.name = name`
			`self.priority = priority`
			`with open(filename, "r") as f:`
			`print(f"scanning {filename}")`
			`self.colors = {}`
			`for line in f.read().split('\n'):`
			`if len(line.split('\t')) < 2: continue`
			`name, code = line.split('\t')`
			`#print(name, code)`
			`self.colors[name] = hex_to_rgb(code)`

			`def __repr__(self):`
			`return f"<Palette '{self.name}' - {len(self.colors)} colors, priority = {self.priority}>"`

			`palettes = [`
			`#palette("jenny", "jenny_colors.txt", 3), # I should probably use wikipedia as a source for copyright reasons`
			`palette("crayon", "wikicrayons_colors.txt", 2),`
			`palette("xkcd", "xkcd_colors.txt", 1),`
			`palette("css", "css_colors.txt", 0),`
			`#palette("matplotlib", "matplotlib_colors.txt", 2) # there's like 10 colors total, I think we'll survive without them`
			`]`

			`all_colors = []`

			`from math import sqrt`
			`def rgbdist(c1, c2):`
			`return sqrt((c1.r - c2.r)**2 + (c1.g - c2.g)**2 + (c1.b - c2.b)**2)`

			`class Color:`
			`def __init__(self, r, g, b, name, prefix, priority):`
			`self.r = r`
			`self.g = g`
			`self.b = b`
			`self.name = name`
			`self.prefix = prefix`
			`self.priority = priority`
			`self.nearest_neighbor = None`

			`def __repr__(self):`
			`return f"<Color ({self.r}, {self.g}, {self.b}) - '{self.prefix}:{self.name}', priority = {self.priority}, nearest_neighbor={self.nearest_neighbor.name if self.nearest_neighbor is not None else None}>"`

			`def nn(self, colors):`
			`nearest = None`
			`nearest_dist = 999999`
			`for c in colors:`
			`dist = rgbdist(self, c)`
			`if dist == 0: continue`
			`if dist < nearest_dist:`
			`nearest = c`
			`nearest_dist = dist`
			`self.nearest_neighbor = nearest`

			`for p in palettes:`
			`prefix = p.name`
			`priority = p.priority`
			`for name, rgb in p.colors.items():`
			`all_colors.append(Color(*rgb, name, prefix, priority))`
			`print(f"{prefix}->{len(all_colors)}")`

			`for c in all_colors:`
			`c.nn(all_colors)`

			`smallest_dist = 9999999999999`
			`largest_dist = 0`
			`for c in all_colors:`
			`dist = rgbdist(c, c.nearest_neighbor)`
			`if dist > largest_dist: largest_dist = dist`
			`if dist < smallest_dist: smallest_dist = dist`
			`#print(f"{c.prefix}:{c.name} -> {c.nearest_neighbor.prefix}:{c.nearest_neighbor.name}\t{rgbdist(c, c.nearest_neighbor):.2f}")`
			`# questions -`

			`# are there any colors where their nearest neighbor's nearest neighbor isn't them? (There should be)`
			`nonnear_pairs = 0`
			`for c in all_colors:`
			`neighbor = c.nearest_neighbor`
			`their_neighbor = neighbor.nearest_neighbor`
			`if c is not their_neighbor:`
			`#print(f"{c.prefix}:{c.name} -> {neighbor.prefix}:{neighbor.name} -> {their_neighbor.prefix}:{their_neighbor.name}")`
			`nonnear_pairs += 1`
			`print("Non-near pairs:", nonnear_pairs)`
			`#print(f"{c.prefix}:{c.name} -> {c.nearest_neighbor.prefix}:{c.nearest_neighbor.name}\t{rgbdist(c, c.nearest_neighbor):.2f}")`

			`# Are there duplicates? They should be removed from the palette that won't be used`
			`dupes = 0`
			`for c in all_colors:`
			`for c2 in all_colors:`
			`if c is c2: continue`
			`if c.r == c2.r and c.g == c2.g and c.b == c2.b:`
			`dupes += 1`
			`print("dupes:", dupes, "this many will need to be removed:", dupes / 2)`

			`# What order to put them in? Do we want large radiuses first, or some sort of "common color" table?`

			`# does manhattan distance change any answers over the 16.7M RGB values?`

			`# What's the worst case lookup? (Checking all 1200 colors to find the name?)`