170 lines
6.4 KiB
Python
170 lines
6.4 KiB
Python
# Backblogger.py : Scan directories for images, and scaffold into a blog post.
|
|
import os
|
|
import json
|
|
from datetime import datetime, date
|
|
import cv2
|
|
from PIL import Image as PILImage
|
|
from PIL.ExifTags import TAGS
|
|
|
|
def markdown_date(ts):
    """Convert a POSIX timestamp *ts* to a local-time 'YYYY-MM-DD' string.

    Uses strftime so month and day are zero-padded; the previous
    f"{d.year}-{d.month}-{d.day}" produced '2024-1-5', which was
    inconsistent with str(date.today()) used for the {{TODAY}} placeholder.
    """
    return datetime.fromtimestamp(ts).strftime("%Y-%m-%d")
|
|
|
|
IMG_WIDTH = 760
|
|
class BlogImage:
    """One source image destined for a blog post.

    Knows how to read its EXIF metadata, resize itself to the blog's
    layout width, render itself into a markdown template, and round-trip
    through the JSON metadata file (serialize/deserialize).
    """

    def __init__(self, filepath, article_number=None, image_number=None):
        self.path = filepath
        self.article_number = article_number
        self.image_number = image_number
        self._metadata = None   # lazy EXIF cache; populated by metadata()
        self.resized = False
        # Filesystem change time as a fallback date; EXIF DateTime is
        # preferred when available (see best_date()).
        self.file_date = os.stat(self.path).st_ctime if self.path else None

    def metadata(self, force_reload=False):
        """Return the image's EXIF tags as a {tag_name: value} dict.

        The result is cached; pass force_reload=True to re-read the file.
        """
        if self._metadata and not force_reload:
            return self._metadata
        # Close the file handle promptly instead of leaking it.
        with PILImage.open(self.path) as img:
            exif = img.getexif()
            self._metadata = {TAGS.get(t, t): exif[t] for t in exif}
        return self._metadata

    def blog_image_name(self):
        """Return the normalized output filename, e.g. 'article01_image02.jpg'."""
        an = str(self.article_number).zfill(2)
        im = str(self.image_number).zfill(2)
        # splitext handles dotted basenames correctly ('my.photo.jpg' ->
        # 'jpg'); the previous split('.')[1] returned 'photo' for those.
        ext = os.path.splitext(self.path)[1].lstrip('.')
        return f"article{an}_image{im}.{ext}"

    def resize(self, target_width, dest_fn=None):
        """Write a copy of the image no wider than *target_width* to *dest_fn*.

        Images already narrow enough are copied unscaled. Returns the
        destination filename on every path (the original only returned it
        for the no-op copy) and always records self.resized once the
        output file has been written.
        """
        if dest_fn is None:
            dest_fn = self.blog_image_name()
        img = cv2.imread(self.path)
        if img is None:
            # cv2.imread signals failure by returning None, not raising.
            raise IOError(f"cv2 could not read image: {self.path}")
        h, w = img.shape[:2]
        if w > target_width:
            ratio = target_width / float(w)
            img = cv2.resize(img, (target_width, int(h * ratio)),
                             interpolation=cv2.INTER_AREA)
        cv2.imwrite(dest_fn, img)
        self.resized = True
        return dest_fn

    def markdown_template(self, template):
        """Fill this image's placeholders in *template* and return the text."""
        img_replace = {
            "{{IMG_FN}}": self.blog_image_name(),
            "{{IMG_ORIG_FN}}": os.path.basename(self.path),
            "{{IMG_FILE_DATE}}": markdown_date(self.file_date),
            # str() guards against non-string EXIF values; str.replace
            # requires a string replacement.
            "{{IMG_META_DATE}}": str(self.metadata().get('DateTime', 'No Metadata'))
        }
        for key, value in img_replace.items():
            template = template.replace(key, value)
        return template

    def serialize(self):
        """Return a JSON-friendly dict describing this image.

        NOTE(review): raw EXIF values may not all be JSON-serializable
        (bytes, rationals) — confirm against real camera files.
        """
        return {
            "path": os.path.basename(self.path),
            "metadata": self._metadata,
            "resized": self.resized,
            "resize_path": self.blog_image_name() if self.resized else '',
            "date": self.file_date
        }

    def deserialize(self, data, directory):
        """Restore state from a serialize() dict; the file lives in *directory*."""
        self.path = os.path.join(directory, data['path'])
        self._metadata = data['metadata']
        self.resized = data['resized']
        # Bug fix: the original computed st_ctime here and discarded the
        # result — file_date was never assigned from saved data.
        if data['date']:
            self.file_date = data['date']
        else:
            self.file_date = os.stat(self.path).st_ctime if self.path else None

    def __repr__(self):
        return f"<BlogImage path={os.path.basename(self.path)}, article_number={self.article_number}, image_number={self.image_number}>"

    def best_date(self):
        """Best-known timestamp for this image.

        Preference order: EXIF DateTime, then filesystem ctime, then -1
        as an explicit "unknown" sentinel.
        """
        meta_date = self.metadata().get('DateTime')
        if meta_date:
            try:
                return datetime.strptime(meta_date, '%Y:%m:%d %H:%M:%S').timestamp()
            except (TypeError, ValueError):
                # Narrowed from a bare except: only parse failures belong here.
                print(f'DateTime exif failed parsing: {meta_date} ({self.path})')
        if self.file_date:
            return self.file_date
        return -1

    def __lt__(self, other):
        """Order images chronologically so sorted() yields oldest-first."""
        return self.best_date() < other.best_date()
|
|
|
|
class BlogScaffold:
    """A directory of images plus the backblog.json metadata tracking them."""

    def __init__(self, path):
        self.path = os.path.abspath(path)
        self.data = {"images": [],
                     "blogfile": None}
        self.blog_images = []
        # A backblog.json file marks a directory that was already scanned.
        meta_fn = os.path.join(self.path, "backblog.json")
        self.scanned = os.path.exists(meta_fn)
        if not self.scanned:
            self.scan()
            return
        with open(meta_fn, "r") as f:
            self.data = json.load(f)
        # Bug fix: rebuild BlogImage objects from the saved metadata —
        # previously blog_images stayed empty for already-scanned dirs,
        # crashing any caller that indexed into it.
        for entry in self.data['images']:
            bi = BlogImage(os.path.join(self.path, entry['path']))
            bi.deserialize(entry, self.path)
            self.blog_images.append(bi)

    def scan(self):
        """Find image files directly inside self.path and record them."""
        _, _, files = next(os.walk(self.path))
        self.blog_images = []
        for fn in files:
            # splitext is safe for extension-less and dotted filenames;
            # the previous split('.')[1] raised IndexError on e.g. 'README'.
            ext = os.path.splitext(fn)[1].lstrip('.').lower()
            if ext not in ('jpg', 'png', 'bmp', 'jpeg'):
                continue
            self.blog_images.append(BlogImage(os.path.join(self.path, fn)))
        self.scanned = True
        self.data['images'] = [bi.serialize() for bi in self.blog_images]
        self.save()

    def save(self):
        """Persist self.data to backblog.json inside the scanned directory."""
        with open(os.path.join(self.path, "backblog.json"), "w") as f:
            f.write(json.dumps(self.data))

    def image_times(self):
        """Yield each recorded image date as a datetime, in stored order."""
        for i in self.data['images']:
            yield datetime.fromtimestamp(i['date'])

    def markdown_template(self, article_number):
        """Render template.md into '<NN>_<dirname>.md' inside self.path.

        template.md must contain two '%%%' markers delimiting the
        per-image section, which is repeated once per image in
        chronological order.
        """
        if not self.scanned:
            self.scan()
        dates = [i['date'] for i in self.data['images']]
        replace = {
            "{{TITLE}}": "Backblog basic template about " + self.path,
            "{{SLUG}}": os.path.basename(self.path),
            "{{CATEGORY}}": "category",
            # Guard: min() of an empty scan would raise ValueError.
            "{{EARLIESTDATE}}": markdown_date(min(dates)) if dates else '',
            "{{TODAY}}": str(date.today()),
            "{{ARTICLENUM}}": str(article_number).zfill(2)
        }
        with open("template.md", "r") as f:
            txt = f.read()
        parts = txt.split("%%%")
        img_template = parts[1]
        img_txt = ""
        for i, image in enumerate(sorted(self.blog_images)):
            image.article_number = article_number
            image.image_number = i
            image.resize(IMG_WIDTH)
            img_txt += image.markdown_template(img_template)
        txt = parts[0] + img_txt + parts[2]
        for key, value in replace.items():
            txt = txt.replace(key, value)
        template_fn = f"{str(article_number).zfill(2)}_{os.path.basename(self.path)}.md"
        render_template_fn = os.path.join(self.path, template_fn)
        with open(render_template_fn, "w") as f:
            f.write(txt)
        self.data['blogfile'] = template_fn
        # Bug fix: persist the blogfile name and the images' resize state —
        # the original updated self.data but never wrote it back to disk.
        self.data['images'] = [bi.serialize() for bi in self.blog_images]
        self.save()

    def __repr__(self):
        if not self.scanned:
            return f"<BlogScaffold path={self.path}, scanned=False>"
        return f"<BlogScaffold path={self.path}, blogfile={self.data['blogfile']}, {len(self.data['images'])} image files>"
|
|
|
|
if __name__ == '__main__':
    # Each sibling directory of this program is treated as one blog post.
    # Filter to real directories: a stray file in '..' would crash os.walk.
    own_dir = os.path.basename(os.path.abspath('.'))
    subdirs = [sd for sd in os.listdir('..')
               if os.path.isdir(os.path.join('..', sd)) and sd != own_dir]

    scaffolds = [BlogScaffold(os.path.join('..', sd)) for sd in subdirs]
    # Robustness: directories containing no images have nothing to report
    # and would break the min/max lookups below.
    scaffolds = [s for s in scaffolds if s.blog_images]

    # Use min/max by best_date, not [0]/[-1]: scan order is directory
    # order, not chronological. Newest post (by its latest image) first.
    scaffolds.sort(key=lambda s: max(s.blog_images).best_date(), reverse=True)
    for s in scaffolds:
        earliest = markdown_date(min(s.blog_images).best_date())
        latest = markdown_date(max(s.blog_images).best_date())
        print(f"{os.path.basename(s.path)} - earliest {earliest} - latest {latest}")
|