backblogger/backblogger.py

170 lines
6.4 KiB
Python
Raw Normal View History

2021-01-21 02:00:00 +00:00
# Backblogger.py : Scan directories for images, and scaffold into a blog post.
import os
import json
from datetime import datetime, date
import cv2
from PIL import Image as PILImage
from PIL.ExifTags import TAGS
2021-01-21 02:00:00 +00:00
def markdown_date(ts):
    """Format a POSIX timestamp as a zero-padded ``YYYY-MM-DD`` string.

    The timestamp is interpreted in local time by ``datetime.fromtimestamp``.
    Month and day are zero-padded so the result has the same ISO shape as
    the ``str(date.today())`` value used for the ``{{TODAY}}`` placeholder.

    Args:
        ts: seconds since the epoch (int or float).

    Returns:
        The local calendar date of ``ts``, e.g. ``"2021-01-05"``.
    """
    return datetime.fromtimestamp(ts).date().isoformat()
# Target width handed to BlogImage.resize() when a post is rendered; images
# wider than this are scaled down to it, narrower ones are copied unchanged.
IMG_WIDTH = 760
class BlogImage:
    """A single source image plus the bookkeeping needed to place it in a post.

    Instances order themselves by ``best_date()``, so ``sorted(images)``
    yields them oldest-first.
    """

    def __init__(self, filepath, article_number=None, image_number=None):
        """Record the image location and read its file timestamp.

        Args:
            filepath: path of the source image, or ``None`` for an empty
                instance that will be filled in by ``deserialize``.
            article_number: article index used by ``blog_image_name``.
            image_number: image index used by ``blog_image_name``.
        """
        self.path = filepath
        self.article_number = article_number
        self.image_number = image_number
        self._metadata = None  # EXIF cache; None means "not loaded yet"
        self.resized = False
        self.file_date = os.stat(self.path).st_ctime if self.path else None

    def metadata(self, force_reload=False):
        """Return the image's EXIF data as a ``{tag name: value}`` dict.

        The result is cached after the first read. An image without EXIF
        data caches an empty dict, which is still a valid cached result and
        does not trigger another read.

        Args:
            force_reload: re-read the file even if a cached value exists.
        """
        if self._metadata is not None and not force_reload:
            return self._metadata
        # The context manager releases the file handle once the tags are read.
        with PILImage.open(self.path) as img:
            exif = img.getexif()
            self._metadata = {TAGS.get(t, t): exif[t] for t in exif}
        return self._metadata

    def blog_image_name(self):
        """Return the blog filename, e.g. ``article03_image07.jpg``.

        The extension is the source file's final suffix, so names containing
        several dots keep their real extension and names without a dot yield
        a name with no extension.
        """
        article = str(self.article_number).zfill(2)
        image = str(self.image_number).zfill(2)
        _, ext = os.path.splitext(os.path.basename(self.path))
        return f"article{article}_image{image}{ext}"

    def resize(self, target_width, dest_fn=None):
        """Write a copy of the image that is at most ``target_width`` wide.

        Images already narrow enough are written unchanged and leave
        ``self.resized`` untouched; wider images are scaled down keeping
        their aspect ratio and set ``self.resized`` to ``True``.

        Args:
            target_width: maximum width of the written image.
            dest_fn: output filename; defaults to ``blog_image_name()``.

        Returns:
            The filename the image was written to.

        Raises:
            OSError: if the source image could not be read.
        """
        if dest_fn is None:
            dest_fn = self.blog_image_name()
        img = cv2.imread(self.path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise OSError(f"Could not read image: {self.path}")
        h, w = img.shape[:2]
        if w <= target_width:
            cv2.imwrite(dest_fn, img)
            return dest_fn
        ratio = target_width / float(w)
        new_h = max(1, int(h * ratio))  # never ask cv2 for a zero-height image
        new_img = cv2.resize(img, (target_width, new_h), interpolation=cv2.INTER_AREA)
        cv2.imwrite(dest_fn, new_img)
        self.resized = True
        return dest_fn

    def markdown_template(self, template):
        """Fill the ``{{IMG_*}}`` placeholders of ``template`` for this image.

        Args:
            template: text containing any of ``{{IMG_FN}}``,
                ``{{IMG_ORIG_FN}}``, ``{{IMG_FILE_DATE}}`` and
                ``{{IMG_META_DATE}}``.

        Returns:
            The template text with those placeholders substituted.
        """
        img_replace = {
            "{{IMG_FN}}": self.blog_image_name(),
            "{{IMG_ORIG_FN}}": os.path.basename(self.path),
            "{{IMG_FILE_DATE}}": markdown_date(self.file_date),
            # str() keeps str.replace happy if the EXIF value is not text.
            "{{IMG_META_DATE}}": str(self.metadata().get('DateTime', 'No Metadata')),
        }
        for placeholder, value in img_replace.items():
            template = template.replace(placeholder, value)
        return template

    def serialize(self):
        """Return a plain-dict snapshot of this image.

        Only the basename of ``path`` is stored; ``deserialize`` re-joins it
        with a directory.
        """
        # NOTE(review): once EXIF data has been loaded, some values may not be
        # JSON-serializable - confirm before persisting loaded metadata.
        return {
            "path": os.path.basename(self.path),
            "metadata": self._metadata,
            "resized": self.resized,
            "resize_path": self.blog_image_name() if self.resized else '',
            "date": self.file_date,
        }

    def deserialize(self, data, directory):
        """Restore this image from a dict produced by ``serialize``.

        Args:
            data: dict with ``path``, ``metadata`` and ``resized`` keys and
                an optional ``date`` key.
            directory: directory the stored basename is joined onto.
        """
        self.path = os.path.join(directory, data['path'])
        self._metadata = data['metadata']
        self.resized = data['resized']
        stored_date = data.get('date')
        if stored_date:
            self.file_date = stored_date
        elif os.path.exists(self.path):
            # No stored date: fall back to the file's own timestamp.
            self.file_date = os.stat(self.path).st_ctime

    def __repr__(self):
        name = os.path.basename(self.path) if self.path else None
        return f"<BlogImage path={name}, article_number={self.article_number}, image_number={self.image_number}>"

    def best_date(self):
        """Return the most trustworthy timestamp available for this image.

        Preference order: the EXIF ``DateTime`` tag, then ``file_date``,
        then ``-1`` when neither is available.
        """
        meta_date = self.metadata().get('DateTime')
        if meta_date:
            try:
                return datetime.strptime(meta_date, '%Y:%m:%d %H:%M:%S').timestamp()
            except (ValueError, TypeError):
                # Malformed or non-text EXIF date: report it and fall through.
                print(f'DateTime exif failed parsing: {meta_date} ({self.path})')
        if self.file_date:
            return self.file_date
        return -1

    def __lt__(self, other):
        if not isinstance(other, BlogImage):
            return NotImplemented
        return self.best_date() < other.best_date()
2021-01-21 02:00:00 +00:00
class BlogScaffold:
    """A directory of images together with its ``backblog.json`` state.

    On construction the directory is either scanned for images (first
    visit, which also writes ``backblog.json``) or restored from the
    ``backblog.json`` left by an earlier scan.
    """

    # Lower-cased file suffixes that scan() treats as images.
    IMAGE_EXTENSIONS = ('.jpg', '.png', '.bmp', '.jpeg')

    def __init__(self, path):
        """Load or create the scaffold state for ``path``.

        Args:
            path: directory holding the images; stored as an absolute path.
        """
        self.path = os.path.abspath(path)
        self.data = {"images": [], "blogfile": None}
        self.blog_images = []

        # Check the path for backblog metadata
        metadata_fn = os.path.join(self.path, "backblog.json")
        self.scanned = os.path.exists(metadata_fn)
        if not self.scanned:
            self.scan()
            return
        with open(metadata_fn, "r") as f:
            self.data = json.load(f)
        # Rebuild the image objects so a restored scaffold is as usable as a
        # freshly scanned one.
        self.blog_images = self._load_images()

    def _load_images(self):
        """Recreate ``BlogImage`` objects from ``self.data['images']``."""
        images = []
        for entry in self.data.get('images', []):
            image = BlogImage(None)
            try:
                image.deserialize(entry, self.path)
            except OSError:
                # The file recorded by an earlier scan can no longer be read.
                print(f"Skipping missing image: {entry.get('path')} ({self.path})")
                continue
            if image.file_date is None:
                image.file_date = entry.get('date')
            images.append(image)
        return images

    def scan(self):
        """Find the image files directly inside ``self.path`` and save them.

        Only the directory itself is inspected, not its subdirectories. A
        file counts as an image when its final suffix, lower-cased, is one
        of ``IMAGE_EXTENSIONS``; files without a suffix are ignored.

        Raises:
            OSError: if ``self.path`` cannot be listed, e.g. it is not a
                directory.
        """
        with os.scandir(self.path) as entries:
            names = [entry.name for entry in entries if not entry.is_dir()]

        self.blog_images = [
            BlogImage(os.path.join(self.path, name))
            for name in names
            if os.path.splitext(name)[1].lower() in self.IMAGE_EXTENSIONS
        ]
        self.scanned = True
        self.data['images'] = [bi.serialize() for bi in self.blog_images]
        self.save()

    def save(self):
        """Write ``self.data`` to ``backblog.json`` inside ``self.path``."""
        with open(os.path.join(self.path, "backblog.json"), "w") as f:
            json.dump(self.data, f)

    def image_times(self):
        """Yield a ``datetime`` for every recorded image that has a date."""
        for entry in self.data['images']:
            if entry.get('date') is None:
                continue
            yield datetime.fromtimestamp(entry['date'])

    def markdown_template(self, article_number):
        """Render ``template.md`` for this directory and write the post.

        ``template.md`` is read from the current working directory and must
        contain two ``%%%`` markers: the text between them is repeated once
        per image, in ``sorted`` order, and the text before and after is
        kept once. Every image is resized to ``IMG_WIDTH`` along the way.
        The result is written to ``<NN>_<dirname>.md`` inside ``self.path``
        and the filename is recorded in ``backblog.json``.

        Args:
            article_number: number used for ``{{ARTICLENUM}}``, the output
                filename and the per-image filenames.

        Raises:
            ValueError: if no image has a recorded date, or the template
                lacks the two ``%%%`` markers.
        """
        if not self.scanned:
            self.scan()

        dates = [i['date'] for i in self.data['images'] if i.get('date') is not None]
        if not dates:
            raise ValueError(f"No dated images recorded for {self.path}")

        article = str(article_number).zfill(2)
        replace = {
            "{{TITLE}}": "Backblog basic template about " + self.path,
            "{{SLUG}}": os.path.basename(self.path),
            "{{CATEGORY}}": "category",
            "{{EARLIESTDATE}}": markdown_date(min(dates)),
            "{{TODAY}}": str(date.today()),
            "{{ARTICLENUM}}": article,
        }

        with open("template.md", "r") as f:
            txt = f.read()
        sections = txt.split("%%%")
        if len(sections) < 3:
            raise ValueError("template.md needs two '%%%' markers around the image section")
        header, img_template, footer = sections[:3]

        rendered = []
        for i, image in enumerate(sorted(self.blog_images)):
            image.article_number = article_number
            image.image_number = i
            image.resize(IMG_WIDTH)
            rendered.append(image.markdown_template(img_template))

        txt = header + "".join(rendered) + footer
        for placeholder, value in replace.items():
            txt = txt.replace(placeholder, value)

        template_fn = f"{article}_{os.path.basename(self.path)}.md"
        with open(os.path.join(self.path, template_fn), "w") as f:
            f.write(txt)
        self.data['blogfile'] = template_fn
        # Persist the new blogfile entry so later runs can see it.
        self.save()

    def __repr__(self):
        if not self.scanned:
            return f"<BlogScaffold path={self.path}, scanned=False>"
        return f"<BlogScaffold path={self.path}, blogfile={self.data['blogfile']}, {len(self.data['images'])} image files>"
def main():
    """Scaffold every sibling directory and print its image date range.

    Each directory next to the program's own directory is turned into a
    ``BlogScaffold`` (which scans it on first visit). Directories that
    contain images are then listed newest-first with the dates of their
    earliest and latest image.
    """
    parent = '..'
    # don't scan program's directory
    own_dir = os.path.basename(os.path.abspath('.'))
    subdirs = [
        sd for sd in os.listdir(parent)
        if sd != own_dir and os.path.isdir(os.path.join(parent, sd))
    ]

    scaffolds = [BlogScaffold(os.path.join(parent, sd)) for sd in subdirs]

    # blog_images is in directory order, so the date range has to be computed
    # rather than read from the first and last element. Directories without
    # images have no dates to report.
    dated = [
        (s, [image.best_date() for image in s.blog_images])
        for s in scaffolds
        if s.blog_images
    ]
    dated.sort(key=lambda pair: max(pair[1]), reverse=True)
    for s, dates in dated:
        print(f"{os.path.basename(s.path)} - earliest {markdown_date(min(dates))} - latest {markdown_date(max(dates))}")


if __name__ == '__main__':
    main()