Skip to content

Instantly share code, notes, and snippets.

@UserUnknownFactor
Last active June 30, 2024 10:21
Show Gist options
  • Save UserUnknownFactor/82f3ffc2b0e57946e4f0d621a11686cd to your computer and use it in GitHub Desktop.
Save UserUnknownFactor/82f3ffc2b0e57946e4f0d621a11686cd to your computer and use it in GitHub Desktop.
Tools for mass image translation (applying text was tested on PGMMV with images of the same type/size that differ only in the text area)
import os, csv, glob, re
from PIL import Image, ImageDraw, ImageFont
"""
This tool can read translations from:
`original→translation→image_without_extension[;text_pos_x,text_pos_y;overlay_path;overlay_x,overlay_y;custom_font;font_size;font_color]`
formatted .csv file and apply it to all images in the specified folder, after replacing
background to a specified image at specified coordinates. Text supports colored borders
and style tags like <b>, <i> (not enabled now) or <color="#ff0000"> in the text.
All stuff is only configurable in the code.
"""
# Caching is optional: it is switched off automatically when the
# third-party `diskcache` package is not installed.
USE_CACHE = True
try:
    if USE_CACHE:
        import diskcache
except ImportError:
    # Only a missing package disables the cache; any other error is a real
    # problem and must not be hidden.
    USE_CACHE = False

# CSV dialect of the translation files: columns are separated by '→',
# quoting is disabled and '¶' escapes special characters.
DELIMITER_CHAR = '→'
ESCAPE_CHAR = '¶'
DIALECT_TRANSLATION = "translation"
csv.register_dialect(DIALECT_TRANSLATION, delimiter=DELIMITER_CHAR, quotechar="\uffff", quoting=csv.QUOTE_NONE, escapechar=ESCAPE_CHAR, lineterminator='\n')

# Matches the inline style tags: <i>, </i>, <b>, </b>, <color=...>, </color>.
# The capturing group makes re_tags.split() keep the tags in its result.
re_tags = re.compile('(</?[ib]>|<color=[\'"]?[^\'" >]+[\'"]?>|</color>)')
class StyledText:
    """Draws multi-line text with simple inline style tags onto a PIL image.

    Recognised tags are <i>, <b> and <color=...> with their closing tags.
    Line breaks may be written as a literal backslash-n, <br> or <BR>.
    """

    # Lazily created 1x1 drawing surface shared by all instances; it is only
    # used to measure text, never to render it.
    _measure_draw = None

    def __init__(self, size, regular_font_path, italic_font_path=None, bold_font_path=None, line_height_mul = 1.2):
        """Load the fonts.

        Args:
            size: Nominal font size; it is multiplied by 1.33 before use
                (presumably a point-to-pixel conversion - TODO confirm).
            regular_font_path: Path of the regular TrueType font.
            italic_font_path: Optional italic font; falls back to regular.
            bold_font_path: Optional bold font; falls back to regular.
            line_height_mul: Line height as a multiple of the scaled size.
        """
        size = size * 1.33
        self.line_height = size * line_height_mul  # Adjust line height as needed
        self.size = size
        self.current_font = self.regular_font = ImageFont.truetype(regular_font_path, size)
        if italic_font_path:
            self.italic_font = ImageFont.truetype(italic_font_path, size)
        else:
            self.italic_font = self.regular_font
        if bold_font_path:
            self.bold_font = ImageFont.truetype(bold_font_path, size)
        else:
            self.bold_font = self.regular_font

    def get_text_width(self, text):
        """Return the bounding-box width of `text` in the current font."""
        if StyledText._measure_draw is None:
            StyledText._measure_draw = ImageDraw.Draw(Image.new('RGB', (1, 1)))
        bbox = StyledText._measure_draw.textbbox((0, 0), text, font=self.current_font)
        return bbox[2] - bbox[0]

    @staticmethod
    def draw_text_with_border(draw, position, text, font, fill, border_color, border_width):
        """ Draws text with a border around it. """
        if border_color is not None and border_width:
            x, y = position
            # The border is emulated by stamping the text at every offset
            # inside a (2 * border_width + 1) square around the position.
            for dx in range(-border_width, border_width+1):
                for dy in range(-border_width, border_width+1):
                    draw.text((x+dx, y+dy), text, font=font, fill=border_color)
        draw.text(position, text, font=font, fill=fill)

    def draw_text(self, position, text, img, base_color, border_color, border_width, alignment='center'):
        """Render `text` onto `img`.

        Args:
            position: Either a point (x, y) or a rectangle (x1, y1, x2, y2).
                A point always uses 'left top' alignment.
            text: Text to draw, optionally containing style tags.
            img: PIL image that is drawn on in place.
            base_color: Fill colour used outside of <color> tags.
            border_color: Border colour, or None for no border.
            border_width: Border thickness in pixels (0 disables it).
            alignment: Words combined in one string: 'left'/'center'/'right'
                and 'top'/'middle'/'bottom'. 'center' also implies 'middle'.

        Raises:
            ValueError: If `position` has fewer than 2 or exactly 3 items,
                or the alignment lacks a horizontal or vertical part.
        """
        # Validate the geometry before anything is indexed or drawn, so a
        # malformed position gives a clear error instead of an IndexError.
        if len(position) < 2:
            raise ValueError(f"Point must have at least 2 coordinates, not {position}")
        if len(position) == 3:
            raise ValueError(f"Full rectangle must have 4 coordinates, not {position}")

        # Used format is [x1, y1, x2, y2] instead of [x1, y1, w, h]
        rect_left, rect_top = position[0], position[1]
        if len(position) > 2:
            rect_right, rect_bottom = position[2], position[3]
            rect_width = rect_right - rect_left
            rect_height = rect_bottom - rect_top
        else:
            alignment = 'left top'
            rect_right = rect_bottom = rect_width = rect_height = None

        is_left = 'left' in alignment
        is_center = 'center' in alignment
        is_right = 'right' in alignment
        is_top = 'top' in alignment
        is_bottom = 'bottom' in alignment
        is_middle = is_center or 'middle' in alignment
        if not (is_top or is_bottom or is_middle):
            raise ValueError("Invalid vertical alignment specified")
        if not (is_left or is_center or is_right):
            raise ValueError("Invalid horizontal alignment specified")

        # Start every call from the regular font so an unclosed <b>/<i> from
        # an earlier call cannot leak into this one.
        self.current_font = self.regular_font
        draw = ImageDraw.Draw(img)
        current_color = base_color  # Colour in effect for the next segment
        color_stack = []  # Colours suspended by nested <color> tags

        text = text.replace('\\n', '\n')
        text = text.replace('<br>', '\n').replace('<BR>', '\n')
        lines = text.split('\n')

        total_line_height = len(lines) * self.line_height
        if is_top:
            vertical_offset = rect_top
        elif is_bottom:
            vertical_offset = rect_bottom - total_line_height
        else:
            vertical_offset = rect_top + (rect_height - total_line_height) // 2

        for line in lines:
            # The line is measured without its tags to find where it starts.
            clear_line = re_tags.sub('', line)
            if is_left:
                horizontal_offset = rect_left
            elif is_center:
                horizontal_offset = rect_left + (rect_width - self.get_text_width(clear_line)) // 2
            else:
                horizontal_offset = rect_right - self.get_text_width(clear_line)

            for segment in re_tags.split(line):
                if segment == '<i>':
                    self.current_font = self.italic_font
                elif segment == '<b>':
                    self.current_font = self.bold_font
                elif segment.startswith('<color='):
                    color = re.search(r'(?<=[<]color=)(?:[\'"]?)([^\'" >]+)', segment).group(1)
                    color_stack.append(current_color)
                    current_color = color
                elif segment == '</color>':
                    # Restore the enclosing colour, or the base colour when
                    # the closing tag has no matching opening tag.
                    current_color = color_stack.pop() if color_stack else base_color
                elif segment == '</i>' or segment == '</b>':
                    self.current_font = self.regular_font
                elif segment:
                    self.draw_text_with_border(draw, (horizontal_offset, vertical_offset), segment, font=self.current_font, fill=current_color, border_color=border_color, border_width=border_width)
                    horizontal_offset += self.get_text_width(segment)
            vertical_offset += self.line_height
def get_font_path(font_name, search_dirs=None):
    """ Search for a TrueType font by its base name.

    The extension of `font_name` is ignored; any file whose name starts
    with the base name and ends in "ttf" is a candidate.

    Args:
        font_name: Font file name, e.g. "arial.ttf".
        search_dirs: Directories to search, in order. Defaults to the
            current directory followed by "C:/Windows/Fonts".

    Returns:
        The alphabetically first match from the first directory that has
        one (so "arial.ttf" wins over "arialbd.ttf"), or None.
    """
    if search_dirs is None:
        search_dirs = (".", "C:/Windows/Fonts")
    # Escape glob metacharacters so names containing "[", "*" or "?" are
    # matched literally.
    stem = glob.escape(os.path.splitext(font_name)[0])
    for folder in search_dirs:
        # os.path.join keeps the lookup working on non-Windows systems.
        matches = glob.glob(os.path.join(glob.escape(folder), stem + "*ttf"))
        if matches:
            return min(matches)
    return None
def read_text_data(csv_file):
    """ Load every row of a translation CSV file into a list.

    The file is opened as UTF-8 (a leading BOM is dropped) and parsed with
    the DIALECT_TRANSLATION csv dialect.
    """
    with open(csv_file, mode='r', newline='', encoding='utf-8-sig') as handle:
        rows = [row for row in csv.reader(handle, DIALECT_TRANSLATION)]
    return rows
def make_area_transparent(image, box):
    """ Clear the alpha channel inside a rectangle, leaving the rest as is.

    Args:
        image: A PIL.Image object in RGBA mode; it is modified in place.
        box: A tuple (left, upper, right, lower) describing the rectangle
            that becomes fully transparent.

    Returns:
        The same image object.

    Raises:
        ValueError: If the image is not in RGBA mode.
    """
    if image.mode != 'RGBA':
        raise ValueError("Image must be in RGBA mode")
    # The selection is 255 inside the box and 0 everywhere else.
    selection = Image.new("L", image.size, 0)
    ImageDraw.Draw(selection).rectangle(box, fill=255)
    # Where the selection is set take alpha 0, elsewhere keep the old alpha.
    cleared = Image.new('L', image.size, 0)
    image.putalpha(Image.composite(cleared, image.getchannel('A'), selection))
    return image
# Directory that holds the on-disk cache.
CACHE_DIR = '__pycache__'
# `cache` is always defined; it is None when caching is disabled so that the
# name exists even if `diskcache` could not be imported.
cache = diskcache.Cache(CACHE_DIR) if USE_CACHE else None
def is_file_changed(file_path):
    """Tell whether a file's modification time differs from the cached one.

    When it differs (or nothing was cached yet) the new modification time is
    stored in `cache` under the file path and True is returned.
    """
    current_mtime = os.path.getmtime(file_path)
    previous_mtime = cache.get(file_path)
    if previous_mtime is not None and previous_mtime == current_mtime:
        return False
    cache.set(file_path, current_mtime)
    return True
def is_line_changed(file_name, text_line):
    """Tell whether `text_line` differs from the value cached for `file_name`.

    On a difference the cache entry is replaced with `text_line` and True
    is returned; otherwise the cache is left untouched.
    """
    if cache.get(file_name) == text_line:
        return False
    cache.set(file_name, text_line)
    return True
def make_tuple_dim(tuple_string: str, dim: int=2, default: int|str|None=0, separator: str=',', to_int: bool=True):
if tuple_string is None or not isinstance(tuple_string, str):
return tuple()
tuple_string = tuple_string.split(separator)
tuple_string = tuple_string[:dim] + [default] * max(0, dim - len(tuple_string))
if to_int:
tuple_string = tuple(int(v) for v in tuple_string)
if len(tuple_string) > 3 and tuple_string[2] == 0 and tuple_string[3]==0:
return tuple_string[:2]
return tuple_string
return tuple(tuple_string)
def _resolve_font_path(font_name):
    """Return the absolute path of `font_name` or raise FileNotFoundError."""
    found = get_font_path(font_name)
    if not found:
        raise FileNotFoundError(f"Font {font_name} not found in system.")
    return os.path.abspath(found)


def _find_source_image(input_folder, filename):
    """Return (path, extension) of the .png or .jpg image, or (None, None)."""
    for image_type in ('.png', '.jpg'):
        candidate = os.path.join(input_folder, filename + image_type)
        if os.path.exists(candidate):
            return candidate, image_type
    return None, None


def process_images(input_folder, output_folder, rectangle, default_overlay_img_path=None, csv_file='data.csv',
                   font_name='arial.ttf', font_size=20, text_color=(0, 0, 0),
                   border_color=(255, 255, 255), border_width=2,
                   default_overlay_position=(0, 0)):
    """ Applies overlays and translated text to the images named in a CSV.

    Each CSV row is `original→translation→comment`, where the comment is
    `image[;text_pos;overlay_path;overlay_pos;font;font_size;font_color]`.
    Rows that share an image name are applied to the same image in order.

    Args:
        input_folder: Folder holding the source images (.png or .jpg).
        output_folder: Folder the results are written to.
        rectangle: Default text point/rectangle for rows without their own.
        default_overlay_img_path: Overlay used for images whose rows name
            no overlay; skipped with a message when the file is missing.
        csv_file: Path of the translation CSV.
        font_name: Default font file name.
        font_size: Default font size.
        text_color: Default text colour.
        border_color: Text border colour.
        border_width: Text border width in pixels.
        default_overlay_position: Paste position used when an overlay has
            no position of its own.

    Raises:
        FileNotFoundError: If the default or a custom font cannot be found.
    """
    #if USE_CACHE and not is_file_changed(csv_file): return
    text_data = read_text_data(csv_file)

    font_path = _resolve_font_path(font_name)
    print(f"Default font: {font_path}")
    default_font = StyledText(font_size, font_path)
    # One StyledText per (font file, size) pair instead of one per row.
    loaded_fonts = {(font_path, font_size): default_font}

    # Group rows by image name so several texts can land on one image.
    grouped_data = dict()
    for row in text_data:
        if len(row) < 3 or not row[2]:
            print(f"Error in row: {row}")
            continue
        _, text, comment = row[:3]  # this is standard translation format of filetranslate
        (filename, text_position, overlay_img_path, overlay_position,
         custom_font, custom_font_size, custom_font_color) = make_tuple_dim(comment, 7, None, ';', False)
        # Blank fields are treated like missing ones.
        text_position = make_tuple_dim(text_position or None, 4)
        overlay_position = make_tuple_dim(overlay_position or None)

        entry = grouped_data.setdefault(filename, {'texts': [], 'overlays': []})

        # Row-level overrides stay local to the row; the defaults passed to
        # this function are left untouched for the following rows.
        row_color = custom_font_color or text_color
        row_font_path = _resolve_font_path(custom_font) if custom_font else font_path
        row_font_size = int(custom_font_size) if custom_font_size else font_size
        font_key = (row_font_path, row_font_size)
        if font_key not in loaded_fonts:
            loaded_fonts[font_key] = StyledText(row_font_size, row_font_path)

        entry['texts'].append((text, text_position, row_color, loaded_fonts[font_key]))
        if overlay_img_path:
            entry['overlays'].append((overlay_img_path, overlay_position or default_overlay_position))

    # The default overlay is used only for images without explicit overlays.
    default_overlays = []
    if default_overlay_img_path:
        if os.path.exists(default_overlay_img_path):
            default_overlays = [(default_overlay_img_path, default_overlay_position)]
        else:
            print(f"Default overlay not found, skipping: {default_overlay_img_path}")

    print("Applying translation and overlays...")
    for filename, data in grouped_data.items():
        texts = data['texts']
        image_path, image_type = _find_source_image(input_folder, filename)
        if image_path is None:
            print(f"Image not found for: {filename}")
            continue

        if USE_CACHE:
            # One cache entry per image covering all of its text lines, so
            # several rows for one image do not overwrite each other.
            signature = '\n'.join(text for text, _, _, _ in texts)
            if not is_line_changed(filename, signature):
                continue

        with Image.open(image_path) as source:
            img = source.convert("RGBA")

        for overlay_img_path, overlay_position in (data['overlays'] or default_overlays):
            with Image.open(overlay_img_path) as overlay_source:
                overlay_img = overlay_source.convert("RGBA")
            # Pasted without a mask: the overlay replaces the pixels below,
            # including their alpha.
            img.paste(overlay_img, tuple(overlay_position))

        for text, position, color, styled in texts:
            # A shared StyledText must start each text in its regular font.
            styled.current_font = styled.regular_font
            styled.draw_text(position or rectangle, text, img, color,
                             border_color=border_color, border_width=border_width)

        output_path = os.path.join(output_folder, filename + image_type)
        os.makedirs(os.path.dirname(output_path) or '.', exist_ok=True)
        if image_type == '.jpg':
            # JPEG has no alpha channel; Pillow refuses to save RGBA as JPEG.
            img = img.convert("RGB")
        img.save(output_path)
        print(f"{output_path}")
def main():
    """Run the image translation with the settings hard-coded below."""
    # os.path.join keeps the folder names valid on every platform.
    input_folder = os.path.join('Resources', 'img')
    output_folder = os.path.join('translation_out', 'Resources', 'img')
    overlay_img_path = 'overlay_image.png'
    csv_file = 'overlay_image_strings.csv'
    font_name = 'arial.ttf'  # Specify the base name of the font
    text_color = 'white'
    border_color = '#00ff00'
    border_width = 2
    font_size = 20
    overlay_position = (100, 50)  # Position to paste the overlay image
    # Coords to draw the text: 2 items = position, 4 = box
    rectangle = (100, 20, 500, 80)  # using a target rectangle instead

    os.makedirs(output_folder, exist_ok=True)
    print(f"The translations file is: {csv_file}")
    print(f"The overlay file is: {overlay_img_path}")
    process_images(input_folder, output_folder, rectangle, overlay_img_path, csv_file, font_name, font_size, text_color, border_color, border_width, overlay_position)


if __name__ == '__main__':
    main()
import os, csv, re, glob
# Layout of the translation CSV files: '→' separates the columns, quoting is
# switched off and '¶' escapes special characters.
DELIMITER_CHAR = '→'
ESCAPE_CHAR = '¶'
DIALECT_TRANSLATION = "translation"
csv.register_dialect(
    DIALECT_TRANSLATION,
    delimiter=DELIMITER_CHAR,
    quotechar="\uffff",
    quoting=csv.QUOTE_NONE,
    escapechar=ESCAPE_CHAR,
    lineterminator='\n',
)
def natural_sort_key(s):
    """Build a sort key that orders embedded numbers by value.

    The string is cut into runs of ASCII digits and runs of everything
    else; digit runs become ints and the other runs are lower-cased.
    """
    key = []
    for chunk in re.split('([0-9]+)', s):
        key.append(int(chunk) if chunk.isdigit() else chunk.lower())
    return key
def merge_csvs(directory, mask, output_file):
    """Merges CSV files in a directory tree into a single CSV file

    Every matching file becomes one output row: the first columns of its
    rows joined with a literal backslash-n, the second columns of its rows
    that have more than two columns joined the same way, and the file's
    base name with "_strings.csv" removed.

    Args:
        directory: The directory to scan (recursively).
        mask: The file mask to match (e.g., "*.csv").
        output_file: The name of the output CSV file. It is never treated
            as an input, even when it matches the mask.
    """
    # Collect the inputs before the output is created, and leave the output
    # out so a previous result is not merged into itself.
    output_id = os.path.normcase(os.path.abspath(output_file))
    files_found = [
        path
        for path in glob.glob(os.path.join(directory, '**', mask), recursive=True)
        if os.path.normcase(os.path.abspath(path)) != output_id
    ]
    files_found.sort(key=natural_sort_key)

    with open(output_file, 'w', newline='', encoding='utf-8-sig') as outfile:
        writer = csv.writer(outfile, DIALECT_TRANSLATION)
        for filename in files_found:
            with open(filename, 'r', newline='', encoding='utf-8-sig') as infile:
                # A csv reader can be walked only once, so the rows are kept
                # in a list for both columns; blank lines are dropped.
                rows = [row for row in csv.reader(infile, DIALECT_TRANSLATION) if row]
            writer.writerow([
                '\\n'.join(row[0] for row in rows),
                '\\n'.join(row[1] for row in rows if len(row) > 2),
                os.path.basename(filename).replace('_strings.csv', ''),
            ])
if __name__ == "__main__":
    # Scan the current directory for all "*_strings.csv" files and merge
    # them into "overlay_image_strings.csv".
    directory, mask, output_file = '.', '*_strings.csv', 'overlay_image_strings.csv'
    merge_csvs(directory, mask, output_file)
import os
import cv2 # pip install opencv-python
import numpy as np # pip install numpy
def remove_watermark(image, template, threshold=0.8):
    """Removes a subimage from an image using template matching.

    Every location where the template matches is covered with a filled
    white rectangle of the template's size. The image is changed in place.

    Args:
        image (numpy.ndarray): The image from which to remove the subimage.
        template (numpy.ndarray): The subimage template, either BGR or
            already single-channel. None (e.g. a failed cv2.imread) means
            there is nothing to remove.
        threshold (float): Minimum TM_CCOEFF_NORMED score that counts as a
            match.

    Returns:
        numpy.ndarray: The image with subimages removed.
    """
    if template is None:
        return image
    # A template that is already single-channel needs no colour conversion.
    template_gray = template if template.ndim == 2 else cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)
    h, w = template_gray.shape[:2]
    # A template larger than the image cannot occur in it; checking here
    # avoids calling matchTemplate with sizes it cannot search.
    if h > image.shape[0] or w > image.shape[1]:
        return image

    # Match template
    res = cv2.matchTemplate(image, template_gray, cv2.TM_CCOEFF_NORMED)
    ys, xs = np.where(res >= threshold)

    # Remove watermark
    for x, y in zip(xs, ys):
        x, y = int(x), int(y)
        cv2.rectangle(image, (x, y), (x + w, y + h), (255, 255, 255), -1)
    return image
def preprocess_for_ocr(image_path, threshold=130, max_value=255):
    """Preprocesses an image with a colored background and borders for improved OCR

    Args:
        image_path (str): The path to the input image.
        threshold (int): Gray level used by both thresholding steps.
        max_value (int): Value given to pixels that pass a threshold.

    Returns:
        numpy.ndarray: The preprocessed image.

    Raises:
        ValueError: If the image cannot be read.
    """
    # 1. Load and convert to grayscale
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        # cv2.imread reports an unreadable file by returning None.
        raise ValueError(f"Could not read image: {image_path}")

    # 2. Thresholding for initial background removal
    _, thresh = cv2.threshold(image, threshold, max_value, cv2.THRESH_BINARY_INV)

    # 3. Morphological operations (opening to remove noise)
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
    opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=2)

    # 4. Find contours and create a mask for the text region
    contours, _ = cv2.findContours(opening, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    mask = np.zeros_like(image)
    cv2.drawContours(mask, contours, -1, 255, -1)

    # 5. Apply the mask to the original image
    result = cv2.bitwise_and(image, image, mask=mask)

    # 6. Denoising
    # h: Parameter regulating filter strength. Big h perfectly removes noise
    #    but also removes image details, smaller h preserves details but also
    #    preserves some noise.
    # templateWindowSize: Size in pixels of the template patch that is used
    #    to compute weights. Should be odd.
    # searchWindowSize: Size in pixels of the window that is used to compute
    #    weighted average for given pixel. Should be odd.
    denoised_image = cv2.fastNlMeansDenoising(result, None, 7, 7, 21)

    # 7. Binarization
    _, thresholded_image = cv2.threshold(denoised_image, threshold, max_value, cv2.THRESH_BINARY)
    thresholded_image = cv2.bitwise_not(thresholded_image)
    return thresholded_image
# Specify the folder containing the images
input_folder = "img"
output_folder = "for_ocr"
os.makedirs(output_folder, exist_ok=True)

# Load watermark template
template_image_path = 'watermark_text.png' # Path to the bad image template (like series of dots that messes with OCR)
watermark_template = cv2.imread(template_image_path)
if watermark_template is None:
    # cv2.imread returns None for a missing or unreadable file; the images
    # are then preprocessed without the watermark removal step.
    print(f"Watermark template not loaded, skipping removal: {template_image_path}")

# Iterate through all files in the folder
for filename in os.listdir(input_folder):
    # Compare extensions case-insensitively so "IMAGE.PNG" is handled too.
    if filename.lower().endswith((".jpg", ".jpeg", ".png")): # Process common image formats
        image_path = os.path.join(input_folder, filename)
        preprocessed_image = preprocess_for_ocr(image_path)
        if watermark_template is not None:
            cleaned_image = remove_watermark(preprocessed_image, watermark_template)
        else:
            cleaned_image = preprocessed_image
        output_path = os.path.join(output_folder, filename)
        cv2.imwrite(output_path, cleaned_image)
        print(f"Preprocessed and saved: {output_path}")
print("Preprocessing complete!")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment