Skip to content

Instantly share code, notes, and snippets.

@AliRn76
Last active February 19, 2022 15:21
Show Gist options
  • Save AliRn76/36001738d774940b8ee32544c44672ca to your computer and use it in GitHub Desktop.
Save AliRn76/36001738d774940b8ee32544c44672ca to your computer and use it in GitHub Desktop.
Convert uploaded file to image (FastAPI)
import os
import shlex
import base64
import subprocess
import fitz # PyMuPDF
from uuid import uuid4
from fastapi import FastAPI
from pydantic import BaseModel
from PIL import Image, ImageDraw, ImageFont
# ASGI application object; the route handlers in this module register on it.
app = FastAPI()

# Directory (relative to the process working directory) that receives the
# decoded uploads and every image rendered from them.
UPLOAD_PATH = 'files/'

# exist_ok=True replaces the "check, then create" pair, which could raise
# FileExistsError if the directory appeared between the two calls (e.g. when
# several workers start at the same time).
os.makedirs(UPLOAD_PATH, exist_ok=True)
# Request body accepted by the upload endpoint.
class UploadSerializer(BaseModel):
    # Client-supplied file name; it is only echoed back in the response and
    # is never used to build a path on disk.
    name: str
    # File content as base64 text; it is decoded with base64 before being
    # written to disk.
    file: str
def pdf_to_image(file_path: str) -> None:
    """Render every page of the document at ``file_path`` to a JPEG file.

    Page ``n`` (zero-based, as reported by ``page.number``) is written to
    ``<base>-<n>.jpg`` where ``<base>`` is ``file_path`` without a trailing
    ``.pdf``. When ``file_path`` does end in ``.pdf`` the source file is
    treated as a temporary artefact and deleted once rendering is done;
    otherwise it is left in place.

    :param file_path: path of a document PyMuPDF can open.
    """
    is_temporary_pdf = file_path.endswith('.pdf')
    path = file_path[:-4] if is_temporary_pdf else file_path

    # The context manager closes the document even if a page fails to
    # render; the original never closed it.
    with fitz.open(file_path) as doc:
        for page in doc:
            pix = page.get_pixmap(alpha=False)
            pix.save(f'{path}-{page.number}.jpg')

    # Delete only after the document is closed: removing a file that is
    # still open fails on some platforms.
    if is_temporary_pdf:
        os.remove(file_path)
def write_file(file_data: str, path: str) -> None:
    """Decode base64 text and store the resulting bytes at ``path``.

    :param file_data: base64-encoded content as a ``str``.
    :param path: destination file; it is created or overwritten.
    """
    encoded = file_data.encode()
    payload = base64.decodebytes(encoded)
    with open(path, 'wb') as destination:
        destination.write(payload)
def get_file_extension(file_path: str) -> bytes:
    """Return the MIME type of ``file_path`` as reported by ``file(1)``.

    Despite its name, this returns a MIME type such as ``b'text/plain'``,
    not a file extension; the caller maps it to an extension.

    :param file_path: path of the file to inspect.
    :returns: the last whitespace-separated token of the command output,
        as ``bytes``.
    :raises subprocess.CalledProcessError: if ``file`` exits non-zero.
    """
    # Pass an argv list instead of splitting an f-string: a path containing
    # whitespace or quotes is then handed to `file` as one argument rather
    # than being broken into several.
    cmd = ['file', '--mime-type', file_path]
    result = subprocess.check_output(cmd)
    mime_type = result.split()[-1]
    print(f'{mime_type = }')
    return mime_type
def convert_doc_to_pdf(file_path: str) -> None:
    """Convert a word-processor document to PDF with LibreOffice Writer.

    Runs ``lowriter --convert-to pdf`` with the upload directory as output
    directory. The exit status of the command is not checked.

    :param file_path: path of the document to convert.
    """
    # An argv list keeps `file_path` a single argument even when it contains
    # whitespace or quotes; splitting an f-string would break it apart.
    cmd = [
        'lowriter',  # LibreOfficeWriter
        '--convert-to', 'pdf',
        '--outdir', f'{UPLOAD_PATH}.',
        file_path,
    ]
    subprocess.call(cmd)
def _render_text_file(file_path: str, image_path: str) -> None:
    """Draw the text file at ``file_path`` onto one white JPEG at ``image_path``."""
    canvas_size = (1080, 720)
    margin = 10
    fnt = ImageFont.truetype('AriaTextG1-Regular.otf', 15)
    # Uploaded text has no relation to the server locale, so decode it as
    # UTF-8 explicitly and replace undecodable bytes instead of failing.
    with open(file_path, 'r', encoding='utf-8', errors='replace') as file:
        text = file.read()
    # Wrap to the drawable width: the text starts `margin` pixels in, so
    # wrapping to the full canvas width let long lines run off the right edge.
    wrapper = TextWrapper(text=text, font=fnt, max_width=canvas_size[0] - 2 * margin)
    wrapped_text = wrapper.wrapped_text()
    image = Image.new(mode='RGB', size=canvas_size, color='white')
    draw = ImageDraw.Draw(image)
    draw.text((margin, margin), wrapped_text, font=fnt, fill=(0, 0, 0))
    image.save(image_path)
    # TODO: if the wrapped text is taller than the canvas,
    #       write the rest of the file to further images (while)


# NOTE(review): this route reads a request body on GET, which many HTTP
# clients and proxies do not support; the method is kept so existing callers
# keep working, but POST would be the conventional choice.
@app.get('/upload/')
def upload(data: UploadSerializer):
    """Store an uploaded file and render it to image(s) where supported.

    The base64 payload is written under ``UPLOAD_PATH`` with a random hex
    name, its MIME type is detected, and then:

    * ``image/jpeg``: stored as is;
    * ``application/msword``: converted to PDF, then each page to JPEG;
    * ``application/pdf``: each page rendered to JPEG;
    * ``text/plain``: rendered onto a single JPEG;
    * anything else: stored as is.

    Finally the stored file is renamed to carry an extension.

    :param data: request body with the original name and base64 content.
    :returns: dict with ``data`` and ``name`` (both the original name) and
        ``path`` (the stored file's path including extension).
    """
    original_name = data.name
    file_name = uuid4().hex
    file_path = UPLOAD_PATH + file_name
    write_file(file_data=data.file, path=file_path)
    mime_type = get_file_extension(file_path=file_path)

    if mime_type == b'image/jpeg':
        extension = '.jpeg'
    elif mime_type == b'application/msword':
        extension = '.doc'
        convert_doc_to_pdf(file_path=file_path)
        pdf_to_image(file_path=f'{file_path}.pdf')
    elif mime_type == b'application/pdf':
        extension = '.pdf'
        pdf_to_image(file_path=file_path)
    elif mime_type == b'text/plain':
        extension = '.txt'
        _render_text_file(file_path=file_path, image_path=f'{file_path}.jpg')
    else:
        # Use the MIME subtype as extension. The leading dot was missing,
        # which produced names such as "<hex>png" instead of "<hex>.png".
        extension = '.' + mime_type.decode().split('/')[-1]

    # Every branch leaves the original upload at `file_path`; give it its
    # extension so the returned path points at an existing file.
    os.rename(file_path, file_path + extension)
    return {
        'data': original_name,
        'path': file_path + extension,
        'name': original_name
    }
class TextWrapper:
    """
    Helper class to wrap text in lines, based on given text, font
    and max allowed line width.

    Widths are measured in pixels with the given font, so the result
    depends on the rendered size of each word, not on character counts.
    """

    def __init__(self, text, font, max_width):
        """
        :param text: the text to wrap; ``\\n`` separates input lines.
        :param font: PIL font used to measure words.
        :param max_width: maximum rendered width of an output line.
        """
        self.text = text
        # Normalise each input line: drop empty fragments produced by runs
        # of spaces and clean every remaining word.
        self.text_lines = [
            ' '.join(self.strip_word(w) for w in line.split(' ') if w)
            for line in text.split('\n')
        ]
        self.font = font
        self.max_width = max_width
        # Scratch drawing surface, used only for measuring text.
        self.draw = ImageDraw.Draw(
            Image.new(
                mode='RGB',
                size=(100, 100)
            )
        )
        self.space_width = self.get_text_width(' ')

    @staticmethod
    def strip_word(word):
        """Strip surrounding whitespace from ``word``.

        A word containing exactly one tab character gets a single leading
        space after stripping; any other word is returned stripped.
        """
        stripped = word.strip()
        # The original also tested `word.strip() == '\t'`, which can never
        # be true because strip() removes tabs; that dead branch is gone.
        if word.count('\t') == 1:
            return f' {stripped}'
        return stripped

    def get_text_width(self, text):
        """Return the rendered width of ``text`` in the wrapper's font."""
        # ImageDraw.textsize was removed in Pillow 10; prefer textlength and
        # fall back to textsize only on old Pillow versions that lack it.
        measure = getattr(self.draw, 'textlength', None)
        if measure is not None:
            return measure(text, font=self.font)
        return self.draw.textsize(text=text, font=self.font)[0]

    def wrapped_text(self):
        """Return the text with ``\\n`` inserted so lines fit ``max_width``.

        Input line breaks are preserved (an empty input line yields an empty
        output line). A single word wider than ``max_width`` is placed on a
        line of its own.
        """
        wrapped_lines = []
        for line in self.text_lines:
            buf = []
            buf_width = 0
            for word in line.split(' '):
                word_width = self.get_text_width(word)
                if buf:
                    expected_width = buf_width + self.space_width + word_width
                else:
                    expected_width = word_width

                if expected_width <= self.max_width:
                    # word fits in line
                    buf_width = expected_width
                    buf.append(word)
                    continue

                # word doesn't fit in line: flush what has been collected.
                # Guard on `buf` so an over-wide word arriving on an empty
                # buffer no longer emits a spurious blank line.
                if buf:
                    wrapped_lines.append(' '.join(buf))
                buf = [word]
                buf_width = word_width
            if buf:
                wrapped_lines.append(' '.join(buf))
        return '\n'.join(wrapped_lines)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment