Created
May 6, 2024 09:32
-
-
Save sohang3112/46fb9f4651836b0928562d286f3ac069 to your computer and use it in GitHub Desktop.
Properly render Hindi (Devanagari) text with the Python wordcloud library (which uses matplotlib)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Image that runs hindi_wordcloud.py on top of the AWS Lambda Python 3.11 base image.
# TODO: move the compiler and -devel packages into a separate build stage (multi-stage build).
FROM public.ecr.aws/lambda/python:3.11

# Location of the libraqm RPM (EPEL 7 build). Override with --build-arg if the mirror moves.
# NOTE(review): EPEL 7 is end-of-life, so this URL may need to point at an archive mirror - confirm.
ARG LIBRAQM_RPM_URL=https://dl.fedoraproject.org/pub/epel/7/x86_64/Packages/l/libraqm-0.7.0-4.el7.x86_64.rpm

# Native dependencies in a single layer: text-shaping libraries (FreeType, HarfBuzz, FriBiDi,
# libraqm), cairo headers, and gcc, which is required for building wheels of pycairo.
# The downloaded RPM and the yum caches are removed in the same layer so they never reach the image.
RUN yum install -y \
        cairo-devel \
        freetype-devel \
        fribidi-devel \
        gcc \
        gtk-doc \
        harfbuzz-devel \
        meson \
        pkg-config \
        python3-devel \
        wget \
    && wget -nv -O /tmp/libraqm.rpm "${LIBRAQM_RPM_URL}" \
    && yum localinstall -y /tmp/libraqm.rpm \
    && rm -f /tmp/libraqm.rpm \
    && yum clean all \
    && rm -rf /var/cache/yum

# Python dependencies. pycairo is installed before mplcairo, in the same order as the
# original build steps. --no-cache-dir keeps pip's download cache out of the layer.
RUN pip3 install --no-cache-dir --upgrade pip setuptools wheel \
    && pip3 install --no-cache-dir pycairo \
    && pip3 install --no-cache-dir mplcairo wordcloud

# Application code (and the font file referenced by hindi_wordcloud.py) goes last so that
# source edits do not invalidate the dependency layers above.
COPY . .

# Run the script directly instead of through the Lambda runtime entrypoint.
# Exec form makes python3 PID 1 so it receives SIGTERM from `docker stop`. No CMD is set,
# because the script takes no arguments.
# To run under the Lambda runtime instead, drop this ENTRYPOINT (keeping the base image's
# /lambda-entrypoint.sh) and pass the handler as the command:
#   CMD ["hindi_wordcloud.lambda_handler"]
ENTRYPOINT ["python3", "hindi_wordcloud.py"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
from collections import Counter

import matplotlib
from wordcloud import WordCloud

# Select mplcairo's GUI-less backend. The Qt variant needs Qt bindings, which the
# accompanying Dockerfile does not install, and this script only writes an image file.
matplotlib.use("module://mplcairo.base")

font_path = "Arial-Unicode-Regular.ttf"  # change font path to any installed Devanagari font
output_path = '/data/hindi_wordcloud.png'  # where the rendered word cloud is written


def lambda_handler(event, context):
    """Render a word cloud of sample Hindi words and save it as a PNG.

    Args:
        event: Invocation payload; not read by this handler.
        context: Invocation context; not read by this handler.

    Returns:
        The path of the PNG file that was written (``output_path``).

    Raises:
        OSError: If the output directory cannot be created or the file cannot be written.
    """
    words = ["नमस्ते", "दृष्टि"]
    word_freqs = Counter(words)

    wc = WordCloud(width=800, height=400, background_color='white', font_path=font_path)
    wc.generate_from_frequencies(word_freqs)
    image = wc.to_image()

    # Create the target directory first; saving fails if it does not exist.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    image.save(output_path)
    print('saved wordcloud')
    return output_path


if __name__ == '__main__':
    print(lambda_handler({}, {}))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment