import matplotlib.pyplot as plt
from sklearn.preprocessing import MultiLabelBinarizer
import pandas as pd
import numpy as np
# option 1. dynamically download the JSON from the Google Fonts Developer API
# url = 'https://www.googleapis.com/webfonts/v1/webfonts?key='
# key = 'YOUR-API-KEY'
# data = pd.read_json(url + key)

# option 2. use JSON already downloaded (replace with your own file path)
df = pd.read_json('../../input/fonts-master.json')
# df.head()

# flatten the JSON hierarchy (easier to handle this way)
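As a quick illustration of what that flattening amounts to, here is a self-contained sketch using pd.json_normalize on a hand-written record shaped like one entry of the Google Fonts API's 'items' list (the record below is made up purely for illustration, it is not real data and not the author's code):

# toy record shaped like one entry of the API's 'items' list (illustrative only)
sample_items = [{'family': 'Example Sans',
                 'variants': ['regular', '700'],
                 'subsets': ['latin'],
                 'category': 'sans-serif'}]
flat = pd.json_normalize(sample_items)
# flat has one row with columns 'family', 'variants', 'subsets', 'category',
# i.e. the same shape the rest of this notebook expects from df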
# select only the columns we need
cols = ['family', 'variants', 'subsets', 'category']
df = df[cols]
# df.head(5)

# remove spaces from the family name so that it matches the font file naming convention
df.family = [name.replace(' ', '') for name in df.family]
df.head(5)
mlb = MultiLabelBinarizer()

# one-hot encoding + prefix
df = df.join(pd.DataFrame(mlb.fit_transform(df.pop('variants')),
                          columns=[x for x in mlb.classes_],
                          index=df.index))
df = df.join(pd.DataFrame(mlb.fit_transform(df.pop('subsets')),
                          columns=['subsets_' + x for x in mlb.classes_],
                          index=df.index))
df = df.join(pd.get_dummies(df['category'], prefix='category')).drop(['category'], axis=1)
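For reference, MultiLabelBinarizer turns a column of lists into one 0/1 indicator column per label, which is exactly what the join above relies on. A small sketch of that behaviour, using the class already imported at the top (the demo lists are illustrative only):

# each inner list plays the role of one font's 'variants' entry
demo = [['regular', '700'], ['regular'], ['700', 'italic']]
demo_mlb = MultiLabelBinarizer()
print(demo_mlb.fit_transform(demo))   # rows of 0/1 flags, one column per label
print(demo_mlb.classes_)              # ['700' 'italic' 'regular'] -> column order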
col_names = {
    "100": "thin",
    "100italic": "thinitalic",
    "200": "extralight",
    "200italic": "extralightitalic",
    "300": "light",
    "300italic": "lightitalic",
    "400": "regular",
    "regular": "regular",
    "400italic": "italic",
    # the remaining weights ("500" to "900" and their italic variants) follow
    # the same pattern; the rest of the mapping is truncated in this excerpt
}
# modules we need this time
import matplotlib.pyplot as plt
from PIL import Image, ImageDraw, ImageFont
import pandas as pd
import numpy as np

# load the CSV we created in the last post
df = pd.read_csv('./google-fonts-annotaion.csv')
print('Number of fonts in each variant:')
print(df.filter(regex=r'^variants_', axis=1).eq(1).sum().sort_values(ascending=False))

print('\nNumber of fonts in each subset:')
print(df.filter(regex=r'^subsets_', axis=1).eq(1).sum().sort_values(ascending=False))

print('\nNumber of fonts in each category:')
print(df.filter(regex=r'^category_', axis=1).eq(1).sum().sort_values(ascending=False))
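Since matplotlib is already imported, the same counts can also be looked at visually. A small sketch (not part of the original excerpt) that plots the per-category counts as a bar chart:

# quick look: bar chart of how many fonts fall into each category
category_counts = df.filter(regex=r'^category_', axis=1).eq(1).sum().sort_values(ascending=False)
category_counts.plot(kind='bar', figsize=(8, 4), title='Fonts per category')
plt.tight_layout()
plt.show()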
mask = (df.filter(regex='thin', axis=1).sum(axis=1).astype(bool) &
        df.filter(regex='regular', axis=1).sum(axis=1).astype(bool) &
        df.filter(regex='thai', axis=1).sum(axis=1).astype(bool))
df_selected = df[mask]
df_selected
mask = (df.filter(regex='chinese', axis=1).sum(axis=1).astype(bool) &
        df.filter(regex='sans-serif', axis=1).sum(axis=1).astype(bool))
df_selected = df[mask]
df_selected
regex_filters = ['_regular', '_japanese', '_serif']
df_new = pd.concat([df.filter(regex=regex, axis=1).sum(axis=1).astype(bool)
                    for regex in regex_filters], axis=1)
mask = df_new.all(axis=1)
df.loc[mask]
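With a set of families selected, the PIL imports above can be used to render a quick specimen. A minimal sketch, assuming the font files were downloaded locally into a fonts/ directory and named <Family>-Regular.ttf (the spaces were stripped from family names earlier for exactly this kind of lookup, but the directory and file naming here are assumptions, not from the original code):

# hypothetical specimen rendering for the first selected family
family = df.loc[mask, 'family'].iloc[0]
font_path = './fonts/{}-Regular.ttf'.format(family)   # assumed local path and naming

img = Image.new('RGB', (400, 120), color='white')
draw = ImageDraw.Draw(img)
font = ImageFont.truetype(font_path, size=64)
draw.text((10, 20), 'Aa あア 日本', font=font, fill='black')  # Japanese sample, matching the subset filter
img.save('specimen_{}.png'.format(family))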