In this notebook, I will explore the CelebA dataset.
In [1]:
import pandas as pd
import os
import numpy as np
import matplotlib.pyplot as plt
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
# Directory with the annotation text files read by get_annotation
# (note the trailing slash).
dir_anno = "data/Anno-20180622T163917Z-001/Anno/"
# Directory with the image files loaded by plot_image.
dir_data = "data/img_align_celeba/"
Let's take a look at the available labels/annotations
In [2]:
# List the annotation files with plain Python so the cell does not depend on
# the IPython `ls` automagic (unavailable outside IPython / on some platforms).
sorted(os.listdir(dir_anno))
Load the annotation files into DataFrames
In [8]:
def get_annotation(fnmtxt, verbose=True, anno_dir=None):
    """Load one whitespace-separated annotation text file into a DataFrame.

    File layout expected by the parser: the first line is ignored, the second
    line holds the column names, and every following line is one record.

    Parameters
    ----------
    fnmtxt : str
        Name of the annotation file inside the annotation directory.
    verbose : bool, default True
        If True, print the file name, the shape of the parsed table and its
        first rows.
    anno_dir : str, optional
        Directory that contains ``fnmtxt``. When omitted, the notebook-level
        ``dir_anno`` is used.

    Returns
    -------
    pandas.DataFrame
        One row per record that has a value in every column. When records
        carry one more field than the header, the extra leading column is
        named ``image_id``. Every column other than ``image_id`` is converted
        with ``pd.to_numeric(..., downcast="integer")``.

    Raises
    ------
    IndexError
        If the file has fewer than two lines (no header line).
    """
    base_dir = dir_anno if anno_dir is None else anno_dir
    if verbose:
        print("_"*70)
        print(fnmtxt)

    # splitlines() handles "\n" and "\r\n" alike. Splitting on a literal
    # "\r\n" finds nothing once text mode has already translated the newlines,
    # which left the whole file in a single element.
    with open(os.path.join(base_dir, fnmtxt), 'r') as rfile:
        texts = rfile.read().splitlines()

    # str.split() with no argument collapses runs of whitespace, so padded
    # columns do not produce empty fields.
    columns = texts[1].split()
    records = [line.split() for line in texts[2:]]

    df = pd.DataFrame(records)
    # Header without a name for the leading file-name field: supply one.
    if df.shape[1] == len(columns) + 1:
        columns = ["image_id"] + columns
    df.columns = columns
    # Blank or truncated lines become rows with missing values; discard them.
    df = df.dropna()
    if verbose:
        print(" Total number of annotations {}\n".format(df.shape))
        print(df.head())

    ## cast to integer
    for nm in df.columns:
        if nm != "image_id":
            df[nm] = pd.to_numeric(df[nm], downcast="integer")
    return df
# Attribute table and landmark table; each call prints its own summary.
attr = get_annotation("list_attr_celeba.txt", verbose=True)
align = get_annotation("list_landmarks_align_celeba.txt", verbose=True)

# Later cells index one table with a mask built from the other, so both must
# list the same images row for row.
assert (align["image_id"] == attr["image_id"]).all()
Plot facial images with landmarks
In [4]:
def plot_image(align, nrow=2):
    """Show the first ``nrow * 5`` images of ``align`` with landmarks overlaid.

    Parameters
    ----------
    align : pandas.DataFrame
        Table with an ``image_id`` column. Each row must unpack into exactly
        eleven values: the image id followed by the (x, y) pairs of the left
        eye, right eye, nose, left mouth corner and right mouth corner.
    nrow : int, default 2
        Number of subplot rows; the grid always has five columns.

    Images are read from ``dir_data`` and their pixel array is divided by 255
    before display. Each panel is titled with the array shape.
    """
    ncol = 5
    n_panels = nrow * ncol
    fig = plt.figure(figsize=(20, 10))

    for pos, fname in enumerate(align["image_id"][:n_panels]):
        pixels = img_to_array(load_img(dir_data + "/" + fname)) / 255.0

        # Positional unpack of the row: id first, then five (x, y) pairs.
        _, le_x, le_y, re_x, re_y, n_x, n_y, lm_x, lm_y, rm_x, rm_y = align.iloc[pos]
        landmark_points = [
            (le_x, le_y),
            (re_x, re_y),
            (n_x, n_y),
            (lm_x, lm_y),
            (rm_x, rm_y),
        ]

        ax = fig.add_subplot(nrow, ncol, pos + 1)
        ax.imshow(pixels)
        ax.set_title(pixels.shape)
        # One scatter call per landmark so each gets its own colour.
        for point_x, point_y in landmark_points:
            ax.scatter(point_x, point_y)
# Preview the first two rows of faces with their landmarks overlaid.
plot_image(align, nrow=2)
Plot all the (x, y) coordinates of the landmarks
In [5]:
landmarks = ["lefteye", "righteye", "nose", "leftmouth", "rightmouth"]

# One point cloud per landmark, all drawn on a single square axes.
fig, ax = plt.subplots(figsize=(10, 10))
for lmark in landmarks:
    x_col = lmark + "_x"
    y_col = lmark + "_y"
    ax.scatter(align[x_col], align[y_col], alpha=0.3, label=lmark)
ax.legend()
# Flip the y axis - presumably so the layout matches how the images are drawn
# (row 0 at the top).
ax.invert_yaxis()
plt.show()
Print the percentage of images with each attribute
In [6]:
# For every attribute column, report the percentage of rows whose value is 1.
for colnm in attr.columns:
    if colnm == "image_id":
        continue
    share_positive = np.mean(attr[colnm] == 1)
    print(" {:20} {:5.2f}%".format(colnm, 100*share_positive))
Plot celebrities with specific attributes
In [7]:
# Show one row of example faces for each selected attribute.
# Every attribute is listed once so that no figure is drawn twice.
for attrnm in ["Bald", "Bangs", "Male", "No_Beard", "Pointy_Nose", "Wearing_Earrings", "Smiling"]:
    print(attrnm)
    # attr and align share row order, so the boolean mask built from attr
    # selects the matching landmark rows.
    plot_image(align.loc[attr[attrnm] == 1, :], nrow=1)
    plt.show()