In [None]:

from pandas import *
from numpy import *
# print with numpy's 1.25 legacy formatting
set_printoptions(legacy = "1.25")

# whole csv as one 2d array
mnist = read_csv("mnist.csv").to_numpy()

# column 0 goes to labels, all remaining columns to dataset
labels, dataset = mnist[:,0], mnist[:,1:]

# covariance matrix of the columns of dataset, and its trace
Q = cov(dataset.T)
totvar = trace(Q)


In [None]:

from scipy.linalg import eigh

# use eigh for symmetric matrices
# (only the eigenvalues lamda are used below; U is kept but not needed here)
lamda, U = eigh(Q)

# eigenvalues in decreasing order: sort ascending, then reverse.
# Named `descending` so the builtin sorted() is not rebound for later cells.
descending = sort(lamda)[::-1]
# each eigenvalue as a percentage of the total variance
percent = descending*100/totvar

# cumulative sums
sums = cumsum(percent)

# first 20 rows: [percent, cumulative percent], rounded to 3 decimals
data = array([percent,sums])
print(data.T[:20].round(decimals = 3))

d = len(lamda)
from matplotlib.pyplot import *

# one step per eigenvalue, with edges 0,1,...,d
stairs(percent,range(d+1))

grid()
show()


In [None]:

from scipy.linalg import eigh

# projection matrix onto top n 
# eigenvectors of variance
# of dataset

def pca(dataset,n):
	"""Return the d x d matrix P = B B^T, where the columns of B are the
	n eigenvectors of cov(dataset.T) with the largest eigenvalues.

	dataset : 2d array; its transpose is passed to cov
	n       : number of leading eigenvectors to keep
	"""
	# eigh returns the eigenvectors as the columns of its second result
	eigenvalues, eigenvectors = eigh(cov(dataset.T))
	# column indices of the n largest eigenvalues, largest first
	top = argsort(eigenvalues)[::-1][:n]
	# d x n matrix of the selected eigenvectors
	basis = eigenvectors[:,top]
	return basis.dot(basis.T)


In [None]:

from scipy.linalg import svd
	
# projection matrix onto top n 
# eigenvectors of variance
# of dataset
	
def pca_with_svd(dataset,n):
	"""Return the d x d projection matrix onto the top n right singular
	vectors of the centered dataset.

	dataset : 2d array with N rows and d columns
	n       : number of leading right singular vectors to keep
	"""
	# center dataset
	mu = mean(dataset,axis = 0)
	vectors = dataset - mu
	N, d = vectors.shape
	# rows of V are right singular vectors, already in decreasing
	# singular-value order, so no sort is needed.
	# A full svd also builds an N x N left factor that is never used here
	# and is prohibitively large when N is large. The reduced svd yields
	# the same d x d V whenever N >= d; for N < d the full svd is kept so
	# that V still has d rows, exactly as before.
	V = svd(vectors, full_matrices = (N < d))[2]
	Uproj = V[:n].T # top n rows as columns
	P = dot(Uproj,Uproj.T)
	return P


In [None]:

def display_image(v,row,col,i):
	"""Show v as a 28 x 28 image, with the gray_r colormap and no axis
	ticks, in a new subplot of the module-level figure fig.

	v        : array that reshape can turn into shape (28,28)
	row, col : grid dimensions handed to fig.add_subplot
	i        : subplot index handed to fig.add_subplot
	"""
	pixels = reshape(v,(28,28))
	# fig is not a parameter: it must already exist at module level
	fig.add_subplot(row, col, i)
	# the pyplot calls below act on the current axes
	xticks([])
	yticks([])
	imshow(pixels, cmap = "gray_r")


In [None]:

# 2 x 4 grid: the chosen image in slot 1, seven projections in slots 2..8
fig = figure(figsize = (10,5))
row = 2
col = 4

# second image of the dataset
v = dataset[1]
display_image(v,row,col,1)

# project v with the matrix built from the top n eigenvectors, for each n
for i,n in enumerate([784,600,350,150,50,10,1],2):
	# either will work
	P = pca(dataset,n)
	#P = pca_with_svd(dataset[:100],n)
	projv = P.dot(v)
	display_image(projv,row,col,i)


In [None]:

from sklearn.decomposition import PCA

# number of rows of dataset
N = dataset.shape[0]
# number of principal components the engine keeps
n = 10
engine = PCA(n_components=n)


In [None]:

# fit the PCA engine on dataset and keep the transformed data;
# the last line displays its shape as the cell output
reduced = engine.fit_transform(dataset)	
reduced.shape


In [None]:

# map the reduced data back through the fitted engine;
# the last line displays its shape as the cell output
projected = engine.inverse_transform(reduced)
projected.shape


In [None]:

from matplotlib.pyplot import *

# 2 x 4 grid: the chosen image in slot 1, seven reconstructions in slots 2..8
fig = figure(figsize = (10,5))
row = 2
col = 4

# second image of the dataset
v = dataset[1]
display_image(v,row,col,1)

# engine, reduced and projected are rebound on every pass, so after the
# loop they hold the results of the last entry (n = 3)
for i,n in enumerate([784,600,350,150,50,10,3],2):
	engine = PCA(n_components=n)
	reduced = engine.fit_transform(dataset)
	projected = engine.inverse_transform(reduced)
	# row 1 of projected corresponds to v
	projv = projected[1]
	display_image(projv,row,col,i)


In [None]:

from scipy.spatial import ConvexHull

# one color per label value 0..9
Colors = ('blue', 'red', 'green', 'orange', 'gray','cyan','turquoise', 'black', 'orchid', 'brown')

for i,color in enumerate(Colors):
	# rows of reduced whose label equals i
	points = reduced[labels==i,:]
	# plot the first two columns; c = color applies the entry of Colors
	# (without it the tuple above has no effect on the plot)
	scatter(points[:,0], points[:,1], label = i, c = color, edgecolor = 'black')
	# optional convex hull outline, left disabled:
	#hull = ConvexHull(points)
	#for simplex in hull.simplices:
	#	plot(points[simplex, 0], points[simplex, 1], '-',c = color)

grid()
legend(loc = 'upper right')
show()


In [None]:

%matplotlib ipympl
from matplotlib.pyplot import *

# axes with a 3d projection
ax = axes(projection = "3d")

# one color per label value 0..9
Colors = ('blue', 'green', 'black', 'brown', 'gray','cyan' , 'turquoise', 'orange', 'orchid', 'red')

# plot columns 0, 1 and 2 of the rows of reduced carrying each label
for i,color in enumerate(Colors):
	points = reduced[labels==i]
	ax.scatter(points[:,0], points[:,1], points[:,2], label = i, c = color, edgecolor = 'black')

ax.set_aspect("equal")
ax.set_axis_off()

legend(loc = 'upper right')
show()


In [None]:


from sklearn import datasets

# NOTE: dataset and labels are rebound here, replacing the values
# assigned from mnist.csv in the first cell
iris = datasets.load_iris()
dataset = iris["data"]
labels = iris["target"]

# iris has only 4 features, so n_components cannot exceed 4.
# Set n here instead of inheriting whatever an earlier cell left in it
# (n = 10 from the engine setup cell would make PCA raise). 3 is the value
# the reconstruction loop leaves in n when the notebook is run top to bottom.
n = 3
engine = PCA(n_components = n)
reduced = engine.fit_transform(dataset)

# close previous plots
close("all")

Colors = ['blue', 'red', 'green']
# load_iris encodes the targets as 0 = setosa, 1 = versicolor, 2 = virginica,
# so the names must be listed in that order to label the points correctly
Classes = ["Iris-setosa", "Iris-versicolor", "Iris-virginica"]
Labels = [0,1,2]

# one scatter call per class, on the first two columns of reduced
for Label, Class, Color in zip(Labels,Classes,Colors):
	scatter(reduced[labels==Label,0], reduced[labels==Label,1], label = Class, c = Color, edgecolor = 'black')

grid()
legend(loc = 'upper right')
show()
