Apply PCA to a DataFrame

In [5]:
import pandas as pd
from pandas import DataFrame
import numpy as np

from sklearn.decomposition import PCA

Create a DataFrame with 50 rows and 8 columns of random numbers drawn from a standard normal distribution (mean 0, standard deviation 1).

In [9]:
# Use a dedicated, seeded generator so that re-running the notebook from a
# fresh kernel reproduces exactly the same data (and thus the same PCA result).
RANDOM_SEED = 0
rng = np.random.RandomState(RANDOM_SEED)

# 50 observations (rows) of 8 features (columns), each drawn from N(0, 1).
df = pd.DataFrame(data=rng.normal(0, 1, (50, 8)))
df.head()
Out[9]:
0 1 2 3 4 5 6 7
0 -0.314008 -1.090192 -0.920020 -0.559545 0.775226 -0.882474 1.050049 -0.092116
1 -0.145504 -0.297851 0.140186 -0.724637 0.558747 -0.708013 -0.555629 0.653227
2 0.711590 -0.215502 -0.266352 -1.058049 -0.324418 0.929751 0.352552 -0.256500
3 0.625027 -0.725645 0.477974 -1.182262 1.435128 -0.673370 -1.112910 0.354148
4 1.335500 -0.488708 -0.131384 -0.581381 -0.451392 0.149943 -0.250088 -0.011798

Fit a PCA model that keeps the first 4 principal components of the data.

In [10]:
# Build the estimator and fit it in one step; fit() hands back the fitted
# estimator itself, so the chained result can be bound directly to `pca`.
pca = PCA(n_components=4).fit(df)

# Display the fitted estimator's configuration as the cell output.
pca
Out[10]:
PCA(copy=True, iterated_power='auto', n_components=4, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)

Apply the transformation and convert the result into a DataFrame.

In [14]:
# Project the original data onto the fitted principal components.
pca_values = pca.transform(df)

# Derive the labels from the width of the transformed array instead of
# repeating the component count, so this cell keeps working if the number of
# components chosen when constructing the PCA is changed.
columns = ['pca_%i' % i for i in range(pca_values.shape[1])]

# Reuse the original index so rows of df_pca line up with rows of df.
df_pca = pd.DataFrame(pca_values, columns=columns, index=df.index)
df_pca.head()
Out[14]:
pca_0 pca_1 pca_2 pca_3
0 -0.546917 0.506951 -1.042971 0.287119
1 -0.054910 -0.217007 -0.539264 0.457777
2 0.908084 -0.113016 -0.602685 -1.009475
3 -0.168224 -1.419395 -1.216016 0.196382
4 0.678731 -0.605830 -0.057639 -0.428901