# Principal component analysis (PCA) of the body-measurements data set.
#
# Steps:
#   1. Load the spreadsheet into a DataFrame and print it for a visual check.
#   2. Fit a PCA with every component kept and plot the cumulative share of
#      variance explained against the number of components.
#   3. Fit a 4-component PCA and draw a heatmap of its component loadings.

# Libraries for plotting, numeric manipulation, data manipulation and PCA.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.decomposition import PCA

# Import the data set. Change the path to the location of your data file.
# The path on my Mac:
# bm = pd.read_excel(r'/Users/cyu/Documents/body_measurements.xlsx')
# The path on my Windows machine:
bm = pd.read_excel(r'G:\JUMP\553\Lecture_PowerPoint\Unit_09_PCA\Python_files\body_measurements.xlsx')

# Preview the data file to verify that it loaded as expected.
print(bm)

# Run the PCA with all components kept, to see how the variance is spread.
# NOTE(review): scikit-learn's PCA centers the columns but does not rescale
# them. If the measurements are on different scales (e.g. mass vs. lengths),
# the largest-variance columns will dominate the components -- confirm that
# fitting the raw values is intended, or standardize first.
pca = PCA().fit(bm)

# Plot the cumulative variance explained.
# explained_variance_ratio_ holds fractions, so scale by 100 to match the
# "(%)" axis label, and start the x axis at 1 so that each point sits above
# the number of components it actually summarizes (not the 0-based index).
cumulative_pct = np.cumsum(pca.explained_variance_ratio_) * 100
component_counts = np.arange(1, len(cumulative_pct) + 1)

plt.figure()
plt.plot(component_counts, cumulative_pct)
plt.xlabel('Number of Components')
plt.ylabel('Variance (%)')  # cumulative over the first k components
plt.title('Variance explained')
plt.show()

# Fit a 4-component model.
pca = PCA(n_components=4)
pca.fit(bm)

# Create a heatmap to see the component loadings of the items: one row per
# component, one column per input variable.
heatmap = pd.DataFrame(pca.components_)

# Short display names for the heatmap columns.
# NOTE(review): these are assumed to follow the column order of the
# spreadsheet and to match its column count (11) -- verify against the file.
x_axis_labels = [
    "Mass", "Fore", "Bicep", "Chest", "Neck", "Shoulder",
    "Waist", "Height", "Calf", "Thigh", "Head",
]

# Draw on a fresh figure and show it explicitly; without plt.show() the
# heatmap is never displayed when this file is run as a plain script.
plt.figure()
sns.heatmap(heatmap, cmap='twilight', annot=True, xticklabels=x_axis_labels)
plt.show()