"""Fit a linear support vector machine to the diabetes data and report metrics.

The script loads the data set, separates the binary outcome column from the
predictor columns, holds out part of the rows for testing, trains a linear
SVM on the remainder and prints accuracy, precision and recall.
"""
import pandas as pd

# Location of the input data; your file location may be different.
# A SAS data set (for example r'c:\temp\diabetes.sas7bdat') may be given
# instead -- load_dataset() picks the reader from the file extension.
DATA_PATH = r'c:\temp\Diabetes.xlsx'
SHEET_NAME = 'Diabetes'

# Name of the target (dependent) variable.
TARGET_COLUMN = 'Y_Binary'

# Fraction of rows held out for testing, and the seed that makes the
# train/test partition reproducible from run to run.
TEST_SIZE = 0.3
RANDOM_STATE = 42


def load_dataset(path=DATA_PATH, sheet_name=SHEET_NAME):
    """Read the data set at *path* into a DataFrame.

    Files ending in ``.sas7bdat`` are read with ``pandas.read_sas``; anything
    else is treated as an Excel workbook and *sheet_name* is read from it.
    """
    if str(path).lower().endswith('.sas7bdat'):
        return pd.read_sas(path)
    return pd.read_excel(path, sheet_name=sheet_name)


def split_features_target(df, target=TARGET_COLUMN):
    """Return ``(X, y)``: the predictors and the target column of *df*.

    The target column is removed from ``X``.  Leaving it among the predictors
    lets the model read the answer directly and makes every reported metric
    meaningless.  *df* itself is not modified.

    Raises:
        KeyError: if *target* is not a column of *df*.
    """
    y = df[target]
    X = df.drop(columns=[target])
    return X, y


def main():
    """Load the data, train the linear SVM and print the evaluation metrics."""
    # Imported here rather than at module top so the data-preparation helpers
    # above can be imported and tested without scikit-learn being installed.
    from sklearn import metrics, svm
    from sklearn.model_selection import train_test_split

    df = load_dataset()
    print(df)

    # NOTE(review): if the sheet holds other columns derived from the outcome
    # (or non-numeric columns), they should be dropped or encoded here as
    # well -- confirm against the actual data.
    X, y = split_features_target(df, TARGET_COLUMN)

    # Partition the data into training and testing subsets.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE
    )

    # Fit a support vector machine with a linear kernel on the training set.
    clf = svm.SVC(kernel='linear')
    clf.fit(X_train, y_train)

    # Predict the outcome for the held-out test set.
    y_pred = clf.predict(X_test)

    # Accuracy: what fraction of all predictions are correct (the hit rate)?
    print("Accuracy:", metrics.accuracy_score(y_test, y_pred))

    # Precision: of the cases predicted positive, what fraction really are?
    print("Precision:", metrics.precision_score(y_test, y_pred))

    # Recall: of the cases that really are positive, what fraction were found?
    print("Recall:", metrics.recall_score(y_test, y_pred))


if __name__ == "__main__":
    main()