import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn import metrics
from sklearn.metrics import confusion_matrix
import seaborn as sns
from sklearn.metrics import plot_confusion_matrix
# Load the movie dataset; the relative path is resolved against the current
# working directory.
movies = pd.read_csv("Movie_classification.csv")
# Discretizing the data: encode the categorical columns as integers
from sklearn import preprocessing

# Encode the categorical 'Genre' column as integer class labels.
le = preprocessing.LabelEncoder()
Genre_n = le.fit_transform(movies['Genre'])

# Map the YES/NO flag in '3D_available' to 1/0. Any other value (including a
# missing one) is rejected explicitly: the previous loop silently skipped such
# rows, which only surfaced later as a length-mismatch error on assignment.
available_3d = movies['3D_available'].map({"YES": 1, "NO": 0})
if available_3d.isna().any():
    raise ValueError("3D_available must contain only 'YES' or 'NO'")
d = available_3d.astype("int64").tolist()

movies['3d_available'] = d
movies['Genre_n'] = Genre_n.tolist()

# Drop the raw categorical columns now that encoded copies exist;
# 'Time_taken' is removed from the frame as well.
movies.drop(
    labels=['3D_available', 'Genre', 'Time_taken'],
    axis=1,
    inplace=True,
)

# Notebook-style display of the frame; it prints nothing when run as a script.
movies
# Separate the label column Start_Tech_Oscar from the features.
# Start_Tech_Oscar is the target we are trying to predict.
# Features are every column except the target; the target is the
# 'Start_Tech_Oscar' column.
X = movies.loc[:, movies.columns != 'Start_Tech_Oscar']
y = movies['Start_Tech_Oscar']
# Splitting the data into training and test sets
# Hold out 20% of the rows for testing and train on the remaining 80%.
# No random_state is passed, so the split differs from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
)
# Training a Support Vector Machine
# Creating an SVM classifier with a linear kernel
# Fit an SVM classifier with a linear kernel on the training split, then
# predict labels for both the held-out rows and the training rows.
# NOTE(review): the features are passed to the SVM unscaled; scikit-learn
# recommends scaling inputs for SVMs - confirm whether that is intended.
clf = SVC(kernel='linear')
clf.fit(X_train, y_train)
y_test_pred = clf.predict(X_test)
y_train_pred = clf.predict(X_train)
# Evaluate the linear-kernel model on the held-out split. The commented values
# are the output recorded from one run; the train/test split is not seeded,
# so the numbers vary between runs.
print("Accuracy:", metrics.accuracy_score(y_test, y_test_pred))
# Accuracy: 0.5980392156862745
print("Precision:", metrics.precision_score(y_test, y_test_pred))
# Precision: 0.6538461538461539
print("Recall:", metrics.recall_score(y_test, y_test_pred))
# Recall: 0.5964912280701754

# Confusion matrix for linear kernel. normalize='true' scales each true-label
# row so that it sums to 1.
sns.heatmap(
    confusion_matrix(y_test, y_test_pred, normalize='true'),
    annot=True,
)
# Training a Support Vector Machine
# Creating an SVM classifier with a polynomial kernel
# Fit an SVM classifier with a polynomial kernel on the training split, then
# predict labels for both the held-out rows and the training rows.
clf_poly = SVC(kernel='poly')
clf_poly.fit(X_train, y_train)
y_test_pred1 = clf_poly.predict(X_test)
y_train_pred1 = clf_poly.predict(X_train)
# Evaluate the polynomial-kernel model on the held-out split. The commented
# values are the output recorded from one run; the train/test split is not
# seeded, so the numbers vary between runs.
# In the recorded run, recall is 1.0 and precision equals accuracy, which
# means every test row was predicted as the positive class.
print("Accuracy:", metrics.accuracy_score(y_test, y_test_pred1))
# Accuracy: 0.5686274509803921
print("Precision:", metrics.precision_score(y_test, y_test_pred1))
# Precision: 0.5686274509803921
print("Recall:", metrics.recall_score(y_test, y_test_pred1))
# Recall: 1.0

# Confusion matrix for polynomial kernel (raw counts, not normalized).
sns.heatmap(confusion_matrix(y_test, y_test_pred1), annot=True)
# Training a Support Vector Machine
# Creating an SVM classifier with a sigmoid kernel
# Fit an SVM classifier with a sigmoid kernel on the training split, then
# predict labels for both the held-out rows and the training rows.
clf_sigmoid = SVC(kernel='sigmoid')
clf_sigmoid.fit(X_train, y_train)
y_test_pred2 = clf_sigmoid.predict(X_test)
y_train_pred2 = clf_sigmoid.predict(X_train)
# Evaluate the sigmoid-kernel model on the held-out split. The commented
# values are the output recorded from one run; the train/test split is not
# seeded, so the numbers vary between runs.
# In the recorded run, recall is 1.0 and precision equals accuracy, which
# means every test row was predicted as the positive class.
print("Accuracy:", metrics.accuracy_score(y_test, y_test_pred2))
# Accuracy: 0.506578947368421
print("Precision:", metrics.precision_score(y_test, y_test_pred2))
# Precision: 0.506578947368421
print("Recall:", metrics.recall_score(y_test, y_test_pred2))
# Recall: 1.0

# Confusion matrix for sigmoid kernel (raw counts, not normalized).
sns.heatmap(confusion_matrix(y_test, y_test_pred2), annot=True)
# OUTPUT: LINK
