rubus0304 님의 블로그
[머신러닝 심화 2주차] 본문
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier,plot_tree
# Load the Titanic training data (local path from the lecture environment).
titanic_df = pd.read_csv('C:/Users/82106/Desktop/데이터분석/강의/6.파이썬/4.머신러닝/ML/titanic/train.csv')
titanic_df.info()

# Preprocessing plan:
# - Pclass, Sex, Embarked: label-encode the categorical columns
# - Age: impute missing values with the column mean
le = LabelEncoder()
titanic_df['Sex'] = le.fit_transform(titanic_df['Sex'])
le2 = LabelEncoder()
titanic_df['Pclass'] = le2.fit_transform(titanic_df['Pclass'])

age_mean = titanic_df['Age'].mean()
titanic_df['Age'] = titanic_df['Age'].fillna(age_mean)

le3 = LabelEncoder()
# Fill the few missing ports with 'S' (the most frequent value) before encoding.
titanic_df['Embarked'] = titanic_df['Embarked'].fillna('S')
titanic_df['Embarked'] = le3.fit_transform(titanic_df['Embarked'])

# BUG FIX: X_features was referenced below (and later in the notebook) but never
# defined. Declare it as the set of columns preprocessed above.
# NOTE(review): the exact feature list used in the lecture is not visible in this
# paste — confirm against the original notebook.
X_features = ['Pclass', 'Sex', 'Age', 'Embarked']
X = titanic_df[X_features]
y = titanic_df['Survived']

# A depth-1 tree ("decision stump"): one split only, which keeps the plot readable.
model_dt = DecisionTreeClassifier(max_depth=1)
model_dt.fit(X, y)

plt.figure(figsize=(10, 5))
plot_tree(model_dt, feature_names=X_features, class_names=['Not Survived', 'Survived'], filled=True)
plt.show()
DecisionTreeClassifier의 max_depth 는 트리를 최대 몇 단계 깊이까지 분기할지(트리의 최대 깊이)를 정하는 하이퍼파라미터!!
max_depth = 1 일 때

( ) 아예 저 괄호 안을 비워놨을 때

# 로지스틱회귀, 의사결정나무, 랜덤포레스트
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score,f1_score
# Three baseline classifiers, all trained on the full training set
# (no train/test split here — the scores below are training-set scores).
model_lor = LogisticRegression()
model_dt = DecisionTreeClassifier(random_state=42)
model_rf = RandomForestClassifier(random_state=42)

# Fit every model on the same features/target.
for _clf in (model_lor, model_dt, model_rf):
    _clf.fit(X, y)

# In-sample predictions for scoring.
y_lor_pred = model_lor.predict(X)
y_dt_pred = model_dt.predict(X)
y_rf_pred = model_rf.predict(X)
def get_score(model_name, y_true, y_pred):
    """Print (and return) accuracy and f1 for one model's predictions.

    Args:
        model_name: label shown at the start of the printed line.
        y_true: ground-truth target values.
        y_pred: model predictions aligned with y_true.

    Returns:
        (acc, f1) rounded to 3 decimals. The original returned None; returning
        the scores is backward-compatible (existing callers ignore the result).
    """
    # BUG FIX: the function body lost its indentation in the blog paste,
    # which is a SyntaxError as published — restored here.
    acc = accuracy_score(y_true, y_pred).round(3)
    f1 = f1_score(y_true, y_pred).round(3)
    print(model_name, 'acc 스코어는: ', acc, 'f1_score는: ', f1)
    return acc, f1
# Training-set scores for each baseline model
# (labels padded with spaces so the printed columns line up).
for _label, _pred in [('lor', y_lor_pred), ('dt ', y_dt_pred), ('rf ', y_rf_pred)]:
    get_score(_label, y, _pred)

# Inspect the inputs and which of them the random forest leaned on
# (bare expressions: displayed when run in a notebook cell).
X_features
model_rf.feature_importances_
# 최근접 이웃, 부스팅 모델 적용
# !pip install xgboost
# !pip install lightgbm
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import GradientBoostingClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
# k-NN plus three boosting-family models, same fit-on-everything workflow
# as the baselines above.
model_knn = KNeighborsClassifier()
model_gbm = GradientBoostingClassifier(random_state=42)
model_xgb = XGBClassifier(random_state=42)
model_lgb = LGBMClassifier(random_state=42)

for _clf in (model_knn, model_gbm, model_xgb, model_lgb):
    _clf.fit(X, y)

# In-sample predictions for scoring.
y_knn_pred = model_knn.predict(X)
y_gbm_pred = model_gbm.predict(X)
y_xgb_pred = model_xgb.predict(X)
y_lgb_pred = model_lgb.predict(X)
# Compare all seven models on the training data
# (labels kept byte-identical to the original, including padding spaces).
_all_preds = [
    ('lor', y_lor_pred),
    ('dt ', y_dt_pred),
    ('rf ', y_rf_pred),
    ('knn', y_knn_pred),
    ('gbm ', y_gbm_pred),
    ('xgb ', y_xgb_pred),
    ('lgb ', y_lgb_pred),
]
for _label, _pred in _all_preds:
    get_score(_label, y, _pred)
acc (accuracy) 정확도 / f1-score : 정밀도(precision)와 재현율(recall)의 조화평균
'강의 > 머신러닝' 카테고리의 다른 글
[머신러닝 4주차] 딥러닝 (0) | 2024.11.28 |
---|---|
[머신러닝 심화 3주차] 군집 (1) | 2024.11.27 |
[머신러닝 심화 1주차] (0) | 2024.11.27 |
[기초] 분류분석 - 로지스틱회귀 (1) | 2024.11.20 |
[기초] 3.선형회귀 심화 (0) | 2024.11.19 |