4월 21일 수업자료
4월 21일 수업자료 수정하여 올려드립니다~
# Experiment 1: treat a missing deck as its own category, 'U' (unknown).

# Feature columns used by the model, now including Deck.
features = ['Pclass', 'Sex', 'Age', 'Fare', 'Embarked', 'Deck']

# Deck is the first character of Cabin; rows with no Cabin get 'U'.
df['Deck'] = df['Cabin'].str[0].fillna('U')
# Automatic encoding of the deck letters with `le`, which is defined outside
# this section (presumably a LabelEncoder -- confirm).
df['Deck'] = le.fit_transform(df['Deck'])

X, y = df[features], df['Survived']
X_train, X_valid, y_train, y_valid = train_test_split(
    X, y, test_size=0.2, random_state=42
)

model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)
pred = model.predict(X_valid)
print("Deck = U ACC:", accuracy_score(y_valid, pred))
# Fill missing values based on Pclass.
# Re-derive Deck from Cabin so that rows without a Cabin are NaN here.
df['Deck'] = df['Cabin'].str[0]
# Most common deck letter per passenger class. mode() ignores NaN and returns
# an empty Series for a class with no known deck at all, so fall back to 'U'
# (unknown) instead of failing on the first-element lookup.
# NOTE: this is computed over the whole frame, i.e. before the
# train/validation split that follows.
deck_map = df.groupby('Pclass')['Deck'].agg(
    lambda decks: next(iter(decks.mode()), 'U')
)
print(deck_map)
def fill_deck(row):
    """Return the row's Deck, or the deck mapped to its Pclass if missing.

    Args:
        row: A mapping-like row with 'Deck' and 'Pclass' entries, such as
            the rows passed by ``df.apply(fill_deck, axis=1)``.

    Returns:
        ``row['Deck']`` when it is not null; otherwise the entry of the
        module-level ``deck_map`` for ``row['Pclass']``.
    """
    if pd.isnull(row['Deck']):
        # deck_map is the module-level Pclass -> deck lookup.
        return deck_map[row['Pclass']]
    return row['Deck']
# Experiment 2: a missing Deck is replaced row by row through fill_deck
# (which looks the row's Pclass up in deck_map), then encoded with `le`.
df['Deck'] = le.fit_transform(df.apply(fill_deck, axis=1))

X, y = df[features], df['Survived']
X_train, X_valid, y_train, y_valid = train_test_split(
    X, y, test_size=0.2, random_state=42
)

model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)
pred = model.predict(X_valid)
print("Deck = Pclass mode ACC:", accuracy_score(y_valid, pred))