메뉴 건너뛰기

데이터과학

4월 21일 수업자료 수정하여 올려드립니다~

 

# Model input columns; 'Deck' is the feature added in this revision.
features = ['Pclass', 'Sex', 'Age', 'Fare', 'Embarked', 'Deck']

 

# Build the Deck column in one pass:
#   1. take the first character of Cabin as the deck letter,
#   2. replace missing values with the placeholder 'U' (unknown),
#   3. label-encode the letters with the existing encoder `le`.
df['Deck'] = le.fit_transform(df['Cabin'].str[0].fillna('U'))

 

# Feature matrix and target for the "Deck = U" experiment.
X, y = df[features], df['Survived']

# Hold out 20% of the rows for validation; the seed is fixed so the split
# is reproducible.
X_train, X_valid, y_train, y_valid = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# fit() returns the estimator itself, so construction and training chain.
model = RandomForestClassifier(random_state=42).fit(X_train, y_train)
pred = model.predict(X_valid)

# Validation accuracy when missing decks are encoded as 'U'.
print("Deck = U ACC:", accuracy_score(y_valid, pred))

 

 

 

# Fill missing decks based on Pclass.
# Rebuild the raw deck letter from Cabin first: this assignment replaces
# whatever 'Deck' currently holds with the first character of Cabin
# (missing where Cabin is missing).
df['Deck'] = df['Cabin'].str[0]


def _most_common_deck(decks, default='U'):
    """Return the most frequent non-null value in *decks*.

    Series.mode() ignores nulls by default and returns an empty Series when
    every value is null; indexing that with [0] would raise, so fall back to
    *default* ('U', the "unknown" placeholder) in that case.
    """
    modes = decks.mode()
    return modes.iloc[0] if not modes.empty else default


# Most frequent known deck letter for each passenger class.
deck_map = df.groupby('Pclass')['Deck'].agg(_most_common_deck)

print(deck_map)

 

def fill_deck(row, deck_lookup=None):
    """Return the row's deck, imputing a missing one from its passenger class.

    Args:
        row: Mapping-like record (for example a DataFrame row passed by
            ``df.apply(..., axis=1)``) with 'Deck' and 'Pclass' entries.
        deck_lookup: Optional mapping from Pclass to the replacement deck.
            When omitted, the module-level ``deck_map`` is used, so existing
            single-argument callers keep working.

    Returns:
        ``row['Deck']`` when it is not null; otherwise the lookup entry for
        ``row['Pclass']``.

    Raises:
        KeyError: If the deck is missing and the row's Pclass has no entry
            in the lookup.
    """
    deck = row['Deck']
    # A known deck is returned untouched; the lookup is only consulted
    # for rows whose deck is null.
    if not pd.isnull(deck):
        return deck

    if deck_lookup is None:
        deck_lookup = deck_map
    return deck_lookup[row['Pclass']]

 

# Impute each row's deck with fill_deck (row-wise), then label-encode the
# resulting values with the existing encoder `le`.
df['Deck'] = le.fit_transform(df.apply(fill_deck, axis=1))

 

# Feature matrix and target for the "Deck = Pclass mode" experiment.
X, y = df[features], df['Survived']

# Same 80/20 split and seed as the previous experiment, so the two
# accuracies are computed on the same validation rows.
X_train, X_valid, y_train, y_valid = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# fit() returns the estimator itself, so construction and training chain.
model = RandomForestClassifier(random_state=42).fit(X_train, y_train)
pred = model.predict(X_valid)

# Validation accuracy when missing decks are imputed with the per-Pclass mode.
print("Deck = Pclass mode ACC:", accuracy_score(y_valid, pred))

제목 날짜
태그 목록
위로