Eksploracja Danych: Laboratorium 5

Zbiory danych

  • egzamin-cpp.csv Zbiór zawiera oceny studentów: zaliczenie z języka C, zaliczenie z języka C++ (I termin) oraz wyniki egzaminu z C++ (I termin)
  • egzamin-cpp-train.csv około 50% zbioru egzamin-cpp.csv (stratyfikowane)
  • egzamin-cpp-test.csv pozostałe 50% z zamazaną informacją o wynikach
  • grid.arff Wszystkie wariacje ocen

Weka

Konwersja plików

  java -cp /opt/weka/weka.jar weka.core.converters.CSVLoader egzamin-cpp.csv ...

Kod do punktu 5.7

import numpy as np
import pandas as pd
from io import StringIO
import datetime
 
# Inline, semicolon-separated sample of the exam data, so the listing can be
# run without reading a CSV file from disk.
data = """ImieNazwisko;OcenaC;DataC;OcenaCpp;Egzamin
Dqhoil Dhxpluj;3.5;2016-01-14;4;3
Bhnhgpxj Lwjmq;4.5;2016-01-14;4;3
Hewicg Cirxgqnvfog;5;2016-01-14;3.5;4.5
Ffkhrs Xkxgylf;3.5;2016-01-14;3;2
"""
inp = StringIO(data)

# Read the sample; the DataC column is parsed as dates.
df = pd.read_csv(inp, sep=';', parse_dates=['DataC'])
df.head()

# Work on a copy without the student-name column.
df2 = df.drop('ImieNazwisko', axis=1)
# Replace the parsed date column with its pd.to_numeric conversion.
df2['DataC'] = pd.to_numeric(df2['DataC'])
df2.head()

# Binary label 'zdal': 1 where the exam grade is above 2, 0 where it is 2 or
# lower. Both masks are computed from the unchanged 'Egzamin' column.
passed_mask = df2['Egzamin'] > 2
failed_mask = df2['Egzamin'] <= 2
df2.loc[passed_mask, 'zdal'] = 1
df2.loc[failed_mask, 'zdal'] = 0
df2.head(len(df2))
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
 
# NOTE(review): X and y are not defined anywhere in this listing; presumably
# they are the feature matrix and the 'zdal' labels built from df2 — TODO confirm.
# Hold out 30% of the rows for testing; random_state fixes the split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 123)
# Fit a logistic regression on the training part and report the
# classification metrics for its predictions on the held-out part.
clf = LogisticRegression()
clf.fit(X_train,y_train)
y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred))
from sklearn.preprocessing import StandardScaler
# Standardize column 1 of X and write the result back into X
# (presumably the date column, judging by the name date_col — TODO confirm).
# NOTE(review): this runs after the split and fit above, so X_train/X_test and
# clf were produced before the rescaling; the split and fit likely need to be
# re-run for the scaling to matter — confirm the intended cell order.
scaler = StandardScaler()
date_col = scaler.fit_transform(X[:,1].reshape(-1,1))
X[:,1]=date_col.reshape(-1)
# Print the predicted class probabilities for the held-out rows.
probs = clf.predict_proba(X_test)
print(probs)
def print_formula(weights,intercept,labels,target):
    """Print a linear model as a readable sum of weighted terms.

    The first line is ``<target> = ``. It is followed by one tab-indented
    ``<weight> * <label> +`` line per entry of ``weights``, and the last
    line holds the intercept.

    Args:
        weights: Sequence of coefficients, formatted with ``' .3g'``
            (three significant digits, a space reserved for the sign).
        intercept: Constant term, formatted with ``'.8'``.
        labels: Names of the terms, looked up by position; needs at least
            as many entries as ``weights``.
        target: Text printed on the left-hand side of the formula.
    """
    print(f'{target} = ')
    for position, coefficient in enumerate(weights):
        term = f'\t{coefficient: .3g}\t* {labels[position]} +'
        print(term)
    print(f'\t{intercept:.8}')
 
# Print the fitted logistic-regression coefficients and intercept as a formula.
# NOTE(review): df3 is not defined in this listing; presumably it is the feature
# DataFrame whose columns match the columns of X — TODO confirm.
print_formula(clf.coef_[0],clf.intercept_[0],df3.columns,'log(odds zdal)')

Kod do punktu 5.8

import tensorflow as tf
from keras import layers
from keras import models
from keras.models import Sequential,InputLayer
from keras.layers import Dense
 
# Fix the TensorFlow random seed.
tf.random.set_seed(123)
# Model with an input of X.shape[1] features followed by a single
# sigmoid-activated Dense unit.
model = models.Sequential()
model.add(layers.InputLayer(input_shape=(X.shape[1],)))
model.add(layers.Dense(1,activation='sigmoid'))
model.summary()
# TODO: '???' is a placeholder left for the exercise (RMSprop argument) and is
# not valid Python; replace it before running.
model.compile(optimizer=tf.keras.optimizers.RMSprop(???), loss='binary_crossentropy', metrics=['accuracy'])
# TODO: '???' is a placeholder for the number of epochs; replace it before running.
hist = model.fit(X_train,y_train,epochs=???,verbose=1)
probs = model.predict(X_test)
# TODO: '...' is a placeholder; y_pred has to be derived from probs before
# it is passed to classification_report.
y_pred=...
print(classification_report(y_test, y_pred))
# Read the weights and bias from model.layers[0] and print them
# (presumably the Dense layer — TODO confirm for the Keras version in use).
weights = model.layers[0].get_weights()[0]
bias = model.layers[0].get_weights()[1]
print(weights)
print(bias)
# NOTE(review): df3 is not defined in this listing; presumably the feature
# DataFrame whose columns match X — TODO confirm.
print_formula(weights[:,0],bias[0],df3.columns,'log(odds zdal)')
med/lab_5ed.txt · Last modified: 2022/03/30 02:55 by pszwed
CC Attribution-Share Alike 4.0 International
Driven by DokuWiki Recent changes RSS feed Valid CSS Valid XHTML 1.0