This is an old revision of the document!
Table of Contents
Metody eksploracji danych
Laboratorium 1
Zbiory danych
http://home.agh.edu.pl/~pszwed/med/regression/xy-001.arff http://home.agh.edu.pl/~pszwed/med/regression/xy-002.arff http://home.agh.edu.pl/~pszwed/med/regression/xy-003.arff http://home.agh.edu.pl/~pszwed/med/regression/xy-004.arff http://home.agh.edu.pl/~pszwed/med/regression/xy-005.arff http://home.agh.edu.pl/~pszwed/med/regression/xy-006.arff http://home.agh.edu.pl/~pszwed/med/regression/xy-007.arff http://home.agh.edu.pl/~pszwed/med/regression/xy-008.arff http://home.agh.edu.pl/~pszwed/med/regression/xy-009.arff http://home.agh.edu.pl/~pszwed/med/regression/xy-010.arff
Zbiory danych
# Scatter-plot the inline (x, y) sample together with two reference lines and
# show the Pearson correlation coefficient of the sample in the plot title.
#
# In a Jupyter notebook, interactive figures can be enabled beforehand with
# the magic command:  %matplotlib notebook
from io import StringIO

import numpy as np
from scipy import stats

# Inline sample: one "x,y" pair per line, with no header lines.
data = """
0.246939,9.011391
0.895519,8.950505
0.971588,9.671047
1.188316,14.735488
1.741884,10.265625
2.196002,13.501097
2.637403,13.887849
2.788188,17.180626
3.50202,19.321529
3.531476,16.089503
3.995073,14.937624
4.66407,18.389229
4.88705,22.798099
5.644447,25.351644
6.537993,26.94125
6.654565,20.387372
6.981497,21.255345
7.099548,27.745566
7.453511,22.811026
8.145089,22.388721
8.865577,28.027686
8.983554,25.146826
9.939295,34.305519
10.132365,33.500249
10.804992,35.683783
10.956247,33.300984
11.135499,30.047647
12.080814,35.787975
12.647943,40.051468
12.741248,35.344707
13.37512,33.765994
13.699004,38.776812
13.795843,37.091575
14.474034,38.114638
15.033079,45.589492
15.592465,40.631264
16.192292,48.739644
16.65253,48.830867
16.832643,50.325774
17.577283,45.206373
17.853024,53.339617
18.763727,48.279457
19.045983,55.953631
19.26704,50.470961
19.537664,51.928816
19.987968,49.376176
20.603744,52.380207
21.373748,61.677885
22.242239,57.668626
22.710625,56.161207
23.706639,65.423664
23.991602,60.008664
24.26953,61.870482
24.899023,66.002296
25.110234,70.342272
25.960459,68.472507
26.712467,72.751912
26.730612,73.491421
26.832166,73.970975
27.234095,74.192411
27.263899,67.012532
28.186481,71.667746
28.609198,70.126676
29.00642,71.713675
29.344191,78.395062
29.652469,72.427284
30.009633,80.863626
30.176943,73.542808
30.616236,80.771572
30.89822,80.967313
31.684718,77.600474
32.564911,79.397491
33.03557,80.840347
33.132668,83.721291
33.820815,88.970661
34.109682,89.530881
34.661445,93.863877
35.162583,89.337648
35.432228,87.968034
36.122985,90.976234
36.532793,91.573681
37.008879,97.673479
37.712701,92.459677
38.486883,94.410451
38.99117,104.222796
39.663589,105.80185
40.241739,101.458148
40.519365,105.707817
40.910886,103.881927
40.998451,99.379055
41.420003,105.555433
42.057595,103.837871
42.374651,107.950421
43.164247,106.545838
44.086193,107.721106
44.137013,110.703987
44.41119,107.948371
45.148115,115.274231
45.845917,112.39734
46.47258,115.871904
"""

# Coefficients of the two straight lines drawn over the scatter plot.
# NOTE(review): the first pair looks like a fit obtained outside this script
# (it is hard-coded, not computed here) -- confirm its origin.
FIT_SLOPE, FIT_INTERCEPT = 2.3702, 6.1973
# The second pair matches the "f_true = 2.37x+7" formula shown in the title.
TRUE_SLOPE, TRUE_INTERCEPT = 2.37, 7


def load_xy(text, skiprows=0):
    """Parse comma-separated ``x,y`` rows from *text* into two arrays.

    Args:
        text: String holding one ``x,y`` pair per line.
        skiprows: Number of leading lines to skip before parsing. The
            default of 0 keeps every row; the inline sample has no header,
            so skipping lines there would silently discard data points.
            Pass a positive value only for input that starts with header
            lines.

    Returns:
        A ``(x, y)`` pair of arrays, one value per parsed row.
    """
    x, y = np.loadtxt(
        StringIO(text),
        delimiter=',',
        usecols=(0, 1),
        unpack=True,
        skiprows=skiprows,
    )
    return x, y


def plot_regression(x, y, r):
    """Draw the sample as a scatter plot with the two reference lines.

    Args:
        x: Horizontal coordinates of the sample points.
        y: Vertical coordinates of the sample points.
        r: Correlation coefficient appended to the plot title.
    """
    # Imported here so that loading the data and computing the statistic do
    # not require a plotting backend.
    import matplotlib.pyplot as plt

    plt.scatter(x, y, s=80, marker='+')

    # Evaluate both lines over the full visible x-range.
    fx = np.linspace(-10, 60, 100)
    fy = FIT_SLOPE * fx + FIT_INTERCEPT
    ftrue = TRUE_SLOPE * fx + TRUE_INTERCEPT
    plt.plot(fx, fy, linewidth=2, color='r')
    plt.plot(fx, ftrue, linewidth=1, linestyle='--', color='g')

    plt.xlim(-10, 60)
    plt.grid(True)
    plt.xlabel('X')
    plt.ylabel('Y')
    plt.title('Regression $f_{true} = 2.37x+7$ r=' + str(r))
    plt.show()


# Load every sample point and compute the Pearson correlation coefficient.
x, y = load_xy(data)
r = stats.pearsonr(x, y)[0]

if __name__ == "__main__":
    plot_regression(x, y, r)