%% Reading data and kNN imputation to fill missing values
% Load the training CSV (cells containing 'na' are read as missing), drop
% the first column, convert to a numeric matrix and impute the gaps.
% Every statement ends with ';' so the large tables and matrices are not
% echoed to the command window.
% NOTE(review): knnimpute is documented as filling a NaN from the
% nearest-neighbour *column*; confirm this orientation is intended for a
% rows-as-observations matrix.

training_table = readtable('equip_failures_training_set.csv','TreatAsEmpty','na');
training_table(:,1) = [];   % drop the first column (presumably the row id -- confirm)
training_data = table2array(training_table);
imputed_training = knnimpute(training_data);

%% Determination of columns of histogram sensors
% Seven groups of ten consecutive columns; each group holds the bins of one
% histogram sensor.

bins_per_hist = 10;
hist_column_numbers = [8:17,34:43,44:53,54:63,101:110,115:124,160:169];
hist_columns = imputed_training(:,hist_column_numbers);
n_hist = numel(hist_column_numbers)/bins_per_hist;

%% Feature transformation
% Row-wise mean of each histogram sensor's own bin columns.
% Sensor i occupies columns (i-1)*bins_per_hist+1 : i*bins_per_hist of
% hist_columns. The earlier index expression k.*i (with k = 1:10) selected
% columns i, 2i, ..., 10i instead, which mixes bins of different sensors
% for every i > 1.
% The output is sized from the data rather than a hard-coded row count.

mean_hist = zeros(size(hist_columns,1), n_hist);
for i = 1:n_hist
    bin_cols = (i-1)*bins_per_hist + (1:bins_per_hist);
    mean_hist(:,i) = mean(hist_columns(:,bin_cols),2);
end

%% Merging old imputed training data and histogram mean data
% Replace the raw histogram bin columns by their per-sensor means, which
% are appended as the last columns.

imputed_training(:,hist_column_numbers) = [];
imputed_training = [imputed_training mean_hist];

%% Model Selection
% The target makes this a classification problem. I tried boosted trees,
% random forest, classification trees, LDA and SVM. Random forest gave the
% best result in terms of mean F1 score, so I then varied some of the
% model's parameters to see whether the results could be improved.
% Random forest ... (the remainder of this note is cut off in the source)

%% Test data import and missing-data filling using kNN imputation
% Same steps as for the training file.

test_data = readtable('equip_failures_test_set.csv','TreatAsEmpty','na');
test_data(:,1) = [];        % drop the first column (presumably the row id -- confirm)
test_array = table2array(test_data);
imputed_test = knnimpute(test_array);

%% Determination of columns of histogram sensors for test data
% Same groups as for the training matrix, shifted left by one column
% (presumably because the test file has no target column -- confirm).

hist_column_numbers_test = hist_column_numbers - 1;
hist_column_test = imputed_test(:,hist_column_numbers_test);

%% Feature transformation for test data
% Must use exactly the same per-sensor column blocks as the training
% features above.

mean_hist_test = zeros(size(hist_column_test,1), n_hist);
for i = 1:n_hist
    bin_cols = (i-1)*bins_per_hist + (1:bins_per_hist);
    mean_hist_test(:,i) = mean(hist_column_test(:,bin_cols),2);
end

%% Merging old imputed test data and histogram mean data

imputed_test(:,hist_column_numbers_test) = [];
imputed_test = [imputed_test mean_hist_test];

%% Predictions on test data
% The model structs used below are not created in this script; they must
% already exist in the workspace (presumably exported from the
% Classification Learner app -- confirm).
% NOTE: models fitted on features produced by the earlier k.*i indexing
% must be retrained on the corrected imputed_training before these
% predictions are meaningful.

boost30pred = gentle_boost30.predictFcn(imputed_test);
random_forest_pred = random_forest.predictFcn(imputed_test);
rusboosted_pred = rusboosted.predictFcn(imputed_test);
rand_for_40pred = randomforest_40learners.predictFcn(imputed_test);
logit_pred = logitboost.predictFcn(imputed_test);
rand_40k = rand_for_40k.predictFcn(imputed_test);

%%
function mf1 = meanF1(tp,fp,fn)
% MEANF1 F1 score from true-positive, false-positive and false-negative counts.
%   mf1 = meanF1(tp,fp,fn) returns 2*p*r/(p+r), where p = tp/(tp+fp) is the
%   precision and r = tp/(tp+fn) is the recall. It is evaluated in the
%   algebraically equivalent form 2*tp/(2*tp+fp+fn), which needs only one
%   division.
%
%   tp, fp, fn  scalars or arrays of compatible size; the computation is
%               element-wise.
%   mf1         F1 score(s). Where 2*tp+fp+fn is zero (no positives were
%               predicted or present) the score is defined as 0 instead of
%               NaN.
denom = 2.*tp + fp + fn;
mf1 = (2.*tp)./denom;
% 0/0 yields NaN; report an undefined F1 as 0 so downstream averages stay finite.
mf1(denom == 0) = 0;
end

Built With

Share this project:

Updates