-
Notifications
You must be signed in to change notification settings - Fork 0
/
naive_bayes.h
134 lines (108 loc) · 2.92 KB
/
naive_bayes.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
// SWAMI KARUPPASWAMI THUNNAI
#pragma once
// Verbosity flag values.
// NOTE(review): presumably meant to be passed as the `verbose` argument of
// gaussian_naive_bayes - confirm against callers.
#define DEBUG 1
#define NODEBUG 0
// Pi written as a long double literal (note the L suffix).
#define PI 3.141592653589793238462643383279502884L
#include <algorithm>
#include <fstream>
#include <future>
#include <iomanip>
#include <iostream>
#include <map>
#include <mutex>
#include <set>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
#include "json.h"
#include "dataioclass.h"
#include "itrainpredict.h"
using json = nlohmann::json;
/*
A small value holder that pairs a feature column index with a mean and a
variance. A data structure to ease the math in the gaussian_naive_bayes class,
which keeps a vector of these per label.
*/
class mean_variance
{
private:
    // Index of the column these statistics belong to
    unsigned long int column;
    // Mean stored for the column
    double _mean;
    // Variance stored for the column
    double variance;

public:
    /*
    Stores the three values exactly as given; no validation is performed.
    */
    mean_variance(unsigned long int column, double _mean, double variance)
        : column(column), _mean(_mean), variance(variance) {}

    // The accessors below are const-qualified so that they can be called on
    // const objects and through const references.

    // Returns the stored column index
    unsigned long int get_column() const
    {
        return column;
    }

    // Returns the stored mean
    double get_mean() const
    {
        return _mean;
    }

    // Returns the stored variance
    double get_variance() const
    {
        return variance;
    }
};
/*
Gaussian Naive Bayes classifier, similar to scikit-learn's GaussianNB.
Only the declarations live here; every member function without an inline body
is defined in another translation unit.
Written By: Visweswaran N on 2019-09-02
Edited By: https://github.com/JUNZ1
*/
class gaussian_naive_bayes : public DataIOClass, public ITrainPredict
{
private:
    // Independent variable
    std::vector<std::vector<double>> X = {};
    // Dependent variable
    std::vector<unsigned long int> y = {};
    // Verbose to print debug messages. Defaults to 0 so that a constructor
    // which does not mention it cannot leave it indeterminate.
    unsigned short verbose = 0;
    // Independent variable probabilities <features, probability>
    std::map<unsigned long int, std::map<double, double>> X_prob;
    // Dependent variable probabilities
    std::map<unsigned long int, double> y_prob;
    // Labels of y
    std::set<unsigned long int> labels;
    // label, column, mean and variance
    std::map<unsigned long int, std::vector<mean_variance>> mean_variance_map;

private:
    // Internal helpers, defined out of line.
    // NOTE(review): print() presumably honours `verbose` - confirm in the
    // implementation file.
    void print(std::string message);
    void calculate_y_probabilities();
    void calculate_x_probabilities();

public:
    /*
    Constructor to be used only when loading the existing model.
    NOTE(review): not marked explicit, so an integer converts implicitly to a
    classifier; left as-is to keep existing callers compiling.
    */
    gaussian_naive_bayes(unsigned short verbose) : verbose(verbose) {}

    /*
    Constructor to be used when creating a new model.
    X and y are taken by value and moved into the members, so passing an
    rvalue costs no deep copy and passing an lvalue costs exactly one.
    */
    gaussian_naive_bayes(std::vector<std::vector<double>> X, std::vector<unsigned long int> y, unsigned short verbose)
        : X(std::move(X)), y(std::move(y)), verbose(verbose) {}

    // Default, copy and move operations are declared here and defined out of
    // line (std::mutex and std::future members are not copyable, so the
    // compiler-generated copy operations would be unavailable).
    gaussian_naive_bayes();
    gaussian_naive_bayes(const gaussian_naive_bayes& copyFromThis);
    gaussian_naive_bayes(gaussian_naive_bayes&& moveFromThis);
    virtual ~gaussian_naive_bayes() = default;
    gaussian_naive_bayes& operator=(const gaussian_naive_bayes& copyFromThis);
    gaussian_naive_bayes& operator=(gaussian_naive_bayes&& moveFromThis);

    /*
    For fitting the model
    */
    void fit();

    /*
    For predicting output.
    Returns a map keyed by unsigned long int with double values.
    NOTE(review): presumably label -> score/probability - confirm in the
    implementation file.
    */
    std::map<unsigned long int, double> predict(std::vector<double> X_test);

    /*
    Used to save the model
    */
    void save_model(std::string model_name);

    /*
    Used to load the saved model
    */
    void load_model(std::string model_name);

public: // Overridden interface methods (virtuals inherited from a base class)
    void Train() override;
    std::vector<double> Predict(std::vector<double>) override;

private:
    // NOTE(review): presumably used to serialise and track training started
    // through Train() - usage is not visible in this header; confirm in the
    // implementation file.
    std::mutex _trainingMutex;
    std::future<void> _trainFuture;
};