-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathCosine.cpp
More file actions
125 lines (99 loc) · 3.5 KB
/
Cosine.cpp
File metadata and controls
125 lines (99 loc) · 3.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
#include <bits/stdc++.h>
using namespace std;
/* ----- Function to Read the Data from an Input File and Convert It into a String ----- */
/*
 * Loads the complete contents of the file at `path` and returns them as a
 * single string.
 *
 * If the file cannot be opened, a message is written to stderr and the whole
 * program terminates with EXIT_FAILURE, so this function never returns in
 * that case.
 */
string readFileIntoString(const string &path){
    ifstream source(path);
    if (source.is_open()){
        // Streaming the underlying buffer copies the whole file in one step.
        ostringstream contents;
        contents << source.rdbuf();
        return contents.str();
    }
    cerr << "Could not open the file - '" << path << "'" << endl;
    exit(EXIT_FAILURE);
}
/* ----- Function to Split a String into a Vector of Tokens ----- */
/*
 * Splits `InputString` on whitespace and returns the tokens in the order they
 * appear. Repeated tokens are kept; an empty or all-whitespace input yields an
 * empty vector.
 */
vector<string> ConverstStringTovectorToken(string InputString){
    istringstream words(InputString);
    // A default-constructed istream_iterator marks end-of-stream.
    const istream_iterator<string> endOfInput;
    return vector<string>(istream_iterator<string>(words), endOfInput);
}
/* ----- Function to Create the Set of Distinct Tokens from Both Input Strings ----- */
/*
 * Builds the combined vocabulary of two texts: every distinct
 * whitespace-separated token that occurs in `InputString` or `InputString2`.
 * The returned set is ordered by the default string comparison.
 */
set<string> ConverstStringToSetTokens(string InputString, string InputString2){
    set<string> vocabulary;

    // Tokenises one text and merges its words into the shared vocabulary.
    auto absorb = [&vocabulary](const string &text){
        istringstream words(text);
        for (string word; words >> word; ){
            vocabulary.insert(word);
        }
    };

    absorb(InputString);
    absorb(InputString2);
    return vocabulary;
}
/* ----- Function to Count the Frequency of Terms of Input string ----- */
/*
 * Counts how often each token of `input` occurs and pads the result with a
 * zero entry for every token of `setToken` that does not occur in `input`.
 *
 * input    - tokens of one document, in any order, repeats allowed.
 * setToken - vocabulary whose terms must all be present as keys in the result.
 *
 * Returns a map from term to occurrence count. Tokens that appear in `input`
 * but not in `setToken` are still counted and kept in the result.
 */
map<string, int> CountTermFrequency(vector<string> input, set<string> setToken){
    map<string, int> Term;
    for (const string &token : input){
        ++Term[token];
    }
    for (const string &token : setToken){
        // emplace() is a no-op when the key already exists, so counts gathered
        // above are never overwritten; only missing terms receive a zero.
        Term.emplace(token, 0);
    }
    return Term;
}
/* ----- Function for Finding the Dot Product of Two Vectors ----- */
/*
 * Returns the sum of the element-wise products of `v1` and `v2`.
 *
 * When the vectors differ in length, only the leading elements present in
 * both are multiplied; the surplus elements of the longer vector contribute
 * nothing (equivalent to zero-padding the shorter one). Two empty vectors
 * give 0.
 */
int dotProduct(vector<int> v1, vector<int> v2){
    // Bound the loop by the shorter vector so neither is read past its end.
    const size_t sharedLength = min(v1.size(), v2.size());
    int product = 0;
    for (size_t i = 0; i < sharedLength; ++i){
        product += v1[i] * v2[i];
    }
    return product;
}
/*
 * Returns the Euclidean length of `v`: the square root of the dot product of
 * the vector with itself.
 */
double magnitude(vector<int> v){
    const int sumOfSquares = dotProduct(v, v);
    return std::sqrt(static_cast<double>(sumOfSquares));
}
/*
 * Writes the similarity score `result` to the file `filename`, replacing any
 * previous contents with a single line of text.
 *
 * A failure to open or to write the file is reported on stderr instead of
 * being ignored; the function then returns without terminating the program.
 */
void writeResult(const char *filename, double result){
    std::ofstream file(filename);
    if (!file.is_open()){
        std::cerr << "Could not open the file - '" << filename << "'" << std::endl;
        return;
    }
    file << "The Cosine Similarity In Document1 and Document2 is :- " << result << '\n';
    // close() flushes the buffer; checking afterwards also catches errors that
    // only surface when the data is actually written out.
    file.close();
    if (file.fail()){
        std::cerr << "Could not write the result to '" << filename << "'" << std::endl;
    }
}
/*
 * Computes the cosine similarity of two term-frequency maps and writes it to
 * "Sample-Output.txt" through writeResult().
 *
 * M1, M2 - term -> occurrence count for each document.
 *
 * Counts are paired by term, not by position in the map, so a term missing
 * from one map is treated as a count of zero there. If either document has no
 * non-zero counts, the similarity is undefined (0/0); 0 is written in that
 * case instead of NaN.
 */
void CosineSimilarity(map<string, int> M1, map<string, int> M2){
    // 64-bit accumulators keep the sums exact for large counts.
    long long dot = 0;
    long long squares1 = 0;
    long long squares2 = 0;

    for (const auto &entry : M1){
        const long long count1 = entry.second;
        squares1 += count1 * count1;
        const auto match = M2.find(entry.first);
        if (match != M2.end()){
            dot += count1 * match->second;
        }
    }
    for (const auto &entry : M2){
        const long long count2 = entry.second;
        squares2 += count2 * count2;
    }

    const double norms = std::sqrt(static_cast<double>(squares1))
                       * std::sqrt(static_cast<double>(squares2));
    // Guard the division: a zero-length vector has no direction to compare.
    const double cosine = (norms > 0.0) ? static_cast<double>(dot) / norms : 0.0;
    writeResult("Sample-Output.txt", cosine);
}
/* ----- Main Function of the Program ----- */
/*
 * Program entry point.
 *
 * Usage: program [input1 [input2]]
 * The two documents default to "Sample-Input1.txt" and "Sample-Input2.txt"
 * when the corresponding argument is not supplied. Both files are read,
 * tokenised on whitespace, converted to term-frequency maps over their shared
 * vocabulary, and passed to CosineSimilarity().
 *
 * Returns EXIT_SUCCESS after CosineSimilarity() returns.
 */
int main(int argc, char const *argv[]){
    const string Input1 = (argc > 1) ? argv[1] : "Sample-Input1.txt";
    const string Input2 = (argc > 2) ? argv[2] : "Sample-Input2.txt";

    const string ReadInput1 = readFileIntoString(Input1);
    const string ReadInput2 = readFileIntoString(Input2);

    const vector<string> vectorReadInput1 = ConverstStringTovectorToken(ReadInput1);
    const vector<string> vectorReadInput2 = ConverstStringTovectorToken(ReadInput2);

    // Both frequency maps are padded against the same vocabulary so that they
    // end up with the same set of keys.
    const set<string> setToken = ConverstStringToSetTokens(ReadInput1, ReadInput2);
    const map<string, int> Input1Frequency = CountTermFrequency(vectorReadInput1, setToken);
    const map<string, int> Input2Frequency = CountTermFrequency(vectorReadInput2, setToken);

    CosineSimilarity(Input1Frequency, Input2Frequency);

    // Returning (rather than calling exit) lets the locals above be destroyed
    // normally before the program ends.
    return EXIT_SUCCESS;
}