Given a string variable string 's;', implement a tokenizer to identify the unique tokens contained in this string, identify all the unique tokens and their frequencies, and store such information into a vector. Tokens are sequences of contiguous characters separated by any of the specified delimiters (e.g., white spaces). In this lab, only white spaces will be considered as delimiters. For instance, the string "Hello, what's that thing? " contains four tokens: "Hello", "what's", "that" and "thing?". The frequency of a token is the number of times this token appears in this string. In this example, each token has a frequency of 1. Note that in this lab, these tokens are case insensitive. For example, "Hello" and "hello" are considered to be the same token.
Specifically, you are required to declare a struct TokenFreq that consists of two data members: (1) string token; and (2) int freq; Obviously, an object of this struct will be used to store a specific token and its frequency. For example, the following object word stores the token "dream" and its frequency 100:

TokenFreq word;
word.value="dream";
word.freq=100;

implement the following function, where istr is the input string, and tfVec will be used to store the list of unique and case insensitive tokens and their corresponding frequencies identified within istr. You might find it's very convenient to use stringstream objects to tokenize a string.

void getTokenFreqVec( const string& istr, vector & tfVec)
Assume that the value of istr is

And no, I'm not a walking C++ dictionary. I do not keep every technical detail in my head at all times. If I did that, I would be a much poorer programmer. I do keep the main points straight in my head most of the time, and I do know where to find the details when I need them. by Bjarne Stroustrup
After calling the above function, tfVec is expected to contain the following values (where order of appearances doesn't matter):

size=46 {
[0] = (token = "and", freq = 2)
[1] = (token = "no,", freq = 1)
[2] = (token = "i'm", freq = 1)
[3] = (token = "not", freq = 2)
[4] = (token = "a", freq = 2)
[5] = (token = "walking", freq = 1)
[6] = (token = "c++", freq = 1)
[7] = (token = "dictionary.", freq = 1)
[8] = (token = "i", freq = 6)
[9] = (token = "do", freq = 3)
[10] = (token = "keep", freq = 2)
[11] = (token = "every", freq = 1)
[12] = (token = "technical", freq = 1)
[13] = (token = "detail", freq = 1)
[14] = (token = "in", freq = 2)
[15] = (token = "my", freq = 2)
[16] = (token = "head", freq = 2)
[17] = (token = "at", freq = 1)
[18] = (token = "all", freq = 1)
[19] = (token = "times.", freq = 1)
[20] = (token = "if", freq = 1)
[21] = (token = "did", freq = 1)
[22] = (token = "that,", freq = 1)
[23] = (token = "would", freq = 1)
[24] = (token = "be", freq = 1)
[25] = (token = "much", freq = 1)
[26] = (token = "poorer", freq = 1)
[27] = (token = "programmer.", freq = 1)
[28] = (token = "the", freq = 3)
[29] = (token = "main", freq = 1)
[30] = (token = "points", freq = 1)
[31] = (token = "straight", freq = 1)
[32] = (token = "most", freq = 1)
[33] = (token = "of", freq = 1)
[34] = (token = "time,", freq = 1)
[35] = (token = "know", freq = 1)
[36] = (token = "where", freq = 1)
[37] = (token = "to", freq = 1)
[38] = (token = "find", freq = 1)
[39] = (token = "details", freq = 1)
[40] = (token = "when", freq = 1)
[41] = (token = "need", freq = 1)
[42] = (token = "them.", freq = 1)
[43] = (token = "by", freq = 1)
[44] = (token = "bjarne", freq = 1)
[45] = (token = "stroustrup", freq = 1)
Required:
Implement the selection sort algorithm to sort a vector in ascending order of token frequency. This function has the following prototype:

void selectionSort( vector & tokFreqVector );
//This function receives a vector of TokenFreq objects by reference and applies the selections sort algorithm to sort this vector in increasing order of token frequencies.
Implement the insertion sort algorithm to sort a vector in descending order of token frequency.

Respuesta :

Answer:

#include <iostream>

#include <string>

#include <vector>

#include <cctype>

#include <algorithm>

using namespace std;

void matrixInit( vector< vector<int> >& matrix, int numRows, int numCols);

struct TokenFreq{

string token;

int freq;

};

string spaces(string& istr, string& newstr);

void printVector(vector<TokenFreq> & tfVec);

void getTokenFreqVec( string& istr, vector<TokenFreq> & tfVecVec);

void selectionSort( vector<TokenFreq> & tokFreqVector );  

void insertionSort( vector<TokenFreq> & tokFreqVector );

//main

int main(int argc, const char * argv[]) {

 

vector<vector<int> > matrix;

matrixInit(matrix, 3, 4);

 

string ostr1 = " I do keep the main points straight in my head most of the time, and I do know where to find the details when I need them. by Bjarne Stroustrup";

string ostr2 = " ";

string ostr3 = "Your time is limited, so don’t waste it living someone else’s life.Stay Hungry. Stay Foolish.";

//string ostr = ostr2;

string istr;

 

cout << "Enter the text: ";

getline(cin, istr);

vector<TokenFreq> tfVec;

getTokenFreqVec(istr, tfVec);

selectionSort(tfVec);

insertionSort(tfVec);

return 0;

}

//-----

void selectionSort( vector<TokenFreq> & tokFreqVector ){

TokenFreq temp;

int min;

 

for( int i = 0; i < int(tokFreqVector.size()-1); i++){

min = i;

for (int j = i+1; j <int(tokFreqVector.size()); j++){

if (tokFreqVector[j].freq < tokFreqVector[min].freq)

min = j;

}

if(min!=i)

{

temp = tokFreqVector[i];

tokFreqVector[i] = tokFreqVector[min];

tokFreqVector[min] = temp;

}

}

cout<<"-----selectionSort------------------"<<endl;

printVector(tokFreqVector);

}

void insertionSort( vector<TokenFreq> & tokFreqVector ){

TokenFreq temp;

int max;

 

for( int i = 0; i < int(tokFreqVector.size()-1); i++){

max = i;

for (int j = i+1; j < int(tokFreqVector.size()); j++){

if (tokFreqVector[j].freq > tokFreqVector[max].freq)

max = j;

}

if(max!=i)

{

temp = tokFreqVector[i];

tokFreqVector[i] = tokFreqVector[max];

tokFreqVector[max] = temp;

}

}

cout<<"-----insertionsort------------------"<<endl;

printVector(tokFreqVector);

}

string spaces(string& istr, string & newstr){

int j;

//Check double space and create new string with lowercase

for (int i = 0; i < int(istr.length()); ++i){

istr[i] = tolower(istr[i]);

if (isspace(istr[i])){

//check double space again

j = i+1;

while(true){

if (istr[j] == '\342') istr[j]=' ';

if (istr[j] == '\200') istr[j]=' ';

if (istr[j] == '\224') istr[j]=' ';

if(isspace(istr[j])) j++;

else break;

}

i=j-1;

newstr = newstr + istr[i];

}else

newstr = newstr + istr[i];

}

return newstr;

}

void getTokenFreqVec( string& istr, vector<TokenFreq> & tfVec){

string newstr;

int curr_beg = 0;

int curr_end = 0;

string word;

bool check = false;

int index=0;

 

//call func spaces to get new string

newstr = spaces(istr, newstr);

//cout << newstr << endl;

//cout << endl;

//cout << newstr << "||" << endl;

string tempstr = newstr;

 

if(newstr != " "){

tfVec.push_back(TokenFreq());

}

for (int i = 0; i < int(newstr.length()); ++i){

if(isspace(newstr[i])){

check = false;

 

curr_end = i;

word = tempstr.substr(curr_beg, curr_end - curr_beg);

curr_beg = i + 1;

 

for (int j = 0; j < int(tfVec.size()); j++){

if (tfVec[j].token == word){

tfVec[j].freq++;

check = true;

 

break;

}

}//end-for;

if (check == false && i!=(int(newstr.length())-1)){

//cout <<"**"<< word <<"**"<<word.length()<< endl;

tfVec[index].token=word;

tfVec[index].freq=1;

index++;

tfVec.push_back(TokenFreq());

}

word = "";

//add last value

}

}//end-for

 

if(!isspace(newstr[int(newstr.length())-1])){

word = tempstr.substr(curr_beg, curr_end - (int(newstr.length())-1));

tfVec[index].token=word;

tfVec[index].freq=1;

//tfVec.push_back(TokenFreq());

}

 

printVector(tfVec);

}

void printVector(vector<TokenFreq> & tfVec){

 

cout << "size= {" << tfVec.size() << endl;

for (int i = 0; i < int(tfVec.size()); i++){

cout << " [" << i << "] = (token = \""<< tfVec[i].token <<"\", freq = " << tfVec[i].freq << ")" << endl;

//cout << tfVec[i].token<<endl;;

}

cout << endl;

}

void matrixInit( vector< vector<int> >& matrix, int numRows, int numCols){

int i;

int j;

matrix.resize(numRows, vector<int>(numCols) );

for(i = 0; i< numRows; i++ ){

for(j = 0; j < numCols; j++ )

matrix[i][j] = i*j;

}

 

cout << "size of matrix is: " << numRows << "x" << numCols << endl;

for (i = 0; i < numRows; i++){

for (j = 0; j < numCols; j++){

cout<< "matrix[" << i << "][" << i << "]=" << matrix[i][j] << endl;

}

}

cout << endl;

}

Explanation: