TF-IDF

Joined
Aug 19, 2021
Messages
3
Reaction score
1
An implementation of the TF-IDF algorithm in C++ to find information in a text file (myfile.txt) by asking the program a question in natural language. If you find any calculation errors, please report them to me!
C++:
#include <iostream>
#include <map>
#include <string>
#include <sstream>
#include <fstream>
#include <cctype>
#include <unordered_set>
#include <algorithm>
#include <cctype>
#include <bits/stdc++.h>
#include <math.h>
using namespace std;

// Shared global state (kept global for compatibility with the rest of
// the program).
std::map<std::string, double> wordQueries;   // query word -> normalized TF score
std::vector<std::string> ret;                // tokens produced by the latest split() call
std::vector<double> lengthOfLine;            // character length of every input line

/**
 * Split `s` on whitespace and return the resulting tokens.
 *
 * The tokens are also stored in the global vector `ret`, which the rest
 * of the program reads directly.
 *
 * Bug fix: the original version never cleared `ret`, so every call
 * appended to the tokens of all previous calls; any caller that invoked
 * split() more than once (e.g. Keyword(), once per matching line) saw
 * inflated word counts. `ret` is now reset on entry. Characters are also
 * cast to unsigned char before isspace() to avoid undefined behavior on
 * negative char values.
 */
std::vector<std::string> split(const std::string &s)
{
    ret.clear();  // each call reports only the tokens of `s`

    std::string::size_type i = 0;
    while (i != s.size())
    {
        // Skip the whitespace run before the next token.
        while (i != s.size() && std::isspace(static_cast<unsigned char>(s[i])))
            ++i;

        // Advance j to the end of the current token.
        std::string::size_type j = i;
        while (j != s.size() && !std::isspace(static_cast<unsigned char>(s[j])))
            ++j;

        if (i != j)
        {
            ret.push_back(s.substr(i, j - i));
            i = j;
        }
    }

    return ret;
}

/**
 * Return the line of `fileName` that best matches `token`, scored by a
 * BM25-style TF-IDF: for each line containing `token`, the score is
 * tfnorm * log((N - tf + 0.5) / (tf + 0.5)), where tfnorm is the
 * normalized term frequency computed in main() (looked up in the global
 * wordQueries) and N is the word count of the candidate line.
 *
 * Fixes over the original:
 *  - wordQueries.find(token) was dereferenced without checking for
 *    end(), which is undefined behavior when the token was never scored;
 *  - the max_element() iterator was dereferenced even when no line
 *    contained the token (also UB). An empty string is returned instead.
 */
string Keyword(string token, string fileName, double tf)
{
    // tfnorm is constant for a given token, so look it up once.
    auto scored = wordQueries.find(token);
    double tfnorm = (scored != wordQueries.end()) ? scored->second : 0.0;

    map<string, double> lineSelection;
    std::ifstream ifs(fileName);
    string line;
    while (getline(ifs, line))
    {
        if (line.find(token) != string::npos)
        {
            // N = number of words in this candidate line (split() also
            // fills the global `ret`).
            int N = static_cast<int>(split(line).size());

            // idf = log[(N - df + 0.5)/(df + 0.5)], with the caller's tf
            // standing in for the document frequency df.
            double varidf = log((N - tf + 0.5) / (tf + 0.5));

            // Combine: the global IDF weight times the local TF
            // normalization, which dampens words that appear too often.
            lineSelection.insert(std::pair<string, double>(line, tfnorm * varidf));
        }
    }

    if (lineSelection.empty())
        return "";  // no line contains the token — do not dereference end()

    auto best = std::max_element(lineSelection.begin(), lineSelection.end(),
        [](const std::pair<string, double> &a, const std::pair<string, double> &b)
        {
            return a.second < b.second;
        });
    return best->first;
}

// Return `src` with every word that appears in `stops` removed; the
// remaining words are re-joined with single spaces (no trailing space).
std::string remove_stop_words(const std::string &src, const std::unordered_set<std::string > &stops)
{
    std::istringstream input(src);
    std::string token;
    std::string kept;
    bool first = true;
    while (input >> token)
    {
        if (stops.count(token) != 0)
            continue;  // drop stop words
        if (!first)
            kept += ' ';
        kept += token;
        first = false;
    }
    return kept;
}

int main()
{
    string insertit;
    double tf;
    std::string fileName = "myfile.txt";
    std::ifstream inFile(fileName);

    int wordCount = 0;
    int lineNum = 0;

    std::unordered_map<std::string, int> words;

    std::string str;
    cout << "Please enter your question " << endl;
    getline(cin, insertit);

    std::unordered_set<std::string > stops = { "a", "about", "above", "after", "again", "against", "all", "am", "an", "and", "any", "are", "aren't", "as", "at", "be", "because",
        "been", "before", "being", "below", "between", "both", "but", "by", "can't", "cannot", "could", "couldn't", "did", "didn't", "do", "does", "doesn't", "doing", "don't",
        "down", "during", "each", "few", "for", "from", "further", "had", "hadn't", "has", "hasn't", "have", "haven't", "having", "he", "he'd", "he'll", "he's", "her", "here",
        "here's", "hers", "herself", "him", "himself", "his", "how", "how's", "i", "i'd", "i'll", "i'm", "i've", "if", "in", "into", "is", "isn't", "it", "it's", "its", "itself",
        "let's", "me", "more", "most", "mustn't", "my", "myself", "no", "nor", "not", "of", "off", "on", "once", "only", "or", "other", "ought", "our", "ours", "ourselves", "out",
        "over", "own", "same", "shan't", "she", "she'd", "she'll", "she's", "should", "shouldn't", "so", "some", "such", "than", "that", "that's", "the", "their", "theirs",
        "them", "themselves", "then", "there", "there's", "these", "they", "they'd", "they'll", "they're", "they've", "this", "those", "through", "to", "too", "under", "until",
        "up", "very", "was", "wasn't", "we", "we'd", "we'll", "we're", "we've", "were", "weren't", "what", "what's", "when", "when's", "where", "where's", "which", "while",
        "who", "who's", "whom", "why", "why's", "with", "won't", "would", "wouldn't", "you", "you'd", "you'll", "you're", "you've", "your", "yours", "yourself", "yourselves" };
    insertit = remove_stop_words(insertit, stops);

    /*
    In computing, stop words are words that are filtered out before or after the natural language data (text)
    are processed. While stop words typically refers to the most common words in a language, all-natural language
    processing tools don't use a single universal list of stop words.
    */
    split(insertit);
    int M = ret.size();

    while (getline(inFile, str))
    {
        std::stringstream ss(str);
        while (ss)
        {
            std::string s;
            ss >> s;
            std::transform(s.begin(), s.end(), s.begin(),
            [](unsigned char c)
                {
                    return std::tolower(c);
    });
            s.erase(std::remove_if(s.begin(), s.end(),
                [](unsigned char c)
                    {
                        return std::isalpha(c) == 0;
}),
                s.end());
            if (!s.empty())
            {
                ++wordCount;
                ++words[s];
            }
        }

        ++lineNum;
        int lineSize = str.length();
        lengthOfLine.push_back(lineSize);
    }

    auto n = lengthOfLine.size();
    double avg_doc_len = 0.0f;
    if (n != 0)
    {
        avg_doc_len = accumulate(lengthOfLine.begin(), lengthOfLine.end(), 0.0) / n;
    }

    double doclen_correction = 0.75 *wordCount / avg_doc_len + 0.25;

    for (auto &pair: words)
    {
        for (int j = 0; j < M; j++)
        {
            if (pair.first == ret[j])
            {
                tf = pair.second / wordCount;
                double tfnorm = tf / (tf + doclen_correction);
                /*
                tf = number of times word appears in a document, the term frequency
                doclen = number of total words (including duplicates) in a document, the document length
                avg_doc_len = average of all document lengths in the collection
                doclen_correction = 0.75*doclen/avg_doclen + 0.25
                tfnorm= tf/(tf + doclen_correction)
                */
                wordQueries.insert(std::pair<string, double> (pair.first, tfnorm));
            }
        }
    }

    std::map<string, double>::iterator best = std::max_element(wordQueries.begin(), wordQueries.end(), [](const std::pair<string, double> &a, const std::pair<string, double> &b)->bool
    {
        return a.second < b.second;
    });
    string token = best->first;
    cout << "Let's talk about \"" << token << "\"... " << endl;
    cout << Keyword(token, fileName, tf) << endl;
    return 0;
}
 
Joined
Aug 31, 2021
Messages
1
Reaction score
0
You must replace the line:
std::unordered_map<std::string, int> words;
with this line:
std::unordered_map<std::string, double> words;
otherwise the int/int division truncates tf to 0, and tfnorm becomes 0 as well. Here is the corrected code:

C++:
#include <iostream>
#include <map>
#include <string>
#include <sstream>
#include <fstream>
#include <cctype>
#include <unordered_set>
#include <algorithm>
#include <cctype>
#include <bits/stdc++.h>
#include <math.h>
using namespace std;

// Shared global state (kept global for compatibility with the rest of
// the program).
std::map<std::string, double> wordQueries;   // query word -> normalized TF score
std::vector<std::string> ret;                // tokens produced by the latest split() call
std::vector<double> lengthOfLine;            // character length of every input line

/**
 * Split `s` on whitespace and return the resulting tokens.
 *
 * The tokens are also stored in the global vector `ret`, which the rest
 * of the program reads directly.
 *
 * Bug fix: the original version never cleared `ret`, so every call
 * appended to the tokens of all previous calls; any caller that invoked
 * split() more than once (e.g. Keyword(), once per matching line) saw
 * inflated word counts. `ret` is now reset on entry. Characters are also
 * cast to unsigned char before isspace() to avoid undefined behavior on
 * negative char values.
 */
std::vector<std::string> split(const std::string &s)
{
    ret.clear();  // each call reports only the tokens of `s`

    std::string::size_type i = 0;
    while (i != s.size())
    {
        // Skip the whitespace run before the next token.
        while (i != s.size() && std::isspace(static_cast<unsigned char>(s[i])))
            ++i;

        // Advance j to the end of the current token.
        std::string::size_type j = i;
        while (j != s.size() && !std::isspace(static_cast<unsigned char>(s[j])))
            ++j;

        if (i != j)
        {
            ret.push_back(s.substr(i, j - i));
            i = j;
        }
    }

    return ret;
}

/**
 * Return the line of `fileName` that best matches `token`, scored by a
 * BM25-style TF-IDF: for each line containing `token`, the score is
 * tfnorm * log((N - tf + 0.5) / (tf + 0.5)), where tfnorm is the
 * normalized term frequency computed in main() (looked up in the global
 * wordQueries) and N is the word count of the candidate line.
 *
 * Fixes over the original:
 *  - wordQueries.find(token) was dereferenced without checking for
 *    end(), which is undefined behavior when the token was never scored;
 *  - the max_element() iterator was dereferenced even when no line
 *    contained the token (also UB). An empty string is returned instead.
 */
string Keyword(string token, string fileName, double tf)
{
    // tfnorm is constant for a given token, so look it up once.
    auto scored = wordQueries.find(token);
    double tfnorm = (scored != wordQueries.end()) ? scored->second : 0.0;

    map<string, double> lineSelection;
    std::ifstream ifs(fileName);
    string line;
    while (getline(ifs, line))
    {
        if (line.find(token) != string::npos)
        {
            // N = number of words in this candidate line (split() also
            // fills the global `ret`).
            int N = static_cast<int>(split(line).size());

            // idf = log[(N - df + 0.5)/(df + 0.5)], with the caller's tf
            // standing in for the document frequency df.
            double varidf = log((N - tf + 0.5) / (tf + 0.5));

            // Combine: the global IDF weight times the local TF
            // normalization, which dampens words that appear too often.
            lineSelection.insert(std::pair<string, double>(line, tfnorm * varidf));
        }
    }

    if (lineSelection.empty())
        return "";  // no line contains the token — do not dereference end()

    auto best = std::max_element(lineSelection.begin(), lineSelection.end(),
        [](const std::pair<string, double> &a, const std::pair<string, double> &b)
        {
            return a.second < b.second;
        });
    return best->first;
}

// Return `src` with every word that appears in `stops` removed; the
// remaining words are re-joined with single spaces (no trailing space).
std::string remove_stop_words(const std::string &src, const std::unordered_set<std::string > &stops)
{
    std::istringstream input(src);
    std::string token;
    std::string kept;
    bool first = true;
    while (input >> token)
    {
        if (stops.count(token) != 0)
            continue;  // drop stop words
        if (!first)
            kept += ' ';
        kept += token;
        first = false;
    }
    return kept;
}

int main()
{
    string insertit;
    
    double doclen_correction;
    std::string fileName = "myfile.txt";
    std::ifstream inFile(fileName);

    int wordCount = 0.0f;
    int lineNum = 0.0f;
    int termOccur;
    double tf = 0.0f;
    std::unordered_map<std::string, double> words;

    std::string str;
    cout << "Please enter your question " << endl;
    getline(cin, insertit);
    
    std::unordered_set<std::string > stops = { "a", "about", "above", "after", "again", "against", "all", "am", "an", "and", "any", "are", "aren't", "as", "at", "be", "because",
        "been", "before", "being", "below", "between", "both", "but", "by", "can't", "cannot", "could", "couldn't", "did", "didn't", "do", "does", "doesn't", "doing", "don't",
        "down", "during", "each", "few", "for", "from", "further", "had", "hadn't", "has", "hasn't", "have", "haven't", "having", "he", "he'd", "he'll", "he's", "her", "here",
        "here's", "hers", "herself", "him", "himself", "his", "how", "how's", "i", "i'd", "i'll", "i'm", "i've", "if", "in", "into", "is", "isn't", "it", "it's", "its", "itself",
        "let's", "me", "more", "most", "mustn't", "my", "myself", "no", "nor", "not", "of", "off", "on", "once", "only", "or", "other", "ought", "our", "ours", "ourselves", "out",
        "over", "own", "same", "shan't", "she", "she'd", "she'll", "she's", "should", "shouldn't", "so", "some", "such", "than", "that", "that's", "the", "their", "theirs",
        "them", "themselves", "then", "there", "there's", "these", "they", "they'd", "they'll", "they're", "they've", "this", "those", "through", "to", "too", "under", "until",
        "up", "very", "was", "wasn't", "we", "we'd", "we'll", "we're", "we've", "were", "weren't", "what", "what's", "when", "when's", "where", "where's", "which", "while",
        "who", "who's", "whom", "why", "why's", "with", "won't", "would", "wouldn't", "you", "you'd", "you'll", "you're", "you've", "your", "yours", "yourself", "yourselves" };
    insertit = remove_stop_words(insertit, stops);


    /*
    In computing, stop words are words that are filtered out before or after the natural language data (text)
    are processed. While stop words typically refers to the most common words in a language, all-natural language
    processing tools don't use a single universal list of stop words.
    */
    split(insertit);
    

    while (getline(inFile, str))
    {
        std::stringstream ss(str);
        while (ss)
        {
            std::string s;
            ss >> s;
            std::transform(s.begin(), s.end(), s.begin(),
            [](unsigned char c)
                {
                    return std::tolower(c);
    });
            s.erase(std::remove_if(s.begin(), s.end(),
                [](unsigned char c)
                    {
                        return std::isalpha(c) == 0;
}),
                s.end());
            if (!s.empty())
            {
                wordCount = wordCount+1;
                ++words[s];
            }
        }

        ++lineNum;
        int lineSize = str.length();
        lengthOfLine.push_back(lineSize);
    }

    auto n = lengthOfLine.size();
    double avg_doc_len = 0.0f;
    if (n != 0)
    {
        avg_doc_len = accumulate(lengthOfLine.begin(), lengthOfLine.end(), 0.0) / n;
    }

    doclen_correction = 0.75 *wordCount / avg_doc_len + 0.25;
    
    for (auto &pair: words)
    {
        int M = ret.size();
        for (int j = 0; j < M; j++)
        {
            if (pair.first == ret[j])
            {
                
                
                tf = pair.second/wordCount;
                
                double tfnorm = tf/(tf + doclen_correction);
                
                /*
                tf = number of times word appears in a document, the term frequency
                doclen = number of total words (including duplicates) in a document, the document length
                avg_doc_len = average of all document lengths in the collection
                doclen_correction = 0.75*doclen/avg_doclen + 0.25
                tfnorm= tf/(tf + doclen_correction)
                */
                wordQueries.insert(std::pair<string, double> (pair.first, tfnorm));
            }
            else
            {
                continue;
            }
        }
    }

    std::map<string, double>::iterator best = std::max_element(wordQueries.begin(), wordQueries.end(), [](const std::pair<string, double> &a, const std::pair<string, double> &b)->bool
    {
        return a.second < b.second;
    });
    string token = best->first;
    cout << "Let's talk about \"" << token << "\"... " << endl;
    cout << Keyword(token, fileName, tf) << endl;
    
}
 
Joined
Sep 21, 2024
Messages
2
Reaction score
0
This C++ code implements a basic framework for a Bayesian Network and integrates it with a simple Q-learning algorithm. The code is structured into several classes and functions that work together to perform tasks related to probabilistic reasoning and reinforcement learning.


Key Components:​


  1. Node Class:
    • Represents a node in the Bayesian Network.
    • Each node has a name, a list of parent and child nodes, and a probability distribution associated with it.
  2. BayesianNetwork Class:
    • Manages a collection of nodes and their relationships.
    • Provides methods to add nodes and edges, retrieve nodes, and calculate conditional and joint probabilities based on the network structure.
  3. QLearning Class:
    • Implements a simple Q-learning algorithm.
    • Contains methods for selecting actions based on the current state and an exploration rate, allowing the agent to balance between exploration and exploitation.
  4. Text Processing Functions:
    • Includes functions to split strings, remove stop words from text, and compute TF-IDF (Term Frequency-Inverse Document Frequency) scores for words in a given document.
    • These functions are essential for processing user input and analyzing text data.
  5. Structure Learning and Parameter Learning:
    • The structureLearning function creates a Bayesian Network structure based on a set of variables.
    • The parameterLearning function populates the probability distributions of the nodes based on provided data.
  6. Main Function:
    • The entry point of the program, which handles user input and processes a text file.
    • It filters the user input to remove stop words, calculates average document length, and performs structure and parameter learning for the Bayesian Network.
    • It computes TF-IDF scores for the filtered input and adjusts these scores based on the conditional probabilities from the Bayesian Network.
    • Finally, it selects the best keyword based on the computed scores and demonstrates the Q-learning agent's action selection.

Usage:​


  • The program prompts the user to ask a question, processes the input, and identifies the most relevant keyword from the text file based on the Bayesian Network's probabilistic reasoning.
  • It also showcases a simple reinforcement learning scenario by selecting an action based on the Q-learning algorithm.

This code serves as a foundational example of combining probabilistic graphical models with reinforcement learning techniques, making it suitable for applications in areas such as natural language processing and decision-making systems.
C++:
#include <iostream>
#include <fstream>
#include <sstream>
#include <string>
#include <vector>
#include <unordered_map>
#include <unordered_set>
#include <algorithm>
#include <numeric>
#include <cmath>
#include <random>

// A node in the Bayesian network: a named random variable with links to
// its parents and children and an associated probability table.
// NOTE(review): parent/child pointers are raw and non-owning; nothing in
// this file ever deletes the nodes created with `new` — confirm intended
// ownership.
class Node {
public:
    std::string name;
    std::vector<Node*> parents;
    std::vector<Node*> children;
    std::unordered_map<std::string, double> distribution; // conditional probability distribution, keyed by value/name

    Node(const std::string& name) : name(name) {}
};

// A minimal Bayesian network: a flat list of nodes with parent/child
// links and naive probability queries over their distributions.
// NOTE(review): the network stores raw Node* and never deletes them;
// the nodes' lifetime is owned by whoever created them.
class BayesianNetwork {
public:
    std::vector<Node*> nodes;

    // Register a node (no duplicate check is performed).
    void addNode(Node* node) {
        nodes.push_back(node);
    }

    // Link parent -> child; silently ignored if either name is unknown.
    void addEdge(const std::string& parent, const std::string& child) {
        Node* parentNode = getNode(parent);
        Node* childNode = getNode(child);
        if (parentNode && childNode) {
            parentNode->children.push_back(childNode);
            childNode->parents.push_back(parentNode);
        }
    }

    // Linear search by name; returns nullptr when absent.
    Node* getNode(const std::string& name) {
        for (auto& node : nodes) {
            if (node->name == name) {
                return node;
            }
        }
        return nullptr;
    }

    // Accumulate the (unnormalized) probability of `target` given the
    // evidence; with unique node names at most one node contributes.
    double getConditionalProbability(const std::string& target, const std::unordered_map<std::string, std::string>& evidence) {
        double posterior = 0.0;
        for (const auto& node : nodes) {
            if (node->name == target) {
                double prior = calculateJointProbability(target, evidence);
                posterior += prior; // normalize if necessary
            }
        }
        return posterior;
    }

    // Test whether P(A and B) == P(A) * P(B).
    // FIX: the original compared the doubles with ==, which is unreliable
    // for floating point; an epsilon comparison is used instead.
    bool areIndependent(const std::string& eventA, const std::string& eventB) {
        double P_A = getProbability(eventA);
        double P_B = getProbability(eventB);
        double P_A_and_B = getJointProbability(eventA, eventB);
        return std::fabs(P_A_and_B - P_A * P_B) < 1e-9;
    }

private:
    // P(target | parents) times the product of P(evidence value).
    // FIX: the original used operator[] / at() on the distributions,
    // which either inserted spurious 0.0 entries (mutating the node) or
    // threw std::out_of_range; a missing entry now contributes 0.0.
    double calculateJointProbability(const std::string& target, const std::unordered_map<std::string, std::string>& evidence) {
        Node* targetNode = getNode(target);
        if (!targetNode) return 0.0;

        auto self = targetNode->distribution.find(target);
        double jointProbability = (self != targetNode->distribution.end()) ? self->second : 0.0; // P(target | parents)
        for (const auto& [var, value] : evidence) {
            Node* evidenceNode = getNode(var);
            if (evidenceNode) {
                auto p = evidenceNode->distribution.find(value);
                jointProbability *= (p != evidenceNode->distribution.end()) ? p->second : 0.0; // P(evidence)
            }
        }
        return jointProbability;
    }

    // Marginal probability of `event`; 0.0 when unknown.
    double getProbability(const std::string& event) {
        Node* node = getNode(event);
        if (node) {
            auto p = node->distribution.find(event);
            return (p != node->distribution.end()) ? p->second : 0.0;
        }
        return 0.0;
    }

    // Placeholder for P(A ∩ B); always returns 0.0 until implemented.
    double getJointProbability(const std::string& eventA, const std::string& eventB) {
        return 0.0; // TODO: replace with a real joint-probability computation
    }
};

// Tabular Q-learning agent with an epsilon-greedy action policy.
class QLearning {
public:
    // numStates/numActions size the Q table; gamma is the discount
    // factor, alpha the learning rate.
    QLearning(int numStates, int numActions, double gamma, double alpha) :
        m_numStates(numStates), m_numActions(numActions), m_gamma(gamma), m_alpha(alpha) {
        m_qTable.resize(m_numStates, std::vector<double>(m_numActions, 0.0));
    }

    // Epsilon-greedy selection: with probability `epsilon` pick a random
    // action (exploration); otherwise pick the first action with the
    // highest Q value for `state` (exploitation).
    int chooseAction(int state, double epsilon) {
        const double roll = std::rand() / static_cast<double>(RAND_MAX);
        if (roll < epsilon) {
            return std::rand() % m_numActions;
        }
        const std::vector<double>& row = m_qTable[state];
        return static_cast<int>(std::max_element(row.begin(), row.end()) - row.begin());
    }

    // Standard Q-learning update:
    // Q(s,a) <- (1-alpha)*Q(s,a) + alpha*(reward + gamma*max_a' Q(s',a')).
    void updateQTable(int state, int action, double reward, int nextState) {
        const double bestNext = *std::max_element(m_qTable[nextState].begin(), m_qTable[nextState].end());
        m_qTable[state][action] = (1 - m_alpha) * m_qTable[state][action] + m_alpha * (reward + m_gamma * bestNext);
    }

    // Print the whole Q table to stdout.
    void getQValue() {
        std::cout << "Final Q-Value Table:" << std::endl;
        for (int state = 0; state < getNumStates(); ++state) {
            std::cout << "State " << state << ": ";
            for (int action = 0; action < m_numActions; ++action) {
                std::cout << m_qTable[state][action] << " ";
            }
            std::cout << std::endl;
        }
    }

    int getNumStates() {
        return m_numStates;
    }

private:
    int m_numStates;
    int m_numActions;
    double m_gamma;
    double m_alpha;
    std::vector<std::vector<double>> m_qTable;
};

// Tokenize `s` on whitespace via stream extraction and return the words.
std::vector<std::string> split(const std::string& s) {
    std::vector<std::string> words;
    std::istringstream stream(s);
    for (std::string word; stream >> word; ) {
        words.push_back(word);
    }
    return words;
}

// Return `text` with every word present in `stop_words` removed; the
// surviving words are joined by single spaces (no trailing space).
std::string remove_stop_words(const std::string& text, const std::unordered_set<std::string>& stop_words) {
    std::string kept;
    for (const auto& word : split(text)) {
        if (stop_words.count(word) != 0) {
            continue;  // skip stop words
        }
        if (!kept.empty()) {
            kept += ' ';
        }
        kept += word;
    }
    return kept;
}

// Scan `file_name` for lines containing `token`, score each with a
// BM25-style TF-IDF, and return the best-scoring line with its score.
// Returns {"", 0.0} when the file cannot be opened or no line matches.
// (`avg_doc_len` is accepted for interface symmetry; the caller folds
// it into `doc_len_correction` beforehand.)
std::pair<std::string, double> compute_tfidf(const std::string& token, const std::string& file_name, int word_count, double avg_doc_len, double doc_len_correction) {
    std::ifstream ifs(file_name);
    if (!ifs) {
        std::cerr << "Error opening file: " << file_name << std::endl;
        return {"", 0.0};
    }

    std::unordered_map<std::string, double> line_selection;
    for (std::string line; std::getline(ifs, line); ) {
        if (line.find(token) == std::string::npos) {
            continue;
        }
        std::vector<std::string> tokens = split(line);
        const int n = static_cast<int>(tokens.size());
        // tf: occurrences of the token in this line over the total
        // collection word count.
        const double tf = std::count(tokens.begin(), tokens.end(), token) / static_cast<double>(word_count);
        // BM25-style idf over this line's word count.
        const double idf = std::log((n - tf + 0.5) / (tf + 0.5));
        line_selection[line] = tf / (tf + doc_len_correction) * idf;
    }

    if (line_selection.empty()) return {"", 0.0};

    auto best = std::max_element(line_selection.begin(), line_selection.end(),
                                 [](const auto& a, const auto& b) { return a.second < b.second; });
    return {best->first, best->second};
}

// Build a Bayesian-network skeleton from `variables`: one node per
// variable plus a fixed example edge set.
// NOTE(review): the edges "A"->"B" and "A"->"C" are hard-coded examples;
// addEdge() silently does nothing unless nodes literally named "A"/"B"/"C"
// exist, which is unlikely for real query words — confirm the intended
// topology.
// NOTE(review): nodes are allocated with `new` and never freed anywhere
// in this file; the returned network holds raw pointers to them.
BayesianNetwork structureLearning(const std::vector<std::string>& variables) {
    BayesianNetwork BN;

    // Create one node per variable.
    for (const auto& var : variables) {
        BN.addNode(new Node(var));
    }

    // Define the relations between nodes for your specific problem.
    // Example: assume "A" influences "B" and "C".
    BN.addEdge("A", "B");
    BN.addEdge("A", "C");

    return BN;
}

// Estimate each node's probability P(node) as its relative frequency in
// `data` (a naive parameter-learning step: the conditional structure of
// the network is ignored).
// FIX: the original divided by data.size() unconditionally, producing
// NaN probabilities when `data` was empty; empty input now leaves the
// distributions untouched.
void parameterLearning(BayesianNetwork& BN, const std::vector<std::string>& data) {
    if (data.empty()) {
        return;  // nothing to estimate from — avoid division by zero
    }
    for (auto& node : BN.nodes) {
        // Count how often this node's name occurs in the data.
        const double count = std::count(data.begin(), data.end(), node->name);
        node->distribution[node->name] = count / data.size(); // simple estimate
    }
}

// Entry point: interactive loop that answers natural-language questions
// about myfile.txt by combining TF-IDF scores with (naive) Bayesian-
// network probabilities, then demonstrates one Q-learning step.
// FIX over the original: an empty file previously caused a division by
// zero when computing doc_len_correction; this is now guarded.
int main() {
    std::string file_name = "myfile.txt";

    std::ifstream inFile(file_name);
    if (!inFile) {
        std::cerr << "Error opening file: " << file_name << std::endl;
        return 1;
    }

    // Standard English stop-word list.
    std::unordered_set<std::string> stop_words = { "a", "about", "above", "after", "again", "against", "all", "am", "an", "and", "any", "are", "aren't", "as", "at", "be", "because",
        "been", "before", "being", "below", "between", "both", "but", "by", "can't", "cannot", "could", "couldn't", "did", "didn't", "do", "does", "doesn't", "doing", "don't",
        "down", "during", "each", "few", "for", "from", "further", "had", "hadn't", "has", "hasn't", "have", "haven't", "having", "he", "he'd", "he'll", "he's", "her", "here",
        "here's", "hers", "herself", "him", "himself", "his", "how", "how's", "i", "i'd", "i'll", "i'm", "i've", "if", "in", "into", "is", "isn't", "it", "it's", "its", "itself",
        "let's", "me", "more", "most", "mustn't", "my", "myself", "no", "nor", "not", "of", "off", "on", "once", "only", "or", "other", "ought", "our", "ours", "ourselves", "out",
        "over", "own", "same", "shan't", "she", "she'd", "she'll", "she's", "should", "shouldn't", "so", "some", "such", "than", "that", "that's", "the", "their", "theirs",
        "them", "themselves", "then", "there", "there's", "these", "they", "they'd", "they'll", "they're", "they've", "this", "those", "through", "to", "too", "under", "until",
        "up", "very", "was", "wasn't", "we", "we'd", "we'll", "we're", "we've", "were", "weren't", "what", "what's", "when", "when's", "where", "where's", "which", "while",
        "who", "who's", "whom", "why", "why's", "with", "won't", "would", "wouldn't", "you", "you'd", "you'll", "you're", "you've", "your", "yours", "yourself", "yourselves" };

    // Initialize the Q-learning agent.
    const int numStates = 2;   // example number of states
    const int numActions = 2;  // example number of actions
    double gamma = 0.9;        // discount factor
    double alpha = 0.1;        // learning rate
    QLearning agent(numStates, numActions, gamma, alpha);

    std::string user_input;
    while (true) {
        std::cout << "Please enter your question (or type 'exit' to quit): ";
        std::getline(std::cin, user_input);

        if (user_input == "exit") {
            break;  // leave the loop when the user types 'exit'
        }

        std::string filtered_input = remove_stop_words(user_input, stop_words);

        int word_count = 0;
        std::vector<double> line_lengths;
        std::string line;

        // Rewind the file so it can be re-read for every question.
        inFile.clear();
        inFile.seekg(0, std::ios::beg);

        while (std::getline(inFile, line)) {
            std::vector<std::string> words = split(line);
            word_count += words.size();
            line_lengths.push_back(line.length());
        }

        double avg_doc_len = line_lengths.empty() ? 0.0 : std::accumulate(line_lengths.begin(), line_lengths.end(), 0.0) / line_lengths.size();

        // FIX: guard against an empty file, which would otherwise divide
        // by zero when computing doc_len_correction.
        if (avg_doc_len == 0.0 || word_count == 0) {
            std::cout << "No relevant information found in the file." << std::endl;
            continue;
        }

        double doc_len_correction = 0.75 * word_count / avg_doc_len + 0.25;

        // Learn the Bayesian-network structure from the query words.
        // NOTE(review): structureLearning() allocates nodes with `new` on
        // every question and nothing ever frees them — this loop leaks.
        BayesianNetwork BN = structureLearning(split(filtered_input));

        // Learn the parameters.
        parameterLearning(BN, split(filtered_input));

        // Compute the TF-IDF score of every query word.
        std::unordered_map<std::string, double> tfidf_scores;
        for (const auto& token : split(filtered_input)) {
            auto result = compute_tfidf(token, file_name, word_count, avg_doc_len, doc_len_correction);
            tfidf_scores[token] = result.second;
        }

        // Reinforce the TF-IDF scores with the conditional probabilities.
        // NOTE(review): with empty evidence this multiplies each word's
        // score by its own marginal once per *other* query word — confirm
        // the repeated damping is intended.
        for (const auto& word1 : split(filtered_input)) {
            for (const auto& word2 : split(filtered_input)) {
                if (word1 != word2) {
                    std::unordered_map<std::string, std::string> evidence; // fill with evidence if available
                    double prob = BN.getConditionalProbability(word1, evidence);
                    tfidf_scores[word1] *= prob; // reinforce the TF-IDF score
                }
            }
        }

        // Pick the keyword with the best score.
        if (!tfidf_scores.empty()) {
            auto best_token = std::max_element(tfidf_scores.begin(), tfidf_scores.end(),
                                               [](const auto& a, const auto& b) { return a.second < b.second; });
            std::cout << "Let's talk about \"" << best_token->first << "\"..." << std::endl;
            std::cout << "Best line: " << compute_tfidf(best_token->first, file_name, word_count, avg_doc_len, doc_len_correction).first << std::endl;
        } else {
            std::cout << "No relevant information found in the file." << std::endl;
        }

        // Example use of the Q-learning agent.
        double exploreRate = 0.6; // exploration rate
        int state = 0;            // initial state

        // Select an action.
        int action = agent.chooseAction(state, exploreRate);
        std::cout << "Selected action by Q-Learning agent: " << action << std::endl;

        // Example Q-table update.
        double reward = 1.0; // example reward
        int nextState = 1;   // next state
        agent.updateQTable(state, action, reward, nextState);

        // Print the final Q table.
        agent.getQValue();
    }

    inFile.close(); // close the file here
    return 0;
}
 
Last edited:

Ask a Question

Want to reply to this thread or ask your own question?

You'll need to choose a username for the site, which only takes a couple of moments. After that, you can post your question and our members will help you out.

Ask a Question

Members online

Forum statistics

Threads
474,056
Messages
2,570,443
Members
47,089
Latest member
Bobby2025b

Latest Threads

Top