How can I make this C++ code fall back to searching the database when the myfile.txt file doesn't contain the answer?
C++:
#include <iostream>
#include <fstream>
#include <sstream>
#include <string>
#include <vector>
#include <unordered_map>
#include <unordered_set>
#include <algorithm>
#include <numeric>
#include <cmath>
#include <random>
#include <deque>
#include <tuple>
// Type aliases: an embedding is a vector of doubles; WordEmbeddings maps
// each word to its embedding vector.
using Vector = std::vector<double>;
using WordEmbeddings = std::unordered_map<std::string, Vector>;
// Bounded FIFO buffer of (state, action, reward, nextState) transitions
// used for experience replay: once full, the oldest entry is dropped.
class ReplayMemory {
public:
ReplayMemory(size_t capacity) : m_capacity(capacity) {}
// Append a transition, evicting the oldest one when at capacity.
void add(const std::tuple<int, int, double, int>& experience) {
if (m_memory.size() >= m_capacity) {
m_memory.pop_front();
}
m_memory.push_back(experience);
}
// Draw up to batch_size distinct transitions uniformly at random.
std::vector<std::tuple<int, int, double, int>> sample(size_t batch_size) {
std::vector<std::tuple<int, int, double, int>> batch;
std::mt19937 rng{std::random_device{}()};
std::sample(m_memory.begin(), m_memory.end(), std::back_inserter(batch),
batch_size, rng);
return batch;
}
// True when no transitions have been stored yet.
bool isEmpty() const {
return m_memory.empty();
}
private:
size_t m_capacity; // maximum number of stored transitions
std::deque<std::tuple<int, int, double, int>> m_memory; // (state, action, reward, nextState)
};
// Tabular Q-learning agent with an experience-replay buffer.
class QLearning {
public:
// numStates/numActions size the Q table; gamma is the discount factor,
// alpha the learning rate and replayMemorySize the replay capacity.
QLearning(int numStates, int numActions, double gamma, double alpha, size_t replayMemorySize) :
m_numStates(numStates), m_numActions(numActions), m_gamma(gamma), m_alpha(alpha),
m_replayMemory(replayMemorySize), m_rng(std::random_device{}()) {
m_qTable.resize(m_numStates, std::vector<double>(m_numActions, 0.0));
}
// Store one (state, action, reward, nextState) transition for replay.
void addExperience(int state, int action, double reward, int nextState) {
m_replayMemory.add(std::make_tuple(state, action, reward, nextState));
}
// Sample a batch of past transitions and apply the Q-learning update:
// Q(s,a) <- (1-alpha)*Q(s,a) + alpha*(r + gamma * max_a' Q(s',a')).
void learnFromReplay(size_t batch_size) {
if (m_replayMemory.isEmpty()) return;
for (const auto& [state, action, reward, nextState] : m_replayMemory.sample(batch_size)) {
const double maxNextQValue = *std::max_element(m_qTable[nextState].begin(), m_qTable[nextState].end());
m_qTable[state][action] = (1 - m_alpha) * m_qTable[state][action]
+ m_alpha * (reward + m_gamma * maxNextQValue);
}
}
// Epsilon-greedy policy: with probability epsilon pick a uniformly random
// action (exploration), otherwise the best-known action (exploitation).
// BUG FIX: previously used unseeded std::rand(), so every run replayed the
// same "random" choices and suffered modulo bias; now uses a seeded
// std::mt19937 with proper distributions.
int chooseAction(int state, double epsilon) {
std::uniform_real_distribution<double> uniform01(0.0, 1.0);
if (uniform01(m_rng) < epsilon) {
std::uniform_int_distribution<int> actionDist(0, m_numActions - 1);
return actionDist(m_rng); // Exploration
}
const auto& row = m_qTable[state];
return static_cast<int>(std::distance(row.begin(), std::max_element(row.begin(), row.end()))); // Exploitation
}
// Print the full Q table to stdout (debugging aid).
void getQValue() const {
std::cout << "Final Q-Value Table:" << std::endl;
for (int state = 0; state < m_numStates; ++state) {
std::cout << "State " << state << ": ";
for (int action = 0; action < m_numActions; ++action) {
std::cout << m_qTable[state][action] << " ";
}
std::cout << std::endl;
}
}
// Persist the Q table to `filename`, one row (state) per line.
void saveQValues(const std::string& filename) {
std::ofstream outFile(filename);
if (outFile.is_open()) {
for (const auto& state : m_qTable) {
for (const auto& qValue : state) {
outFile << qValue << " ";
}
outFile << std::endl;
}
} else {
std::cerr << "Error opening file for saving Q-values." << std::endl;
}
}
// Load the Q table from `filename`. BUG FIX: read failures are now
// detected; on a short or corrupt file the remaining entries keep their
// previous values instead of being silently zeroed.
void loadQValues(const std::string& filename) {
std::ifstream inFile(filename);
if (inFile.is_open()) {
for (auto& state : m_qTable) {
for (auto& qValue : state) {
if (!(inFile >> qValue)) return; // stop on short/corrupt file
}
}
} else {
std::cerr << "Error opening file for loading Q-values." << std::endl;
}
}
private:
int m_numStates;
int m_numActions;
double m_gamma;  // discount factor
double m_alpha;  // learning rate
std::vector<std::vector<double>> m_qTable; // Q(state, action)
ReplayMemory m_replayMemory; // experience-replay buffer
std::mt19937 m_rng;          // RNG for epsilon-greedy action selection
};
// A node in the Bayesian network: a named variable with links to its
// parents/children and a (value -> probability) distribution table.
class Node {
public:
std::string name;                                     // variable name
std::vector<Node*> parents;                           // non-owning links
std::vector<Node*> children;                          // non-owning links
std::unordered_map<std::string, double> distribution; // value -> probability
Node(const std::string& name) : name{name} {}
};
// Classe pour représenter le réseau bayésien
class BayesianNetwork {
public:
std::vector<Node*> nodes;
~BayesianNetwork() {
for (auto node : nodes) {
delete node;
}
}
void addNode(Node* node) {
nodes.push_back(node);
}
void addEdge(const std::string& parent, const std::string& child) {
Node* parentNode = getNode(parent);
Node* childNode = getNode(child);
if (parentNode && childNode) {
parentNode->children.push_back(childNode);
childNode->parents.push_back(parentNode);
}
}
Node* getNode(const std::string& name) {
for (auto& node : nodes) {
if (node->name == name) {
return node;
}
}
return nullptr;
}
double getConditionalProbability(const std::string& target, const std::unordered_map<std::string, std::string>& evidence) {
Node* targetNode = getNode(target);
if (!targetNode) return 0.0;
double jointProbability = calculateJointProbability(targetNode, evidence);
return jointProbability; // Normaliser si nécessaire
}
private:
double calculateJointProbability(Node* targetNode, const std::unordered_map<std::string, std::string>& evidence) {
double jointProbability = targetNode->distribution[targetNode->name];
for (const auto& [var, value] : evidence) {
Node* evidenceNode = getNode(var);
if (evidenceNode) {
jointProbability *= evidenceNode->distribution.at(value);
}
}
return jointProbability;
}
};
// Tokenize a string on whitespace and return the resulting words.
std::vector<std::string> split(const std::string& s) {
std::vector<std::string> words;
std::istringstream reader(s);
for (std::string word; reader >> word; ) {
words.push_back(word);
}
return words;
}
// Remove every stop word from `text`, joining the remaining words with
// single spaces (no trailing separator).
std::string remove_stop_words(const std::string& text, const std::unordered_set<std::string>& stop_words) {
std::string filtered;
for (const auto& word : split(text)) {
if (stop_words.count(word) == 0) {
if (!filtered.empty()) {
filtered += ' ';
}
filtered += word;
}
}
return filtered;
}
// Generate a random embedding whose i-th component is drawn from a normal
// distribution with mean alpha[i] and standard deviation |beta_param[i]|.
// NOTE: `word` is unused — every call draws a fresh random vector.
Vector generateEmbedding(const std::string& word, const Vector& alpha, const Vector& beta_param) {
(void)word;
std::random_device seed;
std::mt19937 engine(seed());
std::normal_distribution<double> gauss(0.0, 1.0);
Vector embedding(alpha.size());
for (size_t i = 0; i < embedding.size(); ++i) {
embedding[i] = alpha[i] + beta_param[i] * gauss(engine);
}
return embedding;
}
// Build one embedding per distinct word appearing in the dialogue responses.
WordEmbeddings getWordEmbeddings(const std::unordered_map<std::string, std::unordered_map<std::string, int>>& dialogues, const Vector& alpha, const Vector& beta_param) {
WordEmbeddings wordEmbeddings;
for (const auto& [prompt, responses] : dialogues) {
(void)prompt;
for (const auto& [response, count] : responses) {
(void)count;
for (const auto& word : split(response)) {
// Only embed each word once.
if (wordEmbeddings.count(word) == 0) {
wordEmbeddings[word] = generateEmbedding(word, alpha, beta_param);
}
}
}
}
return wordEmbeddings;
}
// Fonction pour calculer la similarité cosinus
double computeCosineSimilarity(const Vector& v1, const Vector& v2) {
double dotProduct = 0.0;
double norm1 = 0.0;
double norm2 = 0.0;
for (size_t i = 0; i < v1.size(); i++) {
dotProduct += v1[i] * v2[i];
norm1 += v1[i] * v1[i];
norm2 += v2[i] * v2[i];
}
norm1 = std::sqrt(norm1);
norm2 = std::sqrt(norm2);
if (norm1 == 0.0 || norm2 == 0.0) {
return 0.0;
}
return dotProduct / (norm1 * norm2);
}
// Average pairwise cosine similarity between embeddings of the question's
// words and the response's words; 0.0 when no pair has embeddings for both.
// PERF FIX: the old code did two hash lookups per pair (find then at) and
// copied both embedding vectors on every comparison; the question-word
// lookup is now hoisted out of the inner loop and embeddings are accessed
// by const reference.
double computeSimilarity(const std::string& question, const std::string& response, const WordEmbeddings& wordEmbeddings) {
std::vector<std::string> questionWords = split(question);
std::vector<std::string> responseWords = split(response);
double similarity = 0.0;
int numMatchingWords = 0;
for (const auto& questionWord : questionWords) {
auto qIt = wordEmbeddings.find(questionWord);
if (qIt == wordEmbeddings.end()) continue; // no embedding for this word
for (const auto& responseWord : responseWords) {
auto rIt = wordEmbeddings.find(responseWord);
if (rIt == wordEmbeddings.end()) continue;
similarity += computeCosineSimilarity(qIt->second, rIt->second);
++numMatchingWords;
}
}
return numMatchingWords > 0 ? similarity / numMatchingWords : 0.0;
}
// Return the substring of `s` between the first occurrence of `start_delim`
// and the next occurrence of `stop_delim`; empty string if either is missing.
// BUG FIX: positions were stored in `unsigned`, which truncates
// std::string::npos on 64-bit platforms, so "not found" was never detected
// and substr could throw or return garbage; use std::string::size_type.
std::string get_str_between_two_str(const std::string &s,
const std::string &start_delim,
const std::string &stop_delim) {
const std::string::size_type first_delim_pos = s.find(start_delim);
if (first_delim_pos == std::string::npos) return ""; // start delimiter missing
const std::string::size_type content_start = first_delim_pos + start_delim.length();
const std::string::size_type last_delim_pos = s.find(stop_delim, content_start);
if (last_delim_pos == std::string::npos) return ""; // stop delimiter missing
return s.substr(content_start, last_delim_pos - content_start);
}
// Score each sentence of `file_name` that contains `token` with a BM25-style
// TF-IDF weight and return the best (sentence, score) pair.
// Returns {"", 0.0} when the file cannot be opened, no sentence matches, or
// word_count is not positive.
// NOTE(review): `avg_doc_len` is kept for interface compatibility but is not
// used here; the caller folds it into `doc_len_correction`.
std::pair<std::string, double> compute_tfidf(const std::string& token, const std::string& file_name, int word_count, double avg_doc_len, double doc_len_correction) {
// BUG FIX: word_count == 0 previously produced a division by zero and
// NaN/inf scores; bail out early instead.
if (word_count <= 0) return {"", 0.0};
std::unordered_map<std::string, double> phrase_selection;
std::ifstream ifs(file_name);
if (!ifs) {
std::cerr << "Error opening file: " << file_name << std::endl;
return {"", 0.0};
}
std::string line;
while (std::getline(ifs, line)) {
// Split the line into sentences terminated by '.', '!' or '?'.
// NOTE(review): text after the last terminator on a line, and sentences
// spanning multiple lines, are ignored — TODO confirm this is intended.
std::string::size_type start = 0;
while (true) {
std::string::size_type end = line.find_first_of(".!?", start);
if (end == std::string::npos) break; // no sentence end found
std::string phrase = line.substr(start, end - start + 1); // keep the terminator
if (phrase.find(token) != std::string::npos) {
std::vector<std::string> words = split(phrase);
int n = words.size();
// Term frequency relative to the whole document's word count.
double tf = std::count(words.begin(), words.end(), token) / static_cast<double>(word_count);
// BM25-like inverse frequency computed over the sentence length.
double idf = std::log((n - tf + 0.5) / (tf + 0.5));
double tf_idf = tf / (tf + doc_len_correction) * idf;
phrase_selection[phrase] = tf_idf;
}
start = end + 1; // move to the start of the next sentence
}
}
if (phrase_selection.empty()) return {"", 0.0};
auto best = std::max_element(phrase_selection.begin(), phrase_selection.end(),
[](const auto& a, const auto& b) { return a.second < b.second; });
return {best->first, best->second};
}
// Build a Bayesian network with one node per variable plus a fixed example
// topology: edges A->B and A->C (addEdge is a no-op for missing names).
// The returned network owns its nodes.
BayesianNetwork structureLearning(const std::vector<std::string>& variables) {
BayesianNetwork BN;
for (const auto& var : variables) {
BN.addNode(new Node(var));
}
// Example relations between nodes.
BN.addEdge("A", "B");
BN.addEdge("A", "C");
return BN;
}
// Estimate each node's marginal probability as its relative frequency in
// `data` and store it in the node's distribution table.
// BUG FIX: an empty dataset previously produced a 0/0 division (NaN) for
// every node; it is now a no-op.
void parameterLearning(BayesianNetwork& BN, const std::vector<std::string>& data) {
if (data.empty()) return;
for (auto& node : BN.nodes) {
const double count = static_cast<double>(std::count(data.begin(), data.end(), node->name));
node->distribution[node->name] = count / data.size();
}
}
// Map user input onto one of 10 discrete states using its length.
int determineState(const std::string& user_input) {
const auto bucket = user_input.length() % 10; // example: 10 states
return static_cast<int>(bucket);
}
// Reward signal for the agent: a random value in [0, 9] scaled by the
// TF-IDF score. `action` and `user_input` are currently unused placeholders
// for a richer reward function.
double getReward(int action, const std::string& user_input, double tfidf_score) {
(void)action;
(void)user_input;
const double randomFactor = static_cast<double>(std::rand() % 10);
return randomFactor * tfidf_score; // tune this formula as needed
}
// Transition function: fold the chosen action into one of 10 states.
int determineNextState(int action) {
const int kNumStates = 10; // example: 10 states
return action % kNumStates;
}
// Simple persistent key -> string store backed by "database.txt".
// Entries are loaded in the constructor and flushed back in the destructor.
class Database {
private:
std::unordered_map<int, std::string> data; // integer keys -> stored text
const std::string filename = "database.txt";
int nextKey; // next key handed out by add()
// Read "key value..." lines from disk; keeps nextKey past the largest key.
void loadFromFile() {
std::ifstream file(filename);
if (!file) {
std::cerr << "Error opening file for reading." << std::endl;
return;
}
int key;
while (file >> key) {
std::string value;
std::getline(file, value); // the rest of the line is the value
if (!value.empty() && value.front() == ' ') {
value.erase(0, 1); // drop the separator space
}
data[key] = value;
nextKey = std::max(nextKey, key + 1);
}
}
// Rewrite the whole file from the in-memory map.
void saveToFile() {
std::ofstream file(filename, std::ios_base::trunc);
if (!file) {
std::cerr << "Error opening file for writing." << std::endl;
return;
}
for (const auto& [key, value] : data) {
file << key << " " << value << "\n";
}
}
public:
Database() : nextKey(1) {
loadFromFile();
}
~Database() {
saveToFile();
}
// Store `value` under a fresh auto-incremented key.
void add(const std::string& value) {
data[nextKey] = value;
std::cout << "Added: " << nextKey << " -> " << value << std::endl;
++nextKey;
}
// Delete the entry with `key`, reporting whether it existed.
void remove(int key) {
if (data.erase(key)) {
std::cout << "Removed: " << key << std::endl;
} else {
std::cout << "Key not found: " << key << std::endl;
}
}
// Exact-key lookup; prints the match or a not-found message.
void search(int key) {
auto it = data.find(key);
if (it != data.end()) {
std::cout << "Found: " << it->first << " -> " << it->second << std::endl;
} else {
std::cout << "Key not found: " << key << std::endl;
}
}
// Substring search across all stored values.
void searchByText(const std::string& token) {
bool anyMatch = false;
for (const auto& [key, value] : data) {
if (value.find(token) != std::string::npos) {
std::cout << "Found in key: " << key << " -> " << value << std::endl;
anyMatch = true;
}
}
if (!anyMatch) {
std::cout << "No entries found containing: " << token << std::endl;
}
}
};
// Entry point: interactive loop that scores the user's question against
// myfile.txt via TF-IDF, stores the best sentence in the database, falls
// back to searching the database when the file has no answer, and trains a
// Q-learning agent on each interaction.
int main() {
Database db;
std::string file_name = "myfile.txt";
std::string qValuesFile = "q_values.txt"; // file used to persist Q-values
std::ifstream inFile(file_name);
if (!inFile) {
std::cerr << "Error opening file: " << file_name << std::endl;
return 1;
}
std::unordered_set<std::string> stop_words = { "a", "about", "above", "after", "again", "against", "all", "am", "an", "and", "any", "are", "aren't", "as", "at", "be", "because",
"been", "before", "being", "below", "between", "both", "but", "by", "can't", "cannot", "could", "couldn't", "did", "didn't", "do", "does", "doesn't", "doing", "don't",
"down", "during", "each", "few", "for", "from", "further", "had", "hadn't", "has", "hasn't", "have", "haven't", "having", "he", "he'd", "he'll", "he's", "her", "here",
"here's", "hers", "herself", "him", "himself", "his", "how", "how's", "i", "i'd", "i'll", "i'm", "i've", "if", "in", "into", "is", "isn't", "it", "it's", "its", "itself",
"let's", "me", "more", "most", "mustn't", "my", "myself", "no", "nor", "not", "of", "off", "on", "once", "only", "or", "other", "ought", "our", "ours", "ourselves", "out",
"over", "own", "same", "shan't", "she", "she'd", "she'll", "she's", "should", "shouldn't", "so", "some", "such", "than", "that", "that's", "the", "their", "theirs",
"them", "themselves", "then", "there", "there's", "these", "they", "they'd", "they'll", "they're", "they've", "this", "those", "through", "to", "too", "under", "until",
"up", "very", "was", "wasn't", "we", "we'd", "we'll", "we're", "we've", "were", "weren't", "what", "what's", "when", "when's", "where", "where's", "which", "while",
"who", "who's", "whom", "why", "why's", "with", "won't", "would", "wouldn't", "you", "you'd", "you'll", "you're", "you've", "your", "yours", "yourself", "yourselves" };
// Q-learning agent configuration.
const int numStates = 10;        // number of discrete states
const int numActions = 5;        // number of actions
double gamma = 0.9;              // discount factor
double alpha = 0.1;              // learning rate
size_t replayMemorySize = 1000;  // replay buffer capacity
QLearning agent(numStates, numActions, gamma, alpha, replayMemorySize);
// Restore previously learned Q-values, if any.
agent.loadQValues(qValuesFile);
// Embedding hyper-parameters (per-dimension mean and spread).
Vector alphaParams = {0.5, 0.5};
Vector beta_param = {0.1, 0.1};
// PERF FIX: corpus statistics were recomputed on every question although
// the file never changes while the program runs; compute them once.
int word_count = 0;
std::vector<double> line_lengths;
std::string line;
while (std::getline(inFile, line)) {
word_count += split(line).size();
line_lengths.push_back(line.length());
}
inFile.close(); // compute_tfidf re-opens the file itself
double avg_doc_len = line_lengths.empty() ? 0.0 : std::accumulate(line_lengths.begin(), line_lengths.end(), 0.0) / line_lengths.size();
// BUG FIX: guard against an empty file (avg_doc_len == 0), which would
// make doc_len_correction a division by zero.
double doc_len_correction = avg_doc_len > 0.0 ? 0.75 * word_count / avg_doc_len + 0.25 : 0.25;
std::string user_input;
while (true) {
std::cout << "Please enter your question (or type 'exit' to quit): ";
std::getline(std::cin, user_input);
if (user_input == "exit") {
break;
}
std::string filtered_input = remove_stop_words(user_input, stop_words);
// Bayesian network over the query words.
BayesianNetwork BN = structureLearning(split(filtered_input));
parameterLearning(BN, split(filtered_input));
// Word embeddings (the dialogue corpus is currently empty).
std::unordered_map<std::string, std::unordered_map<std::string, int>> dialogues;
WordEmbeddings wordEmbeddings = getWordEmbeddings(dialogues, alphaParams, beta_param);
// TF-IDF score per query token.
std::unordered_map<std::string, double> tfidf_scores;
for (const auto& token : split(filtered_input)) {
tfidf_scores[token] = compute_tfidf(token, file_name, word_count, avg_doc_len, doc_len_correction).second;
}
// Reinforce TF-IDF scores with the conditional probabilities.
for (const auto& word1 : split(filtered_input)) {
for (const auto& word2 : split(filtered_input)) {
if (word1 != word2) {
std::unordered_map<std::string, std::string> evidence;
tfidf_scores[word1] *= BN.getConditionalProbability(word1, evidence);
}
}
}
// Pick the token with the best combined score.
std::string best_token;
double best_score = 0.0;
if (!tfidf_scores.empty()) {
auto best_it = std::max_element(tfidf_scores.begin(), tfidf_scores.end(),
[](const auto& a, const auto& b) { return a.second < b.second; });
best_token = best_it->first;
best_score = best_it->second;
std::cout << "Let's talk about \"" << best_token << "\"..." << std::endl;
// BUG FIX: compute_tfidf was called twice for the identical result.
std::string goodSentence = compute_tfidf(best_token, file_name, word_count, avg_doc_len, doc_len_correction).first;
if (!goodSentence.empty()) {
std::cout << "Best line: " << goodSentence << std::endl;
db.add(goodSentence); // remember good answers for later reuse
} else {
// The file produced no matching sentence: instead of storing an
// empty string (old bug), fall back to searching the database of
// previously saved answers for the keyword.
std::cout << "No answer in " << file_name << "; searching the database..." << std::endl;
db.searchByText(best_token);
}
// Similarity between the raw question and the chosen keyword.
double similarity = computeSimilarity(user_input, best_token, wordEmbeddings);
std::cout << "Similarity score: " << similarity << std::endl;
// Dump all TF-IDF scores for debugging.
for (const auto& [token, score] : tfidf_scores) {
std::cout << "Token: " << token << ", TF-IDF Score: " << score << std::endl;
}
} else {
std::cout << "No relevant information found in the file." << std::endl;
// Nothing usable in the file (every word was a stop word): search the
// database directly with the raw input words instead.
for (const auto& token : split(user_input)) {
db.searchByText(token);
}
}
// Train the Q-learning agent on this interaction.
double exploreRate = 1.0; // initial exploration rate
int state = determineState(user_input);
for (int episode = 0; episode < 10; ++episode) {
int action = agent.chooseAction(state, exploreRate);
// The reward is weighted by the best TF-IDF score (0 when none).
double reward = getReward(action, user_input, tfidf_scores.empty() ? 0.0 : best_score);
int nextState = determineNextState(action);
agent.addExperience(state, action, reward, nextState);
agent.learnFromReplay(32); // batch of 32 experiences
state = nextState;
exploreRate *= 0.99; // decay exploration progressively
}
// Show the learned Q table.
agent.getQValue();
}
// Persist Q-values for the next run.
agent.saveQValues(qValuesFile);
return 0;
}