Trouble with prediction code, for the life of me I can't figure out why it isnt running properly. Help would be appreciated.

Joined
Jul 8, 2023
Messages
1
Reaction score
0
from collections import Counter
from itertools import combinations
from datetime import datetime
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.svm import SVC
from sklearn.feature_selection import SelectKBest, chi2
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import Sequence

class DataSequence(Sequence):
def init(self, X, y, batch_size):
self.X = X
self.y = y
self.batch_size = batch_size

def len(self):
return len(self.X) // self.batch_size

def getitem(self, idx):
batch_X = self.X[idx * self.batch_size : (idx + 1) * self.batch_size]
batch_y = self.y[idx * self.batch_size : (idx + 1) * self.batch_size]
return batch_X, batch_y


# Draws data
draws = [
{'date': '06-07-2023', 'numbers': [2, 13, 43, 12, 42, 9]},
{'date': '02-07-2023', 'numbers': [42, 1, 6, 34, 45, 17]},
...

]

# Sort the draws based on the date in ascending order
draws.sort(key=lambda x: datetime.strptime(x['date'], '%d-%m-%Y'))

# Flatten the draws into a single list
all_numbers = [number for draw in draws for number in draw['numbers']]

# Count the occurrences of each number
number_counts = Counter(all_numbers)

# Find the most common numbers
most_common_numbers = number_counts.most_common()

# Function to generate features from draws
def generate_features(draws):
features = []
dates = []

for draw in draws:
# Convert date string to datetime object
date = datetime.strptime(draw['date'], '%d-%m-%Y')

# Feature 1: Days since the last draw
if len(dates) > 0:
days_since_last_draw = (date - dates[-1]).days
else:
days_since_last_draw = 0
features.append(days_since_last_draw)

# Feature 2: Sum of Numbers
features.append(sum(draw['numbers']))

# Feature 3: Odd/Even Numbers Ratio
odd_count = len([num for num in draw['numbers'] if num % 2 == 1])
even_count = len([num for num in draw['numbers'] if num % 2 == 0])
features.append(odd_count / even_count if even_count > 0 else 1)

# Feature 4: Consecutive Numbers Count
consecutive_count = sum(1 for i in range(len(draw['numbers']) - 1) if draw['numbers'] + 1 == draw['numbers'][i + 1])
features.append(consecutive_count)

# Feature 5: Number Frequency
for number in range(1, 46):
features.append(number_counts[number])

# Feature 6: Number Sums
number_sums = [sum(combination) for combination in combinations(draw['numbers'], 2)]
features.extend(number_sums)

# Additional Features
# Feature 7: Month of the draw
features.append(date.month)

# Feature 8: Day of the week (Monday=0 to Sunday=6)
features.append(date.weekday())

# Feature 9: Prime Numbers Count
prime_count = len([num for num in draw['numbers'] if num in [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43]])
features.append(prime_count)

# Feature 10: Fibonacci Numbers Count
fibonacci_count = len([num for num in draw['numbers'] if num in [1, 2, 3, 5, 8, 13, 21, 34]])
features.append(fibonacci_count)

# Feature 11: Mean of Numbers
features.append(np.mean(draw['numbers']))

# Feature 12: Median of Numbers
features.append(np.median(draw['numbers']))

# Feature 13: Standard Deviation of Numbers
features.append(np.std(draw['numbers']))

dates.append(date)

return features


# Generate features and target labels for each draw
X = []
y = []

for i in range(4, len(draws)):
features = generate_features(draws[i-4:i])
X.append(features)
y.append(draws['numbers'][-1]) # Target label is the last number in the draw

# Standardize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Train-Test Split
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Function to generate LSTM features from draws
def generate_features_lstm(draws):
lstm_features = []
dates = []

# Code for generating LSTM features
# Assign the generated LSTM features to lstm_features list
for draw in draws:
# Convert date string to datetime object
date = datetime.strptime(draw['date'], '%d-%m-%Y')

# Feature 1: Days since the last draw
if len(dates) > 0:
days_since_last_draw = (date - dates[-1]).days
else:
days_since_last_draw = 0
lstm_features.append(days_since_last_draw)

# Feature 2: Sum of Numbers
lstm_features.append(sum(draw['numbers']))

# Feature 3: Odd/Even Numbers Ratio
odd_count = len([num for num in draw['numbers'] if num % 2 == 1])
even_count = len([num for num in draw['numbers'] if num % 2 == 0])
ratio = odd_count / even_count if even_count > 0 else 1
lstm_features.append(ratio)

# Feature 4: Consecutive Numbers Count
consecutive_count = sum(
1 for i in range(len(draw['numbers']) - 1) if draw['numbers'] + 1 == draw['numbers'][i + 1])
lstm_features.append(consecutive_count)

# Feature 5: Number Frequency
number_counts = Counter(draw['numbers'])
for number in range(1, 46):
lstm_features.append(number_counts[number])

# Feature 6: Number Sums
number_sums = [sum(combination) for combination in combinations(draw['numbers'], 2)]
lstm_features.extend(number_sums)

# Feature 7: Month of the draw
month = date.month
lstm_features.append(month)

# Feature 8: Day of the week (Monday=0 to Sunday=6)
day_of_week = date.weekday()
lstm_features.append(day_of_week)

# Feature 9: Prime Numbers Count
prime_count = len([num for num in draw['numbers'] if num in [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43]])
lstm_features.append(prime_count)

# Feature 10: Fibonacci Numbers Count
fibonacci_count = len([num for num in draw['numbers'] if num in [1, 2, 3, 5, 8, 13, 21, 34]])
lstm_features.append(fibonacci_count)

# Feature 11: Mean of Numbers
mean = np.mean(draw['numbers'])
lstm_features.append(mean)

# Feature 12: Median of Numbers
median = np.median(draw['numbers'])
lstm_features.append(median)

# Feature 13: Standard Deviation of Numbers
std_deviation = np.std(draw['numbers'])
lstm_features.append(std_deviation)

dates.append(date)

return lstm_features


# Generate LSTM features and target labels for each draw
X_lstm = []
y_lstm = []
dates = []

for i in range(4, len(draws)):
features_lstm = generate_features_lstm(draws[i-3:i])
X_lstm.append(features_lstm)
y_lstm.append(draws['numbers'][-1]) # Target label is the last number in the draw
dates.append(datetime.strptime(draws['date'], '%d-%m-%Y'))

# Convert LSTM features and target labels to numpy arrays
X_train_lstm = np.array(X_lstm)
y_train_lstm = np.array(y_lstm)

print("Shape of X_train_lstm:", X_train_lstm.shape)
print("Shape of y_train_lstm:", y_train_lstm.shape)

# Reshape input data for LSTM
X_train_lstm = np.reshape(X_train_lstm, (X_train_lstm.shape[0], X_train_lstm.shape[1], 1))

# Generate LSTM features and target labels for the test set
X_lstm_test = []
y_lstm_test = []

for i in range(len(draws)-3, len(draws)):
features_lstm = generate_features_lstm(draws[i-3:i])
X_lstm_test.append(features_lstm)
y_lstm_test.append(draws['numbers'][-1]) # Target label is the last number in the draw

# Convert LSTM features and target labels to numpy arrays
X_test_lstm = np.array(X_lstm_test)
y_test_lstm = np.array(y_lstm_test)

# Reshape input data for LSTM
X_test_lstm = np.reshape(X_test_lstm, (X_test_lstm.shape[0], X_test_lstm.shape[1], 1))

# Update the shapes
print("Shape of X_test_lstm:", X_test_lstm.shape)
print("Shape of y_test_lstm:", y_test_lstm.shape)
print("Shape of X_test_lstm:", X_test_lstm.shape)
print("Shape of y_test_lstm:", y_test_lstm.shape)
print("Shape of X_test_lstm:", X_test_lstm.shape)
print("Shape of y_test_lstm:", y_test_lstm.shape)


# Create an LSTM model
model_lstm = Sequential()
model_lstm.add(LSTM(units=64, input_shape=(1, X_train_lstm.shape[1])))
model_lstm.add(Dense(units=1))
model_lstm.compile(optimizer=Adam(), loss='mse')

# Create an instance of DataSequence for training
train_sequence = DataSequence(X_train_lstm, y_train_lstm, batch_size=32)

# Train the LSTM model
model_lstm.fit(train_sequence, epochs=10)

# Predict using the LSTM model
y_pred_lstm = model_lstm.predict(X_test_lstm)
# Evaluate the LSTM model
accuracy_lstm = accuracy_score(y_test, np.round(y_pred_lstm))
print("LSTM Accuracy:", accuracy_lstm)

# Reshape LSTM predictions for compatibility with other models
y_pred_lstm = y_pred_lstm.flatten().tolist()

# Combine LSTM predictions with original features
X_train_combined = np.concatenate((X_train, np.array(y_pred_lstm[:-len(X_test)]).reshape(-1, 1)), axis=1)
X_test_combined = np.concatenate((X_test, np.array(y_pred_lstm[-len(X_test):]).reshape(-1, 1)), axis=1)

# Model Selection and Hyperparameter Tuning (including Random Forest and Gradient Boosting)
models = {
'Random Forest': {
'model': RandomForestClassifier(),
'params': {
'n_estimators': [100, 200, 300],
'max_depth': [None, 5, 10]
}
},
'Gradient Boosting': {
'model': GradientBoostingClassifier(),
'params': {
'n_estimators': [100, 200, 300],
'learning_rate': [0.1, 0.01, 0.001]
}
}
}

best_models = {}

for model_name, model_info in models.items():
print("Performing Grid Search for", model_name)
model = model_info['model']
params = model_info['params']
grid_search = GridSearchCV(model, params, cv=5)
grid_search.fit(X_train_combined, y_train)
best_model = grid_search.best_estimator_
best_models[model_name] = best_model
print("Best Parameters:", grid_search.best_params_)
print("Best Score:", grid_search.best_score_)

# Evaluate Models on Test Set
for model_name, model in best_models.items():
model.fit(X_train_combined, y_train)
y_pred = model.predict(X_test_combined)
accuracy = accuracy_score(y_test, y_pred)
print(model_name, "Accuracy:", accuracy)

# Calculate the probability based on previous draws
total_draws = len(draws)
probability_previous = {number: count / total_draws for number, count in most_common_numbers}

# Calculate the probability based on models' predictions
probability_models = {}
for model_name, model in best_models.items():
predicted_probabilities = model.predict_proba(X_test_combined)
for i, draw in enumerate(X_test_combined):
predicted_number = int(y_pred)
num_classes = len(model.classes_)
if predicted_number < num_classes:
if predicted_number in probability_models:
probability_models[predicted_number] += predicted_probabilities[predicted_number]
else:
probability_models[predicted_number] = predicted_probabilities[predicted_number]

# Combine the probabilities from previous draws and models
combined_probability = {number: probability_previous.get(number, 0) + probability_models.get(number, 0)
for number in range(1, 46)}

# Sort the combined probability dictionary by values in descending order
sorted_combined_probability = sorted(combined_probability.items(), key=lambda x: x[1], reverse=True)

# Print the most probable draws
print("Most Probable Draws:")
for draw, prob in sorted_combined_probability:
print(draw, "Probability:", prob)

# Generate the bar plot
x_labels = [str(draw[0]) for draw in sorted_combined_probability]
y_values_previous = [probability_previous.get(draw[0], 0) for draw in sorted_combined_probability]
y_values_models = [probability_models.get(draw[0], 0) for draw in sorted_combined_probability]

fig, ax = plt.subplots()
ax.bar(x_labels, y_values_previous, label='Previous Draws', alpha=0.5)
ax.bar(x_labels, y_values_models, label='Models', alpha=0.5)
ax.set_xlabel('Number')
ax.set_ylabel('Probability')
ax.set_title('Probability Distribution')
ax.legend()
plt.xticks(rotation=90)
plt.show()

((Shape of X_train_lstm: (1814, 213)
Shape of y_train_lstm: (1814,)
Shape of X_train_lstm: (1814, 213, 1)
Shape of y_train_lstm: (1814,)
Shape of X_test_lstm: (3, 213)
Shape of y_test_lstm: (3,)
Shape of X_test_lstm: (3, 213)
Shape of y_test_lstm: (3,)
2023-07-08 15:17:21.989590: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE SSE2 SSE3 SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
Traceback (most recent call last):
File "C:\Users\Administrator\PycharmProjects\pythonProject19\Lottery prediction code final.py", line 2063, in <module>
model_lstm.fit(X_train_lstm, y_train, epochs=10, batch_size=32)
File "C:\Users\Administrator\PycharmProjects\pythonProject19\venv\Lib\site-packages\keras\src\utils\traceback_utils.py", line 70, in error_handler
raise e.with_traceback(filtered_tb) from None
File "C:\Users\Administrator\PycharmProjects\pythonProject19\venv\Lib\site-packages\keras\src\engine\data_adapter.py", line 1105, in select_data_adapter
raise ValueError(
ValueError: Failed to find data adapter that can handle input: <class 'numpy.ndarray'>, (<class 'list'> containing values of types {"<class 'int'>"})

Process finished with exit code 1
))
 

Ask a Question

Want to reply to this thread or ask your own question?

You'll need to choose a username for the site, which only take a couple of moments. After that, you can post your question and our members will help you out.

Ask a Question

Members online

No members online now.

Forum statistics

Threads
474,057
Messages
2,570,443
Members
47,115
Latest member
DorothyLus

Latest Threads

Top