Help for my project in the last minute

AjL0ra · Apr 23, 2022

Hi, I have a project for a subject called "tratamiento de la información" (I'm not sure how to translate it into English; roughly "information processing"). The point is that I have to code a naive Bayes and/or k-nearest neighbors classifier from scratch, in any IDE, without using extensions or libraries. Somehow I managed to code both algorithms in Python, but the problem is that I need to run them on the datasets proposed by our professor, and they are BIG datasets: my code crashes because of the sheer amount of data (number of characters). Can someone kindly help me adapt the code I have to such large datasets?
Here is the code:

1) Naive-bayes code

Python:

# Bucket the dataset rows by class label (the last element of each row).
mp = {}
for row in dataset:
    mp.setdefault(row[-1], []).append(row)

# Print every label followed by the rows collected under it.
for label, rows in mp.items():
    print(label)
    for row in rows:
        print(row)

Python:

# Sample rows pasted inline as a list of lists of numeric values.
# The last position of each row is presumably the class label (the other
# snippets in this post read row[-1] as the class) - TODO confirm.
# NOTE(review): two rows end in 2, while the class checks elsewhere in this
# post only compare the label against 1 and 0 - verify whether those two rows
# are placeholders that should be removed.
test = [
            [2,0,0,2,14,9,0,2,2,0,2,3,0,0,2],
            [1,37.17,4,1,7,4,5,1,0,0,1,0,280,1,0],
            [1,16.17,0.04,1,7,3,0.04,0,0,0,0,1,0,1,1],
            [1,51.83,2.04,0,0,0,1.5,0,0,0,0,1,120,2,0],
            [1,29.25,13,1,1,7,0.5,0,0,0,0,1,228,1,0],
            [1,31.08,3.085,1,7,3,2.5,0,1,2,1,1,160,42,0],
            [1,37.75,7,1,10,7,11.5,1,1,7,1,1,300,6,0],
            [1,23.5,2.75,1,0,0,4.5,0,0,0,0,1,160,26,0],
            [0,22.92,1.25,1,10,3,0.25,0,0,0,1,1,120,810,0],
            [1,28.17,0.125,0,3,3,0.085,0,0,0,0,1,216,2101,0],
            [1,42.75,4.085,1,5,3,0.04,0,0,0,0,1,108,101,0],
            [1,48.58,6.5,1,10,7,6,1,0,0,1,1,350,1,1],
            [1,39.92,5,1,2,4,0.21,0,0,0,0,1,550,1,0],
            [1,31.83,0.04,0,6,3,0.04,0,0,0,0,1,0,1,0],
            [1,34.17,1.54,1,12,3,1.54,1,1,1,1,1,520,50001,1],
            [1,33.58,2.75,1,6,3,4.25,1,1,6,0,1,204,1,1],
            [0,16.92,0.5,1,2,3,0.165,0,1,6,1,1,240,36,0],
            [0,22.42,11.25,0,13,7,0.75,1,1,4,0,1,0,322,1],
            [1,18.42,10.415,0,5,3,0.125,1,0,0,0,1,120,376,0],
            [1,34.08,6.5,1,5,3,0.125,1,0,0,1,1,443,1,0],
            [1,39.17,2.5,0,2,7,10,0,0,0,1,0,200,1,0],
            [1,29.42,1.25,1,8,3,1.75,0,0,0,0,1,200,1,0],
            [1,20,0,1,1,3,0.5,0,0,0,0,1,144,1,0],
            [0,71.58,0,1,7,3,0,0,0,0,0,2,184,1,1],
            [1,18.83,0.415,0,7,3,0.165,0,1,1,0,1,200,2,0],
            [1,43.08,0.375,0,7,3,0.375,1,1,8,1,1,300,163,1],
            [1,18.83,3.54,0,0,0,0,0,0,0,1,1,180,2,0],
            [0,21.75,1.75,0,4,2,0,0,0,0,0,1,160,1,0],
            [1,27.58,3.25,0,10,7,5.085,0,1,2,1,1,369,2,0],
            [1,29.42,1.25,1,7,7,0.25,0,1,2,1,1,400,109,0],
            [1,28.25,5.04,0,7,4,1.5,1,1,8,1,1,144,8,1],
            [1,31.57,0.375,1,1,3,0.875,1,0,0,1,0,928,1,0],
            [1,47.67,2.5,1,6,4,2.5,1,1,12,1,1,410,2511,1],
            [1,22.67,0.75,1,2,3,1.585,0,1,1,1,1,400,10,0],
            [0,24.83,4.5,1,8,3,1,0,0,0,1,1,360,7,0],
            [1,39.83,0.5,1,6,3,0.25,1,0,0,0,0,288,1,0],
            [1,49.5,7.585,1,2,4,7.585,1,1,15,1,1,0,5001,1],
            [1,39.17,1.625,1,7,3,1.5,1,1,10,0,1,186,4701,1],
            [1,34.67,1.08,1,6,3,1.165,0,0,0,0,0,28,1,0],
            [1,23.58,0.46,0,8,3,2.625,1,1,6,1,1,208,348,0],
            [2,0,0,2,14,9,0,2,2,0,2,3,0,0,2],
            [0,64.08,0.165,1,0,0,0,1,1,1,0,1,232,101,1],
            [1,42,9.79,1,13,7,7.96,1,1,8,0,1,0,1,1],
            [1,27.42,12.5,1,5,4,0.25,0,0,0,1,1,720,1,0],
            [1,16.5,0.125,1,7,3,0.165,0,0,0,0,1,132,1,0],
            [1,38.17,10.125,1,13,3,2.5,1,1,6,0,1,520,197,1],
            [1,21.08,4.125,0,2,7,0.04,0,0,0,0,1,140,101,0],
            [1,33.67,1.25,1,8,3,1.165,0,0,0,0,1,120,1,0],
            [0,28.17,0.585,1,5,3,0.04,0,0,0,0,1,260,1005,0],
            [1,20.67,0.835,0,7,3,2,0,0,0,1,0,240,1,0],
            [1,54.33,6.75,1,7,7,2.625,1,1,11,1,1,0,285,1],
            [0,17.67,0,0,4,0,0,0,0,0,0,1,86,1,0],
            [1,34,5.5,0,7,3,1.5,0,0,0,1,1,60,1,0],
            [1,29.83,3.5,1,7,3,0.165,0,0,0,0,1,216,1,0],
            [1,40.92,2.25,0,13,7,10,1,0,0,1,1,176,1,0],
            [1,25.67,12.5,1,12,3,1.21,1,1,67,1,1,140,259,1],
            [0,24.75,13.665,1,10,7,1.5,0,0,0,0,1,280,2,0],
            [1,34,4.5,1,5,3,1,1,0,0,1,1,240,1,0],
            [1,48.5,4.25,1,6,3,0.125,1,0,0,1,1,225,1,1],
            [1,33.17,3.04,0,7,7,2.04,1,1,1,1,1,180,18028,1],
            [1,28.25,5.125,1,13,3,4.75,1,1,2,0,1,420,8,1],
            [0,52.5,7,1,5,7,3,0,0,0,0,1,0,1,0],
            [0,19.17,0.585,0,5,3,0.585,1,0,0,1,1,160,1,0],
            [1,21,4.79,0,8,3,2.25,1,1,1,1,1,80,301,1],
            [1,21.17,0.25,0,7,7,0.25,0,0,0,0,1,280,205,0],
            [1,20.42,1.085,1,10,3,1.5,0,0,0,0,1,108,8,0],
            [1,27.58,2.04,0,5,3,2,1,1,3,1,1,370,561,1],
            [1,38.67,0.21,1,3,3,0.085,1,0,0,1,1,280,1,1],
            [1,30.58,2.71,0,6,3,0.125,0,0,0,1,0,80,1,0],
            [1,43.17,5,1,2,4,2.25,0,0,0,1,1,141,1,0],
            [1,18.58,10.29,1,0,0,0.415,0,0,0,0,1,80,1,0]
       ]

Python:

def _class_probability(class_value, header):
    """Return the unnormalised naive Bayes score of `test` for one class.

    The score is the class prior (share of `dataset` rows whose last element
    equals `class_value`) multiplied, for every position `i` of `test`, by the
    share of that class's rows in `mp` whose value at `i` equals `test[i]`.
    The intermediate "count / total" figures are printed along the way.

    Args:
        class_value: Class label to score (1 for "yes", 0 for "no").
        header: Text printed in front of the prior's "count / total" line.

    Returns:
        The product described above, or 0.0 when `mp` holds no rows for
        `class_value` (previously this case raised KeyError).

    Raises:
        ZeroDivisionError: If `dataset` is empty.
    """
    # Prior: how often the class occurs in the whole dataset.
    class_labels = [row[-1] for row in dataset]
    count = sum(1 for value in class_labels if value == class_value)
    total = len(class_labels)
    print(header+str(count)+" / "+str(total))
    probability = count/total

    # A class that never occurs has no bucket in mp; without any rows there
    # is nothing to count against, so its score is zero.
    class_rows = mp.get(class_value, [])
    if not class_rows:
        return 0.0

    # NOTE(review): `test` is read here as ONE sample - test[i] is compared
    # with the single cell row[i]. If `test` is a list of rows, nothing ever
    # matches and row[i] raises IndexError once i passes the row length.
    # Confirm what `test` holds before running this.
    # NOTE(review): values are matched with exact equality, so a value that
    # never appears in a class drives the whole product to 0.
    for i, value in enumerate(test):
        matches = sum(1 for row in class_rows if value == row[i])
        print('Para característica '+str(i+1))
        print(str(matches)+" / "+str(len(class_rows)))
        probability *= matches/len(class_rows)
    return probability


# Score of the sample for the positive class (label 1).
probYes = _class_probability(1, "Total si: ")
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Score of the sample for the negative class (label 0).
probNo = _class_probability(0, "Total no: ")

Python:

# Show both unnormalised class scores, one per line.
print(probYes, probNo, sep="\n")

Python:

# Normalise the "yes" score against the sum of both class scores.
evidence = probYes + probNo
if evidence == 0:
    # Both scores are zero (no class matched the sample on every feature),
    # so the ratio is undefined; report it instead of dividing by zero.
    prob = float("nan")
    print("La probabilidad es: indeterminada (probYes y probNo son 0)")
else:
    prob = probYes/evidence
    print("La probabilidad es: "+str(prob*100)+"%")

2) K-nearest neighbor code

Python:

# Split the dataset into its first three columns: the two plotted
# coordinates (x, y) and the value used to colour each point (label).
x, y, label = ([point[column] for point in dataset] for column in range(3))

import matplotlib.pyplot as plt

# Scatter plot of the first two dataset columns, coloured by the third one.
plt.scatter(x,y,c=label)
plt.show()

Python:

import math
def dist(testRow, trainRow):
    """Return the Euclidean distance between `testRow` and `trainRow`.

    The last element of `trainRow` is left out of the computation, so only
    the first ``len(trainRow) - 1`` positions of both rows are compared.
    """
    feature_count = len(trainRow) - 1
    squared_sum = 0.0
    for position in range(feature_count):
        delta = testRow[position] - trainRow[position]
        squared_sum += delta ** 2
    return math.sqrt(squared_sum)

Python:

# Read the point to classify as whitespace-separated coordinates.
print("Introduzca el punto que desea clasificar")
# Parsed as floats: the data in this post contains decimal values
# (e.g. 37.17), which int() would reject with ValueError.
test = [float(value) for value in input().split()]
# Number of neighbours that take part in the vote.
print("Introduzca a k")
k = int(input())

Python:

# Redraw the dataset points and overlay the point to classify in red.
plt.scatter(x,y,c=label)
plt.scatter(test[0],test[1],c='red')
plt.show()

Python:

# Pair every dataset row with its distance to the query point, nearest first.
# Sorting only on the distance keeps rows at equal distance in dataset order.
d = sorted(((dist(test, row), row) for row in dataset), key=lambda pair: pair[0])

# Keep the k closest rows. Slicing caps k at the number of available rows
# (indexing d[i] used to raise IndexError when k was larger); max() keeps a
# negative k from selecting rows, matching the empty range(k) of before.
knn = list()
print("K vecinos cercanos")
for distance, neighbour in d[:max(k, 0)]:
    print("punto: ("+str(neighbour[0])+", "+str(neighbour[1])+") con distancia: "+str(distance)+" y clase: "+str(neighbour[-1]))
    knn.append(neighbour)

Python:

# Collect the class (last element) of each selected neighbour.
labels = []
for neighbour in knn:
    labels.append(neighbour[-1])

# Majority vote: pick the distinct class that occurs most often.
candidates = set(labels)
pred = max(candidates, key=labels.count)
print('predicción: '+str(pred))

The attached file is the dataset used by both programs.

Trouble with prediction code, for the life of me I can't figure out why it isnt running properly. Help would be appreciated.	0	Jul 8, 2023
I Need Fix In Code	1	Apr 12, 2023
Please critique my code for fun learning project.	5	Jul 21, 2023
Need help with this script	4	Mar 12, 2023
Trying to build a SARIMAX model to forecast the S&P500 trend	0	Nov 5, 2023
How to use Densenet121 in monai	0	Feb 16, 2024
Help with code	2	Oct 11, 2022
Need help with this Python code.	2	Jun 13, 2023

Help for my project in the last minute

AjL0ra

Attachments

Ask a Question

Similar Threads

Members online

Forum statistics

Latest Threads