Help with my project at the last minute

Joined
Apr 23, 2022
Messages
1
Reaction score
0
Hi, I have a project for a course (I don't know how to translate "tratamiento de la información" into English). The point is that I have to code a naive Bayes and/or k-nearest-neighbor classifier from scratch, in any IDE, without using extensions or libraries. I managed to code both algorithms in Python, but I need to run them on the datasets provided by our professor. They are BIG datasets, and my code crashes because of the sheer amount of data. Could someone kindly help me adapt my code to handle such large datasets?
Here is the code:


1) Naive Bayes code
Python:
# Bucket the training rows by class label; the label is the last value of a row.
mp = {}
for sample in dataset:
    mp.setdefault(sample[-1], []).append(sample)

# Print each label followed by every row that was filed under it.
for cls, members in mp.items():
    print(cls)
    for sample in members:
        print(sample)
Python:
# Hard-coded sample data: a list of rows, each row a list of numeric values.
# NOTE(review): the last value of every row is 0, 1 or 2 -- presumably the
# class label, with the preceding values being the features; confirm against
# the attached dataset.
# NOTE(review): the row [2,0,0,2,14,9,...] appears twice and looks unlike the
# others -- possibly a metadata/header row rather than a sample; verify.
test = [
            [2,0,0,2,14,9,0,2,2,0,2,3,0,0,2],
            [1,37.17,4,1,7,4,5,1,0,0,1,0,280,1,0],
            [1,16.17,0.04,1,7,3,0.04,0,0,0,0,1,0,1,1],
            [1,51.83,2.04,0,0,0,1.5,0,0,0,0,1,120,2,0],
            [1,29.25,13,1,1,7,0.5,0,0,0,0,1,228,1,0],
            [1,31.08,3.085,1,7,3,2.5,0,1,2,1,1,160,42,0],
            [1,37.75,7,1,10,7,11.5,1,1,7,1,1,300,6,0],
            [1,23.5,2.75,1,0,0,4.5,0,0,0,0,1,160,26,0],
            [0,22.92,1.25,1,10,3,0.25,0,0,0,1,1,120,810,0],
            [1,28.17,0.125,0,3,3,0.085,0,0,0,0,1,216,2101,0],
            [1,42.75,4.085,1,5,3,0.04,0,0,0,0,1,108,101,0],
            [1,48.58,6.5,1,10,7,6,1,0,0,1,1,350,1,1],
            [1,39.92,5,1,2,4,0.21,0,0,0,0,1,550,1,0],
            [1,31.83,0.04,0,6,3,0.04,0,0,0,0,1,0,1,0],
            [1,34.17,1.54,1,12,3,1.54,1,1,1,1,1,520,50001,1],
            [1,33.58,2.75,1,6,3,4.25,1,1,6,0,1,204,1,1],
            [0,16.92,0.5,1,2,3,0.165,0,1,6,1,1,240,36,0],
            [0,22.42,11.25,0,13,7,0.75,1,1,4,0,1,0,322,1],
            [1,18.42,10.415,0,5,3,0.125,1,0,0,0,1,120,376,0],
            [1,34.08,6.5,1,5,3,0.125,1,0,0,1,1,443,1,0],
            [1,39.17,2.5,0,2,7,10,0,0,0,1,0,200,1,0],
            [1,29.42,1.25,1,8,3,1.75,0,0,0,0,1,200,1,0],
            [1,20,0,1,1,3,0.5,0,0,0,0,1,144,1,0],
            [0,71.58,0,1,7,3,0,0,0,0,0,2,184,1,1],
            [1,18.83,0.415,0,7,3,0.165,0,1,1,0,1,200,2,0],
            [1,43.08,0.375,0,7,3,0.375,1,1,8,1,1,300,163,1],
            [1,18.83,3.54,0,0,0,0,0,0,0,1,1,180,2,0],
            [0,21.75,1.75,0,4,2,0,0,0,0,0,1,160,1,0],
            [1,27.58,3.25,0,10,7,5.085,0,1,2,1,1,369,2,0],
            [1,29.42,1.25,1,7,7,0.25,0,1,2,1,1,400,109,0],
            [1,28.25,5.04,0,7,4,1.5,1,1,8,1,1,144,8,1],
            [1,31.57,0.375,1,1,3,0.875,1,0,0,1,0,928,1,0],
            [1,47.67,2.5,1,6,4,2.5,1,1,12,1,1,410,2511,1],
            [1,22.67,0.75,1,2,3,1.585,0,1,1,1,1,400,10,0],
            [0,24.83,4.5,1,8,3,1,0,0,0,1,1,360,7,0],
            [1,39.83,0.5,1,6,3,0.25,1,0,0,0,0,288,1,0],
            [1,49.5,7.585,1,2,4,7.585,1,1,15,1,1,0,5001,1],
            [1,39.17,1.625,1,7,3,1.5,1,1,10,0,1,186,4701,1],
            [1,34.67,1.08,1,6,3,1.165,0,0,0,0,0,28,1,0],
            [1,23.58,0.46,0,8,3,2.625,1,1,6,1,1,208,348,0],
            [2,0,0,2,14,9,0,2,2,0,2,3,0,0,2],
            [0,64.08,0.165,1,0,0,0,1,1,1,0,1,232,101,1],
            [1,42,9.79,1,13,7,7.96,1,1,8,0,1,0,1,1],
            [1,27.42,12.5,1,5,4,0.25,0,0,0,1,1,720,1,0],
            [1,16.5,0.125,1,7,3,0.165,0,0,0,0,1,132,1,0],
            [1,38.17,10.125,1,13,3,2.5,1,1,6,0,1,520,197,1],
            [1,21.08,4.125,0,2,7,0.04,0,0,0,0,1,140,101,0],
            [1,33.67,1.25,1,8,3,1.165,0,0,0,0,1,120,1,0],
            [0,28.17,0.585,1,5,3,0.04,0,0,0,0,1,260,1005,0],
            [1,20.67,0.835,0,7,3,2,0,0,0,1,0,240,1,0],
            [1,54.33,6.75,1,7,7,2.625,1,1,11,1,1,0,285,1],
            [0,17.67,0,0,4,0,0,0,0,0,0,1,86,1,0],
            [1,34,5.5,0,7,3,1.5,0,0,0,1,1,60,1,0],
            [1,29.83,3.5,1,7,3,0.165,0,0,0,0,1,216,1,0],
            [1,40.92,2.25,0,13,7,10,1,0,0,1,1,176,1,0],
            [1,25.67,12.5,1,12,3,1.21,1,1,67,1,1,140,259,1],
            [0,24.75,13.665,1,10,7,1.5,0,0,0,0,1,280,2,0],
            [1,34,4.5,1,5,3,1,1,0,0,1,1,240,1,0],
            [1,48.5,4.25,1,6,3,0.125,1,0,0,1,1,225,1,1],
            [1,33.17,3.04,0,7,7,2.04,1,1,1,1,1,180,18028,1],
            [1,28.25,5.125,1,13,3,4.75,1,1,2,0,1,420,8,1],
            [0,52.5,7,1,5,7,3,0,0,0,0,1,0,1,0],
            [0,19.17,0.585,0,5,3,0.585,1,0,0,1,1,160,1,0],
            [1,21,4.79,0,8,3,2.25,1,1,1,1,1,80,301,1],
            [1,21.17,0.25,0,7,7,0.25,0,0,0,0,1,280,205,0],
            [1,20.42,1.085,1,10,3,1.5,0,0,0,0,1,108,8,0],
            [1,27.58,2.04,0,5,3,2,1,1,3,1,1,370,561,1],
            [1,38.67,0.21,1,3,3,0.085,1,0,0,1,1,280,1,1],
            [1,30.58,2.71,0,6,3,0.125,0,0,0,1,0,80,1,0],
            [1,43.17,5,1,2,4,2.25,0,0,0,1,1,141,1,0],
            [1,18.58,10.29,1,0,0,0.415,0,0,0,0,1,80,1,0]
       ]
Python:
def _class_score(rows, groups, sample, label, total_msg):
    """Return the unnormalised naive Bayes score of ``sample`` for one class.

    The score is the class prior (share of ``rows`` whose last value equals
    ``label``) multiplied, for every position ``i`` of ``sample``, by the
    fraction of that class's rows whose value at ``i`` equals ``sample[i]``.
    The intermediate "count / total" figures are printed as they are computed.

    Args:
        rows: all training rows; the last value of each row is its class.
        groups: mapping from class label to the list of rows of that class.
        sample: ONE flat feature vector (not a list of rows).
        label: the class to score.
        total_msg: text printed in front of the prior's "count / total".

    Returns:
        The score as a float; 0.0 when ``groups`` holds no rows for ``label``.

    Raises:
        ValueError: if ``rows`` is empty, if ``sample`` is a list of rows
            rather than a single row, or if ``sample`` has more values than
            the training rows of this class have columns.
    """
    if not rows:
        raise ValueError("dataset is empty: cannot estimate class priors")
    # The comparison below is value-against-value, so a nested list can never
    # match; fail loudly instead of silently producing a probability of zero.
    if sample and isinstance(sample[0], (list, tuple)):
        raise ValueError(
            "test must be a single flat feature vector, not a list of rows"
        )

    # Class prior.
    count = sum(1 for row in rows if row[-1] == label)
    total = len(rows)
    print(total_msg + str(count) + " / " + str(total))
    score = count / total

    # A class without training rows cannot be scored (and would otherwise
    # raise KeyError / ZeroDivisionError).
    members = groups.get(label, [])
    if not members:
        return 0.0
    if len(sample) > min(len(row) for row in members):
        raise ValueError(
            "test has more values than the training rows have columns"
        )

    # Per-feature conditional frequencies within the class.
    total = len(members)
    for i, value in enumerate(sample):
        count = sum(1 for row in members if value == row[i])
        print('Para característica ' + str(i + 1))
        print(str(count) + " / " + str(total))
        score *= count / total
    return score


probYes = _class_score(dataset, mp, test, 1, "Total si: ")
probNo = _class_score(dataset, mp, test, 0, "Total no: ")
Python:
# Show both unnormalised class scores, one per line (probYes, then probNo).
print(probYes, probNo, sep="\n")
Python:
# Normalise the "yes" score by the sum of both scores and report it as a percentage.
evidence = probYes + probNo
prob = probYes / evidence
print("La probabilidad es: " + str(prob * 100) + "%")
2) K-nearest neighbor code
Python:
# Split every dataset row into its plot coordinates (first and second value)
# and the value used to colour the point (third value).
x = [point[0] for point in dataset]
y = [point[1] for point in dataset]
label = [point[2] for point in dataset]

import matplotlib.pyplot as plt

# Scatter plot of all rows, coloured by their third value.
plt.scatter(x, y, c=label)
plt.show()
Python:
import math
def dist(testRow, trainRow):
    """Return the Euclidean distance between a query row and a training row.

    The last value of ``trainRow`` is skipped (it is not treated as a
    coordinate), so only the first ``len(trainRow) - 1`` positions of both
    rows are compared.
    """
    feature_count = len(trainRow) - 1
    squared_sum = 0.0
    for idx in range(feature_count):
        delta = testRow[idx] - trainRow[idx]
        squared_sum += delta ** 2
    return math.sqrt(squared_sum)
Python:
# Read the query point as whitespace-separated numbers. float() is used rather
# than int() so that decimal coordinates (e.g. "37.17 4") are accepted; whole
# numbers still parse and compare equal to their integer values.
print("Introduzca el punto que desea clasificar")
test = [float(token) for token in input().split()]
# Number of neighbours that take part in the vote.
print("Introduzca a k")
k = int(input())
Python:
# Redraw the dataset points, then overlay the query point in red.
plt.scatter(x, y, c=label)
query_x, query_y = test[0], test[1]
plt.scatter(query_x, query_y, c='red')
plt.show()
Python:
# Pair every training row with its distance to the query point and order the
# pairs from nearest to farthest (list.sort is stable, so equal distances keep
# their dataset order).
d = [(dist(test, row), row) for row in dataset]
d.sort(key=lambda pair: pair[0])

# Keep the k closest rows. Slicing instead of indexing d[0] .. d[k-1] means a k
# larger than the dataset yields every row rather than raising IndexError;
# max(k, 0) makes a negative k select nothing, exactly like range(k) did.
knn = []
print("K vecinos cercanos")
for distance, neighbor in d[:max(k, 0)]:
    print("punto: (" + str(neighbor[0]) + ", " + str(neighbor[1])
          + ") con distancia: " + str(distance)
          + " y clase: " + str(neighbor[-1]))
    knn.append(neighbor)
Python:
# Majority vote: collect the class (last value) of each selected neighbour and
# pick the class that occurs most often among them.
labels = [neighbor[-1] for neighbor in knn]
candidates = set(labels)
pred = max(candidates, key=labels.count)
print('predicción: ' + str(pred))
The attached file is the dataset for both programs.
 

Attachments

  • sb1-T.txt
    88 KB · Views: 12

Ask a Question

Want to reply to this thread or ask your own question?

You'll need to choose a username for the site, which only takes a couple of moments. After that, you can post your question and our members will help you out.

Ask a Question

Members online

Forum statistics

Threads
473,913
Messages
2,570,027
Members
46,420
Latest member
IsobelScha

Latest Threads

Top