J
Jai
my code :
#!/usr/bin/env python
from bs4 import BeautifulSoup
import re,urllib2,urlparse, MySQLdb
def get_domain(url):
return urlparse.urlparse(url).netloc
def men_tshirts2(main_link, cat_link,db,cursor):
#print main_link
for cat,link in cat_link.iteritems():
cat = str(cat)
#print cat, link
page = urllib2.urlopen(link)
soup = BeautifulSoup(page)
page.close()
item = soup.find_all("div",attrs={"class":"itemTitle"})
price = soup.find_all("div", attrs={"class":"itemPrice"})
item_list =[]
price_list =[]
seller_list =[]
for x,y in zip(item,price):
item_content=str(x.a.string)
price = str(y.p.string)
link = str(x.a.get("href"))
page =urllib2.urlopen(link)
soup = BeautifulSoup(page)
page.close()
data = soup.find_all("span", attrs={"class":"mbg-nw"})
seller = str(data[0].string)
#print cat,item_content,price,seller
gender = "men"
sql = """insert into fashion(GENDER,links,category,item_content,price,seller) VAlUES('%s','%s','%s','%s','%s','s')"""
cursor.execute(sql,(gender,main_link,cat,item_content,price,seller))
db.commit()
#except:
# db.rollback()
#print len(gender),len(main_link),len(cat),len(item_content),len(price),len(seller)
def men_tshirts(db,cursor):
main_link = "http://fashion.ebay.in/index.html#men_tshirts"
domane = get_domain(main_link)
main_page = urllib2.urlopen(main_link)
main_soup=BeautifulSoup(main_page)
main_page.close()
data = main_soup.find_all("div",attrs= {"class":"itmTitle"})
price = main_soup.find_all("span",attrs={"class":"catlblTitle"})
cat_link = {}
for x, y in zip(data, price):
#cat= str(x.a.string)+":"+str(y.string)
cat= str(x.a.string)
link= "http://"+domane+"/"+str(x.a.get("href"))
#print cat, link
cat_link[cat] = link
men_tshirts2(main_link, cat_link,db,cursor)
if __name__=="__main__":
db = MySQLdb.connect("localhost","root","india123","ebay_db" )
cursor = db.cursor()
men_tshirts(db,cursor)
db.close()
++++++++++++++++++++++++++++++++++++++++++++++++++
sql structure :-
mysql> describe fashion;
+--------------+--------------+------+-----+-------------------+----------------+
| Field | Type | Null | Key | Default | Extra |
+--------------+--------------+------+-----+-------------------+----------------+
| id | int(11) | NO | PRI | NULL | auto_increment |
| GENDER | varchar(6) | YES | | NULL | |
| links | varchar(255) | YES | | NULL | |
| category | varchar(255) | YES | | NULL | |
| item_content | varchar(255) | YES | | NULL | |
| price | varchar(10) | YES | | NULL | |
| seller | varchar(20) | YES | | NULL | |
| created_on | timestamp | NO | | CURRENT_TIMESTAMP | |
+--------------+--------------+------+-----+-------------------+----------------+
8 rows in set (0.00 sec)
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
error:
query = query % db.literal(args)
TypeError: not all arguments converted during string formatting
#!/usr/bin/env python
from bs4 import BeautifulSoup
import re,urllib2,urlparse, MySQLdb
def get_domain(url):
return urlparse.urlparse(url).netloc
def men_tshirts2(main_link, cat_link,db,cursor):
#print main_link
for cat,link in cat_link.iteritems():
cat = str(cat)
#print cat, link
page = urllib2.urlopen(link)
soup = BeautifulSoup(page)
page.close()
item = soup.find_all("div",attrs={"class":"itemTitle"})
price = soup.find_all("div", attrs={"class":"itemPrice"})
item_list =[]
price_list =[]
seller_list =[]
for x,y in zip(item,price):
item_content=str(x.a.string)
price = str(y.p.string)
link = str(x.a.get("href"))
page =urllib2.urlopen(link)
soup = BeautifulSoup(page)
page.close()
data = soup.find_all("span", attrs={"class":"mbg-nw"})
seller = str(data[0].string)
#print cat,item_content,price,seller
gender = "men"
sql = """insert into fashion(GENDER,links,category,item_content,price,seller) VAlUES('%s','%s','%s','%s','%s','s')"""
cursor.execute(sql,(gender,main_link,cat,item_content,price,seller))
db.commit()
#except:
# db.rollback()
#print len(gender),len(main_link),len(cat),len(item_content),len(price),len(seller)
def men_tshirts(db,cursor):
main_link = "http://fashion.ebay.in/index.html#men_tshirts"
domane = get_domain(main_link)
main_page = urllib2.urlopen(main_link)
main_soup=BeautifulSoup(main_page)
main_page.close()
data = main_soup.find_all("div",attrs= {"class":"itmTitle"})
price = main_soup.find_all("span",attrs={"class":"catlblTitle"})
cat_link = {}
for x, y in zip(data, price):
#cat= str(x.a.string)+":"+str(y.string)
cat= str(x.a.string)
link= "http://"+domane+"/"+str(x.a.get("href"))
#print cat, link
cat_link[cat] = link
men_tshirts2(main_link, cat_link,db,cursor)
if __name__=="__main__":
db = MySQLdb.connect("localhost","root","india123","ebay_db" )
cursor = db.cursor()
men_tshirts(db,cursor)
db.close()
++++++++++++++++++++++++++++++++++++++++++++++++++
sql structure :-
mysql> describe fashion;
+--------------+--------------+------+-----+-------------------+----------------+
| Field | Type | Null | Key | Default | Extra |
+--------------+--------------+------+-----+-------------------+----------------+
| id | int(11) | NO | PRI | NULL | auto_increment |
| GENDER | varchar(6) | YES | | NULL | |
| links | varchar(255) | YES | | NULL | |
| category | varchar(255) | YES | | NULL | |
| item_content | varchar(255) | YES | | NULL | |
| price | varchar(10) | YES | | NULL | |
| seller | varchar(20) | YES | | NULL | |
| created_on | timestamp | NO | | CURRENT_TIMESTAMP | |
+--------------+--------------+------+-----+-------------------+----------------+
8 rows in set (0.00 sec)
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
error:
query = query % db.literal(args)
TypeError: not all arguments converted during string formatting