-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathScrap.py
More file actions
62 lines (50 loc) · 1.65 KB
/
Scrap.py
File metadata and controls
62 lines (50 loc) · 1.65 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import csv
from time import sleep
from typing import List
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
class Scrap:
    """Scraper for book titles from a books.toscrape.com-style catalogue."""

    @staticmethod
    def get_data(base_url, number):
        """Collect book titles from up to *number* catalogue pages.

        Starts at *base_url* and follows each page's "next" pagination
        link. Returns the accumulated list of title strings.

        Note: requires network access; pauses 1s between pages to be
        polite to the server.
        """
        titles = []
        url = base_url
        pages_done = 0
        while url and pages_done < number:
            print(f'Extracting all book titles......{url}')
            sleep(1)
            # BUG FIX: the original fetched the page only once before the
            # loop, so every iteration re-parsed the same (stale) soup and
            # built a bogus next URL from base_url. Fetch each page here.
            res = requests.get(url)
            soup = BeautifulSoup(res.text, 'html.parser')
            # First 'row' is the page header; book listings follow.
            rows = soup.find_all(class_='row')[1:]
            for row in rows:
                for link in row.find_all('a'):
                    title = link.get('title')
                    # Anchors without a title attribute are thumbnails etc.
                    if title is not None:
                        titles.append(title)
            pages_done += 1
            # Follow the real "next" link instead of guessing page numbers.
            next_li = soup.find(class_='next')
            if next_li is not None and next_li.a is not None:
                url = urljoin(url, next_li.a.get('href'))
            else:
                url = None
        print(titles)
        return titles
def write_to_csv(titles: List[str], format: str, name: str) -> None:
    """Write *titles* to file *name* in the given *format*.

    format == 'csv': one title per row, preceded by an uppercase header row.
    format == 'txt': one title per line, preceded by the header line.
    Any other format prints a hint and writes nothing.
    """
    header = 'Titles from site'.upper()
    if format == 'csv':
        with open(name, 'w', newline='') as file:
            writer = csv.writer(file)
            writer.writerow([header])
            # Each title becomes its own single-column row.
            writer.writerows([t] for t in titles)
    elif format == 'txt':
        with open(name, 'w') as f:
            f.write(header + '\n')
            for line in titles:
                f.write(line + '\n')
    else:
        print('[INFO] csv or txt')
        # BUG FIX: original fell through and claimed a file was created
        # even for an unsupported format.
        return
    print(f'[INFO] Created {format} file with data')
if __name__ == '__main__':
    # Guarded entry point: importing this module no longer triggers a
    # network scrape as a side effect.
    data = Scrap.get_data('https://books.toscrape.com/?', 2)
    write_to_csv(data, 'csv', 'output.csv')