-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcnn_scraper.py
More file actions
47 lines (32 loc) · 1.13 KB
/
cnn_scraper.py
File metadata and controls
47 lines (32 loc) · 1.13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import csv
import sys
from csv import writer
from urllib.parse import urljoin

import bs4
import requests
from bs4 import BeautifulSoup
# Scrape the CNN "World" landing page: for every story card, follow its link,
# pull the description text from the article page, and record one CSV row
# (category, headline, link, description) per story.
#
# NOTE(review): the indentation of the original script was lost in the copy
# this was reconstructed from. The nesting below (story loop inside the
# category loop, one CSV row per story written after the description loop)
# is the most plausible reading -- confirm against the real file.

CNN_BASE_URL = 'https://edition.cnn.com'
WORLD_URL = f'{CNN_BASE_URL}/world'
OUTPUT_PATH = 'cnn_scraper.csv'
# Seconds to wait for any single HTTP response; without a timeout
# requests.get() can block forever on a stalled connection.
REQUEST_TIMEOUT = 10


def resolve_article_url(link):
    """Return an absolute URL for *link* as found in a story card.

    Site-relative links (``/2020/...``) are resolved against the CNN base
    URL; links that are already absolute (any scheme) or protocol-relative
    (``//host/...``) are kept pointing at their own host instead of being
    blindly prefixed with the CNN base URL.
    """
    return urljoin(CNN_BASE_URL + '/', link)


def _fetch_soup(url):
    """Download *url* and return it parsed with the lxml parser.

    Raises:
        requests.RequestException: on connection errors, timeouts, or a
            non-2xx HTTP status.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return BeautifulSoup(response.text, 'lxml')


def _scrape_description(url):
    """Print and return the description text of the article at *url*.

    Every ``pg-rail-tall__body`` div is printed; the text of the last one is
    returned. Returns an empty string when the page has no such div, so a
    story never inherits the description of the previous one.
    """
    description = ''
    for body in _fetch_soup(url).find_all('div', class_="pg-rail-tall__body"):
        description = body.text
        print(description)
        print()
    return description


def main():
    """Scrape the World page and write the results to ``cnn_scraper.csv``."""
    # Fetch the index first so a failed download does not leave behind a
    # truncated, header-only CSV file.
    index = _fetch_soup(WORLD_URL)

    # newline='' is what the csv module expects (avoids blank rows on
    # Windows); the context manager guarantees the file is flushed/closed.
    with open(OUTPUT_PATH, 'w', newline='', encoding='utf-8') as csv_file:
        csv_writer = csv.writer(csv_file)
        # Header spelling is part of the existing output format; keep as-is.
        csv_writer.writerow(['Catagory','Headline', 'Link', 'Description'])

        for header in index.find_all('div', class_='metadata-header__top'):
            if header.h1 is None:
                continue
            category = header.h1.text
            print(category)
            print()

            for card in index.find_all('div', class_="cd__content"):
                # Cards without an <h3><a> headline are not stories; skip.
                heading = card.h3.a if card.h3 is not None else None
                if heading is None:
                    continue
                headline = heading.text
                print(headline)

                first_anchor = card.find('a')
                link = first_anchor.get('href') if first_anchor is not None else None
                if not link:
                    continue
                print(link)

                try:
                    description = _scrape_description(resolve_article_url(link))
                except requests.RequestException as exc:
                    # Best-effort scrape: report the failure and move on
                    # instead of silently dropping the story.
                    print(f'skipping {link}: {exc}', file=sys.stderr)
                    continue

                csv_writer.writerow([category, headline, link, description])


if __name__ == '__main__':
    main()