Skip to content

Commit 2dee46a

Browse files
Draft script
1 parent 684750c commit 2dee46a

File tree

1 file changed

+113
-0
lines changed

1 file changed

+113
-0
lines changed

scripts/net.py

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
import csv
2+
import logging
3+
import sys
4+
import warnings
5+
from collections import Counter
6+
from pathlib import Path
7+
8+
import httpx
9+
from bs4 import BeautifulSoup, MarkupResemblesLocatorWarning
10+
from ics import Calendar
11+
12+
13+
# Silence bs4's MarkupResemblesLocatorWarning for the whole run; presumably it
# is triggered by event descriptions that look like a URL or file path when
# they are handed to BeautifulSoup further down -- TODO confirm.
warnings.filterwarnings("ignore", category=MarkupResemblesLocatorWarning)
14+
15+
16+
def get_uris(element):
    """Return the normalized link targets of the anchors inside *element*.

    *element* is any object offering BeautifulSoup's ``find_all``. Anchors
    without an ``href`` attribute are skipped so the result never contains
    ``None``; callers index into the list and pass its items to the HTTP
    client. Each remaining target goes through ``normalize_uri``.
    """
    return [
        normalize_uri(anchor['href'])
        for anchor in element.find_all('a', href=True)]
19+
20+
21+
# Oldest start year an event may have to be kept; it is the first
# command-line argument. Exit with a usage line rather than a traceback when
# the argument is missing or is not an integer.
try:
    minimum_year = int(sys.argv[1])
except (IndexError, ValueError):
    sys.exit(f'usage: {sys.argv[0]} MINIMUM_YEAR')

logging.basicConfig()
L = logging.getLogger('net')
L.setLevel(logging.INFO)

# Rows destined for the CSV file; each one is a dict keyed by column name.
event_maps = []

uri = 'https://pycon.org'
# Follow redirects, as the calendar downloads further down do, and fail on an
# HTTP error status instead of scraping an error page with no conferences.
response = httpx.get(uri, follow_redirects=True)
response.raise_for_status()
text = response.text
L.info(f'uri = {uri}')
L.info(f'text_length = {len(text)}')
soup = BeautifulSoup(text, 'html.parser')
37+
38+
39+
def normalize_uri(uri):
    """Strip trailing slashes from a URI that contains exactly three slashes.

    For example ``https://pycon.org/`` becomes ``https://pycon.org``. Any
    other value is returned unchanged, including ``None`` and the empty
    string.
    """
    # str.count reads the one figure needed without building a histogram of
    # every character; the truthiness guard makes the None pass-through
    # explicit.
    if uri and uri.count('/') == 3:  # noqa: PLR2004
        return uri.rstrip('/')
    return uri
44+
45+
46+
# Turn every ".conference" element of the landing page into one CSV row.
conference_elements = soup.select('.conference')
L.info(f'event_count = {len(conference_elements)}')
for conference_element in conference_elements:
    name = conference_element.find('h3').text
    location = conference_element.select_one('.location').text
    uris = get_uris(conference_element)
    row = {
        'Subject': name,
        'Location': location,
        'Website URL': uris[0]}
    # A second link is optional; fall back to an empty cell when it is absent.
    if len(uris) > 1:
        row['Updates URL'] = uris[1]
    else:
        row['Updates URL'] = ''
    event_maps.append(row)
57+
58+
59+
# The calendar links are taken from the page's iframe, so exactly one iframe
# is required. Report the actual count: the check also fires when none exist.
iframe_elements = soup.find_all('iframe')
if len(iframe_elements) != 1:
    L.error(f'expected exactly one iframe, found {len(iframe_elements)}')
    sys.exit(1)
ics_uris = get_uris(iframe_elements[0])
64+
65+
66+
# Download each candidate feed until the one named "Python Events Calendar"
# is found; `c` is then the calendar used by the rest of the script.
for uri in ics_uris:
    text = httpx.get(uri, follow_redirects=True).text
    L.info(f'uri = {uri}')
    L.info(f'text_length = {len(text)}')
    c = Calendar(text)
    # The calendar name is looked up among the properties held in `extra`.
    d = {line.name: line.value for line in c.extra}
    if d.get('X-WR-CALNAME') == 'Python Events Calendar':
        break
else:
    # No feed matched, or there were no feeds at all. Stop here instead of
    # continuing with the last calendar downloaded or with `c` undefined.
    L.error('no feed named "Python Events Calendar" was found')
    sys.exit(1)
74+
75+
76+
# Add one CSV row per calendar event starting in `minimum_year` or later.
for event in c.events:
    # Filter first so descriptions of discarded events are never parsed.
    start_date = event.begin.date()
    if start_date.year < minimum_year:
        continue
    # The website is the first link in the HTML description, if there is one.
    website_uri = ''
    description = event.description
    if description:
        # Use a separate name so the landing-page `soup` is not overwritten.
        description_soup = BeautifulSoup(description, 'html.parser')
        a_element = description_soup.find('a')
        if a_element is not None:
            website_uri = a_element.get('href', '')
    event_maps.append({
        'Subject': event.name,
        'Start Date': start_date,
        'End Date': event.end.date(),
        'Location': event.location,
        'Website URL': website_uri})
93+
94+
95+
# Columns of the output file, in order. Row keys that are not listed here are
# dropped by extrasaction='ignore', and listed columns that a row lacks are
# written as empty cells.
# NOTE(review): the conference rows carry an 'Updates URL' key that is not in
# this list, so it never reaches the file -- confirm that this is intended.
column_names = [
    'Subject',
    'Start Date',
    'End Date',
    'Location',
    'Country',
    'Venue',
    'Tutorial Deadline',
    'Talk Deadline',
    'Website URL',
    'Proposal URL',
    'Sponsorship URL']
# newline='' lets the csv module control line endings itself, which avoids
# blank rows on Windows. An explicit UTF-8 encoding keeps non-ASCII names
# from depending on the platform's default encoding.
with Path('conferences.csv').open('w', newline='', encoding='utf-8') as f:
    writer = csv.DictWriter(
        f,
        fieldnames=column_names,
        extrasaction='ignore')
    writer.writeheader()
    writer.writerows(event_maps)

0 commit comments

Comments
 (0)