# Source: B_CGPT/Python - Media Library Scanner with Threading.txt (Hablakes/B_CGPT on GitHub)

import concurrent.futures
import os
import re
import sys
import threading

import requests

# Guards every append to the shared results list made by worker threads
lock = threading.Lock()

# Shared accumulator: one formatted entry per successfully processed URL
results = []

def process_url(url, lock):
    """Fetch a page and record its title and meta description.

    Downloads ``url`` with ``requests``, extracts the ``<title>`` text and
    the ``<meta name="description">`` content from the HTML with regular
    expressions, and appends a formatted entry to the module-level
    ``results`` list while holding ``lock``.

    A page that lacks either tag is still recorded; the missing field is
    written as an empty string.

    Args:
        url: Address of the page to fetch.
        lock: Lock (usable as a context manager) guarding ``results``.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    # Without a timeout a stalled server would block this worker indefinitely.
    response = requests.get(url, timeout=10)
    html = response.text

    # Extract the title and description from the HTML
    title_match = re.search(r"<title>(.+?)</title>", html)
    description_match = re.search(
        r'<meta name="description" content="(.+?)">', html
    )

    # re.search returns None when a tag is absent; calling .group() on it
    # would raise AttributeError and lose the whole entry.
    title = title_match.group(1) if title_match else ""
    description = description_match.group(1) if description_match else ""

    # Store the result in the results list
    result = "{}: {}\n{}\n".format(url, title, description)
    with lock:
        results.append(result)

def main():
    """Process every URL concurrently and write the entries to results.txt.

    Submits ``process_url`` for each item in ``urls`` to a pool of four
    worker threads, reports any worker failure on stderr, and then writes
    the accumulated ``results`` entries to ``results.txt`` as UTF-8.
    """
    # NOTE(review): `urls` is not defined in this file as shown; it must
    # exist at module level before main() runs — confirm where it comes from.
    with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
        # Keep each future so that worker exceptions can be reported
        # instead of being silently discarded.
        future_to_url = {
            executor.submit(process_url, url, lock): url for url in urls
        }
        for future in concurrent.futures.as_completed(future_to_url):
            error = future.exception()
            if error is not None:
                print(
                    "Failed to process {}: {!r}".format(
                        future_to_url[future], error
                    ),
                    file=sys.stderr,
                )

    # Scraped text may contain characters outside the locale's default
    # encoding, so the output encoding is fixed to UTF-8.
    with open("results.txt", "w", encoding="utf-8") as f:
        f.writelines(results)

# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()