11"""Module for retrieving and filtering topics from the 'Using Jenkins' category on Discourse."""
22
33import json
4+ import os
45import requests
6+ from utils import LoggerFactory
7+
8+ logger_factory = LoggerFactory .instance ()
9+ logger = logger_factory .get_logger ("collection" )
510
611BASE_URL = "https://community.jenkins.io"
712CATEGORY_SLUG = "using-jenkins"
813CATEGORY_ID = 7 # 'Using Jenkins' Category
9- OUTPUT_PATH = "../raw/discourse_topic_list.json"
14+ SCRIPT_DIR = os .path .dirname (os .path .abspath (__file__ ))
15+ OUTPUT_PATH = os .path .join (SCRIPT_DIR , ".." , "raw" , "discourse_topic_list.json" )
1016
1117
1218def fetch_page (category_slug , category_id , page ):
@@ -48,15 +54,17 @@ def get_category_topics(category_slug, category_id):
4854 explored_topics = {}
4955
5056 while True :
51- print ( f "Fetching page { page } ..." )
57+ logger . info ( "Fetching page %d ..." , page )
5258 data = fetch_page (category_slug , category_id , page )
5359 topics , more_topics_url = extract_topics (data )
5460
5561 right_category_topics , wrong_category_topics = get_wrong_and_correct_topics (topics )
5662
57- print (f"Page { page } - Found { len (topics )} topics" )
58- print (f"Right category Topics { len (right_category_topics )} "
59- f"- Wrong category Topics { len (wrong_category_topics )} " )
63+ logger .info ("Page %d - Found %d topics" , page , len (topics ))
64+ logger .info ("Right category Topics %d - Wrong category Topics %d" ,
65+ len (right_category_topics ),
66+ len (wrong_category_topics )
67+ )
6068
6169 for topic in right_category_topics :
6270 id_topic = topic ["id" ]
@@ -66,21 +74,21 @@ def get_category_topics(category_slug, category_id):
6674 explored_pages .add (page )
6775
6876 if not more_topics_url :
69- print ("No more topics to explore." )
77+ logger . info ("No more topics to explore." )
7078 break
7179
7380 # Extract the next page number from more_topics_url; this assumes the URL ends with "page=<int>" (anything else fails int() below and stops pagination)
7381 try :
7482 page = int (more_topics_url .split ('page=' )[- 1 ])
7583 except (IndexError , ValueError ):
76- print ("Failed to parse next page number." )
84+ logger . error ("Failed to parse next page number." )
7785 break
7886
7987 if page in explored_pages :
80- print ( f "Already explored page { page } ." )
88+ logger . info ( "Already explored page %d." , page )
8189 break
8290
83- print ( f "Explored { len (explored_topics .keys ())} topics" )
91+ logger . info ( "Explored %d topics" , len (explored_topics .keys ()))
8492 with open (OUTPUT_PATH , "w" , encoding = "utf-8" ) as f :
8593 json .dump (explored_topics , f , ensure_ascii = False , indent = 2 )
8694
0 commit comments