1+ # Script created in Python 3.10.8 using only standard libraries
2+ import argparse
3+ import os
4+ import json
5+ import shutil
6+ import time
7+ from pathlib import Path
8+
# Recursively locates every "metadata.json" under the given directory and
# builds a mapping of query id -> query info (see get_query_info).
def get_meta_data_and_tests(input_path: str, metadata_file='metadata.json'):
    """Collect metadata and test samples for every query below input_path.

    Queries whose metadata lacks an "id" field are skipped.
    """
    queries_data = {}

    for metadata_path in Path(input_path).rglob(metadata_file):
        info = get_query_info(os.path.dirname(metadata_path), metadata_file)
        query_id = info.get("id")
        if query_id:
            queries_data[query_id] = info

    return queries_data
22+
# Creates a dictionary with the query information present in the "./metadata.json" file and the "./test" directory
def get_query_info(query_path: str, metadata_file='metadata.json', test_extensions=(".json", ".yaml", ".tf", ".dockerfile")):
    """Describe a single query from its metadata file and test samples.

    Reads ``<query_path>/<metadata_file>``, derives a GitHub URL from the
    repository-relative part of the path, and gathers all positive/negative
    test files ("positive.tf", "positive1.tf", ...) from ``<query_path>/test``.

    Returns the metadata dict augmented with the keys 'githubUrl',
    'true_positives' and 'true_negatives'.

    Raises FileNotFoundError when the metadata file, the test folder, or the
    expected-results file is missing.
    """
    metadata_file_path = os.path.join(query_path, metadata_file)

    if not os.path.exists(metadata_file_path):
        raise FileNotFoundError(f"\033[31mFile {metadata_file} doesn't exist in {query_path}\033[0m")

    # Get information inside the "./metadata.json" file
    with open(metadata_file_path, 'r') as f:
        query_metadata_content = json.load(f)

    # Build the GitHub URL from the path portion starting at "assets/queries".
    # Normalize separators first: the original searched for a backslash-joined
    # "assets\queries", which never matches on POSIX systems and leaked
    # backslashes into the generated URL on Windows.
    normalized_path = metadata_file_path.replace("\\", "/")
    folder_index = normalized_path.rfind("assets/queries")
    # If the folder name is found, keep everything after it; otherwise fall
    # back to the whole path (same behavior as before).
    if folder_index != -1:
        path_after_folder = normalized_path[folder_index:]
    else:
        path_after_folder = normalized_path
    query_metadata_content['githubUrl'] = (
        f"https://github.com/Checkmarx/kics/tree/master/{os.path.dirname(path_after_folder)}"
    )

    test_folder_path = os.path.join(query_path, 'test')
    if not os.path.isdir(test_folder_path):
        raise FileNotFoundError(f"\033[31mFolder {test_folder_path} doesn't exist in {query_path}\033[0m")

    expected_result_file_path = os.path.join(test_folder_path, 'positive_expected_result.json')
    if not os.path.exists(expected_result_file_path):
        raise FileNotFoundError(f"\033[31mFile {expected_result_file_path} doesn't exist in {test_folder_path}\033[0m")

    with open(expected_result_file_path) as f:
        expected_results = json.load(f)

    true_positives = []
    true_negatives = []
    # A single index replaces the original pair of counters, which were always
    # incremented together and therefore always equal.
    test_index = 0

    # Scan "./test" for sample files: index 0 uses the bare names
    # ("positive.tf"), later indexes are suffixed ("positive1.tf", ...).
    # Stop at the first non-zero index for which no file of any extension exists.
    while True:
        found_any = False
        if test_index == 0:
            positive = "positive"
            negative = "negative"
        else:
            positive = f"positive{test_index}"
            negative = f"negative{test_index}"

        for extension in test_extensions:
            # Positive tests
            positive_file_path = os.path.join(test_folder_path, positive + extension)
            if os.path.exists(positive_file_path):
                with open(positive_file_path, "r") as f:
                    test_content = f.read()

                if test_index == 0:
                    # The unsuffixed sample owns every expected result
                    test_results = expected_results
                else:
                    # Expected results name their file under one of several
                    # keys depending on the platform's result schema
                    test_results = [
                        r for r in expected_results
                        if (r.get("fileName") or r.get("filename") or r.get("file")) == positive + extension
                    ]
                # Dedupe and sort so output is deterministic (list(set(...))
                # produced arbitrary ordering)
                lines = sorted({item["line"] for item in test_results})
                true_positives.append(dict(fileName=positive + extension, lines=lines, code=test_content))
                found_any = True

            # Negative tests
            negative_file_path = os.path.join(test_folder_path, negative + extension)
            if os.path.exists(negative_file_path):
                with open(negative_file_path, "r") as f:
                    test_content = f.read()

                true_negatives.append(dict(fileName=negative + extension, code=test_content))
                found_any = True

        if not found_any and test_index != 0:
            break
        test_index += 1

    query_metadata_content['true_positives'] = true_positives
    query_metadata_content['true_negatives'] = true_negatives
    return query_metadata_content
113+
# Utility for generating ".md" documentation
def format_negative_tests(negative_tests: list) -> str:
    """Render negative test samples as markdown fenced code blocks.

    negative_tests: list of dicts with 'fileName' and 'code' keys.
    (The parameter previously shadowed the function's own name and was
    mis-annotated as dict.)
    """
    result = ''

    for idx, test in enumerate(negative_tests):
        filename = test.get('fileName')
        extension = filename.split(".")[-1]
        title = f'Negative test num. {idx + 1} - {extension} file'
        code = test.get('code')

        # If the query has more than 3 tests, the remaining tests are placed in a drop down
        if idx <= 2:
            result += f'```{extension} title="{title}"\n{code}\n```\n'
        else:
            result += f"<details><summary>{title}</summary>\n\n"
            result += f'```{extension}\n{code}\n```\n'
            result += "</details>\n"

    return result
133+
# Utility for generating ".md" documentation
def format_positive_tests(positive_tests: list) -> str:
    """Render positive test samples as markdown fenced code blocks.

    positive_tests: list of dicts with 'fileName', 'code' and 'lines' keys;
    'lines' holds the line numbers to highlight via hl_lines.
    (Fixes the "Postitive" typo in the rendered title and the dict
    mis-annotation; tolerates a missing/None 'lines' value.)
    """
    result = ''

    for idx, test in enumerate(positive_tests):
        filename = test.get('fileName')
        extension = filename.split(".")[-1]
        title = f'Positive test num. {idx + 1} - {extension} file'
        code = test.get('code')

        # hl_lines="1 2 3" tells the markdown renderer which lines to highlight
        highlight_lines = test.get('lines')
        if highlight_lines:
            results_lines = 'hl_lines="' + " ".join(str(line) for line in highlight_lines) + '"'
        else:
            results_lines = ''

        # If the query has more than 3 tests, the remaining tests are placed in a drop down
        if idx <= 2:
            result += f'```{extension} title="{title}" {results_lines}\n{code}\n```\n'
        else:
            result += f"<details><summary>{title}</summary>\n\n"
            result += f'```{extension} {results_lines}\n{code}\n```\n'
            result += "</details>\n"

    return result
166+
# Utility for generating ".md" documentation
def format_severity(severity: str) -> str:
    """Wrap a severity label in a colored HTML <span>.

    The label is normalized with str.capitalize() before lookup.
    """
    colors = {'High': '#C00', 'Medium': '#C60', 'Low': '#CC0', 'Info': '#00C', 'Trace': '#CCC'}
    severity = severity.capitalize()
    # Fall back to black instead of rendering the literal "color:None"
    # for unknown severity levels
    color = colors.get(severity, '#000')
    return f'<span style="color:{color}">{severity}</span>'
173+
# Generates a ".md" file for each query
def generate_md_docs(queries_database: dict, output_path: str, template_file_path='template.md', delete_folders: bool = False):
    """Write one markdown page per query into per-platform folders.

    queries_database: mapping of query id -> query info (as returned by
    get_meta_data_and_tests).
    template_file_path: markdown template containing <QUERY_ID>-style
    placeholders.
    delete_folders: when True, first remove "<platform>-queries" folders left
    over from previous runs.

    Raises FileNotFoundError when the template file doesn't exist.
    """
    # Ensure that we are deleting old files generated by this script
    if delete_folders:
        platforms = {f"{value.get('platform').lower()}-queries"
                     for value in queries_database.values()
                     if value.get("platform") is not None}

        # Get a list of all the folders in the path
        folders = [folder for folder in os.listdir(output_path)
                   if os.path.isdir(os.path.join(output_path, folder))]

        # Iterate over each folder and check if its name matches any of the platforms
        for folder in folders:
            if folder.lower() in platforms:
                folder_path = os.path.join(output_path, folder)
                shutil.rmtree(folder_path)  # Delete the folder and all its contents

    if not os.path.exists(template_file_path):
        raise FileNotFoundError("\033[31mtemplate_path doesn't exist in the operating system\033[0m")

    with open(template_file_path, 'r') as f:
        doc_template = f.read()

    for key, query_data in queries_database.items():
        # Fill every placeholder in a fresh copy of the template
        # (str.replace returns a new string, so doc_template is untouched)
        query_doc = doc_template.replace("<QUERY_ID>", key).replace(
            "<QUERY_NAME>", query_data.get('queryName')).replace(
            "<PLATFORM>", query_data.get('platform')).replace(
            "<SEVERITY>", format_severity(query_data.get('severity'))).replace(
            "<CATEGORY>", query_data.get('category')).replace(
            "<GITHUB_URL>", query_data.get('githubUrl')).replace(
            "<DESCRIPTION_TEXT>", query_data.get('descriptionText')).replace(
            "<DESCRIPTION_URL>", query_data.get('descriptionUrl')).replace(
            "<POSITIVE_TESTS>", format_positive_tests(query_data.get('true_positives'))).replace(
            "<NEGATIVE_TESTS>", format_negative_tests(query_data.get('true_negatives')))

        # e.g. "<output>/terraform-queries/aws/"; cloudProvider may be absent
        platform_folder_path = os.path.join(output_path,
                                            f"{query_data.get('platform').lower()}-queries",
                                            query_data.get('cloudProvider', '').lower())
        # exist_ok avoids the check-then-create race of the previous version
        os.makedirs(platform_folder_path, exist_ok=True)

        # If you are having problems rendering the ".md" pages, try adding encoding='utf-8' as a parameter of "open" function invocation
        with open(f"{os.path.join(platform_folder_path, key)}.md", "w") as f:
            f.write(query_doc)
219+
# Export a dictionary to a "json" file
def export_to_json(queries_database: dict, output_path: str):
    """Dump the queries database to "queries_database.json" in output_path."""
    target_file = os.path.join(output_path, "queries_database.json")
    with open(target_file, "w") as handle:
        json.dump(queries_database, handle, indent=4)
224+
def main():
    """Parse CLI arguments and generate the requested documentation output.

    Supports two output formats: "json" (single database file) and "md"
    (one page per query, requires --t template).
    """
    start_time = time.time()

    # Script arguments
    parser = argparse.ArgumentParser(description="Create/Update documentation page for each query")
    parser.add_argument('-p', type=Path, dest='input_path',
                        help='Folder path to read "metadata.json".', required=True)
    parser.add_argument('-o', type=Path, dest='output_path',
                        help='Folder path to output documentation files.', required=True)
    parser.add_argument('-f', type=str, dest='output_format', choices=['json', 'md'],
                        help='Documentation formats to be created, this script only supports "json" and "md".', required=True)
    parser.add_argument('--t', type=Path, dest='template_path',
                        help='Template file path.')
    parser.add_argument('--df', dest='delete_folders', action='store_true',
                        help='If specified, delete all folders in the specified output_path that match the platform names.')

    args = parser.parse_args()

    # Validating optional arguments: a template is only needed for markdown
    # output (the message previously said "-f json", contradicting the check)
    output_format = args.output_format
    if output_format == 'md' and args.template_path is None:
        parser.error("-f md requires --t")

    input_path = args.input_path
    if not input_path.exists():
        raise FileNotFoundError("\033[31minput_path doesn't exist in the operating system\033[0m")

    output_path = args.output_path

    # Get queries information
    queries_database = get_meta_data_and_tests(str(input_path))

    # The output of this script depends on the output_format
    if output_format == 'json':
        export_to_json(queries_database, str(output_path))
        print("-->\033[32m JSON file with all queries information created/updated successfully\033[0m")

    elif output_format == 'md':
        template_path = args.template_path
        if not template_path.exists():
            raise FileNotFoundError("\033[31mtemplate_path doesn't exist in the operating system\033[0m")

        generate_md_docs(queries_database, str(output_path), str(template_path), args.delete_folders)
        print("-->\033[32m Documentation .md pages for each query created/updated successfully\033[0m")

    end_time = time.time()
    elapsed_time = end_time - start_time
    print(f"-->\033[34m Elapsed time: {round(elapsed_time, 2)} seconds\033[0m")
271+
# Run only when executed as a script, not when imported as a module
if __name__ == "__main__":
    main()
0 commit comments