Skip to content

Commit a5c8d52

Browse files
authored
Merge pull request #6313 from cx-ruiaraujo/Feature/Add_Query_Documentation_Generator
feat(documentation): add query page generator
2 parents ce0e794 + b5fe6cb commit a5c8d52

14 files changed

Lines changed: 474 additions & 11 deletions

File tree

.github/scripts/docs-generator/docs-generator.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,8 @@ def check_and_create_override_entry(meta_dict, template_dict):
9191

9292
check_and_create_override_entry(meta_dict, template_dict)
9393
q_id = meta_dict['id']
94+
query_page = os.path.join('..', f"{meta_dict.get('platform').lower()}-queries", meta_dict.get('cloudProvider', '').lower(), meta_dict.get('id'))
95+
meta_dict['descriptionText'] += f' (<a href="{query_page}" target="_blank">read more</a>)'
9496
template_dict[platform][sub_platform][severity][category][q_id] = meta_dict
9597
#
9698
# template dict ex:
Lines changed: 273 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,273 @@
1+
# Script created in Python 3.10.8 using only standard libraries
2+
import argparse
3+
import os
4+
import json
5+
import shutil
6+
import time
7+
from pathlib import Path
8+
9+
# Searches for "metadata.json" files within the given directory
10+
# Returns a dictionary of dictionaries (output of function get_query_info)
11+
def get_meta_data_and_tests(input_path : str, metadata_file = 'metadata.json'):
    """Collect the information of every query found below ``input_path``.

    The directory tree is searched recursively for files named
    ``metadata_file``; each containing folder is handed to
    ``get_query_info``.

    Args:
        input_path: Root folder to search.
        metadata_file: File name that marks a query folder.

    Returns:
        A dictionary mapping each query id to the dictionary returned by
        ``get_query_info``. Entries whose metadata has no (or an empty)
        ``"id"`` are skipped.

    Raises:
        FileNotFoundError: Propagated from ``get_query_info``.
    """
    queries_data = {}

    # rglob yields entries in file-system order, which differs between
    # machines; sorting keeps the resulting dictionary (and anything exported
    # from it) stable between runs.
    for path in sorted(Path(input_path).rglob(metadata_file)):
        query_info = get_query_info(os.path.dirname(path), metadata_file)
        query_id = query_info.get("id")
        if not query_id:
            continue
        queries_data[query_id] = query_info

    return queries_data
22+
23+
# Creates a dictionary with the query information present in the "./metadata.json" file and the "./test" directory
24+
def get_query_info(query_path : str, metadata_file = 'metadata.json', test_extensions = (".json", ".yaml", ".tf", ".dockerfile")):
    """Build the information dictionary of a single query.

    Reads ``<query_path>/<metadata_file>`` and the sample files stored in
    ``<query_path>/test``.

    Args:
        query_path: Folder of the query.
        metadata_file: Name of the metadata file inside ``query_path``.
        test_extensions: File extensions (including the dot) probed for
            ``positive*`` / ``negative*`` sample files.

    Returns:
        The parsed metadata dictionary extended with:

        * ``githubUrl``: URL built from the part of the path starting at
          ``assets/queries`` (the whole path when that folder is not found).
        * ``true_positives``: list of ``{"fileName", "lines", "code"}``.
        * ``true_negatives``: list of ``{"fileName", "code"}``.

    Raises:
        FileNotFoundError: If the metadata file, the ``test`` folder or
            ``positive_expected_result.json`` is missing.
    """
    metadata_file_path = os.path.join(query_path, metadata_file)

    if not os.path.exists(metadata_file_path):
        raise FileNotFoundError(f"\033[31mFile {metadata_file} doesn't exist in {query_path}\033[0m")

    # Get information inside the "./metadata.json" file
    with open(metadata_file_path, 'r', encoding='utf-8') as f:
        query_metadata_content = json.load(f)

    # Work on a forward-slash version of the path so the lookup (and the URL
    # built from it) is the same on every operating system.
    posix_path = Path(metadata_file_path).as_posix()
    folder_index = posix_path.rfind("assets/queries")
    # If the folder name is found, keep everything from it onwards
    path_after_folder = posix_path[folder_index:] if folder_index != -1 else posix_path
    query_metadata_content['githubUrl'] = f"https://github.com/Checkmarx/kics/tree/master/{os.path.dirname(path_after_folder)}"

    test_folder_path = os.path.join(query_path, 'test')
    if not os.path.isdir(test_folder_path):
        raise FileNotFoundError(f"\033[31mFolder {test_folder_path} doesn't exist in {query_path}\033[0m")

    expected_result_file_path = os.path.join(test_folder_path, 'positive_expected_result.json')
    if not os.path.exists(expected_result_file_path):
        raise FileNotFoundError(f"\033[31mFile {expected_result_file_path} doesn't exist in {test_folder_path}\033[0m")

    with open(expected_result_file_path, 'r', encoding='utf-8') as f:
        expected_results = json.load(f)

    true_positives = []
    true_negatives = []

    # Get information inside the "./test" folder.
    # Round 0 probes "positive<ext>" / "negative<ext>"; round N (N >= 1) probes
    # "positiveN<ext>" / "negativeN<ext>". The scan stops at the first
    # numbered round in which no file was found.
    index = 0
    while True:
        suffix = "" if index == 0 else str(index)
        found_any = False

        for extension in test_extensions:
            # Positive tests
            positive_name = f"positive{suffix}{extension}"
            positive_file_path = os.path.join(test_folder_path, positive_name)
            if os.path.exists(positive_file_path):
                with open(positive_file_path, "r", encoding='utf-8') as f:
                    test_content = f.read()

                if index == 0:
                    # An un-numbered sample is given every expected result
                    test_results = expected_results
                else:
                    test_results = [
                        entry
                        for entry in expected_results
                        if (entry.get("fileName")
                            or entry.get("filename")
                            or entry.get("file")) == positive_name
                    ]
                # Sorted so the highlighted lines are emitted deterministically
                lines = sorted({item["line"] for item in test_results})
                true_positives.append(dict(fileName=positive_name, lines=lines, code=test_content))
                found_any = True

            # Negative tests
            negative_name = f"negative{suffix}{extension}"
            negative_file_path = os.path.join(test_folder_path, negative_name)
            if os.path.exists(negative_file_path):
                with open(negative_file_path, "r", encoding='utf-8') as f:
                    test_content = f.read()

                true_negatives.append(dict(fileName=negative_name, code=test_content))
                found_any = True

        if not found_any and index != 0:
            break
        index += 1

    query_metadata_content['true_positives'] = true_positives
    query_metadata_content['true_negatives'] = true_negatives
    return query_metadata_content
113+
114+
# Utility for generating ".md" documentation
115+
def format_negative_tests(format_negative_tests : list) -> str:
    """Render the negative (non-vulnerable) samples as Markdown code blocks.

    Args:
        format_negative_tests: Sequence of dictionaries with the keys
            ``"fileName"`` and ``"code"``. ``None`` is treated as empty.
            (The parameter keeps its original name, which shadows the
            function inside this body.)

    Returns:
        The concatenated Markdown. The first three samples are titled fenced
        code blocks; every further sample is wrapped in a collapsed
        ``<details>`` element.
    """
    blocks = []

    for idx, sample in enumerate(format_negative_tests or []):
        # The fence language is the file extension without the dot
        extension = sample.get('fileName').split(".")[-1]
        title = f'Negative test num. {idx + 1} - {extension} file'
        code = sample.get('code')

        # If the query has more than 3 tests, the remaining tests are placed in a drop down
        if idx <= 2:
            blocks.append(f'```{extension} title="{title}"\n{code}\n```\n')
        else:
            blocks.append(
                f"<details><summary>{title}</summary>\n\n"
                f'```{extension}\n{code}\n```\n'
                "</details>\n"
            )

    return ''.join(blocks)
133+
134+
# Utility for generating ".md" documentation
135+
def format_positive_tests(positive_tests : list) -> str:
    """Render the positive (vulnerable) samples as Markdown code blocks.

    Args:
        positive_tests: Sequence of dictionaries with the keys
            ``"fileName"``, ``"code"`` and ``"lines"`` (line numbers to
            highlight). ``None`` is treated as empty.

    Returns:
        The concatenated Markdown. The first three samples are titled fenced
        code blocks; every further sample is wrapped in a collapsed
        ``<details>`` element. When a sample has lines to highlight, an
        ``hl_lines="..."`` attribute is added to its fence.
    """
    blocks = []

    for idx, sample in enumerate(positive_tests or []):
        # The fence language is the file extension without the dot
        extension = sample.get('fileName').split(".")[-1]
        title = f'Positive test num. {idx + 1} - {extension} file'
        code = sample.get('code')

        highlighted = sample.get('lines') or []
        results_lines = ''
        if highlighted:
            results_lines = 'hl_lines="' + " ".join(str(line) for line in highlighted) + '"'

        # If the query has more than 3 tests, the remaining tests are placed in a drop down
        if idx <= 2:
            blocks.append(f'```{extension} title="{title}" {results_lines}\n{code}\n```\n')
        else:
            blocks.append(
                f"<details><summary>{title}</summary>\n\n"
                f'```{extension} {results_lines}\n{code}\n```\n'
                "</details>\n"
            )

    return ''.join(blocks)
166+
167+
# Utility for generating ".md" documentation
168+
def format_severity(severity : str) -> str:
    """Wrap a severity name in a ``<span>``, colored when the severity is known.

    Args:
        severity: Severity name in any letter case (e.g. ``"HIGH"``).
            ``None`` is treated as an empty string.

    Returns:
        ``<span style="color:...">Name</span>`` for a known severity, with the
        name capitalized. For any other value a plain ``<span>Name</span>`` is
        returned, so no invalid ``color:None`` style is emitted.
    """
    colors = {'High': '#C00', 'Medium': '#C60', 'Low': '#CC0', 'Info': '#00C', 'Trace': '#CCC'}
    severity = (severity or '').capitalize()
    color = colors.get(severity)
    if color is None:
        return f'<span>{severity}</span>'
    return f'<span style="color:{color}">{severity}</span>'
173+
174+
# Generates a ".md" file for each query
175+
def generate_md_docs(queries_database : dict, output_path : str, template_file_path = 'template.md', delete_folders : bool = False):
    """Generate one ".md" page per query from a template file.

    Each page is written to
    ``<output_path>/<platform>-queries/<cloudProvider>/<query id>.md`` with
    the platform and cloud provider lower-cased; the cloud provider part is
    empty when the query has none.

    Args:
        queries_database: Mapping of query id to query information, as
            produced by ``get_meta_data_and_tests``.
        output_path: Root folder of the generated documentation.
        template_file_path: Template containing ``<QUERY_ID>``-style
            placeholders.
        delete_folders: When true, ``<platform>-queries`` folders already
            present in ``output_path`` are removed before generating.

    Raises:
        FileNotFoundError: If the template file does not exist.
    """
    # Validate and load the template BEFORE deleting anything, so a wrong
    # template path cannot wipe the previously generated pages.
    if not os.path.exists(template_file_path):
        raise FileNotFoundError("\033[31mtemplate_path doesn't exist in the operating system\033[0m")

    with open(template_file_path, 'r', encoding='utf-8') as f:
        doc_template = f.read()

    # Ensure that we are deleting old files generated by this script
    if delete_folders and os.path.isdir(output_path):
        platforms = {f"{value.get('platform').lower()}-queries"
                     for value in queries_database.values()
                     if value.get("platform") is not None}

        # Only sub-folders whose name matches a known platform are removed
        for folder in os.listdir(output_path):
            folder_path = os.path.join(output_path, folder)
            if os.path.isdir(folder_path) and folder.lower() in platforms:
                shutil.rmtree(folder_path)  # Delete the folder and all its contents

    for key, query_data in queries_database.items():
        # Applied in this order, one placeholder after the other
        replacements = (
            ("<QUERY_ID>", key),
            ("<QUERY_NAME>", query_data.get('queryName')),
            ("<PLATFORM>", query_data.get('platform')),
            ("<SEVERITY>", format_severity(query_data.get('severity'))),
            ("<CATEGORY>", query_data.get('category')),
            ("<GITHUB_URL>", query_data.get('githubUrl')),
            ("<DESCRIPTION_TEXT>", query_data.get('descriptionText')),
            ("<DESCRIPTION_URL>", query_data.get('descriptionUrl')),
            ("<POSITIVE_TESTS>", format_positive_tests(query_data.get('true_positives'))),
            ("<NEGATIVE_TESTS>", format_negative_tests(query_data.get('true_negatives'))),
        )
        query_doc = doc_template
        for placeholder, value in replacements:
            query_doc = query_doc.replace(placeholder, value)

        platform_folder_path = os.path.join(output_path,
                                            f"{query_data.get('platform').lower()}-queries",
                                            query_data.get('cloudProvider', '').lower())
        os.makedirs(platform_folder_path, exist_ok=True)

        # Explicit UTF-8 so the pages are written identically on every OS
        with open(f"{os.path.join(platform_folder_path, key)}.md", "w", encoding='utf-8') as f:
            f.write(query_doc)
219+
220+
# Export a dictionary to a "json" file
221+
def export_to_json(queries_database : dict, output_path : str):
    """Write ``queries_database`` to ``<output_path>/queries_database.json``.

    Args:
        queries_database: JSON-serializable dictionary to export.
        output_path: Destination folder; created when it does not exist.
    """
    # Create the destination folder when needed instead of failing on a
    # missing directory.
    os.makedirs(output_path, exist_ok=True)
    with open(os.path.join(output_path, "queries_database.json"), "w", encoding="utf-8") as f:
        json.dump(queries_database, f, indent=4)
224+
225+
def main():
    """Command-line entry point: parse the arguments and generate the output.

    Arguments:
        -p    Folder searched for "metadata.json" files (required).
        -o    Folder receiving the generated files (required).
        -f    Output format, "json" or "md" (required).
        --t   Template file path; mandatory when the format is "md".
        --df  Delete existing platform folders in the output path first.

    Raises:
        FileNotFoundError: If the input path or the template path does not
            exist.
        SystemExit: Raised by argparse on invalid arguments.
    """
    start_time = time.time()

    # Script arguments
    parser = argparse.ArgumentParser(description="Create/Update documentation page for each query")
    parser.add_argument('-p', type=Path, dest='input_path',
                        help='Folder path to read "metadata.json".', required=True)
    parser.add_argument('-o', type=Path, dest='output_path',
                        help='Folder path to output documentation files.', required=True)
    parser.add_argument('-f', type=str, dest='output_format', choices=['json', 'md'],
                        help='Documentation formats to be created, this script only supports "json" and "md".', required=True)
    parser.add_argument('--t', type=Path, dest='template_path',
                        help='Template file path.')
    parser.add_argument('--df', dest='delete_folders', action='store_true',
                        help='If specified, delete all folders in the specified output_path that match the platform names.')

    args = parser.parse_args()

    # Validating optional arguments: only the "md" format needs a template
    output_format = args.output_format
    if output_format == 'md' and args.template_path is None:
        parser.error("-f md requires --t")

    input_path = args.input_path
    if not input_path.exists():
        raise FileNotFoundError("\033[31minput_path doesn't exist in the operating system\033[0m")

    output_path = args.output_path

    # Get queries information
    queries_database = get_meta_data_and_tests(str(input_path))

    # The output of this script depends on the output_format
    if output_format == 'json':
        export_to_json(queries_database, str(output_path))
        print("-->\033[32m JSON file with all queries information created/updated successfully\033[0m")

    elif output_format == 'md':
        template_path = args.template_path
        if not template_path.exists():
            raise FileNotFoundError("\033[31mtemplate_path doesn't exist in the operating system\033[0m")

        generate_md_docs(queries_database, str(output_path), str(template_path), args.delete_folders)
        print("-->\033[32m Documentation .md pages for each query created/updated successfully\033[0m")

    end_time = time.time()
    elapsed_time = end_time - start_time
    print(f"-->\033[34m Elapsed time: {round(elapsed_time, 2)} seconds\033[0m")


if __name__ == "__main__":
    main()
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
---
2+
title: <QUERY_NAME>
3+
hide:
4+
toc: true
5+
navigation: true
6+
---
7+
8+
<style>
9+
.highlight .hll {
10+
background-color: #ff171742;
11+
}
12+
.md-content {
13+
max-width: 1100px;
14+
margin: 0 auto;
15+
}
16+
</style>
17+
18+
- **Query id:** <QUERY_ID>
19+
- **Query name:** <QUERY_NAME>
20+
- **Platform:** <PLATFORM>
21+
- **Severity:** <SEVERITY>
22+
- **Category:** <CATEGORY>
23+
- **URL:** [Github](<GITHUB_URL>)
24+
25+
### Description
26+
<DESCRIPTION_TEXT><br>
27+
[Documentation](<DESCRIPTION_URL>)
28+
29+
### Code samples
30+
#### Code samples with security vulnerabilities
31+
<POSITIVE_TESTS>
32+
33+
#### Code samples without security vulnerabilities
34+
<NEGATIVE_TESTS>

.github/workflows/update-docs-queries.yaml

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,17 @@ jobs:
2525
- name: Update docs
2626
run: |
2727
pip3 install -r .github/scripts/docs-generator/requirements.txt
28-
python3 -u .github/scripts/docs-generator/docs-generator.py \
28+
python3 -u -B .github/scripts/docs-generator/docs-generator.py \
2929
-p ./assets/queries/ \
3030
-o ./docs/queries/ \
3131
-f md \
3232
-t .github/scripts/docs-generator/templates
33+
python3 -u -B .github/scripts/docs-generator/query-page-generator/query-page-generator.py \
34+
-p ./assets/queries/ \
35+
-o ./docs/queries/ \
36+
-f md \
37+
--t .github/scripts/docs-generator/query-page-generator/templates/query-page-template.md \
38+
--df
3339
- name: Create Pull Request
3440
uses: peter-evans/create-pull-request@v4
3541
with:

Makefile

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,12 @@ generate-queries-docs: ## generate queries catalog md files
154154
-o ./docs/queries/ \
155155
-f md \
156156
-t .github/generators/templates
157+
@python3 -u .github/scripts/docs-generator/query-page-generator/query-page-generator.py \
158+
-p ./assets/queries/ \
159+
-o ./docs/queries/ \
160+
-f md \
161+
--t .github/scripts/docs-generator/query-page-generator/templates/query-page-template.md \
162+
--df
157163
@echo "\033[36mQueries catalog updated\033[0m"
158164

159165
.PHONY: integration

docs/js/custom.js

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,8 @@ function exportToCSV(filename) {
170170
} else if (headerArray[j] == "query") {
171171
var lastIndex = text.lastIndexOf(" ")
172172
text = `"${text.substring(lastIndex + 1)},${text.substring(0, lastIndex)}"`
173+
} else if (headerArray[j] == "description") {
174+
text = text.replace(/\(read more\)/i, '')
173175
}
174176
row.push(text)
175177
}

mkdocs.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,9 @@ google_analytics:
111111
- auto
112112

113113
markdown_extensions:
114+
- pymdownx.highlight:
115+
anchor_linenums: true
116+
- pymdownx.superfences
114117
- toc:
115118
permalink: true
116119
toc_depth: 2

0 commit comments

Comments
 (0)