Coverage for django_query_capture/classify.py: 100%
111 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-11-20 10:20 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2023-11-20 10:20 +0000
1"""
2Once [native_query_capture][capture.native_query_capture] has captured query data, this module refines that raw data into the classified form needed downstream.
4???+ warning "HashableCapturedQuery"
5 In order to use `collections.Counter`, it was necessary to change the dict form to a hashable dict form.<br>
6 So, to classify `Duplicate` and `Similar`, we convert [CapturedQuery][capture.CapturedQuery] dict into HashableDict form and use it as Counter's key.<br>
7 If there is a better way, feel free to leave it as an issue or PR.
8"""
10import typing
12import re
13from collections import Counter
14from functools import cached_property
16from django_query_capture.capture import CapturedQuery
17from django_query_capture.settings import get_config
class DuplicateHashableCapturedQuery(typing.Dict[str, typing.Any]):
    """
    Hashable dict wrapper that identifies a captured query by its `sql` value.

    Two instances are equal (and hash equally) when their `sql` entries are
    equal, regardless of any other keys, so the wrapper can be used as a
    `collections.Counter` key to count `Duplicate` queries.
    """

    def __hash__(self):
        return hash(self["sql"])

    def __eq__(self, other):
        # Compare against *other*; comparing self with itself is always True
        # and would merge unrelated queries whenever their hashes collide.
        if not isinstance(other, dict) or "sql" not in other:
            return NotImplemented
        return self["sql"] == other["sql"]

    def __ne__(self, other):
        # dict defines its own __ne__ (full content comparison); override it so
        # that `!=` is always the exact inverse of the `sql`-based `==`.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result
class SimilarHashableCapturedQuery(typing.Dict[str, typing.Any]):
    """
    Hashable dict wrapper that identifies a captured query by its `raw_sql` value.

    Two instances are equal (and hash equally) when their `raw_sql` entries are
    equal, regardless of any other keys, so the wrapper can be used as a
    `collections.Counter` key to count `Similar` queries.
    """

    def __hash__(self):
        return hash(self["raw_sql"])

    def __eq__(self, other):
        # Compare against *other*; comparing self with itself is always True
        # and would merge unrelated queries whenever their hashes collide.
        if not isinstance(other, dict) or "raw_sql" not in other:
            return NotImplemented
        return self["raw_sql"] == other["raw_sql"]

    def __ne__(self, other):
        # dict defines its own __ne__ (full content comparison); override it so
        # that `!=` is always the exact inverse of the `raw_sql`-based `==`.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result
class ClassifiedQuery(typing.TypedDict):
    """
    The result produced by the classifier after refining a list of [CapturedQuery][capture.CapturedQuery].

    A `Presenter` is free to render this data in whatever form it likes.
    All counts and counters below are computed from the queries that are not
    matched by the classifier's ignore patterns; only `captured_queries`
    holds the complete, unfiltered list.
    """

    # Number of queries whose raw SQL starts with `SELECT`.
    read: int
    # Number of queries whose raw SQL does not start with `SELECT`.
    writes: int
    # Number of classified queries.
    total: int
    # Sum of the `duration` values of the classified queries.
    total_duration: float
    # Queries whose `duration` exceeds the `SLOW_MIN_SECOND` threshold.
    slow_captured_queries: typing.List[CapturedQuery]
    # Occurrence count per query, keyed by identical `sql`.
    duplicates_counter: typing.Counter[CapturedQuery]
    # Entries of `duplicates_counter` whose count exceeds `DUPLICATE_MIN_COUNT`.
    duplicates_counter_over_threshold: typing.Counter[CapturedQuery]
    # Occurrence count per query, keyed by identical `raw_sql`.
    similar_counter: typing.Counter[CapturedQuery]
    # Entries of `similar_counter` whose count exceeds `SIMILAR_MIN_COUNT`,
    # skipping entries already over the duplicate threshold.
    similar_counter_over_threshold: typing.Counter[CapturedQuery]
    # `(query, count)` of the most frequent duplicate, or `(None, None)` when
    # there are no classified queries.
    most_common_duplicate: typing.Union[
        typing.Tuple[CapturedQuery, int], typing.Tuple[None, None]
    ]
    # `(query, count)` of the most frequent similar query, or `(None, None)`
    # when there are no classified queries.
    most_common_similar: typing.Union[
        typing.Tuple[CapturedQuery, int], typing.Tuple[None, None]
    ]
    # True when any slow, duplicate or similar threshold has been exceeded.
    has_over_threshold: bool
    # Every captured query, including those matched by the ignore patterns.
    captured_queries: typing.List[CapturedQuery]
class CapturedQueryClassifier:
    """
    Refines a list of [CapturedQuery][capture.CapturedQuery] into a [ClassifiedQuery][classify.ClassifiedQuery].

    Queries whose `sql` matches one of the ignore patterns are excluded from
    every statistic; the untouched input list is still exposed as
    `captured_queries`. Call the instance to obtain the full result dict.
    """

    def __init__(
        self,
        captured_queries: typing.List[CapturedQuery],
        ignore_patterns: typing.Optional[typing.List[str]] = None,
    ):
        """
        Args:
            captured_queries: A list of [CapturedQuery][capture.CapturedQuery] collected by [native_query_capture][capture.native_query_capture].
            ignore_patterns: REGEX string list that will not be used for classification among [CapturedQuery][capture.CapturedQuery].
                When omitted (or empty), the `IGNORE_SQL_PATTERNS` setting is used instead.
        """
        # NOTE: `or` (not `is None`) is deliberate legacy behaviour: an empty
        # list also falls back to the configured patterns.
        self.ignore_patterns = ignore_patterns or get_config()["IGNORE_SQL_PATTERNS"]
        self.captured_queries = captured_queries
        self.filtered_captured_queries = [
            captured_query
            for captured_query in captured_queries
            if self.is_allow_pattern(captured_query["sql"])
        ]

    def __call__(self) -> ClassifiedQuery:
        """
        Returns:
            Every statistic of this classifier gathered into a single [ClassifiedQuery][classify.ClassifiedQuery] dict.
        """
        return {
            "read": self.read_count,
            "writes": self.writes_count,
            "total": self.total_count,
            "total_duration": self.total_duration,
            "slow_captured_queries": self.slow_captured_queries,
            "duplicates_counter": self.duplicates_counter,
            "duplicates_counter_over_threshold": self.duplicates_counter_over_threshold,
            "similar_counter": self.similar_counter,
            "similar_counter_over_threshold": self.similar_counter_over_threshold,
            "most_common_duplicate": self.most_common_duplicate,
            "most_common_similar": self.most_common_similar,
            "has_over_threshold": self.has_over_threshold,
            "captured_queries": self.captured_queries,
        }

    def is_allow_pattern(self, query: str) -> bool:
        """
        Args:
            query: It's simply a sql string.

        Returns:
            `True` if the query matches none of the ignore_patterns, that is, it is a classification target.
        """
        # `any` stops at the first matching pattern instead of building a list.
        return not any(
            re.search(pattern, query) for pattern in self.ignore_patterns
        )

    @staticmethod
    def _is_select_statement(raw_sql: str) -> bool:
        """Return `True` if raw_sql begins with `SELECT`, ignoring case and leading whitespace."""
        # Hand-written raw queries may be lower-case or start with a newline;
        # they are still reads and must not be counted as writes.
        return raw_sql.lstrip()[:6].upper() == "SELECT"

    @property
    def read_count(self) -> int:
        """
        Returns:
            number of `SELECT` statement
        """
        return sum(
            1
            for captured_query in self.filtered_captured_queries
            if self._is_select_statement(captured_query["raw_sql"])
        )

    @property
    def writes_count(self) -> int:
        """
        Returns:
            number of not `SELECT` statement ( `INSERT`, `UPDATE`, `DELETE` )
        """
        return sum(
            1
            for captured_query in self.filtered_captured_queries
            if not self._is_select_statement(captured_query["raw_sql"])
        )

    @property
    def total_count(self) -> int:
        """
        Returns:
            The number of all queries except those matching ignore_patterns.
        """
        return len(self.filtered_captured_queries)

    @property
    def total_duration(self) -> float:
        """
        Returns:
            The total time the classified queries took to execute.
        """
        return sum(
            captured_query["duration"]
            for captured_query in self.filtered_captured_queries
        )

    @cached_property
    def slow_captured_queries(self) -> typing.List[CapturedQuery]:
        """
        Returns:
            [CapturedQuery][capture.CapturedQuery] list with time exceeding [SLOW_MIN_SECOND](../home/settings.md).
            Empty when the threshold is disabled (`None`).
        """
        slow_min_second = get_config()["PRINT_THRESHOLDS"]["SLOW_MIN_SECOND"]
        if slow_min_second is None:
            return []
        return [
            captured_query
            for captured_query in self.filtered_captured_queries
            if captured_query["duration"] > slow_min_second
        ]

    @cached_property
    def duplicates_counter(self) -> typing.Counter[CapturedQuery]:
        """
        Returns:
            `Counter` that counts the number of `Duplicate` in all queries except ignore_patterns.
        """
        counter: typing.Counter[CapturedQuery] = Counter()
        for captured_query in self.filtered_captured_queries:
            # Wrap the dict so it is hashable and keyed by its `sql`.
            counter[DuplicateHashableCapturedQuery(captured_query)] += 1  # type: ignore

        return counter

    @cached_property
    def duplicates_counter_over_threshold(self) -> typing.Counter[CapturedQuery]:
        """
        Returns:
            CaptureQuery Counter that exceeds [DUPLICATE_MIN_COUNT](../home/settings.md) among [duplicates_counter][classify.CapturedQueryClassifier.duplicates_counter].
            Empty when the threshold is disabled (`None`).
        """
        counter: typing.Counter[CapturedQuery] = Counter()
        duplicate_min_count: typing.Optional[int] = get_config()["PRINT_THRESHOLDS"][
            "DUPLICATE_MIN_COUNT"
        ]
        if duplicate_min_count is None:
            return counter

        for captured_query, count in self.duplicates_counter.items():
            if count > duplicate_min_count:
                counter[captured_query] = count

        return counter

    @cached_property
    def similar_counter(self) -> typing.Counter[CapturedQuery]:
        """
        Returns:
            `Counter` that counts the number of `Similar` in all queries except ignore_patterns.
        """
        counter: typing.Counter[CapturedQuery] = Counter()
        for captured_query in self.filtered_captured_queries:
            # Wrap the dict so it is hashable and keyed by its `raw_sql`.
            counter[SimilarHashableCapturedQuery(captured_query)] += 1  # type: ignore

        return counter

    @cached_property
    def similar_counter_over_threshold(self) -> typing.Counter[CapturedQuery]:
        """
        Returns:
            [CaptureQuery][capture.CapturedQuery] `Counter` that exceeds [SIMILAR_MIN_COUNT](../home/settings.md) among [similar_counter][classify.CapturedQueryClassifier.similar_counter], it doesn't overlap with Duplicates.
            Empty when the threshold is disabled (`None`).
        """
        counter: typing.Counter[CapturedQuery] = Counter()
        thresholds = get_config()["PRINT_THRESHOLDS"]
        similar_min_count: typing.Optional[int] = thresholds["SIMILAR_MIN_COUNT"]
        duplicate_min_count: typing.Optional[int] = thresholds["DUPLICATE_MIN_COUNT"]
        if similar_min_count is None:
            return counter

        # Loop-invariant: duplicates are only excluded when that threshold is on.
        exclude_duplicates = duplicate_min_count is not None
        for captured_query, count in self.similar_counter.items():
            if exclude_duplicates:
                duplicate_count = self.duplicates_counter[
                    DuplicateHashableCapturedQuery(captured_query)  # type: ignore
                ]
                # Already reported as a duplicate, so do not report it again
                # as a similar query.
                if duplicate_count > duplicate_min_count:
                    continue
            if count > similar_min_count:
                counter[captured_query] = count

        return counter

    @property
    def most_common_duplicate(
        self,
    ) -> typing.Union[typing.Tuple[CapturedQuery, int], typing.Tuple[None, None]]:
        """
        Returns:
            most frequent `(query, count)` pair among [duplicates_counter][classify.CapturedQueryClassifier.duplicates_counter],
            or `(None, None)` when the counter is empty.
        """
        most_common = self.duplicates_counter.most_common(1)
        if not most_common:
            return None, None
        return most_common[0]

    @property
    def most_common_similar(
        self,
    ) -> typing.Union[typing.Tuple[CapturedQuery, int], typing.Tuple[None, None]]:
        """
        Returns:
            most frequent `(query, count)` pair among [similar_counter][classify.CapturedQueryClassifier.similar_counter],
            or `(None, None)` when the counter is empty.
        """
        most_common = self.similar_counter.most_common(1)
        if not most_common:
            return None, None
        return most_common[0]

    @property
    def has_over_threshold(self) -> bool:
        """
        Returns:
            [SLOW_MIN_SECOND, DUPLICATE_MIN_COUNT, SIMILAR_MIN_COUNT](../home/settings.md)<br>
            If any of the three has exceeded the threshold, return `True`.
        """
        return bool(
            self.similar_counter_over_threshold
            or self.duplicates_counter_over_threshold
            or self.slow_captured_queries
        )