Coverage for django_query_capture/classify.py: 100%

111 statements  

coverage.py v6.5.0, created at 2023-11-20 10:20 +0000

1""" 

2If [native_query_capture][capture.native_query_capture] has received data, it serves to refine the data into the necessary data. 

3 

4???+ warning "HashableCapturedQuery" 

5 In order to use `collection.Counter`, it was necessary to change the dict form to a hashable dict form.<br> 

6 So, to classify `Duplicate` and `Similar`, we convert [CapturedQuery][capture.CapturedQuery] dict into HashableDict form and use it as Counter's key.<br> 

7 If there is a better way, feel free to leave it as an issue or PR. 

8""" 

import typing

import re
from collections import Counter
from functools import cached_property

from django_query_capture.capture import CapturedQuery
from django_query_capture.settings import get_config

class DuplicateHashableCapturedQuery(typing.Dict[str, typing.Any]):
    def __hash__(self):
        return hash(self["sql"])

    def __eq__(self, other):
        # compare against the other captured query, not against itself
        return self["sql"] == other["sql"]

class SimilarHashableCapturedQuery(typing.Dict[str, typing.Any]):
    def __hash__(self):
        return hash(self["raw_sql"])

    def __eq__(self, other):
        # compare against the other captured query, not against itself
        return self["raw_sql"] == other["raw_sql"]
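
# Illustrative sketch (not part of the library): the hashable wrappers above let
# `collections.Counter` treat captured queries with the same "sql" (or "raw_sql")
# as a single counter key. The dict literals are simplified stand-ins for real
# CapturedQuery data.
def _hashable_counter_sketch() -> None:
    counter: typing.Counter = Counter()
    counter[DuplicateHashableCapturedQuery({"sql": "SELECT 1", "duration": 0.01})] += 1
    counter[DuplicateHashableCapturedQuery({"sql": "SELECT 1", "duration": 0.02})] += 1
    # both entries share the same "sql", so they collapse into one key counted twice
    assert counter.most_common(1)[0][1] == 2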

class ClassifiedQuery(typing.TypedDict):
    """
    This is the result of the Classifier refining a list of [CapturedQuery][capture.CapturedQuery].
    You can freely output this data from the `Presenter`.
    """

    read: int
    writes: int
    total: int
    total_duration: float
    slow_captured_queries: typing.List[CapturedQuery]
    duplicates_counter: typing.Counter[CapturedQuery]
    duplicates_counter_over_threshold: typing.Counter[CapturedQuery]
    similar_counter: typing.Counter[CapturedQuery]
    similar_counter_over_threshold: typing.Counter[CapturedQuery]
    most_common_duplicate: typing.Union[
        typing.Tuple[CapturedQuery, int], typing.Tuple[None, None]
    ]
    most_common_similar: typing.Union[
        typing.Tuple[CapturedQuery, int], typing.Tuple[None, None]
    ]
    has_over_threshold: bool
    captured_queries: typing.List[CapturedQuery]
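
# Illustrative sketch (not part of the library): how a presenter-style consumer
# might read the ClassifiedQuery fields defined above.
def _print_summary_sketch(classified: ClassifiedQuery) -> None:
    print(f"total={classified['total']} read={classified['read']} writes={classified['writes']}")
    most_common_query, count = classified["most_common_duplicate"]
    if most_common_query is not None:
        print(f"most duplicated ({count}x): {most_common_query['sql']}")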

class CapturedQueryClassifier:
    """
    Classifies a list of [CapturedQuery][capture.CapturedQuery] collected by [native_query_capture][capture.native_query_capture].
    Calling the instance returns a [ClassifiedQuery][classify.ClassifiedQuery] that the `Presenter` can freely output.
    """

    def __init__(
        self,
        captured_queries: typing.List[CapturedQuery],
        ignore_patterns: typing.Optional[typing.List[str]] = None,
    ):
        """
        Args:
            captured_queries: A list of [CapturedQuery][capture.CapturedQuery] collected by [native_query_capture][capture.native_query_capture].
            ignore_patterns: A list of regex strings; [CapturedQuery][capture.CapturedQuery] entries whose SQL matches any of them are excluded from classification.
        """
        self.ignore_patterns = ignore_patterns or get_config()["IGNORE_SQL_PATTERNS"]
        self.captured_queries = captured_queries
        self.filtered_captured_queries = [
            captured_query
            for captured_query in captured_queries
            if self.is_allow_pattern(captured_query["sql"])
        ]

    def __call__(self) -> ClassifiedQuery:
        return {
            "read": self.read_count,
            "writes": self.writes_count,
            "total": self.total_count,
            "total_duration": self.total_duration,
            "slow_captured_queries": self.slow_captured_queries,
            "duplicates_counter": self.duplicates_counter,
            "duplicates_counter_over_threshold": self.duplicates_counter_over_threshold,
            "similar_counter": self.similar_counter,
            "similar_counter_over_threshold": self.similar_counter_over_threshold,
            "most_common_duplicate": self.most_common_duplicate,
            "most_common_similar": self.most_common_similar,
            "has_over_threshold": self.has_over_threshold,
            "captured_queries": self.captured_queries,
        }

    def is_allow_pattern(self, query: str) -> bool:
        """
        Args:
            query: A plain SQL string.

        Returns:
            `True` if the query does not match any of the `ignore_patterns`, i.e. it is a classification target.
        """
        return not list(
            filter(
                lambda pattern: re.compile(pattern).search(query),
                self.ignore_patterns,
            )
        )
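
    # Illustrative note (hypothetical pattern): with, say,
    #     ignore_patterns = [r'^SELECT .* FROM "django_session"']
    # a session-table lookup matches the pattern, is_allow_pattern() returns
    # False, and that query is skipped during classification.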

    @property
    def read_count(self) -> int:
        """
        Returns:
            Number of `SELECT` statements.
        """
        return sum(
            1
            for capture_query in self.filtered_captured_queries
            if capture_query["raw_sql"].startswith("SELECT")
        )

    @property
    def writes_count(self) -> int:
        """
        Returns:
            Number of non-`SELECT` statements (`INSERT`, `UPDATE`, `DELETE`).
        """
        return sum(
            1
            for capture_query in self.filtered_captured_queries
            if not capture_query["raw_sql"].startswith("SELECT")
        )

    @property
    def total_count(self) -> int:
        """
        Returns:
            The number of all queries, excluding those matched by `ignore_patterns`.
        """
        return len(self.filtered_captured_queries)

    @property
    def total_duration(self) -> float:
        """
        Returns:
            The total execution time of the captured queries.
        """
        return sum(
            capture_query["duration"]
            for capture_query in self.filtered_captured_queries
        )

    @cached_property
    def slow_captured_queries(self) -> typing.List[CapturedQuery]:
        """
        Returns:
            List of [CapturedQuery][capture.CapturedQuery] whose duration exceeds [SLOW_MIN_SECOND](../home/settings.md).
        """
        results = []
        slow_min_second = get_config()["PRINT_THRESHOLDS"]["SLOW_MIN_SECOND"]
        if slow_min_second is not None:
            for captured_query in self.filtered_captured_queries:
                if captured_query["duration"] > slow_min_second:
                    results.append(captured_query)

        return results
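
    # Illustrative note (hypothetical numbers): with SLOW_MIN_SECOND set to 0.1,
    # a captured query whose "duration" is 0.25 would be collected here, while
    # one taking 0.05 would not; a None threshold disables the check entirely.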

    @cached_property
    def duplicates_counter(self) -> typing.Counter[CapturedQuery]:
        """
        Returns:
            `Counter` that counts `Duplicate` queries among all queries not excluded by `ignore_patterns`.
        """
        counter: typing.Counter[CapturedQuery] = Counter()
        for captured_query in self.filtered_captured_queries:
            counter[DuplicateHashableCapturedQuery(captured_query)] += 1  # type: ignore

        return counter

    @cached_property
    def duplicates_counter_over_threshold(self) -> typing.Counter[CapturedQuery]:
        """
        Returns:
            [CapturedQuery][capture.CapturedQuery] `Counter` entries that exceed [DUPLICATE_MIN_COUNT](../home/settings.md) among [duplicates_counter][classify.CapturedQueryClassifier.duplicates_counter].
        """
        counter: typing.Counter[CapturedQuery] = Counter()
        duplicate_min_count: typing.Optional[int] = get_config()["PRINT_THRESHOLDS"][
            "DUPLICATE_MIN_COUNT"
        ]
        if duplicate_min_count is not None:
            for captured_query, count in self.duplicates_counter.items():
                if count > duplicate_min_count:
                    counter[captured_query] = count

        return counter

    @cached_property
    def similar_counter(self) -> typing.Counter[CapturedQuery]:
        """
        Returns:
            `Counter` that counts `Similar` queries among all queries not excluded by `ignore_patterns`.
        """
        counter: typing.Counter[CapturedQuery] = Counter()
        for captured_query in self.filtered_captured_queries:
            counter[SimilarHashableCapturedQuery(captured_query)] += 1  # type: ignore

        return counter
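
    # Illustrative note (an interpretation, not verbatim from the source):
    # duplicates_counter keys on "sql" while similar_counter keys on "raw_sql",
    # so two executions of the same query template with different parameters
    # are expected to count as Similar but not as Duplicate.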

    @cached_property
    def similar_counter_over_threshold(self) -> typing.Counter[CapturedQuery]:
        """
        Returns:
            [CapturedQuery][capture.CapturedQuery] `Counter` entries that exceed [SIMILAR_MIN_COUNT](../home/settings.md) among [similar_counter][classify.CapturedQueryClassifier.similar_counter]; entries that already exceed the `Duplicate` threshold are excluded so they do not overlap with duplicates.
        """
        counter: typing.Counter[CapturedQuery] = Counter()
        similar_min_count: typing.Optional[int] = get_config()["PRINT_THRESHOLDS"][
            "SIMILAR_MIN_COUNT"
        ]
        duplicate_min_count: typing.Optional[int] = get_config()["PRINT_THRESHOLDS"][
            "DUPLICATE_MIN_COUNT"
        ]
        if similar_min_count is not None:
            for captured_query, count in self.similar_counter.items():
                if duplicate_min_count is not None:
                    if (
                        self.duplicates_counter[
                            DuplicateHashableCapturedQuery(captured_query)  # type: ignore
                        ]
                        > duplicate_min_count
                    ):
                        continue
                if count > similar_min_count:
                    counter[captured_query] = count

        return counter

    @property
    def most_common_duplicate(
        self,
    ) -> typing.Union[typing.Tuple[CapturedQuery, int], typing.Tuple[None, None]]:
        """
        Returns:
            The most frequent entry of [duplicates_counter][classify.CapturedQueryClassifier.duplicates_counter] as a `(CapturedQuery, count)` tuple, or `(None, None)` if there are no queries.
        """
        try:
            return self.duplicates_counter.most_common(1)[0]
        except IndexError:
            return None, None

    @property
    def most_common_similar(
        self,
    ) -> typing.Union[typing.Tuple[CapturedQuery, int], typing.Tuple[None, None]]:
        """
        Returns:
            The most frequent entry of [similar_counter][classify.CapturedQueryClassifier.similar_counter] as a `(CapturedQuery, count)` tuple, or `(None, None)` if there are no queries.
        """
        try:
            return self.similar_counter.most_common(1)[0]
        except IndexError:
            return None, None

    @property
    def has_over_threshold(self) -> bool:
        """
        Returns:
            `True` if any of the [SLOW_MIN_SECOND, DUPLICATE_MIN_COUNT, SIMILAR_MIN_COUNT](../home/settings.md) thresholds has been exceeded.
        """
        if (
            self.similar_counter_over_threshold
            or self.duplicates_counter_over_threshold
            or self.slow_captured_queries
        ):
            return True
        return False
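
# Illustrative sketch (not part of the library): a minimal end-to-end use of the
# classifier, assuming `captured_queries` is a list of CapturedQuery dicts that
# native_query_capture has already collected. The ignore pattern is hypothetical.
def _classify_sketch(captured_queries: typing.List[CapturedQuery]) -> ClassifiedQuery:
    classifier = CapturedQueryClassifier(
        captured_queries,
        ignore_patterns=[r'^SELECT .* FROM "django_migrations"'],  # hypothetical pattern
    )
    classified = classifier()  # the instance is callable and returns a ClassifiedQuery
    if classified["has_over_threshold"]:
        # e.g. hand the result off to a Presenter here
        pass
    return classified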