1"""
Sorting utilities for table data.

Implements :any:`/specs/spec_table_sorting` §1-8, covering:

* Sort-key definition and expressions (§1)
* General ordering rules (§2)
* Virtual arithmetic columns (§3)
* Date handling (§4)
* Missing values & dashes (§5)
* Group-aware post-processing (§6)
* Fallback sorting (§7)
* Error handling (§8)
14"""
15
16from typing import List, Tuple, Optional, Dict, Any, Set
17from datetime import datetime
18from ..rendering.renderer_base import Renderer
19from ..models.overview_data import OverviewData
20
21
class SortConfigurationError(Exception):
    """Signal that a sort specification cannot be applied to the table.

    Raised by the validation step in this module for an unsupported sort
    direction, an unknown column, nested arithmetic, or a virtual column
    for which no row holds numeric operands.
    """
26
27
def validate_sort_configuration(
    sort_keys: List[Tuple[str, str]], overview_table: Dict[str, Dict[str, Any]]
) -> None:
    """Check every sort key against the columns actually present in the table.

    Args:
        sort_keys: ``(column_expr, direction)`` pairs to verify.
        overview_table: Mapping of row id to row dict used as reference data.

    Raises:
        SortConfigurationError: On an invalid direction, an unknown column,
            an expression with more than one arithmetic operator, or a
            virtual column for which no row has numeric operands.
    """
    if not overview_table:
        return  # nothing to validate against

    # A column counts as known when at least one row carries it; individual
    # rows may omit columns, so looking only at the first row would produce
    # false "unknown column" errors.
    known: Set[str] = set().union(
        *(entry.keys() for entry in overview_table.values() if entry)
    )
    if not known:
        return  # only empty rows: still nothing to validate

    operators = ("+", "-", "*")
    directions = ("asc", "desc")

    for expr, direction in sort_keys:
        if direction not in directions:
            raise SortConfigurationError(
                f"Invalid sort direction '{direction}' for key '{expr}'"
            )

        present = [sym for sym in operators if sym in expr]
        if not present:
            # Plain column reference: it only has to exist somewhere.
            if expr not in known:
                raise SortConfigurationError(f"Unknown column '{expr}'")
            continue

        # Virtual column: exactly one operator is allowed.
        if sum(expr.count(sym) for sym in operators) > 1:
            raise SortConfigurationError(
                f"Nested arithmetic not supported in virtual column '{expr}'"
            )

        operands = [part.strip() for part in expr.split(present[0])]
        for name in operands:
            if name not in known:
                raise SortConfigurationError(
                    f"Unknown column '{name}' referenced in virtual column '{expr}'"
                )

        # At least one row must offer numeric values for every operand.
        if not is_virtual_column_sortable(overview_table, operands):
            raise SortConfigurationError(
                f"Virtual column '{expr}' is not sortable - no rows contain numeric values"
            )
90
91
[docs]
def _virtual_operand_value(row: Dict[str, Any], column: str) -> Optional[float]:
    """Return one operand's numeric value, or ``None`` when it is unusable."""
    raw = row.get(column, None)
    # Dash placeholders and the empty string count as zero.
    if raw in ("-", "—", "–", ""):
        return 0.0
    # Native numbers are accepted directly; strings only when the renderer
    # helper recognises them as numeric.
    if isinstance(raw, (int, float)) or (
        isinstance(raw, str) and Renderer.is_numeric_string(raw)
    ):
        return Renderer.safe_float(raw, 0.0)
    # Absent column (``None``) or any non-numeric value.
    return None


def parse_virtual_column(row: Dict[str, Any], key: str) -> Optional[float]:
    """Evaluate a virtual arithmetic column expression for one row.

    The expression must be ``<column> <op> <column>`` with exactly one of the
    operators ``+``, ``-`` or ``*``; whitespace is ignored. Expressions with
    no operator or with more than one operator are not evaluated.

    Args:
        row: Row dict mapping column names to cell values.
        key: The virtual column expression, e.g. ``"stars + forks"``.

    Returns:
        The computed value, or ``None`` when the expression is not a single
        binary operation or when either operand is absent or non-numeric.
        Dash placeholders (``-``, ``—``, ``–``) and ``""`` evaluate to 0.
    """
    operators = ("+", "-", "*")
    key = "".join(key.split())

    # Exactly one operator occurrence is supported. Because of this check an
    # operand can never itself contain an operator, so no recursion is needed.
    if sum(key.count(op) for op in operators) != 1:
        return None

    op = next(op for op in operators if op in key)
    left, right = key.split(op)

    left_val = _virtual_operand_value(row, left)
    right_val = _virtual_operand_value(row, right)
    if left_val is None or right_val is None:
        return None

    if op == "+":
        return left_val + right_val
    if op == "-":
        return left_val - right_val
    return left_val * right_val
155
156
def get_numeric_value(row: Dict[str, Any], key: str) -> Optional[float]:
    """Read ``row[key]`` as a float.

    Returns 0.0 for an absent cell, a dash placeholder or the empty string,
    the converted number for numeric cells, and ``None`` for anything else.
    The key is stripped of surrounding whitespace before the lookup.
    """
    cell = row.get(key.strip(), None)

    # Absent cells and dash placeholders count as zero.
    if cell is None or cell in ("-", "—", "–", ""):
        return 0.0

    try:
        converted = Renderer.safe_float(cell, 0.0)
        looks_numeric = isinstance(cell, (int, float)) or (
            isinstance(cell, str) and Renderer.is_numeric_string(cell)
        )
    except Exception:
        # Any failure in the renderer helpers marks the cell as invalid.
        return None

    # Only genuinely numeric input yields a value; otherwise signal invalid.
    return converted if looks_numeric else None
178
179
[docs]
def parse_value(v: Any, direction: str = "asc") -> Tuple[int, Any]:
    """Turn a single cell value into a sortable ``(rank, key)`` pair.

    Args:
        v: The raw cell value.
        direction: ``"asc"`` for ascending; any other value is treated as
            descending.

    Returns:
        ``(1, "-")`` for missing values (``None``, ``""``, ``-``, ``—``,
        ``–``) so they sort after valid ones in either direction; otherwise
        ``(0, key)`` where ``key`` is a timestamp for ISO date strings, a
        float for values accepted by ``Renderer.safe_float``, or the string
        form (character-inverted for descending order).
    """
    # Missing values always sort last. The en dash is included so this check
    # agrees with the other missing-value checks in this module.
    if v is None or v in ("-", "—", "–", ""):
        return (1, "-")

    # Try to parse as date first.
    if isinstance(v, str):
        parsed_date = Renderer.parse_iso_date(v)
        if parsed_date is not None:
            # Convert datetime to timestamp for sorting.
            timestamp = parsed_date.timestamp()
            return (0, timestamp if direction == "asc" else -timestamp)

    # NOTE(review): if Renderer.safe_float returns its default (0.0) for
    # non-numeric input instead of raising, the string fallback below is
    # never reached and all text values sort as 0.0 — confirm against the
    # helper's contract.
    try:
        val = Renderer.safe_float(v, 0.0)
        return (0, val if direction == "asc" else -val)
    except Exception:
        pass

    s = str(v)
    if direction == "asc":
        return (0, s)
    # Invert every code point so plain ascending comparison of the keys
    # yields descending order of the original strings.
    # NOTE(review): an inverted prefix is still a prefix, so "a" sorts
    # before "ab" in descending order as well.
    inverted = "".join(chr(0x10FFFF - ord(ch)) for ch in s)
    return (0, inverted)
203
204
[docs]
def is_virtual_column_sortable(
    overview_table: Dict[str, Dict[str, Any]], keys: List[str]
) -> bool:
    """Check if a virtual column is sortable, i.e. at least one row has numeric operands.

    This function uses more restrictive validation than the spec minimum requirement.
    It requires ALL operands to be numeric for a row to be considered valid,
    whereas the spec only requires that not all operands evaluate to 0.

    This provides better error detection by catching configuration issues earlier.

    A value counts as numeric when it is a native ``int``/``float`` or a
    string accepted by ``Renderer.is_numeric_string`` — the same rule the
    other numeric checks in this module apply.

    Args:
        overview_table: The table data to validate against
        keys: List of column keys or a single virtual column expression

    Returns:
        True if the virtual column is sortable, False otherwise
    """
    operators = ("+", "-", "*")

    # A single entry containing an operator is an expression: split it into
    # its operand column names. Otherwise the list already holds the operands.
    if len(keys) == 1 and any(op in keys[0] for op in operators):
        op = next(op for op in operators if op in keys[0])
        subkeys = [part.strip() for part in keys[0].split(op)]
    else:
        subkeys = keys

    def _is_numeric(value: Any) -> bool:
        # Native numbers need no string check; only strings go to the helper.
        if isinstance(value, (int, float)):
            return True
        return isinstance(value, str) and Renderer.is_numeric_string(value)

    for row in overview_table.values():
        try:
            if all(_is_numeric(row.get(kk, None)) for kk in subkeys):
                return True
        except Exception:
            # Best effort: a malformed row or a failing helper call only
            # disqualifies that row, not the whole column.
            continue
    return False
239
240
[docs]
def parse_sort_keys(
    sort_arg: Optional[str],
    table_config: Optional[dict],
    overview_table: Optional[Dict[str, Dict[str, Any]]] = None,
) -> List[Tuple[str, str]]:
    """Resolve the effective sort keys from the CLI argument or the table config.

    Each entry is either ``<column-expr>:<direction>`` (e.g. ``"priority:desc"``)
    or just ``<column-expr>`` (direction defaults to ``"asc"``). A direction
    other than ``asc``/``desc`` is replaced by ``"asc"``. Several entries are
    evaluated left-to-right as tiebreakers.

    Precedence: a non-empty ``sort_arg`` (comma-separated) wins; otherwise
    ``table_config["default_sort"]`` is used when present; otherwise the
    result is ``[("repo", "asc")]``.

    Args:
        sort_arg: Optional comma-separated list of sort keys from the command line.
        table_config: Optional table configuration that may hold ``default_sort``.
        overview_table: Optional table data; when given, the resolved keys are
            validated against it.

    Returns:
        List of ``(column_expr, direction)`` tuples.

    Raises:
        SortConfigurationError: If validation against ``overview_table`` fails.
    """

    def _split_entry(entry: str) -> Tuple[str, str]:
        # Split on the first colon only; the remainder is the direction.
        column, colon, order = entry.partition(":")
        if not colon:
            return (entry.strip(), "asc")
        order = order.strip().lower()
        return (column.strip(), order if order in ("asc", "desc") else "asc")

    if sort_arg:
        # The command line takes priority; blank fragments are dropped.
        fragments = (piece.strip() for piece in sort_arg.split(","))
        resolved = [_split_entry(fragment) for fragment in fragments if fragment]
    elif table_config and "default_sort" in table_config:
        resolved = [_split_entry(entry) for entry in table_config["default_sort"]]
    else:
        # Nothing specified anywhere: sort by repo name ascending.
        resolved = [("repo", "asc")]

    if overview_table is not None:
        validate_sort_configuration(resolved, overview_table)

    return resolved
304
305
[docs]
def sort_row_tuple(row: Dict[str, Any], sort_keys: List[Tuple[str, str]]):
    """Build the comparison tuple for one row, one ``(rank, key)`` slot per sort key.

    Missing or invalid cells get the slot ``(1, "-")`` so they end up after
    valid cells (rank 0) regardless of the sort direction. Any direction
    other than ``"asc"`` is handled as descending.
    """
    missing_slot = (1, "-")

    def _simple_slot(cell, descending):
        # ISO date strings are compared by timestamp.
        if isinstance(cell, str):
            as_date = Renderer.parse_iso_date(cell)
            if as_date is not None:
                stamp = as_date.timestamp()
                return (0, -stamp if descending else stamp)
        # Numbers (native or numeric strings) are compared as floats.
        try:
            number = Renderer.safe_float(cell, 0.0)
            if isinstance(cell, (int, float)) or (
                isinstance(cell, str) and Renderer.is_numeric_string(cell)
            ):
                return (0, -number if descending else number)
        except Exception:
            pass
        # Everything else is compared as text; for descending order each
        # code point is inverted so ascending comparison reverses the order.
        text = str(cell)
        if descending:
            return (0, "".join(chr(0x10FFFF - ord(ch)) for ch in text))
        return (0, text)

    slots = []
    for column, direction in sort_keys:
        descending = direction != "asc"

        if any(sym in column for sym in ("+", "-", "*")):
            # Virtual column: all-dash operands or a failed evaluation
            # both count as missing.
            if op_is_all_dashes(row, column):
                computed = None
            else:
                computed = parse_virtual_column(row, column)
            if computed is None:
                slots.append(missing_slot)
            else:
                slots.append((0, -computed if descending else computed))
            continue

        cell = row.get(column, None)
        if cell is None or cell in ("-", "—", "–", ""):
            slots.append(missing_slot)
        else:
            slots.append(_simple_slot(cell, descending))

    return tuple(slots)
349
350
def op_is_all_dashes(row: Dict[str, Any], key: str) -> bool:
    """Tell whether every operand of a virtual column expression is blank.

    An operand is blank when its cell is absent (``None``), a dash
    placeholder (``-``, ``—``, ``–``) or the empty string. An expression
    without any operator yields ``False``.
    """
    blanks = ("-", "—", "–", "")
    compact = "".join(key.split())  # drop all whitespace

    # Only the first operator found (checked in the order +, -, *) is used
    # to split the expression.
    symbol = next((sym for sym in ("+", "-", "*") if sym in compact), None)
    if symbol is None:
        return False

    cells = (row.get(name.strip(), None) for name in compact.split(symbol))
    return all(cell is None or cell in blanks for cell in cells)
366
367
[docs]
def sort_with_fallback(
    items: List[Tuple[str, Dict[str, Any]]], sort_keys: List[Tuple[str, str]]
):
    """Order ``(id, row)`` pairs by several keys, later keys acting as tiebreakers.

    Args:
        items: List of ``(id, row)`` tuples to sort.
        sort_keys: List of ``(column_expr, direction)`` tuples.

    Returns:
        A new sorted list of the items; an empty input is returned as is.

    Raises:
        SortConfigurationError: If the sort configuration is invalid for
            the given rows.
    """
    if not items:
        return items

    # Validate the keys against a table view of the very rows being sorted.
    validate_sort_configuration(sort_keys, dict(items))

    def _column_slot(row, column, direction):
        if not any(sym in column for sym in ("+", "-", "*")):
            # Simple column.
            return parse_value(row.get(column, None), direction)
        # Virtual column: all-dash operands and failed evaluations sort last.
        if op_is_all_dashes(row, column):
            return (1, "-")
        computed = parse_virtual_column(row, column)
        if computed is None:
            return (1, "-")
        return (0, computed if direction == "asc" else -computed)

    def _rank(position_and_item):
        position, (_item_id, row) = position_and_item
        slots = [_column_slot(row, column, direction) for column, direction in sort_keys]
        # The original position is the final tiebreaker.
        slots.append(position)
        return tuple(slots)

    ordered = sorted(enumerate(items), key=_rank)
    return [item for _position, item in ordered]
421
422
[docs]
def sort_overview(
    overview_data: List[OverviewData],
    table_config: dict,
    sort_arg: Optional[str] = None,
) -> List[OverviewData]:
    """Sort OverviewData objects according to table_config and sort_arg.

    A plain-dict table keyed by project name is built from the objects,
    sorted with ``sort_with_fallback``, and the resulting name order is then
    applied to the original objects.

    Args:
        overview_data: List of OverviewData objects to sort
        table_config: Table configuration dict with columns and default_sort
        sort_arg: Optional sort argument from CLI (overrides default_sort)

    Returns:
        Sorted list of OverviewData objects

    Raises:
        SortConfigurationError: If the resolved sort keys are invalid for
            the data.
    """
    # Create overview_table for sorting (same as renderer does).
    overview_table = {}
    for row in overview_data:
        row_data = row.model_dump().copy()
        # Replace repo field with project name for sorting.
        row_data["repo"] = row.project.name
        # Supervisors column: supervisors first, authors as fallback, else dash.
        if row.extra.supervisors:
            row_data["supervisors"] = "; ".join(row.extra.supervisors)
        elif row.extra.authors:
            row_data["supervisors"] = "Authors: " + "; ".join(row.extra.authors)
        else:
            row_data.setdefault("supervisors", "-")
        # Guarantee that every configured column key exists.
        for col in table_config["columns"]:
            row_data.setdefault(col["key"], "-")
        overview_table[row.project.name] = row_data

    sort_keys = parse_sort_keys(sort_arg, table_config, overview_table)
    sorted_items = sort_with_fallback(list(overview_table.items()), sort_keys)

    # Map each project name to its position once, instead of a linear
    # list.index() lookup per element inside the sort key. Names are unique
    # here because they are the keys of overview_table.
    rank = {name: position for position, (name, _) in enumerate(sorted_items)}

    # Sort OverviewData in the same order.
    return sorted(overview_data, key=lambda row: rank[row.project.name])
469
470
# Public API of this module. SortConfigurationError is listed because the
# public parsing and sorting functions raise it, so callers need the name
# in order to catch it.
__all__ = [
    "SortConfigurationError",
    "parse_virtual_column",
    "parse_value",
    "is_virtual_column_sortable",
    "parse_sort_keys",
    "sort_row_tuple",
    "sort_with_fallback",
    "sort_overview",
]