Source code for gitlab_overviewer.services.sort_utils

  1"""
  2Sorting utilities for table data.
  3
  4Implements :any:`/specs/spec_table_sorting` §1-8, covering:
  5
  6* Sort-key definition and expressions (§1)
  7* General ordering rules (§2)
  8* Virtual arithmetic columns (§3)
  9* Date handling (§4)
 10* Missing values & dashes (§5)
 11* Group-aware post-processing (§6)
 12* Fallback sorting (§7)
 13* Error handling (§8)
 14"""
 15
 16from typing import List, Tuple, Optional, Dict, Any, Set
 17from datetime import datetime
 18from ..rendering.renderer_base import Renderer
 19from ..models.overview_data import OverviewData
 20
 21
 22class SortConfigurationError(Exception):
 23    """Exception raised for sort configuration errors."""
 24
 25    pass
 26
 27
 28def validate_sort_configuration(
 29    sort_keys: List[Tuple[str, str]], overview_table: Dict[str, Dict[str, Any]]
 30) -> None:
 31    """Validate sort configuration against the data.
 32
 33    Args:
 34        sort_keys: List of (column_expr, direction) tuples
 35        overview_table: The table data to validate against
 36
 37    Raises:
 38        SortConfigurationError: If any validation fails
 39    """
 40    if not overview_table:
 41        return  # Empty table is valid
 42
 43    # Gather the union of all column keys present in any row. A column is
 44    # considered available if *any* row contains it – rows may legitimately
 45    # omit values for some columns (rendered as dashes). Relying on just the
 46    # first row risks false-positive errors when that row lacks an otherwise
 47    # valid column.
 48    available_columns: Set[str] = set()
 49    for _row in overview_table.values():
 50        if _row:
 51            available_columns.update(_row.keys())
 52    if not available_columns:
 53        return  # Nothing to validate for an empty table
 54
 55    for key_expr, direction in sort_keys:
 56        # Validate direction
 57        if direction not in ("asc", "desc"):
 58            raise SortConfigurationError(
 59                f"Invalid sort direction '{direction}' for key '{key_expr}'"
 60            )
 61
 62        # Check if it's a virtual column
 63        if any(op in key_expr for op in ["+", "-", "*"]):
 64            # Validate no nested arithmetic
 65            if sum(key_expr.count(op) for op in ["+", "-", "*"]) > 1:
 66                raise SortConfigurationError(
 67                    f"Nested arithmetic not supported in virtual column '{key_expr}'"
 68                )
 69
 70            # Extract column references
 71            op = next(op for op in ["+", "-", "*"] if op in key_expr)
 72            columns = [col.strip() for col in key_expr.split(op)]
 73
 74            # Check for unknown columns
 75            for col in columns:
 76                if col not in available_columns:
 77                    raise SortConfigurationError(
 78                        f"Unknown column '{col}' referenced in virtual column '{key_expr}'"
 79                    )
 80
 81            # Check if virtual column is sortable (has at least one row with numeric values)
 82            if not is_virtual_column_sortable(overview_table, columns):
 83                raise SortConfigurationError(
 84                    f"Virtual column '{key_expr}' is not sortable - no rows contain numeric values"
 85                )
 86        else:
 87            # Simple column - check if it exists
 88            if key_expr not in available_columns:
 89                raise SortConfigurationError(f"Unknown column '{key_expr}'")
 90
 91
def _virtual_operand_value(row: Dict[str, Any], name: str) -> Optional[float]:
    """Resolve one operand column of a virtual expression to a float.

    Returns 0.0 for dash / empty placeholders, the converted number for
    int/float cells and numeric strings, and ``None`` for anything else
    (including a column that is absent from the row).
    """
    raw = row.get(name)
    if raw in ("-", "—", "–", ""):
        return 0.0
    if isinstance(raw, (int, float)) or (
        isinstance(raw, str) and Renderer.is_numeric_string(raw)
    ):
        return Renderer.safe_float(raw, 0.0)
    return None


def parse_virtual_column(row: Dict[str, Any], key: str) -> Optional[float]:
    """Evaluate a virtual arithmetic column expression for one row.

    Exactly one binary operator (``+``, ``-`` or ``*``) between two column
    names is supported, e.g. ``"open + closed"``. Whitespace in the
    expression is ignored.

    Args:
        row: Row dict mapping column names to cell values.
        key: The virtual column expression.

    Returns:
        The computed value, or ``None`` when the expression does not contain
        exactly one operator or when either operand is not numeric. Dash /
        empty placeholders count as 0.0.
    """
    expression = "".join(key.split())
    operators = ("+", "-", "*")

    # Zero operators is not a virtual column; more than one is unsupported
    # (no precedence handling), so both cases are rejected here.
    if sum(expression.count(symbol) for symbol in operators) != 1:
        return None

    operator = next(symbol for symbol in operators if symbol in expression)
    left_name, right_name = expression.split(operator)

    left_val = _virtual_operand_value(row, left_name)
    if left_val is None:
        return None
    right_val = _virtual_operand_value(row, right_name)
    if right_val is None:
        return None

    if operator == "+":
        return left_val + right_val
    if operator == "-":
        return left_val - right_val
    return left_val * right_val
def get_numeric_value(row: Dict[str, Any], key: str) -> Optional[float]:
    """Read column ``key`` from ``row`` as a float.

    Returns 0.0 when the cell is absent, ``None`` or a dash / empty
    placeholder; the converted value for int/float cells and numeric
    strings; and ``None`` for any other content or when conversion raises.
    """
    cell = row.get(key.strip())

    # Missing cells and dash placeholders count as zero.
    if cell is None or cell in ("-", "—", "–", ""):
        return 0.0

    try:
        converted = Renderer.safe_float(cell, 0.0)
        # Only trust the conversion when the cell really was numeric.
        if isinstance(cell, (int, float)):
            return converted
        if isinstance(cell, str) and Renderer.is_numeric_string(cell):
            return converted
    except Exception:
        return None

    # Non-numeric content is reported as invalid.
    return None
def parse_value(v, direction="asc"):
    """Turn a single cell value into a ``(rank, key)`` sort tuple.

    Args:
        v: The raw cell value.
        direction: ``"asc"`` for ascending; any other value is treated as
            descending.

    Returns:
        ``(1, "-")`` for missing values (``None``, empty string, hyphen,
        em dash or en dash) so they sort after every present value in both
        directions. Otherwise ``(0, key)`` where ``key`` is a timestamp for
        ISO date strings, a float for values ``Renderer.safe_float``
        converts, or a string; the key is negated / inverted for descending
        order.
    """
    # The en dash is included so this agrees with the placeholder set used
    # by the other helpers in this module.
    if v is None or v in ("-", "—", "–", ""):
        return (1, "-")

    descending = direction != "asc"

    # Dates take priority over plain numbers.
    if isinstance(v, str):
        parsed_date = Renderer.parse_iso_date(v)
        if parsed_date is not None:
            timestamp = parsed_date.timestamp()
            return (0, -timestamp if descending else timestamp)

    # NOTE(review): if safe_float returns its default instead of raising for
    # non-numeric text, such text sorts as 0.0 and never reaches the string
    # fallback below; sort_row_tuple guards this with is_numeric_string.
    # Confirm which behaviour is intended.
    try:
        val = Renderer.safe_float(v, 0.0)
        return (0, -val if descending else val)
    except Exception:
        pass  # deliberate best effort: fall back to string ordering

    s = str(v)
    if not descending:
        return (0, s)

    # Invert each code point so an ascending sort yields descending text.
    # The trailing max-code-point sentinel makes a string sort *after* its
    # own extensions ("abc" before "ab"), which plain inversion gets wrong.
    inverted = "".join(chr(0x10FFFF - ord(ch)) for ch in s) + "\U0010FFFF"
    return (0, inverted)
203 204
def is_virtual_column_sortable(
    overview_table: Dict[str, Dict[str, Any]], keys: List[str]
) -> bool:
    """Report whether at least one row has numeric values for all operands.

    A row qualifies only when *every* operand column holds an int/float or
    a numeric string. Dash placeholders and missing cells do not qualify,
    which is deliberately stricter than evaluation (where placeholders
    count as 0) so that misconfigured expressions are caught early.

    Args:
        overview_table: Mapping of row id to row dict.
        keys: Either the operand column names, or a one-element list holding
            the whole virtual expression (it is then split on its operator).

    Returns:
        True if some row satisfies the condition, False otherwise.
    """
    operators = ("+", "-", "*")
    if len(keys) == 1 and any(symbol in keys[0] for symbol in operators):
        operator = next(symbol for symbol in operators if symbol in keys[0])
        subkeys = [part.strip() for part in keys[0].split(operator)]
    else:
        subkeys = keys

    def _is_numeric(cell: Any) -> bool:
        # Native numbers are accepted directly, matching how the expression
        # evaluator treats them; only strings go through the string check.
        if isinstance(cell, (int, float)):
            return True
        return isinstance(cell, str) and Renderer.is_numeric_string(cell)

    for row in overview_table.values():
        try:
            if all(_is_numeric(row.get(name)) for name in subkeys):
                return True
        except Exception:
            # Best effort: a row that cannot be inspected is simply not
            # counted as sortable.
            continue
    return False
239 240
def _parse_sort_key_spec(spec: str) -> Tuple[str, str]:
    """Split one ``<column-expr>[:<direction>]`` spec into (column, direction).

    The direction is lower-cased; a missing or unrecognised direction falls
    back to ``"asc"``.
    """
    column, separator, direction = spec.partition(":")
    if not separator:
        return (spec.strip(), "asc")
    direction = direction.strip().lower()
    if direction not in ("asc", "desc"):
        direction = "asc"
    return (column.strip(), direction)


def parse_sort_keys(
    sort_arg: Optional[str],
    table_config: Optional[dict],
    overview_table: Optional[Dict[str, Dict[str, Any]]] = None,
) -> List[Tuple[str, str]]:
    """Build the list of sort keys from the CLI argument or the table config.

    A sort key is written ``<column-expr>:<direction>`` (e.g.
    ``"priority:desc"``) or just ``<column-expr>`` (ascending). Several keys
    are comma-separated and act left-to-right as tie-breakers.

    Precedence: a non-empty ``sort_arg`` wins; otherwise
    ``table_config["default_sort"]`` is used (a list of key specs, or a
    single comma-separated string); otherwise the result is
    ``[("repo", "asc")]``. A ``default_sort`` of ``None`` counts as absent.

    Args:
        sort_arg: Comma-separated sort keys from the command line, if any.
        table_config: Table configuration that may hold ``default_sort``.
        overview_table: When given, the parsed keys are validated against it.

    Returns:
        List of ``(column_expr, direction)`` tuples with direction ``"asc"``
        or ``"desc"``.

    Raises:
        SortConfigurationError: If validation against ``overview_table``
            fails.
    """
    if sort_arg:
        sort_list = [
            _parse_sort_key_spec(part)
            for part in sort_arg.split(",")
            if part.strip()
        ]
    else:
        configured = table_config.get("default_sort") if table_config else None
        if configured is None:
            # Nothing specified anywhere: sort by repo name.
            sort_list = [("repo", "asc")]
        else:
            if isinstance(configured, str):
                # Accept the CLI syntax in config too, rather than iterating
                # the string character by character.
                configured = [
                    part for part in configured.split(",") if part.strip()
                ]
            sort_list = [_parse_sort_key_spec(entry) for entry in configured]

    if overview_table is not None:
        validate_sort_configuration(sort_list, overview_table)

    return sort_list
304 305
def sort_row_tuple(row: Dict[str, Any], sort_keys: List[Tuple[str, str]]):
    """Build the composite sort key for one row.

    Each sort key contributes one ``(rank, value)`` pair. Rank 1 marks a
    missing or invalid value so it sorts after every valid one regardless
    of direction; rank 0 carries a timestamp (ISO date strings), a float
    (numeric cells) or a string, negated / inverted for descending order.

    Args:
        row: Row dict mapping column names to cell values.
        sort_keys: List of ``(column_expr, direction)`` tuples.

    Returns:
        Tuple with one ``(rank, value)`` pair per sort key.
    """
    missing = (1, "-")
    result = []
    for k, d in sort_keys:
        ascending = d == "asc"

        if any(op in k for op in ("+", "-", "*")):
            # Virtual column: all-placeholder operands and unparsable
            # expressions are both treated as missing.
            val = None if op_is_all_dashes(row, k) else parse_virtual_column(row, k)
            if val is None:
                result.append(missing)
            else:
                result.append((0, val if ascending else -val))
            continue

        v = row.get(k)
        if v is None or v in ("-", "—", "–", ""):
            result.append(missing)
            continue

        if isinstance(v, str):
            parsed_date = Renderer.parse_iso_date(v)
            if parsed_date is not None:
                timestamp = parsed_date.timestamp()
                result.append((0, timestamp if ascending else -timestamp))
                continue

        try:
            val = Renderer.safe_float(v, 0.0)
            if isinstance(v, (int, float)) or (
                isinstance(v, str) and Renderer.is_numeric_string(v)
            ):
                result.append((0, val if ascending else -val))
                continue
        except Exception:
            pass  # deliberate best effort: fall back to string ordering

        s = str(v)
        if ascending:
            result.append((0, s))
        else:
            # Invert code points for descending order. The trailing
            # max-code-point sentinel makes a string sort after its own
            # extensions ("abc" before "ab"), which plain inversion gets
            # wrong.
            inverted = "".join(chr(0x10FFFF - ord(ch)) for ch in s) + "\U0010FFFF"
            result.append((0, inverted))
    return tuple(result)
def op_is_all_dashes(row: Dict[str, Any], key: str) -> bool:
    """Tell whether every operand of a virtual expression is a placeholder.

    The expression is split on the first of ``+``, ``-``, ``*`` that occurs
    in it (checked in that order). An operand counts as a placeholder when
    its cell is absent, ``None``, empty, or one of the dash characters.
    Returns False when the expression contains no operator at all.
    """
    compact = "".join(key.split())  # drop all whitespace

    operator = next((symbol for symbol in ("+", "-", "*") if symbol in compact), None)
    if operator is None:
        return False

    placeholders = ("-", "—", "–", "")
    cells = (row.get(name.strip()) for name in compact.split(operator))
    return all(cell is None or cell in placeholders for cell in cells)
def sort_with_fallback(
    items: List[Tuple[str, Dict[str, Any]]], sort_keys: List[Tuple[str, str]]
):
    """Order ``(id, row)`` pairs by several sort keys.

    Earlier keys take priority and later keys break ties; the original
    position is the final tie-breaker, so fully tied rows keep their input
    order.

    Args:
        items: List of ``(id, row)`` tuples to sort.
        sort_keys: List of ``(column_expr, direction)`` tuples.

    Returns:
        A new sorted list of the items (the input itself when it is empty).

    Raises:
        SortConfigurationError: If the sort keys fail validation against
            the given rows.
    """
    if not items:
        return items

    # Validate against a table rebuilt from the pairs being sorted.
    validate_sort_configuration(sort_keys, dict(items))

    def ranking(numbered):
        position, (_item_id, cells) = numbered
        parts = []
        for expression, order in sort_keys:
            if not any(symbol in expression for symbol in ("+", "-", "*")):
                # Plain column: delegate to the single-value parser.
                parts.append(parse_value(cells.get(expression, None), order))
                continue

            # Virtual column: all-placeholder operands and unparsable
            # expressions both sort after valid values.
            computed = (
                None
                if op_is_all_dashes(cells, expression)
                else parse_virtual_column(cells, expression)
            )
            if computed is None:
                parts.append((1, "-"))
            else:
                parts.append((0, computed if order == "asc" else -computed))

        # Original index last keeps the ordering stable on full ties.
        parts.append(position)
        return tuple(parts)

    ordered = sorted(enumerate(items), key=ranking)
    return [entry for _position, entry in ordered]
421 422
def sort_overview(
    overview_data: List[OverviewData],
    table_config: dict,
    sort_arg: Optional[str] = None,
) -> List[OverviewData]:
    """Sort OverviewData objects according to table_config and sort_arg.

    Each object is flattened into a row dict keyed by project name, the rows
    are sorted with ``sort_with_fallback``, and the objects are returned in
    the resulting order.

    Args:
        overview_data: List of OverviewData objects to sort.
        table_config: Table configuration dict; ``columns`` (a list of dicts
            with a ``key`` entry) is required, ``default_sort`` is optional.
        sort_arg: Optional sort argument from the CLI (overrides
            ``default_sort``).

    Returns:
        A new list with the OverviewData objects in sorted order.

    Raises:
        SortConfigurationError: If the resolved sort keys are invalid for
            the data.
    """
    # Build the row table used for sorting. Per the original comments this
    # is meant to mirror the renderer's row shaping (TODO confirm against
    # the renderer).
    overview_table = {}
    for row in overview_data:
        row_data = row.model_dump().copy()
        # Sort on the project name rather than the raw repo field.
        row_data["repo"] = row.project.name
        # Supervisors column: explicit supervisors, else authors, else dash.
        if row.extra.supervisors:
            row_data["supervisors"] = "; ".join(row.extra.supervisors)
        elif row.extra.authors:
            row_data["supervisors"] = "Authors: " + "; ".join(row.extra.authors)
        else:
            row_data.setdefault("supervisors", "-")
        # Guarantee that every configured column key exists.
        for col in table_config["columns"]:
            row_data.setdefault(col["key"], "-")
        # Rows are keyed by project name, so objects sharing a name share
        # one table entry (the last one wins).
        overview_table[row.project.name] = row_data

    sort_keys = parse_sort_keys(sort_arg, table_config, overview_table)
    sorted_items = sort_with_fallback(list(overview_table.items()), sort_keys)

    # Name -> rank lookup avoids a linear list.index() scan per element.
    rank_by_name = {name: rank for rank, (name, _) in enumerate(sorted_items)}
    return sorted(overview_data, key=lambda row: rank_by_name[row.project.name])
# Names exported by ``from <module> import *``. Other module-level names
# (e.g. SortConfigurationError, validate_sort_configuration) are not listed
# here and must be imported explicitly.
__all__ = [
    "parse_virtual_column",
    "parse_value",
    "is_virtual_column_sortable",
    "parse_sort_keys",
    "sort_row_tuple",
    "sort_with_fallback",
    "sort_overview",
]