refactor(multi-pr-review): remove Python scripts and use Task tool directly (#2719)

## Summary - Remove Python orchestrator scripts (`orchestrate_review.py`, `aggregate_results.py`, `post_comment.py`) from the multi-pr-review skill - Rewrite SKILL.md to use Claude Code's Task tool for spawning sub-agents directly - Replace simple consensus voting (2+ agents agree) with reasoned validation for more accurate issue detection - Add merge verdict determination (YES / NOT SURE / NO) to guide reviewers ## Test plan - [ ] Run `/dyad:multi-pr-review` on a test PR to verify the new Task-tool-based approach works - [ ] Verify all three agents spawn correctly with different file orderings - [ ] Confirm issues are validated through reasoned analysis rather than vote counting - [ ] Check that merge verdict is correctly displayed in the summary comment 🤖 Generated with [Claude Code](https://claude.com/claude-code)  --- <a href="https://app.devin.ai/review/dyad-sh/dyad/pull/2719" target="_blank"> <picture> <source media="(prefers-color-scheme: dark)" srcset="https://static.devin.ai/assets/gh-open-in-devin-review-dark.svg?v=1"> <img src="https://static.devin.ai/assets/gh-open-in-devin-review-light.svg?v=1" alt="Open with Devin"> </picture> </a>  Co-authored-by: Will Chen <willchen90@gmail.com> Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>

refactor(multi-pr-review): remove Python scripts and use Task tool directly (#2719)
7d8379b2 · wwwillchen-bot · GitHub · 80729b2e · 7d8379b2 · 80729b2e
--- a/.claude/skills/multi-pr-review/SKILL.md
+++ b/.claude/skills/multi-pr-review/SKILL.md
--- a/.claude/skills/multi-pr-review/scripts/aggregate_results.py
+++ b/.claude/skills/multi-pr-review/scripts/aggregate_results.py
-#!/usr/bin/env python3
-"""
-Standalone issue aggregation using consensus voting.
-
-Can be used to re-process raw agent outputs or for testing.
-"""
-
-import argparse
-import json
-import sys
-from pathlib import Path
-
-SEVERITY_RANK = {"HIGH": 3, "MEDIUM": 2, "LOW": 1}
-
-
-def issues_match(a: dict, b: dict, line_tolerance: int = 5) -> bool:
-    """Check if two issues refer to the same problem."""
-    if a['file'] != b['file']:
-        return False
-    
-    # Check line overlap with tolerance (applied symmetrically to both issues)
-    a_start = a.get('line_start', 0)
-    a_end = a.get('line_end', a_start)
-    b_start = b.get('line_start', 0)
-    b_end = b.get('line_end', b_start)
-
-    a_range = set(range(max(1, a_start - line_tolerance), a_end + line_tolerance + 1))
-    b_range = set(range(max(1, b_start - line_tolerance), b_end + line_tolerance + 1))
-    
-    if not a_range.intersection(b_range):
-        return False
-    
-    # Same category is a strong signal
-    if a.get('category') == b.get('category'):
-        return True
-    
-    # Check for similar titles
-    a_words = set(a.get('title', '').lower().split())
-    b_words = set(b.get('title', '').lower().split())
-    overlap = len(a_words.intersection(b_words))
-    
-    if overlap >= 2 or (overlap >= 1 and len(a_words) <= 3):
-        return True
-    
-    return False
-
-
-def aggregate(
-    agent_results: list[list[dict]],
-    consensus_threshold: int = 2,
-    min_severity: str = "MEDIUM"
-) -> list[dict]:
-    """
-    Aggregate issues from multiple agents using consensus voting.
-    
-    Args:
-        agent_results: List of issue lists, one per agent
-        consensus_threshold: Minimum number of agents that must agree
-        min_severity: Minimum severity level to include
-    
-    Returns:
-        List of consensus issues
-    """
-    # Flatten and tag with agent ID
-    flat_issues = []
-    for agent_id, issues in enumerate(agent_results):
-        for issue in issues:
-            issue_copy = dict(issue)
-            issue_copy['agent_id'] = agent_id
-            flat_issues.append(issue_copy)
-    
-    if not flat_issues:
-        return []
-    
-    # Group similar issues
-    groups = []
-    used = set()
-    
-    for i, issue in enumerate(flat_issues):
-        if i in used:
-            continue
-        
-        group = [issue]
-        used.add(i)
-        
-        for j, other in enumerate(flat_issues):
-            if j in used:
-                continue
-            if issues_match(issue, other):
-                group.append(other)
-                used.add(j)
-        
-        groups.append(group)
-    
-    # Filter by consensus and severity
-    min_rank = SEVERITY_RANK.get(min_severity.upper(), 2)
-    consensus_issues = []
-    
-    for group in groups:
-        # Count unique agents
-        agents = set(issue['agent_id'] for issue in group)
-        if len(agents) < consensus_threshold:
-            continue
-        
-        # Check severity threshold
-        max_severity = max(SEVERITY_RANK.get(i.get('severity', 'LOW').upper(), 0) for i in group)
-        if max_severity < min_rank:
-            continue
-        
-        # Use highest-severity version as representative
-        representative = max(group, key=lambda i: SEVERITY_RANK.get(i.get('severity', 'LOW').upper(), 0))
-        
-        result = dict(representative)
-        result['consensus_count'] = len(agents)
-        result['all_severities'] = [i.get('severity', 'LOW') for i in group]
-        del result['agent_id']
-        
-        consensus_issues.append(result)
-    
-    # Sort by severity then file
-    consensus_issues.sort(
-        key=lambda x: (-SEVERITY_RANK.get(x.get('severity', 'LOW').upper(), 0), 
-                       x.get('file', ''), 
-                       x.get('line_start', 0))
-    )
-    
-    return consensus_issues
-
-
-def main():
-    parser = argparse.ArgumentParser(description='Aggregate agent review results')
-    parser.add_argument('input_files', nargs='+', help='JSON files with agent results')
-    parser.add_argument('--output', '-o', type=str, default='-', help='Output file (- for stdout)')
-    parser.add_argument('--threshold', type=int, default=2, help='Consensus threshold')
-    parser.add_argument('--min-severity', type=str, default='MEDIUM',
-                       choices=['HIGH', 'MEDIUM', 'LOW'], help='Minimum severity')
-    args = parser.parse_args()
-    
-    # Load all agent results
-    agent_results = []
-    for input_file in args.input_files:
-        path = Path(input_file)
-        if not path.exists():
-            print(f"Warning: File not found: {input_file}", file=sys.stderr)
-            continue
-        
-        with open(path) as f:
-            data = json.load(f)
-            # Handle both raw arrays and wrapped results
-            if isinstance(data, list):
-                agent_results.append(data)
-            elif isinstance(data, dict) and 'issues' in data:
-                agent_results.append(data['issues'])
-            else:
-                print(f"Warning: Unexpected format in {input_file}", file=sys.stderr)
-    
-    if not agent_results:
-        print("Error: No valid input files", file=sys.stderr)
-        sys.exit(1)
-    
-    # Aggregate
-    consensus = aggregate(
-        agent_results,
-        consensus_threshold=args.threshold,
-        min_severity=args.min_severity
-    )
-    
-    # Output
-    output_json = json.dumps(consensus, indent=2)
-    
-    if args.output == '-':
-        print(output_json)
-    else:
-        Path(args.output).write_text(output_json)
-        print(f"Wrote {len(consensus)} consensus issues to {args.output}", file=sys.stderr)
-    
-    return 0
-
-
-if __name__ == '__main__':
-    sys.exit(main())
--- a/.claude/skills/multi-pr-review/scripts/orchestrate_review.py
+++ b/.claude/skills/multi-pr-review/scripts/orchestrate_review.py
--- a/.claude/skills/multi-pr-review/scripts/post_comment.py
+++ b/.claude/skills/multi-pr-review/scripts/post_comment.py