browser-use/browser-use

10 workflows · maturity 83% · 7 patterns · GitHub ↗

Security 20/100

Practices

✓ Matrix · ✓ Permissions · ○ Security scan · ✓ AI review · ✓ Cache · ✓ Concurrency · ○ Reusable workflows

Detected patterns

ai-code-review flaky-test-retry hardware-matrix least-privilege-permissions multi-channel-release multi-stage-release performance-tracking

Security dimensions

permissions

security scan

supply chain

secret handling

harden runner

Workflows (10)

claude AI .github/workflows/claude.yml

Triggers

issue_comment, pull_request_review_comment, issues, pull_request_review

Runs on

ubuntu-latest

Jobs

claude

Actions

astral-sh/setup-uv, anthropics/claude-code-action

Commands

uv sync --dev --all-extras
echo "PLAYWRIGHT_VERSION=$(uv pip list --format json | jq -r '.[] | select(.name == "playwright") | .version')" >> $GITHUB_ENV
playwright install chromium --with-deps

View raw YAML

# Runs the Claude Code action when a new comment, review, or issue mentions "@claude".
name: Claude Code

on:
  issue_comment:
    types: [created]
  pull_request_review_comment:
    types: [created]
  issues:
    types: [opened, assigned]
  pull_request_review:
    types: [submitted]

jobs:
  claude:
    # Each trigger carries its text in a different payload field, so the
    # "@claude" mention is checked per event type.
    if: |
      (github.event_name == 'issue_comment' && contains(github.event.comment.body, '@claude')) ||
      (github.event_name == 'pull_request_review_comment' && contains(github.event.comment.body, '@claude')) ||
      (github.event_name == 'pull_request_review' && contains(github.event.review.body, '@claude')) ||
      (github.event_name == 'issues' && (contains(github.event.issue.body, '@claude') || contains(github.event.issue.title, '@claude')))
    runs-on: ubuntu-latest
    permissions:
      actions: read
      contents: read
      pull-requests: read
      id-token: write
      discussions: write
      issues: write
    env:
      IS_SANDBOX: '1'
    steps:
      - uses: actions/checkout@v4
      - uses: astral-sh/setup-uv@v6
        with:
          enable-cache: true
          activate-environment: true

      - run: uv sync --dev --all-extras

      # Exported to the job environment so the Chromium cache key below can include it.
      - name: Detect installed Playwright version
        run: echo "PLAYWRIGHT_VERSION=$(uv pip list --format json | jq -r '.[] | select(.name == "playwright") | .version')" >> $GITHUB_ENV

      # - name: Cache chrome binaries
      #   uses: actions/cache@v4
      #   with:
      #     path: |
      #       /tmp/google-chrome-stable_current_amd64.deb
      #     key: ${{ runner.os }}-${{ runner.arch }}-chrome-stable

      # - name: Install Chrome stable binary
      #   run: |
      #     sudo apt-get update -qq \
      #     && sudo curl -o "/tmp/google-chrome-stable_current_amd64.deb" --no-clobber "https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb" \
      #     && sudo apt-get install -y "/tmp/google-chrome-stable_current_amd64.deb" -f
      # - run: patchright install chrome --with-deps
      # - run: playwright install chrome --with-deps

      - name: Cache chromium binaries
        uses: actions/cache@v4
        with:
          path: |
            ~/.cache/ms-playwright
          key: ${{ runner.os }}-${{ runner.arch }}-playwright-${{ env.PLAYWRIGHT_VERSION }}-chromium

      - run: playwright install chromium --with-deps
      # - run: patchright install chromium --with-deps

      - name: Run Claude Code
        id: claude
        uses: anthropics/claude-code-action@beta
        with:
          anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
          model: "claude-opus-4-20250514"
          fallback_model: "claude-3-5-sonnet-20241022"
          # The commands quoted here are given to the agent verbatim, so they
          # must be runnable exactly as written.
          custom_instructions: |
            when making any significant changes, start by adding one or two new failing test functions to the most relevant file you can find in tests/ci/*.py, then work on your changes until you get the tests passing.
            make sure all lint errors are fixed before committing: `uv run pre-commit run --all-files`, you can also use mcp tools to check Github CI status.
            make sure to run the whole test file at the end to make sure no other tests in that file started failing due to your changes: `uv run pytest tests/ci/test_....py`.
            if any significant features were added or removed, or any public-facing parameters/signatures changed, make sure to look through docs/*.mdx and examples/**.py and fix any relevant areas that might need to be updated.
          branch_prefix: "claude-"
          additional_permissions: |
            actions: read
          claude_env: |
            IN_DOCKER: 'true'
            BROWSER_USE_CLOUD_SYNC: 'false'
            ANONYMIZED_TELEMETRY: 'false'
            BROWSER_USE_LOGGING_LEVEL: 'DEBUG'
            OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
            PERPLEXITY_API_KEY: ${{ secrets.PERPLEXITY_API_KEY }}
            ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
            GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
            GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
          # NOTE: the allow-list in `settings` and the `allowed_tools` list
          # below contain the same entries; keep them in sync when editing.
          settings: |
            {
              "permissions": {
                "allow": [
                  "Bash(git:*)",
                  "Bash(uv:*)",
                  "Bash(uv run pytest:*)",
                  "Bash(uv run ruff:*)",
                  "Bash(uv run pyright:*)",
                  "Bash(uv run pre-commit:*)",
                  "Bash(uv pip:*)",
                  "Bash(uv add:*)",
                  "Bash(uv sync --all-extras --dev)",
                  "Bash(.venv/bin/*:*)",
                  "Bash(.venv/bin/python:*)",
                  "Bash(sed:*)",
                  "Bash(rg:*)",
                  "Bash(jq:*)",
                  "Bash(find:*)",
                  "Bash(grep:*)",
                  "Bash(python:*)",
                  "Bash(chmod:*)",
                  "Bash(rm:*)",
                  "Bash(playwright:*)",
                  "Bash(uv run playwright:*)",
                  "Bash(./bin/lint.sh)",
                  "Bash(./bin/test.sh)",
                  "WebFetch(*)",
                  "WebSearch(*)"
                ],
                "additionalDirectories": ["/home/runner/work"]
              }
            }
          allowed_tools: |
            Bash(git:*)
            Bash(uv:*)
            Bash(uv run pytest:*)
            Bash(uv run ruff:*)
            Bash(uv run pyright:*)
            Bash(uv run pre-commit:*)
            Bash(uv pip:*)
            Bash(uv add:*)
            Bash(uv sync --all-extras --dev)
            Bash(.venv/bin/*:*)
            Bash(.venv/bin/python:*)
            Bash(sed:*)
            Bash(rg:*)
            Bash(jq:*)
            Bash(find:*)
            Bash(grep:*)
            Bash(python:*)
            Bash(chmod:*)
            Bash(rm:*)
            Bash(playwright:*)
            Bash(uv run playwright:*)
            Bash(./bin/lint.sh)
            Bash(./bin/test.sh)
            WebFetch(*)
            WebSearch(*)

cloud_evals perms .github/workflows/cloud_evals.yml

Triggers: push, workflow_dispatch
Runs on: ubuntu-latest
Jobs: trigger_cloud_eval_image_build

View raw YAML

# Sends a repository_dispatch event to browser-use/cloud so it can build the
# Cloud eval image for a given library commit.
name: cloud_evals

# Cancel in-progress runs when a new commit is pushed to the same branch/PR
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

on:
  push:
    branches:
      - main
      - 'releases/*'
  workflow_dispatch:
    inputs:
      commit_hash:
        description: Commit hash of the library to build the Cloud eval image for
        required: false

# The default GITHUB_TOKEN gets no permissions; the dispatch call below
# authenticates with a dedicated secret instead.
permissions: {}

jobs:
  trigger_cloud_eval_image_build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/github-script@v7
        env:
          # Passed through the environment instead of being interpolated into
          # the script text, so a manually supplied input can never be parsed
          # as JavaScript. Falls back to the pushed commit when no input is given.
          COMMIT_HASH: ${{ github.event.inputs.commit_hash || github.sha }}
        with:
          github-token: ${{ secrets.TRIGGER_CLOUD_BUILD_GH_KEY }}
          script: |
            const result = await github.rest.repos.createDispatchEvent({
              owner: 'browser-use',
              repo: 'cloud',
              event_type: 'trigger-workflow',
              client_payload: { commit_hash: process.env.COMMIT_HASH }
            })
            console.log(result)

docker .github/workflows/docker.yml

Triggers: push, release, workflow_dispatch
Runs on: ubuntu-latest
Jobs: build_publish_image
Actions: docker/setup-qemu-action, docker/setup-buildx-action, docker/login-action, docker/login-action, docker/metadata-action, docker/build-push-action

View raw YAML

# Builds the image for linux/amd64 and linux/arm64 and pushes it to both
# Docker Hub and the GitHub Container Registry.
name: docker

# Cancel in-progress runs when a new commit is pushed to the same branch/PR
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

on:
  push:
    branches:
      - main
      - stable
      - 'releases/**'
    tags:
      - '*'
  release:
    types:
      - published
  workflow_dispatch:

jobs:
  build_publish_image:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: write
      attestations: write
      id-token: write
    steps:
      - name: Check out the repo
        uses: actions/checkout@v4

      # QEMU + Buildx are what allow the two-platform build further down.
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      # Two logins, one per registry listed under `images` in the metadata step.
      - name: Log in to Docker Hub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_PASSWORD }}

      - name: Login to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.repository_owner }}
          password: ${{ secrets.GITHUB_TOKEN }}

      # The tag and label sets produced here are consumed by the build step
      # via steps.meta.outputs.
      - name: Compute Docker tags based on tag/branch
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: |
            browseruse/browseruse
            ghcr.io/browser-use/browser-use
          tags: |
            type=ref,event=branch
            type=ref,event=pr
            type=pep440,pattern={{version}}
            type=pep440,pattern={{major}}.{{minor}}
            type=sha

      - name: Build and push Docker image
        id: push
        uses: docker/build-push-action@v6
        with:
          context: .
          file: ./Dockerfile
          platforms: linux/amd64,linux/arm64
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          # Layer cache is stored in the registry under a dedicated tag.
          cache-from: type=registry,ref=browseruse/browseruse:buildcache
          cache-to: type=registry,ref=browseruse/browseruse:buildcache,mode=max

eval-on-pr perms .github/workflows/eval-on-pr.yml

Triggers

pull_request

Runs on

ubuntu-latest

Jobs

trigger-evaluation

Commands

echo "🚀 Triggering evaluation - PR #${{ github.event.pull_request.number }}" echo "Commit: ${{ github.event.pull_request.head.sha }}" # You can customize the test here TEST_CASE="${{ vars.EVAL_TEST_CASE }}" if [ -z "$TEST_CASE" ]; then TEST_CASE="InteractionTasks_v8" fi response=$(curl -X POST \ "${{ secrets.EVAL_PLATFORM_URL }}/api/triggerInteractionTasksV6" \ -H "Authorization: Bearer ${{ secrets.EVAL_PLATFORM_KEY }}" \ -H "Content-Type: application/json" \ -d "{ \"commitSha\": \"${{ github.event.pull_request.head.sha }}\", \"prNumber\": ${{ github.event.pull_request.number }}, \"branchName\": \"${{ github.event.pull_request.head.ref }}\", \"testCase\": \"${TEST_CASE}\", \"githubRepo\": \"${{ github.repository }}\" }" -s) echo "Response: $response" # Check if trigger was was successful if echo "$response" | jq -e '.success == true' > /dev/null; then echo "✅ Evaluation triggered successfully" exit 0 else echo "Failed" echo "$response" exit 1 fi

View raw YAML

# Asks the external evaluation platform to run an evaluation against the
# head commit of a pull request.
name: Evaluate PR

permissions:
  contents: read
  pull-requests: write

on:
  pull_request:
    types: [opened, synchronize, reopened]

jobs:
  trigger-evaluation:
    runs-on: ubuntu-latest
    # Only run when the PR author is an owner, member, or collaborator
    if: |
      github.event.pull_request.author_association == 'OWNER' ||
      github.event.pull_request.author_association == 'MEMBER' ||
      github.event.pull_request.author_association == 'COLLABORATOR'

    steps:
      - name: Trigger Evaluation settings
        id: trigger
        # A failed trigger is reported by this step but does not fail the job.
        continue-on-error: true
        # Event data and secrets reach the script as environment variables
        # rather than being expanded into the script text: the branch name is
        # chosen by the PR author and must never be parsed as shell.
        env:
          PR_NUMBER: ${{ github.event.pull_request.number }}
          HEAD_SHA: ${{ github.event.pull_request.head.sha }}
          HEAD_REF: ${{ github.event.pull_request.head.ref }}
          GITHUB_REPO: ${{ github.repository }}
          TEST_CASE: ${{ vars.EVAL_TEST_CASE }}
          EVAL_PLATFORM_URL: ${{ secrets.EVAL_PLATFORM_URL }}
          EVAL_PLATFORM_KEY: ${{ secrets.EVAL_PLATFORM_KEY }}
        run: |
          echo "🚀 Triggering evaluation - PR #${PR_NUMBER}"
          echo "Commit: ${HEAD_SHA}"

          # You can customize the test here (repository variable EVAL_TEST_CASE)
          if [ -z "$TEST_CASE" ]; then
            TEST_CASE="InteractionTasks_v8"
          fi

          # Build the request body with jq so every value is JSON-escaped;
          # prNumber is passed with --argjson so it stays a JSON number.
          payload=$(jq -n \
            --arg commitSha "$HEAD_SHA" \
            --argjson prNumber "$PR_NUMBER" \
            --arg branchName "$HEAD_REF" \
            --arg testCase "$TEST_CASE" \
            --arg githubRepo "$GITHUB_REPO" \
            '{commitSha: $commitSha, prNumber: $prNumber, branchName: $branchName, testCase: $testCase, githubRepo: $githubRepo}')

          response=$(curl -X POST \
            "${EVAL_PLATFORM_URL}/api/triggerInteractionTasksV6" \
            -H "Authorization: Bearer ${EVAL_PLATFORM_KEY}" \
            -H "Content-Type: application/json" \
            -d "$payload" -s)

          echo "Response: $response"

          # Check if the trigger was successful
          if echo "$response" | jq -e '.success == true' > /dev/null; then
            echo "✅ Evaluation triggered successfully"
            exit 0
          else
            echo "Failed"
            echo "$response"
            exit 1
          fi

install-script matrix perms .github/workflows/install-script.yml

Triggers

push, pull_request, workflow_dispatch

Runs on

${{ matrix.os }}, ${{ matrix.os }}, windows-latest, ubuntu-latest, ubuntu-latest, ubuntu-latest

Jobs

test-install-sh-linux, test-install-sh-macos, test-install-sh-windows, test-uv-pip-install, test-uvx-run, test-uvx-pypi

Matrix

os→ macos-14, macos-latest, ubuntu-22.04, ubuntu-latest

Commands

bash browser_use/skill_cli/install.sh
echo "$HOME/.browser-use-env/bin" >> $GITHUB_PATH echo "$HOME/.local/bin" >> $GITHUB_PATH
source ~/.browser-use-env/bin/activate browser-use --help
source ~/.browser-use-env/bin/activate # Verify chromium binary exists in playwright cache ls ~/.cache/ms-playwright/chromium-*/chrome-linux/chrome 2>/dev/null || \ ls ~/.cache/ms-playwright/chromium-*/chrome-linux/chromium 2>/dev/null || \ echo "Chromium binary check completed"
source ~/.browser-use-env/bin/activate browser-use doctor
bash browser_use/skill_cli/install.sh
echo "$HOME/.browser-use-env/bin" >> $GITHUB_PATH echo "$HOME/.local/bin" >> $GITHUB_PATH
source ~/.browser-use-env/bin/activate browser-use --help

View raw YAML

# Tests install.sh and the alternative install methods defined in the jobs below.
name: Test Install Script

# Push (main only) and pull-request runs are limited to changes that touch the
# install script or this workflow file; manual dispatch is always allowed.
on:
  push:
    branches:
      - main
    paths:
      - 'browser_use/skill_cli/install.sh'
      - '.github/workflows/install-script.yml'
  pull_request:
    paths:
      - 'browser_use/skill_cli/install.sh'
      - '.github/workflows/install-script.yml'
  workflow_dispatch:

permissions:
  contents: read

# Cancel in-progress runs when a new commit is pushed
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

env:
  # Use current branch for testing install.sh
  # For PRs, use the fork's repo (head.repo), otherwise use the base repo
  # NOTE(review): presumably both variables are read by install.sh - confirm.
  BROWSER_USE_BRANCH: ${{ github.head_ref || github.ref_name }}
  BROWSER_USE_REPO: ${{ github.event.pull_request.head.repo.full_name || github.repository }}

jobs:
  # ===========================================================================
  # Test install.sh on all platforms
  # ===========================================================================

  # Runs install.sh on each listed Ubuntu image and checks the resulting CLI.
  test-install-sh-linux:
    name: install.sh (Linux ${{ matrix.os }})
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os:
          - ubuntu-latest
          - ubuntu-22.04
    steps:
      - uses: actions/checkout@v4

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Run install.sh
        run: bash browser_use/skill_cli/install.sh

      - name: Add to PATH
        run: |
          echo "$HOME/.browser-use-env/bin" >> $GITHUB_PATH
          echo "$HOME/.local/bin" >> $GITHUB_PATH

      - name: Verify browser-use CLI
        run: |
          source ~/.browser-use-env/bin/activate
          browser-use --help

      # NOTE(review): the trailing `|| echo` makes this step exit 0 even when
      # neither path exists, so it is informational and cannot fail the job.
      - name: Verify Chromium installed
        run: |
          source ~/.browser-use-env/bin/activate
          # Verify chromium binary exists in playwright cache
          ls ~/.cache/ms-playwright/chromium-*/chrome-linux/chrome 2>/dev/null || \
          ls ~/.cache/ms-playwright/chromium-*/chrome-linux/chromium 2>/dev/null || \
          echo "Chromium binary check completed"

      - name: Run browser-use doctor
        run: |
          source ~/.browser-use-env/bin/activate
          browser-use doctor

  # Runs install.sh on each listed macOS image and checks the resulting CLI.
  test-install-sh-macos:
    name: install.sh (macOS ${{ matrix.os }})
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os:
          - macos-latest
          - macos-14
    steps:
      - uses: actions/checkout@v4

      - name: Run install.sh
        run: bash browser_use/skill_cli/install.sh

      - name: Add to PATH
        run: |
          echo "$HOME/.browser-use-env/bin" >> $GITHUB_PATH
          echo "$HOME/.local/bin" >> $GITHUB_PATH

      - name: Verify browser-use CLI
        run: |
          source ~/.browser-use-env/bin/activate
          browser-use --help

      # NOTE(review): the trailing `|| echo` makes this step exit 0 even when
      # neither path exists, so it is informational and cannot fail the job.
      - name: Verify Chromium installed
        run: |
          source ~/.browser-use-env/bin/activate
          # Check playwright cache for chromium
          ls ~/Library/Caches/ms-playwright/chromium-*/chrome-mac/ 2>/dev/null || \
          ls ~/Library/Caches/ms-playwright/chromium-*/Chromium.app 2>/dev/null || \
          echo "Chromium binary check completed"

      - name: Run browser-use doctor
        run: |
          source ~/.browser-use-env/bin/activate
          browser-use doctor

  # Runs install.sh on Windows; every `run` step uses bash via the job default.
  test-install-sh-windows:
    name: install.sh (Windows)
    runs-on: windows-latest
    defaults:
      run:
        shell: bash
    env:
      # Fix Unicode output on Windows (checkmarks, etc.)
      PYTHONIOENCODING: utf-8
    steps:
      - uses: actions/checkout@v4

      - name: Setup Python (Windows requires manual setup)
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Run install.sh
        run: bash browser_use/skill_cli/install.sh

      # This job uses the venv's Scripts/ directory where the Linux and macOS jobs use bin/.
      - name: Add to PATH
        run: |
          echo "$HOME/.browser-use-env/Scripts" >> $GITHUB_PATH
          echo "$HOME/.local/bin" >> $GITHUB_PATH

      - name: Verify browser-use CLI
        run: |
          source ~/.browser-use-env/Scripts/activate
          browser-use --help

      - name: Run browser-use doctor
        run: |
          source ~/.browser-use-env/Scripts/activate
          browser-use doctor

  # ===========================================================================
  # Test alternative install methods: uv pip install + browser-use install
  # ===========================================================================

  # Installs the checked-out source into a fresh uv venv, then runs
  # `browser-use install` and checks the CLI.
  test-uv-pip-install:
    name: uv pip install (Linux)
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Install uv
        run: curl -LsSf https://astral.sh/uv/install.sh | sh

      - name: Add uv to PATH
        run: echo "$HOME/.local/bin" >> $GITHUB_PATH

      - name: Create venv and install browser-use
        run: |
          uv venv .venv --python 3.11
          source .venv/bin/activate
          # Install from current branch
          uv pip install .

      - name: Run browser-use install (installs Chromium)
        run: |
          source .venv/bin/activate
          browser-use install

      - name: Verify browser-use CLI
        run: |
          source .venv/bin/activate
          browser-use --help

      # NOTE(review): the trailing `|| echo` makes this step exit 0 even when
      # neither path exists, so it is informational and cannot fail the job.
      - name: Verify Chromium installed
        run: |
          source .venv/bin/activate
          ls ~/.cache/ms-playwright/chromium-*/chrome-linux/chrome 2>/dev/null || \
          ls ~/.cache/ms-playwright/chromium-*/chrome-linux/chromium 2>/dev/null || \
          echo "Chromium check completed"

      - name: Run browser-use doctor
        run: |
          source .venv/bin/activate
          browser-use doctor

  # ===========================================================================
  # Test uvx "browser-use[cli]" - ephemeral install
  # ===========================================================================

  # Builds a wheel from the checked-out source and runs the CLI from that
  # wheel through `uvx --from`.
  test-uvx-run:
    name: uvx browser-use[cli] (Linux)
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Install uv
        run: curl -LsSf https://astral.sh/uv/install.sh | sh

      - name: Add uv to PATH
        run: echo "$HOME/.local/bin" >> $GITHUB_PATH

      - name: Build wheel from current branch
        run: |
          uv venv .venv --python 3.11
          source .venv/bin/activate
          uv pip install build
          python -m build --wheel

      # Each step below re-resolves the wheel path because shell variables do
      # not persist between steps.
      - name: Test uvx with local wheel
        run: |
          WHEEL=$(ls dist/*.whl)
          uvx --from "$WHEEL" browser-use --help

      - name: Test uvx browser-use install
        run: |
          WHEEL=$(ls dist/*.whl)
          uvx --from "$WHEEL" browser-use install

      # NOTE(review): the trailing `|| echo` makes this step exit 0 even when
      # neither path exists, so it is informational and cannot fail the job.
      - name: Verify Chromium installed after uvx install
        run: |
          ls ~/.cache/ms-playwright/chromium-*/chrome-linux/chrome 2>/dev/null || \
          ls ~/.cache/ms-playwright/chromium-*/chrome-linux/chromium 2>/dev/null || \
          echo "Chromium check completed"

      - name: Test uvx browser-use doctor
        run: |
          WHEEL=$(ls dist/*.whl)
          uvx --from "$WHEEL" browser-use doctor

  # ===========================================================================
  # Test uvx from PyPI (only on main branch after release)
  # ===========================================================================

  # Runs the CLI via `uvx "browser-use[cli]"` with no repository checkout and
  # no local wheel.
  test-uvx-pypi:
    name: uvx browser-use[cli] from PyPI
    runs-on: ubuntu-latest
    # Only run on main branch or manual trigger
    if: github.ref == 'refs/heads/main' || github.event_name == 'workflow_dispatch'
    steps:
      - name: Install uv
        run: curl -LsSf https://astral.sh/uv/install.sh | sh

      - name: Add uv to PATH
        run: echo "$HOME/.local/bin" >> $GITHUB_PATH

      - name: Test uvx browser-use --help
        run: uvx "browser-use[cli]" --help

      - name: Test uvx browser-use install
        run: uvx "browser-use[cli]" install

      # NOTE(review): the trailing `|| echo` makes this step exit 0 even when
      # neither path exists, so it is informational and cannot fail the job.
      - name: Verify Chromium installed
        run: |
          ls ~/.cache/ms-playwright/chromium-*/chrome-linux/chrome 2>/dev/null || \
          ls ~/.cache/ms-playwright/chromium-*/chrome-linux/chromium 2>/dev/null || \
          echo "Chromium check completed"

      - name: Test uvx browser-use doctor
        run: uvx "browser-use[cli]" doctor

lint perms .github/workflows/lint.yml

Triggers

push, pull_request, workflow_dispatch

Runs on

ubuntu-latest, ubuntu-latest, ubuntu-latest

Jobs

lint-syntax, lint-style, lint-typecheck

Actions

astral-sh/setup-uv, astral-sh/setup-uv, astral-sh/setup-uv

Commands

uv run ruff check --no-fix --select PLE
uv python install 3.11
uv sync --dev --all-extras --python 3.11
uv run --no-sync pre-commit run --all-files --show-diff-on-failure
uv sync --dev --all-extras
uv run --no-sync pyright

View raw YAML

# Lint checks: one job each for syntax errors, code style, and type checking.
name: lint

# Cancel in-progress runs when a new commit is pushed to the same branch/PR
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

# Runs on pushes to the listed branches and to any tag, on every pull request,
# and on manual dispatch.
on:
  push:
    branches:
      - main
      - stable
      - 'releases/**'
    tags:
      - '*'
  pull_request:
  workflow_dispatch:

permissions:
  contents: read

jobs:
  # Quick check that runs ruff with only the PLE rule group selected and
  # without applying fixes.
  lint-syntax:
    name: syntax-errors
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      # Same setup-uv major version as the type-checker job in this workflow.
      - uses: astral-sh/setup-uv@v6
        with:
          enable-cache: true
      - run: uv run ruff check --no-fix --select PLE

  # Runs every pre-commit hook against all files under Python 3.11.
  lint-style:
    name: code-style
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      # Same setup-uv major version as the type-checker job in this workflow.
      - uses: astral-sh/setup-uv@v6
        with:
          enable-cache: true
      - run: uv python install 3.11
      - run: uv sync --dev --all-extras --python 3.11
      # --no-sync: reuse the environment created by the explicit sync above.
      - run: uv run --no-sync pre-commit run --all-files --show-diff-on-failure

  # Runs pyright against an environment that has all extras installed.
  lint-typecheck:
    name: type-checker
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: astral-sh/setup-uv@v6
        with:
          enable-cache: true
      - run: uv sync --dev --all-extras  # install extras for examples to avoid pyright missing-import errors
      - run: uv run --no-sync pyright

package matrix perms .github/workflows/package.yaml

Triggers

push, workflow_dispatch

Runs on

ubuntu-latest, ${{ matrix.os }}

Jobs

build, build_test

Matrix

os, python-version→ 3.11, 3.13, macos-latest, ubuntu-latest, windows-latest

Actions

astral-sh/setup-uv, astral-sh/setup-uv

Commands

uv build --python 3.12
uv venv /tmp/testenv --python ${{ matrix.python-version }} --clear if [[ "$RUNNER_OS" == "Windows" ]]; then . /tmp/testenv/Scripts/activate else source /tmp/testenv/bin/activate fi uv pip install *.whl python -c 'from browser_use import Agent, BrowserProfile, BrowserSession, Tools, ActionModel, ActionResult'

View raw YAML

# Builds the distribution once, then installs and imports it across an
# OS / Python-version matrix.
name: package

# Cancel in-progress runs when a new commit is pushed to the same branch/PR
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

# Runs on pushes to the listed branches and to any tag, and on manual dispatch.
on:
  push:
    branches:
      - main
      - stable
      - 'releases/**'
    tags:
      - '*'
  workflow_dispatch:

permissions:
  contents: read

jobs:
  # Builds the package with uv and uploads the wheel and sdist as one artifact.
  build:
    name: pip-build
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: astral-sh/setup-uv@v5
      - run: uv build --python 3.12
      # Uploaded under the name that the build_test job downloads.
      - uses: actions/upload-artifact@v4
        with:
          name: dist-artifact
          path: |
            dist/*.whl
            dist/*.tar.gz

  # Installs the wheel produced by `build` into a clean venv on every
  # OS / Python combination and checks that the listed names import.
  build_test:
    name: pip-install-on-${{ matrix.os }}-py-${{ matrix.python-version }}
    needs: build
    runs-on: ${{ matrix.os }}
    env:
      ANONYMIZED_TELEMETRY: 'false'
    strategy:
      matrix:
        os:
          - ubuntu-latest
          - macos-latest
          - windows-latest
        python-version:
          - "3.11"
          - "3.13"

    steps:
      - uses: actions/checkout@v4
      - uses: astral-sh/setup-uv@v5
      - uses: actions/download-artifact@v4
        with:
          name: dist-artifact

      # bash is forced on every OS; the venv activation script sits under
      # Scripts/ on Windows and bin/ elsewhere.
      - name: Set up venv and test for OS/Python versions
        shell: bash
        run: |
          uv venv /tmp/testenv --python ${{ matrix.python-version }} --clear
          if [[ "$RUNNER_OS" == "Windows" ]]; then
            . /tmp/testenv/Scripts/activate
          else
            source /tmp/testenv/bin/activate
          fi
          uv pip install *.whl
          python -c 'from browser_use import Agent, BrowserProfile, BrowserSession, Tools, ActionModel, ActionResult'

publish perms .github/workflows/publish.yml

Triggers

release, workflow_dispatch

Runs on

ubuntu-latest, ubuntu-latest

Jobs

tag_pre_release, publish_to_pypi

Actions

astral-sh/setup-uv

Commands

git fetch --tags latest_tag=$(git tag --list --sort=-v:refname | grep -E '^[0-9]+\.[0-9]+\.[0-9]+(rc[0-9]+)?$' | head -n 1) if [ -z "$latest_tag" ]; then echo "Failed to find the latest git tag from list:" > /dev/stderr git tag --list --sort=-v:refname exit 1 else # Bump the tag rc version if [[ "$latest_tag" =~ ^([0-9]+)\.([0-9]+)\.([0-9]+)(rc([0-9]+))?$ ]]; then major="${BASH_REMATCH[1]}" minor="${BASH_REMATCH[2]}" patch="${BASH_REMATCH[3]}" rc="${BASH_REMATCH[5]}" echo "latest_tag: ${major}.${minor}.${patch}rc${rc:-0}" if [ -z "$rc" ]; then # No rc, so bump patch and set rc=1 # 0.2.1 -> 0.2.2rc1 patch=$((patch + 1)) new_tag="${major}.${minor}.${patch}rc1" else if [ "$rc" -ge 99 ]; then echo "Error: rc version is already at 99 for tag $latest_tag, refusing to increment further." > /dev/stderr exit 1 fi rc=$((rc + 1)) new_tag="${major}.${minor}.${patch}rc${rc}" # 0.2.1rc1 -> 0.2.1rc2 fi else echo "Error: latest_tag '$latest_tag' does not match expected version pattern." > /dev/stderr exit 1 fi fi echo "new_tag: $new_tag" git tag $new_tag git push origin $new_tag
uv sync
uv run --no-sync ruff check --no-fix --select PLE
uv build
uvx playwright install chrome
uvx playwright install chromium
uv publish --trusted-publishing always
git checkout -b stable git push origin -f stable

View raw YAML

# This workflow uploads a Python package to PyPI with `uv publish` when a release is published
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries

# This workflow uses actions that are not certified by GitHub.
# They are provided by a third-party and are governed by
# separate terms of service, privacy policy, and support
# documentation.

# Tags pre-releases on manual dispatch and publishes the package to PyPI.
name: publish

# Cancel in-progress runs when a new commit is pushed to the same branch/PR
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

on:
  release:
    types: [published]     # publish full release to PyPI when a release is created on Github
  # schedule:
  #   - cron: "0 17 * * FRI" # tag a pre-release on Github every Friday at 5 PM UTC
  workflow_dispatch:

# contents: write is needed by the jobs that push a tag and the stable branch;
# id-token: write is presumably for the trusted publishing requested by
# `uv publish` - confirm.
permissions:
  contents: write
  id-token: write

jobs:
  # On manual dispatch, computes the next release-candidate tag from the
  # newest existing version tag and pushes it.
  #   latest is a final release  X.Y.Z     -> X.Y.(Z+1)rc1
  #   latest is a candidate      X.Y.ZrcN  -> X.Y.Zrc(N+1)   (refuses past rc99)
  tag_pre_release:
    if: github.event_name == 'workflow_dispatch'
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Create pre-release tag
        run: |
          git fetch --tags
          # versionsort.suffix=rc makes "X.Y.ZrcN" sort BEFORE the final "X.Y.Z".
          # With git's default version sort the rc tags rank above the final
          # release, so after releasing X.Y.Z the newest tag would still be
          # X.Y.ZrcN and the next tag would wrongly become X.Y.Zrc(N+1).
          latest_tag=$(git -c versionsort.suffix=rc tag --list --sort=-v:refname | grep -E '^[0-9]+\.[0-9]+\.[0-9]+(rc[0-9]+)?$' | head -n 1)
          if [ -z "$latest_tag" ]; then
            echo "Failed to find the latest git tag from list:" >&2
            git -c versionsort.suffix=rc tag --list --sort=-v:refname
            exit 1
          else
            # Bump the tag rc version
            if [[ "$latest_tag" =~ ^([0-9]+)\.([0-9]+)\.([0-9]+)(rc([0-9]+))?$ ]]; then
              major="${BASH_REMATCH[1]}"
              minor="${BASH_REMATCH[2]}"
              patch="${BASH_REMATCH[3]}"
              rc="${BASH_REMATCH[5]}"
              echo "latest_tag: ${major}.${minor}.${patch}rc${rc:-0}"
              if [ -z "$rc" ]; then
                # No rc, so bump patch and set rc=1            # 0.2.1 -> 0.2.2rc1
                patch=$((patch + 1))
                new_tag="${major}.${minor}.${patch}rc1"
              else
                if [ "$rc" -ge 99 ]; then
                  echo "Error: rc version is already at 99 for tag $latest_tag, refusing to increment further." >&2
                  exit 1
                fi
                rc=$((rc + 1))
                new_tag="${major}.${minor}.${patch}rc${rc}"    # 0.2.1rc1 -> 0.2.1rc2
              fi
            else
              echo "Error: latest_tag '$latest_tag' does not match expected version pattern." >&2
              exit 1
            fi
          fi
          echo "new_tag: $new_tag"
          git tag "$new_tag"
          git push origin "$new_tag"

  # Builds the package and publishes it to PyPI; for non-rc releases it also
  # force-pushes the released commit to the `stable` branch.
  publish_to_pypi:
    # Runs for release events and for manual dispatch.
    if: github.event_name == 'release' || github.event_name == 'workflow_dispatch'
    runs-on: ubuntu-latest
    env:
      IN_DOCKER: 'True'
      ANONYMIZED_TELEMETRY: 'false'
    steps:
      - uses: actions/checkout@v4
      - uses: astral-sh/setup-uv@v6
        with:
          enable-cache: true
          activate-environment: true
      - run: uv sync

      - run: uv run --no-sync ruff check --no-fix --select PLE # quick check for syntax errors to avoid waiting time doing the rest of the build
      - run: uv build

      # - name: Detect installed Playwright version
      #   run: echo "PLAYWRIGHT_VERSION=$(uv pip list --format json | jq -r '.[] | select(.name == "playwright") | .version')" >> $GITHUB_ENV

      # - name: Cache playwright binaries
      #   uses: actions/cache@v3
      #   with:
      #     path: |
      #       ~/.cache/ms-playwright
      #     key: ${{ runner.os }}-playwright-${{ env.PLAYWRIGHT_VERSION }}

      - run: uvx playwright install chrome
      - run: uvx playwright install chromium

      # TODO: just depend on the other test.yml action for this instead of re-running the tests here
      # - run: uv run pytest tests/ci/test_tools.py   # final sanity check: run a few of the tests before release
      
      # publish to PyPI
      - run: uv publish --trusted-publishing always
      # Skipped for manual dispatch and for any ref name containing "rc".
      - name: Push to stable branch (if stable release)
        if: github.event_name == 'release' && !contains(github.ref_name, 'rc')
        run: |
          git checkout -b stable
          git push origin -f stable

stale-bot perms .github/workflows/stale-bot.yml

Triggers: schedule, workflow_dispatch
Runs on: ubuntu-latest
Jobs: stale
Actions: actions/stale

View raw YAML

# Labels inactive issues and PRs as stale and closes them after a grace period.
name: 'Manage stale issues and PRs'
on:
  schedule:
    - cron: '0 2 * * *'  # Run daily at 2:00 AM UTC
  workflow_dispatch:  # Allow manual triggering

# The action only needs to label, comment on and close issues/PRs.
permissions:
  issues: write
  pull-requests: write

jobs:
  stale:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/stale@v9
        with:
          # General settings
          repo-token: ${{ secrets.GITHUB_TOKEN }}

          # Issues: stale after 60 days without activity, closed 14 days later
          days-before-stale: 60
          days-before-close: 14

          # PRs: the PR-specific values override the generic ones above
          days-before-pr-stale: 45
          days-before-pr-close: 14

          # Stale labels
          stale-issue-label: 'stale'
          stale-pr-label: 'stale'

          # Remove stale label when there's activity
          remove-stale-when-updated: true
          remove-issue-stale-when-updated: true
          remove-pr-stale-when-updated: true

          # Messages (the day counts in the text must match the settings above)
          stale-issue-message: |
            👋 This issue has been automatically marked as stale because it hasn't had activity for 60 days.

            **⚡ We've made significant progress recently!** Please test with the latest version of browser-use to see if this issue has been resolved. If the issue persists, please let us know by commenting below.

            **To keep this issue open:**
            - Add a comment explaining why this is still relevant after testing the latest version
            - Add the `pinned` label if this is an important long-term issue
            - Reference it in a PR if you're working on a fix

            **This will be automatically closed in 14 days** if no further activity occurs.

            Thanks for contributing to browser-use! 🤖 If you have questions, join our [Discord](https://discord.gg/uC9hDSbt).

          stale-pr-message: |
            👋 This PR has been automatically marked as stale because it hasn't had activity for 45 days.

            **To keep this PR open:**
            - Rebase against the latest main branch
            - Address any review feedback or merge conflicts
            - Add a comment explaining the current status
            - Add the `work-in-progress` label if you're still actively working on this

            **This will be automatically closed in 14 days** if no further activity occurs.

            Thanks for contributing to browser-use! 🤖

          close-issue-message: |
            🔒 This issue was automatically closed because it was stale for 14 days with no activity.

            **Don't worry!** If this issue is still relevant:
            - **First, test with the latest version** - we've made tons of improvements recently!
            - **Reopen it** if you have permissions and the issue persists
            - **Create a fresh issue** with updated information if the problem still exists after testing the latest version
            - **Join our [Discord](https://discord.gg/uC9hDSbt)** to discuss

            We appreciate your contribution to browser-use! 🤖

          close-pr-message: |
            🔒 This PR was automatically closed because it was stale for 14 days with no activity.

            **Don't worry!** If you'd like to continue this work:
            - **Reopen this PR** and rebase against main
            - **Create a fresh PR** with updated changes
            - **Join our [Discord](https://discord.gg/uC9hDSbt)** if you need help

            Thanks for contributing to browser-use! 🤖

          # Exemptions: anything carrying one of these labels is never marked stale
          exempt-issue-labels: 'pinned,security,bug,enhancement,good-first-issue,help-wanted,documentation,ci,breaking-change,feature-request,roadmap'
          exempt-pr-labels: 'pinned,work-in-progress,wip,breaking-change,security,dependencies,ci'
          # Exempt anything attached to a milestone. `exempt-milestones` expects a
          # comma-separated list of milestone titles, so the previous value `true`
          # only matched a milestone literally named "true".
          exempt-all-milestones: true
          # Exempt anything that has an assignee
          exempt-all-assignees: true
          exempt-all-pr-assignees: true

          # NOTE: the former `exempt-pr-author` setting was dropped here because
          # actions/stale defines no such input; it was ignored with a warning.

          # Advanced settings
          operations-per-run: 200  # Cap on API operations per run, to stay within rate limits
          ascending: true  # Process oldest issues first

          # Dry-run mode is off: the action really labels, comments and closes
          debug-only: false

          # Process every issue/PR, not only those with an assignee
          include-only-assigned: false

          # Any update to an issue/PR counts as activity and resets the stale timer
          ignore-issue-updates: false
          ignore-pr-updates: false

test matrix perms .github/workflows/test.yaml

Triggers

push, pull_request, workflow_dispatch

Runs on

ubuntu-latest, ubuntu-latest, ubuntu-latest, ubuntu-latest

Jobs

setup-chromium, find_tests, tests, evaluate-tasks

Matrix

test_filename→ ${{ fromJson(needs.find_tests.outputs.TEST_FILENAMES || '["FAILED_TO_DISCOVER_TESTS"]') }}

Actions

astral-sh/setup-uv, astral-sh/setup-uv, nick-fields/retry, astral-sh/setup-uv, nick-fields/retry

Commands

echo "number=$(date +%Y-W%U)" >> $GITHUB_OUTPUT
uvx playwright install chromium --with-deps --no-shell
echo "🔍 Discovering test files at $(date)"; echo "Git commit: $(git rev-parse HEAD)"; echo "Git branch: $(git branch --show-current)"; echo ""; TEST_FILENAMES="$(find tests/ci -name 'test_*.py' -type f | sed 's|^tests/ci/||' | sed 's|\.py$||' | jq -R -s -c 'split("\n")[:-1]')"; echo "TEST_FILENAMES=${TEST_FILENAMES}" >> "$GITHUB_OUTPUT"; echo "📋 Test matrix: $TEST_FILENAMES"
if [ -z "${{ steps.lsgrep.outputs.TEST_FILENAMES }}" ]; then echo "Failed to find any test_*.py files in tests/ci/ folder!" > /dev/stderr; exit 1; fi
if [[ "${{ matrix.test_filename }}" == "FAILED_TO_DISCOVER_TESTS" ]]; then echo "Failed get list of test files in tests/ci/test_*.py from find_tests job" > /dev/stderr; exit 1; fi
uv sync --dev --all-extras
echo "number=$(date +%Y-W%U)" >> $GITHUB_OUTPUT
uvx playwright install chromium --with-deps --no-shell

View raw YAML

# CI test workflow: discovers tests/ci/test_*.py, runs each file as its own
# matrix job, and scores the agent task evaluation.
name: test

# Least-privilege token: this workflow runs pull-request code, and the only
# write it performs is the evaluation comment on the PR. No job pushes commits,
# sets commit statuses or touches discussions, so those scopes are not granted.
permissions:
  actions: read
  contents: read
  pull-requests: write  # Allow writing comments on PRs
  issues: write         # PR comments are created through the issues comments API

# Cancel in-progress runs when a new commit is pushed to the same branch/PR
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

on:
  push:
    branches:
      - main
      - stable
      - 'releases/**'
    tags:
      - '*'
  pull_request:
  workflow_dispatch:

jobs:
  # Warms the Playwright Chromium cache; the `tests` and `evaluate-tasks` jobs
  # wait on this job and restore the cache under the same key.
  setup-chromium:
    timeout-minutes: 5
    runs-on: ubuntu-latest
    steps:
      - name: Check out repository
        uses: actions/checkout@v4
      - name: Set up uv
        uses: astral-sh/setup-uv@v6

      # The year + week-of-year stamp makes the cache key roll over weekly.
      - id: week
        name: Get week number for cache key
        run: echo "number=$(date +%Y-W%U)" >> $GITHUB_OUTPUT

      # A restore-keys match restores an older cache but does not report
      # cache-hit == 'true', so the install step below still runs in that case.
      - id: cache-chromium
        name: Cache chromium binaries
        uses: actions/cache@v4
        with:
          key: ${{ runner.os }}-${{ runner.arch }}-chromium-${{ steps.week.outputs.number }}
          restore-keys: |
            ${{ runner.os }}-${{ runner.arch }}-chromium-
          path: |
            ~/.cache/ms-playwright

      - name: Install Chromium if not cached
        if: steps.cache-chromium.outputs.cache-hit != 'true'
        run: uvx playwright install chromium --with-deps --no-shell

  # Discovers tests/ci/test_*.py and publishes the names (without directory
  # prefix or .py suffix) as a JSON array that drives the `tests` matrix.
  find_tests:
    runs-on: ubuntu-latest
    timeout-minutes: 5  # Prevent hanging
    outputs:
      TEST_FILENAMES: ${{ steps.lsgrep.outputs.TEST_FILENAMES }}
      # ["test_browser", "test_tools", "test_browser_session", "test_tab_management", ...]
    steps:
      - uses: actions/checkout@v4
        with:
          # Shallow checkout: only the current commit is needed to list files
          fetch-depth: 1
      - id: lsgrep
        run: |
          echo "🔍 Discovering test files at $(date)"
          echo "Git commit: $(git rev-parse HEAD)"
          echo "Git branch: $(git branch --show-current)"
          echo ""

          # jq -R -s reads the whole listing as one string; split + [:-1] drops the
          # empty element left by the trailing newline. No matches yields "[]".
          TEST_FILENAMES="$(find tests/ci -name 'test_*.py' -type f | sed 's|^tests/ci/||' | sed 's|\.py$||' | jq -R -s -c 'split("\n")[:-1]')"
          echo "TEST_FILENAMES=${TEST_FILENAMES}" >> "$GITHUB_OUTPUT"
          echo "📋 Test matrix: $TEST_FILENAMES"
        # https://code.dblock.org/2021/09/03/generating-task-matrix-by-looping-over-repo-files-with-github-actions.html
      - name: Check that at least one test file is found
        # The value is passed through the environment rather than interpolated into
        # the script, so file names from the checkout cannot inject shell code.
        env:
          TEST_FILENAMES: ${{ steps.lsgrep.outputs.TEST_FILENAMES }}
        run: |
          # An empty discovery is the JSON array "[]", not an empty string, so
          # both cases have to be rejected.
          if [ -z "$TEST_FILENAMES" ] || [ "$TEST_FILENAMES" = "[]" ]; then
            echo "Failed to find any test_*.py files in tests/ci/ folder!" >&2
            exit 1
          fi

  # Runs every discovered tests/ci/test_*.py file as its own matrix job.
  tests:
    needs: [setup-chromium, find_tests]
    runs-on: ubuntu-latest
    # NOTE: this job-level limit also covers checkout/setup, so it elapses before
    # the step-level `timeout_minutes: 4` of the pytest step can trigger.
    timeout-minutes: 4  # Reduced timeout - tests should complete quickly or retry
    env:
      IN_DOCKER: 'True'
      ANONYMIZED_TELEMETRY: 'false'
      BROWSER_USE_LOGGING_LEVEL: 'DEBUG'
      OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
      PERPLEXITY_API_KEY: ${{ secrets.PERPLEXITY_API_KEY }}
      ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
      GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
      GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
      AZURE_OPENAI_KEY: ${{ secrets.AZURE_OPENAI_KEY }}
      AZURE_OPENAI_ENDPOINT: ${{ secrets.AZURE_OPENAI_ENDPOINT }}
      BROWSER_USE_API_KEY: ${{ secrets.BROWSER_USE_API_KEY }}
      OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
      # The matrix value is derived from file names in the checked-out tree, so it
      # is handed to the scripts as data instead of being interpolated into them.
      MATRIX_TEST_FILENAME: ${{ matrix.test_filename }}
    strategy:
      matrix:
        test_filename: ${{ fromJson(needs.find_tests.outputs.TEST_FILENAMES || '["FAILED_TO_DISCOVER_TESTS"]') }}
        # autodiscovers all the files in tests/ci/test_*.py
        # - test_browser
        # - test_tools
        # - test_browser_session
        # - test_tab_management
        # ... and more
    name: ${{ matrix.test_filename }}
    steps:
      - name: Check that the previous step managed to find some test files for us to run
        run: |
          # The sentinel is the matrix fallback used when find_tests produced no output.
          if [[ "$MATRIX_TEST_FILENAME" == "FAILED_TO_DISCOVER_TESTS" ]]; then
            echo "Failed to get list of test files in tests/ci/test_*.py from find_tests job" >&2
            exit 1
          fi

      - uses: actions/checkout@v4
      - uses: astral-sh/setup-uv@v6
        with:
          enable-cache: true
          activate-environment: true

      - name: Cache uv packages and venv
        uses: actions/cache@v4
        with:
          path: |
            ~/.cache/uv
            .venv
          key: ${{ runner.os }}-uv-venv-${{ hashFiles('pyproject.toml') }}
          restore-keys: |
            ${{ runner.os }}-uv-venv-

      - run: uv sync --dev --all-extras

      - name: Get week number for cache key
        id: week
        run: echo "number=$(date +%Y-W%U)" >> "$GITHUB_OUTPUT"

      # Same key as the setup-chromium job, so its cache is restored here.
      - name: Cache chromium binaries
        id: cache-chromium
        uses: actions/cache@v4
        with:
          path: |
            ~/.cache/ms-playwright
          key: ${{ runner.os }}-${{ runner.arch }}-chromium-${{ steps.week.outputs.number }}
          restore-keys: |
            ${{ runner.os }}-${{ runner.arch }}-chromium-

      - name: Install Chromium browser if not cached
        if: steps.cache-chromium.outputs.cache-hit != 'true'
        run: uvx playwright install chromium --with-deps --no-shell

      - name: Cache browser-use extensions
        uses: actions/cache@v4
        with:
          path: |
            ~/.config/browseruse/extensions
          key: ${{ runner.os }}-browseruse-extensions-${{ hashFiles('browser_use/browser/profile.py') }}
          restore-keys: |
            ${{ runner.os }}-browseruse-extensions-

      # A missing file is reported but not treated as a failure: the pytest step
      # below is simply skipped when `exists` is false.
      - name: Check if test file exists
        id: check-file
        run: |
          TEST_FILE="tests/ci/${MATRIX_TEST_FILENAME}.py"
          if [ -f "$TEST_FILE" ]; then
            echo "exists=true" >> "$GITHUB_OUTPUT"
            echo "✅ Test file found: $TEST_FILE"
          else
            echo "exists=false" >> "$GITHUB_OUTPUT"
            echo "❌ Test file not found: $TEST_FILE"
            echo "This file may have been renamed or removed. Current test files:"
            find tests/ci -name 'test_*.py' -type f | sed 's|tests/ci/||' | sed 's|\.py$||' | sort
          fi

      # NOTE: max_attempts is 1, so a failing test is currently not retried.
      - name: Run test with retry
        if: steps.check-file.outputs.exists == 'true'
        uses: nick-fields/retry@v3
        with:
          timeout_minutes: 4
          max_attempts: 1
          retry_on: error
          command: pytest "tests/ci/${MATRIX_TEST_FILENAME}.py"

  # Runs tests/ci/evaluate_tasks.py, records the pass count in the run summary
  # and, on pull requests, posts or updates a comment with per-task results.
  evaluate-tasks:
    needs: setup-chromium
    runs-on: ubuntu-latest
    timeout-minutes: 8  # Allow more time for agent eval
    env:
      IN_DOCKER: 'true'
      BROWSER_USE_CLOUD_SYNC: 'false'
      ANONYMIZED_TELEMETRY: 'false'
      BROWSER_USE_LOGGING_LEVEL: 'DEBUG'
      OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
      PERPLEXITY_API_KEY: ${{ secrets.PERPLEXITY_API_KEY }}
      ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
      GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
      GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
      BROWSER_USE_API_KEY: ${{ secrets.BROWSER_USE_API_KEY }}
    steps:
      - uses: actions/checkout@v4
      - uses: astral-sh/setup-uv@v6
        with:
          enable-cache: true
          activate-environment: true

      - name: Cache uv packages and venv
        uses: actions/cache@v4
        with:
          path: |
            ~/.cache/uv
            .venv
          key: ${{ runner.os }}-uv-venv-${{ hashFiles('pyproject.toml') }}
          restore-keys: |
            ${{ runner.os }}-uv-venv-

      - run: uv sync --dev --all-extras

      - name: Get week number for cache key
        id: week
        run: echo "number=$(date +%Y-W%U)" >> "$GITHUB_OUTPUT"

      # Same key as the setup-chromium job, so its cache is restored here.
      - name: Cache chromium binaries
        id: cache-chromium
        uses: actions/cache@v4
        with:
          path: |
            ~/.cache/ms-playwright
          key: ${{ runner.os }}-${{ runner.arch }}-chromium-${{ steps.week.outputs.number }}
          restore-keys: |
            ${{ runner.os }}-${{ runner.arch }}-chromium-

      - name: Install Chromium browser if not cached
        if: steps.cache-chromium.outputs.cache-hit != 'true'
        run: uvx playwright install chromium --with-deps --no-shell

      - name: Cache browser-use extensions
        uses: actions/cache@v4
        with:
          path: |
            ~/.config/browseruse/extensions
          key: ${{ runner.os }}-browseruse-extensions-${{ hashFiles('browser_use/browser/profile.py') }}
          restore-keys: |
            ${{ runner.os }}-browseruse-extensions-

      # The script's stdout is scanned for PASSED=, TOTAL= and DETAILED_RESULTS=
      # lines, which are exported to later steps through GITHUB_ENV.
      # NOTE(review): DETAILED_RESULTS is assumed to be single-line JSON — confirm.
      - name: Run agent tasks evaluation and capture score
        id: eval
        uses: nick-fields/retry@v3
        with:
          timeout_minutes: 4
          max_attempts: 1
          retry_on: error
          command: |
            python tests/ci/evaluate_tasks.py > result.txt
            cat result.txt
            echo "PASSED=$(grep '^PASSED=' result.txt | cut -d= -f2)" >> "$GITHUB_ENV"
            echo "TOTAL=$(grep '^TOTAL=' result.txt | cut -d= -f2)" >> "$GITHUB_ENV"
            echo "DETAILED_RESULTS=$(grep '^DETAILED_RESULTS=' result.txt | cut -d= -f2-)" >> "$GITHUB_ENV"

      - name: Print agent evaluation summary
        run: |
          echo "Agent tasks passed: $PASSED / $TOTAL"

      - name: Write agent evaluation summary to workflow overview
        run: |
          if [ "$PASSED" = "$TOTAL" ]; then
            COLOR="green"
          else
            COLOR="yellow"
          fi
          echo "<h2>Agent Tasks Score: <span style='color:$COLOR;'>$PASSED/$TOTAL</span></h2>" >> "$GITHUB_STEP_SUMMARY"

      # NOTE(review): continue-on-error keeps the job green even when this step
      # fails, so the core.setFailed() call below marks only the step as failed
      # and does not fail the workflow. Move the 0% check into its own step if a
      # hard failure is really wanted.
      - name: Comment PR with agent evaluation results
        if: github.event_name == 'pull_request'
        uses: actions/github-script@v7
        continue-on-error: true
        with:
          script: |
            // Missing or non-numeric values fall back to 0 instead of NaN.
            const passed = parseInt(process.env.PASSED, 10) || 0;
            const total = parseInt(process.env.TOTAL, 10) || 0;
            const detailedResults = JSON.parse(process.env.DETAILED_RESULTS || '[]');
            const score = `${passed}/${total}`;
            // Guard the division so an empty run reports 0% rather than NaN%.
            const percentage = total > 0 ? Math.round((passed / total) * 100) : 0;

            // Flag a 0% pass rate (see the note on continue-on-error above)
            if (percentage === 0) {
              core.setFailed(`Evaluation failed: 0% pass rate (${passed}/${total})`);
            }

            // Pipes and line breaks inside a value would break the Markdown table.
            const cell = value => String(value ?? '').replace(/\|/g, '\\|').replace(/\r?\n/g, ' ');

            // Create detailed table
            let tableRows = '';
            detailedResults.forEach(result => {
              const emoji = result.success ? '✅' : '❌';
              const status = result.success ? 'Pass' : 'Fail';
              tableRows += `| ${cell(result.task)} | ${emoji} ${status} | ${cell(result.reason)} |\n`;
            });

            const comment = `## Agent Task Evaluation Results: ${score} (${percentage}%)

            <details>
            <summary>View detailed results</summary>

            | Task | Result | Reason |
            |------|--------|--------|
            ${tableRows}

            Check the [evaluate-tasks job](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) for detailed task execution logs.
            </details>`;

            // Find existing comment to update or create new one. All pages are
            // fetched: a single listComments call returns only the first page, so
            // on busy PRs the earlier bot comment would be missed and duplicated.
            const comments = await github.paginate(github.rest.issues.listComments, {
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: context.issue.number,
              per_page: 100,
            });

            const botComment = comments.find(existing =>
              existing.user?.type === 'Bot' &&
              (existing.body || '').includes('Agent Task Evaluation Results')
            );

            if (botComment) {
              // Update existing comment
              await github.rest.issues.updateComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                comment_id: botComment.id,
                body: comment
              });
            } else {
              // Create new comment
              await github.rest.issues.createComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                issue_number: context.issue.number,
                body: comment
              });
            }