diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml index f11894d33..0faf8a866 100644 --- a/.github/workflows/e2e.yml +++ b/.github/workflows/e2e.yml @@ -29,6 +29,15 @@ jobs: runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + + - name: Cache turbo + uses: actions/cache@d4323d4df104b026a6aa633fdb11d772146be0bf # v4.2.2 + with: + path: .turbo + key: ${{ runner.os }}-turbo-${{ github.sha }} + restore-keys: | + ${{ runner.os }}-turbo- + - name: Install pnpm run: npm install -g pnpm@9 - uses: actions/setup-node@1d0ff469b7ec7b3cb9d8673fde0c81c44821de2a # v4.2.0 @@ -42,6 +51,6 @@ jobs: if: matrix.os == 'macos-latest' run: npm install -g appdmg - name: Install dependencies - run: pnpm install + run: npm exec turbo run bootstrap - name: Run e2e - run: cd apps/ui-tars && pnpm run test:e2e + run: npm exec turbo run ui-tars-desktop#test:e2e diff --git a/.github/workflows/release-agent-tars.yml b/.github/workflows/release-agent-tars.yml new file mode 100644 index 000000000..6c2bdf665 --- /dev/null +++ b/.github/workflows/release-agent-tars.yml @@ -0,0 +1,98 @@ +name: Release Agent-TARS app +on: + workflow_dispatch: + +env: + CI: true + NODE_OPTIONS: --max-old-space-size=8192 + HUSKY: 0 + +permissions: + id-token: write + contents: write + attestations: write + +jobs: + publish_on_mac: + strategy: + fail-fast: false + matrix: + os: [macos-latest] + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + - name: Show operating system info + run: | + echo "Operating System:" + uname -a + - name: Install pnpm + run: npm install -g pnpm@9 + - uses: actions/setup-node@1d0ff469b7ec7b3cb9d8673fde0c81c44821de2a # v4.2.0 + with: + node-version: 20 + cache: 'pnpm' + - name: Install Python setuptools + run: brew install python-setuptools + - name: Install appdmg + run: npm install -g appdmg + - name: Install the Apple certificate and provisioning profile + env: + 
BUILD_CERTIFICATE_BASE64: ${{ secrets.BUILD_CERTIFICATE_BASE64 }} + P12_PASSWORD: ${{ secrets.P12_PASSWORD }} + BUILD_PROVISION_PROFILE_BASE64: ${{ secrets.BUILD_PROVISION_PROFILE_BASE64 }} + KEYCHAIN_PASSWORD: ${{ secrets.KEYCHAIN_PASSWORD }} + run: | + # create variables + CERTIFICATE_PATH=$RUNNER_TEMP/build_certificate.p12 + PP_PATH=$RUNNER_TEMP/build_pp.mobileprovision + KEYCHAIN_PATH=$RUNNER_TEMP/app-signing.keychain-db + + # import certificate and provisioning profile from secrets + echo -n "$BUILD_CERTIFICATE_BASE64" | base64 --decode -o $CERTIFICATE_PATH + echo -n "$BUILD_PROVISION_PROFILE_BASE64" | base64 --decode -o $PP_PATH + + # create temporary keychain + security create-keychain -p "$KEYCHAIN_PASSWORD" $KEYCHAIN_PATH + security set-keychain-settings -lut 21600 $KEYCHAIN_PATH + security unlock-keychain -p "$KEYCHAIN_PASSWORD" $KEYCHAIN_PATH + + # import certificate to keychain + security import $CERTIFICATE_PATH -P "$P12_PASSWORD" -A -t cert -f pkcs12 -k $KEYCHAIN_PATH + security set-key-partition-list -S apple-tool:,apple: -k "$KEYCHAIN_PASSWORD" $KEYCHAIN_PATH + security list-keychain -d user -s $KEYCHAIN_PATH + + # apply provisioning profile + mkdir -p ~/Library/MobileDevice/Provisioning\ Profiles + cp $PP_PATH ~/Library/MobileDevice/Provisioning\ Profiles + - name: install dependencies + run: pnpm install + - name: publish Mac Universal + env: + APPLE_ID: ${{ secrets.APPLE_ID }} + APPLE_PASSWORD: ${{ secrets.APPLE_PASSWORD }} + APPLE_TEAM_ID: ${{ secrets.APPLE_TEAM_ID }} + KEYCHAIN_PATH: ${{ runner.temp }}/app-signing.keychain-db + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + cd apps/agent-tars && pnpm run publish:mac + + # publish_on_win: + # runs-on: windows-latest + # steps: + # - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + # - name: Show operating system info + # run: | + # echo "Operating System:" + # cmd /c ver + # - name: Install pnpm + # run: npm install -g pnpm@9 + # - uses: 
actions/setup-node@1d0ff469b7ec7b3cb9d8673fde0c81c44821de2a # v4.2.0 + # with: + # node-version: 20 + # cache: 'pnpm' + # - name: install dependencies + # run: pnpm install + # - name: publish + # env: + # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + # run: cd apps/agent-tars && pnpm run publish:win32 diff --git a/.github/workflows/release.yml b/.github/workflows/release-ui-tars.yml similarity index 99% rename from .github/workflows/release.yml rename to .github/workflows/release-ui-tars.yml index 18051682f..c6a0a0184 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release-ui-tars.yml @@ -1,4 +1,4 @@ -name: Release app +name: Release UI-TARS app on: # push: # branches: diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 3bf6d0141..66aa8fcd4 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -28,6 +28,15 @@ jobs: steps: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + + - name: Cache turbo + uses: actions/cache@d4323d4df104b026a6aa633fdb11d772146be0bf # v4.2.2 + with: + path: .turbo + key: ${{ runner.os }}-turbo-${{ github.sha }} + restore-keys: | + ${{ runner.os }}-turbo- + - name: Install pnpm run: npm install -g pnpm@9 - uses: actions/setup-node@1d0ff469b7ec7b3cb9d8673fde0c81c44821de2a # v4.2.0 @@ -35,11 +44,11 @@ jobs: node-version: 20 cache: 'pnpm' - name: Install dependencies - run: pnpm install + run: npm exec turbo run bootstrap - name: Run typecheck - run: cd apps/ui-tars && pnpm run typecheck + run: npm exec turbo run typecheck - name: Run test - run: pnpm run coverage + run: npm exec turbo run coverage - name: Upload coverage data uses: codecov/codecov-action@13ce06bfc6bbe3ecf90edbbf1bc32fe5978ca1d3 # v5.3.1 with: diff --git a/.gitignore b/.gitignore index f47f3bf33..140fd2317 100644 --- a/.gitignore +++ b/.gitignore @@ -1,9 +1,11 @@ node_modules dist out +.tmp .DS_Store *.log* .eslintcache .env test-results/ coverage/ +.turbo/ diff --git a/.prettierignore 
b/.prettierignore index 9c6b791d5..ed170b57c 100644 --- a/.prettierignore +++ b/.prettierignore @@ -4,3 +4,4 @@ pnpm-lock.yaml LICENSE.md tsconfig.json tsconfig.*.json +apps/agent-tars/src/vendor/chat-ui/index.js diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 000000000..df814a23d --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,127 @@ +# Contributor covenant code of conduct + +## Our pledge + +We as members, contributors, and leaders pledge to make participation in our +community a harassment-free experience for everyone, regardless of age, body +size, visible or invisible disability, ethnicity, sex characteristics, gender +identity and expression, level of experience, education, socio-economic status, +nationality, personal appearance, race, religion, or sexual identity +and orientation. + +We pledge to act and interact in ways that contribute to an open, welcoming, +diverse, inclusive, and healthy community. + +## Our standards + +Examples of behavior that contributes to a positive environment for our +community include: + +- Demonstrating empathy and kindness toward other people +- Being respectful of differing opinions, viewpoints, and experiences +- Giving and gracefully accepting constructive feedback +- Accepting responsibility and apologizing to those affected by our mistakes, + and learning from the experience +- Focusing on what is best not just for us as individuals, but for the + overall community + +Examples of unacceptable behavior include: + +- The use of sexualized language or imagery, and sexual attention or + advances of any kind +- Trolling, insulting or derogatory comments, and personal or political attacks +- Public or private harassment +- Publishing others' private information, such as a physical or email + address, without their explicit permission +- Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Enforcement responsibilities + +Community leaders are 
responsible for clarifying and enforcing our standards of +acceptable behavior and will take appropriate and fair corrective action in +response to any behavior that they deem inappropriate, threatening, offensive, +or harmful. + +Community leaders have the right and responsibility to remove, edit, or reject +comments, commits, code, wiki edits, issues, and other contributions that are +not aligned to this Code of Conduct, and will communicate reasons for moderation +decisions when appropriate. + +## Scope + +This Code of Conduct applies within all community spaces, and also applies when +an individual is officially representing the community in public spaces. +Examples of representing our community include using an official e-mail address, +posting via an official social media account, or acting as an appointed +representative at an online or offline event. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported to the community leaders responsible for enforcement. +All complaints will be reviewed and investigated promptly and fairly. + +All community leaders are obligated to respect the privacy and security of the +reporter of any incident. + +## Enforcement guidelines + +Community leaders will follow these Community Impact Guidelines in determining +the consequences for any action they deem in violation of this Code of Conduct: + +### 1. Correction + +**Community Impact**: Use of inappropriate language or other behavior deemed +unprofessional or unwelcome in the community. + +**Consequence**: A private, written warning from community leaders, providing +clarity around the nature of the violation and an explanation of why the +behavior was inappropriate. A public apology may be requested. + +### 2. Warning + +**Community Impact**: A violation through a single incident or series +of actions. + +**Consequence**: A warning with consequences for continued behavior. 
No +interaction with the people involved, including unsolicited interaction with +those enforcing the Code of Conduct, for a specified period of time. This +includes avoiding interactions in community spaces as well as external channels +like social media. Violating these terms may lead to a temporary or +permanent ban. + +### 3. Temporary ban + +**Community Impact**: A serious violation of community standards, including +sustained inappropriate behavior. + +**Consequence**: A temporary ban from any sort of interaction or public +communication with the community for a specified period of time. No public or +private interaction with the people involved, including unsolicited interaction +with those enforcing the Code of Conduct, is allowed during this period. +Violating these terms may lead to a permanent ban. + +### 4. Permanent ban + +**Community Impact**: Demonstrating a pattern of violation of community +standards, including sustained inappropriate behavior, harassment of an +individual, or aggression toward or disparagement of classes of individuals. + +**Consequence**: A permanent ban from any sort of public interaction within +the community. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 2.0, available at +https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. + +Community Impact Guidelines were inspired by [Mozilla's code of conduct +enforcement ladder](https://github.com/mozilla/diversity). + +[homepage]: https://www.contributor-covenant.org + +For answers to common questions about this code of conduct, see the FAQ at +https://www.contributor-covenant.org/faq. Translations are available at +https://www.contributor-covenant.org/translations. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 1b9b86092..6e4abf460 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,5 +1,7 @@ # Contributing to UI-TARS Desktop + + First off, thanks for taking the time to contribute! 
❤️ All types of contributions are encouraged and valued. Please make sure to read the relevant section before making your contribution. It will make it a lot easier for us maintainers and smooth out the experience for all involved. The community looks forward to your contributions. 🎉 @@ -55,30 +57,41 @@ This is a [Monorepo](https://pnpm.io/workspaces) project including the following ```bash . ├── README.md -├── package.json # Electron application dependencies -├── forge.config.ts # Electron pack and publish configuration -├── electron.vite.config.ts # Electron bundle configuration -│ -├── src # Electron application source code -│   ├── main # Main process source code(Like backend) -│   ├── preload # Preload script source code -│   └── renderer # Renderer process source code(Like frontend) -│ -├── packages # Packages or Modules or SDK for UI-TARS Desktop -│   ├── action-parser # Action parser for parsing UI-TARS model output into actions -│   ├── core # Core SDK package for UI-TARS Agent -│   ├── electron-ipc # Electron IPC for communication between main and renderer processes -│   ├── shared # Shared code of the project(including types, utils, constants, etc.) 
-│   ├── utio # UTIO (UI-TARS Insights and Observation) -│   ├── visualizer # Sharing HTML Visualization Reporter -│ └── operators # Automation operators -│ ├── browserbase # Browserbase integration -│   └── nut-js # Nut.js integration -│ -├── docs # Documentation of the project -├── rfcs # RFCs (Request for Comments) for the project -├── e2e # E2E test cases for the project -├── playwright.config.ts # E2E test configuration +├── apps +│   ├── agent-tars +│   │   ├── src +│   │   │   ├── main +│   │   │   ├── preload +│   │   │   ├── renderer +│   │   │   └── vendor +│   └── ui-tars +│      └── src +│        ├── main +│         ├── preload +│         └── renderer +│  +├── packages +│   ├── agent-infra +│   │   ├── browser +│   │   ├── browser-use +│   │   ├── logger +│   │   ├── mcp-client +│   │   ├── mcp-servers +│   │   ├── search +│   │   └── shared +│   ├── common +│   │   ├── configs +│   │   └── electron-build +│   └── ui-tars +│   ├── action-parser +│   ├── cli +│   ├── electron-ipc +│   ├── operators +│   ├── sdk +│   ├── shared +│   ├── tsconfig.node.json +│   ├── utio +│   └── visualizer └── vitest.*.mts # Unit test configuration ``` @@ -103,7 +116,8 @@ $ pnpm install #### Run the application ```bash -$ pnpm run dev +$ pnpm run dev:ui-tars # Start UI-TARS Desktop +$ pnpm run dev:agent-tars # Start Agent-TARS Desktop ``` After the application starts, you can see the UI-TARS interface within the application. diff --git a/README.md b/README.md index e8d6471da..cf0f5172c 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,10 @@ + +> [!IMPORTANT] +> **\[2025-03-16\]** We released a **technical preview** version of a new desktop app - [Agent TARS](./apps/agent-tars/README.md), a multimodal AI agent that leverages browser operations by visually interpreting web pages and seamlessly integrating with command lines and file systems. +

- UI-TARS + UI-TARS

# UI-TARS Desktop @@ -18,19 +22,6 @@ UI-TARS Desktop is a GUI Agent application based on [UI-TARS (Vision-Language Mo |    👓 Midscene (use in browser)

-### ⚠️ Important Announcement: GGUF Model Performance - -The **GGUF model** has undergone quantization, but unfortunately, its performance cannot be guaranteed. As a result, we have decided to **downgrade** it. - -💡 **Alternative Solution**: -You can use **[Cloud Deployment](#cloud-deployment)** or **[Local Deployment [vLLM]](#local-deployment-vllm)**(If you have enough GPU resources) instead. - -We appreciate your understanding and patience as we work to ensure the best possible experience. - -## Updates - -- 🚀 01.25: We updated the **[Cloud Deployment](#cloud-deployment)** section in the 中文版: [GUI模型部署教程](https://bytedance.sg.larkoffice.com/docx/TCcudYwyIox5vyxiSDLlgIsTgWf#U94rdCxzBoJMLex38NPlHL21gNb) with new information related to the ModelScope platform. You can now use the ModelScope platform for deployment. - ## Showcases | Instruction | Video | @@ -38,6 +29,13 @@ We appreciate your understanding and patience as we work to ensure the best poss | Get the current weather in SF using the web browser |