Skip to content

Commit 6edf3a0

Browse files
authored
feat: Adding a sync function to runpod-python sdk to help copying workspace between pods (#426)
* Fixing UTC error * Fixing download path errors * Adding runpod sync command
1 parent ded7d4d commit 6edf3a0

File tree

4 files changed

+402
-4
lines changed

4 files changed

+402
-4
lines changed

runpod/cli/groups/pod/commands.py

Lines changed: 173 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22
RunPod | CLI | Pod | Commands
33
"""
44

5+
import os
6+
import tempfile
7+
import uuid
58
import click
69
from prettytable import PrettyTable
710

@@ -71,3 +74,173 @@ def connect_to_pod(pod_id):
7174
click.echo(f"Connecting to pod {pod_id}...")
7275
ssh = ssh_cmd.SSHConnection(pod_id)
7376
ssh.launch_terminal()
77+
78+
79+
@pod_cli.command("sync")
80+
@click.argument("source_pod_id")
81+
@click.argument("dest_pod_id")
82+
@click.argument("source_workspace", default="/workspace")
83+
@click.argument("dest_workspace", default="/workspace")
84+
def sync_pods(source_pod_id, dest_pod_id, source_workspace, dest_workspace):
85+
"""
86+
Sync data between two pods via SSH.
87+
88+
Transfers files from source_pod_id:source_workspace to dest_pod_id:dest_workspace.
89+
The workspace will be zipped and transferred to avoid file name conflicts.
90+
91+
📋 PREREQUISITES:
92+
93+
1. SSH Key Setup:
94+
• You must have an SSH key configured in your RunPod account
95+
• If you don't have one, create it with: runpod ssh add-key
96+
• List your keys with: runpod ssh list-keys
97+
98+
2. Pod Configuration:
99+
• Both pods must have SSH access enabled
100+
• For running pods using official RunPod templates, you may need to add
101+
your public key to the PUBLIC_KEY environment variable and restart the pod
102+
103+
⚠️ IMPORTANT NOTES:
104+
105+
• If a pod was started before adding your SSH key, you'll need to:
106+
1. Stop the pod
107+
2. Add PUBLIC_KEY environment variable with your public key
108+
3. Restart the pod
109+
110+
• The sync creates a unique folder (sync_XXXXXXXX) in the destination to avoid
111+
file conflicts
112+
113+
📖 EXAMPLES:
114+
115+
Basic sync (uses /workspace as default):
116+
runpod pod sync pod1 pod2
117+
118+
Custom paths:
119+
runpod pod sync pod1 pod2 /workspace/data /workspace/backup
120+
121+
Different directories:
122+
runpod pod sync pod1 pod2 /home/user/files /workspace/imported
123+
"""
124+
125+
# Check if user has SSH keys configured
126+
try:
127+
from ...groups.ssh.functions import get_user_pub_keys
128+
user_keys = get_user_pub_keys()
129+
if not user_keys:
130+
click.echo("❌ No SSH keys found in your RunPod account!")
131+
click.echo("")
132+
click.echo("🔑 To create an SSH key, run:")
133+
click.echo(" runpod ssh add-key")
134+
click.echo("")
135+
click.echo("📖 For more help, see:")
136+
click.echo(" runpod ssh add-key --help")
137+
return
138+
else:
139+
click.echo(f"✅ Found {len(user_keys)} SSH key(s) in your account")
140+
except Exception as e:
141+
click.echo(f"⚠️ Warning: Could not verify SSH keys: {str(e)}")
142+
click.echo("Continuing with sync attempt...")
143+
144+
click.echo(f"🔄 Syncing from {source_pod_id}:{source_workspace} to {dest_pod_id}:{dest_workspace}")
145+
146+
# Generate unique folder name to avoid conflicts
147+
transfer_id = str(uuid.uuid4())[:8]
148+
temp_zip_name = f"sync_{transfer_id}.tar.gz"
149+
dest_folder = f"sync_{transfer_id}"
150+
151+
try:
152+
# Connect to source pod
153+
click.echo(f"📡 Connecting to source pod {source_pod_id}...")
154+
with ssh_cmd.SSHConnection(source_pod_id) as source_ssh:
155+
156+
# Count files in source directory
157+
click.echo(f"📊 Counting files in {source_workspace}...")
158+
_, stdout, _ = source_ssh.ssh.exec_command(f"find {source_workspace} -type f | wc -l")
159+
file_count = stdout.read().decode().strip()
160+
click.echo(f"📁 Found {file_count} files in source workspace")
161+
162+
# Check if source directory exists
163+
_, stdout, stderr = source_ssh.ssh.exec_command(f"test -d {source_workspace} && echo 'exists' || echo 'not_found'")
164+
result = stdout.read().decode().strip()
165+
if result != 'exists':
166+
click.echo(f"❌ Error: Source workspace {source_workspace} does not exist on pod {source_pod_id}")
167+
return
168+
169+
# Create tar.gz archive of the workspace
170+
click.echo(f"📦 Creating archive of {source_workspace}...")
171+
archive_path = f"/tmp/{temp_zip_name}"
172+
tar_command = f"cd {os.path.dirname(source_workspace)} && tar -czf {archive_path} {os.path.basename(source_workspace)}"
173+
source_ssh.run_commands([tar_command])
174+
175+
# Check if archive was created successfully
176+
_, stdout, _ = source_ssh.ssh.exec_command(f"test -f {archive_path} && echo 'created' || echo 'failed'")
177+
archive_result = stdout.read().decode().strip()
178+
if archive_result != 'created':
179+
click.echo(f"❌ Error: Failed to create archive on source pod")
180+
return
181+
182+
# Get archive size for progress indication
183+
_, stdout, _ = source_ssh.ssh.exec_command(f"du -h {archive_path} | cut -f1")
184+
archive_size = stdout.read().decode().strip()
185+
click.echo(f"✅ Archive created successfully ({archive_size})")
186+
187+
# Download archive to local temp file
188+
click.echo("⬇️ Downloading archive to local machine...")
189+
with tempfile.NamedTemporaryFile(delete=False, suffix=".tar.gz") as temp_file:
190+
local_temp_path = temp_file.name
191+
source_ssh.get_file(archive_path, local_temp_path)
192+
193+
# Clean up archive on source pod
194+
source_ssh.run_commands([f"rm -f {archive_path}"])
195+
196+
# Connect to destination pod
197+
click.echo(f"📡 Connecting to destination pod {dest_pod_id}...")
198+
with ssh_cmd.SSHConnection(dest_pod_id) as dest_ssh:
199+
200+
# Check if destination directory exists, create if not
201+
click.echo(f"📂 Preparing destination workspace {dest_workspace}...")
202+
dest_ssh.run_commands([f"mkdir -p {dest_workspace}"])
203+
204+
# Upload archive to destination pod
205+
click.echo("⬆️ Uploading archive to destination pod...")
206+
dest_archive_path = f"/tmp/{temp_zip_name}"
207+
dest_ssh.put_file(local_temp_path, dest_archive_path)
208+
209+
# Extract archive in destination workspace
210+
click.echo(f"📦 Extracting archive to {dest_workspace}/{dest_folder}...")
211+
extract_command = f"cd {dest_workspace} && mkdir -p {dest_folder} && cd {dest_folder} && tar -xzf {dest_archive_path} --strip-components=1"
212+
dest_ssh.run_commands([extract_command])
213+
214+
# Verify extraction and count files
215+
_, stdout, _ = dest_ssh.ssh.exec_command(f"find {dest_workspace}/{dest_folder} -type f | wc -l")
216+
dest_file_count = stdout.read().decode().strip()
217+
click.echo(f"📁 Extracted {dest_file_count} files to destination")
218+
219+
# Clean up archive on destination pod
220+
dest_ssh.run_commands([f"rm -f {dest_archive_path}"])
221+
222+
# Show final destination path
223+
click.echo("")
224+
click.echo("🎉 Sync completed successfully!")
225+
click.echo(f"📊 Files transferred: {file_count}")
226+
click.echo(f"📍 Destination location: {dest_pod_id}:{dest_workspace}/{dest_folder}")
227+
click.echo("")
228+
click.echo("💡 To access the synced files:")
229+
click.echo(f" runpod ssh {dest_pod_id}")
230+
click.echo(f" cd {dest_workspace}/{dest_folder}")
231+
232+
except Exception as e:
233+
click.echo(f"❌ Error during sync: {str(e)}")
234+
click.echo("")
235+
click.echo("🔧 Troubleshooting tips:")
236+
click.echo("• Ensure both pods have SSH access enabled")
237+
click.echo("• Check that your SSH key is added to your RunPod account: runpod ssh list-keys")
238+
click.echo("• For running pods, you may need to add PUBLIC_KEY env var and restart")
239+
click.echo("• Verify the source and destination paths exist")
240+
finally:
241+
# Clean up local temp file
242+
try:
243+
if 'local_temp_path' in locals():
244+
os.unlink(local_temp_path)
245+
except:
246+
pass

runpod/serverless/utils/rp_debugger.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
"""
66

77
import datetime
8+
from datetime import timezone
89
import platform
910
import time
1011

@@ -86,7 +87,7 @@ def start(self, name):
8687
index = self.name_lookup[name]
8788
self.checkpoints[index]["start"] = time.perf_counter()
8889
self.checkpoints[index]["start_utc"] = (
89-
datetime.datetime.utcnow().isoformat() + "Z"
90+
datetime.datetime.now(timezone.utc).isoformat() + "Z"
9091
)
9192

9293
def stop(self, name):
@@ -103,7 +104,7 @@ def stop(self, name):
103104

104105
self.checkpoints[index]["end"] = time.perf_counter()
105106
self.checkpoints[index]["stop_utc"] = (
106-
datetime.datetime.utcnow().isoformat() + "Z"
107+
datetime.datetime.now(timezone.utc).isoformat() + "Z"
107108
)
108109

109110
def get_checkpoints(self):

0 commit comments

Comments
 (0)