add gpu & deep-learning support #152

Draft: wants to merge 8 commits into base: master

53 changes: 53 additions & 0 deletions examples/D00-tensorflow/seed-compose.py
@@ -0,0 +1,53 @@
from seedemu.core import Emulator, Binding, Filter, Action
from seedemu.layers import Base
from seedemu.compiler import Docker

# Initialize the emulator and layers
emu = Emulator()
base = Base()

# Create an autonomous system
as100 = base.createAutonomousSystem(100)

# Create a network
as100.createNetwork('net0')

# Create a host in the autonomous system and connect it to the network
host = as100.createHost('custom-host').joinNetwork('net0')
# use the first GPU only
host.setGPUDevices(['0'])

# download miniconda
host.addBuildCommand("mkdir -p ~/miniconda3 && curl https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh --output ~/miniconda3/miniconda.sh")
# install miniconda
host.addBuildCommand("bash ~/miniconda3/miniconda.sh -b -u -p ~/miniconda3 && rm -rf ~/miniconda3/miniconda.sh")
# host.addBuildCommand("curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj bin/micromamba")
host.addBuildCommand("~/miniconda3/bin/conda install -n base conda-libmamba-solver && ~/miniconda3/bin/conda config --set solver libmamba")

host.addBuildCommand("~/miniconda3/bin/conda init bash && ~/miniconda3/bin/conda init zsh")
# for users in China: use the USTC Anaconda mirrors
host.addBuildCommand("~/miniconda3/bin/conda config --add channels https://mirrors.ustc.edu.cn/anaconda/pkgs/free/ && ~/miniconda3/bin/conda config --add channels https://mirrors.ustc.edu.cn/anaconda/pkgs/main/ && ~/miniconda3/bin/conda config --set show_channel_urls yes")
host.addBuildCommand("~/miniconda3/bin/conda config --add channels https://mirrors.ustc.edu.cn/anaconda/cloud/conda-forge/")
# install deps
host.addBuildCommand("CONDA_OVERRIDE_CUDA=11.8 ~/miniconda3/bin/conda install tensorflow-gpu cudnn cudatoolkit=11.8")
# test is_gpu_available
host.appendStartCommand("~/miniconda3/bin/conda run python3 -c \"import tensorflow as tf; print('is_gpu_available:', tf.test.is_gpu_available())\"")

# Bind the base layer to the emulator
emu.addLayer(base)

# Render the emulation
emu.render()

# Compile the emulation
emu.compile(Docker(), './output', override=True)

print("""
Before running docker-compose, you must add GPU support to Docker by
following the official NVIDIA Container Toolkit guide:
https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/user-guide.html
On some Linux systems, you can install the toolkit directly from a community
package manager (e.g., the AUR on Arch Linux):
https://wiki.archlinux.org/title/docker#With_NVIDIA_Container_Toolkit_.28recommended.29

Once the container is running, you can run your TensorFlow code on the GPU.
""")
40 changes: 39 additions & 1 deletion seedemu/compiler/Docker.py
@@ -131,13 +131,42 @@
- net.ipv4.ip_forward=1
- net.ipv4.conf.default.rp_filter=0
- net.ipv4.conf.all.rp_filter=0
{gpuDevices}
privileged: true
networks:
{networks}{ports}{volumes}
labels:
{labelList}
"""

# DockerCompilerFileTemplates['compose_service_resources_limits'] = """\
# deploy:
#     resources:
#         limits:
#             cpus: {cpuLimit}
#             memory: {memLimit}
#             pids: {pidLimit}
# """ # TODO: try writing with yaml lib?

# DockerCompilerFileTemplates['compose_service_resources_reservations'] = """\
# deploy:
#     resources:
#         reservations:
#             cpus: {cpuLimit}
#             memory: {memLimit}
#             pids: {pidLimit}
# """ # TODO: try writing with yaml lib?

DockerCompilerFileTemplates['compose_service_resources_gpus'] = """\
deploy:
    resources:
        reservations:
            devices:
                - driver: nvidia
                  capabilities: [gpu]
                  device_ids: [{deviceIDList}]
"""

DockerCompilerFileTemplates['compose_label_meta'] = """\
org.seedsecuritylabs.seedemu.meta.{key}: "{value}"
"""
@@ -853,6 +882,14 @@ def _compileNode(self, node: Node) -> str:
netId = real_netname,
address = address
)

_gpuDevices = node.getGPUDevices()
gpuDevices = ""
if len(_gpuDevices) > 0:
    gpuDevices = DockerCompilerFileTemplates['compose_service_resources_gpus'].format(
        deviceIDList = ', '.join(f'"{device}"' for device in _gpuDevices)
    )

_ports = node.getPorts()
ports = ''
@@ -967,7 +1004,8 @@ def _compileNode(self, node: Node) -> str:
# privileged = 'true' if node.isPrivileged() else 'false',
ports = ports,
labelList = self._getNodeMeta(node),
volumes = volumes
volumes = volumes,
gpuDevices = gpuDevices
)

def _compileNet(self, net: Network) -> str:
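
For reference, the guard in _compileNode means non-GPU nodes are unaffected: an empty device list leaves the {gpuDevices} placeholder empty, while a non-empty list renders the deploy: reservation block with quoted device IDs. A standalone sketch of that behavior (hypothetical helper, not the actual compiler code; YAML indentation is illustrative):

# Standalone sketch mirroring the logic added in this PR.
GPU_TEMPLATE = """\
deploy:
    resources:
        reservations:
            devices:
                - driver: nvidia
                  capabilities: [gpu]
                  device_ids: [{deviceIDList}]
"""

def render_gpu_section(device_ids):
    # An empty list yields an empty string, so the service template is unchanged.
    if len(device_ids) == 0:
        return ""
    return GPU_TEMPLATE.format(
        deviceIDList = ', '.join(f'"{d}"' for d in device_ids))

print(render_gpu_section(["0", "1"]))  # renders: device_ids: ["0", "1"]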
24 changes: 24 additions & 0 deletions seedemu/core/Node.py
@@ -229,6 +229,8 @@ class Node(Printable, Registrable, Configurable, Vertex):

__name_servers: List[str]

__gpus: List[str]

def __init__(self, name: str, role: NodeRole, asn: int, scope: str = None):
"""!
@brief Node constructor.
@@ -268,6 +270,8 @@ def __init__(self, name: str, role: NodeRole, asn: int, scope: str = None):

self.__name_servers = []

self.__gpus = []

def configure(self, emulator: Emulator):
"""!
@brief configure the node. This is called when rendering.
@@ -779,6 +783,26 @@ def getInterfaces(self) -> List[Interface]:
"""
return self.__interfaces

def setGPUDevices(self, devices: List[str]) -> Node:
    """!
    @brief Set the GPU devices for the node.

    @param devices the new list of GPU device IDs.

    @returns self, for chaining API calls.
    """
    self.__gpus = devices

    return self

def getGPUDevices(self) -> List[str]:
    """!
    @brief Get the GPU devices for the node.

    @returns the list of GPU device IDs.
    """
    return self.__gpus

def addSharedFolder(self, nodePath: str, hostPath: str) -> Node:
    """!
    @brief Add a new shared folder between the node and host.
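
Because setGPUDevices returns self, it chains with the existing Node builder calls. A hypothetical usage sketch, reusing the as100 object from the example above (names are illustrative, not from this PR):

# Hypothetical usage: request the first two GPUs while chaining builder calls.
gpu_host = as100.createHost('gpu-host').joinNetwork('net0').setGPUDevices(['0', '1'])
assert gpu_host.getGPUDevices() == ['0', '1']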