zhouzypaul/policy_server_advanced.py

## 302 changes: 302 additions & 0 deletions policy_server_advanced.py
@@ -0,0 +1,302 @@

    """
"""

    The advanced policy server provides an example of serving a policy that needs:
The advanced policy server provides an example of serving a policy that needs:

        action chunking (with temporal ensembling)
    action chunking (with temporal ensembling)

        observation history
    observation history

    You can modify this example to make your policy track other internal states.
You can modify this example to make your policy track other internal states.


    The policy server needs to provide an endpoint .reset() that resets these internal states.
The policy server needs to provide an endpoint .reset() that resets these internal states.

    The .reset() endpoint will be called by AutoEval at the start of every evaluation trajectory.
The .reset() endpoint will be called by AutoEval at the start of every evaluation trajectory.


    -------------------------------------------------------------------------------------------
-------------------------------------------------------------------------------------------


    Provide a lightweight server/client implementation template for deploying your generalist policy over a
Provide a lightweight server/client implementation template for deploying your generalist policy over a

    REST API. This template implements *just* the server.
REST API. This template implements *just* the server.

    See auto_eval/robot/policy_clients.py:OpenWebClient for an example of how the client is handled.
See auto_eval/robot/policy_clients.py:OpenWebClient for an example of how the client is handled.


    Dependencies:
Dependencies:

    pip install uvicorn fastapi json-numpy draccus
pip install uvicorn fastapi json-numpy draccus


    Usage:
Usage:

    python policy_server.py --port 8000
python policy_server.py --port 8000


    To make your server accessible on the open web, you can use ngrok or bore.pub
To make your server accessible on the open web, you can use ngrok or bore.pub

    With ngrok:
With ngrok:

      ngrok http 8000
  ngrok http 8000

    With bore.pub:
With bore.pub:

      bore local 8000 --to bore.pub
  bore local 8000 --to bore.pub


    Note that if you aren't able to resolve bore.pub's DNS (test this with `ping bore.pub`), you can use their actual IP: 159.223.171.199
Note that if you aren't able to resolve bore.pub's DNS (test this with `ping bore.pub`), you can use their actual IP: 159.223.171.199

    """
"""


    import json_numpy
import json_numpy


    json_numpy.patch()
json_numpy.patch()


    import json
import json

    import logging
import logging

    import traceback
import traceback

    from collections import deque
from collections import deque

    from dataclasses import dataclass
from dataclasses import dataclass

    from typing import Any, Dict
from typing import Any, Dict


    import draccus
import draccus

    import numpy as np
import numpy as np

    import uvicorn
import uvicorn

    from fastapi import FastAPI, HTTPException
from fastapi import FastAPI, HTTPException

    from fastapi.middleware.cors import CORSMiddleware
from fastapi.middleware.cors import CORSMiddleware

    from fastapi.responses import JSONResponse
from fastapi.responses import JSONResponse


    # === Server Interface ===
# === Server Interface ===

    class ActionChunkingObsHistoryPolicyServer:
class ActionChunkingObsHistoryPolicyServer:
    """
    A simple server for your robot policy; exposes `/act` to predict an action for a given image + instruction.
        => Takes in {"image": np.ndarray, "instruction": str, "proprio": Optional[np.ndarray]}
        => Responds with the predicted action (np.ndarray) as the JSON body

    Additional endpoints registered by `run()`:
    - GET  `/health`: liveness check
    - POST `/reset`:  clears the observation / action histories (see `reset()`)

    Features:
    - Optional observation history: Maintain a history of past observations
    - Optional action chunking: Predict multiple actions at once
    - Optional temporal ensembling: Combine multiple predictions for the same timestep
    """

    def __init__(
        self,
        obs_horizon: int = 1,
        action_pred_horizon: int = 1,
        action_temporal_ensemble: bool = False,
        action_exp_weight: float = 0.0,
    ):
        """
        Initialize the policy server with optional history and action chunking support.

        Args:
            obs_horizon: Number of observations to keep in history (1 means no history)
            action_pred_horizon: Number of actions to predict at once (1 means no chunking)
            action_temporal_ensemble: Whether to use temporal ensembling for actions
            action_exp_weight: Exponential weight for temporal ensembling
        """
        ####################### TODO #######################
        ##                 Load model here                ##
        ####################################################
        self.model = ...

        # History tracking (values below 1 are clamped to 1)
        self.obs_horizon = max(1, obs_horizon)
        self.observation_history = deque(maxlen=self.obs_horizon)

        # Action chunking (values below 1 are clamped to 1)
        self.action_pred_horizon = max(1, action_pred_horizon)

        # Temporal ensembling; the chunk history only exists when ensembling is enabled
        self.action_temporal_ensemble = action_temporal_ensemble
        self.action_exp_weight = action_exp_weight
        self.action_history = (
            deque(maxlen=self.action_pred_horizon) if action_temporal_ensemble else None
        )

    def predict_action(self, payload: Dict[str, Any]) -> JSONResponse:
        """
        Predict action(s) given an image + proprio + instruction.

        With history and chunking support:
        - If obs_horizon > 1: Uses stacked observations
        - If the model returns a chunk (more than one dimension): its first dimension
          must equal action_pred_horizon
        - If action_temporal_ensemble: Applies temporal ensembling to the chunk

        Args:
            payload: Request body. Either the observation fields directly, or a single
                "encoded" key holding the JSON-encoded observation dict.

        Returns:
            JSONResponse carrying the action (as a json_numpy string when the request
            was double-encoded).

        Raises:
            HTTPException: 400 for a malformed payload, 500 for any other failure.
        """
        try:
            if double_encode := "encoded" in payload:
                # Support cases where `json_numpy` is hard to install, and numpy arrays are "double-encoded" as strings
                if len(payload) != 1:
                    # Explicit check instead of `assert`, which is stripped under `python -O`
                    raise HTTPException(
                        status_code=400,
                        detail="Only uses encoded payload!",
                    )
                payload = json.loads(payload["encoded"])

            # Parse payload components
            if "image" not in payload or "instruction" not in payload:
                raise HTTPException(
                    status_code=400,
                    detail="Missing required fields: image and instruction",
                )

            # Extract observation components
            observation = {
                "image": payload["image"],
                "instruction": payload["instruction"],
            }

            # Add optional proprio if provided
            if "proprio" in payload:
                observation["proprio"] = payload["proprio"]

            # Update observation history
            if self.obs_horizon > 1:
                observation = self._update_observation_history(observation)

            ####################### TODO #######################
            ##              Run model inference               ##
            ####################################################
            # Predict action chunks (action_pred_horizon actions at once)
            # Replace this with your actual model inference
            action = ...  # self.model(observation)

            # check if actions are chunked
            if len(action.shape) > 1:
                if action.shape[0] != self.action_pred_horizon:
                    raise ValueError(
                        f"Model returned {action.shape[0]} actions, "
                        f"expected action_pred_horizon={self.action_pred_horizon}"
                    )

                # Apply temporal ensembling if enabled
                if self.action_temporal_ensemble:
                    action = self._apply_temporal_ensembling(action)

            if double_encode:
                return JSONResponse(json_numpy.dumps(action))
            else:
                return JSONResponse(action)
        except HTTPException:
            # Deliberate HTTP errors (e.g. the 400s above) must not be rewritten as 500s
            raise
        except Exception:
            logging.error(traceback.format_exc())
            raise HTTPException(
                status_code=500,
                detail=(
                    "Error processing request. "
                    "Make sure your request complies with the expected format:\n"
                    "{'image': np.ndarray, 'instruction': str, 'proprio': Optional[np.ndarray]}\n"
                ),
            )

    def run(self, host: str = "0.0.0.0", port: int = 8000) -> None:
        """
        Build the FastAPI app, register the endpoints and serve until shutdown.

        Args:
            host: Interface to bind to
            port: Port to listen on
        """
        self.app = FastAPI()

        # Add CORS middleware
        self.app.add_middleware(
            CORSMiddleware,
            allow_origins=["*"],  # Allows all origins
            allow_credentials=True,
            allow_methods=["*"],  # Allows all methods
            allow_headers=["*"],  # Allows all headers
        )

        # Add health check endpoint
        @self.app.get("/health")
        async def health_check():
            return {"status": "healthy"}

        self.app.post("/act")(self.predict_action)

        # Add reset endpoint
        @self.app.post("/reset")
        async def reset_server():
            self.reset()
            return {"status": "reset successful"}

        # Configure server with a longer keep-alive timeout and a cap on concurrent connections
        config = uvicorn.Config(
            self.app,
            host=host,
            port=port,
            timeout_keep_alive=120,
            limit_concurrency=2,
        )
        server = uvicorn.Server(config)
        server.run()

    def _update_observation_history(
        self, observation: Dict[str, Any]
    ) -> Dict[str, Any]:
        """
        Update the observation history and return the stacked observations.

        Args:
            observation: Current observation

        Returns:
            Stacked observations with history (or the observation itself when
            obs_horizon == 1)
        """
        if not self.observation_history:
            # First observation of a trajectory: pad the whole window with it
            self.observation_history.extend([observation] * self.obs_horizon)
        else:
            # The deque's maxlen drops the oldest entry automatically
            self.observation_history.append(observation)

        # If using history (obs_horizon > 1), stack
        if self.obs_horizon > 1:
            return stack_history(self.observation_history)
        else:
            return observation

    def _apply_temporal_ensembling(self, action_chunks: np.ndarray) -> np.ndarray:
        """
        Apply temporal ensembling to the predicted action chunks.

        Args:
            action_chunks: Predicted action chunks of shape (pred_horizon, action_dim)

        Returns:
            The action for the current step after temporal ensembling, of shape
            (action_dim,)

        Raises:
            ValueError: If the ensembled action does not have the per-step action shape.
        """
        # Add current prediction to history
        self.action_history.append(action_chunks)
        num_actions = len(self.action_history)

        # Select the predicted action for the current step from history of action chunk predictions:
        # the oldest chunk contributes its (num_actions - 1)-th action, the newest its 0-th.
        curr_act_preds = np.stack(
            [
                pred_actions[i]
                for (i, pred_actions) in zip(
                    range(num_actions - 1, -1, -1), self.action_history
                )
            ]
        )

        # More recent predictions get exponentially *less* weight than older predictions
        weights = np.exp(-self.action_exp_weight * np.arange(num_actions))
        weights = weights / weights.sum()

        # Compute the weighted average across all predictions for this timestep
        action = np.sum(weights[:, None] * curr_act_preds, axis=0)

        # Validate against the chunk's own per-step shape rather than a hard-coded action dimension
        expected_shape = action_chunks.shape[1:]
        if action.shape != expected_shape:
            raise ValueError(
                f"Ensembled action has shape {action.shape}, expected {expected_shape}"
            )

        return action

    def reset(self) -> None:
        """
        Reset the server state (observation history and action history).
        """
        self.observation_history.clear()
        # The action history only exists when temporal ensembling is enabled
        if self.action_history is not None:
            self.action_history.clear()


    def stack_history(
def stack_history(
    history: deque,
):
    """
    Merge a sequence of observation dictionaries (`history`) into one dictionary.

    The keys are taken from the first entry; each value is the `np.stack` of that
    key's values across all entries, in history order.
    """
    stacked = {}
    for key in history[0]:
        per_step_values = [entry[key] for entry in history]
        stacked[key] = np.stack(per_step_values)
    return stacked


    @dataclass
@dataclass
class DeployConfig:
    """Options read by `deploy` to construct and run the policy server."""

    # Server Configuration
    host: str = "0.0.0.0"  # Host IP Address
    port: int = 8000  # Host Port
    pred_horizon: int = 1  # action chunk to predict
    obs_horizon: int = 1  # observation history
    temporal_ensemble: bool = False  # whether to use temporal ensembling
    exp_weight: float = 0.0  # exponential weight for temporal ensembling


    @draccus.wrap()
@draccus.wrap()
def deploy(cfg: DeployConfig) -> None:
    """
    Build the policy server from `cfg` and serve it on `cfg.host`:`cfg.port`.

    Args:
        cfg: Deployment options; the config's short field names are mapped onto
            the server constructor's `action_*` parameter names here.
    """
    server = ActionChunkingObsHistoryPolicyServer(
        obs_horizon=cfg.obs_horizon,
        action_pred_horizon=cfg.pred_horizon,
        action_temporal_ensemble=cfg.temporal_ensemble,
        action_exp_weight=cfg.exp_weight,
    )
    server.run(cfg.host, port=cfg.port)


if __name__ == "__main__":
    deploy()