| |
| %pip install azure-ai-ml azure-identity --upgrade --quiet |
|
|
| import os |
| import time |
| from azure.ai.ml import MLClient |
| from azure.ai.ml.entities import ManagedOnlineEndpoint, ManagedOnlineDeployment |
| from azure.identity import DefaultAzureCredential |
|
|
| |
| |
# Azure workspace coordinates. Replace each placeholder with your own value
# before running. They are written to the process environment and read back
# below when the client is built.
os.environ["SUBSCRIPTION_ID"] = "<YOUR_SUBSCRIPTION_ID>"
os.environ["RESOURCE_GROUP"] = "<YOUR_RESOURCE_GROUP>"
os.environ["WORKSPACE_NAME"] = "<YOUR_WORKSPACE_NAME>"

# Suffix the resource names with the current Unix time (whole seconds) so that
# a re-run does not reuse the names produced by an earlier run.
timestamp = str(int(time.time()))
endpoint_name = f"hf-ep-{timestamp}"
deployment_name = f"hf-deploy-{timestamp}"
os.environ["ENDPOINT_NAME"] = endpoint_name
os.environ["DEPLOYMENT_NAME"] = deployment_name

# Workspace-scoped client; DefaultAzureCredential picks the credential source.
client = MLClient(
    credential=DefaultAzureCredential(),
    subscription_id=os.environ["SUBSCRIPTION_ID"],
    resource_group_name=os.environ["RESOURCE_GROUP"],
    workspace_name=os.environ["WORKSPACE_NAME"],
)

# Registry URI of the model to serve ("latest" label of the registry entry).
model_uri = "azureml://registries/HuggingFace/models/salesforce-codegen-350m-multi/labels/latest"

# Local descriptions of the endpoint and of the single deployment behind it.
# Nothing is created in Azure until the begin_create_or_update calls below.
endpoint = ManagedOnlineEndpoint(name=endpoint_name)

deployment = ManagedOnlineDeployment(
    name=deployment_name,
    endpoint_name=endpoint_name,
    model=model_uri,
    # NOTE(review): this VM size must be available (quota) in the workspace's
    # region -- confirm before running.
    instance_type="Standard_NC40ads_H100_v5",
    instance_count=1,
)

# Create the endpoint first, then the deployment that lives under it. Each
# call returns a poller; wait() blocks until the operation has finished.
client.online_endpoints.begin_create_or_update(endpoint).wait()
client.online_deployments.begin_create_or_update(deployment).wait()

# Route all endpoint traffic to the new deployment. Per the Azure ML SDK docs
# a deployment created through the SDK is not assigned traffic automatically,
# so without this step requests to the endpoint's scoring URI are not served
# unless the caller names the deployment explicitly.
endpoint.traffic = {deployment_name: 100}
client.online_endpoints.begin_create_or_update(endpoint).wait()

print(f"Endpoint '{endpoint_name}' deployed successfully!")
print("You can now send requests to your endpoint via Microsoft Foundry or Azure Machine Learning.")