Deploy LLM

Chi Ming HUNG (Matthew) 25 Reputation points
2025-02-10T07:00:48.0566667+00:00

I recently uploaded an LLM to Azure Blob Storage. May I ask how I can deploy it and use it in my own app?


Accepted answer
  1. Michal Simonfy 0 Reputation points
    2025-02-10T10:35:09.6333333+00:00

    Hello @Chi Ming HUNG (Matthew), here's a simple Python script that lets you download and use a model stored in Azure Blob Storage.

    import os
    import json
    import tempfile
    import zipfile
    import torch
    from azure.storage.blob import BlobServiceClient
    from transformers import AutoModelForCausalLM, AutoTokenizer
    
    # Global variables for the model and tokenizer
    model = None
    tokenizer = None
    
    def load_model(blob_name, container_name='your-container'):
        """
        Downloads a zipped model from Azure Blob Storage, extracts it,
        and loads the model along with its tokenizer using Hugging Face Transformers.
        
        Parameters:
            blob_name (str): The name (or path) of the blob file containing the zipped model.
            container_name (str): The name of the Azure Blob Storage container where the model is stored.
        
        Returns:
            model: The loaded model instance.
            tokenizer: The loaded tokenizer instance.
        """
        # Retrieve the Azure Storage connection string from an environment variable.
        connect_str = os.getenv('AZURE_STORAGE_CONNECTION_STRING')
        if not connect_str:
            raise ValueError("AZURE_STORAGE_CONNECTION_STRING environment variable not set.")
    
        # Create a BlobServiceClient to interact with your Blob storage.
        blob_service_client = BlobServiceClient.from_connection_string(connect_str)
        
        # Get a container client for the specified container.
        container_client = blob_service_client.get_container_client(container_name)
        
        # Create a temporary directory to download and extract the model.
        tmp_dir = tempfile.mkdtemp()
        
        # Define the local path for the downloaded zip file.
        local_zip_path = os.path.join(tmp_dir, os.path.basename(blob_name))
        
        # Download the blob (the zipped model) to the local path.
        print(f"Downloading blob '{blob_name}' from container '{container_name}'...")
        with open(local_zip_path, "wb") as file:
            blob_client = container_client.get_blob_client(blob_name)
            download_stream = blob_client.download_blob()
            file.write(download_stream.readall())
        print("Download complete.")
        
        # Extract the zip file into the temporary directory.
        print("Extracting model files...")
        with zipfile.ZipFile(local_zip_path, 'r') as zip_ref:
            zip_ref.extractall(tmp_dir)
        print("Extraction complete.")
        
        # Determine the model directory.
        # This example assumes that the zip file contains a folder named like the zip file (without extension).
        model_dir_candidate = os.path.join(tmp_dir, os.path.splitext(os.path.basename(blob_name))[0])
        model_dir = model_dir_candidate if os.path.isdir(model_dir_candidate) else tmp_dir
    
        # Load the model and tokenizer from the extracted directory.
        print("Loading model and tokenizer...")
        tokenizer = AutoTokenizer.from_pretrained(model_dir)
        model = AutoModelForCausalLM.from_pretrained(model_dir)
        print("Model loaded successfully.")
        
        return model, tokenizer
    
    def init():
        """
        Initialization function that is executed once when the service starts.
        Loads the model and tokenizer from Azure Blob Storage.
        """
        global model, tokenizer
    
        # Customize these values as per your configuration.
        blob_file_name = "my_large_language_model.zip"  # Replace with your actual blob file name
        container_name = "models"                       # Replace with your actual container name
        
        # Load model and tokenizer
        model, tokenizer = load_model(blob_file_name, container_name)
        
        # Set the model to evaluation mode
        model.eval()
        print("Service initialization complete.")
    
    def run(raw_data):
        """
        Run function that processes the incoming data and returns predictions.
        
        Parameters:
            raw_data (str): A JSON-formatted string containing input text.
        
        Returns:
            A JSON-formatted string containing the model's response or an error message.
        """
        global model, tokenizer
        try:
            # Parse the input data from JSON.
            input_json = json.loads(raw_data)
            input_text = input_json.get("input")
            
            if not input_text:
                raise ValueError("No 'input' key found in the input data.")
    
            # Tokenize the input text.
            inputs = tokenizer(input_text, return_tensors="pt")
            
            # Generate output tokens from the model.
            # This example uses greedy decoding; adjust generation parameters as needed.
            with torch.no_grad():
                outputs = model.generate(**inputs, max_length=100)
            
            # Decode the output tokens to text.
            output_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
            
            # Return the output text in JSON format.
            return json.dumps({"result": output_text})
        except Exception as e:
            # If any error occurs, return the error message.
            return json.dumps({"error": str(e)})
    
    # Optional: For local testing, you can run this script directly.
    if __name__ == "__main__":
        # Initialize the service (load model & tokenizer).
        init()
        
        # Define a sample input for testing.
        sample_input = json.dumps({"input": "Hello, how are you?"})
        
        # Get and print the model's prediction.
        result = run(sample_input)
        print("Response:", result)
    
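    To use the model from your own app, you can wrap the init() and run() functions above in a small web service and call it over HTTP. Below is a minimal sketch using FastAPI and uvicorn (these are assumptions on my side, not requirements of the script above; the /generate route name is just illustrative, and the script above is assumed to be saved as score.py). Remember to set the AZURE_STORAGE_CONNECTION_STRING environment variable before starting the service.

    import json
    from fastapi import FastAPI
    from pydantic import BaseModel
    
    # Assumption: the scoring script above is saved as score.py in the same folder.
    import score
    
    app = FastAPI()
    
    class GenerateRequest(BaseModel):
        input: str
    
    @app.on_event("startup")
    def startup():
        # Download and load the model once, when the service starts.
        score.init()
    
    @app.post("/generate")
    def generate(request: GenerateRequest):
        # run() expects a JSON-formatted string and returns a JSON-formatted string.
        result = score.run(json.dumps({"input": request.input}))
        return json.loads(result)
    
    # Start locally with: uvicorn app:app --host 0.0.0.0 --port 8000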

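    If you would rather have Azure host the model for you, note that the init()/run() pattern above matches the scoring-script convention used by Azure Machine Learning online endpoints, so the same file can be deployed as a managed online endpoint. The sketch below uses the azure-ai-ml v2 SDK; the endpoint name, folder layout, base image, conda file, and VM size are placeholders you would need to adapt, and a GPU SKU is likely required for a large model.

    from azure.ai.ml import MLClient
    from azure.ai.ml.entities import (
        CodeConfiguration,
        Environment,
        ManagedOnlineDeployment,
        ManagedOnlineEndpoint,
    )
    from azure.identity import DefaultAzureCredential
    
    # Placeholders: fill in your own subscription, resource group and workspace.
    ml_client = MLClient(
        DefaultAzureCredential(),
        subscription_id="<subscription-id>",
        resource_group_name="<resource-group>",
        workspace_name="<workspace>",
    )
    
    # Create the endpoint (the name "llm-endpoint" is just an example).
    endpoint = ManagedOnlineEndpoint(name="llm-endpoint", auth_mode="key")
    ml_client.online_endpoints.begin_create_or_update(endpoint).result()
    
    # Deploy the scoring script. "./src" is assumed to contain score.py, and
    # conda.yaml is assumed to list torch, transformers and azure-storage-blob.
    deployment = ManagedOnlineDeployment(
        name="blue",
        endpoint_name="llm-endpoint",
        code_configuration=CodeConfiguration(code="./src", scoring_script="score.py"),
        environment=Environment(
            image="mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:latest",  # example base image
            conda_file="./src/conda.yaml",
        ),
        environment_variables={"AZURE_STORAGE_CONNECTION_STRING": "<connection-string>"},
        instance_type="Standard_DS3_v2",  # pick a GPU SKU such as Standard_NC6s_v3 for large models
        instance_count=1,
    )
    ml_client.online_deployments.begin_create_or_update(deployment).result()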