export const SNIPPETS = {
  "Generating Chart and Dataset": [
    {
      // Python starter recipe shown under "Generating Chart and Dataset":
      // its transform() reads the 'titanic' entity, drops the 'Age' column and
      // returns the result under the 'output_dataset' key.
      title: "Dataset Creation",
      tooltip: "Dataset Creation",
      code: `# Function to output a new dataset
def transform(entities, context):
    
    input_df_1 = entities['titanic']  # This is for reading the input dataset

    # Import necessary libraries: pandas for data manipulation and numpy for numerical operations
    import pandas as pd
    import numpy as np

    # Drop the 'Age' column from the input dataframe
    output_df_1 = input_df_1.drop(['Age'], axis=1)  # axis=1 means drop a column (instead of a row)

    # Return the modified dataset as 'output_dataset'
    return {
        "output_dataset": output_df_1 # on the canvas the name of the new dataset will be output_dataset
    }`
    },
    {
      title: "Chart Creation",
      tooltip: "Dataset Creation",
      code: `# Function to create a chart 
def transform(entities, context):
    
    input_df_1 = entities['titanic']  # This is for reading the input dataset

    # Import necessary libraries for creating the chart
    import plotly.express as px
    import plotly.graph_objects as go  # For more complex figures like Pie charts
    import plotly.io as pio  # To configure default templates for Plotly
    import pandas as pd
    import numpy as np

    # Set default plot theme to 'simple_white' for a clean chart style
    pio.templates.default = 'simple_white'

    # Count occurrences of each gender ('Sex' column) in the dataset
    gender_counts = input_df_1['Sex'].value_counts()

    # Create a Pie chart with gender labels and their respective counts, adding a hole for a donut chart style
    fig_1 = go.Figure(data=[go.Pie(labels=gender_counts.index, values=gender_counts.values, hole=0.3)])

    # Update the layout of the Pie chart, including title, font style, size, and color
    fig_1.update_layout(
        title_text='Gender Distribution',  # Title of the chart
        font=dict(family='Roboto, monospace', size=16, color='black'),  # Font customization
        autosize=True  # Enable automatic resizing of the chart
    )

    # Display the generated chart
    fig_1.show()

    # Return the generated chart with a specific key name (can be renamed as needed)
    return {
        'auto_generated_9EYi': fig_1,  # 'auto_generated_9EYi' is the identifier for the generated chart in the canvas
    }`
    }
  ],
  "Connecting to APIs": [
    {
      title: "Fetching Data from an API",
      tooltip: "Fetching Data from an API",
      code: `# Fetching Data from an API
def transform(entities, context):
    # Importing necessary libraries: requests to make API calls and pandas for data manipulation
    import requests
    import pandas as pd

    # Define the API URL from which to fetch user data
    url = "https://jsonplaceholder.typicode.com/users"

    # Make a GET request to the API to fetch the user data
    response = requests.get(url)

    # Check if the response from the API is successful (status code 200)
    if response.status_code == 200:
        # Convert the API response from JSON format to a DataFrame
        data = response.json()
        users_df = pd.json_normalize(data)

        # Inform that the data retrieval was successful
        print("Successfully retrieved users.")

        # Return the resulting DataFrame inside a dictionary with the key 'output_users_df'
        return {'output_users_df': users_df}
    else:
        # If there's an error, print the error status code and message for troubleshooting
        print(f"Error: {response.status_code}")
        print(response.text)

        # Return an empty dictionary in case of failure
        return {}`
    },
    {
      title: "Posting Data to an API",
      tooltip: "Posting Data to an API",
      code: `# Posting Data to an API

def transform(entities, context):
    # Importing necessary libraries: pandas for data manipulation and requests to interact with the API
    import pandas as pd
    import requests

    # Extract the input dataset from the provided entities
    input_df_1 = entities['Dataset_Name']  # this is for reading input dataset

    # Extract the values from the first row of the dataset
    first_row = input_df_1.iloc[0]

    # Define the API endpoint for posting data
    url = "https://jsonplaceholder.typicode.com/posts"

    # Prepare the data to be posted to the API using specific columns from the first row
    post_data = {
        'title': str(first_row['JobTitle']),      # Using 'JobTitle' as the title of the post
        'body': str(first_row['GivenName']),      # Using 'GivenName' as the body of the post
        'userId': int(first_row['EmployeeNumber'])# Using 'EmployeeNumber' as the userId
    }
  
    # Make a POST request to the API with the prepared data
    response = requests.post(url, json=post_data)
    
    # Check if the POST request was successful (status code 201)
    if response.status_code == 201:
        # Convert the API response to a DataFrame
        new_post = response.json()
        new_post_df = pd.json_normalize(new_post)

        # Inform that the post was successfully created
        print("Successfully created a new post.")

        # Return the new post's DataFrame inside a dictionary with the key 'output_new_post_df'
        return {'output_new_post_df': new_post_df}
    else:
        # If there's an error, print the error status code and message for troubleshooting
        print(f"Error: {response.status_code}")
        print(response.text)

        # Return an empty dictionary in case of failure
        return {}`
    },
    {
      title: "Updating Data in an API",
      tooltip: "Updating Data in an API",
      code: `# Updating Data in an API

def transform(entities, context):
    # Importing necessary libraries: requests to interact with the API and pandas for data manipulation
    import requests
    import pandas as pd

    # Extract the input dataset from the provided entities
    input_df_1 = entities['Dataset_Name']  # this is for reading the input dataset

    # Extract values from the 16th row (index 15) of the dataset
    first_row = input_df_1.iloc[15]

    # Specify the ID of the post to be updated
    post_id = 1  # ID of the post to be updated
    url = f"https://jsonplaceholder.typicode.com/posts/{post_id}"  # API URL for updating the post

    # Prepare the updated data using specific columns from the 16th row
    updated_data = {
        'title': str(first_row['JobTitle']),      # Updating the post title using 'JobTitle'
        'body': str(first_row['GivenName']),      # Updating the post body using 'GivenName'
        'userId': int(first_row['EmployeeNumber'])# Updating the userId using 'EmployeeNumber'
    }
    
    # Make a PUT request to update the post with the new data
    response = requests.put(url, json=updated_data)

    # Check if the PUT request was successful (status code 200)
    if response.status_code == 200:
        # Convert the API response to a DataFrame
        updated_post = response.json()
        updated_post_df = pd.json_normalize(updated_post)

        # Inform that the post was successfully updated
        print("Successfully updated the post.")

        # Return the updated post's DataFrame inside a dictionary with the key 'output_updated_post_df'
        return {'output_updated_post_df': updated_post_df}
    else:
        # If there's an error, print the error status code and message for troubleshooting
        print(f"Error: {response.status_code}")
        print(response.text)

        # Return an empty dictionary in case of failure
        return {}`
    },
    {
      title: "Fetch data from RapidCanvas API",
      tooltip: "Fetch data from RapidCanvas API",
      code: `# Fetch data from RapidCanvas API
def transform(entities, context):
    # Import necessary libraries
    import requests  # For making HTTP requests to the API
    import pandas as pd  # For data manipulation with DataFrames
    import json  # For handling JSON data
    from utils.notebookhelpers.helpers import Helpers  # Helper utilities

    # Define the service ID and server from which the logs will be fetched
    service_id = '****-****-****-****-************'  # Prediction service ID
    server = 'staging.dev.rapidcanvas.net'  # Server environment

    # Fetch the secret token used for API authorization
    token = Helpers.get_secret(context, "token")

    # Define the base URLs for fetching logs and expanded log details
    logs_url = f"https://{server}/api/v2/prediction-services/{service_id}/logs"
    expand_logs_url = f"https://{server}/api/v2/prediction-services/{service_id}/expand-logs?runId="

    # Set up the headers for the API requests, including the authorization token
    headers = {
        "Authorization": f"Bearer {token}",  # Token-based authorization
        "Content-Type": "application/json"  # Specify JSON format for the requests
    }

    # Make a request to fetch the basic log entries
    logs_response = requests.get(logs_url, headers=headers)
    logs_response.raise_for_status()  # Raise an error if the request fails
    logs = logs_response.json()['data']['rows']  # Extract log entries from the JSON response

    # Initialize an empty list to store the detailed log data
    data = []

    # Iterate over each log entry to fetch expanded log details
    for log in logs:
        log_id = log['cells'][0]  # Assume the log ID is in the first cell of the log entry
        # Fetch detailed information for each log using the log ID
        log_details_response = requests.get(expand_logs_url + log_id, headers=headers)
        log_details_response.raise_for_status()  # Raise an error if the request fails
        log_details = log_details_response.json()  # Parse the detailed log response

        # Extract the request and response data from the detailed log
        print(log_details.get('request', '{}'))  # Debugging: print the request data
        request_data = log_details.get('request', '{}')  # Get the request data, defaulting to '{}'
        print(log_details.get('response', '{}'))  # Debugging: print the response data
        response_data = log_details.get('response', '{}')  # Get the response data, defaulting to '{}'

        # Append the request and response data to the 'data' list
        data.append({'req': request_data, 'res': response_data})

    # Convert the collected data into a pandas DataFrame for further use
    df = pd.DataFrame(data)

    # Return the DataFrame as part of the result dictionary
    return {'output_users_df': df}`
    }
  ],
  "Example 3rd Party Connections": [
    {
      title: "Fetching from an API with Access Token",
      tooltip: "Fetching from an API with Access Token",
      code: `# Fetching from an API with Access Token
def transform(entities, context):
    # Import necessary libraries: requests for API interaction, pandas for data manipulation, and Helpers for token retrieval
    import requests
    import pandas as pd
    from utils.notebookhelpers.helpers import Helpers
  
    # Fetch the access token using a helper function
    access_token = Helpers.get_secret(context, "Example_Secret_Key")
    if not access_token:
        # If the access token could not be retrieved, print an error and stop execution
        print("Failed to retrieve access token.")
        return {}
  
    # Define the API URL to fetch orders with specific parameters
    url_orders = "https://www.domain.shop/admin/api/2024-01/orders.json?created_at_max=2024-12-12T23:59:59-00:00&status=any"
    url = url_orders
  
    # Set up the request headers with the access token for authorization
    headers = {
        'X-Shopify-Access-Token': access_token
    }
  
    # Initialize variables to handle pagination and store the fetched data
    dataframes = []  # To store data from all pages
    i = 0  # Page counter
    next_link = True  # Indicator to check if more pages are available
  
    # Loop through paginated API results
    while True:
        if not next_link:  # Stop if there are no more next links
            break
  
        # For the first request, use the initial URL; for subsequent requests, use the 'next' link
        if i == 0:
            current_url = url
        else:
            current_url = next_url
  
        # Make the GET request to fetch orders
        response = requests.get(current_url, headers=headers)
  
        # Check if the request was successful (status code 200)
        if response.status_code == 200:
            data = response.json()
  
            # If there are no more orders, stop the loop
            if not data.get('orders'):
                break
  
            # Convert the orders data from JSON to a DataFrame
            df = pd.json_normalize(data['orders'])
            dataframes.append(df)  # Append the DataFrame to the list
            i += 1  # Increment page counter
            print('Page ' + str(i))
  
            # Check if there's a 'next' link for further pages
            if 'next' in response.links:
                next_link = response.links['next']
                next_url = next_link['url']
            else:
                print('No more next links.')
                next_link = False  # No more pages to fetch
  
        else:
            # If there's an error, print the status code and error message
            print(f"Error: {response.status_code}")
            print(response.text)
            break
  
    # Combine all fetched DataFrames into a single DataFrame and remove duplicate entries by 'id'
    orders_df = pd.concat(dataframes, ignore_index=True)
    orders_df.drop_duplicates(subset=['id'], inplace=True)
  
    # Return the final orders DataFrame as output
    return {'output_orders_df': orders_df}`
    },
    {
      title: "Fetching data from GCS",
      tooltip: "Fetching data from GCS",
      code: `# Fetch data from GCS
def transform(entities, context):
    import os
    import pandas as pd
    from utils.notebookhelpers.gcs import GCSHelper
    from utils.notebookhelpers.helpers import Helpers

    # Retrieve the secret key by accessing the tenant's settings (in this case, the 'RC_DATA_INTERNAL_KEY' secret).
    # This key is required to authenticate with GCS (Google Cloud Storage).
    key = Helpers.get_secret(context, 'RC_DATA_INTERNAL_KEY')

    # Write the retrieved secret key to a local file, which will be used for GCS authentication.
    # This key file is necessary for accessing GCS resources.
    key_file = os.path.join(Helpers.getChildDir(context), "gcs.key")
    with open(key_file, "w") as f:
        f.write(key)

    # Initialize GCSHelper, a utility that helps interact with Google Cloud Storage.
    # We pass the key file and the name of the GCS bucket ("rapidcanvas-training-data") to the helper.
    gcs_helper = GCSHelper(key_file, "rapidcanvas-training-data")

    # Specify the name of the CSV file you want to download from the GCS bucket.
    file_name = "raw_txn_lite.csv"

    # Download the specified CSV file from the GCS bucket to the local directory.
    # 'Helpers.getChildDir(context)' provides the path where the file will be saved locally.
    gcs_helper.download_file(file_name, Helpers.getChildDir(context))

    # Construct the full local file path where the CSV file has been saved after the download.
    # We concatenate the directory path with the file name to create the full file path.
    file_path = Helpers.getChildDir(context) + 'raw_txn_lite.csv'

    # Use pandas to read the CSV file into a DataFrame, which allows easy data manipulation and analysis.
    df = pd.read_csv(file_path)

    # Return the DataFrame as part of a dictionary, making it accessible outside the function.
    return {"df": df}
`
    },
    {
      title: "Fetching data from Slack API",
      tooltip: "Fetching data from Slack API",
      code: `# Example integration with Slack API

def transform(entities, context):
    # Import necessary libraries
    import requests  # For making API requests
    import pandas as pd  # For data manipulation with DataFrames
    import time  # For working with timestamps
    from datetime import datetime, timedelta  # For date calculations
    from utils.notebookhelpers.helpers import Helpers  # Helper utilities

    # Get the timestamp from 30 days ago, used to fetch messages within this time frame
    oldest_timestamp = get_30_days_ago_timestamp()

    # Fetch the Slack API token from secure storage
    token = Helpers.get_secret(context, "slack_token")
    print(token)  # Debugging: print the token (be careful in production with printing sensitive info)

    # List of Slack channel names to retrieve messages from
    channel_names = ["channel1", "channel2"]

    # Fetch the available channels from Slack using the token
    channels = get_channels(token)  

    # Initialize empty lists to hold column names and the data rows
    cols = []  # Column headers for the DataFrame
    data = []  # Rows of data to be collected

    # Check if channels were successfully fetched
    if channels:
        # Iterate through each channel name specified in 'channel_names'
        for channel_name in channel_names:
            # Get the Slack channel ID corresponding to the channel name
            channel_id = get_channel_id(channels, channel_name)

            if channel_id:
                print("found channel id", channel_id, oldest_timestamp)  # Debugging: print channel ID and timestamp

                # Fetch messages from the Slack channel using the channel ID and token
                messages = fetch_slack_messages(channel_id, token, oldest_timestamp)
                print(messages)  # Debugging: print the retrieved messages

                # Process each message retrieved from Slack
                for message in messages:
                    # Process the message and store it as a row in the data list
                    row = process_message(message, channel_name, cols)
                    data.append(row)

    # Convert the collected data into a DataFrame for easier manipulation and analysis
    df = pd.DataFrame(data, columns=cols)

    # Return the DataFrame as part of the result dictionary
    return {'messages': df}

    # Helper function to fetch the list of channels from Slack
    def get_channels(token):
        # Set up the headers for the API request, including the Bearer token for authorization
        headers = {"Authorization": f"Bearer {token}"}
        # Define the parameters to fetch both public and private channels, limited to 1000 results
        params = {"limit": 1000, "types": "public_channel,private_channel"}
        # Make the API request to Slack to get the list of channels
        response = requests.get(SLACK_CHANNEL_LIST_URL, headers=headers, params=params)
        if response.status_code == 200:
            # Return the list of channels if the request was successful
            return response.json().get("channels", [])
        else:
            # Print an error message if the request failed
            print(f"Error fetching channels: {response.status_code}")
            return None

    # Helper function to get the ID of a specific channel by its name
    def get_channel_id(channels, channel_name):
        # Iterate through all channels and find the one that matches the provided name
        for channel in channels:
            if channel["name"] == channel_name:
                return channel["id"]
        return None  # Return None if the channel name was not found

    # Helper function to fetch messages from a specific Slack channel
    def fetch_slack_messages(channel_id, token, oldest_timestamp):
        # Set up the headers for the API request, including the Bearer token for authorization
        headers = {"Authorization": f"Bearer {token}"}
        # Define the parameters, including the channel ID and oldest timestamp, limited to 1000 results
        params = {"channel": channel_id, "oldest": oldest_timestamp, "limit": 1000}
        messages = []  # Initialize an empty list to collect messages

        # Loop to fetch messages and handle pagination if more messages exist
        while True:
            # Make the API request to Slack to fetch the messages
            response = requests.get(SLACK_API_URL, headers=headers, params=params)
            if response.status_code == 200:
                # Add the messages to the list
                json_response = response.json()
                messages += json_response.get("messages", [])
                # Check if there is a next cursor for pagination
                next_cursor = json_response.get("response_metadata", {}).get("next_cursor", "")
                if not next_cursor:
                    break  # Exit the loop if there are no more pages of messages
                else:
                    # Set the cursor for the next request if there are more messages to fetch
                    params["cursor"] = next_cursor
            else:
                # Print an error message if the request failed
                print(f"Error fetching messages: {response.status_code}")
                break
        return messages  # Return the list of messages

    # Helper function to get the timestamp for 30 days ago
    def get_30_days_ago_timestamp():
        # Get the current time in seconds since the epoch
        current_time = time.time()
        # Calculate 30 days in seconds
        thirty_days_in_seconds = 30 * 24 * 60 * 60
        # Return the timestamp from 30 days ago
        return int(current_time - thirty_days_in_seconds)

    # Helper function to process a Slack message and format it for the DataFrame
    def process_message(message, channel_name, cols):
        row = {}  # Initialize an empty dictionary to hold the message data
        # Get the message timestamp and text content
        ts = message.get("ts", "")
        text = message.get("text", "")
        attachment_text = ""  # Initialize an empty string for any attachment text
        # Collect all attachment text from the message (if any)
        for attachment in message.get("attachments", []):
            attachment_text += attachment.get("text", "")
        # Add each piece of data to the row and update the column list
        add_data("ts", ts, cols, row)
        add_data("text", text, cols, row)
        add_data("attach_text", attachment_text, cols, row)
        add_data("channel_name", channel_name, cols, row)
        return row  # Return the processed row

    # Helper function to add data to the row and ensure the column is included
    def add_data(key, val, cols, row):
        # Add the key to the column list if it's not already there
        if key not in cols:
            cols.append(key)
        # Add the value to the row dictionary under the given key
        row[key] = val`
    }
  ],
  "Working with Artifacts": [
    {
      title: "Fetching data from an artifact",
      tooltip: "Fetching data from an artifact",
      code: `# Reading data from excel stored in an artifact
  
  def transform(entities, context):
      # Import necessary modules
      import requests  # Used for making HTTP requests, if needed
      import pandas as pd  # Pandas library for data manipulation
      from utils.notebookhelpers.helpers import Helpers  # Import Helpers class to use utility functions
  
      # Try to download the Excel file using the Helpers.downloadArtifacts function
      try:
          # Downloads the 'transaction.xlsx' file and stores it in excel_file
          excel_file = Helpers.downloadArtifacts(context, 'Name of the artifact')['transaction.xlsx']
      except KeyError:
          # Raise an error if 'transaction.xlsx' is not found in the downloaded artifact
          raise ValueError("File 'transaction.xlsx' not found in the artifact.")
      except Exception as e:
          # Handle any other error that occurs during the download process
          raise RuntimeError(f"Error downloading the file: {e}")
      
      # Try to read the Excel file and retrieve all sheets using pandas
      try:
          # Read the Excel file and load all sheets into a dictionary where
          # the keys are sheet names and values are DataFrames containing the data
          excel_data = pd.read_excel(excel_file, sheet_name=None)
      except Exception as e:
          # Handle any error that occurs while reading the Excel file
          raise RuntimeError(f"Error reading the Excel file: {e}")
      
      # Initialize an empty dictionary to store DataFrames for each sheet
      dataframes = {}
  
      # Loop through the sheets in the Excel file
      for sheet_name, data in excel_data.items():
          # Check if the current sheet has data or is empty
          if not data.empty:
              # If the sheet is not empty, add it to the dataframes dictionary
              dataframes[sheet_name] = data
          else:
              # Print a warning message if the sheet is empty
              print(f"Warning: Sheet '{sheet_name}' is empty.")
      
      # Return the dictionary containing DataFrames for each sheet in the Excel file
      return dataframes`
    },
    {
      title: "Create and Save Data to Artifacts",
      tooltip: "Create and Save Data to Artifacts",
      code: `# Create and Save Data to Artifacts
def transform(entities, context):
    # Import necessary libraries
    import requests  # For making HTTP requests (not used in this code but imported for future use)
    import pandas as pd  # For data manipulation and creation of DataFrames
    from utils.dtos.templateOutput import ArtifactOutput  # Used to structure the output for artifacts
    from utils.notebookhelpers.helpers import Helpers  # Helper utilities for managing artifact directories

    # Define the data that will be used to create the DataFrame
    data = {
        'Col1': ['A', 'B', 'C'],  # First column with values 'A', 'B', 'C'
        'Col2': [1, 2, 3]  # Second column with numeric values 1, 2, and 3
    }

    # Create a pandas DataFrame using the defined data
    df = pd.DataFrame(data)

    # Get or create an artifacts directory using a unique ID ('test-artifact') and store files in it
    artifactsDir = Helpers.getOrCreateArtifactsDir(context, artifactsId="test-artifact")

    # Save the DataFrame as a CSV file in the artifacts directory
    df.to_csv(artifactsDir + '/test.csv')  # Save as 'test.csv'

    # Save the first 10 rows of the DataFrame as a Pickle file in the artifacts directory
    df.head(10).to_csv(artifactsDir + '/test.pkl')  # Save as 'test.pkl'

    # Prepare the output dictionary that references the stored artifacts
    output_dict = {
        "test-artifact": ArtifactOutput()  # Create an artifact output for 'test-artifact'
    }

    # Return the dictionary with references to the artifacts created
    return output_dict
`
    }
  ],
  "ML Models": [
    {
      title: "Build a Machine Learning model",
      tooltip: "Build a Machine Learning model",
      code: `# Build an Machine Learning model 
def transform(entities, context):
    # Read the input dataset for processing
    input_df_1 = entities['CPP data']  # 'CPP data' is the input dataset
    
    # Import necessary libraries
    import pandas as pd  # For data manipulation
    import pickle  # For saving and loading model and encoders
    import os  # For file path operations
    import plotly.express as px  # For data visualization
    import numpy as np  # For numerical operations
    import plotly.io as pio  # For controlling plotly display options
    pio.templates.default = 'none'  # Disable plotly templates
    
    # Import machine learning libraries
    from sklearn.model_selection import train_test_split as data_split  # For splitting data
    from sklearn.preprocessing import OneHotEncoder  # For encoding categorical variables
    from sklearn.ensemble import RandomForestRegressor  # For training the model
    from sklearn.metrics import mean_squared_error, r2_score  # For evaluation metrics
    
    # Import platform helper functions
    from utils.notebookhelpers.helpers import Helpers
    from utils.dtos.templateOutput import ModelOutput
    from utils.dtos.rc_ml_model import RCMLModel
    
    # Define which columns are numeric, categorical, dropped, and the target column
    features_data = {}
    features_data['num_cols'] = ['wheelbase', 'carlength', 'carwidth', 'carheight', 'curbweight', 
                                 'enginesize', 'boreratio', 'stroke', 'compressionratio', 'horsepower', 
                                 'peakrpm', 'citympg', 'highwaympg']
    features_data['cat_cols'] = ['CarName', 'fueltype', 'aspiration', 'doornumber', 'carbody', 
                                 'drivewheel', 'enginelocation', 'enginetype', 'cylindernumber', 'fuelsystem']
    features_data['dropped_cols'] = ['car_ID']
    features_data['target_col'] = 'price'

    # Drop unnecessary columns
    input_df_1 = input_df_1.drop(columns=features_data['dropped_cols'])

    # Handle missing numeric values by replacing them with the column mean
    for col in features_data['num_cols']:
        if input_df_1[col].isnull().any():
            mean_val = int(round(input_df_1[col].mean()))
            input_df_1[col].fillna(mean_val, inplace=True)
            features_data[col] = mean_val

    # Handle missing categorical values by replacing them with the mode (most frequent value)
    for col in features_data['cat_cols']:
        if input_df_1[col].isnull().any():
            mode_value = input_df_1[col].mode()[0]
            input_df_1[col].fillna(mode_value, inplace=True)
            features_data[col] = mode_value

    # One-hot encode the categorical variables and store the encoders for each column
    features_data['ohe_cols'] = []
    for col in features_data['cat_cols']:
        ohe = OneHotEncoder(handle_unknown='ignore', dtype=np.int64)
        ohe.fit(input_df_1[[col]])
        ohe_features = pd.DataFrame(ohe.transform(input_df_1[[col]]).toarray(),
                                    columns=ohe.get_feature_names_out())
        input_df_1 = pd.concat([input_df_1, ohe_features], axis=1).drop(columns=[col])
        encoderId = col + '_ohe.pkl'
        with open(Helpers.getChildDir(context) + encoderId, 'wb') as handle:
            pickle.dump(ohe, handle)
        features_data['ohe_cols'].append(col)

    # Separate the features (X) and target (y)
    X = input_df_1.drop(columns=[features_data['target_col']])
    y = input_df_1[features_data['target_col']]

    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = data_split(X, y, test_size=0.2, random_state=42)

    # Train a Random Forest Regressor model
    rf = RandomForestRegressor(random_state=42)
    rf.fit(X_train, y_train)

    # Save the trained model to the artifacts directory
    artifacts = {}
    model_path = os.path.join(Helpers.getChildDir(context), 'model_price_prediction.pkl')
    with open(model_path, 'wb') as f:
        pickle.dump(rf, f)
    artifacts['model_file'] = model_path

    # Save the feature metadata
    features_id = 'features.pkl'
    with open(Helpers.getChildDir(context) + features_id, 'wb') as handle:
        pickle.dump(features_data, handle)
    artifacts[features_id] = Helpers.getChildDir(context) + features_id

    # Save each one-hot encoder for future use
    for ohe_col in features_data['ohe_cols']:
        encoderId = ohe_col + '_ohe.pkl'
        artifacts[encoderId] = os.path.join(Helpers.getChildDir(context), encoderId)

    # Define a custom class for the price prediction model
    class PricePredictionModel(RCMLModel):
        """Wrap the trained regressor together with its prediction-time preprocessing.

        The artifacts mapping handed to load() is expected to contain:
          - 'model_file': path of the pickled model
          - 'features.pkl': path of the pickled feature metadata
          - '<column>_ohe.pkl': path of the pickled one-hot encoder of each encoded column

        NOTE: every artifact is read with pickle, so the files must come from a
        trusted training run; never point these paths at untrusted data.
        """

        # Class-level import so every method can reach the module as self.pickle
        import pickle

        # Load the saved model and encoders
        def load(self, artifacts):
            """Unpickle the trained model and keep the artifact paths for later use.

            Args:
                artifacts: mapping of artifact name to file path (see class docstring).
            """
            with open(artifacts['model_file'], 'rb') as model_file:
                self.ml_model = self.pickle.load(model_file)
            # Despite its name this keeps the whole mapping (model, metadata and encoder paths)
            self.encoders = artifacts

        # Preprocess the input data to match the training data
        def pre_process(self, df_input):
            """Return a copy of df_input cleaned and encoded like the training data.

            Steps: drop the columns listed under 'dropped_cols', fill missing values
            of numeric and categorical columns with the value stored under the
            column's name in the feature metadata, then one-hot encode the columns
            that were encoded during training.

            Args:
                df_input: pandas DataFrame holding the raw rows to score.

            Returns:
                A new DataFrame; the caller's frame is not modified.
            """
            with open(self.encoders['features.pkl'], 'rb') as handle:
                features_data = self.pickle.load(handle)

            # Work on a copy so the fills below never write into the caller's data
            df_input = df_input.copy()

            # Handle dropped, numeric, and categorical columns
            for col in features_data['dropped_cols']:
                try:
                    df_input = df_input.drop(columns=[col])
                except KeyError as e:
                    print(f'Error dropping column {col}: {e}')

            for col in features_data.get('num_cols', []):
                # Skip columns absent from the input or without a stored fill value
                if col in df_input.columns and col in features_data:
                    # Assign the result back: in-place fillna on a selected column
                    # is not guaranteed to update the parent DataFrame
                    df_input[col] = df_input[col].fillna(features_data[col])

            for col in features_data.get('cat_cols', []):
                if col not in df_input.columns or col not in features_data:
                    continue
                mode_value = features_data[col]
                column = df_input[col]
                # A categorical column can only be filled with a value that is one
                # of its categories, so register the fill value first if needed
                if isinstance(column.dtype, pd.CategoricalDtype):
                    if mode_value not in column.cat.categories:
                        column = column.cat.add_categories([mode_value])
                df_input[col] = column.fillna(mode_value)

            # One-hot encode categorical columns during prediction.
            # Encoders are saved per entry of 'ohe_cols'; fall back to 'cat_cols'
            # for metadata that does not carry that key.
            ohe_cols = features_data.get('ohe_cols', features_data.get('cat_cols', []))
            for ohe_col in ohe_cols:
                if ohe_col not in df_input.columns:
                    continue
                try:
                    encoderId = ohe_col + '_ohe.pkl'
                    with open(self.encoders[encoderId], 'rb') as handle:
                        ohe = self.pickle.load(handle)
                    # Reuse the input index so concat lines the encoded rows up
                    # with the original rows even when the index is not 0..n-1
                    df_input_ohe = pd.DataFrame(ohe.transform(df_input[[ohe_col]]).toarray(),
                                                columns=ohe.get_feature_names_out(),
                                                index=df_input.index)
                    df_input = pd.concat([df_input, df_input_ohe], axis=1).drop(columns=[ohe_col])
                except Exception as e:
                    # Best effort: report the problem and leave the column as it is
                    print(f'Error during one-hot encoding for column {ohe_col}: {e}')

            return df_input

        # Make predictions on new data
        def predict(self, model_input):
            """Preprocess model_input and return the predictions as a one-column DataFrame."""
            model_input = self.pre_process(model_input)
            predictions = self.ml_model.predict(model_input)
            return pd.DataFrame(predictions)

    # Output the trained model for future predictions
    model = ModelOutput(PricePredictionModel, artifacts=artifacts)

    # Make predictions on the test data and generate plots for evaluation
    y_pred = rf.predict(X_test)

    # Plot Actual vs Predicted Prices
    fig_1 = px.scatter(x=y_test, y=y_pred, labels={'x': 'Actual Price', 'y': 'Predicted Price'}, 
                       title='Actual vs Predicted Prices')
    fig_1.update_layout(autosize=False, height=450, width=950)
    fig_1.show()

    # Plot Residuals vs Predicted Prices
    residuals = y_test - y_pred
    fig_2 = px.scatter(x=y_pred, y=residuals, labels={'x': 'Predicted Price', 'y': 'Residuals'}, 
                       title='Residuals vs Predicted Prices')
    fig_2.update_layout(autosize=False, height=450, width=950)
    fig_2.show()

    # Plot Histogram of Residuals
    fig_3 = px.histogram(residuals, nbins=30, labels={'value': 'Residuals'}, 
                         title='Distribution of Residuals')
    fig_3.update_layout(autosize=False, height=450, width=950)
    fig_3.show()

    # Plot Feature Importance
    feature_importance = rf.feature_importances_
    features = X.columns
    fig_4 = px.bar(x=features, y=feature_importance, labels={'x': 'Features', 'y': 'Importance'}, 
                   title='Feature Importance')
    fig_4.update_layout(autosize=False, height=450, width=950)
    fig_4.show()

    # Return the model and plots as outputs
    return {
        'auto_generated_BMSK': fig_1,
        'auto_generated_N4dF': fig_2,
        'auto_generated_Dbu3': fig_3,
        'auto_generated_4jCh': fig_4,
        'auto_generated_aJjF': model,
    }`
    },
    {
      title: "Using RC model to predict outcome on new dataset",
      tooltip: "Using RC model to predict outcome on new dataset",
      code: `#Using RC model to predict outcome on new dataset

def transform(entities, context):
    """Score a new dataset with a model that was created in RapidCanvas.

    Args:
        entities: mapping of input dataset names to DataFrames; the dataset
            named 'input data for predictions' is the one that gets scored.
        context: platform context object, passed through to Helpers.

    Returns:
        A dict with one entry, 'data_with_predictions': a copy of the input
        dataset with an added 'predictions' column.
    """
    # Extract the input data for making predictions from the 'entities' dictionary
    df_for_predictions = entities['input data for predictions']

    # Import necessary libraries and modules
    import requests  # For making HTTP requests (if needed)
    import pandas as pd  # For data manipulation
    from utils.rc.dtos.artifact import Artifact  # For managing artifacts in the RapidCanvas environment
    from utils.notebookhelpers.helpers import Helpers  # Helper utilities provided by the platform
    import os  # For file path operations

    # Specify the model name created in RapidCanvas (or pre-trained).
    # This is a placeholder: replace it with the name of your own model.
    model_name = 'model_created_in_rc'

    # Load the model using Helpers to retrieve the machine learning model from the RC platform
    imported_model = Helpers.get_rc_ml_model(context, model_name)

    # Define the features that will be used for making predictions.
    # These names are placeholders: list every feature column the model expects.
    feature_columns = ['feature_1', 'feature_2', 'feature_3']  # Add all relevant features

    # 'X_test' is the subset of the input DataFrame consisting of the required feature columns
    X_test = df_for_predictions[feature_columns]

    # If the dataset also contains the true outcome (for example in a column named
    # 'outcome_column'), it can be read from df_for_predictions afterwards and compared
    # with the predictions. It is not needed for scoring, so it is not required here.

    # Make predictions using the imported model. The model's output is assumed to have a format where
    # it provides predicted values (e.g., in a column named 'predict') and optionally probabilities (e.g., '0_prob', '1_prob')
    model_output = imported_model.predict(X_test)

    # Extract the 'predict' column from the model's output to get the predicted outcome
    # Assuming the model output is a DataFrame or dict-like object with a 'predict' key/column
    y_pred = pd.DataFrame(model_output)['predict']  # Convert to DataFrame and extract the predictions

    # Add the predictions to a copy so the input entity itself is left unchanged.
    # Assign by position: the model output may carry a fresh 0..n-1 index, and
    # index-aligned assignment would then produce NaN for differently indexed inputs.
    df_with_predictions = df_for_predictions.copy()
    df_with_predictions['predictions'] = y_pred.to_numpy()

    # Return the updated DataFrame (with the 'predictions' column) as part of the result dictionary
    return {
        'data_with_predictions': df_with_predictions,
    }
`
    }
  ]
};
