export const CODE_RECIPE_SNIPPETS = {
  "Generating Chart and Dataset": [
    {
      // Recipe entry. `code` is a template literal holding Python source for a
      // `transform(entities, context)` function that reads the 'titanic' dataset,
      // drops its 'Age' column and returns the result under the key "output_dataset".
      // NOTE(review): `title` / `tooltip` are presumably UI labels — confirm against the consumer.
      title: "Dataset Creation",
      tooltip: "Dataset Creation",
      code: `# Function to output a new dataset
  def transform(entities, context):
      
      input_df_1 = entities['titanic']  # This is for reading the input dataset
  
      # Import necessary libraries: pandas for data manipulation and numpy for numerical operations
      import pandas as pd
      import numpy as np
  
      # Drop the 'Age' column from the input dataframe
      output_df_1 = input_df_1.drop(['Age'], axis=1)  # axis=1 means drop a column (instead of a row)
  
      # Return the modified dataset as 'output_dataset'
      return {
          "output_dataset": output_df_1 # on the canvas the name of the new dataset will be output_dataset
      }`
    },
    {
      title: "Chart Creation",
      tooltip: "Dataset Creation",
      code: `# Function to create a chart 
  def transform(entities, context):
      
      input_df_1 = entities['titanic']  # This is for reading the input dataset
  
      # Import necessary libraries for creating the chart
      import plotly.express as px
      import plotly.graph_objects as go  # For more complex figures like Pie charts
      import plotly.io as pio  # To configure default templates for Plotly
      import pandas as pd
      import numpy as np
  
      # Set default plot theme to 'simple_white' for a clean chart style
      pio.templates.default = 'simple_white'
  
      # Count occurrences of each gender ('Sex' column) in the dataset
      gender_counts = input_df_1['Sex'].value_counts()
  
      # Create a Pie chart with gender labels and their respective counts, adding a hole for a donut chart style
      fig_1 = go.Figure(data=[go.Pie(labels=gender_counts.index, values=gender_counts.values, hole=0.3)])
  
      # Update the layout of the Pie chart, including title, font style, size, and color
      fig_1.update_layout(
          title_text='Gender Distribution',  # Title of the chart
          font=dict(family='Roboto, monospace', size=16, color='black'),  # Font customization
          autosize=True  # Enable automatic resizing of the chart
      )
  
      # Display the generated chart
      fig_1.show()
  
      # Return the generated chart with a specific key name (can be renamed as needed)
      return {
          'auto_generated_9EYi': fig_1,  # 'auto_generated_9EYi' is the identifier for the generated chart in the canvas
      }`
    }
  ],
  "Connecting to APIs": [
    {
      title: "Fetching Data from an API",
      tooltip: "Fetching Data from an API",
      code: `# Fetching Data from an API
  def transform(entities, context):
      # Importing necessary libraries: requests to make API calls and pandas for data manipulation
      import requests
      import pandas as pd
  
      # Define the API URL from which to fetch user data
      url = "https://jsonplaceholder.typicode.com/users"
  
      # Make a GET request to the API to fetch the user data
      response = requests.get(url)
  
      # Check if the response from the API is successful (status code 200)
      if response.status_code == 200:
          # Convert the API response from JSON format to a DataFrame
          data = response.json()
          users_df = pd.json_normalize(data)
  
          # Inform that the data retrieval was successful
          print("Successfully retrieved users.")
  
          # Return the resulting DataFrame inside a dictionary with the key 'output_users_df'
          return {'output_users_df': users_df}
      else:
          # If there's an error, print the error status code and message for troubleshooting
          print(f"Error: {response.status_code}")
          print(response.text)
  
          # Return an empty dictionary in case of failure
          return {}`
    },
    {
      title: "Posting Data to an API",
      tooltip: "Posting Data to an API",
      code: `# Posting Data to an API
  
  def transform(entities, context):
      # Importing necessary libraries: pandas for data manipulation and requests to interact with the API
      import pandas as pd
      import requests
  
      # Extract the input dataset from the provided entities
      input_df_1 = entities['Dataset_Name']  # this is for reading input dataset
  
      # Extract the values from the first row of the dataset
      first_row = input_df_1.iloc[0]
  
      # Define the API endpoint for posting data
      url = "https://jsonplaceholder.typicode.com/posts"
  
      # Prepare the data to be posted to the API using specific columns from the first row
      post_data = {
          'title': str(first_row['JobTitle']),      # Using 'JobTitle' as the title of the post
          'body': str(first_row['GivenName']),      # Using 'GivenName' as the body of the post
          'userId': int(first_row['EmployeeNumber'])# Using 'EmployeeNumber' as the userId
      }
    
      # Make a POST request to the API with the prepared data
      response = requests.post(url, json=post_data)
      
      # Check if the POST request was successful (status code 201)
      if response.status_code == 201:
          # Convert the API response to a DataFrame
          new_post = response.json()
          new_post_df = pd.json_normalize(new_post)
  
          # Inform that the post was successfully created
          print("Successfully created a new post.")
  
          # Return the new post's DataFrame inside a dictionary with the key 'output_new_post_df'
          return {'output_new_post_df': new_post_df}
      else:
          # If there's an error, print the error status code and message for troubleshooting
          print(f"Error: {response.status_code}")
          print(response.text)
  
          # Return an empty dictionary in case of failure
          return {}`
    },
    {
      title: "Updating Data in an API",
      tooltip: "Updating Data in an API",
      code: `# Updating Data in an API
  
  def transform(entities, context):
      # Importing necessary libraries: requests to interact with the API and pandas for data manipulation
      import requests
      import pandas as pd
  
      # Extract the input dataset from the provided entities
      input_df_1 = entities['Dataset_Name']  # this is for reading the input dataset
  
      # Extract values from the 16th row (index 15) of the dataset
      first_row = input_df_1.iloc[15]
  
      # Specify the ID of the post to be updated
      post_id = 1  # ID of the post to be updated
      url = f"https://jsonplaceholder.typicode.com/posts/{post_id}"  # API URL for updating the post
  
      # Prepare the updated data using specific columns from the 16th row
      updated_data = {
          'title': str(first_row['JobTitle']),      # Updating the post title using 'JobTitle'
          'body': str(first_row['GivenName']),      # Updating the post body using 'GivenName'
          'userId': int(first_row['EmployeeNumber'])# Updating the userId using 'EmployeeNumber'
      }
      
      # Make a PUT request to update the post with the new data
      response = requests.put(url, json=updated_data)
  
      # Check if the PUT request was successful (status code 200)
      if response.status_code == 200:
          # Convert the API response to a DataFrame
          updated_post = response.json()
          updated_post_df = pd.json_normalize(updated_post)
  
          # Inform that the post was successfully updated
          print("Successfully updated the post.")
  
          # Return the updated post's DataFrame inside a dictionary with the key 'output_updated_post_df'
          return {'output_updated_post_df': updated_post_df}
      else:
          # If there's an error, print the error status code and message for troubleshooting
          print(f"Error: {response.status_code}")
          print(response.text)
  
          # Return an empty dictionary in case of failure
          return {}`
    },
    {
      title: "Fetch data from RapidCanvas API",
      tooltip: "Fetch data from RapidCanvas API",
      code: `# Fetch data from RapidCanvas API
  def transform(entities, context):
      # Import necessary libraries
      import requests  # For making HTTP requests to the API
      import pandas as pd  # For data manipulation with DataFrames
      import json  # For handling JSON data
      from utils.notebookhelpers.helpers import Helpers  # Helper utilities
  
      # Define the service ID and server from which the logs will be fetched
      service_id = '****-****-****-****-************'  # Prediction service ID
      server = 'staging.dev.rapidcanvas.net'  # Server environment
  
      # Fetch the secret token used for API authorization
      token = Helpers.get_secret(context, "token")
  
      # Define the base URLs for fetching logs and expanded log details
      logs_url = f"https://{server}/api/v2/prediction-services/{service_id}/logs"
      expand_logs_url = f"https://{server}/api/v2/prediction-services/{service_id}/expand-logs?runId="
  
      # Set up the headers for the API requests, including the authorization token
      headers = {
          "Authorization": f"Bearer {token}",  # Token-based authorization
          "Content-Type": "application/json"  # Specify JSON format for the requests
      }
  
      # Make a request to fetch the basic log entries
      logs_response = requests.get(logs_url, headers=headers)
      logs_response.raise_for_status()  # Raise an error if the request fails
      logs = logs_response.json()['data']['rows']  # Extract log entries from the JSON response
  
      # Initialize an empty list to store the detailed log data
      data = []
  
      # Iterate over each log entry to fetch expanded log details
      for log in logs:
          log_id = log['cells'][0]  # Assume the log ID is in the first cell of the log entry
          # Fetch detailed information for each log using the log ID
          log_details_response = requests.get(expand_logs_url + log_id, headers=headers)
          log_details_response.raise_for_status()  # Raise an error if the request fails
          log_details = log_details_response.json()  # Parse the detailed log response
  
          # Extract the request and response data from the detailed log
          print(log_details.get('request', '{}'))  # Debugging: print the request data
          request_data = log_details.get('request', '{}')  # Get the request data, defaulting to '{}'
          print(log_details.get('response', '{}'))  # Debugging: print the response data
          response_data = log_details.get('response', '{}')  # Get the response data, defaulting to '{}'
  
          # Append the request and response data to the 'data' list
          data.append({'req': request_data, 'res': response_data})
  
      # Convert the collected data into a pandas DataFrame for further use
      df = pd.DataFrame(data)
  
      # Return the DataFrame as part of the result dictionary
      return {'output_users_df': df}`
    }
  ],
  "Example 3rd Party Connections": [
    {
      title: "Fetching from an API with Access Token",
      tooltip: "Fetching from an API with Access Token",
      code: `# Fetching from an API with Access Token
  def transform(entities, context):
      # Import necessary libraries: requests for API interaction, pandas for data manipulation, and Helpers for token retrieval
      import requests
      import pandas as pd
      from utils.notebookhelpers.helpers import Helpers
    
      # Fetch the access token using a helper function
      access_token = Helpers.get_secret(context, "Example_Secret_Key")
      if not access_token:
          # If the access token could not be retrieved, print an error and stop execution
          print("Failed to retrieve access token.")
          return {}
    
      # Define the API URL to fetch orders with specific parameters
      url_orders = "https://www.domain.shop/admin/api/2024-01/orders.json?created_at_max=2024-12-12T23:59:59-00:00&status=any"
      url = url_orders
    
      # Set up the request headers with the access token for authorization
      headers = {
          'X-Shopify-Access-Token': access_token
      }
    
      # Initialize variables to handle pagination and store the fetched data
      dataframes = []  # To store data from all pages
      i = 0  # Page counter
      next_link = True  # Indicator to check if more pages are available
    
      # Loop through paginated API results
      while True:
          if not next_link:  # Stop if there are no more next links
              break
    
          # For the first request, use the initial URL; for subsequent requests, use the 'next' link
          if i == 0:
              current_url = url
          else:
              current_url = next_url
    
          # Make the GET request to fetch orders
          response = requests.get(current_url, headers=headers)
    
          # Check if the request was successful (status code 200)
          if response.status_code == 200:
              data = response.json()
    
              # If there are no more orders, stop the loop
              if not data.get('orders'):
                  break
    
              # Convert the orders data from JSON to a DataFrame
              df = pd.json_normalize(data['orders'])
              dataframes.append(df)  # Append the DataFrame to the list
              i += 1  # Increment page counter
              print('Page ' + str(i))
    
              # Check if there's a 'next' link for further pages
              if 'next' in response.links:
                  next_link = response.links['next']
                  next_url = next_link['url']
              else:
                  print('No more next links.')
                  next_link = False  # No more pages to fetch
    
          else:
              # If there's an error, print the status code and error message
              print(f"Error: {response.status_code}")
              print(response.text)
              break
    
      # Combine all fetched DataFrames into a single DataFrame and remove duplicate entries by 'id'
      orders_df = pd.concat(dataframes, ignore_index=True)
      orders_df.drop_duplicates(subset=['id'], inplace=True)
    
      # Return the final orders DataFrame as output
      return {'output_orders_df': orders_df}`
    },
    {
      title: "Fetching data from GCS",
      tooltip: "Fetching data from GCS",
      code: `# Fetch data from GCS
  def transform(entities, context):
      import os
      import pandas as pd
      from utils.notebookhelpers.gcs import GCSHelper
      from utils.notebookhelpers.helpers import Helpers
  
      # Retrieve the secret key by accessing the workspace's settings (in this case, the 'RC_DATA_INTERNAL_KEY' secret).
      # This key is required to authenticate with GCS (Google Cloud Storage).
      key = Helpers.get_secret(context, 'RC_DATA_INTERNAL_KEY')
  
      # Write the retrieved secret key to a local file, which will be used for GCS authentication.
      # This key file is necessary for accessing GCS resources.
      key_file = os.path.join(Helpers.getChildDir(context), "gcs.key")
      with open(key_file, "w") as f:
          f.write(key)
  
      # Initialize GCSHelper, a utility that helps interact with Google Cloud Storage.
      # We pass the key file and the name of the GCS bucket ("rapidcanvas-training-data") to the helper.
      gcs_helper = GCSHelper(key_file, "rapidcanvas-training-data")
  
      # Specify the name of the CSV file you want to download from the GCS bucket.
      file_name = "raw_txn_lite.csv"
  
      # Download the specified CSV file from the GCS bucket to the local directory.
      # 'Helpers.getChildDir(context)' provides the path where the file will be saved locally.
      gcs_helper.download_file(file_name, Helpers.getChildDir(context))
  
      # Construct the full local file path where the CSV file has been saved after the download.
      # We concatenate the directory path with the file name to create the full file path.
      file_path = Helpers.getChildDir(context) + 'raw_txn_lite.csv'
  
      # Use pandas to read the CSV file into a DataFrame, which allows easy data manipulation and analysis.
      df = pd.read_csv(file_path)
  
      # Return the DataFrame as part of a dictionary, making it accessible outside the function.
      return {"df": df}
  `
    },
    {
      title: "Fetching data from Slack API",
      tooltip: "Fetching data from Slack API",
      code: `# Example integration with Slack API
  
  def transform(entities, context):
      # Import necessary libraries
      import requests  # For making API requests
      import pandas as pd  # For data manipulation with DataFrames
      import time  # For working with timestamps
      from datetime import datetime, timedelta  # For date calculations
      from utils.notebookhelpers.helpers import Helpers  # Helper utilities
  
      # Get the timestamp from 30 days ago, used to fetch messages within this time frame
      oldest_timestamp = get_30_days_ago_timestamp()
  
      # Fetch the Slack API token from secure storage
      token = Helpers.get_secret(context, "slack_token")
      print(token)  # Debugging: print the token (be careful in production with printing sensitive info)
  
      # List of Slack channel names to retrieve messages from
      channel_names = ["channel1", "channel2"]
  
      # Fetch the available channels from Slack using the token
      channels = get_channels(token)  
  
      # Initialize empty lists to hold column names and the data rows
      cols = []  # Column headers for the DataFrame
      data = []  # Rows of data to be collected
  
      # Check if channels were successfully fetched
      if channels:
          # Iterate through each channel name specified in 'channel_names'
          for channel_name in channel_names:
              # Get the Slack channel ID corresponding to the channel name
              channel_id = get_channel_id(channels, channel_name)
  
              if channel_id:
                  print("found channel id", channel_id, oldest_timestamp)  # Debugging: print channel ID and timestamp
  
                  # Fetch messages from the Slack channel using the channel ID and token
                  messages = fetch_slack_messages(channel_id, token, oldest_timestamp)
                  print(messages)  # Debugging: print the retrieved messages
  
                  # Process each message retrieved from Slack
                  for message in messages:
                      # Process the message and store it as a row in the data list
                      row = process_message(message, channel_name, cols)
                      data.append(row)
  
      # Convert the collected data into a DataFrame for easier manipulation and analysis
      df = pd.DataFrame(data, columns=cols)
  
      # Return the DataFrame as part of the result dictionary
      return {'messages': df}
  
      # Helper function to fetch the list of channels from Slack
      def get_channels(token):
          # Set up the headers for the API request, including the Bearer token for authorization
          headers = {"Authorization": f"Bearer {token}"}
          # Define the parameters to fetch both public and private channels, limited to 1000 results
          params = {"limit": 1000, "types": "public_channel,private_channel"}
          # Make the API request to Slack to get the list of channels
          response = requests.get(SLACK_CHANNEL_LIST_URL, headers=headers, params=params)
          if response.status_code == 200:
              # Return the list of channels if the request was successful
              return response.json().get("channels", [])
          else:
              # Print an error message if the request failed
              print(f"Error fetching channels: {response.status_code}")
              return None
  
      # Helper function to get the ID of a specific channel by its name
      def get_channel_id(channels, channel_name):
          # Iterate through all channels and find the one that matches the provided name
          for channel in channels:
              if channel["name"] == channel_name:
                  return channel["id"]
          return None  # Return None if the channel name was not found
  
      # Helper function to fetch messages from a specific Slack channel
      def fetch_slack_messages(channel_id, token, oldest_timestamp):
          # Set up the headers for the API request, including the Bearer token for authorization
          headers = {"Authorization": f"Bearer {token}"}
          # Define the parameters, including the channel ID and oldest timestamp, limited to 1000 results
          params = {"channel": channel_id, "oldest": oldest_timestamp, "limit": 1000}
          messages = []  # Initialize an empty list to collect messages
  
          # Loop to fetch messages and handle pagination if more messages exist
          while True:
              # Make the API request to Slack to fetch the messages
              response = requests.get(SLACK_API_URL, headers=headers, params=params)
              if response.status_code == 200:
                  # Add the messages to the list
                  json_response = response.json()
                  messages += json_response.get("messages", [])
                  # Check if there is a next cursor for pagination
                  next_cursor = json_response.get("response_metadata", {}).get("next_cursor", "")
                  if not next_cursor:
                      break  # Exit the loop if there are no more pages of messages
                  else:
                      # Set the cursor for the next request if there are more messages to fetch
                      params["cursor"] = next_cursor
              else:
                  # Print an error message if the request failed
                  print(f"Error fetching messages: {response.status_code}")
                  break
          return messages  # Return the list of messages
  
      # Helper function to get the timestamp for 30 days ago
      def get_30_days_ago_timestamp():
          # Get the current time in seconds since the epoch
          current_time = time.time()
          # Calculate 30 days in seconds
          thirty_days_in_seconds = 30 * 24 * 60 * 60
          # Return the timestamp from 30 days ago
          return int(current_time - thirty_days_in_seconds)
  
      # Helper function to process a Slack message and format it for the DataFrame
      def process_message(message, channel_name, cols):
          row = {}  # Initialize an empty dictionary to hold the message data
          # Get the message timestamp and text content
          ts = message.get("ts", "")
          text = message.get("text", "")
          attachment_text = ""  # Initialize an empty string for any attachment text
          # Collect all attachment text from the message (if any)
          for attachment in message.get("attachments", []):
              attachment_text += attachment.get("text", "")
          # Add each piece of data to the row and update the column list
          add_data("ts", ts, cols, row)
          add_data("text", text, cols, row)
          add_data("attach_text", attachment_text, cols, row)
          add_data("channel_name", channel_name, cols, row)
          return row  # Return the processed row
  
      # Helper function to add data to the row and ensure the column is included
      def add_data(key, val, cols, row):
          # Add the key to the column list if it's not already there
          if key not in cols:
              cols.append(key)
          # Add the value to the row dictionary under the given key
          row[key] = val`
    }
  ],
  "Working with Artifacts": [
    {
      // Recipe entry. `code` is a template literal holding Python source for a
      // `transform(entities, context)` function that downloads an artifact via
      // Helpers.downloadArtifacts, reads every sheet of 'transaction.xlsx' with pandas
      // and returns a dict of the non-empty sheets keyed by sheet name.
      // NOTE(review): this snippet's Python body is indented by 4 spaces while most other
      // recipes in this file use 2 — confirm whether the consumer normalises indentation.
      title: "Fetching data from an artifact",
      tooltip: "Fetching data from an artifact",
      code: `# Reading data from excel stored in an artifact
    
    def transform(entities, context):
        # Import necessary modules
        import requests  # Used for making HTTP requests, if needed
        import pandas as pd  # Pandas library for data manipulation
        from utils.notebookhelpers.helpers import Helpers  # Import Helpers class to use utility functions
    
        # Try to download the Excel file using the Helpers.downloadArtifacts function
        try:
            # Downloads the 'transaction.xlsx' file and stores it in excel_file
            excel_file = Helpers.downloadArtifacts(context, 'Name of the artifact')['transaction.xlsx']
        except KeyError:
            # Raise an error if 'transaction.xlsx' is not found in the downloaded artifact
            raise ValueError("File 'transaction.xlsx' not found in the artifact.")
        except Exception as e:
            # Handle any other error that occurs during the download process
            raise RuntimeError(f"Error downloading the file: {e}")
        
        # Try to read the Excel file and retrieve all sheets using pandas
        try:
            # Read the Excel file and load all sheets into a dictionary where
            # the keys are sheet names and values are DataFrames containing the data
            excel_data = pd.read_excel(excel_file, sheet_name=None)
        except Exception as e:
            # Handle any error that occurs while reading the Excel file
            raise RuntimeError(f"Error reading the Excel file: {e}")
        
        # Initialize an empty dictionary to store DataFrames for each sheet
        dataframes = {}
    
        # Loop through the sheets in the Excel file
        for sheet_name, data in excel_data.items():
            # Check if the current sheet has data or is empty
            if not data.empty:
                # If the sheet is not empty, add it to the dataframes dictionary
                dataframes[sheet_name] = data
            else:
                # Print a warning message if the sheet is empty
                print(f"Warning: Sheet '{sheet_name}' is empty.")
        
        # Return the dictionary containing DataFrames for each sheet in the Excel file
        return dataframes`
    },
    {
      title: "Create and Save Data to Artifacts",
      tooltip: "Create and Save Data to Artifacts",
      code: `# Create and Save Data to Artifacts
  def transform(entities, context):
      # Import necessary libraries
      import requests  # For making HTTP requests (not used in this code but imported for future use)
      import pandas as pd  # For data manipulation and creation of DataFrames
      from utils.dtos.templateOutput import ArtifactOutput  # Used to structure the output for artifacts
      from utils.notebookhelpers.helpers import Helpers  # Helper utilities for managing artifact directories
  
      # Define the data that will be used to create the DataFrame
      data = {
          'Col1': ['A', 'B', 'C'],  # First column with values 'A', 'B', 'C'
          'Col2': [1, 2, 3]  # Second column with numeric values 1, 2, and 3
      }
  
      # Create a pandas DataFrame using the defined data
      df = pd.DataFrame(data)
  
      # Get or create an artifacts directory using a unique ID ('test-artifact') and store files in it
      artifactsDir = Helpers.getOrCreateArtifactsDir(context, artifactsId="test-artifact")
  
      # Save the DataFrame as a CSV file in the artifacts directory
      df.to_csv(artifactsDir + '/test.csv')  # Save as 'test.csv'
  
      # Save the first 10 rows of the DataFrame as a Pickle file in the artifacts directory
      df.head(10).to_csv(artifactsDir + '/test.pkl')  # Save as 'test.pkl'
  
      # Prepare the output dictionary that references the stored artifacts
      output_dict = {
          "test-artifact": ArtifactOutput()  # Create an artifact output for 'test-artifact'
      }
  
      # Return the dictionary with references to the artifacts created
      return output_dict
  `
    }
  ],
  "ML Models": [
    {
      title: "Build a Machine Learning model",
      tooltip: "Build a Machine Learning model",
      code: `# Build an Machine Learning model 
  def transform(entities, context):
      # Read the input dataset for processing
      input_df_1 = entities['CPP data']  # 'CPP data' is the input dataset
      
      # Import necessary libraries
      import pandas as pd  # For data manipulation
      import pickle  # For saving and loading model and encoders
      import os  # For file path operations
      import plotly.express as px  # For data visualization
      import numpy as np  # For numerical operations
      import plotly.io as pio  # For controlling plotly display options
      pio.templates.default = 'none'  # Disable plotly templates
      
      # Import machine learning libraries
      from sklearn.model_selection import train_test_split as data_split  # For splitting data
      from sklearn.preprocessing import OneHotEncoder  # For encoding categorical variables
      from sklearn.ensemble import RandomForestRegressor  # For training the model
      from sklearn.metrics import mean_squared_error, r2_score  # For evaluation metrics
      
      # Import platform helper functions
      from utils.notebookhelpers.helpers import Helpers
      from utils.dtos.templateOutput import ModelOutput
      from utils.dtos.rc_ml_model import RCMLModel
      
      # Define which columns are numeric, categorical, dropped, and the target column
      features_data = {}
      features_data['num_cols'] = ['wheelbase', 'carlength', 'carwidth', 'carheight', 'curbweight', 
                                   'enginesize', 'boreratio', 'stroke', 'compressionratio', 'horsepower', 
                                   'peakrpm', 'citympg', 'highwaympg']
      features_data['cat_cols'] = ['CarName', 'fueltype', 'aspiration', 'doornumber', 'carbody', 
                                   'drivewheel', 'enginelocation', 'enginetype', 'cylindernumber', 'fuelsystem']
      features_data['dropped_cols'] = ['car_ID']
      features_data['target_col'] = 'price'
  
      # Drop unnecessary columns
      input_df_1 = input_df_1.drop(columns=features_data['dropped_cols'])
  
      # Handle missing numeric values by replacing them with the column mean
      for col in features_data['num_cols']:
          if input_df_1[col].isnull().any():
              mean_val = int(round(input_df_1[col].mean()))
              input_df_1[col].fillna(mean_val, inplace=True)
              features_data[col] = mean_val
  
      # Handle missing categorical values by replacing them with the mode (most frequent value)
      for col in features_data['cat_cols']:
          if input_df_1[col].isnull().any():
              mode_value = input_df_1[col].mode()[0]
              input_df_1[col].fillna(mode_value, inplace=True)
              features_data[col] = mode_value
  
      # One-hot encode the categorical variables and store the encoders for each column
      features_data['ohe_cols'] = []
      for col in features_data['cat_cols']:
          ohe = OneHotEncoder(handle_unknown='ignore', dtype=np.int64)
          ohe.fit(input_df_1[[col]])
          ohe_features = pd.DataFrame(ohe.transform(input_df_1[[col]]).toarray(),
                                      columns=ohe.get_feature_names_out())
          input_df_1 = pd.concat([input_df_1, ohe_features], axis=1).drop(columns=[col])
          encoderId = col + '_ohe.pkl'
          with open(Helpers.getChildDir(context) + encoderId, 'wb') as handle:
              pickle.dump(ohe, handle)
          features_data['ohe_cols'].append(col)
  
      # Separate the features (X) and target (y)
      X = input_df_1.drop(columns=[features_data['target_col']])
      y = input_df_1[features_data['target_col']]
  
      # Split the data into training and testing sets
      X_train, X_test, y_train, y_test = data_split(X, y, test_size=0.2, random_state=42)
  
      # Train a Random Forest Regressor model
      rf = RandomForestRegressor(random_state=42)
      rf.fit(X_train, y_train)
  
      # Save the trained model to the artifacts directory
      artifacts = {}
      model_path = os.path.join(Helpers.getChildDir(context), 'model_price_prediction.pkl')
      with open(model_path, 'wb') as f:
          pickle.dump(rf, f)
      artifacts['model_file'] = model_path
  
      # Save the feature metadata
      features_id = 'features.pkl'
      with open(Helpers.getChildDir(context) + features_id, 'wb') as handle:
          pickle.dump(features_data, handle)
      artifacts[features_id] = Helpers.getChildDir(context) + features_id
  
      # Save each one-hot encoder for future use
      for ohe_col in features_data['ohe_cols']:
          encoderId = ohe_col + '_ohe.pkl'
          artifacts[encoderId] = os.path.join(Helpers.getChildDir(context), encoderId)
  
      # Define a custom class for the price prediction model
      class PricePredictionModel(RCMLModel):
          """Bundles the trained regressor with the preprocessing applied before training.

          The artifacts dict handed to load() maps 'model_file', 'features.pkl' and one
          '<column>_ohe.pkl' entry per one-hot encoded column to the path of a pickle file.
          NOTE: pickle files can execute code when loaded; only load artifacts written by
          this recipe.
          """

          # Kept as a class attribute so an existing self.pickle reference still resolves
          import pickle

          def load(self, artifacts):
              """Unpickle the model from artifacts['model_file'] and keep the artifact paths."""
              import pickle

              with open(artifacts['model_file'], 'rb') as model_file:
                  self.ml_model = pickle.load(model_file)
              # The same dict is reused later to locate the feature metadata and encoders
              self.encoders = artifacts

          def pre_process(self, df_input):
              """Apply the training-time column drops, missing-value fills and one-hot encoding.

              Returns the transformed DataFrame. Failures while dropping or encoding a column
              are printed and that column is left as it is.
              """
              import pickle
              import pandas as pd

              # Context manager closes the file handle once the metadata is loaded
              with open(self.encoders['features.pkl'], 'rb') as handle:
                  features_data = pickle.load(handle)

              # Drop the columns that were dropped before training
              for col in features_data['dropped_cols']:
                  try:
                      df_input = df_input.drop(columns=[col])
                  except KeyError as e:
                      print(f'Error dropping column {col}: {e}')

              # A fill value is stored under the column name only when that column had
              # missing values at training time, so columns without one are skipped
              for col in features_data.get('num_cols', []):
                  if col in df_input.columns and col in features_data:
                      # Assign back instead of a chained inplace fill, which newer pandas may ignore
                      df_input[col] = df_input[col].fillna(features_data[col])

              for col in features_data.get('cat_cols', []):
                  if col in df_input.columns and col in features_data:
                      mode_value = features_data[col]
                      if isinstance(df_input[col].dtype, pd.CategoricalDtype):
                          if mode_value not in df_input[col].cat.categories:
                              # add_categories returns a new Series; its inplace flag was removed in pandas 2.0
                              df_input[col] = df_input[col].cat.add_categories([mode_value])
                      df_input[col] = df_input[col].fillna(mode_value)

              # One-hot encode categorical columns with the encoders fitted during training
              for ohe_col in features_data.get('cat_cols', []):
                  try:
                      if ohe_col in df_input.columns:
                          encoderId = ohe_col + '_ohe.pkl'
                          with open(self.encoders[encoderId], 'rb') as handle:
                              ohe = pickle.load(handle)
                          # Reuse the input index so concat lines rows up even when the
                          # input does not have a default 0..n-1 index
                          df_input_ohe = pd.DataFrame(ohe.transform(df_input[[ohe_col]]).toarray(),
                                                      columns=ohe.get_feature_names_out(),
                                                      index=df_input.index)
                          df_input = pd.concat([df_input, df_input_ohe], axis=1).drop(columns=[ohe_col])
                  except Exception as e:
                      print(f'Error during one-hot encoding for column {ohe_col}: {e}')

              return df_input

          def predict(self, model_input):
              """Preprocess model_input, run the model and return the predictions as a DataFrame."""
              import pandas as pd

              model_input = self.pre_process(model_input)
              predictions = self.ml_model.predict(model_input)
              return pd.DataFrame(predictions)
  
      # Output the trained model for future predictions
      model = ModelOutput(PricePredictionModel, artifacts=artifacts)
  
      # Make predictions on the test data and generate plots for evaluation
      y_pred = rf.predict(X_test)
  
      # Plot Actual vs Predicted Prices
      fig_1 = px.scatter(x=y_test, y=y_pred, labels={'x': 'Actual Price', 'y': 'Predicted Price'}, 
                         title='Actual vs Predicted Prices')
      fig_1.update_layout(autosize=False, height=450, width=950)
      fig_1.show()
  
      # Plot Residuals vs Predicted Prices
      residuals = y_test - y_pred
      fig_2 = px.scatter(x=y_pred, y=residuals, labels={'x': 'Predicted Price', 'y': 'Residuals'}, 
                         title='Residuals vs Predicted Prices')
      fig_2.update_layout(autosize=False, height=450, width=950)
      fig_2.show()
  
      # Plot Histogram of Residuals
      fig_3 = px.histogram(residuals, nbins=30, labels={'value': 'Residuals'}, 
                           title='Distribution of Residuals')
      fig_3.update_layout(autosize=False, height=450, width=950)
      fig_3.show()
  
      # Plot Feature Importance
      feature_importance = rf.feature_importances_
      features = X.columns
      fig_4 = px.bar(x=features, y=feature_importance, labels={'x': 'Features', 'y': 'Importance'}, 
                     title='Feature Importance')
      fig_4.update_layout(autosize=False, height=450, width=950)
      fig_4.show()
  
      # Return the model and plots as outputs
      return {
          'auto_generated_BMSK': fig_1,
          'auto_generated_N4dF': fig_2,
          'auto_generated_Dbu3': fig_3,
          'auto_generated_4jCh': fig_4,
          'auto_generated_aJjF': model,
      }`
    },
    {
      title: "Using RC model to predict outcome on new dataset",
      tooltip: "Using RC model to predict outcome on new dataset",
      code: `#Using RC model to predict outcome on new dataset
  
  def transform(entities, context):
      # Extract the input data for making predictions from the 'entities' dictionary
      df_for_predictions = entities['input data for predictions']
  
      # Import necessary libraries and modules
      import requests  # For making HTTP requests (if needed)
      import pandas as pd  # For data manipulation
      from utils.rc.dtos.artifact import Artifact  # For managing artifacts in the RapidCanvas environment
      from utils.notebookhelpers.helpers import Helpers  # Helper utilities provided by the platform
      import os  # For file path operations
  
      # Specify the model name created in RapidCanvas (or pre-trained)
      model_name = 'modle_created_in_rc'
  
      # Load the model using Helpers to retrieve the machine learning model from the RC platform
      imported_model = Helpers.get_rc_ml_model(context, model_name)
  
      # Define the features that will be used for making predictions
      # 'X_test' is the subset of the input DataFrame consisting of the required feature columns
      X_test = df_for_predictions[['feature_1', 'feature_2', 'feature_3', ....]]  # Add all relevant features
  
      # Extract the actual outcomes (if available) for validation or comparison purposes
      # 'y_test' is the column containing the true values for the outcome (if available in your use case)
      y_test = df_for_predictions['outcome_column']
  
      # Make predictions using the imported model. The model's output is assumed to have a format where
      # it provides predicted values (e.g., in a column named 'predict') and optionally probabilities (e.g., '0_prob', '1_prob')
      model_output = imported_model.predict(X_test)
  
      # Extract the 'predict' column from the model's output to get the predicted outcome
      # Assuming the model output is a DataFrame or dict-like object with a 'predict' key/column
      y_pred = pd.DataFrame(model_output)['predict']  # Convert to DataFrame and extract the predictions
  
      # Assign the predicted outcomes (y_pred) to a new column 'predictions' in the original input DataFrame
      df_for_predictions['predictions'] = y_pred
  
      # Return the updated DataFrame (with the 'predictions' column) as part of the result dictionary
      return {
          'data_with_predictions': df_for_predictions,
      }
  `
    }
  ],
  "LLM API Integration": [
    {
      // Recipe snippet: minimal OpenAI chat-completion call.
      // The embedded Python reads the API key from the OPEN_AI_KEY environment variable,
      // sends a fixed two-message conversation to the gpt-4o model and returns the reply
      // text as a one-row DataFrame under the 'output_dataset' key.
      // The 'titanic' entity is read into input_df_1 but is not used by the request.
      title: "OpenAI Call",
      tooltip: "OpenAI Call",
      code: `def transform(entities, context):
  
    # Import the OpenAI library
    from openai import OpenAI
    import os
    import pandas as pd
  
    input_df_1 = entities['titanic']  # This is for reading the input dataset
    
    # Initialize the OpenAI client with the API key from environment variables
    client = OpenAI(api_key=os.environ["OPEN_AI_KEY"])
  
    # Create a chat completion request with a specific model and messages
    completion = client.chat.completions.create(
      model="gpt-4o",
      messages=[
        {"role": "developer", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello!"}
      ]
    )
  
    # Print the response message from the completion
    output = completion.choices[0].message.content
  
    #convert output to a dataframe
    output_df = pd.DataFrame({"Output": [output]})
  
    return {
          "output_dataset": output_df # on the canvas the name of the new dataset will be output_dataset
      }`
    },
    {
      // Recipe snippet: OpenAI chat-completion call whose user message combines a text
      // question with an image referenced by URL.
      // The embedded Python caps the reply at 300 tokens, prints the first choice and
      // returns the reply text as a one-row DataFrame under the 'output_dataset' key.
      // The 'titanic' entity is read into input_df_1 but is not used by the request.
      title: "OpenAI Call with image input",
      tooltip: "OpenAI Call with image input",
      code: `def transform(entities, context):
    # Import the OpenAI library
    from openai import OpenAI
    import os
    import pandas as pd
  
    input_df_1 = entities['titanic']  # This is for reading the input dataset
  
    # Initialize the OpenAI client with the API key from environment variables
    client = OpenAI(api_key=os.environ["OPEN_AI_KEY"])
  
    # Create a chat completion request with an image input
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "What's in this image?"},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg",
                        }
                    },
                ],
            }
        ],
        max_tokens=300,
    )
  
    # Print the response from the completion
    print(response.choices[0])
  
    # Print the response message from the completion
    output = response.choices[0].message.content
  
    #convert output to a dataframe
    output_df = pd.DataFrame({"Output": [output]})
  
    return {
          "output_dataset": output_df # on the canvas the name of the new dataset will be output_dataset
      }`
    },
    {
      title: "OpenAI Call with base 64 image",
      tooltip: "OpenAI Call with base 64 image",
      code: `def transform(entities, context):
    # Import the OpenAI library and base64 module
    from openai import OpenAI
    import base64
    import os
    import pandas as pd
  
    input_df_1 = entities['titanic']  # This is for reading the input dataset
  
    # Initialize the OpenAI client with the API key from environment variables
    client = OpenAI(api_key=os.getenv("OPEN_AI_KEY"))
  
    # Define a function to convert an image file to a base64 string
    def image_file_to_base64(filepath):
        with open(filepath, "rb") as image_file:
            # Read file as binary
            img_bytes = image_file.read()
            # Encode to base64
            base64_encoded = base64.b64encode(img_bytes).decode("utf-8")
        return base64_encoded
  
    # Convert the image file to base64
    base64_image = image_file_to_base64(image_path)
  
    # Create the content payload with the base64 image
    content = [{
        "type": "image_url",
        "image_url": {
            "url": f"data:image/jpeg;base64,{base64_image}"
        }
    }]
  
    # Create the payload for the chat completion request
    payload = {
        "model": "gpt-4o",
        "messages": [
            {"role": "system", "content": "You are an expert data scientist."},
            {"role": "user", "content": "Explain this image to me."},
            {"role": "user", "content": content}
        ],
        "max_tokens": 1000,
    }
  
    # Create a chat completion request with the payload
    response = client.chat.completions.create(**payload)
  
    # Print the response message from the completion
    output = response.choices[0].message.content
  
    #convert output to a dataframe
    output_df = pd.DataFrame({"Output": [output]})
  
    return {
          "output_dataset": output_df # on the canvas the name of the new dataset will be output_dataset
      }`
    },
    {
      title: "OpenAI Call with functions",
      tooltip: "OpenAI Call with functions",
      code: `def transform(entities, context):
    # Import the OpenAI library
    from openai import OpenAI
    import os
    import pandas as pd
  
    input_df_1 = entities['titanic']  # This is for reading the input dataset
  
    # Initialize the OpenAI client with the API key from environment variables
    client = OpenAI(api_key=os.environ["OPEN_AI_KEY"])
  
    # Define the tools (functions) to be used in the chat completion
    tools = [
      {
        "type": "function",
        "function": {
          "name": "get_current_weather",
          "description": "Get the current weather in a given location",
          "parameters": {
            "type": "object",
            "properties": {
              "location": {
                "type": "string",
                "description": "The city and state, e.g. San Francisco, CA",
              },
              "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
            },
            "required": ["location"],
          },
        }
      }
    ]
  
    # Define the messages for the chat completion
    messages = [{"role": "user", "content": "What's the weather like in Boston today?"}]
  
    # Create a chat completion request with the tools and messages
    completion = client.chat.completions.create(
      model="gpt-4o",
      messages=messages,
      tools=tools,
      tool_choice="auto"
    )
  
    # Print the response message from the completion
    output = completion.choices[0].message.content
  
    #convert output to a dataframe
    output_df = pd.DataFrame({"Output": [output]})
  
    return {
          "output_dataset": output_df # on the canvas the name of the new dataset will be output_dataset
      }`
    },
    {
      // Recipe snippet: same two-message chat-completion call as the basic OpenAI
      // snippet, but addressed to the "o1" model.
      // The embedded Python prints the reply text and returns it as a one-row DataFrame
      // under the 'output_dataset' key.
      // The 'titanic' entity is read into input_df_1 but is not used by the request.
      title: "OpenAI O1 Model Call",
      tooltip: "OpenAI O1 Model Call",
      code: `def transform(entities, context):
    # Import the OpenAI library
    from openai import OpenAI
    import os
    import pandas as pd
  
    input_df_1 = entities['titanic']  # This is for reading the input dataset
  
    # Initialize the OpenAI client with the API key from environment variables
    client = OpenAI(api_key=os.environ["OPEN_AI_KEY"])
  
    # Create a chat completion request with the O1 model and messages
    completion = client.chat.completions.create(
      model="o1",
      messages=[
        {"role": "developer", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello!"}
      ]
    )
    # Print the response message from the completion
    print(completion.choices[0].message.content)
  
    # Print the response message from the completion
    output = completion.choices[0].message.content
  
    #convert output to a dataframe
    output_df = pd.DataFrame({"Output": [output]})
  
    return {
          "output_dataset": output_df # on the canvas the name of the new dataset will be output_dataset
      }`
    },
    {
      // Recipe snippet: OpenAI chat-completion call constrained by a strict JSON schema.
      // The schema ("questions_schema") requires five string properties, question1 to
      // question5, and forbids additional properties.
      // The embedded Python calls gpt-4o with temperature 0.1 and seed 1, prints the
      // reply and returns it as a one-row DataFrame under the 'output_dataset' key.
      // The 'titanic' entity is read into input_df_1 but is not used by the request.
      title: "OpenAI with fixed response schema",
      tooltip: "OpenAI with fixed response schema",
      code: `def transform(entities, context):
    # Import the OpenAI library
    from openai import OpenAI
    import os
    import pandas as pd
  
    input_df_1 = entities['titanic']  # This is for reading the input dataset
  
    # Define the response format schema
    response_format={
        "type": "json_schema",
        "json_schema": {
          "name": "questions_schema",
          "strict": True,
          "schema": {
            "type": "object",
            "properties": {
              "question1": {
                "type": "string",
                "description": "First question to be filled by the model."
              },
              "question2": {
                "type": "string",
                "description": "Second question to be filled by the model."
              },
              "question3": {
                "type": "string",
                "description": "Third question to be filled by the model."
              },
              "question4": {
                "type": "string",
                "description": "Fourth question to be filled by the model."
              },
              "question5": {
                "type": "string",
                "description": "Fifth question to be filled by the model."
              }
            },
            "required": [
              "question1",
              "question2",
              "question3",
              "question4",
              "question5"
            ],
            "additionalProperties": False,
            "$defs": {}
          }
        }
      }
  
    # Initialize the OpenAI client with the API key from environment variables
    client = OpenAI(api_key=os.getenv("OPEN_AI_KEY"))
  
    # Define the messages for the chat completion
    messages=[
        {"role": "developer", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello!"}
      ]
  
    # Create a chat completion request with the response format schema
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=messages,
        response_format=response_format,        
        temperature=0.1,
        seed=1
    )
  
    # Extract and print the response content
    output = response.choices[0].message.content
    print(output)
  
    #convert output to a dataframe
    output_df = pd.DataFrame({"Output": [output]})
  
    return {
          "output_dataset": output_df # on the canvas the name of the new dataset will be output_dataset
      }`
    },
    {
      // Recipe snippet: sends a PDF to an Anthropic model and returns the extracted text.
      // The embedded Python fetches the API key with Helpers.get_secret(context, "anthropic")
      // and returns an empty dict when no key is found.
      // It downloads the PDF through Helpers.downloadArtifacts, base64-encodes it and
      // sends it as a "document" content part together with a text instruction.
      // The reply is printed and returned as a one-row DataFrame under 'output_dataset'.
      // 'Artifact Name' and 'File Name.pdf' are placeholders; the 'titanic' entity is
      // read into input_df_1 but is not used by the request.
      title: "Anthropic call with PDF input and content extraction",
      tooltip: "Anthropic call with PDF input and content extraction",
      code: `def transform(entities, context):
    # Import necessary libraries
    import pandas as pd  # Pandas library for data manipulation
    from utils.notebookhelpers.helpers import Helpers  # Import Helpers class to use utility functions
    import base64
    from anthropic import Anthropic
    
  
    input_df_1 = entities['titanic']  # This is for reading the input dataset
  
    # Fetch the access token using a helper function
    access_token = Helpers.get_secret(context, "anthropic")
    if not access_token:
    # If the access token could not be retrieved, print an error and stop execution
        print("Failed to retrieve access token.")
        return {}
  
    # While PDF support is in beta, you must pass in the correct beta header
    client = Anthropic(default_headers={
        "anthropic-beta": "pdfs-2024-09-25"
        },
        api_key=access_token,
    )
  
    # For now, only claude-3-5-sonnet-20241022 supports PDFs
    MODEL_NAME = "claude-3-5-sonnet-20241022"
  
    # Make a useful helper function.
    def get_completion(messages):
        response = client.messages.create(
            model=MODEL_NAME,
            max_tokens=8192,
            temperature=0,
            messages=messages
        )
        return response.content[0].text
  
    # Fetch PDF file from artifact
    try:
        pdf_file_from_artifact = Helpers.downloadArtifacts(context, 'Artifact Name')['File Name.pdf'] 
    except KeyError:
        raise ValueError("File 'File Name' not found in the artifact.")
    except Exception as e:
        raise RuntimeError(f"Error downloading the file: {e}")
  
    # Start by reading in the PDF and encoding it as base64.
    with open(pdf_file_from_artifact, "rb") as pdf_file:
        binary_data = pdf_file.read()
        base_64_encoded_data = base64.b64encode(binary_data)
        base64_string = base_64_encoded_data.decode('utf-8')
  
    # Define the messages for the chat completion
    messages = [
        {
            "role": 'user',
            "content": [
                {"type": "document", "source": {"type": "base64", "media_type": "application/pdf", "data": base64_string}},
                {"type": "text", "text": "Extract the data from this file in a structured format"}
            ]
        }
    ]
  
    # Fetch and process the document summary
    output = get_completion(messages)
    print(output)
  
    #convert output to a dataframe
    output_df = pd.DataFrame({"Output": [output]})
  
    return {
          "output_dataset": output_df # on the canvas the name of the new dataset will be output_dataset
      }`
    }
  ]
};

/**
 * Space-separated list of Python package specifiers for the RAG snippets.
 * Each package is listed once; numpy is the only entry pinned to a version.
 * NOTE(review): presumably handed to the recipe environment as install
 * requirements — the consumer is outside this part of the file, confirm there.
 */
const RAG_REQUIREMENTS = [
  "llama-cloud-services",
  "llama-index-core",
  "llama-index-readers-file",
  "python-dotenv",
  "docling",
  "llama-index-vector-stores-qdrant",
  "llama-index-embeddings-fastembed",
  "llama-index-llms-openai",
  "llama-index-embeddings-openai",
  "llama-index-agent-openai",
  "llama-index-readers-docling",
  "llama-index-node-parser-docling",
  "llama-index",
  "pydantic-core",
  "numpy==1.23.5",
].join(" ");

export const API_CONNECTOR_RECIPE_SNIPPETS = {
  "Generating Chart and Dataset": [
    {
      // API-connector snippet in notebook-cell form (cells are separated by the
      // NOTEBOOK-CELL marker lines).
      // The embedded Python creates a context with Helpers.getOrCreateContext, reads the
      // 'titanic' dataset, drops its 'Age' column and saves the result with
      // Helpers.save_output_dataset under the name 'outputDataset'.
      title: "Dataset Creation",
      tooltip: "Dataset Creation",
      code: `# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
  # Function to output a new dataset
  # Required imports
  from utils.notebookhelpers.helpers import Helpers
  from utils.dtos.templateOutputCollection import TemplateOutputCollection
  from utils.dtos.templateOutput import TemplateOutput
  from utils.dtos.templateOutput import OutputType
  from utils.dtos.templateOutput import ChartType
  from utils.dtos.variable import Metadata
  from utils.rcclient.commons.variable_datatype import VariableDatatype
  from utils.dtos.templateOutput import FileType
  from utils.dtos.rc_ml_model import RCMLModel
  from utils.notebookhelpers.helpers import Helpers
  
  context = Helpers.getOrCreateContext(contextId='contextId', localVars=locals())
  
  # -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
  
  # How to output a new dataset
  input_df_1 = Helpers.getEntityData(context, 'titanic')  # This is for reading the input dataset
  
  # Import necessary libraries: pandas for data manipulation and numpy for numerical operations
  import pandas as pd
  import numpy as np
  
  # Drop the 'Age' column from the input dataframe
  output_df_1 = input_df_1.drop(['Age'], axis=1)  # axis=1 means drop a column (instead of a row)
  
  # -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
  
  # Add dataset to output
  Helpers.save_output_dataset(context=context, output_name='outputDataset', data_frame=output_df_1)`
    },
    {
      // API-connector snippet in notebook-cell form (cells are separated by the
      // NOTEBOOK-CELL marker lines).
      // The embedded Python reads the 'titanic' dataset, counts the values of its 'Sex'
      // column and draws them as a Plotly pie chart with a 0.3 hole.
      // It shows the figure and saves it with Helpers.save_output_plotly_chart_as_json
      // under the chart title 'my-chart'.
      title: "Chart Creation",
      tooltip: "Chart Creation",
      code: `# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
  # Function to create a chart 
  # Required imports
  from utils.notebookhelpers.helpers import Helpers
  from utils.dtos.templateOutputCollection import TemplateOutputCollection
  from utils.dtos.templateOutput import TemplateOutput
  from utils.dtos.templateOutput import OutputType
  from utils.dtos.templateOutput import ChartType
  from utils.dtos.variable import Metadata
  from utils.rcclient.commons.variable_datatype import VariableDatatype
  from utils.dtos.templateOutput import FileType
  from utils.dtos.rc_ml_model import RCMLModel
  from utils.notebookhelpers.helpers import Helpers
  
  context = Helpers.getOrCreateContext(contextId='contextId', localVars=locals())
  
  # -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
  
  input_df_1 = Helpers.getEntityData(context, 'titanic')  # This is for reading the input dataset
  
  # Import necessary libraries for creating the chart
  import plotly.express as px
  import plotly.graph_objects as go  # For more complex figures like Pie charts
  import plotly.io as pio  # To configure default templates for Plotly
  import pandas as pd
  import numpy as np
  
  # Set default plot theme to 'simple_white' for a clean chart style
  pio.templates.default = 'simple_white'
  
  # Count occurrences of each gender ('Sex' column) in the dataset
  gender_counts = input_df_1['Sex'].value_counts()
  
  # Create a Pie chart with gender labels and their respective counts, adding a hole for a donut chart style
  fig_1 = go.Figure(data=[go.Pie(labels=gender_counts.index, values=gender_counts.values, hole=0.3)])
  
  # Update the layout of the Pie chart, including title, font style, size, and color
  fig_1.update_layout(
      title_text='Gender Distribution',  # Title of the chart
      font=dict(family='Roboto, monospace', size=16, color='black'),  # Font customization
      autosize=True  # Enable automatic resizing of the chart
  )
  
  # Display the generated chart
  fig_1.show()
  
  # -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
  # Add chart to output
  Helpers.save_output_plotly_chart_as_json(context=context, chart_title='my-chart', plotly_fig=fig_1, group=None)`
    }
  ],
  "Connecting to APIs": [
    {
      title: "Fetching Data from an API",
      tooltip: "Fetching Data from an API",
      code: `# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
  # Fetching Data from an API
  # Required imports
  from utils.notebookhelpers.helpers import Helpers
  from utils.dtos.templateOutputCollection import TemplateOutputCollection
  from utils.dtos.templateOutput import TemplateOutput
  from utils.dtos.templateOutput import OutputType
  from utils.dtos.templateOutput import ChartType
  from utils.dtos.variable import Metadata
  from utils.rcclient.commons.variable_datatype import VariableDatatype
  from utils.dtos.templateOutput import FileType
  from utils.dtos.rc_ml_model import RCMLModel
  from utils.notebookhelpers.helpers import Helpers
  
  context = Helpers.getOrCreateContext(contextId='contextId', localVars=locals())
  
  # -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
  
  # Importing necessary libraries: requests to make API calls and pandas for data manipulation
  import requests
  import pandas as pd
  
  # Define the API URL from which to fetch user data
  url = "https://jsonplaceholder.typicode.com/users"
  
  # Make a GET request to the API to fetch the user data
  response = requests.get(url)
  
  # Check if the response from the API is successful (status code 200)
  if response.status_code == 200:
      # Convert the API response from JSON format to a DataFrame
      data = response.json()
      users_df = pd.json_normalize(data)
  
      # Inform that the data retrieval was successful
      print("Successfully retrieved users.")
  else:
      # If there's an error, print the error status code and message for troubleshooting
      print(f"Error: {response.status_code}")
      print(response.text)
  
      users_df = pd.DataFrame()
  
  # -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
  # Add dataset to output
  Helpers.save_output_dataset(context=context, output_name='output_users_dataset', data_frame=users_df)`
    },
    {
      title: "Posting Data to an API",
      tooltip: "Posting Data to an API",
      code: `# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
  # Posting Data to an API
  # Required imports
  from utils.notebookhelpers.helpers import Helpers
  from utils.dtos.templateOutputCollection import TemplateOutputCollection
  from utils.dtos.templateOutput import TemplateOutput
  from utils.dtos.templateOutput import OutputType
  from utils.dtos.templateOutput import ChartType
  from utils.dtos.variable import Metadata
  from utils.rcclient.commons.variable_datatype import VariableDatatype
  from utils.dtos.templateOutput import FileType
  from utils.dtos.rc_ml_model import RCMLModel
  from utils.notebookhelpers.helpers import Helpers
  
  context = Helpers.getOrCreateContext(contextId='contextId', localVars=locals())
  # -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
  
  # Importing necessary libraries: pandas for data manipulation and requests to interact with the API
  import pandas as pd
  import requests
  
  # Extract the input dataset from the provided entities
  input_df_1 = Helpers.getEntityData(context, 'Dataset_Name')  # this is for reading input dataset
  
  # Extract the values from the first row of the dataset
  first_row = input_df_1.iloc[0]
  
  # Define the API endpoint for posting data
  url = "https://jsonplaceholder.typicode.com/posts"
  
  # Prepare the data to be posted to the API using specific columns from the first row
  post_data = {
      'title': str(first_row['JobTitle']),      # Using 'JobTitle' as the title of the post
      'body': str(first_row['GivenName']),      # Using 'GivenName' as the body of the post
      'userId': int(first_row['EmployeeNumber'])# Using 'EmployeeNumber' as the userId
  }
  
  # Make a POST request to the API with the prepared data
  response = requests.post(url, json=post_data)
  
  # Check if the POST request was successful (status code 201)
  if response.status_code == 201:
      # Convert the API response to a DataFrame
      new_post = response.json()
  
      # Inform that the post was successfully created
      print("Successfully created a new post.")
  
      new_post_df = pd.json_normalize(new_post)
  else:
      # If there's an error, print the error status code and message for troubleshooting
      print(f"Error: {response.status_code}")
      print(response.text)
  
      new_post_df = pd.DataFrame()
  
  # -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
  # Add dataset to output
  Helpers.save_output_dataset(context=context, output_name='output_new_post_dataset', data_frame=new_post_df)`
    },
    {
      // Recipe snippet (Python source kept verbatim in the template literal below):
      // reads row index 15 of the 'Dataset_Name' input, sends its JobTitle / GivenName /
      // EmployeeNumber values in a PUT request to the JSONPlaceholder posts endpoint, and saves
      // the normalized response (or an empty DataFrame on a non-200 status) as
      // 'output_updated_post_dataset'.
      // NOTE(review): the snippet names the selected row `first_row` although it reads index 15,
      // and it will fail on inputs with fewer than 16 rows — confirm this is intended.
      title: "Updating Data in an API",
      tooltip: "Updating Data in an API",
      code: `# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
  # Updating Data in an API
  # Required imports
  from utils.notebookhelpers.helpers import Helpers
  from utils.dtos.templateOutputCollection import TemplateOutputCollection
  from utils.dtos.templateOutput import TemplateOutput
  from utils.dtos.templateOutput import OutputType
  from utils.dtos.templateOutput import ChartType
  from utils.dtos.variable import Metadata
  from utils.rcclient.commons.variable_datatype import VariableDatatype
  from utils.dtos.templateOutput import FileType
  from utils.dtos.rc_ml_model import RCMLModel
  from utils.notebookhelpers.helpers import Helpers
  
  context = Helpers.getOrCreateContext(contextId='contextId', localVars=locals())
  # -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
  
  # Importing necessary libraries: requests to interact with the API and pandas for data manipulation
  import requests
  import pandas as pd
  
  # Extract the input dataset from the provided entities
  input_df_1 = Helpers.getEntityData(context, 'Dataset_Name')  # this is for reading the input dataset
  
  # Extract values from the 16th row (index 15) of the dataset
  first_row = input_df_1.iloc[15]
  
  # Specify the ID of the post to be updated
  post_id = 1  # ID of the post to be updated
  url = f"https://jsonplaceholder.typicode.com/posts/{post_id}"  # API URL for updating the post
  
  # Prepare the updated data using specific columns from the 16th row
  updated_data = {
      'title': str(first_row['JobTitle']),      # Updating the post title using 'JobTitle'
      'body': str(first_row['GivenName']),      # Updating the post body using 'GivenName'
      'userId': int(first_row['EmployeeNumber'])# Updating the userId using 'EmployeeNumber'
  }
  
  # Make a PUT request to update the post with the new data
  response = requests.put(url, json=updated_data)
  
  # Check if the PUT request was successful (status code 200)
  if response.status_code == 200:
      # Convert the API response to a DataFrame
      updated_post = response.json()
  
      # Inform that the post was successfully updated
      print("Successfully updated the post.")
  
      updated_post_df = pd.json_normalize(updated_post)
  else:
      # If there's an error, print the error status code and message for troubleshooting
      print(f"Error: {response.status_code}")
      print(response.text)
  
      # Return an empty dataframe in case of failure
      updated_post_df = pd.DataFrame()
  
  # -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
  # Add dataset to output
  Helpers.save_output_dataset(context=context, output_name='output_updated_post_dataset', data_frame=updated_post_df)`
    },
    {
      // Recipe snippet (Python source kept verbatim in the template literal below):
      // lists the log rows of a prediction service, fetches the expanded details of each log
      // using the value in the row's first cell as the run id, and saves the collected
      // request/response pairs as 'output_users_dataset'.
      // Any failed HTTP call aborts the snippet via raise_for_status().
      // NOTE(review): the output name mentions "users" although the rows are log
      // request/response pairs — confirm the name is intended.
      title: "Fetch data from RapidCanvas API",
      tooltip: "Fetch data from RapidCanvas API",
      code: `# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
  # Fetch data from RapidCanvas API
  # Required imports
  from utils.notebookhelpers.helpers import Helpers
  from utils.dtos.templateOutputCollection import TemplateOutputCollection
  from utils.dtos.templateOutput import TemplateOutput
  from utils.dtos.templateOutput import OutputType
  from utils.dtos.templateOutput import ChartType
  from utils.dtos.variable import Metadata
  from utils.rcclient.commons.variable_datatype import VariableDatatype
  from utils.dtos.templateOutput import FileType
  from utils.dtos.rc_ml_model import RCMLModel
  from utils.notebookhelpers.helpers import Helpers
  
  context = Helpers.getOrCreateContext(contextId='contextId', localVars=locals())
  # -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
  
  # Import necessary libraries
  import requests  # For making HTTP requests to the API
  import pandas as pd  # For data manipulation with DataFrames
  import json  # For handling JSON data
  from utils.notebookhelpers.helpers import Helpers  # Helper utilities
  
  # Define the service ID and server from which the logs will be fetched
  service_id = '****-****-****-****-************'  # Prediction service ID
  server = 'staging.dev.rapidcanvas.net'  # Server environment
  
  # Fetch the secret token used for API authorization
  token = Helpers.get_secret(context, "token")
  
  # Define the base URLs for fetching logs and expanded log details
  logs_url = f"https://{server}/api/v2/prediction-services/{service_id}/logs"
  expand_logs_url = f"https://{server}/api/v2/prediction-services/{service_id}/expand-logs?runId="
  
  # Set up the headers for the API requests, including the authorization token
  headers = {
      "Authorization": f"Bearer {token}",  # Token-based authorization
      "Content-Type": "application/json"  # Specify JSON format for the requests
  }
  
  # Make a request to fetch the basic log entries
  logs_response = requests.get(logs_url, headers=headers)
  logs_response.raise_for_status()  # Raise an error if the request fails
  logs = logs_response.json()['data']['rows']  # Extract log entries from the JSON response
  
  # Initialize an empty list to store the detailed log data
  data = []
  
  # Iterate over each log entry to fetch expanded log details
  for log in logs:
      log_id = log['cells'][0]  # Assume the log ID is in the first cell of the log entry
      # Fetch detailed information for each log using the log ID
      log_details_response = requests.get(expand_logs_url + log_id, headers=headers)
      log_details_response.raise_for_status()  # Raise an error if the request fails
      log_details = log_details_response.json()  # Parse the detailed log response
  
      # Extract the request and response data from the detailed log
      print(log_details.get('request', '{}'))  # Debugging: print the request data
      request_data = log_details.get('request', '{}')  # Get the request data, defaulting to '{}'
      print(log_details.get('response', '{}'))  # Debugging: print the response data
      response_data = log_details.get('response', '{}')  # Get the response data, defaulting to '{}'
  
      # Append the request and response data to the 'data' list
      data.append({'req': request_data, 'res': response_data})
  
  # Convert the collected data into a pandas DataFrame for further use
  df = pd.DataFrame(data)
  
  # -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
  # Add dataset to output
  Helpers.save_output_dataset(context=context, output_name='output_users_dataset', data_frame=df)`
    }
  ],
  "Example 3rd Party Connections": [
    {
      title: "Fetching from an API with Access Token",
      tooltip: "Fetching from an API with Access Token",
      code: `# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
  # Fetching from an API with Access Token
  # Required imports
  from utils.notebookhelpers.helpers import Helpers
  from utils.dtos.templateOutputCollection import TemplateOutputCollection
  from utils.dtos.templateOutput import TemplateOutput
  from utils.dtos.templateOutput import OutputType
  from utils.dtos.templateOutput import ChartType
  from utils.dtos.variable import Metadata
  from utils.rcclient.commons.variable_datatype import VariableDatatype
  from utils.dtos.templateOutput import FileType
  from utils.dtos.rc_ml_model import RCMLModel
  from utils.notebookhelpers.helpers import Helpers
  
  context = Helpers.getOrCreateContext(contextId='contextId', localVars=locals())
  # -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
  
  # Import necessary libraries: requests for API interaction, pandas for data manipulation, and Helpers for token retrieval
  import requests
  import pandas as pd
  from utils.notebookhelpers.helpers import Helpers
  
  # Fetch the access token using a helper function
  access_token = Helpers.get_secret(context, "Example_Secret_Key")
  if not access_token:
      # If the access token could not be retrieved, print an error and stop execution
      print("Failed to retrieve access token.")
      raise Exception("Failed to retrieve access token.")
  
  # Define the API URL to fetch orders with specific parameters
  url_orders = "https://www.domain.shop/admin/api/2024-01/orders.json?created_at_max=2024-12-12T23:59:59-00:00&status=any"
  url = url_orders
  
  # Set up the request headers with the access token for authorization
  headers = {
      'X-Shopify-Access-Token': access_token
  }
  
  # Initialize variables to handle pagination and store the fetched data
  dataframes = []  # To store data from all pages
  i = 0  # Page counter
  next_link = True  # Indicator to check if more pages are available
  
  # Loop through paginated API results
  while True:
      if not next_link:  # Stop if there are no more next links
          break
  
      # For the first request, use the initial URL; for subsequent requests, use the 'next' link
      if i == 0:
          current_url = url
      else:
          current_url = next_url
  
      # Make the GET request to fetch orders
      response = requests.get(current_url, headers=headers)
  
      # Check if the request was successful (status code 200)
      if response.status_code == 200:
          data = response.json()
  
          # If there are no more orders, stop the loop
          if not data.get('orders'):
              break
  
          # Convert the orders data from JSON to a DataFrame
          df = pd.json_normalize(data['orders'])
          dataframes.append(df)  # Append the DataFrame to the list
          i += 1  # Increment page counter
          print('Page ' + str(i))
  
          # Check if there's a 'next' link for further pages
          if 'next' in response.links:
              next_link = response.links['next']
              next_url = next_link['url']
          else:
              print('No more next links.')
              next_link = False  # No more pages to fetch
  
      else:
          # If there's an error, print the status code and error message
          print(f"Error: {response.status_code}")
          print(response.text)
          break
  
  # Combine all fetched DataFrames into a single DataFrame and remove duplicate entries by 'id'
  orders_df = pd.concat(dataframes, ignore_index=True)
  orders_df.drop_duplicates(subset=['id'], inplace=True)
  
  # -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
  # Add dataset to output
  Helpers.save_output_dataset(context=context, output_name='output_orders_dataset', data_frame=orders_df)`
    },
    {
      title: "Fetching data from GCS",
      tooltip: "Fetching data from GCS",
      code: `# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
  # Fetch data from GCS
  # Required imports
  from utils.notebookhelpers.helpers import Helpers
  from utils.dtos.templateOutputCollection import TemplateOutputCollection
  from utils.dtos.templateOutput import TemplateOutput
  from utils.dtos.templateOutput import OutputType
  from utils.dtos.templateOutput import ChartType
  from utils.dtos.variable import Metadata
  from utils.rcclient.commons.variable_datatype import VariableDatatype
  from utils.dtos.templateOutput import FileType
  from utils.dtos.rc_ml_model import RCMLModel
  from utils.notebookhelpers.helpers import Helpers
  
  context = Helpers.getOrCreateContext(contextId='contextId', localVars=locals())
  # -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
  
  import os
  import pandas as pd
  from utils.notebookhelpers.gcs import GCSHelper
  from utils.notebookhelpers.helpers import Helpers
  
  # Retrieve the secret key by accessing the workspace's settings (in this case, the 'RC_DATA_INTERNAL_KEY' secret).
  # This key is required to authenticate with GCS (Google Cloud Storage).
  key = Helpers.get_secret(context, 'RC_DATA_INTERNAL_KEY')
  
  # Write the retrieved secret key to a local file, which will be used for GCS authentication.
  # This key file is necessary for accessing GCS resources.
  key_file = os.path.join(Helpers.getChildDir(context), "gcs.key")
  with open(key_file, "w") as f:
      f.write(key)
  
  # Initialize GCSHelper, a utility that helps interact with Google Cloud Storage.
  # We pass the key file and the name of the GCS bucket ("rapidcanvas-training-data") to the helper.
  gcs_helper = GCSHelper(key_file, "rapidcanvas-training-data")
  
  # Specify the name of the CSV file you want to download from the GCS bucket.
  file_name = "raw_txn_lite.csv"
  
  # Download the specified CSV file from the GCS bucket to the local directory.
  # 'Helpers.getChildDir(context)' provides the path where the file will be saved locally.
  gcs_helper.download_file(file_name, Helpers.getChildDir(context))
  
  # Construct the full local file path where the CSV file has been saved after the download.
  # We concatenate the directory path with the file name to create the full file path.
  file_path = Helpers.getChildDir(context) + 'raw_txn_lite.csv'
  
  # Use pandas to read the CSV file into a DataFrame, which allows easy data manipulation and analysis.
  df = pd.read_csv(file_path)
  
  # -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
  # Add dataset to output
  Helpers.save_output_dataset(context=context, output_name='output_df', data_frame=df)`
    },
    {
      title: "Fetching data from Slack API",
      tooltip: "Fetching data from Slack API",
      code: `# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
  # Example integration with Slack API
  # Required imports
  from utils.notebookhelpers.helpers import Helpers
  from utils.dtos.templateOutputCollection import TemplateOutputCollection
  from utils.dtos.templateOutput import TemplateOutput
  from utils.dtos.templateOutput import OutputType
  from utils.dtos.templateOutput import ChartType
  from utils.dtos.variable import Metadata
  from utils.rcclient.commons.variable_datatype import VariableDatatype
  from utils.dtos.templateOutput import FileType
  from utils.dtos.rc_ml_model import RCMLModel
  from utils.notebookhelpers.helpers import Helpers
  
  context = Helpers.getOrCreateContext(contextId='contextId', localVars=locals())
  # -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
  
  # Helper function to fetch the list of channels from Slack
  def get_channels(token):
      # Set up the headers for the API request, including the Bearer token for authorization
      headers = {"Authorization": f"Bearer {token}"}
      # Define the parameters to fetch both public and private channels, limited to 1000 results
      params = {"limit": 1000, "types": "public_channel,private_channel"}
      # Make the API request to Slack to get the list of channels
      response = requests.get(SLACK_CHANNEL_LIST_URL, headers=headers, params=params)
      if response.status_code == 200:
          # Return the list of channels if the request was successful
          return response.json().get("channels", [])
      else:
          # Print an error message if the request failed
          print(f"Error fetching channels: {response.status_code}")
          return None
  
  # Helper function to get the ID of a specific channel by its name
  def get_channel_id(channels, channel_name):
      # Iterate through all channels and find the one that matches the provided name
      for channel in channels:
          if channel["name"] == channel_name:
              return channel["id"]
      return None  # Return None if the channel name was not found
  
  # Helper function to fetch messages from a specific Slack channel
  def fetch_slack_messages(channel_id, token, oldest_timestamp):
      # Set up the headers for the API request, including the Bearer token for authorization
      headers = {"Authorization": f"Bearer {token}"}
      # Define the parameters, including the channel ID and oldest timestamp, limited to 1000 results
      params = {"channel": channel_id, "oldest": oldest_timestamp, "limit": 1000}
      messages = []  # Initialize an empty list to collect messages
  
      # Loop to fetch messages and handle pagination if more messages exist
      while True:
          # Make the API request to Slack to fetch the messages
          response = requests.get(SLACK_API_URL, headers=headers, params=params)
          if response.status_code == 200:
              # Add the messages to the list
              json_response = response.json()
              messages += json_response.get("messages", [])
              # Check if there is a next cursor for pagination
              next_cursor = json_response.get("response_metadata", {}).get("next_cursor", "")
              if not next_cursor:
                  break  # Exit the loop if there are no more pages of messages
              else:
                  # Set the cursor for the next request if there are more messages to fetch
                  params["cursor"] = next_cursor
          else:
              # Print an error message if the request failed
              print(f"Error fetching messages: {response.status_code}")
              break
      return messages  # Return the list of messages
  
  # Helper function to get the timestamp for 30 days ago
  def get_30_days_ago_timestamp():
      # Get the current time in seconds since the epoch
      current_time = time.time()
      # Calculate 30 days in seconds
      thirty_days_in_seconds = 30 * 24 * 60 * 60
      # Return the timestamp from 30 days ago
      return int(current_time - thirty_days_in_seconds)
  
  # Helper function to process a Slack message and format it for the DataFrame
  def process_message(message, channel_name, cols):
      row = {}  # Initialize an empty dictionary to hold the message data
      # Get the message timestamp and text content
      ts = message.get("ts", "")
      text = message.get("text", "")
      attachment_text = ""  # Initialize an empty string for any attachment text
      # Collect all attachment text from the message (if any)
      for attachment in message.get("attachments", []):
          attachment_text += attachment.get("text", "")
      # Add each piece of data to the row and update the column list
      add_data("ts", ts, cols, row)
      add_data("text", text, cols, row)
      add_data("attach_text", attachment_text, cols, row)
      add_data("channel_name", channel_name, cols, row)
      return row  # Return the processed row
  
  # Helper function to add data to the row and ensure the column is included
  def add_data(key, val, cols, row):
      # Add the key to the column list if it's not already there
      if key not in cols:
          cols.append(key)
      # Add the value to the row dictionary under the given key
      row[key] = val
  
  # Import necessary libraries
  import requests  # For making API requests
  import pandas as pd  # For data manipulation with DataFrames
  import time  # For working with timestamps
  from datetime import datetime, timedelta  # For date calculations
  from utils.notebookhelpers.helpers import Helpers  # Helper utilities
  
  # Get the timestamp from 30 days ago, used to fetch messages within this time frame
  oldest_timestamp = get_30_days_ago_timestamp()
  
  # Fetch the Slack API token from secure storage
  token = Helpers.get_secret(context, "slack_token")
  print(token)  # Debugging: print the token (be careful in production with printing sensitive info)
  
  # List of Slack channel names to retrieve messages from
  channel_names = ["channel1", "channel2"]
  
  # Fetch the available channels from Slack using the token
  channels = get_channels(token)
  
  # Initialize empty lists to hold column names and the data rows
  cols = []  # Column headers for the DataFrame
  data = []  # Rows of data to be collected
  
  # Check if channels were successfully fetched
  if channels:
      # Iterate through each channel name specified in 'channel_names'
      for channel_name in channel_names:
          # Get the Slack channel ID corresponding to the channel name
          channel_id = get_channel_id(channels, channel_name)
  
          if channel_id:
              print("found channel id", channel_id, oldest_timestamp)  # Debugging: print channel ID and timestamp
  
              # Fetch messages from the Slack channel using the channel ID and token
              messages = fetch_slack_messages(channel_id, token, oldest_timestamp)
              print(messages)  # Debugging: print the retrieved messages
  
              # Process each message retrieved from Slack
              for message in messages:
                  # Process the message and store it as a row in the data list
                  row = process_message(message, channel_name, cols)
                  data.append(row)
  
  # Convert the collected data into a DataFrame for easier manipulation and analysis
  df = pd.DataFrame(data, columns=cols)
  
  # -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
  
  # Add dataset to output
  Helpers.save_output_dataset(context=context, output_name='messages', data_frame=df)`
    }
  ],
  "Working with Artifacts": [
    {
      title: "Fetching data from an artifact",
      tooltip: "Fetching data from an artifact",
      code: `# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
  # Reading data from csv stored in an artifact
  # Required imports
  from utils.notebookhelpers.helpers import Helpers
  from utils.dtos.templateOutputCollection import TemplateOutputCollection
  from utils.dtos.templateOutput import TemplateOutput
  from utils.dtos.templateOutput import OutputType
  from utils.dtos.templateOutput import ChartType
  from utils.dtos.variable import Metadata
  from utils.rcclient.commons.variable_datatype import VariableDatatype
  from utils.dtos.templateOutput import FileType
  from utils.dtos.rc_ml_model import RCMLModel
  from utils.notebookhelpers.helpers import Helpers
  
  context = Helpers.getOrCreateContext(contextId='contextId', localVars=locals())
  
  # -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
  
  # Import necessary modules
  import requests  # Used for making HTTP requests, if needed
  import pandas as pd  # Pandas library for data manipulation
  
  # Try to download the CSV file using the Helpers.downloadArtifacts function
  try:
    # Downloads the 'transaction.xlsx' file and stores it in csv_file
    csv_file = Helpers.downloadArtifacts(context, 'Name of the artifact')['transaction.csv']
  except KeyError:
    # Raise an error if 'transaction.xlsx' is not found in the downloaded artifact
    raise ValueError("File 'transaction.csv' not found in the artifact.")
  except Exception as e:
    # Handle any other error that occurs during the download process
    raise RuntimeError(f"Error downloading the file: {e}")
  
  # Try to read the CSV file and retrieve all sheets using pandas
  try:
    dataframe = pd.read_csv(csv_file)
  except Exception as e:
    # Handle any error that occurs while reading the CSV file
    raise RuntimeError(f"Error reading the CSV file: {e}")
  
  # -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
  # Add dataset to output
  Helpers.save_output_dataset(context=context, output_name='output', data_frame=dataframe)`
    },
    {
      title: "Create and Save Data to Artifacts",
      tooltip: "Create and Save Data to Artifacts",
      code: `# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
  # Create and Save Data to Artifacts
  # Required imports
  from utils.notebookhelpers.helpers import Helpers
  from utils.dtos.templateOutputCollection import TemplateOutputCollection
  from utils.dtos.templateOutput import TemplateOutput
  from utils.dtos.templateOutput import OutputType
  from utils.dtos.templateOutput import ChartType
  from utils.dtos.variable import Metadata
  from utils.rcclient.commons.variable_datatype import VariableDatatype
  from utils.dtos.templateOutput import FileType
  from utils.dtos.rc_ml_model import RCMLModel
  from utils.notebookhelpers.helpers import Helpers
  
  # -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
  
  context = Helpers.getOrCreateContext(contextId='contextId', localVars=locals())
  
  # Import necessary libraries
  import requests  # For making HTTP requests (not used in this code but imported for future use)
  import pandas as pd  # For data manipulation and creation of DataFrames
  from utils.dtos.templateOutput import ArtifactOutput  # Used to structure the output for artifacts
  from utils.notebookhelpers.helpers import Helpers  # Helper utilities for managing artifact directories
  
  # Define the data that will be used to create the DataFrame
  data = {
      'Col1': ['A', 'B', 'C'],  # First column with values 'A', 'B', 'C'
      'Col2': [1, 2, 3]  # Second column with numeric values 1, 2, and 3
  }
  
  # Create a pandas DataFrame using the defined data
  df = pd.DataFrame(data)
  
  # Get or create an artifacts directory using a unique ID ('test-artifact') and store files in it
  artifactsDir = Helpers.getOrCreateArtifactsDir(context, artifactsId="test-artifact")
  
  # Save the DataFrame as a CSV file in the artifacts directory
  df.to_csv(artifactsDir + '/test.csv')  # Save as 'test.csv'
  
  # Save the first 10 rows of the DataFrame as a Pickle file in the artifacts directory
  df.head(10).to_csv(artifactsDir + '/test.pkl')  # Save as 'test.pkl'
  
  # -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
  # Add artifact to output
  Helpers.save_output_artifacts(context=context, artifact_name='test-artifact')`
    }
  ],
  "ML Models": [
    {
      title: "Build a Machine Learning model",
      tooltip: "Build a Machine Learning model",
      code: `# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
  # Build an Machine Learning model 
  # Required imports
  from utils.notebookhelpers.helpers import Helpers
  from utils.dtos.templateOutputCollection import TemplateOutputCollection
  from utils.dtos.templateOutput import TemplateOutput
  from utils.dtos.templateOutput import OutputType
  from utils.dtos.templateOutput import ChartType
  from utils.dtos.variable import Metadata
  from utils.rcclient.commons.variable_datatype import VariableDatatype
  from utils.dtos.templateOutput import FileType
  from utils.dtos.rc_ml_model import RCMLModel
  from utils.notebookhelpers.helpers import Helpers
  
  context = Helpers.getOrCreateContext(contextId='contextId', localVars=locals())
  
  # -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
  # Define a custom class for the price prediction model
  class PricePredictionModel(RCMLModel):
      import pickle
  
      # Load the saved model and encoders
      def load(self, artifacts):
          with open(artifacts['model_file'], 'rb') as model_file:
              self.ml_model = pickle.load(model_file)
          self.encoders = artifacts
  
      # Preprocess the input data to match the training data
      def pre_process(self, df_input):
          features_id = 'features.pkl'
          file_path = self.encoders[features_id]
          features_data = pickle.load(open(file_path, 'rb'))
  
          # Handle dropped, numeric, and categorical columns
          for col in features_data['dropped_cols']:
              try:
                  df_input = df_input.drop(columns=[col])
              except KeyError as e:
                  print(f'Error dropping column {col}: {e}')
  
          for col in features_data.get('num_cols', []):
              try:
                  if col in df_input.columns:
                      df_input[col].fillna(features_data[col], inplace=True)
              except KeyError:
                  pass
  
          for col in features_data.get('cat_cols', []):
              try:
                  if col in df_input.columns:
                      mode_value = features_data[col]
                      if pd.api.types.is_categorical_dtype(df_input[col]):
                          if mode_value not in df_input[col].cat.categories:
                              df_input[col].cat.add_categories([mode_value], inplace=True)
                      df_input[col].fillna(mode_value, inplace=True)
              except KeyError:
                  pass
  
          # One-hot encode categorical columns during prediction
          for ohe_col in features_data.get('cat_cols', []):
              try:
                  if ohe_col in df_input.columns:
                      encoderId = ohe_col + '_ohe.pkl'
                      file_path = self.encoders[encoderId]
                      ohe = self.pickle.load(open(file_path, 'rb'))
                      df_input_ohe = pd.DataFrame(ohe.transform(df_input[[ohe_col]]).toarray(),
                                                  columns=ohe.get_feature_names_out())
                      df_input = pd.concat([df_input, df_input_ohe], axis=1).drop(columns=[ohe_col])
              except Exception as e:
                  print(f'Error during one-hot encoding for column {ohe_col}: {e}')
  
          return df_input
  
      # Make predictions on new data
      def predict(self, model_input):
          model_input = self.pre_process(model_input)
          predictions = self.ml_model.predict(model_input)
          return pd.DataFrame(predictions)
  
  # Read the input dataset for processing
  input_df_1 = Helpers.getEntityData(context, 'Car_Price_Prediction_data')  # 'CPP data' is the input dataset
  
  # Import necessary libraries
  import pandas as pd  # For data manipulation
  import pickle  # For saving and loading model and encoders
  import os  # For file path operations
  import plotly.express as px  # For data visualization
  import numpy as np  # For numerical operations
  import plotly.io as pio  # For controlling plotly display options
  pio.templates.default = 'none'  # Disable plotly templates
  
  # Import machine learning libraries
  from sklearn.model_selection import train_test_split as data_split  # For splitting data
  from sklearn.preprocessing import OneHotEncoder  # For encoding categorical variables
  from sklearn.ensemble import RandomForestRegressor  # For training the model
  from sklearn.metrics import mean_squared_error, r2_score  # For evaluation metrics
  
  # Import platform helper functions
  from utils.notebookhelpers.helpers import Helpers
  from utils.dtos.templateOutput import ModelOutput
  from utils.dtos.rc_ml_model import RCMLModel
  
  # Define which columns are numeric, categorical, dropped, and the target column
  features_data = {}
  features_data['num_cols'] = ['wheelbase', 'carlength', 'carwidth', 'carheight', 'curbweight',
                               'enginesize', 'boreratio', 'stroke', 'compressionratio', 'horsepower',
                               'peakrpm', 'citympg', 'highwaympg']
  features_data['cat_cols'] = ['CarName', 'fueltype', 'aspiration', 'doornumber', 'carbody',
                               'drivewheel', 'enginelocation', 'enginetype', 'cylindernumber', 'fuelsystem']
  features_data['dropped_cols'] = ['car_ID']
  features_data['target_col'] = 'price'
  
  # Drop unnecessary columns
  input_df_1 = input_df_1.drop(columns=features_data['dropped_cols'])
  
  # Handle missing numeric values by replacing them with the column mean
  for col in features_data['num_cols']:
      if input_df_1[col].isnull().any():
          mean_val = int(round(input_df_1[col].mean()))
          input_df_1[col].fillna(mean_val, inplace=True)
          features_data[col] = mean_val
  
  # Handle missing categorical values by replacing them with the mode (most frequent value)
  for col in features_data['cat_cols']:
      if input_df_1[col].isnull().any():
          mode_value = input_df_1[col].mode()[0]
          input_df_1[col].fillna(mode_value, inplace=True)
          features_data[col] = mode_value
  
  # One-hot encode the categorical variables and store the encoders for each column
  features_data['ohe_cols'] = []
  for col in features_data['cat_cols']:
      ohe = OneHotEncoder(handle_unknown='ignore', dtype=np.int64)
      ohe.fit(input_df_1[[col]])
      ohe_features = pd.DataFrame(ohe.transform(input_df_1[[col]]).toarray(),
                                  columns=ohe.get_feature_names_out())
      input_df_1 = pd.concat([input_df_1, ohe_features], axis=1).drop(columns=[col])
      encoderId = col + '_ohe.pkl'
      with open(Helpers.getChildDir(context) + encoderId, 'wb') as handle:
          pickle.dump(ohe, handle)
      features_data['ohe_cols'].append(col)
  
  # Separate the features (X) and target (y)
  X = input_df_1.drop(columns=[features_data['target_col']])
  y = input_df_1[features_data['target_col']]
  
  # Split the data into training and testing sets
  X_train, X_test, y_train, y_test = data_split(X, y, test_size=0.2, random_state=42)
  
  # Train a Random Forest Regressor model
  rf = RandomForestRegressor(random_state=42)
  rf.fit(X_train, y_train)
  
  # Save the trained model to the artifacts directory
  artifacts = {}
  model_path = os.path.join(Helpers.getChildDir(context), 'model_price_prediction.pkl')
  with open(model_path, 'wb') as f:
      pickle.dump(rf, f)
  artifacts['model_file'] = model_path
  
  # Save the feature metadata
  features_id = 'features.pkl'
  with open(Helpers.getChildDir(context) + features_id, 'wb') as handle:
      pickle.dump(features_data, handle)
  artifacts[features_id] = Helpers.getChildDir(context) + features_id
  
  # Save each one-hot encoder for future use
  for ohe_col in features_data['ohe_cols']:
      encoderId = ohe_col + '_ohe.pkl'
      artifacts[encoderId] = os.path.join(Helpers.getChildDir(context), encoderId)
  
  # Output the trained model for future predictions
  model = ModelOutput(PricePredictionModel, artifacts=artifacts)
  
  # Make predictions on the test data and generate plots for evaluation
  y_pred = rf.predict(X_test)
  
  # Plot Actual vs Predicted Prices
  fig_1 = px.scatter(x=y_test, y=y_pred, labels={'x': 'Actual Price', 'y': 'Predicted Price'},
                     title='Actual vs Predicted Prices')
  fig_1.update_layout(autosize=False, height=450, width=950)
  fig_1.show()
  
  # Plot Residuals vs Predicted Prices
  residuals = y_test - y_pred
  fig_2 = px.scatter(x=y_pred, y=residuals, labels={'x': 'Predicted Price', 'y': 'Residuals'},
                     title='Residuals vs Predicted Prices')
  fig_2.update_layout(autosize=False, height=450, width=950)
  fig_2.show()
  
  # Plot Histogram of Residuals
  fig_3 = px.histogram(residuals, nbins=30, labels={'value': 'Residuals'},
                       title='Distribution of Residuals')
  fig_3.update_layout(autosize=False, height=450, width=950)
  fig_3.show()
  
  # Plot Feature Importance
  feature_importance = rf.feature_importances_
  features = X.columns
  fig_4 = px.bar(x=features, y=feature_importance, labels={'x': 'Features', 'y': 'Importance'},
                 title='Feature Importance')
  fig_4.update_layout(autosize=False, height=450, width=950)
  fig_4.show()
  
  # -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
  
  # Add charts and model to output
  Helpers.save_output_plotly_chart_as_json(context=context, chart_title='my-chart', plotly_fig=fig_1, group=None)
  Helpers.save_output_plotly_chart_as_json(context=context, chart_title='my-chart', plotly_fig=fig_2, group=None)
  Helpers.save_output_plotly_chart_as_json(context=context, chart_title='my-chart', plotly_fig=fig_3, group=None)
  Helpers.save_output_plotly_chart_as_json(context=context, chart_title='my-chart', plotly_fig=fig_4, group=None)
  Helpers.save_output_rc_ml_model(context=context, model_name='myModel', model_obj=PricePredictionModel , artifacts=artifacts)
  Helpers.save(context)`
    },
    {
      title: "Using RC model to predict outcome on new dataset",
      tooltip: "Using RC model to predict outcome on new dataset",
      code: `# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
  #Using RC model to predict outcome on new dataset
  # Required imports
  from utils.notebookhelpers.helpers import Helpers
  from utils.dtos.templateOutputCollection import TemplateOutputCollection
  from utils.dtos.templateOutput import TemplateOutput
  from utils.dtos.templateOutput import OutputType
  from utils.dtos.templateOutput import ChartType
  from utils.dtos.variable import Metadata
  from utils.rcclient.commons.variable_datatype import VariableDatatype
  from utils.dtos.templateOutput import FileType
  from utils.dtos.rc_ml_model import RCMLModel
  from utils.notebookhelpers.helpers import Helpers
  
  context = Helpers.getOrCreateContext(contextId='contextId', localVars=locals())
  
  # -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
  
  # Extract the input data for making predictions from the 'entities' dictionary
  df_for_predictions = Helpers.getEntityData(context, 'input data for predictions')
  
  # Import necessary libraries and modules
  import requests  # For making HTTP requests (if needed)
  import pandas as pd  # For data manipulation
  from utils.rc.dtos.artifact import Artifact  # For managing artifacts in the RapidCanvas environment
  from utils.notebookhelpers.helpers import Helpers  # Helper utilities provided by the platform
  import os  # For file path operations
  
  # Specify the model name created in RapidCanvas (or pre-trained)
  model_name = 'modle_created_in_rc'
  
  # Load the model using Helpers to retrieve the machine learning model from the RC platform
  imported_model = Helpers.get_rc_ml_model(context, model_name)
  
  # Define the features that will be used for making predictions
  # 'X_test' is the subset of the input DataFrame consisting of the required feature columns
  X_test = df_for_predictions[['feature_1', 'feature_2', 'feature_3', ....]]  # Add all relevant features
  
  # Extract the actual outcomes (if available) for validation or comparison purposes
  # 'y_test' is the column containing the true values for the outcome (if available in your use case)
  y_test = df_for_predictions['outcome_column']
  
  # Make predictions using the imported model. The model's output is assumed to have a format where
  # it provides predicted values (e.g., in a column named 'predict') and optionally probabilities (e.g., '0_prob', '1_prob')
  model_output = imported_model.predict(X_test)
  
  # Extract the 'predict' column from the model's output to get the predicted outcome
  # Assuming the model output is a DataFrame or dict-like object with a 'predict' key/column
  y_pred = pd.DataFrame(model_output)['predict']  # Convert to DataFrame and extract the predictions
  
  # Assign the predicted outcomes (y_pred) to a new column 'predictions' in the original input DataFrame
  df_for_predictions['predictions'] = y_pred
  
  # -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
  # Add dataset to output
  Helpers.save_output_dataset(context=context, output_name='data_with_predictions', data_frame=df_for_predictions)`
    }
  ],
  "LLM API Integration": [
    {
      // Snippet entry: "OpenAI Call".
      // `code` is Python source kept verbatim in a template literal. The script
      // creates a context through Helpers, reads the 'Titanic_prediction' entity,
      // sends a fixed two-message chat completion to "gpt-4o" using the key in
      // the OPEN_AI_KEY environment variable, and saves the reply text as a
      // one-row dataset named 'output_df'.
      // NOTE(review): every line after the first inside the literal carries a
      // two-space indent; presumably the consumer normalizes it before running
      // the Python - confirm.
      title: "OpenAI Call",
      tooltip: "OpenAI Call",
      code: `# Required imports
  from utils.notebookhelpers.helpers import Helpers
  from utils.dtos.templateOutputCollection import TemplateOutputCollection
  from utils.dtos.templateOutput import TemplateOutput
  from utils.dtos.templateOutput import OutputType
  from utils.dtos.templateOutput import ChartType
  from utils.dtos.variable import Metadata
  from utils.rcclient.commons.variable_datatype import VariableDatatype
  from utils.dtos.templateOutput import FileType
  from utils.dtos.rc_ml_model import RCMLModel
  from utils.notebookhelpers.helpers import Helpers
  from openai import OpenAI
  import os
  import pandas as pd
  
  context = Helpers.getOrCreateContext(contextId='contextId', localVars=locals())
  
  Titanic_prediction_df = Helpers.getEntityData(context, 'Titanic_prediction') # This is for reading the input dataset
  
  # Initialize the OpenAI client with the API key from environment variables
  client = OpenAI(api_key=os.environ["OPEN_AI_KEY"])
  
  # Create a chat completion request with a specific model and messages
  completion = client.chat.completions.create(
  model="gpt-4o",
  messages=[
      {"role": "developer", "content": "You are a helpful assistant."},
      {"role": "user", "content": "Hello!"}
  ]
  )
  
  # Print the response message from the completion
  output = completion.choices[0].message.content
  
  #convert output to a dataframe
  output_df = pd.DataFrame({"Output": [output]})
  Helpers.save_output_dataset(context=context, output_name='output_df', data_frame=output_df)`
    },
    {
      // Snippet entry: "OpenAI Call with image input".
      // `code` is Python source kept verbatim in a template literal. The script
      // sends one user message to "gpt-4o" made of a text part and an
      // `image_url` part pointing at a public Wikimedia image (max_tokens=300),
      // prints the first choice, and saves the reply text as a one-row dataset
      // named 'output_df'.
      title: "OpenAI Call with image input",
      tooltip: "OpenAI Call with image input",
      code: `# Required imports
  from utils.notebookhelpers.helpers import Helpers
  from utils.dtos.templateOutputCollection import TemplateOutputCollection
  from utils.dtos.templateOutput import TemplateOutput
  from utils.dtos.templateOutput import OutputType
  from utils.dtos.templateOutput import ChartType
  from utils.dtos.variable import Metadata
  from utils.rcclient.commons.variable_datatype import VariableDatatype
  from utils.dtos.templateOutput import FileType
  from utils.dtos.rc_ml_model import RCMLModel
  from utils.notebookhelpers.helpers import Helpers
  from openai import OpenAI
  import os
  import pandas as pd
  
  context = Helpers.getOrCreateContext(contextId='contextId', localVars=locals())
  
  Titanic_prediction_df = Helpers.getEntityData(context, 'Titanic_prediction') # This is for reading the input dataset
  
  # Initialize the OpenAI client with the API key from environment variables
  client = OpenAI(api_key=os.environ["OPEN_AI_KEY"])
  
  # Create a chat completion request with an image input
  response = client.chat.completions.create(
      model="gpt-4o",
      messages=[
          {
              "role": "user",
              "content": [
                  {"type": "text", "text": "What's in this image?"},
                  {
                      "type": "image_url",
                      "image_url": {
                          "url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg",
                      }
                  },
              ],
          }
      ],
      max_tokens=300,
  )
  
  # Print the response from the completion
  print(response.choices[0])
  
  # Print the response message from the completion
  output = response.choices[0].message.content
  
  #convert output to a dataframe
  output_df = pd.DataFrame({"Output": [output]})
  
  Helpers.save_output_dataset(context=context, output_name='output_df', data_frame=output_df)
  `
    },
    {
      title: "OpenAI Call with base 64 image",
      tooltip: "OpenAI Call with base 64 image",
      code: `# Required imports
  import os
  import base64
  import requests
  import pandas as pd
  from utils.notebookhelpers.helpers import Helpers
  from openai import OpenAI
  
  # Initialize context
  context = Helpers.getOrCreateContext(contextId='contextId', localVars=locals())
  
  # Image URL and download path
  image_url = "https://www.placecage.com/c/200/300"
  image_path = "/tmp/downloaded_image.jpg"
  
  # Download the image
  response = requests.get(image_url)
  if response.status_code == 200:
      with open(image_path, "wb") as file:
          file.write(response.content)
      print("✅ Image downloaded successfully.")
  else:
      print(f"❌ Failed to download image. HTTP Status: {response.status_code}")
      image_path = None
  
  # Convert image file to base64
  def image_file_to_base64(filepath):
      with open(filepath, "rb") as image_file:
          return base64.b64encode(image_file.read()).decode("utf-8")
  
  # Base64 conversion if image exists
  base64_image = image_file_to_base64(image_path) if image_path else None
  
  # OpenAI API Key and client
  api_key = os.getenv("OPEN_AI_KEY")
  client = OpenAI(api_key=api_key)
  
  if base64_image:
      payload = {
          "model": "gpt-4o",
          "messages": [
              {"role": "system", "content": "You are an expert data scientist."},
              {"role": "user", "content": "Explain this image to me."},
              {"role": "user", "content": f"data:image/jpeg;base64,{base64_image}"}
          ],
          "max_tokens": 1000,
      }
  
  
  # API request
  response = client.chat.completions.create(**payload)
  output = response.choices[0].message.content if response.choices else "No response"
  
  # Save output to DataFrame
  output_df = pd.DataFrame({"Output": [output]})
  Helpers.save_output_dataset(context=context, output_name='output_df', data_frame=output_df)
  `
    },
    {
      title: "OpenAI Call with functions",
      tooltip: "OpenAI Call with functions",
      code: `# Required imports
  from utils.notebookhelpers.helpers import Helpers
  from utils.dtos.templateOutputCollection import TemplateOutputCollection
  from utils.dtos.templateOutput import TemplateOutput
  from utils.dtos.templateOutput import OutputType
  from utils.dtos.templateOutput import ChartType
  from utils.dtos.variable import Metadata
  from utils.rcclient.commons.variable_datatype import VariableDatatype
  from utils.dtos.templateOutput import FileType
  from utils.dtos.rc_ml_model import RCMLModel
  from utils.notebookhelpers.helpers import Helpers
  from openai import OpenAI
  import os
  import pandas as pd
  
  context = Helpers.getOrCreateContext(contextId='contextId', localVars=locals())
  
  Titanic_prediction_df = Helpers.getEntityData(context, 'Titanic_prediction') # This is for reading the input dataset
  
  # Initialize the OpenAI client with the API key from environment variables
  client = OpenAI(api_key=os.environ["OPEN_AI_KEY"])
  
  # Define the tools (functions) to be used in the chat completion
  tools = [
  {
      "type": "function",
      "function": {
      "name": "get_current_weather",
      "description": "Get the current weather in a given location",
      "parameters": {
          "type": "object",
          "properties": {
          "location": {
              "type": "string",
              "description": "The city and state, e.g. San Francisco, CA",
          },
          "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
          },
          "required": ["location"],
      },
      }
  }
  ]
  
  # Define the messages for the chat completion
  messages = [{"role": "user", "content": "What's the weather like in Boston today?"}]
  
  # Create a chat completion request with the tools and messages
  completion = client.chat.completions.create(
  model="gpt-4o",
  messages=messages,
  tools=tools,
  tool_choice="auto"
  )
  
  # Print the response message from the completion
  output = completion.choices[0].message.content
  
  #convert output to a dataframe
  output_df = pd.DataFrame({"Output": [output]})
  
  Helpers.save_output_dataset(context=context, output_name='output_df', data_frame=output_df)
      `
    },
    {
      // Snippet entry: "OpenAI O1 Model Call".
      // `code` is Python source kept verbatim in a template literal. The script
      // sends a fixed developer + user message pair to the "o1" model, prints the
      // reply text, and saves it as a one-row dataset named 'output_df'.
      title: "OpenAI O1 Model Call",
      tooltip: "OpenAI O1 Model Call",
      code: `# Required imports
  from utils.notebookhelpers.helpers import Helpers
  from utils.dtos.templateOutputCollection import TemplateOutputCollection
  from utils.dtos.templateOutput import TemplateOutput
  from utils.dtos.templateOutput import OutputType
  from utils.dtos.templateOutput import ChartType
  from utils.dtos.variable import Metadata
  from utils.rcclient.commons.variable_datatype import VariableDatatype
  from utils.dtos.templateOutput import FileType
  from utils.dtos.rc_ml_model import RCMLModel
  from utils.notebookhelpers.helpers import Helpers
  from openai import OpenAI
  import os
  import pandas as pd
  
  context = Helpers.getOrCreateContext(contextId='contextId', localVars=locals())
  
  Titanic_prediction_df = Helpers.getEntityData(context, 'Titanic_prediction') # This is for reading the input dataset
  
  # Initialize the OpenAI client with the API key from environment variables
  client = OpenAI(api_key=os.environ["OPEN_AI_KEY"])
  
  # Create a chat completion request with the O1 model and messages
  completion = client.chat.completions.create(
  model="o1",
  messages=[
      {"role": "developer", "content": "You are a helpful assistant."},
      {"role": "user", "content": "Hello!"}
  ]
  )
  # Print the response message from the completion
  print(completion.choices[0].message.content)
  
  # Print the response message from the completion
  output = completion.choices[0].message.content
  
  #convert output to a dataframe
  output_df = pd.DataFrame({"Output": [output]})
  
  Helpers.save_output_dataset(context=context, output_name='output_df', data_frame=output_df)
        `
    },
    {
      // Snippet entry: "OpenAI with fixed response schema".
      // `code` is Python source kept verbatim in a template literal. The script
      // builds a strict `json_schema` response format with five required string
      // fields (question1..question5), calls "gpt-4o" with temperature=0.1 and
      // seed=1, prints the returned content, and saves it as a one-row dataset
      // named 'output_df'.
      title: "OpenAI with fixed response schema",
      tooltip: "OpenAI with fixed response schema",
      code: `# Required imports
  from utils.notebookhelpers.helpers import Helpers
  from utils.dtos.templateOutputCollection import TemplateOutputCollection
  from utils.dtos.templateOutput import TemplateOutput
  from utils.dtos.templateOutput import OutputType
  from utils.dtos.templateOutput import ChartType
  from utils.dtos.variable import Metadata
  from utils.rcclient.commons.variable_datatype import VariableDatatype
  from utils.dtos.templateOutput import FileType
  from utils.dtos.rc_ml_model import RCMLModel
  from utils.notebookhelpers.helpers import Helpers
  from openai import OpenAI
  import os
  import pandas as pd
  
  context = Helpers.getOrCreateContext(contextId='contextId', localVars=locals())
  
  Titanic_prediction_df = Helpers.getEntityData(context, 'Titanic_prediction') # This is for reading the input dataset
  
  
  
  # Define the response format schema
  response_format={
      "type": "json_schema",
      "json_schema": {
      "name": "questions_schema",
      "strict": True,
      "schema": {
          "type": "object",
          "properties": {
          "question1": {
              "type": "string",
              "description": "First question to be filled by the model."
          },
          "question2": {
              "type": "string",
              "description": "Second question to be filled by the model."
          },
          "question3": {
              "type": "string",
              "description": "Third question to be filled by the model."
          },
          "question4": {
              "type": "string",
              "description": "Fourth question to be filled by the model."
          },
          "question5": {
              "type": "string",
              "description": "Fifth question to be filled by the model."
          }
          },
          "required": [
          "question1",
          "question2",
          "question3",
          "question4",
          "question5"
          ],
          "additionalProperties": False,
          "$defs": {}
      }
      }
  }
  
  # Initialize the OpenAI client with the API key from environment variables
  client = OpenAI(api_key=os.getenv("OPEN_AI_KEY"))
  
  # Define the messages for the chat completion
  messages=[
      {"role": "developer", "content": "You are a helpful assistant."},
      {"role": "user", "content": "Hello!"}
  ]
  
  # Create a chat completion request with the response format schema
  response = client.chat.completions.create(
      model="gpt-4o",
      messages=messages,
      response_format=response_format,        
      temperature=0.1,
      seed=1
  )
  
  # Extract and print the response content
  output = response.choices[0].message.content
  print(output)
  
  #convert output to a dataframe
  output_df = pd.DataFrame({"Output": [output]})
  
  Helpers.save_output_dataset(context=context, output_name='output_df', data_frame=output_df)
          `
    },
    {
      title: "Anthropic call with PDF input and content extraction",
      tooltip: "Anthropic call with PDF input and content extraction",
      code: `# Required imports
  from utils.notebookhelpers.helpers import Helpers
  from utils.dtos.templateOutputCollection import TemplateOutputCollection
  from utils.dtos.templateOutput import TemplateOutput
  from utils.dtos.templateOutput import OutputType
  from utils.dtos.templateOutput import ChartType
  from utils.dtos.variable import Metadata
  from utils.rcclient.commons.variable_datatype import VariableDatatype
  from utils.dtos.templateOutput import FileType
  from utils.dtos.rc_ml_model import RCMLModel
  from utils.notebookhelpers.helpers import Helpers
  from openai import OpenAI
  import os
  import pandas as pd
  from anthropic import Anthropic
  import base64
  
  context = Helpers.getOrCreateContext(contextId='contextId', localVars=locals())
  
  # Fetch the access token using a helper function
  access_token = Helpers.get_secret(context, "anthropic")
  if not access_token:
  # If the access token could not be retrieved, print an error and stop execution
      print("Failed to retrieve access token.")
      return {}
  
  # While PDF support is in beta, you must pass in the correct beta header
  client = Anthropic(default_headers={
      "anthropic-beta": "pdfs-2024-09-25"
      },
      api_key=access_token,
  )
  
  # For now, only claude-3-5-sonnet-20241022 supports PDFs
  MODEL_NAME = "claude-3-5-sonnet-20241022"
  
  # Make a useful helper function.
  def get_completion(messages):
      response = client.messages.create(
          model=MODEL_NAME,
          max_tokens=8192,
          temperature=0,
          messages=messages
      )
      return response.content[0].text
  
  # Fetch PDF file from artifact
  try:
      pdf_file_from_artifact = Helpers.downloadArtifacts(context, 'Artifact Name')['File Name.pdf'] 
  except KeyError:
      raise ValueError("File 'File Name' not found in the artifact.")
  except Exception as e:
      raise RuntimeError(f"Error downloading the file: {e}")
  
  # Start by reading in the PDF and encoding it as base64.
  with open(pdf_file_from_artifact, "rb") as pdf_file:
      binary_data = pdf_file.read()
      base_64_encoded_data = base64.b64encode(binary_data)
      base64_string = base_64_encoded_data.decode('utf-8')
  
  # Define the messages for the chat completion
  messages = [
      {
          "role": 'user',
          "content": [
              {"type": "document", "source": {"type": "base64", "media_type": "application/pdf", "data": base64_string}},
              {"type": "text", "text": "Extract the data from this file in a structured format"}
          ]
      }
  ]
  
  # Fetch and process the document summary
  output = get_completion(messages)
  print(output)
  
  #convert output to a dataframe
  output_df = pd.DataFrame({"Output": [output]})
  
  Helpers.save_output_dataset(context=context, output_name='output_df', data_frame=output_df)`
    },
    {
      // Snippet entry: "OpenAI Call to send row by row data to LLM".
      // `code` is Python source kept verbatim in a template literal. The script
      // iterates over the first 10 rows of the 'Titanic_prediction' entity, sends
      // each row (as a dict rendered into the prompt) to "gpt-4o-mini" with a
      // strict JSON schema containing a single `random_value` string, parses the
      // JSON reply, writes `random_value` into an 'Output' column for that row,
      // and finally saves the whole frame as 'Titanic_prediction_df'.
      // `requirements` is an empty string for this entry.
      title: "OpenAI Call to send row by row data to LLM",
      tooltip: "OpenAI Call to send row by row data to LLM",
      requirements: "",
      code: `# Required imports
from utils.notebookhelpers.helpers import Helpers
from utils.dtos.templateOutputCollection import TemplateOutputCollection
from utils.dtos.templateOutput import TemplateOutput
from utils.dtos.templateOutput import OutputType
from utils.dtos.templateOutput import ChartType
from utils.dtos.variable import Metadata
from utils.rcclient.commons.variable_datatype import VariableDatatype
from utils.dtos.templateOutput import FileType
from utils.dtos.rc_ml_model import RCMLModel
from utils.notebookhelpers.helpers import Helpers
from openai import OpenAI
import os
import pandas as pd
import json

context = Helpers.getOrCreateContext(contextId='contextId', localVars=locals())

Titanic_prediction_df = Helpers.getEntityData(context, 'Titanic_prediction') # This is for reading the input dataset

# Initialize the OpenAI client with the API key from environment variables
client = OpenAI(api_key=os.environ["OPEN_AI_KEY"])

response_format={
    "type": "json_schema",
    "json_schema": {
      "name": "value_schema",
      "strict": True,
      "schema": {
        "type": "object",
        "properties": {
          "random_value": {
            "type": "string",
            "description": "random value to be filled by the model"
          }
        },
        "required": [
          "random_value"
        ],
        "additionalProperties": False,
        "$defs": {}
      }
    }
  }

# Send each row of data to the model. Send only the first 10 rows for demonstration purposes
for index, row in Titanic_prediction_df.head(10).iterrows():
    # Extract the full row of data
    data = row.to_dict()
    # Send row to the model 
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "developer", "content": "You are a helpful assistant."},
            {"role": "user", "content": "fill a random value between 1 and 1000 in the response format:" + f"{data}"}
        ],
        response_format=response_format
    )
    # Print the response message from the completion
    output = completion.choices[0].message.content
    print(output)
    # Parse the output as JSON
    output_json = json.loads(output)
    # Extract random_value from the output
    random_value = output_json['random_value']
    # Add the output back to the row in a new column of the dataframe
    Titanic_prediction_df.loc[index, 'Output'] = random_value

Helpers.save_output_dataset(context=context, output_name='Titanic_prediction_df', data_frame=Titanic_prediction_df)`
    }
  ],
  "Rag Ingestion": [
    {
      // Snippet entry: "PDF Ingestion into Vector Store".
      // `code` is a template literal holding Python source; it interpolates the
      // RAG_REQUIREMENTS constant (defined outside this entry) into a leading
      // docstring so the user can copy the requirements. `requirements` itself is
      // an empty string here.
      // The Python script downloads the files of the artifact named "user doc",
      // loads the containing directory with SimpleDirectoryReader (PDFs via
      // DoclingReader), connects to the "test_store" vector store through
      // QdrantHosted, builds a VectorStoreIndex on a hybrid QdrantVectorStore
      // using OpenAI embeddings, prints the elapsed indexing time, and saves the
      // vector store as the recipe output.
      // NOTE(review): `node_parser` is passed to VectorStoreIndex.from_documents
      // as a keyword; confirm the installed LlamaIndex version honors it rather
      // than expecting `transformations=[...]`.
      title: "PDF Ingestion into Vector Store",
      tooltip: "PDF Ingestion into Vector Store",
      requirements: "",
      code: `# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
'''
# Required Libraries
These requirements are essential for this code. Please ensure you copy and paste them into the Requirements tab and save them before running or testing the recipe.

${RAG_REQUIREMENTS}
'''
# Please have a vector store created and attached before using this syntax
## IMPORTS
# Import helper functions and classes for context management and output handling.
from utils.notebookhelpers.helpers import Helpers
# Import the QdrantHosted implementation for managing vector stores on hosted infrastructure.
from utils.libutils.vectorStores.qdrantHosted import QdrantHosted
import os

# Import document readers and directory readers from LlamaIndex.
from llama_index.readers.docling import DoclingReader
from llama_index.core import SimpleDirectoryReader
from llama_index.node_parser.docling import DoclingNodeParser
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.core import StorageContext
from llama_index.core import VectorStoreIndex

# Import OpenAI embeddings for generating document embeddings.
from llama_index.embeddings.openai import OpenAIEmbedding
import openai
import time

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Optional: Initialize context and set a unique context ID.
contextId = "Rapid Rag"

# Please specify output vector store name
vector_store_name = "test_store"

# Please specify open ai key secret name
secret_name = "openai"

# Please specify name of artifact containing documents 
artifact_name = "user doc"

context = Helpers.getOrCreateContext(contextId=contextId, localVars=locals())

# Please add a secret containing openai key and add to the workspace and provide an appropriate name
openai.api_key = Helpers.get_secret(context ,secret_name)


files = Helpers.downloadArtifacts(context, artifact_name)
files_dir = os.path.dirname(list(files.values())[0])

print(files_dir)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Reset the performance timer.
now_time = time.perf_counter()

dir_reader = SimpleDirectoryReader(
    input_dir=files_dir,
    file_extractor={".pdf": DoclingReader()},
)
documents = dir_reader.load_data(show_progress=True, num_workers=4)
print("ingestion done")
# Print the time taken for document ingestion.
# Initialize a node parser for processing documents into nodes.
node_parser = DoclingNodeParser()

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Set up the vector store and index.

qdrant_obj = QdrantHosted(context, vector_store_name)
qdrant_obj.create_connection()
qdrant_client = qdrant_obj.client

# Configure the Qdrant vector store for hybrid search using OpenAI embeddings.
vector_store = QdrantVectorStore(
    client=qdrant_client,
    collection_name=qdrant_obj.collection_name,
    enable_hybrid=True,
    embed_model=OpenAIEmbedding(),
)

now_time = time.perf_counter()

# Create a storage context based on the configured vector store.
storage_context = StorageContext.from_defaults(vector_store=vector_store)
# Create an index from the loaded documents, using the storage context and node parser.
vector_store_index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
    node_parser=node_parser,
    show_progress=True,
)
# Print the time taken to build the index.
print("Vector Store Index Created")
print(time.perf_counter() - now_time)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE

# Generate an output artifact for the vector store and add it to the output collection.
Helpers.save_output_vector_store(context=context, vector_store_obj=qdrant_obj)

# -------------------------------------------------------------------------------`
    },
    {
      title: "TXT Ingestion into Vector Store",
      tooltip: "TXT Ingestion into Vector Store",
      requirements: RAG_REQUIREMENTS,
      code: `# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
## DONOT EDIT THIS
## IMPORTS
# Import helper functions and classes for context management and output handling.
'''
# Required Libraries
These requirements are essential for this code. Please ensure you copy and paste them into the Requirements tab and save them before running or testing the recipe.

${RAG_REQUIREMENTS}
'''
# Please have a vector store created and attached before using this syntax
from utils.notebookhelpers.helpers import Helpers
# Import the QdrantHosted implementation for managing vector stores on hosted infrastructure.
from utils.libutils.vectorStores.qdrantHosted import QdrantHosted
from llama_index.core import Document

# Import node parser utilities for processing documents.
from llama_index.core.node_parser import SentenceSplitter

# Import Qdrant vector store integration and core classes for storage context and indexing.
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.core import StorageContext
from llama_index.core import VectorStoreIndex

# Import OpenAI embeddings for generating document embeddings.
from llama_index.embeddings.openai import OpenAIEmbedding
# Import time for performance measurements.
import time
import openai
# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Initialize context and set a unique context ID.
# Optional: Initialize context and set a unique context ID.
contextId = "Rapid Rag"

# Please specify output vector store name
vector_store_name = "test_store_from_text"

# Please specify open ai key secret name
secret_name = "openai"

# Please specify name of artifact containing documents 
artifact_name = "user doc"

context = Helpers.getOrCreateContext(contextId=contextId, localVars=locals())
# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Process all input text files
documents = {}
for file in context['files_data']:
    document = Helpers.get_file_data(context,file['file_name'])
    documents[file['file_name']] = document

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Initialize a SentenceSplitter as the node parser with specified chunk size and overlap.
node_parser = SentenceSplitter(chunk_size=1024, chunk_overlap=20)
# Process the document into nodes for indexing.
nodes = node_parser.get_nodes_from_documents(
    [Document(text=documents[document],id=document) for document in documents.keys()], show_progress=True
)
print("Nodes Parsed")
# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Set up the vector store and index.
now_time = time.perf_counter()  # Start performance timer

#Create connection with qdrant and build vector index
qdrant_obj = QdrantHosted(context, vector_store_name)
qdrant_obj.create_connection()


qdrant_client = qdrant_obj.client

vector_store = QdrantVectorStore(
    client=qdrant_client,
    collection_name=qdrant_obj.collection_name,
    enable_hybrid=True,
    embed_model=OpenAIEmbedding(),

)
# Restart the timer so that only index construction is measured below.
now_time = time.perf_counter()  # Reset the performance timer

# Create a storage context based on the configured vector store.
storage_context = StorageContext.from_defaults(vector_store=vector_store)

vector_store_index = VectorStoreIndex(
    nodes=nodes,
    storage_context=storage_context,
    show_progress=True,
)
# Print the time taken to build the index.
print("Vector Store Index Created")
print(time.perf_counter() - now_time)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Create an output collection to store the final Qdrant vector store output.
Helpers.save_output_vector_store(context=context, vector_store_obj=qdrant_obj)

# -------------------------------------------------------------------------------
`
    }
  ],
  "Rag Inference": [
    {
      title: "Vector Store Query on Qdrant",
      tooltip: "Vector Store Query on Qdrant",
      requirements: "",
      code: `# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Required imports

# Importing utility classes and functions for various helper methods
'''
# Required Libraries
These requirements are essential for this code. Please ensure you copy and paste them into the Requirements tab and save them before running or testing the recipe.

${RAG_REQUIREMENTS}
'''

from utils.notebookhelpers.helpers import Helpers

from utils.dtos.templateOutput import TemplateOutput, OutputType, ChartType, FileType
from utils.libutils.vectorStores.utils import VectorStoreUtils
from llama_index.llms.openai import OpenAI
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.core import  VectorStoreIndex
from llama_index.embeddings.openai import OpenAIEmbedding
import openai

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Optional: Initialize context and set a unique context ID.
contextId = "Rapid Rag"

# Please specify input vector store name
vector_store_name = "test_store"

# Please specify open ai key secret name
secret_name = "openai"

context = Helpers.getOrCreateContext(contextId=contextId, localVars=locals())
openai.api_key = Helpers.get_secret(context ,secret_name)

# Retrieve an existing Qdrant vector store object using helper functions
qdrant_obj = VectorStoreUtils.get_vector_store(context, vector_store_name)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE

question = "How do bobcats live?"

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Set up the Qdrant vector store to store and retrieve vector embeddings
vector_store = QdrantVectorStore(
    client=qdrant_obj.client,  # Qdrant client instance for managing vector storage
    collection_name=qdrant_obj.collection_name,  # Collection name for storing embeddings
    enable_hybrid=True,  # Enables hybrid search (dense + sparse retrieval)
    embed_model=OpenAIEmbedding(
    ),  # OpenAI-based embedding model for vectorization
)

# Create a vector-based index using the Qdrant vector store
index = VectorStoreIndex.from_vector_store(vector_store)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Convert the index into a query engine that uses OpenAI as the LLM backend
openai = OpenAI()
query_index = index.as_query_engine(llm_model=openai)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Perform the query using the previously loaded question
response = query_index.query(question)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Store response output.
Helpers.save_output_file(context, "response", str(response), FileType.TEXT)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE`
    },
    {
      title: "Chat on Qdrant",
      tooltip: "Chat on Qdrant",
      requirements: RAG_REQUIREMENTS,
      code: `# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Required imports

# Importing utility classes and functions for various helper methods
'''
# Required Libraries
These requirements are essential for this code. Please ensure you copy and paste them into the Requirements tab and save them before running or testing the recipe.

${RAG_REQUIREMENTS}
'''
# Please have a vector store created and attached before using this syntax
from utils.notebookhelpers.helpers import Helpers

from utils.dtos.templateOutput import TemplateOutput, OutputType, ChartType, FileType

# Importing vector store utilities and Qdrant implementation for disk-backed storage
from utils.libutils.vectorStores.utils import VectorStoreUtils

# Importing OpenAI model from LlamaIndex for LLM-based query handling
from llama_index.llms.openai import OpenAI

# Importing Qdrant VectorStore and related components for vector-based search
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.core import  VectorStoreIndex

# Importing OpenAI Embeddings for generating vector representations of text
from llama_index.embeddings.openai import OpenAIEmbedding
import openai
import openai
# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Optional: Initialize context and set a unique context ID.
contextId = "Rapid Rag"

# Please specify input vector store name
vector_store_name = "test_store"

# Please specify open ai key secret name
secret_name = "openai"

context = Helpers.getOrCreateContext(contextId=contextId, localVars=locals())
openai.api_key = Helpers.get_secret(context ,secret_name)

# Retrieve an existing Qdrant vector store object using helper functions
qdrant_obj = VectorStoreUtils.get_vector_store(context, vector_store_name)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE

question = "How do bobcats stragise their life?"

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Initialize the OpenAI model, which will be used for LLM-powered responses
openai = OpenAI()

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Set up the Qdrant vector store to store and retrieve vector embeddings
vector_store = QdrantVectorStore(
    client=qdrant_obj.client,  # Qdrant client instance for managing vector storage
    collection_name=qdrant_obj.collection_name,  # Collection name for storing embeddings
    enable_hybrid=True,  # Enables hybrid search (dense + sparse retrieval)
    embed_model=OpenAIEmbedding(
    ),  # OpenAI-based embedding model for vectorization
)

# Create a vector-based index using the Qdrant vector store
index = VectorStoreIndex.from_vector_store(vector_store)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Convert the index into a query engine that uses OpenAI as the LLM backend
chat_engine = index.as_chat_engine(llm_model=openai)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Perform the query using the previously loaded question
response = chat_engine.chat(question)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Store response output.

Helpers.save_output_file(context, "chat response", str(response), FileType.TEXT)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE`
    },

    {
      title: "Customizing Query Engine Prompts",
      tooltip: "Customizing Query Engine Prompts",
      requirements: RAG_REQUIREMENTS,
      code: `# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Required imports
# Import helper utilities, DTOs for output handling, and necessary libraries.
'''
# Required Libraries
These requirements are essential for this code. Please ensure you copy and paste them into the Requirements tab and save them before running or testing the recipe.

${RAG_REQUIREMENTS}
'''
# Please have a vector store created and attached before using this syntax
from utils.notebookhelpers.helpers import Helpers
from utils.dtos.templateOutput import TemplateOutput, OutputType, ChartType, FileType

from utils.libutils.vectorStores.utils import VectorStoreUtils


# Import OpenAI LLM interface and Qdrant vector store integration from LlamaIndex.
from llama_index.llms.openai import OpenAI
from llama_index.vector_stores.qdrant import QdrantVectorStore

# Import core components for storage context, indexing, and prompt templating.
from llama_index.core import StorageContext, VectorStoreIndex, PromptTemplate

# Import OpenAI embedding module.
from llama_index.embeddings.openai import OpenAIEmbedding
import openai

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Optional: Initialize context and set a unique context ID.
contextId = "Rapid Rag"

# Please specify input vector store name
vector_store_name = "test_store"

# Please specify open ai key secret name
secret_name = "openai"

context = Helpers.getOrCreateContext(contextId=contextId, localVars=locals())
openai.api_key = Helpers.get_secret(context ,secret_name)

# Retrieve an existing Qdrant vector store object using helper functions
qdrant_obj = VectorStoreUtils.get_vector_store(context, vector_store_name)
# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE

question = "How can I get reimbursements and what is the daily limit on food?"

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Configure vector store with Qdrant and OpenAI embedding.
# Set up a Qdrant vector store using OpenAI embeddings for hybrid search.
vector_store = QdrantVectorStore(
    client=qdrant_obj.client,
    collection_name=qdrant_obj.collection_name,
    enable_hybrid=True,  # Enable hybrid search capabilities.
    embed_model=OpenAIEmbedding(
    )
)


# Build an index from the vector store revived vector store
index = VectorStoreIndex.from_vector_store(vector_store)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Convert index into a query engine.
openai = OpenAI()
query_index = index.as_query_engine(llm_model=openai)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Display query engine prompts.
# Import display utilities from IPython to format output.
from IPython.display import Markdown, display

# Iterate over and display each prompt from the query engine.
for k, p in query_index.get_prompts().items():
    text_md = f"**Prompt Key**: {k}<br>" f"**Text:** <br>"
    display(Markdown(text_md))
    print(p.get_template())
    display(Markdown("<br><br>"))

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Retrieve and display available prompts.
# Get the dictionary of prompt templates and display their keys.
prompts = query_index.get_prompts()
prompts.keys()

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Define custom prompt template.
# Create a custom prompt that provides context and instructs the model to answer in a McKinsey Consultant style.
prompt_str = """
    "Context information is below.\\n"
    "---------------------\\n"
    "{context_str}\\n"
    "---------------------\\n"
    "Given the context information and not prior knowledge, "
    "answer the query in the style of a McKinsey Consultant\\n"
    "Query: {query_str}\\n"
    "Answer: "
"""
# Instantiate a PromptTemplate with the custom string.
prompt = PromptTemplate(prompt_str)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Update prompt template in query engine.
# Replace the default prompt with the custom prompt for response synthesis.
query_index.update_prompts({"response_synthesizer:text_qa_template": prompt})

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Execute query.
# Process the user query through the query engine and capture the response.
response = query_index.query(question)

# -------------------------------------------------------------------------------- NOTEBOOK-CELL: CODE
# Store response output.
Helpers.save_output_file(context, "Custom chat response", str(response), FileType.TEXT)`
    }
  ]
};
