Source code for reemote.utilities.convert_to_df

# Copyright (c) 2025 Kim Jarvis TPF Software Services S.A. kim.jarvis@tpfsystems.com 
# This software is licensed under the MIT License. See the LICENSE file for details.
#
import json
import pandas as pd

def _required_field(key):
    """Return an extractor that reads the mandatory top-level ``key``."""
    def extract(item):
        # Deliberately strict: a missing key raises KeyError so the caller
        # can report it and fall back to ''.
        return item[key]
    return extract


def _nested_field(section, key):
    """Return an extractor for a mandatory ``key`` of an optional ``section``."""
    def extract(item):
        nested = item.get(section)
        # An absent or empty section is an expected shape and yields ''
        # silently; a present section without ``key`` raises instead.
        return nested[key] if nested else ''
    return extract


def _optional_nested_field(section, key):
    """Return an extractor for an optional ``key`` of an optional ``section``."""
    def extract(item):
        nested = item.get(section)
        return nested.get(key, '') if nested else ''
    return extract


# Column name -> extractor. Insertion order defines the default column order
# and the order in which available columns are listed in error messages.
_COLUMN_EXTRACTORS = {
    'command': _nested_field('op', 'command'),
    'host': _required_field('host'),
    'guard': _nested_field('op', 'guard'),
    'changed': _required_field('changed'),
    'executed': _required_field('executed'),
    'stdout': _nested_field('cp', 'stdout'),
    'stderr': _nested_field('cp', 'stderr'),
    'exit_status': _nested_field('cp', 'exit_status'),
    'returncode': _nested_field('cp', 'returncode'),
    'env': _optional_nested_field('cp', 'env'),
    'subsystem': _optional_nested_field('cp', 'subsystem'),
    'exit_signal': _optional_nested_field('cp', 'exit_signal'),
    'error': lambda item: item.get('error', ''),
}


def convert_to_df(data, columns=None):
    """Flatten structured result records into a pandas DataFrame.

    Each record is a dictionary whose nested ``op`` and ``cp`` objects are
    lifted into top-level columns. Values that cannot be extracted are
    replaced by an empty string and a warning is printed to stdout.

    Args:
        data (str | list[dict] | dict): The records to convert. A string is
            parsed as JSON first. A single dictionary is treated as one
            record. Items that are not dictionaries are skipped with a
            printed warning.
        columns (list[str], optional): Names of the columns to include, in
            output order. Defaults to None, which selects every available
            column: command, host, guard, changed, executed, stdout, stderr,
            exit_status, returncode, env, subsystem, exit_signal, error.

    Returns:
        pd.DataFrame: One row per dictionary record, restricted to and
        ordered by ``columns``.

    Raises:
        ValueError: If ``data`` is a string that is not valid JSON, or if a
            requested column name is not one of the available columns.
    """
    if isinstance(data, str):
        try:
            data = json.loads(data)
        except json.JSONDecodeError as e:
            raise ValueError(f"Failed to parse JSON string: {e}") from e

    # A lone JSON object would otherwise be iterated key by key, every key
    # would be skipped as a non-dictionary item, and the frame would be empty.
    if isinstance(data, dict):
        data = [data]

    if columns is None:
        columns = list(_COLUMN_EXTRACTORS)
    else:
        # Materialise once: the names are walked for validation and again
        # for every row, which would exhaust a one-shot iterable.
        columns = list(columns)

    for col in columns:
        if col not in _COLUMN_EXTRACTORS:
            raise ValueError(
                f"Column '{col}' is not available. "
                f"Available columns: {list(_COLUMN_EXTRACTORS)}"
            )

    rows = []
    for item in data:
        if not isinstance(item, dict):
            print(f"Warning: Skipping non-dictionary item: {item}")
            continue

        row = {}
        for col in columns:
            try:
                row[col] = _COLUMN_EXTRACTORS[col](item)
            except (KeyError, TypeError, AttributeError) as e:
                # AttributeError covers a nested section that is present but
                # not a mapping (e.g. a string), which has no ``.get``.
                row[col] = ''
                print(f"Warning: Could not extract column '{col}' from item: {e}")
        rows.append(row)

    # Passing ``columns`` keeps the requested order and preserves the column
    # layout even when no rows were produced.
    return pd.DataFrame(rows, columns=columns)