# Source code for smlmlp.modules.Locs_LP._functions.append_df

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Author        : Lancelot PINCET
# GitHub        : https://github.com/LancelotPincet

"""Append one DataFrame to an existing quoted-header CSV file."""

import csv
from contextlib import nullcontext
from pathlib import Path


def _max_existing_index(path):
    """Scan the first column of a CSV file for its largest integer value.

    ``path`` must offer ``exists()`` and ``open()`` (e.g. ``pathlib.Path``).
    Cells that are empty or do not hold a whole number (after stripping
    whitespace and surrounding quote characters) are ignored.  The result is
    never below 0: a missing file, or a file whose first column holds no
    positive whole number, yields 0.
    """

    def parse_whole_number(cell):
        # Return the cell as an int, or None when it is not a whole number.
        text = str(cell).strip().strip('"').strip("'")
        if not text:
            return None
        try:
            return int(text)
        except ValueError:
            pass
        # Fall back to float notation so that "3.0" or "1e2" still count.
        try:
            as_float = float(text)
        except ValueError:
            return None
        return int(as_float) if as_float.is_integer() else None

    highest = 0
    if not path.exists():
        return highest

    with path.open("r", newline="") as handle:
        for fields in csv.reader(handle):
            if not fields:
                continue
            number = parse_whole_number(fields[0])
            if number is not None:
                highest = max(highest, number)
    return highest


def append_df(df, path, head2save=None, printer=None):
    """Append a DataFrame to CSV while preserving SMLM header quoting.

    Column names are written wrapped in literal double quotes, and positive
    index values are shifted by the largest index already found in the
    target file so appended rows continue its numbering.  ``df`` is
    relabelled in place only for the duration of the write; its original
    columns and index are restored before returning, even if writing fails.

    Parameters
    ----------
    df : DataFrame
        Table to append.  Must provide ``columns``, ``index`` and ``to_csv``.
    path : str or path-like
        Destination CSV file; it is opened in append mode.
    head2save : container of column names, optional
        Unquoted names of the columns to write.  ``None`` writes every column.
    printer : object, optional
        When given, ``printer.timeit(message)`` must return a context manager
        that wraps the whole operation.  ``None`` disables it.
    """
    timeit = (
        nullcontext()
        if printer is None
        else printer.timeit(f"appending {df.__class__.__name__} into {path}")
    )
    with timeit:
        path = Path(path)

        # Keep the original axis objects themselves so the restore below is
        # exact (rebuilding them from a list would drop e.g. the axis name).
        original_columns = df.columns
        original_index = df.index

        try:
            max_index = _max_existing_index(path)
            if max_index > 0 and len(df.index) > 0:
                index_array = df.index.to_numpy(copy=True)
                positive = index_array > 0
                if positive.any():
                    # Only strictly positive indices are offset; zero and
                    # negative values are written unchanged.
                    index_array[positive] = index_array[positive] + max_index
                    df.index = index_array

            # Quotes are embedded in the names because QUOTE_NONE below makes
            # the writer emit every field verbatim.
            quoted_columns = [f'"{col}"' for col in original_columns]
            df.columns = quoted_columns
            if head2save is None:
                columns_list = quoted_columns
            else:
                columns_list = [
                    col for col in quoted_columns if col[1:-1] in head2save
                ]

            # A file that exists but is still empty has no header yet either.
            write_header = not path.exists() or path.stat().st_size == 0

            df.to_csv(
                path,
                mode="a",
                header=write_header,
                columns=columns_list,
                quoting=csv.QUOTE_NONE,
                float_format="%.3f",
            )
        finally:
            # Always undo the temporary relabelling: a failed write (e.g. the
            # csv writer rejecting a value under QUOTE_NONE) must not leave
            # the caller's DataFrame with quoted columns or a shifted index.
            df.columns = original_columns
            df.index = original_index