Source code for smlmlp.modules.Locs_LP._functions.append_df

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Author        : Lancelot PINCET
# GitHub        : https://github.com/LancelotPincet

"""Append one DataFrame to an existing quoted-header CSV file."""

import csv
from contextlib import nullcontext
from pathlib import Path


def _max_existing_index(path):
    """Return the biggest numeric index found in an existing CSV file.

    Only the first field of each row is inspected. A field counts when,
    after stripping surrounding whitespace and quote characters, it parses
    as an integer or as a float with an integral value (e.g. ``"3.0"``).
    Empty rows, empty fields and anything non-numeric (such as a header
    row) are skipped.

    Parameters
    ----------
    path : pathlib.Path
        Location of the CSV file to scan.

    Returns
    -------
    int
        The largest value found, never less than 0. Returns 0 when the file
        does not exist or holds no positive integral first field.
    """
    # DataFrame.to_csv is called without an explicit encoding in this module,
    # which pandas documents as UTF-8, so read the file back as UTF-8 rather
    # than with the locale default. Undecodable bytes are replaced instead of
    # raising: only the numeric first column matters for this scan.
    try:
        file = path.open("r", newline="", encoding="utf-8", errors="replace")
    except FileNotFoundError:
        return 0

    max_index = 0
    with file:
        for row in csv.reader(file):
            if not row:
                continue
            token = str(row[0]).strip().strip('"').strip("'")
            if not token:
                continue
            try:
                value = int(token)
            except ValueError:
                # Not a plain integer literal: accept floats such as "12.0"
                # but reject fractional, infinite or NaN values.
                try:
                    value_float = float(token)
                except ValueError:
                    continue
                if not value_float.is_integer():
                    continue
                value = int(value_float)
            if value > max_index:
                max_index = value
    return max_index



[docs]
def append_df(df, path, head2save=None, printer=None):
    """Append a DataFrame to CSV while preserving SMLM header quoting.

    Column names are written wrapped in literal double quotes, with CSV
    quoting disabled so the quotes are emitted verbatim. The header row is
    written only when ``path`` does not exist yet. When the existing file
    already holds numeric index values, every strictly positive index entry
    of ``df`` is shifted by the largest existing value before writing.
    Floats are written with three decimals.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to append. It is relabelled in place while writing and restored
        afterwards, including when writing fails.
    path : str or os.PathLike
        Destination CSV file; opened in append mode.
    head2save : container of str, optional
        Names of the columns to write. ``None`` writes every column.
    printer : object, optional
        When given, ``printer.timeit(message)`` is used as a context manager
        around the whole operation.
    """
    timeit = (
        nullcontext()
        if printer is None
        else printer.timeit(f"appending {df.__class__.__name__} into {path}")
    )

    with timeit:
        path = Path(path)
        # Keep the original Index objects (they are immutable) so that names
        # and dtypes are restored exactly.
        original_columns = df.columns
        original_index = df.index

        quoted_columns = [f'"{col}"' for col in original_columns]
        if head2save is None:
            columns_list = quoted_columns
        else:
            # col[1:-1] strips the quotes added above to recover the name.
            columns_list = [col for col in quoted_columns if col[1:-1] in head2save]

        # Continue numbering after the rows already present in the file.
        shifted_index = None
        max_index = _max_existing_index(path)
        if max_index > 0 and len(original_index) > 0:
            index_array = original_index.to_numpy(copy=True)
            positive = index_array > 0
            if positive.any():
                index_array[positive] = index_array[positive] + max_index
                shifted_index = index_array

        try:
            if shifted_index is not None:
                df.index = shifted_index
            df.columns = quoted_columns
            df.to_csv(
                path,
                mode="a",
                header=not path.exists(),
                columns=columns_list,
                quoting=csv.QUOTE_NONE,
                float_format="%.3f",
            )
        finally:
            # Always hand the caller's frame back unchanged, even if to_csv
            # raised (e.g. csv.Error under QUOTE_NONE, or an I/O failure).
            df.columns = original_columns
            df.index = original_index