Source code for moldf.write_mol2

# MolDF
# Author: Ruibin Liu <ruibinliuphd@gmail.com>
# License: MIT
# Code Repository: https://github.com/Ruibin-Liu/MolDF
"""MOL2 format writing.

Write a dict of ``Pandas DataFrame`` back to a MOL2 file.

Currently, only the ``MOLECULE``, ``ATOM``, and ``BOND`` categories can be written back.

"""
from __future__ import annotations

import os
from datetime import date

import pandas as pd  # type: ignore

from .version import __version__ as moldf_version

IMPLEMENTED_MOL2_CATS = ["MOLECULE", "ATOM", "BOND", "HEADER"]
"""MOL2 categories that are currently implemented."""


[docs] def write_mol2( mol2: dict[str, pd.DataFrame], file_name: str | os.PathLike | None = None, ) -> None: """Write a dict of ``Pandas DataFrame`` s into a MOL2 file. See https://is.muni.cz/th/fzk5s/dp_jakub_Vana.pdf p19 for column definitions. Args: mol2 (required): a ``dict`` of ``Pandas DataFrame`` s to write. file_name (optional): file name to write a MOL2 file. If ``None``, ``moldf_output.mol2`` will be used as the file name. Defaults to **None**. Raises: TypeError: if ``mol2`` is not a valid dict of ``DataFrame``. ValueError: if the ``mol2`` contains other than supported categories. """ if not file_name: file_name = "moldf_output.mol2" if not isinstance(mol2, dict): raise TypeError(f"'mol2' has to be a dict but {type(mol2)} is provided.") implemented = ", ".join(IMPLEMENTED_MOL2_CATS) for key in mol2: if key not in IMPLEMENTED_MOL2_CATS: raise ValueError(f"Only {implemented} are implemented for the MOL2 format.") if not isinstance(mol2[key], pd.DataFrame): raise TypeError(f"{mol2[key]} is not a Pandas DataFrame object.") with open(file_name, "w", encoding="utf-8") as out_file: out_file.write("###\n") today = date.today().strftime("%Y-%m-%d") out_file.write(f"### Created by moldf v{moldf_version} {today}\n") if "HEADER" in mol2: df_header = mol2["HEADER"] for col_name in df_header.columns: header_line = df_header[col_name].to_list()[0] if col_name.startswith("info_"): out_file.write(f"### Original header: {header_line}\n") else: out_file.write(f"### {col_name}: {header_line}\n") out_file.write("###\n\n") out_file.write("@<TRIPOS>MOLECULE\n") if "MOLECULE" in mol2: df_molecule = mol2["MOLECULE"] mol_name = df_molecule.mol_name.to_list()[0] num_atoms = df_molecule.num_atoms.to_list()[0] num_bonds = df_molecule.num_bonds.to_list()[0] num_subst = df_molecule.num_subst.to_list()[0] num_feat = df_molecule.num_feat.to_list()[0] num_sets = df_molecule.num_sets.to_list()[0] mol_type = df_molecule.mol_type.to_list()[0] charge_type = df_molecule.charge_type.to_list()[0] status_bits, mol_comment = "", "" if status_bits in df_molecule.columns: status_bits = df_molecule.status_bits.to_list()[0] if mol_comment in df_molecule.columns: mol_comment = df_molecule.mol_comment.to_list()[0] out_file.write(f"{mol_name}\n") out_file.write( f" {num_atoms} {num_bonds} {num_subst} {num_feat} {num_sets}\n" ) out_file.write(f"{mol_type}\n") out_file.write(f"{charge_type}\n") if status_bits: out_file.write(f"{status_bits}\n") if mol_comment: out_file.write(f"{mol_comment}\n") if "ATOM" in mol2: out_file.write("\n@<TRIPOS>ATOM\n") for _, row in mol2["ATOM"].iterrows(): atom_id = row["atom_id"] atom_name = row["atom_name"] x = row["x"] y = row["y"] z = row["z"] atom_type = row["atom_type"] try: subst_id = row["subst_id"] except KeyError: subst_id = "" try: subst_name = row["subst_name"] except KeyError: subst_name = "" try: charge = row["charge"] except KeyError: charge = "" try: status_bit = row["status_bit"] except KeyError: status_bit = "" line = f"{atom_id:>6d} {atom_name:<8s} {x:>10.4f} {y:>10.4f} {z:>10.4f}" line += f" {atom_type:<9s} {subst_id:<2d} {subst_name:<7s} " line += f"{charge:>10.4f} {status_bit}" line = line.rstrip() out_file.write(line + "\n") if "BOND" in mol2: out_file.write("@<TRIPOS>BOND\n") for _, row in mol2["BOND"].iterrows(): bond_id = row["bond_id"] origin_atom_id = row["origin_atom_id"] target_atom_id = row["target_atom_id"] bond_type = row["bond_type"] try: status_bit = row["status_bit"] except KeyError: status_bit = "" line = f"{bond_id:>6d} {origin_atom_id:>6d} {target_atom_id:>6d} " line += f" {bond_type:1s} {status_bit:s}" line = line.rstrip() out_file.write(line + "\n")