Source code for moldf.covalent_bond

# MolDF
# Author: Ruibin Liu <ruibinliuphd@gmail.com>
# License: MIT
# Code Repository: https://github.com/Ruibin-Liu/MolDF
"""Gets covalent bonds by covalent radii cutoff or ligand templates."""
from __future__ import annotations

import os
import warnings
from itertools import product
from pathlib import Path

import numpy as np  # type: ignore
import pandas as pd  # type: ignore

from moldf.read_pdbx import read_pdbx



[docs]
def get_covalent_bond_cutoffs(
    element_symbols: list | set, single_radii_set: str | None = None
) -> tuple[dict, dict, dict]:
    """Gets the DataFrame for additive covalent radii of relevant elements.

    Args:
        element_symbols (required): a list or set of element symbols whose covalent
            radii cutoffs are returned.
        single_radii_set (optional): radii sets to use. If ``None``, ``single_C``
            is used as to Cordero (PMID 18478144). Another option is ``single_PA``
            which refers to Pyykkö's studies (PMID 19058281;19856342;15832398,
            and doi:10.1103/PhysRevB.85.024115). Defaults to **None**.

    Raises:
        ValueError: if the ``single_radii_set`` is not valid.

    Returns:
        a tuple of the three dictionaries of the distance cutoffs for the pairs of
            the queried element symbols: (single_bonds, double_bonds, triple_bonds).
    """
    if single_radii_set is None:
        single_radii_set = "single_C"
    if single_radii_set not in ["single_C", "single_PA"]:
        message = "The 'single_radii_set' has to be one of "
        message += "'single_C' and 'single_PA', but "
        message += f"{single_radii_set} is provided."
        raise ValueError(message)

    this_dir = os.path.dirname(__file__)
    cov_radii_file = Path(this_dir).absolute() / "covalent_bonds" / "covalent_radii.csv"
    covalent_radii = pd.read_csv(cov_radii_file)
    covalent_radii.replace("-", np.nan, inplace=True)
    covalent_radii[
        ["atomic_number", single_radii_set, "double", "triple"]
    ] = covalent_radii[["atomic_number", single_radii_set, "double", "triple"]].astype(
        float,
    )

    element_set = set(element_symbols)
    if "D" in element_set or "T" in element_set:
        element_set.update({"H"})
    covalent_radii = covalent_radii[covalent_radii.element_symbol.isin(element_set)]

    single_bonds = {}
    for first, second in product(element_set, repeat=2):
        first_radius = covalent_radii[covalent_radii.element_symbol == first][
            single_radii_set
        ].to_list()[0]
        second_radius = covalent_radii[covalent_radii.element_symbol == second][
            single_radii_set
        ].to_list()[0]
        first = first.rjust(2)
        second = second.rjust(2)
        single_bonds[(first, second)] = (
            (first_radius + second_radius) / 100 + 0.2
        ) ** 2
        single_bonds[(second, first)] = single_bonds[(first, second)]
        # TODO: make it possible to use 'single_PA' as radii criteria option.
        # Is it possible to fit a function purely based on closest 6 distances?
    double_bonds = {}
    for first, second in product(element_set, repeat=2):
        first_radius = covalent_radii[
            covalent_radii.element_symbol.str.upper() == first
        ]["double"].to_list()[0]
        second_radius = covalent_radii[
            covalent_radii.element_symbol.str.upper() == second
        ]["double"].to_list()[0]
        first = first.rjust(2)
        second = second.rjust(2)
        double_bonds[(first, second)] = (
            (first_radius + second_radius) / 100 + 0.05
        ) ** 2
        double_bonds[(second, first)] = double_bonds[(first, second)]
    triple_bonds = {}
    for first, second in product(element_set, repeat=2):
        first_radius = covalent_radii[
            covalent_radii.element_symbol.str.upper() == first
        ]["triple"].to_list()[0]
        second_radius = covalent_radii[
            covalent_radii.element_symbol.str.upper() == second
        ]["triple"].to_list()[0]
        first = first.rjust(2)
        second = second.rjust(2)
        triple_bonds[(first, second)] = (
            (first_radius + second_radius) / 100 + 0.05
        ) ** 2
        triple_bonds[(second, first)] = triple_bonds[(first, second)]

    return single_bonds, double_bonds, triple_bonds




[docs]
def get_residue_template(
    residue_name: str,
    parent_name: str | None = None,
    residue_template_file: str | os.PathLike | None = None,
    save_template_file: bool = True,
    template_file_dir: str | os.PathLike | None = None,
) -> dict:
    """Gets the intra- covalent bonds of a residue or ligand.

    Args:
        residue_name (required): residue or ligand name to query.
        parent_name (optional): parent name of the residue or ligand. This is useful if
            the ``residue_name`` in the PDB file is used by RCSB for a different residue
            or ligand. If ``None``, it is the same as ``residue_name``.
            Defaults to **None**.
        residue_template_file (optional): residue or ligand template file name/path.
            If ``None``, this function queries RCSB by the ``parent_name``. If it's not
            a path, this function looks at the current working directory first and then
            ``template_file_dir`` if it is not ``None``.
            Defaults to **None**.
        save_template_file (optional): whether to save the downloaded (from RCSB)
            template file. Defaults to **True**.
        template_file_dir (optional): directory to save fetched template files.
            If ``None`` but ``save_template_file`` is ``True``, './template_files'
            directory is used. Defaults to **None**.

    Returns:
        a dictionary of bonds for the residue. The dictionary key format is
            (atom_id_1: str, atom_id_2: str), and the dictionary value format is
            (bond_order: str, is_aromatic: bool, stereo_flag: str)

    Raises:
        ValueError: if a ``.cif`` file for the residue/ligand cannot be downloaded from
            RCSB for any reason if ``residue_template_file`` is ``None``. Check the url
            provided in the error message to download manually if possible.
        FileNotFoundError: if ``residue_template_file`` cannot be found if not ``None``.

    Warnings:
        RuntimeWarning: if the template file contains other residue/ligands.
    """
    if parent_name is None:
        parent_name = residue_name
    parent_name = parent_name.upper()

    if residue_template_file is None:
        if template_file_dir is None:
            template_file_dir = "./template_files"
            template_file_dir = Path(template_file_dir)
            if not template_file_dir.exists():
                template_file_dir.mkdir(parents=True, exist_ok=True)
        residue_template = read_pdbx(
            pdb_id=parent_name,
            save_pdbx_file=save_template_file,
            pdbx_file_dir=template_file_dir,
            category_names=["_chem_comp_bond"],
        )["_chem_comp_bond"]
        residue_template_file = Path(template_file_dir) / f"{parent_name}.cif"
    else:
        residue_template_file = Path(residue_template_file)
        if not residue_template_file.exists():
            if template_file_dir is not None:
                template_file_dir = Path(template_file_dir)
                residue_template_file = Path(template_file_dir, residue_template_file)
            if not residue_template_file.exists():
                raise FileNotFoundError(f"File {residue_template_file} not found.")
        residue_template = read_pdbx(
            pdbx_file=residue_template_file,
            category_names=["_chem_comp_bond"],
        )["_chem_comp_bond"]

    if residue_template.comp_id.unique() != [parent_name]:
        message = f"The {residue_template_file} contains other compounds, "
        message += f"but only {parent_name} is used."
        warnings.warn(message, RuntimeWarning, stacklevel=2)
        residue_template = residue_template[residue_template.comp_id == parent_name]

    results: dict = {}
    atom_id_1 = residue_template.atom_id_1
    atom_id_2 = residue_template.atom_id_2
    bond_order = residue_template.value_order
    aromatic_flag = residue_template.pdbx_aromatic_flag
    stereo_flag = residue_template.pdbx_stereo_config
    for id1, id2, bo, af, sf in zip(
        atom_id_1, atom_id_2, bond_order, aromatic_flag, stereo_flag
    ):
        is_aromatic = True
        if af == "Y":
            is_aromatic = False
        results[(id1, id2)] = (
            bo.upper(),  # bond_order
            is_aromatic,
            sf,  # stereo_flag
        )
        results[(id2, id1)] = results[(id1, id2)]

    return results