Source code for moldf.covalent_bond

# MolDF
# Author: Ruibin Liu <ruibinliuphd@gmail.com>
# License: MIT
# Code Repository: https://github.com/Ruibin-Liu/MolDF
"""Gets covalent bonds by covalent radii cutoff or ligand templates."""
from __future__ import annotations

import os
import warnings
from itertools import product
from pathlib import Path

import numpy as np  # type: ignore
import pandas as pd  # type: ignore

from moldf.read_pdbx import read_pdbx


[docs] def get_covalent_bond_cutoffs( element_symbols: list | set, single_radii_set: str | None = None ) -> tuple[dict, dict, dict]: """Gets the DataFrame for additive covalent radii of relevant elements. Args: element_symbols (required): a list or set of element symbols whose covalent radii cutoffs are returned. single_radii_set (optional): radii sets to use. If ``None``, ``single_C`` is used as to Cordero (PMID 18478144). Another option is ``single_PA`` which refers to Pyykkö's studies (PMID 19058281;19856342;15832398, and doi:10.1103/PhysRevB.85.024115). Defaults to **None**. Raises: ValueError: if the ``single_radii_set`` is not valid. Returns: a tuple of the three dictionaries of the distance cutoffs for the pairs of the queried element symbols: (single_bonds, double_bonds, triple_bonds). """ if single_radii_set is None: single_radii_set = "single_C" if single_radii_set not in ["single_C", "single_PA"]: message = "The 'single_radii_set' has to be one of " message += "'single_C' and 'single_PA', but " message += f"{single_radii_set} is provided." raise ValueError(message) this_dir = os.path.dirname(__file__) cov_radii_file = Path(this_dir).absolute() / "covalent_bonds" / "covalent_radii.csv" covalent_radii = pd.read_csv(cov_radii_file) covalent_radii.replace("-", np.nan, inplace=True) covalent_radii[ ["atomic_number", single_radii_set, "double", "triple"] ] = covalent_radii[["atomic_number", single_radii_set, "double", "triple"]].astype( float, ) element_set = set(element_symbols) if "D" in element_set or "T" in element_set: element_set.update({"H"}) covalent_radii = covalent_radii[covalent_radii.element_symbol.isin(element_set)] single_bonds = {} for first, second in product(element_set, repeat=2): first_radius = covalent_radii[covalent_radii.element_symbol == first][ single_radii_set ].to_list()[0] second_radius = covalent_radii[covalent_radii.element_symbol == second][ single_radii_set ].to_list()[0] first = first.rjust(2) second = second.rjust(2) single_bonds[(first, second)] = ( (first_radius + second_radius) / 100 + 0.2 ) ** 2 single_bonds[(second, first)] = single_bonds[(first, second)] # TODO: make it possible to use 'single_PA' as radii criteria option. # Is it possible to fit a function purely based on closest 6 distances? double_bonds = {} for first, second in product(element_set, repeat=2): first_radius = covalent_radii[ covalent_radii.element_symbol.str.upper() == first ]["double"].to_list()[0] second_radius = covalent_radii[ covalent_radii.element_symbol.str.upper() == second ]["double"].to_list()[0] first = first.rjust(2) second = second.rjust(2) double_bonds[(first, second)] = ( (first_radius + second_radius) / 100 + 0.05 ) ** 2 double_bonds[(second, first)] = double_bonds[(first, second)] triple_bonds = {} for first, second in product(element_set, repeat=2): first_radius = covalent_radii[ covalent_radii.element_symbol.str.upper() == first ]["triple"].to_list()[0] second_radius = covalent_radii[ covalent_radii.element_symbol.str.upper() == second ]["triple"].to_list()[0] first = first.rjust(2) second = second.rjust(2) triple_bonds[(first, second)] = ( (first_radius + second_radius) / 100 + 0.05 ) ** 2 triple_bonds[(second, first)] = triple_bonds[(first, second)] return single_bonds, double_bonds, triple_bonds
[docs] def get_residue_template( residue_name: str, parent_name: str | None = None, residue_template_file: str | os.PathLike | None = None, save_template_file: bool = True, template_file_dir: str | os.PathLike | None = None, ) -> dict: """Gets the intra- covalent bonds of a residue or ligand. Args: residue_name (required): residue or ligand name to query. parent_name (optional): parent name of the residue or ligand. This is useful if the ``residue_name`` in the PDB file is used by RCSB for a different residue or ligand. If ``None``, it is the same as ``residue_name``. Defaults to **None**. residue_template_file (optional): residue or ligand template file name/path. If ``None``, this function queries RCSB by the ``parent_name``. If it's not a path, this function looks at the current working directory first and then ``template_file_dir`` if it is not ``None``. Defaults to **None**. save_template_file (optional): whether to save the downloaded (from RCSB) template file. Defaults to **True**. template_file_dir (optional): directory to save fetched template files. If ``None`` but ``save_template_file`` is ``True``, './template_files' directory is used. Defaults to **None**. Returns: a dictionary of bonds for the residue. The dictionary key format is (atom_id_1: str, atom_id_2: str), and the dictionary value format is (bond_order: str, is_aromatic: bool, stereo_flag: str) Raises: ValueError: if a ``.cif`` file for the residue/ligand cannot be downloaded from RCSB for any reason if ``residue_template_file`` is ``None``. Check the url provided in the error message to download manually if possible. FileNotFoundError: if ``residue_template_file`` cannot be found if not ``None``. Warnings: RuntimeWarning: if the template file contains other residue/ligands. """ if parent_name is None: parent_name = residue_name parent_name = parent_name.upper() if residue_template_file is None: if template_file_dir is None: template_file_dir = "./template_files" template_file_dir = Path(template_file_dir) if not template_file_dir.exists(): template_file_dir.mkdir(parents=True, exist_ok=True) residue_template = read_pdbx( pdb_id=parent_name, save_pdbx_file=save_template_file, pdbx_file_dir=template_file_dir, category_names=["_chem_comp_bond"], )["_chem_comp_bond"] residue_template_file = Path(template_file_dir) / f"{parent_name}.cif" else: residue_template_file = Path(residue_template_file) if not residue_template_file.exists(): if template_file_dir is not None: template_file_dir = Path(template_file_dir) residue_template_file = Path(template_file_dir, residue_template_file) if not residue_template_file.exists(): raise FileNotFoundError(f"File {residue_template_file} not found.") residue_template = read_pdbx( pdbx_file=residue_template_file, category_names=["_chem_comp_bond"], )["_chem_comp_bond"] if residue_template.comp_id.unique() != [parent_name]: message = f"The {residue_template_file} contains other compounds, " message += f"but only {parent_name} is used." warnings.warn(message, RuntimeWarning, stacklevel=2) residue_template = residue_template[residue_template.comp_id == parent_name] results: dict = {} atom_id_1 = residue_template.atom_id_1 atom_id_2 = residue_template.atom_id_2 bond_order = residue_template.value_order aromatic_flag = residue_template.pdbx_aromatic_flag stereo_flag = residue_template.pdbx_stereo_config for id1, id2, bo, af, sf in zip( atom_id_1, atom_id_2, bond_order, aromatic_flag, stereo_flag ): is_aromatic = True if af == "Y": is_aromatic = False results[(id1, id2)] = ( bo.upper(), # bond_order is_aromatic, sf, # stereo_flag ) results[(id2, id1)] = results[(id1, id2)] return results