# MolDF
# Author: Ruibin Liu <ruibinliuphd@gmail.com>
# License: MIT
# Code Repository: https://github.com/Ruibin-Liu/MolDF
"""PDBx/mmCIF format writing.

Write a dict of ``Pandas DataFrame`` back to a PDBx file.
"""
from __future__ import annotations

import os
from collections import defaultdict
from pathlib import Path

import pandas as pd  # type: ignore

# Columns whose float values are written with a fixed number of decimals.
_THREE_DECIMAL_COLUMNS = frozenset({"Cartn_x", "Cartn_y", "Cartn_z"})
_TWO_DECIMAL_COLUMNS = frozenset({"occupancy", "B_iso_or_equiv"})
# A single-record item whose tag plus value is longer than this is written as
# a semicolon-delimited text field instead of on the tag line.
_MAX_LINE_LENGTH = 130
# Width used to wrap ``_struct_ref`` text fields (sequences).
_SEQUENCE_LINE_WIDTH = 80


def _quote_value(text: str) -> str:
    """Return ``text`` quoted as needed to be a single PDBx value.

    Raises:
        ValueError: if ``text`` contains both ``'`` and ``"``, which this
            writer cannot quote.
    """
    has_double = '"' in text
    has_single = "'" in text
    if has_double and has_single:
        raise ValueError(f"'{text}' cannot be written into a PDBx file.")
    if has_single:
        return f'"{text}"'
    if has_double or " " in text:
        return f"'{text}'"
    return text


def _format_value(value: object, col: object) -> str:
    """Render one cell of column ``col`` as the text written to the file."""
    if isinstance(value, str):
        return _quote_value(value)
    if isinstance(value, float):
        if col in _THREE_DECIMAL_COLUMNS:
            return f"{value:.3f}"
        if col in _TWO_DECIMAL_COLUMNS:
            return f"{value:.2f}"
    # Missing values outside the fixed-format columns become the CIF
    # "unknown" marker instead of being dropped from the row.
    if (
        value is None
        or value is pd.NA
        or (isinstance(value, float) and value != value)
    ):
        return "?"
    # Integers, other floats, numpy scalars, booleans, ...
    return str(value)


def _write_single_record(handle, category_name: str, records, tag_width: int) -> None:
    """Write a category holding at most one record as ``tag value`` lines.

    An empty DataFrame has no value to pair with its tags, so nothing is
    written for it.
    """
    if len(records) == 0:
        return
    for position, col in enumerate(records.columns):
        tag = f"{category_name}.{col}"
        handle.write(f"{tag:{tag_width}}")
        value = records.iloc[0, position]
        rendered = _format_value(value, col)
        # The length check uses the current tag, not a value left over from
        # an earlier loop.
        if len(tag) + len(rendered) <= _MAX_LINE_LENGTH:
            handle.write(f"{rendered}\n")
            continue
        # Text fields carry the raw value: no surrounding quotes are added,
        # and quote characters that belong to the value are kept.
        text = value if isinstance(value, str) else rendered
        handle.write("\n;")
        if category_name == "_struct_ref":
            # Step over the whole string so the last, shorter chunk is kept.
            for start in range(0, len(text), _SEQUENCE_LINE_WIDTH):
                handle.write(f"{text[start:start + _SEQUENCE_LINE_WIDTH]}\n")
        else:
            handle.write(f"{text}\n")
        handle.write(";\n")


def _write_loop(handle, category_name: str, records) -> None:
    """Write a category with several records as a ``loop_`` table."""
    if len(records.columns) == 0:
        return
    # Render every cell before writing, so that column widths are measured on
    # the text that is actually written (quotes, sign and rounding included).
    padded_columns = []
    for position, col in enumerate(records.columns):
        cells = [_format_value(value, col) for value in records.iloc[:, position]]
        width = max(len(cell) for cell in cells) + 1  # +1 keeps a separator
        padded_columns.append([cell.ljust(width) for cell in cells])

    handle.write("loop_\n")
    for col in records.columns:
        handle.write(f"{category_name}.{col}\n")
    for row in zip(*padded_columns):
        handle.write("".join(row) + "\n")


def write_pdbx(
    pdbx: dict[str, pd.DataFrame], file_name: str | os.PathLike | None = None
) -> None:
    """Writes a ``dict`` of ``Pandas DataFrame`` s into a PDBx file.

    Categories with at most one record are written as ``tag value`` lines;
    categories with more than one record are written as a ``loop_`` table
    whose columns are left-aligned and padded to the widest written value.

    Args:
        pdbx (required): a ``dict`` of ``Pandas DataFrame`` s to write. Keys
            are category names and each column becomes a
            ``<category>.<column>`` tag.
        file_name (optional): file name to write a PDBx file. If ``None``,
            ``moldf_output.cif`` will be used as the file name.
            Defaults to **None**.

    Raises:
        TypeError: if ``pdbx`` is not a valid dict of ``DataFrame``.
        ValueError: if a string value contains both ``'`` and ``"``.
    """
    if not file_name:
        file_name = "moldf_output.cif"
    if not isinstance(pdbx, dict):
        raise TypeError(f"pdbx has to be a dict but {type(pdbx)} is provided.")

    # Validate every category before the output file is opened, and record
    # the longest tag of each category for aligning single-record values.
    max_tag_length: dict[str, int] = defaultdict(int)
    for category_name, records in pdbx.items():
        if not isinstance(records, pd.DataFrame):
            raise TypeError(
                f"pdbx values have to be Pandas DataFrames but {category_name} is a {type(records)}."  # noqa
            )
        for col in records.columns:
            tag_length = len(f"{category_name}.{col}")
            max_tag_length[category_name] = max(
                max_tag_length[category_name], tag_length
            )

    with open(file_name, "w", encoding="utf-8") as f:
        # write header: the data block is named after the file, minus ".cif"
        target_name = Path(file_name).name
        if target_name.endswith(".cif"):
            target_name = target_name[:-4]
        f.write(f"data_{target_name}\n")

        # write each category, separated by "#" lines
        for category_name, records in pdbx.items():
            f.write("#\n")
            if len(records) > 1:
                _write_loop(f, category_name, records)
            else:
                _write_single_record(
                    f, category_name, records, max_tag_length[category_name] + 3
                )
        f.write("#")