# MolDF# Author: Ruibin Liu <ruibinliuphd@gmail.com># License: MIT# Code Repository: https://github.com/Ruibin-Liu/MolDF"""Split a line in mmCIF files."""from__future__importannotations
def split_line(line: str, delimiter: str | None = None) -> list:
    """Split a line into tokens separated by ``delimiter``, honouring quotes.

    A ``'`` or ``"`` that is the first character of the line, or that
    directly follows a delimiter, opens a quoted token. The token runs until
    the next occurrence of the same quote character, so delimiters and the
    other kind of quote may appear inside it. The surrounding quotes are not
    part of the returned token. A quote character anywhere else (e.g. the
    ``'`` in ``O5'``) is kept as an ordinary character.

    Args:
        line (required): line as a string
        delimiter (optional): ``delimiter`` to split the line. If ``None``
            (or empty), ``' '`` (one space) is used. A single space means
            "any run of whitespace", as in ``str.split()`` without
            arguments, both for quoted and unquoted lines.
            Defaults to **None**.

    Returns:
        A list of tokens

    Raises:
        ValueError: if a quoted token is opened but never closed.

    Note:
        When the line contains a quote character, empty tokens (consecutive
        delimiters or an empty pair of quotes) are not emitted, line-break
        characters also end a token, and the delimiter is compared one
        character at a time, so only single-character delimiters are
        recognised on that path. Lines without any quote character are
        handed straight to ``str.split``.
    """
    if not delimiter:
        delimiter = " "
    # A single space stands for "any whitespace", exactly like ``str.split()``.
    whitespace_mode = delimiter == " "

    # Fast path: no quote characters at all, so ``str.split`` is sufficient.
    if '"' not in line and "'" not in line:
        if whitespace_mode:
            return line.split()
        return line.split(delimiter)

    # Hoisted out of the loop: characters that end an unquoted token when an
    # explicit (non-space) delimiter is in use.
    explicit_separators = (delimiter, "\n", "\r")

    def ends_token(char: str) -> bool:
        """Return True if ``char`` terminates an unquoted token."""
        if whitespace_mode:
            # Keep the quoted path consistent with ``line.split()`` above,
            # which also splits on tabs and other whitespace.
            return char.isspace()
        return char in explicit_separators

    def precedes_token(prev_char: str) -> bool:
        """Return True if a quote right after ``prev_char`` opens a token."""
        if whitespace_mode:
            return prev_char.isspace()
        return prev_char == delimiter

    words: list[str] = []
    current: list[str] = []
    # The quote character of the token being read, or None outside quotes.
    active_quote: str | None = None

    for i, char in enumerate(line):
        if active_quote is not None:
            if char == active_quote:
                # Matching quote: the quoted part ends here.
                active_quote = None
            else:
                # Anything else, delimiters included, belongs to the token.
                current.append(char)
        elif char in ("'", '"') and (i == 0 or precedes_token(line[i - 1])):
            # A quote at the start of the line or right after a delimiter
            # opens a quoted part.
            active_quote = char
        elif ends_token(char):
            if current:
                words.append("".join(current))
                current = []
        else:
            # Ordinary character, including quotes in the middle of a token.
            current.append(char)

    # The line may end without a trailing delimiter or line break.
    if current:
        words.append("".join(current))

    if active_quote is not None:
        raise ValueError("Bad line: quotes not paired!")
    return words