Source code for sparc.sparc_parsers.atoms

"""Convert ase atoms to structured dict following SPARC format
and vice versa
"""

from copy import deepcopy
from warnings import warn

import numpy as np
from ase import Atom, Atoms
from ase.constraints import FixAtoms, FixedLine, FixedPlane
from ase.units import Bohr

from .inpt import _inpt_cell_to_ase_cell
from .ion import _ion_coord_to_ase_pos
from .pseudopotential import find_pseudo_path
from .utils import make_reverse_mapping

# from .sparc_parsers.ion import read_ion, write_ion


def atoms_to_dict(
    atoms,
    sort=True,
    direct=False,
    wrap=False,
    ignore_constraints=False,
    psp_dir=None,
    pseudopotentials=None,
    comments="",
):
    """Given an ASE Atoms object, convert to SPARC ion and inpt data dict

    Arguments:
        atoms: ASE Atoms object to convert
        sort: True --> re-calculate a stable sort by chemical symbol;
              list --> re-use it as a precomputed sorting map;
              falsy --> keep the original atom order
        direct: write fractional (COORD_FRAC) instead of Cartesian (COORD)
        wrap: wrap positions back into the cell
        ignore_constraints: skip writing the RELAX block
        psp_dir: search path for psp8 files
        pseudopotentials: a mapping between symbol and psp file names,
            similar to QE, like {'Na': 'Na-pbe.psp8'}. If the file name
            does not contain path information, use psp_dir / filename,
            otherwise use the file path. Defaults to an empty mapping.

    We don't do any env variable replacement for psp_dir, it should be
    handled by the explicit _write_ion_and_inpt() function.

    At this step, the copy_psp is not applied, since we don't yet know
    the location to write.

    Returns:
        dict with "ion" and "inpt" sub-dicts in SPARC's structured format
    """
    # Fix: the old signature used ``pseudopotentials={}`` -- a mutable
    # default shared across calls
    if pseudopotentials is None:
        pseudopotentials = {}

    # Step 1: if we should sort the atoms?
    # sort = True re-calculate the sorting information
    # sort = list re-uses the sorting information
    if sort:
        if isinstance(sort, list):
            sort_ = np.array(sort)
            resort_ = make_reverse_mapping(sort_)
        else:
            sort_ = np.argsort(atoms.get_chemical_symbols(), kind="stable")
            resort_ = make_reverse_mapping(sort_)
        # This is the sorted atoms object
        atoms = atoms[sort_]
    else:
        sort_ = []
        resort_ = []

    # Step 2: determine the counts of each element
    symbol_counts = count_symbols(atoms.get_chemical_symbols())
    write_spin = np.any(atoms.get_initial_magnetic_moments() != 0)
    has_charge = np.any(atoms.get_initial_charges() != 0)
    if has_charge:
        warn(
            "SPARC currently doesn't support changing total number of electrons! "
            "via nominal charges. The initial charges in the structure will be ignored."
        )

    relax_mask = relax_from_all_constraints(atoms.constraints, len(atoms))
    write_relax = (len(relax_mask) > 0) and (not ignore_constraints)

    atom_blocks = []
    # Step 3: write each block
    for symbol, start, end in symbol_counts:
        block_dict = {}
        block_dict["ATOM_TYPE"] = symbol
        block_dict["N_TYPE_ATOM"] = end - start
        # TODO: make pseudo finding work
        # TODO: write comment that psp file may not exist
        try:
            psp_file = find_pseudo_path(symbol, psp_dir, pseudopotentials)
            # TODO: add option to determine if psp file exists!
            block_dict["PSEUDO_POT"] = psp_file.resolve().as_posix()
        except Exception:
            warn(
                (
                    f"Failed to find pseudo potential file for symbol {symbol}. I will use a dummy file name"
                )
            )
            block_dict[
                "PSEUDO_POT"
            ] = f"{symbol}-dummy.psp8 # Please replace with real psp file name!"
        # TODO: atomic mass?
        p_atoms = atoms[start:end]
        if direct:
            pos = p_atoms.get_scaled_positions(wrap=wrap)
            block_dict["COORD_FRAC"] = pos
        else:
            # TODO: should we use default converter?
            pos = p_atoms.get_positions(wrap=wrap) / Bohr
            block_dict["COORD"] = pos
        if write_spin:
            # TODO: should we process atoms with already calculated magmoms?
            n_atom = len(p_atoms)
            block_dict["SPIN"] = p_atoms.get_initial_magnetic_moments().reshape(
                n_atom, -1
            )
        if write_relax:
            relax_this_block = relax_mask[start:end]
            block_dict["RELAX"] = relax_this_block
        # TODO: get write_relax
        atom_blocks.append(block_dict)

    # Step 4: inpt part
    # TODO: what if atoms does not have cell?
    cell_au = atoms.cell / Bohr
    inpt_blocks = {"LATVEC": cell_au, "LATVEC_SCALE": [1.0, 1.0, 1.0]}

    # Step 5: retrieve boundary condition information
    # TODO: have to use space to join the single keywords
    inpt_blocks.update(atoms_bc_to_sparc(atoms))

    if not isinstance(comments, list):
        comments = comments.split("\n")

    ion_data = {
        "atom_blocks": atom_blocks,
        "comments": comments,
        "sorting": {"sort": sort_, "resort": resort_},
    }
    inpt_data = {"params": inpt_blocks, "comments": []}
    return {"ion": ion_data, "inpt": inpt_data}
def dict_to_atoms(data_dict):
    """Given a SPARC struct dict, construct the ASE atoms object.

    Note: this method supports only 1 Atoms at a time.
    """
    ase_cell = _inpt_cell_to_ase_cell(data_dict)
    working = deepcopy(data_dict)
    _ion_coord_to_ase_pos(working, ase_cell)

    # Build the Atoms object atom-by-atom from the parsed ion blocks
    atoms = Atoms()
    atoms.cell = ase_cell
    relax_dict = {}
    n_seen = 0
    for block in working["ion"]["atom_blocks"]:
        element = block["ATOM_TYPE"]
        positions = block["_ase_positions"]
        if positions.ndim == 1:
            positions = positions.reshape(1, -1)
        # Consider moving spins to another function
        spins = block.get("SPIN", None)
        if spins is None:
            spins = np.zeros(len(positions))
        for pos, spin in zip(positions, spins):
            # TODO: What about charge?
            atoms.append(Atom(symbol=element, position=pos, magmom=spin))
        # Reshape relax into an (N, 3) array; record one mask per atom,
        # keyed by the atom's running index in the on-file order
        relax = block.get("RELAX", np.array([])).reshape((-1, 3))
        for idx, mask in enumerate(relax, start=n_seen):
            relax_dict[idx] = mask
        n_seen += len(positions)

    if "sorting" in data_dict["ion"]:
        resort = data_dict["ion"]["sorting"].get("resort", np.arange(len(atoms)))
        # Resort may be empty
        if len(resort) == 0:
            resort = np.arange(len(atoms))
    else:
        resort = np.arange(len(atoms))

    if len(resort) != len(atoms):
        # TODO: new exception
        raise ValueError(
            "Length of resort mapping is different from the number of atoms!"
        )

    # TODO: check if this mapping is correct
    sort = make_reverse_mapping(resort)
    # Relax masks were recorded in the on-file (sorted) order; remap them
    # onto the resorted atom indices
    sorted_relax_dict = {sort[i]: mask for i, mask in relax_dict.items()}

    # Now we do a sort on the atom indices. The atom positions read from
    # .ion correspond to the `sort` and we use `resort` to transform
    # TODO: should we store the sorting information in SparcBundle?
    atoms = atoms[resort]
    atoms.constraints = constraints_from_relax(sorted_relax_dict)

    # @2023.08.31 add support for PBC
    # TODO: move to a more modular function
    # TODO: Datatype for BC in the API, should it be string, or string array?
    sparc_bc = working["inpt"]["params"].get("BC", "P P P").split()
    twist_angle = float(working["inpt"]["params"].get("TWIST_ANGLE", 0))
    modify_atoms_bc(atoms, sparc_bc, twist_angle)
    return atoms
[docs] def count_symbols(symbols): """Count the number of consecutive elements. Output tuple is: element, start, end For example, "CHCHHO" --> [('C', 0, 1), ('H', 1, 2), ('C', 2, 3), ('H', 3, 5), ('O', 5, 6)] """ counts = [] current_count = 1 current_symbol = symbols[0] for i, symbol in enumerate(symbols[1:], start=1): if symbol == current_symbol: current_count += 1 else: counts.append((current_symbol, i - current_count, i)) current_count = 1 current_symbol = symbol end = len(symbols) counts.append((current_symbol, end - current_count, end)) return counts
[docs] def constraints_from_relax(relax_dict): """ Convert the SPARC RELAX fields to ASE's constraints Arguments relax: bool vector of size Nx3, i.e. [[True, True, True], [True, False, False]] Supported ase constraints will be FixAtoms, FixedLine and FixedPlane. For constraints in the same direction, all indices will be gathered. Note: ase>=3.22 will have FixedLine and FixedPlane accepting only 1 index at a time! The relax vector must be already sorted! """ if len(relax_dict) == 0: return [] cons_list = [] # gathered_indices is an intermediate dict that contains # key: relax mask if not all True # indices: indices that share the same mask # gathered_indices = {} # breakpoint() for i, r in relax_dict.items(): r = np.array(r) r = tuple(np.ndarray.tolist(r.astype(bool))) if np.all(r): continue if r not in gathered_indices: gathered_indices[r] = [i] else: gathered_indices[r].append(i) for relax_type, indices in gathered_indices.items(): degree_freedom = 3 - relax_type.count(False) # DegreeF == 0 --> fix atom if degree_freedom == 0: cons_list.append(FixAtoms(indices=indices)) # DegreeF == 1 --> move along line, fix line elif degree_freedom == 1: for ind in indices: cons_list.append(FixedLine(ind, np.array(relax_type).astype(int))) # DegreeF == 1 --> move along line, fix plane elif degree_freedom == 2: for ind in indices: cons_list.append(FixedPlane(ind, (~np.array(relax_type)).astype(int))) return cons_list
[docs] def relax_from_constraint(constraint): """returns dict of {atom_index: relax_dimensions} for the given constraint""" type_name = constraint.todict()["name"] if isinstance(constraint, FixAtoms): dimensions = [False] * 3 expected_free = 0 elif isinstance(constraint, FixedLine): # Only supports orthogonal basis! dimensions = [d == 1 for d in constraint.dir] expected_free = 1 elif isinstance(constraint, FixedPlane): dimensions = [d != 1 for d in constraint.dir] expected_free = 2 else: warn( f"The constraint type {type_name} is not supported by" " SPARC's .ion format. This constraint will be" " ignored" ) return {} if dimensions.count(True) != expected_free: warn( "SPARC's .ion filetype can only support freezing entire " f"dimensions (x,y,z). The {type_name} constraint will be ignored" ) return {} return {i: dimensions for i in constraint.get_indices()} # atom indices
[docs] def relax_from_all_constraints(constraints, natoms): """converts ASE atom constraints to SPARC relaxed dimensions for the atoms""" if len(constraints) == 0: return [] relax = [ [True, True, True], ] * natoms # assume relaxed in all dimensions for all atoms for c in constraints: for atom_index, rdims in relax_from_constraint(c).items(): if atom_index >= natoms: raise ValueError( ( "Number of total atoms smaller than the constraint indices!\n" "Please check your input" ) ) # There might be multiple constraints applied on one index, # always make it more constrained relax[atom_index] = list(np.bitwise_and(relax[atom_index], rdims)) return relax
[docs] def modify_atoms_bc(atoms, sparc_bc, twist_angle=0): """Modify the atoms boundary conditions in-place from the bc information sparc_bc is a keyword from inpt twist_angle is the helix twist angle in inpt conversion rules: BC: P --> pbc=True BC: D, H, C --> pbc=False """ ase_bc = [] # print(sparc_bc, type(sparc_bc)) for bc_ in sparc_bc: if bc_.upper() in ["C", "H"]: warn( ( "Parsing SPARC's helix or cyclic boundary conditions" " into ASE atoms is only partially supported. " "Saving the atoms object into other format may cause " "data-loss of the SPARC-specific BC information." ) ) pbc = ( False # Do not confuse ase-gui, we'll manually handle the visualization ) elif bc_.upper() == "D": pbc = False elif bc_.upper() == "P": pbc = True else: raise ValueError("Unknown BC keyword values!") ase_bc.append(pbc) atoms.info["sparc_bc"] = [bc_.upper() for bc_ in sparc_bc] if twist_angle != 0: atoms.info["twist_angle (rad/Bohr)"] = twist_angle atoms.pbc = ase_bc return
[docs] def atoms_bc_to_sparc(atoms): """Use atoms' internal pbc and info to construct inpt blocks Returns: a dict containing 'BC' or 'TWIST_ANGLE' """ sparc_bc = ["P" if bc_ else "D" for bc_ in atoms.pbc] # If "sparc_bc" info is stored in the atoms object, convert again if "sparc_bc" in atoms.info.keys(): converted_bc = [] stored_sparc_bc = atoms.info["sparc_bc"] for bc1, bc2 in zip(sparc_bc, stored_sparc_bc): # We store helix and cyclic BCs as non-periodic in ase-atoms print(bc1, bc2) if ((bc1 == "D") and (bc2 != "P")) or ((bc1 == "P") and (bc2 == "P")): converted_bc.append(bc2) else: raise ValueError( "Boundary conditions stored in ASE " "atoms.pbc and atoms.info['sparc_bc'] " "are different!" ) sparc_bc = converted_bc block = {"BC": " ".join(sparc_bc)} if "twist_angle" in atoms.info.keys(): block["TWIST_ANGLE"] = atoms.info["twist_angle (rad/Bohr)"] return block