# Source code for sparc.sparc_parsers.ion

"""
Created on Thu Oct 18 14:16:21 2018

Ben Comer (Georgia Tech)

This file has been heavily modified since SPARC 0.1

TODO: add a fuller description of this .ion file I/O parser
"""
import re
import textwrap
from warnings import warn

import numpy as np
from ase.units import Bohr

# Safe wrappers for both string and fd
from ase.utils import reader, writer

from ..api import SparcAPI
from .utils import (
    bisect_and_strip,
    make_reverse_mapping,
    read_block_input,
    strip_comments,
)



# [docs]
class InvalidSortingComment(ValueError):
    """Raised when the ASE-SORT comment section of an .ion file is invalid.

    Arguments:
    message: human-readable description of the problem; kept on the
             ``message`` attribute and also forwarded to ``ValueError``
    """

    def __init__(self, message):
        # Forward to the base class so that ``args`` / ``str(exc)`` carry the
        # message even when the exception is built with ``message=...``.
        super().__init__(message)
        self.message = message



# Module-level SparcAPI instance, used as the default ``validator`` argument
# of the reader / writer helpers defined in this module.
defaultAPI = SparcAPI()


@reader
def _read_ion(fileobj, validator=defaultAPI):
    """
    Read information from the .ion file. Note, this method does not return an atoms object,
    but rather return a dict. Thus the label option is not necessary to keep

    Only the .ion content itself is parsed here; the cell is not read by
    this function.

    Arguments:
    fileobj: file name or file-like object (handled by the ``reader`` decorator)
    validator: API object forwarded to ``read_block_input`` and the HUBBARD parser

    Returns:
    dict of the form
    {"ion": {"atom_blocks": [...],       # one parsed block per ATOM_TYPE section
             "comments": [...],          # comment lines without the ASE-SORT section
             "extra": {"hubbard": ...},  # "hubbard" only present if a HUBBARD section exists
             "sorting": {"sort": ..., "resort": ...}}}

    Raises:
    ValueError: if more than one HUBBARD section header is found
    """
    contents = fileobj.read()
    data, comments = strip_comments(contents)
    # We do not read the cell at this time!
    sort, resort, new_comments = _read_sort_comment(comments)

    # find the index for all atom type lines. They should be at the top of their block
    # @TT 2025.06.04 HUBBARD block comes at the end of the
    # ion file defines the last line in data
    # Only a line that *starts* with HUBBARD is a section header; a plain
    # substring test would also fire on e.g. a PSEUDO_POT path containing
    # the word "HUBBARD".
    hubbard_bounds = [
        i for i, x in enumerate(data) if x.lstrip().startswith("HUBBARD")
    ]
    if len(hubbard_bounds) == 0:
        atom_type_bounds_lim = len(data)
    elif len(hubbard_bounds) == 1:
        # The atom blocks end where the HUBBARD section begins
        atom_type_bounds_lim = hubbard_bounds[0]
    else:
        # TODO: make it a format error
        raise ValueError("Bad .ion file format, multiple HUBBARD sections exist.")

    atom_type_bounds = [i for i, x in enumerate(data) if re.match("^ATOM_TYPE", x)]
    atom_type_bounds += [atom_type_bounds_lim]
    # Consecutive bounds delimit one atom block each
    atom_blocks = [
        read_block_input(data[start:end], validator=validator)
        for start, end in zip(atom_type_bounds[:-1], atom_type_bounds[1:])
    ]

    extra_blocks = {}
    # Now handle hubbard information --> extra
    # the hubbard block is currently the last to appear in
    # .ion file
    if len(hubbard_bounds) == 1:
        hubbard_settings = _parse_hubbard_block(
            data[hubbard_bounds[0] :], validator=validator
        )
        extra_blocks["hubbard"] = hubbard_settings

    return {
        "ion": {
            "atom_blocks": atom_blocks,
            "comments": new_comments,
            "extra": extra_blocks,
            "sorting": {"sort": sort, "resort": resort},
        }
    }


@writer
def _write_ion(
    fileobj,
    data_dict,
    validator=defaultAPI,
):
    """
    Writes the ion file content from the atom_dict

    Please note this is not a Atoms-compatible function!

    The data_dict takes similar format as _read_ion

    Basically, we want to ensure
    data_dict = _read_ion("some.ion")
    _write_ion("some.ion", data_dict)
    shows the same format

    The input ``data_dict`` (including its "comments" list) is not modified.

    Arguments:
    fileobj: file name or file-like object (handled by the ``writer`` decorator)
    data_dict: dict with an "ion" entry holding "atom_blocks" and optionally
               "comments", "sorting" and "extra"
    validator: API object used to convert values to strings

    Raises:
    ValueError: if the "ion" entry or its "atom_blocks" is missing, or if a
                block lacks ATOM_TYPE, N_TYPE_ATOM or PSEUDO_POT
    """
    ion_dict = data_dict.get("ion", None)
    if ion_dict is None:
        raise ValueError("No ion data provided in the input!")
    if "atom_blocks" not in ion_dict:
        raise ValueError(
            "Must provide a data-section in the data_dict (blocks of atomic information)"
        )

    # Work on a copy: the ASE-SORT lines appended below must not leak into
    # the caller's comment list, otherwise writing the same data_dict twice
    # would emit duplicated sorting sections.
    comments = list(ion_dict.get("comments", []))
    banner = "Ion File Generated by SPARC ASE Calculator"
    if len(comments) == 0:
        comments = [banner]
    elif "ASE" not in comments[0]:
        comments = [banner] + comments

    # Handle the sorting mapping
    # the index lines are wrapped at 80 characters
    if "sorting" in ion_dict:
        resort = ion_dict["sorting"].get("resort", [])
        # Write resort information only when it's actually useful
        if len(resort) > 0:
            comments.append("ASE-SORT:")
            index_lines = textwrap.wrap(" ".join(map(str, resort)), width=80)
            comments.extend(index_lines)
            comments.append("END ASE-SORT")

    for line in comments:
        fileobj.write(f"# {line}\n")

    fileobj.write("\n")

    # Keys are written in this fixed order for every atom block
    ordered_keys = (
        "ATOM_TYPE",
        "N_TYPE_ATOM",
        "PSEUDO_POT",
        "COORD_FRAC",
        "COORD",
        "SPIN",
        "RELAX",
    )
    # These keys may be absent, and are always written on their own lines
    # below the "KEY:" header
    optional_multiline_keys = ("RELAX", "COORD", "COORD_FRAC", "SPIN")

    blocks = ion_dict["atom_blocks"]
    for block in blocks:
        for key in ordered_keys:
            val = block.get(key, None)
            if (key not in optional_multiline_keys) and (val is None):
                raise ValueError(f"Key {key} is not provided! Abort writing ion file")
            # TODO: change the API version
            if val is None:
                continue

            val_string = validator.convert_value_to_string(key, val)
            # TODO: make sure 1 line is accepted
            # TODO: write pads to vector lines
            if (val_string.count("\n") > 0) or (key in optional_multiline_keys):
                output = f"{key}:\n{val_string}\n"
            else:
                output = f"{key}: {val_string}\n"
            fileobj.write(output)
            # TODO: check extra keys
            # TODO: how to handle multiple psp files?
        # Write a split line
        # TODO: do we need to distinguish the last line?
        fileobj.write("\n")

    # @TT 2025.06.04 add support for HUBBARD parameters
    extra_blocks = ion_dict.get("extra", {})
    if "hubbard" in extra_blocks:
        _check_hubbard_block(ion_dict, extra_blocks["hubbard"])
        _write_hubbard_block(fileobj, extra_blocks["hubbard"], validator)
    return


def _ion_coord_to_ase_pos(data_dict, cell=None):
    """Convert the COORD or COORD_FRAC from atom blocks to ASE's positions

    Arguments:
    data_dict: dict with data_dict["ion"]["atom_blocks"], a sequence of
               per-species blocks
    cell: a unit cell in ASE-unit (i.e. parsed from inpt._inpt_cell_to_ase_cell),
          only required when a block provides COORD_FRAC

    This function modifies the data_dict in-place to add a field '_ase_positions'
    to the atom_blocks

    Raises:
    KeyError: if a block has both COORD and COORD_FRAC, has COORD_FRAC while
              no cell is given, or has neither of the two fields
    """
    has_cell = cell is not None
    ion_atom_blocks = data_dict["ion"]["atom_blocks"]
    for i, block in enumerate(ion_atom_blocks):
        has_frac = "COORD_FRAC" in block
        if ("COORD" in block) and has_frac:
            raise KeyError("COORD and COORD_FRAC cannot co-exist!")
        if has_frac and (not has_cell):
            raise KeyError("COORD_FRAC must be acompanied by a cell!")
        coord = block.get("COORD", None)
        if coord is not None:
            # Cartesian coordinates are scaled by the Bohr constant;
            # asarray lets plain nested lists work as well as arrays.
            positions = np.asarray(coord) * Bohr
        elif has_frac:
            # Fractional coordinates times the cell give positions in the
            # unit of the cell.
            # NOTE(review): the previous comment here said the cell is in
            # Bohr while the docstring says ASE units -- confirm.
            positions = np.dot(block["COORD_FRAC"], cell)
        else:
            raise KeyError(f"Atom block {i} has neither COORD nor COORD_FRAC!")
        ion_atom_blocks[i]["_ase_positions"] = positions
    return


def _read_sort_comment(lines):
    """Parse the atom sorting info from the comment lines
    Format

    ASE-SORT:
    r_i r_j r_k ....
    END ASE-SORT
    where r_i etc are the indices in the original ASE atoms object

    Arguments:
    lines: list of comment lines

    Returns:
    (sort, resort, new_lines) where ``resort`` is the list of integer indices
    read from the section, ``sort`` is ``make_reverse_mapping(resort)`` and
    ``new_lines`` are the comment lines that are not part of the section.

    Raises:
    InvalidSortingComment: if "ASE-SORT" shows up again after the section ended
    ValueError: if an index token inside the section is not an integer
    """
    i = 0
    resort = []
    record = False
    new_lines = []
    while i < len(lines):
        line = lines[i]
        key, _ = bisect_and_strip(line, ":")
        i += 1
        if key == "ASE-SORT":
            record = True
        elif key == "END ASE-SORT":
            record = False
            break
        elif record is True:
            # split() without argument tolerates tabs and repeated spaces,
            # which split(" ") would turn into empty, non-integer tokens
            resort += [int(token) for token in line.split()]
        else:
            # Put original lines in new_lines
            new_lines.append(line)
    # Put all remaining lines in new_lines
    for line in lines[i:]:
        if "ASE-SORT" in line:
            raise InvalidSortingComment(
                "There appears to be multiple sorting information in the ion comment section!"
            )
        new_lines.append(line)
    # Still recording here means the END ASE-SORT marker was never seen
    if record:
        warn(
            "ASE atoms resort comment block is not properly formatted, this may cause data loss!"
        )
    sort = make_reverse_mapping(resort)
    assert set(sort) == set(resort), "Sort and resort info are of different length!"
    return sort, resort, new_lines


def _parse_hubbard_block(block, validator=defaultAPI):
    """Parse the hubbard blocks into the following list
    [{"U_ATOM_TYPE": <atom-name>,
    "U_VAL": array},
    ]

    A hubbard block (after stripping the extra comments) may look like:
    ['HUBBARD:'
    'U_ATOM_TYPE: Ni',
    'U_VAL: 0 0 0.05 0']

    The U_ATOM_TYPE and U_VAL must come in pairs ordered

    Arguments:
    block: list of lines, the first being the 'HUBBARD:' header
    validator: API object forwarded to ``read_block_input``

    Returns:
    list with one ``read_block_input`` result per (U_ATOM_TYPE, U_VAL) pair

    Raises:
    ValueError: if the block is empty, the header is missing, or the number
                of lines after the header is odd
    """
    if (not block) or ("HUBBARD:" not in block[0]):
        raise ValueError("Ill-formatted HUBBARD block in .ion file!")
    entries = block[1:]
    if len(entries) % 2 != 0:
        raise ValueError("U_ATOM_TYPE and U_VAL are not paired in the HUBBARD block!")
    # Step through the entries two lines at a time so that every sub-block
    # holds exactly one U_ATOM_TYPE / U_VAL pair (a stride of 1 would mix
    # the U_VAL of one species with the U_ATOM_TYPE of the next).
    u_pairs = [
        read_block_input(entries[start : start + 2], validator=validator)
        for start in range(0, len(entries), 2)
    ]
    return u_pairs


def _check_hubbard_block(ion_dict, hubbard_u_pairs):
    """Validate the HUBBARD entries against the atom blocks.

    Each entry is checked, in this order:
    1. its U_ATOM_TYPE is the ATOM_TYPE of one of ion_dict["atom_blocks"]
    2. its U_ATOM_TYPE did not already appear in an earlier entry
    3. its U_VAL has length 4

    Raises ValueError on the first violated rule; returns None otherwise.
    """
    known_types = {blk["ATOM_TYPE"] for blk in ion_dict["atom_blocks"]}
    seen_types = set()
    for u_entry in hubbard_u_pairs:
        species = u_entry["U_ATOM_TYPE"]
        if species not in known_types:
            raise ValueError(
                f"Element {species} in the HUBBARD setting does not exist in the input structure!"
            )
        if species in seen_types:
            raise ValueError(f"Element {species} is duplicated in the HUBBARD setting!")
        seen_types.add(species)
        if len(u_entry["U_VAL"]) != 4:
            raise ValueError(f"U_VAL for element {species} must have length of 4!")


def _write_hubbard_block(fileobj, u_pairs=None, validator=defaultAPI):
    """Write the HUBBARD U-blocks at the end of the .ion file
    format

    HUBBARD:
    U_ATOM_TYPE: Ni
    U_VAL: 0 0 0.05 0

    U_ATOM_TYPE: Cr
    U_VAL: 0 0 0.05 0

    Arguments:
    fileobj: writable file-like object
    u_pairs: sequence of dicts, each with "U_ATOM_TYPE" and "U_VAL";
             None or an empty sequence writes nothing
    validator: API object used to convert values to strings
    """
    # None replaces the former mutable ``[]`` default; both mean "no entries"
    if u_pairs is None or len(u_pairs) == 0:
        return
    fileobj.write("HUBBARD:\n")
    for u_pair in u_pairs:
        # TODO: add value checker
        for key in ("U_ATOM_TYPE", "U_VAL"):
            val = u_pair[key]
            val_string = validator.convert_value_to_string(key, val)
            fileobj.write(f"{key}: {val_string}\n")
        # Blank line separates consecutive species entries
        fileobj.write("\n")
    return