# Coverage for sparc/docparser.py: 83%
# -*- coding: utf-8 -*-
"""
A module to parse the LaTeX documents provided by SPARC
and convert them to its Python API

Created on Wed Mar 1 15:32:31 EST 2023

Tian Tian (alchem0x2a@gmail.com)
"""

import json
import re
from copy import copy
from datetime import datetime
from pathlib import Path
from warnings import warn

import numpy as np

# Some fields in master SPARC doc may cause auto type detection
# to fail, need hard-coded post-processing for now
postprocess_items = {
    "RELAX_FLAG": {"allow_bool_input": False},
    "NPT_SCALE_CONSTRAINTS": {"type": "string"},
    "NPT_SCALE_VECS": {"type": "integer array"},
    "TOL_POISSON": {"type": "double"},
}

sparc_repo_url = "https://github.com/SPARC-X/SPARC.git"


class SparcDocParser(object):
    """Parses LaTeX documentation of SPARC-X and converts it into a Python API.

    The parser reads the main manual file, collects the files it includes,
    extracts one parameter description per Beamer frame and groups the
    parameters by the categories listed in the introduction file.

    Attributes:
        root (Path): Directory holding the LaTeX sources.
        main_file (Path): The single file matching the main-file pattern.
        intro_file (Path): The introduction file listing parameter categories.
        include_files (list): Existing files referenced by the main file.
        params_from_intro (bool): Stored as given; it is not consulted by the
            parsing code in this class.
        version (str or None): SPARC version as 'YYYY.MM.DD', or None.
        parameter_categories (list): Categories in introduction-file order.
        parameters (dict): Parameters listed in the introduction, by symbol.
        other_parameters (dict): Parsed parameters not listed in the introduction.

    Methods:
        find_main_file(main_file_pattern): Finds the main LaTeX file based on a pattern.
        get_include_files(): Retrieves a list of included LaTeX files.
        parse_version(parse): Parses and sets the SPARC version.
        parse_parameters(): Extracts parameters from LaTeX files.
        postprocess(): Applies hard-coded post-processing to some parameters.
        to_dict(): Converts parsed information into a dictionary.
        json_from_directory(directory, include_subdirs, **kwargs): Create JSON from a directory.
        json_from_repo(url, version, include_subdirs, **kwargs): Create JSON from a repository.
    """

    def __init__(
        self,
        directory=".",
        main_file="*Manual.tex",
        intro_file="Introduction.tex",
        params_from_intro=True,
        parse_version=True,
    ):
        """Create the doc parser pointing to the root of the doc file of SPARC

        The SPARC doc is organized as follows:
        SPARC/doc/.LaTeX/
            |---- Manual.tex
            |---- Introduction.tex
            |---- {Section}.tex

        For parameters additional to the standard SPARC options, such as the
        SQ / cyclix options, see `json_from_directory`, which merges the
        dicts parsed from the sub-directories.

        Args:
            directory: root directory of the LaTeX files, may look like `SPARC/doc/.LaTeX`
            main_file: glob pattern of the main LaTeX file for the manual
            intro_file: name of the LaTeX file for the introduction
            params_from_intro: stored on the instance as `params_from_intro`
            parse_version: if not False, try to read the SPARC version from the C source

        Raises:
            FileNotFoundError: if the main file or the introduction file is missing.
        """
        self.root = Path(directory)
        self.main_file = self.find_main_file(main_file)
        self.intro_file = self.root / intro_file
        if not self.intro_file.is_file():
            raise FileNotFoundError(f"Introduction file {intro_file} is missing!")
        self.include_files = self.get_include_files()
        self.params_from_intro = params_from_intro
        self.parse_version(parse_version)
        self.parse_parameters()
        self.postprocess()

    def find_main_file(self, main_file_pattern):
        """
        Finds the main LaTeX file that matches the given pattern, e.g. Manual.tex or Manual_cyclix.tex

        Args:
            main_file_pattern (str): Glob pattern to match the main LaTeX file name.

        Returns:
            Path: Path to the main LaTeX file.

        Raises:
            FileNotFoundError: If no or multiple files match the pattern.
        """
        candidates = list(self.root.glob(main_file_pattern))
        if len(candidates) != 1:
            raise FileNotFoundError(
                f"Main file {main_file_pattern} is missing or more than 1 exists!"
            )
        return candidates[0]

    def get_include_files(self):
        """
        Retrieves a list of LaTeX files included in the main LaTeX document, e.g. Manual.tex.

        Included names whose `.tex` file does not exist under `self.root`
        are skipped with a warning.

        Returns:
            list: A list of paths to the included LaTeX files.

        Raises:
            IndexError: If the main file has no document environment.
        """
        pattern = r"\\begin\{document\}(.*?)\\end\{document\}"
        text = self.main_file.read_text(encoding="utf8")
        # Only the first begin/end document will be matched
        document_body = re.findall(pattern, text, re.DOTALL)[0]
        pattern_include = r"\\include\{(.+?)\}"
        include_files = []
        for name in re.findall(pattern_include, document_body, re.DOTALL):
            tex_file = self.root / f"{name}.tex"
            if tex_file.is_file():
                include_files.append(tex_file)
            else:
                warn(
                    (
                        f"TeX file {tex_file} is missing! It may be a typo in the document, "
                        "ignore parameters from this file."
                    )
                )
        return include_files

    def parse_version(self, parse=True):
        """
        Parses and sets the SPARC version based on the C-source file, if possible.
        The date is read from `src/initialization.c`, located two directory
        levels above `self.root`, and stored in the "YYYY.MM.DD" format.

        Args:
            parse (bool): If False, skip parsing and set the version to None.

        Sets:
            self.version (str): The parsed version in 'YYYY.MM.DD' format or None,
                                if either parse=False, the C-source code is missing,
                                or the version banner is not found exactly once.

        Raises:
            ValueError: If the banner is found but its date cannot be parsed.
        """
        if parse is False:
            self.version = None
            return
        init_c = self.root.parents[1] / "src" / "initialization.c"
        if not init_c.is_file():
            warn(
                'Cannot find the c source file "initialization.c", skip version parsing!'
            )
            self.version = None
            return
        text = init_c.read_text(encoding="utf8")
        # The parentheses must be escaped: a bare `$` is an end-of-string anchor
        # and can never be followed by "version", so it would never match.
        # NOTE(review): assumes the banner reads "SPARC (version <date>)" --
        # confirm against initialization.c.
        pattern_version = r"SPARC\s+\(\s*?version(.*?)\)"
        match = re.findall(pattern_version, text)
        if len(match) != 1:
            warn(
                'Parsing c source file "initialization.c" for version is unsuccessful!'
            )
            self.version = None
            return
        # Normalize commas and repeated whitespace so that e.g. "Jan 01, 2023"
        # becomes "Jan 01 2023" before date parsing
        date_str = re.sub(r"\s+", " ", match[0].strip().replace(",", " "))
        # Older version of SPARC doc may contain abbreviated month format
        date_version = None
        for fmt in ("%b %d %Y", "%B %d %Y"):
            try:
                date_version = datetime.strptime(date_str, fmt).strftime("%Y.%m.%d")
                break
            except ValueError:
                # strptime signals a format mismatch with ValueError; try the next one
                continue
        if date_version is None:
            raise ValueError(f"Cannot parse date time {date_str}")
        self.version = date_version
        return

    def __parse_parameter_from_frame(self, frame):
        """Parse the parameters from a single LaTeX frame

        Args:
            frame (str): the body of one LaTeX Beamer frame

        Returns:
            dict: a key-value paired dict parsed from the frame, or an empty
                  dict when the frame is not a parameter frame or its type
                  block is commented out. Some field names include:
                    name: TOL_POISSON
                    type: Double | Integer | String | Character | Double array
                    unit: specified in the doc
        """
        pattern_label = r"\\texttt\{(.*?)\}.*?\\label\{(.*?)\}"
        pattern_block = r"\\begin\{block\}\{(.*?)\}([\s\S]*?)\\end\{block\}"
        match_label = re.findall(pattern_label, frame, re.DOTALL | re.MULTILINE)
        if len(match_label) != 1:
            warn("Provided a non-structured frame for parsing, skip.")
            return {}
        raw_symbol, raw_label = match_label[0]
        symbol = convert_tex_parameter(raw_symbol.strip())
        label = raw_label.strip()
        # Every match contains the (name, content) pair of the blocks
        blocks = re.findall(pattern_block, frame, re.DOTALL | re.MULTILINE)
        param_dict = {"symbol": symbol, "label": label}
        # TODO: add more type definition
        for key, content in blocks:
            key = key.lower()
            content = content.strip()
            # Do not parse commented-out values
            if (key == "type") and (content.startswith("%")):
                warn(f"Parameter {symbol} is disabled in the doc, ignore!")
                return {}
            if key in ("example",):
                content = convert_tex_example(content)
            param_dict[key] = content
        # Sanitize 1: Convert types
        param_dict = sanitize_type(param_dict)
        # Sanitize 2: Convert default values
        param_dict = sanitize_default(param_dict)
        # Sanitize 3: Remove TeX components in description and remark
        param_dict = sanitize_description(param_dict)

        return param_dict

    def __parse_frames_from_text(self, text):
        """Extract the body of every Beamer frame found in the text

        Arguments:
            text (str): Full LaTeX text
        Returns:
            list: Matched LaTeX Beamer frame fragments
        """
        pattern_frame = r"\\begin\{frame\}(.*?)\\end\{frame\}"
        return re.findall(pattern_frame, text, re.DOTALL | re.MULTILINE)

    def __parse_intro_file(self):
        """Parse the introduction file

        Returns:
            parameter_categories (list): list of categories
                                         (following order in the introduction file)
            parameter_dict (dict): maps each category to a list of
                                   {"label", "symbol"} dicts

        Raises:
            IndexError: if no "Input file options" frame is found.
            ValueError: if a category appears more than once.
        """
        text_intro = self.intro_file.read_text(encoding="utf8")
        pattern_params = (
            r"^\\begin\{frame\}.*?\{Input file options\}.*?$(.*?)\\end\{frame\}"
        )
        pattern_block = r"\\begin\{block\}\{(.*?)\}([\s\S]*?)\\end\{block\}"
        pattern_line = r"\\hyperlink\{(.*?)\}{\\texttt\{(.*?)\}\}"
        text_params = re.findall(pattern_params, text_intro, re.DOTALL | re.MULTILINE)[
            0
        ]
        parameter_categories = []
        parameter_dict = {}
        for block_title, block_body in re.findall(pattern_block, text_params):
            cat = block_title.lower()
            if cat in parameter_categories:
                raise ValueError(
                    f"Key {cat} already exists! You might have a wrong LaTeX doc file!"
                )
            parameter_categories.append(cat)
            parameter_dict[cat] = []
            for line in block_body.split("\n"):
                # Each entry holds 2 items: the "link" that matches a label in
                # the included tex files, and the symbol name in text format.
                # In most cases the link and symbol should be the same
                for link, tex_symbol in re.findall(pattern_line, line):
                    parameter_dict[cat].append(
                        {
                            "label": link.strip(),
                            "symbol": convert_tex_parameter(tex_symbol.strip()),
                        }
                    )
        return parameter_categories, parameter_dict

    def __parse_all_included_files(self):
        """Collect all known parameters from included files

        Returns:
            dict: All known parameters from included files, keyed by label
        """
        all_params = {}
        intro_resolved = self.intro_file.resolve()
        for f in self.include_files:
            # Do not parse intro file since it's waste of time
            if f.resolve() == intro_resolved:
                continue
            text = f.read_text(encoding="utf8")
            for frame in self.__parse_frames_from_text(text):
                dic = self.__parse_parameter_from_frame(frame)
                if len(dic) > 0:
                    all_params[dic["label"]] = dic
        return all_params

    def parse_parameters(self):
        """The actual thing for parsing parameters

        Sets:
            parameters (dict): Parsed parameters that are listed in the intro file
            parameter_categories (list): List of categories
            other_parameters (dict): Any parameters that are not included in the categories
        """
        parameter_categories, parameter_dict = self.__parse_intro_file()
        all_params = self.__parse_all_included_files()
        self.parameter_categories = parameter_categories
        # parameters contain only the "valid" ones that are shown in the intro
        # all others are clustered in "other_parameters"
        self.parameters = {}
        for cat, params in parameter_dict.items():
            for p in params:
                label = p["label"]
                symbol = p["symbol"]
                # pop so that whatever remains afterwards is "other"
                param_details = all_params.pop(label, {})
                if param_details != {}:
                    param_details["category"] = cat
                    self.parameters[symbol] = param_details

        self.other_parameters = {}
        for param_details in all_params.values():
            symbol = param_details["symbol"]
            self.other_parameters[symbol] = param_details
        return

    def postprocess(self):
        """Use the hardcoded dict postprocess_items to fix some issues

        Only entries present in `self.parameters` are updated.
        """
        for param, fix in postprocess_items.items():
            if param in self.parameters:
                self.parameters[param].update(**fix)
        return

    def to_dict(self):
        """Output a json dict from current document parser

        Returns:
            dict: All API schemes in dict
        """
        doc = {}
        doc["sparc_version"] = self.version
        doc["categories"] = self.parameter_categories
        doc["parameters"] = dict(sorted(self.parameters.items()))
        doc["other_parameters"] = dict(sorted(self.other_parameters.items()))
        doc["data_types"] = sorted({p["type"] for p in self.parameters.values()})
        return doc

    @classmethod
    def json_from_directory(cls, directory=".", include_subdirs=True, **kwargs):
        """
        Parse the manual in `directory` and optionally merge its sub-directory manuals

        Parameters, categories and data types found in a sub-directory are
        added only when the root manual does not already define them.

        Arguments:
            directory (str or PosixPath): The directory to the LaTeX files, e.g. <sparc-root>/doc/.LaTeX
            include_subdirs (bool): If true, also parse the manual files in submodules, e.g. cyclix, highT
            **kwargs: forwarded to the constructor for the root directory only
        Returns:
            str: Formatted json-string of the API
        """
        directory = Path(directory)
        root_dict = cls(directory=directory, **kwargs).to_dict()
        if include_subdirs:
            for sub_manual_tex in directory.glob("*/*Manual.tex"):
                subdir = sub_manual_tex.parent
                try:
                    sub_dict = cls(directory=subdir, parse_version=False).to_dict()
                except FileNotFoundError:
                    print(
                        subdir,
                        " Latex files not found. Check naming conventions for Manual.tex. Expects format *Manual.tex",
                    )
                    continue
                for param, param_desc in sub_dict["parameters"].items():
                    if param not in root_dict["parameters"]:
                        root_dict["parameters"][param] = param_desc
                # Combine the subdir categories
                for sub_category in sub_dict["categories"]:
                    if sub_category not in root_dict["categories"]:
                        root_dict["categories"].append(sub_category)
                # Combine data types
                for sub_dt in sub_dict["data_types"]:
                    if sub_dt not in root_dict["data_types"]:
                        root_dict["data_types"].append(sub_dt)

        # indent=True is kept as-is; json treats it as indent=1
        json_string = json.dumps(root_dict, indent=True)
        return json_string

    @classmethod
    def json_from_repo(
        cls, url=sparc_repo_url, version="master", include_subdirs=True, **kwargs
    ):
        """
        Download the source code from git and use json_from_directory to parse

        Arguments:
            url (str): URL for the repository of SPARC, default is "https://github.com/SPARC-X/SPARC.git"
            version (str): Git version or commit hash of the SPARC repo
            include_subdirs (bool): If true, also parse the manual files in submodules, e.g. cyclix, highT
        Returns:
            str: Formatted json-string of the API
        Raises:
            subprocess.CalledProcessError: if a git command fails
        """
        import tempfile
        from subprocess import run

        with tempfile.TemporaryDirectory() as tmpdir:
            tmpdir = Path(tmpdir)
            download_dir = tmpdir / "SPARC"
            download_cmds = ["git", "clone", "--depth", "1", str(url), "SPARC"]
            run(download_cmds, cwd=tmpdir, check=True)
            if version not in ["master", "HEAD"]:
                # The first positional argument of `git fetch` is the
                # repository, so the remote has to be named explicitly
                fetch_cmds = ["git", "fetch", "--depth", "1", "origin", str(version)]
                run(fetch_cmds, cwd=download_dir, check=True)
                # A shallow fetch of a single ref creates no local branch or
                # tag; what was fetched is reachable through FETCH_HEAD
                checkout_cmds = ["git", "checkout", "FETCH_HEAD"]
                run(checkout_cmds, cwd=download_dir, check=True)
            json_string = cls.json_from_directory(
                directory=download_dir / "doc" / ".LaTeX",
                include_subdirs=include_subdirs,
                **kwargs,
            )
        return json_string

def convert_tex_parameter(text):
    """
    Convert a TeX string to non-escaped name (for parameter only)

    Surrounding whitespace is stripped and every TeX-escaped underscore
    (backslash followed by underscore) becomes a plain underscore.

    Arguments:
        text (str): Parameter name in LaTeX format
    Returns:
        str: Text with sanitized parameter
    """
    # The backslash is written as an explicit escape; the same characters
    # spelled without it are an invalid escape sequence in newer Pythons
    return text.strip().replace("\\_", "_")

def convert_tex_example(text):
    """Convert TeX codes of examples as much as possible

    The examples follow the format
    SYMBOL: values (may contain new lines)

    Arguments:
        text (str): Single or multiline LaTeX contents
    Returns:
        str: Sanitized literal text in the form "SYMBOL: values"
    Raises:
        ValueError: if the text contains no colon separating symbol and values
    """
    # Applied in order: every remaining single backslash becomes a newline
    # last, so the escaped underscore has to be handled before it
    mapper = {"\\texttt{": "", "\\_": "_", "}": "", "\\": "\n"}
    new_text = text
    for tex, plain in mapper.items():
        new_text = new_text.replace(tex, plain)

    # Split on the first colon only, so values may themselves contain colons
    symbol, values = new_text.split(":", 1)
    symbol = symbol.strip()
    values = re.sub("\n+", "\n", values.strip())
    # Remove all comment lines
    values = "\n".join(
        line for line in values.splitlines() if not line.lstrip().startswith("%")
    )
    return f"{symbol}: {values}"

def convert_tex_default(text, desired_type=None):
    """Convert default values as much as possible.
    The desired type will convert the default values
    to the closest format

    Currently supported conversions
    1. Remove all surrounding text modifiers (texttt)
    2. Remove all symbol wrappers $
    3. Convert value to single or array

    Text mentioning "none" or "no default" (case-insensitive) yields None,
    and text containing "automat" yields the string "auto".

    Arguments:
        text (str): Raw text string for value
        desired_type (str or None): Data type to be converted to. If None, preserve the string format

    Returns:
        converted: Value converted from raw text
    """
    # Applied in order: the texttt wrapper has to go before bare braces are removed
    mapper = {
        "\\texttt{": "",
        "}": "",
        "{": "",
        "\\_": "_",
        "\\\\": "\n",
        "$": "",
    }
    text = text.strip()
    text = re.sub(r"\\hyperlink\{.*?\}", "", text)
    text = re.sub(r"\\times", "x", text)
    for tex, plain in mapper.items():
        text = text.replace(tex, plain)
    text = re.sub(r"\n+", "\n", text)
    # Remove all comment lines
    text = "\n".join(
        line for line in text.splitlines() if not line.lstrip().startswith("%")
    )

    lowered = text.lower()
    if "none" in lowered or "no default" in lowered:
        return None
    if "automat" in lowered:
        return "auto"
    # No conversion requested, or the target is already a string
    if desired_type is None or desired_type == "string":
        return text
    return text2value(text, desired_type)

def convert_comment(text):
    """Used to remove TeX-specific commands in description and remarks
    as much as possible

    Hyperlink and href targets are dropped, the times macro becomes "x",
    texttt wrappers, braces and dollar signs are removed, escaped underscores
    are unescaped, double backslashes become newlines and lines starting
    with a percent sign are discarded.

    Arguments:
        text (str): Raw LaTeX code for the comment section in manual

    Returns:
        str: Sanitized plain text
    """
    # Applied in order: the texttt wrapper has to go before bare braces are removed
    mapper = {
        "\\texttt{": "",
        "}": "",
        "{": "",
        "\\_": "_",
        "\\\\": "\n",
        "$": "",
    }
    text = text.strip()
    text = re.sub(r"\\hyperlink\{.*?\}", "", text)
    text = re.sub(r"\\href\{.*?\}", "", text)
    text = re.sub(r"\\times", "x", text)
    for tex, plain in mapper.items():
        text = text.replace(tex, plain)
    text = re.sub(r"\n+", "\n", text)
    # Remove all comment lines
    return "\n".join(
        line for line in text.splitlines() if not line.lstrip().startswith("%")
    )

def text2value(text, desired_type):
    """Convert raw text to a desired type

    The text is first read as a space-delimited numerical table. Scalar
    targets take the first entry of that table, array targets cast the whole
    table to a (possibly nested) list.

    Arguments:
        text (str): Text contents for the value
        desired_type (str): Target data type from 'string', 'integer',
                            'integer array', 'double', 'double array',
                            'bool', 'bool array'
    Returns:
        converted: Value converted to the desired type, or None when the
                   text cannot be read as numbers, the cast fails or the
                   type is not one of the above
    """
    from contextlib import suppress

    if desired_type is None:
        return text
    target = desired_type.lower()
    if target == "string":
        return text.strip()

    values = None
    try:
        values = np.genfromtxt(text.splitlines(), delimiter=" ", dtype=float)
        # Unreadable fields come back as NaN; treat that as a failed parse
        if np.isnan(values).any():
            warn(
                f"Some fields in {text} cannot converted to a numerical array, will skip conversion."
            )
            values = None
    except Exception as e:
        warn(
            f"Cannot transform {text} to array, skip converting. Error message is:\n {e}"
        )
        values = None

    if values is None:
        return None

    # A single number is read as a 0-d array; promote it so indexing works
    if values.shape == ():
        values = np.reshape(values, [1])

    scalar_casts = {"integer": int, "bool": bool, "double": float}
    array_casts = {"integer array": int, "bool array": bool, "double array": float}

    # Any failure during the cast simply results in None
    with suppress(Exception):
        if target in scalar_casts:
            return scalar_casts[target](values[0])
        if target in array_casts:
            return values.astype(array_casts[target]).tolist()
    return None

def is_array(text):
    """Tell whether a whitespace-separated string holds more than one number

    The string is read with numpy as a whitespace-separated list of floats.
    It is only used to compare a float / int value.

    Arguments:
        text (str): Text containing one or more numbers

    Returns:
        bool: True only if more than one value is read. A text from which no
              value can be read (e.g. an empty string) is not an array.
    """
    val = np.fromstring(text, sep=" ")
    # Strictly "more than one": zero parsed values must not count as an array
    return val.size > 1

def contain_only_bool(text):
    """Check that every number in a string is either 0 or 1

    A string containing a decimal point, a sign or an exponent marker is
    rejected right away. A string without any digits passes the check.
    """
    if any(marker in text for marker in ".+-eE"):
        return False
    tokens = re.findall(r"[-+e\d]+", text, re.DOTALL)
    return all(int(token) in (0, 1) for token in tokens)

def sanitize_description(param_dict):
    """Clean the description and remark of one parameter entry

    The untouched texts are kept under `description_raw` and `remark_raw`,
    while `description` and `remark` receive the output of `convert_comment`.
    A missing remark is treated as an empty string; the description is
    mandatory.

    Arguments:
        param_dict (dict): Raw dict for one parameter entry

    Returns:
        dict: A copy of the entry with the four fields above set
    """
    raw_description = param_dict["description"]
    raw_remark = param_dict.get("remark", "")
    return {
        **param_dict,
        "description_raw": raw_description,
        "remark_raw": raw_remark,
        "description": convert_comment(raw_description),
        "remark": convert_comment(raw_remark),
    }

def sanitize_default(param_dict):
    """Clean the default value of one parameter entry

    The original text is preserved under `default_remark`, and `default` is
    replaced by the result of `convert_tex_default` for the entry's type.

    Both the `default` and `type` keys must be present, so this function
    should be called after sanitize_type.
    """
    raw_default = param_dict["default"]
    result = dict(param_dict)
    result["default_remark"] = raw_default
    result["default"] = convert_tex_default(raw_default, param_dict["type"])
    return result

def sanitize_type(param_dict):
    """Sanitize the param dict so that the type are more consistent

    For example, if type is Double / Integer,
    but parameter is a vector,
    make a double vector or integer vector

    Arguments:
        param_dict (dict): Raw dict for one parameter entry; must contain
                           `symbol`. `example` and `default` are consulted
                           for integer / double types only.

    Returns:
        dict: A copy of the entry. When the entry has a `type`, the copy
              carries the lower-cased sanitized `type` and an
              `allow_bool_input` flag; otherwise it is returned unchanged.
    """
    sanitized_dict = param_dict.copy()
    symbol = param_dict["symbol"]
    origin_type = param_dict.get("type", None)
    if origin_type is None:
        print("Dict does not have type!")
        return sanitized_dict
    origin_type = origin_type.lower()

    sanitized_type = None
    sanitized_dict["allow_bool_input"] = False
    # First pass, remove all singular types
    if origin_type == "0 or 1":
        # Not final yet: falls through to the integer handling below
        origin_type = "integer"
    elif "permutation" in origin_type:
        sanitized_type = "integer"
    elif origin_type in ("string", "character"):
        sanitized_type = "string"
    elif "array" in origin_type:
        sanitized_type = origin_type

    # Pass 2, test if int values are arrays
    if (origin_type in ("int", "integer", "double")) and (sanitized_type is None):
        if "int" in origin_type:
            origin_type = "integer"
        # Pre-set everything the handler and the code below read, so that a
        # missing "example" or "default" key cannot leave a name unbound
        example_value = None
        _array_test = False
        _bool_test = False
        # Test if the value from example is a single value or array
        try:
            example_value = param_dict["example"].split(":")[1]
            default = param_dict["default"]
            _array_test = is_array(example_value)
            _bool_test = contain_only_bool(example_value) and contain_only_bool(default)
        except Exception as e:
            # Best effort: any failure means "plain scalar, no bool input"
            warn(
                f"Array conversion failed for {example_value}, ignore."
                f"The error is {e}"
            )
            _array_test = False  # Retain
            _bool_test = False

        if _array_test is True:
            sanitized_type = f"{origin_type} array"
        else:
            sanitized_type = origin_type

        # Pass 3: int to boolean test. This should be done very tight
        if _bool_test and ("integer" in sanitized_type):
            sanitized_dict["allow_bool_input"] = True

    if sanitized_type is None:
        # Currently there is only one NPT_NH_QMASS has this type
        # TODO: think of a way to format a mixed array?
        warn(f"Type of {symbol} if not standard digit or array, mark as others.")
        sanitized_type = "other"
    # TODO: how about provide a true / false type?
    sanitized_dict["type"] = sanitized_type
    return sanitized_dict

if __name__ == "__main__":
    # Run the module as independent script to extract a json-formatted parameter list
    from argparse import ArgumentParser

    argp = ArgumentParser(description="Parse the LaTeX doc to json")
    argp.add_argument(
        "-o",
        "--output",
        default="parameters.json",
        help="Output file name (json-formatted)",
    )
    argp.add_argument(
        "--include-subdirs",
        action="store_true",
        help="Parse manual parameters from subdirs",
    )
    argp.add_argument("--git", action="store_true")
    argp.add_argument(
        "--version",
        default="master",
        help="Version of the doc. Only works when using git repo",
    )
    argp.add_argument(
        "root",
        nargs="?",
        help=(
            "Root of the SPARC doc LaTeX files, or remote git repo link. If not provided and --git is enables, use the default github repo"
        ),
    )

    args = argp.parse_args()
    # The output always carries a .json suffix, whatever the user typed
    output = Path(args.output).with_suffix(".json")
    if args.git:
        # Fall back to the default repository when no link is given
        root = sparc_repo_url if args.root is None else args.root
        json_string = SparcDocParser.json_from_repo(
            url=root, version=args.version, include_subdirs=args.include_subdirs
        )
    else:
        if args.root is None:
            # Without --git there is no default location to parse; report a
            # usage error instead of failing inside Path(None)
            argp.error("root is required unless --git is given")
        json_string = SparcDocParser.json_from_directory(
            directory=Path(args.root), include_subdirs=args.include_subdirs
        )
    with open(output, "w", encoding="utf8") as fd:
        fd.write(json_string)
    print(f"SPARC parameter specifications written to {output}!")
    print("If you need to fintune the definitions, please edit them manually.")